## ----style, echo = FALSE, results = 'asis'--------------------------------------------------------
# Vignette setup: call BiocStyle::markdown(), set console width / print limits,
# and let the KNITR_EVAL and KNITR_CACHE environment variables toggle chunk
# evaluation and caching (both fall back to "TRUE" when unset).
BiocStyle::markdown()
options(width = 100, max.print = 1000)
knitr::opts_chunk$set(
  eval = as.logical(Sys.getenv("KNITR_EVAL", "TRUE")),
  cache = as.logical(Sys.getenv("KNITR_CACHE", "TRUE")),
  error = FALSE
)

## -------------------------------------------------------------------------------------------------
# Example GRanges with 33 ranges: seqnames chr1..chr22 plus "chr" + the last
# 11 lowercase letters; the first 10 ranges are on "+", the remaining 23 on "-".
# `score` and `GC` are stored as metadata columns.
library(GenomicRanges)
gr <- GRanges(
  seqnames = paste0("chr", c(1:22, tail(letters, 11))),
  ranges = IRanges(start = 1:33, width = 1000),
  strand = c(rep("+", 10), rep("-", 23)),
  score = 1:33,
  GC = seq(1, 0, length.out = 33)
)

## ----se-data--------------------------------------------------------------------------------------
# 20 x 9 integer matrix, filled row by row.
data <- matrix(1:180, ncol = 9, byrow = TRUE)

## ----se-gr----------------------------------------------------------------------------------------
# Twenty ranges named gene1..gene20 (10 on "+", 10 on "-").
gr_20gene <- GRanges(
  seqnames = paste0("gene", 1:20),
  ranges = IRanges(start = 1:20, width = 1000),
  strand = c(rep("+", 10), rep("-", 10)),
  score = 1:20,
  GC = seq(1, 0, length.out = 20)
)

## -----mat-----------------------------------------------------------------------------------------
# One row per sample (9 rows): name and sex.
sample_df <- data.frame(
  names = c(
    "Martin", "Herve", "Dan", "Marc", "Valerie",
    "Jim", "Nate", "Paul", "Sonali"
  ),
  sex = c(rep("Male", 4), "Female", rep("Male", 3), "Female")
)

## ----load-pkg, eval=FALSE-------------------------------------------------------------------------
# NOTE(review): this chunk is not evaluated (eval=FALSE). biocLite() appears to
# have been superseded by BiocManager::install() in current Bioconductor
# releases -- confirm before re-enabling.
# source("http://bioconductor.org/biocLite.R")
# biocLite("GenomeInfoDb")
#
# vignette(package="GenomeInfoDb")
#
# ?sortSeqlevels

## ----basic-R--------------------------------------------------------------------------------------
## Reading the data
fname <- system.file(
  "extdata", "epi_metadata.txt",
  package = "BioC2015Introduction"
)
df <- read.delim(fname, stringsAsFactors = FALSE)

## Exploring the data
class(df)
head(df)
tail(df)
dim(df)
colnames(df)
sapply(df, class)

## Summarize the data
summary(df)
table(df$SEX)

## Subset the data
df[df$GROUP %in% c("Brain", "Digestive"), ]
## ----gr-pkg---------------------------------------------------------------------------------------
# Worked answers for the GRanges exercise: build the example object, then
# subset it, inspect its metadata, and run a few range operations on it.
library(GenomicRanges)
gr <- GRanges(
  seqnames = paste0("chr", c(1:22, tail(letters, 11))),
  ranges = IRanges(start = 1:33, width = 1000),
  strand = c(rep("+", 10), rep("-", 23)),
  score = 1:33,
  GC = seq(1, 0, length.out = 33)
)

## extract ranges only from chromosome 3
gr[seqnames(gr) %in% "chr3", ]

## extract the first five ranges from the GRanges.
gr[1:5, ]

## extract the metadata columns (score and GC) from a GRanges
mcols(gr)

## keep only the standard chromosomes (i.e. chromosomes 1 to 22, X, Y and M)
keepStandardChromosomes(gr)

## change the chromosome naming style to NCBI
seqlevelsStyle(gr) <- "NCBI"
gr

## gaps in the ranges
gaps(gr)

## find degree of overlap for ranges.
coverage(gr)

## ----se-ans---------------------------------------------------------------------------------------
library(SummarizedExperiment)

## data for the SummarizedExperiment object
# Sample annotation: one row per sample (9 rows), matching the 9 columns of `data`.
sample_df <- data.frame(
  names = c(
    "Martin", "Herve", "Dan", "Marc", "Valerie",
    "Jim", "Nate", "Paul", "Sonali"
  ),
  sex = c(rep("Male", 4), "Female", rep("Male", 3), "Female")
)

# Gene annotation: one range per gene (20 ranges), matching the 20 rows of `data`.
gr_20genes <- GRanges(
  seqnames = paste0("gene", 1:20),
  ranges = IRanges(start = 1:20, width = 1000),
  strand = c(rep("+", 10), rep("-", 10)),
  score = 1:20,
  GC = seq(1, 0, length.out = 20)
)

# 20 x 9 integer matrix, filled row by row.
data <- matrix(1:180, ncol = 9, byrow = TRUE)

## create a SummarizedExperiment object
core_se <- SummarizedExperiment(
  assays = data,
  rowRanges = gr_20genes,
  colData = DataFrame(sample_df)
)
core_se

## exploring the SummarizedExperiment object
dim(core_se)
head(assay(core_se)) # data matrix
rowRanges(core_se) # information about the genes
colData(core_se) # sample information

## subset the SummarizedExperiment object
## subsetting the sample information
core_se[, core_se$sex == "Female"]
## subsetting the gene information
# Genes are the rows (rowRanges), so the index goes in the first position.
core_se[1:2, ]

## ----sessionInfo----------------------------------------------------------------------------------
sessionInfo()