## ----style, echo = FALSE, results = 'asis'------------------------------- knitr::opts_chunk$set( eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")), cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE")) ) options(width = 75) ## ------------------------------------------------------------------------ x <- c(1, 3, 5) y <- 1:5 ## ------------------------------------------------------------------------ x <- c(TRUE, FALSE, NA) outer(x, x, `&`) outer(x, x, `|`) ## ------------------------------------------------------------------------ x <- c("Male", "Female", "male") gender <- factor(x, levels=c("Female", "Male")) gender ## ------------------------------------------------------------------------ x <- rnorm(100) hist(x) y <- x + rnorm(100) df <- data.frame(x, y) plot(y ~ x, df) ## ------------------------------------------------------------------------ ridx <- df$x > 0 table(ridx) plot(y ~ x, df[ridx, ]) ## ------------------------------------------------------------------------ fit <- lm(y ~ x, df) class(fit) anova(fit) plot(y ~ x, df) abline(fit) ## ---- messgae = FALSE---------------------------------------------------- library(ggplot2) ## ------------------------------------------------------------------------ ggplot(df, aes(x, y)) + geom_point() + geom_smooth(method="lm") ## ---- message = FALSE---------------------------------------------------- library(readr) ## ---- eval = FALSE------------------------------------------------------- ## pdata_file <- file.choose() # ALL-sample-sheet.csv ## ---- echo = FALSE------------------------------------------------------- pdata_file <- system.file(package="BiocIntro", "extdata", "ALL-sample-sheet.csv") ## ------------------------------------------------------------------------ pdata <- read_csv(pdata_file) pdata ## ---- message = FALSE---------------------------------------------------- library(dplyr) ## ------------------------------------------------------------------------ pdata %>% select(sample, sex, age, mol.biol) pdata %>% filter(sex == "F", age < 50) pdata %>% mutate(sex = factor(sex, levels = c("F", "M"))) pdata %>% summarize( n = n(), ave_age = mean(age, na.rm=TRUE) ) ## ------------------------------------------------------------------------ pdata %>% group_by(sex) %>% summarize( n = n(), ave_age = mean(age, na.rm = TRUE) ) ## ---- eval = FALSE------------------------------------------------------- ## pdata_file <- file.choose() # airway-sample-sheet.csv ## count_file <- file.choose() # airway-read-counts.csv ## ---- echo = FALSE------------------------------------------------------- pdata_file <- system.file(package="BiocIntro", "extdata", "airway-sample-sheet.csv") count_file <- system.file(package="BiocIntro", "extdata", "airway-read-counts.csv") ## ------------------------------------------------------------------------ pdata <- read_csv(pdata_file) pdata <- pdata %>% select(Run, cell, dex) ## ------------------------------------------------------------------------ counts <- read_csv(count_file) eg <- counts[, 1:6] # make it easy to work with eg ## ------------------------------------------------------------------------ data <- left_join(pdata, eg) data ## ---- message = FALSE---------------------------------------------------- library(tidyr) ## ------------------------------------------------------------------------ tbl <- gather(data, "Gene", "Count", -(1:3)) tbl ## ------------------------------------------------------------------------ tbl %>% group_by(Run) %>% summarize(lib_size = sum(Count)) tbl %>% group_by(Gene) %>% summarize( ave_count = mean(Count), ave_log_count = mean(log(1 + Count)) ) ## ------------------------------------------------------------------------ counts_tbl <- gather(counts, "Gene", "Count", -Run) data_tbl <- left_join(pdata, counts_tbl) data_tbl ## ------------------------------------------------------------------------ gene_summaries <- data_tbl %>% group_by(Gene) %>% summarize( ave_count = mean(Count), ave_log_count = mean(log(1 + Count)) ) gene_summaries ## ---- message=FALSE------------------------------------------------------ library(ggplot2) ## ------------------------------------------------------------------------ ggplot(gene_summaries, aes(ave_log_count)) + geom_density() ## ---- message = FALSE---------------------------------------------------- library(Biostrings) ## ------------------------------------------------------------------------ seq = c("AAACA", "CATGC") dna <- DNAStringSet(seq) reverseComplement(dna) ## ------------------------------------------------------------------------ dm3_upstream_file <- system.file(package="Biostrings", "extdata", "dm3_upstream2000.fa.gz") readLines(dm3_upstream_file, 10) ## ------------------------------------------------------------------------ dna <- readDNAStringSet(dm3_upstream_file) dna ## ------------------------------------------------------------------------ gc <- letterFrequency(dna, "GC", as.prob = TRUE) hist(gc) ## ---- message = FALSE---------------------------------------------------- library(BSgenome) library(BSgenome.Hsapiens.UCSC.hg38) ## ------------------------------------------------------------------------ BSgenome.Hsapiens.UCSC.hg38 chr17 <- BSgenome.Hsapiens.UCSC.hg38[["chr17"]] chr17 letterFrequency(chr17, "GC", as.prob=TRUE) ## ---- message = FALSE---------------------------------------------------- library(S4Vectors) ## ------------------------------------------------------------------------ DataFrame(x = rnorm(100), y = rnorm(100)) ## ------------------------------------------------------------------------ length(dna) dna[1:4] ## ------------------------------------------------------------------------ nms = names(dna) pos = sub(".* ", "", nms) df <- DataFrame( dna = unname(dna), pos = pos ) df$gc <- letterFrequency(df$dna, "GC", as.prob=TRUE)[,1] df ## ---- message = FALSE---------------------------------------------------- library(BiocManager) ## ------------------------------------------------------------------------ BiocManager::available("BSgenome.Hsapiens") ## ---- eval=FALSE--------------------------------------------------------- ## ## also possible to install CRAN, github packages ## BiocManager::install("BSgenome.Hsapiens.UCSC.hg38") ## ------------------------------------------------------------------------ sessionInfo()