## ----style, echo = FALSE, results = "asis"------------------------------------ BiocStyle::markdown() ## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 5, fig.height = 4.5, dpi = 200 ) ## ----InstallGeomxTools, echo = TRUE, eval = FALSE----------------------------- # if (!require("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # # The following initializes most up to date version of Bioc # BiocManager::install() # # BiocManager::install("NanoStringNCTools") # BiocManager::install("GeomxTools") # BiocManager::install("GeoMxWorkflows") ## ----libs, message = FALSE, warning = FALSE, eval = TRUE---------------------- library(NanoStringNCTools) library(GeomxTools) library(GeoMxWorkflows) ## ----checking package versions------------------------------------------------ if(packageVersion("GeomxTools") < "2.1" & packageVersion("GeoMxWorkflows") >= "1.0.1"){ stop("GeomxTools and Workflow versions do not match. Please use the same version. This workflow is meant to be used with most current version of packages. If you are using an older version of Bioconductor please reinstall GeoMxWorkflows and use vignette(GeoMxWorkflows) instead") } if(packageVersion("GeomxTools") > "2.1" & packageVersion("GeoMxWorkflows") <= "1.0.1"){ stop("GeomxTools and Workflow versions do not match. Please use the same version, see install instructions above.") # to remove current package version # remove.packages("GeomxTools") # remove.packages("GeoMxWorkflows") # see install instructions above } ## ----quickstart, message = FALSE, warning = FALSE----------------------------- # Reference the main folder 'file.path' containing the sub-folders with each # data file type: datadir <- system.file("extdata", "WTA_NGS_Example", package="GeoMxWorkflows") # to locate a specific file path replace the above line with # datadir <- file.path("~/Folder/SubFolder/DataLocation") # replace the Folder, SubFolder, DataLocation as needed # the DataLocation folder should contain a dccs, pkcs, and annotation folder # with each set of files present as needed ## ----locateFiles, message = FALSE, warning = FALSE---------------------------- # automatically list files in each directory for use DCCFiles <- dir(file.path(datadir, "dccs"), pattern = ".dcc$", full.names = TRUE, recursive = TRUE) PKCFiles <- unzip(zipfile = dir(file.path(datadir, "pkcs"), pattern = ".zip$", full.names = TRUE, recursive = TRUE)) SampleAnnotationFile <- dir(file.path(datadir, "annotation"), pattern = ".xlsx$", full.names = TRUE, recursive = TRUE) ## ----loadData, message = FALSE, warning = FALSE------------------------------- # load data demoData <- readNanoStringGeoMxSet(dccFiles = DCCFiles, pkcFiles = PKCFiles, phenoDataFile = SampleAnnotationFile, phenoDataSheet = "Template", phenoDataDccColName = "Sample_ID", protocolDataColNames = c("aoi", "roi"), experimentDataColNames = c("panel")) ## ----modules------------------------------------------------------------------ library(knitr) pkcs <- annotation(demoData) modules <- gsub(".pkc", "", pkcs) kable(data.frame(PKCs = pkcs, modules = modules)) ## ----sampleFlow, fig.width = 10, fig.height = 8, fig.wide = TRUE, message = FALSE, warning = FALSE---- library(dplyr) library(ggforce) # select the annotations we want to show, use `` to surround column names with # spaces or special symbols count_mat <- count(pData(demoData), `slide name`, class, region, segment) # simplify the slide names count_mat$`slide name` <- gsub("disease", "d", gsub("normal", "n", count_mat$`slide name`)) # gather the data and plot in order: class, slide name, region, segment test_gr <- gather_set_data(count_mat, 1:4) test_gr$x <- factor(test_gr$x, levels = c("class", "slide name", "region", "segment")) # plot Sankey ggplot(test_gr, aes(x, id = id, split = y, value = n)) + geom_parallel_sets(aes(fill = region), alpha = 0.5, axis.width = 0.1) + geom_parallel_sets_axes(axis.width = 0.2) + geom_parallel_sets_labels(color = "white", size = 5) + theme_classic(base_size = 17) + theme(legend.position = "bottom", axis.ticks.y = element_blank(), axis.line = element_blank(), axis.text.y = element_blank()) + scale_y_continuous(expand = expansion(0)) + scale_x_discrete(expand = expansion(0)) + labs(x = "", y = "") + annotate(geom = "segment", x = 4.25, xend = 4.25, y = 20, yend = 120, lwd = 2) + annotate(geom = "text", x = 4.19, y = 70, angle = 90, size = 5, hjust = 0.5, label = "100 segments") ## ----shiftCounts, eval = TRUE------------------------------------------------- # Shift counts to one demoData <- shiftCountsOne(demoData, useDALogic = TRUE) ## ----setqcflagupdated, eval = TRUE------------------------------------------- # Default QC cutoffs are commented in () adjacent to the respective parameters # study-specific values were selected after visualizing the QC results in more # detail below QC_params <- list(minSegmentReads = 1000, # Minimum number of reads (1000) percentTrimmed = 80, # Minimum % of reads trimmed (80%) percentStitched = 80, # Minimum % of reads stitched (80%) percentAligned = 75, # Minimum % of reads aligned (80%) percentSaturation = 50, # Minimum sequencing saturation (50%) minNegativeCount = 1, # Minimum negative control counts (10) maxNTCCount = 9000, # Maximum counts observed in NTC well (1000) minNuclei = 20, # Minimum # of nuclei estimated (100) minArea = 1000) # Minimum segment area (5000) demoData <- setSegmentQCFlags(demoData, qcCutoffs = QC_params) # Collate QC Results QCResults <- protocolData(demoData)[["QCFlags"]] flag_columns <- colnames(QCResults) QC_Summary <- data.frame(Pass = colSums(!QCResults[, flag_columns]), Warning = colSums(QCResults[, flag_columns])) QCResults$QCStatus <- apply(QCResults, 1L, function(x) { ifelse(sum(x) == 0L, "PASS", "WARNING") }) QC_Summary["TOTAL FLAGS", ] <- c(sum(QCResults[, "QCStatus"] == "PASS"), sum(QCResults[, "QCStatus"] == "WARNING")) ## ----qcflagHistogramsCode, eval = TRUE, warning = FALSE, message = FALSE------ library(ggplot2) col_by <- "segment" # Graphical summaries of QC statistics plot function QC_histogram <- function(assay_data = NULL, annotation = NULL, fill_by = NULL, thr = NULL, scale_trans = NULL) { plt <- ggplot(assay_data, aes_string(x = paste0("unlist(`", annotation, "`)"), fill = fill_by)) + geom_histogram(bins = 50) + geom_vline(xintercept = thr, lty = "dashed", color = "black") + theme_bw() + guides(fill = "none") + facet_wrap(as.formula(paste("~", fill_by)), nrow = 4) + labs(x = annotation, y = "Segments, #", title = annotation) if(!is.null(scale_trans)) { plt <- plt + scale_x_continuous(trans = scale_trans) } plt } ## ----plotQCHist, warning = FALSE, message = FALSE----------------------------- QC_histogram(sData(demoData), "Trimmed (%)", col_by, 80) QC_histogram(sData(demoData), "Stitched (%)", col_by, 80) QC_histogram(sData(demoData), "Aligned (%)", col_by, 75) QC_histogram(sData(demoData), "Saturated (%)", col_by, 50) + labs(title = "Sequencing Saturation (%)", x = "Sequencing Saturation (%)") QC_histogram(sData(demoData), "area", col_by, 1000, scale_trans = "log10") QC_histogram(sData(demoData), "nuclei", col_by, 20) # calculate the negative geometric means for each module negativeGeoMeans <- esBy(negativeControlSubset(demoData), GROUP = "Module", FUN = function(x) { assayDataApply(x, MARGIN = 2, FUN = ngeoMean, elt = "exprs") }) protocolData(demoData)[["NegGeoMean"]] <- negativeGeoMeans # explicitly copy the Negative geoMeans from sData to pData negCols <- paste0("NegGeoMean_", modules) pData(demoData)[, negCols] <- sData(demoData)[["NegGeoMean"]] for(ann in negCols) { plt <- QC_histogram(pData(demoData), ann, col_by, 2, scale_trans = "log10") print(plt) } # detatch neg_geomean columns ahead of aggregateCounts call pData(demoData) <- pData(demoData)[, !colnames(pData(demoData)) %in% negCols] # show all NTC values, Freq = # of Segments with a given NTC count: kable(table(NTC_Count = sData(demoData)$NTC), col.names = c("NTC Count", "# of Segments")) ## ----QCSummaryTable, results = "asis"----------------------------------------- kable(QC_Summary, caption = "QC Summary Table for each Segment") ## ----removeQCSampleProbe, eval = TRUE----------------------------------------- demoData <- demoData[, QCResults$QCStatus == "PASS"] # Subsetting our dataset has removed samples which did not pass QC dim(demoData) ## ----setbioprobeqcflag, eval = TRUE------------------------------------------ # Generally keep the qcCutoffs parameters unchanged. Set removeLocalOutliers to # FALSE if you do not want to remove local outliers demoData <- setBioProbeQCFlags(demoData, qcCutoffs = list(minProbeRatio = 0.1, percentFailGrubbs = 20), removeLocalOutliers = TRUE) ProbeQCResults <- fData(demoData)[["QCFlags"]] # Define QC table for Probe QC qc_df <- data.frame(Passed = sum(rowSums(ProbeQCResults[, -1]) == 0), Global = sum(ProbeQCResults$GlobalGrubbsOutlier), Local = sum(rowSums(ProbeQCResults[, -2:-1]) > 0 & !ProbeQCResults$GlobalGrubbsOutlier)) ## ----bioprobeQCTable, echo = FALSE, results = "asis"-------------------------- kable(qc_df, caption = "Probes flagged or passed as outliers") ## ----excludeOutlierProbes----------------------------------------------------- #Subset object to exclude all that did not pass Ratio & Global testing ProbeQCPassed <- subset(demoData, fData(demoData)[["QCFlags"]][,c("LowProbeRatio")] == FALSE & fData(demoData)[["QCFlags"]][,c("GlobalGrubbsOutlier")] == FALSE) dim(ProbeQCPassed) demoData <- ProbeQCPassed ## ----aggregateCounts, eval = TRUE--------------------------------------------- # Check how many unique targets the object has length(unique(featureData(demoData)[["TargetName"]])) # collapse to targets target_demoData <- aggregateCounts(demoData) dim(target_demoData) exprs(target_demoData)[1:5, 1:2] ## ----calculateLOQ, eval = TRUE------------------------------------------------ # Define LOQ SD threshold and minimum value cutoff <- 2 minLOQ <- 2 # Calculate LOQ per module tested LOQ <- data.frame(row.names = colnames(target_demoData)) for(module in modules) { vars <- paste0(c("NegGeoMean_", "NegGeoSD_"), module) if(all(vars[1:2] %in% colnames(pData(target_demoData)))) { LOQ[, module] <- pmax(minLOQ, pData(target_demoData)[, vars[1]] * pData(target_demoData)[, vars[2]] ^ cutoff) } } pData(target_demoData)$LOQ <- LOQ ## ----LOQMat, eval = TRUE------------------------------------------------------ LOQ_Mat <- c() for(module in modules) { ind <- fData(target_demoData)$Module == module Mat_i <- t(esApply(target_demoData[ind, ], MARGIN = 1, FUN = function(x) { x > LOQ[, module] })) LOQ_Mat <- rbind(LOQ_Mat, Mat_i) } # ensure ordering since this is stored outside of the geomxSet LOQ_Mat <- LOQ_Mat[fData(target_demoData)$TargetName, ] ## ----segDetectionBarplot------------------------------------------------------ # Save detection rate information to pheno data pData(target_demoData)$GenesDetected <- colSums(LOQ_Mat, na.rm = TRUE) pData(target_demoData)$GeneDetectionRate <- pData(target_demoData)$GenesDetected / nrow(target_demoData) # Determine detection thresholds: 1%, 5%, 10%, 15%, >15% pData(target_demoData)$DetectionThreshold <- cut(pData(target_demoData)$GeneDetectionRate, breaks = c(0, 0.01, 0.05, 0.1, 0.15, 1), labels = c("<1%", "1-5%", "5-10%", "10-15%", ">15%")) # stacked bar plot of different cut points (1%, 5%, 10%, 15%) ggplot(pData(target_demoData), aes(x = DetectionThreshold)) + geom_bar(aes(fill = region)) + geom_text(stat = "count", aes(label = ..count..), vjust = -0.5) + theme_bw() + scale_y_continuous(expand = expansion(mult = c(0, 0.1))) + labs(x = "Gene Detection Rate", y = "Segments, #", fill = "Segment Type") ## ----segTable----------------------------------------------------------------- # cut percent genes detected at 1, 5, 10, 15 kable(table(pData(target_demoData)$DetectionThreshold, pData(target_demoData)$class)) ## ----filterSegments----------------------------------------------------------- target_demoData <- target_demoData[, pData(target_demoData)$GeneDetectionRate >= .1] dim(target_demoData) ## ----replotSankey, fig.width = 10, fig.height = 8, fig.wide = TRUE, message = FALSE, warning = FALSE---- # select the annotations we want to show, use `` to surround column names with # spaces or special symbols count_mat <- count(pData(demoData), `slide name`, class, region, segment) # simplify the slide names count_mat$`slide name` <- gsub("disease", "d", gsub("normal", "n", count_mat$`slide name`)) # gather the data and plot in order: class, slide name, region, segment test_gr <- gather_set_data(count_mat, 1:4) test_gr$x <- factor(test_gr$x, levels = c("class", "slide name", "region", "segment")) # plot Sankey ggplot(test_gr, aes(x, id = id, split = y, value = n)) + geom_parallel_sets(aes(fill = region), alpha = 0.5, axis.width = 0.1) + geom_parallel_sets_axes(axis.width = 0.2) + geom_parallel_sets_labels(color = "white", size = 5) + theme_classic(base_size = 17) + theme(legend.position = "bottom", axis.ticks.y = element_blank(), axis.line = element_blank(), axis.text.y = element_blank()) + scale_y_continuous(expand = expansion(0)) + scale_x_discrete(expand = expansion(0)) + labs(x = "", y = "") + annotate(geom = "segment", x = 4.25, xend = 4.25, y = 20, yend = 120, lwd = 2) + annotate(geom = "text", x = 4.19, y = 70, angle = 90, size = 5, hjust = 0.5, label = "100 segments") ## ----goi detection------------------------------------------------------------ library(scales) # for percent # Calculate detection rate: LOQ_Mat <- LOQ_Mat[, colnames(target_demoData)] fData(target_demoData)$DetectedSegments <- rowSums(LOQ_Mat, na.rm = TRUE) fData(target_demoData)$DetectionRate <- fData(target_demoData)$DetectedSegments / nrow(pData(target_demoData)) # Gene of interest detection table goi <- c("PDCD1", "CD274", "IFNG", "CD8A", "CD68", "EPCAM", "KRT18", "NPHS1", "NPHS2", "CALB1", "CLDN8") goi_df <- data.frame( Gene = goi, Number = fData(target_demoData)[goi, "DetectedSegments"], DetectionRate = percent(fData(target_demoData)[goi, "DetectionRate"])) ## ----tableGOI, echo = FALSE, results = "asis"--------------------------------- kable(goi_df, caption = "Detection rate for Genes of Interest", align = "c", col.names = c("Gene", "Detection, # Segments", "Detection Rate, % of Segments")) ## ----plotDetectionRate, eval = TRUE------------------------------------------- # Plot detection rate: plot_detect <- data.frame(Freq = c(1, 5, 10, 20, 30, 50)) plot_detect$Number <- unlist(lapply(c(0.01, 0.05, 0.1, 0.2, 0.3, 0.5), function(x) {sum(fData(target_demoData)$DetectionRate >= x)})) plot_detect$Rate <- plot_detect$Number / nrow(fData(target_demoData)) rownames(plot_detect) <- plot_detect$Freq ggplot(plot_detect, aes(x = as.factor(Freq), y = Rate, fill = Rate)) + geom_bar(stat = "identity") + geom_text(aes(label = formatC(Number, format = "d", big.mark = ",")), vjust = 1.6, color = "black", size = 4) + scale_fill_gradient2(low = "orange2", mid = "lightblue", high = "dodgerblue3", midpoint = 0.65, limits = c(0,1), labels = scales::percent) + theme_bw() + scale_y_continuous(labels = scales::percent, limits = c(0,1), expand = expansion(mult = c(0, 0))) + labs(x = "% of Segments", y = "Genes Detected, % of Panel > LOQ") ## ----subsetGenes, eval = TRUE------------------------------------------------- # Subset to target genes detected in at least 10% of the samples. # Also manually include the negative control probe, for downstream use negativeProbefData <- subset(fData(target_demoData), CodeClass == "Negative") neg_probes <- unique(negativeProbefData$TargetName) target_demoData <- target_demoData[fData(target_demoData)$DetectionRate >= 0.1 | fData(target_demoData)$TargetName %in% neg_probes, ] dim(target_demoData) # retain only detected genes of interest goi <- goi[goi %in% rownames(target_demoData)] ## ----previewNF, fig.width = 8, fig.height = 8, fig.wide = TRUE, eval = TRUE, warning = FALSE, message = FALSE---- library(reshape2) # for melt library(cowplot) # for plot_grid # Graph Q3 value vs negGeoMean of Negatives ann_of_interest <- "region" Stat_data <- data.frame(row.names = colnames(exprs(target_demoData)), Segment = colnames(exprs(target_demoData)), Annotation = pData(target_demoData)[, ann_of_interest], Q3 = unlist(apply(exprs(target_demoData), 2, quantile, 0.75, na.rm = TRUE)), NegProbe = exprs(target_demoData)[neg_probes, ]) Stat_data_m <- melt(Stat_data, measure.vars = c("Q3", "NegProbe"), variable.name = "Statistic", value.name = "Value") plt1 <- ggplot(Stat_data_m, aes(x = Value, fill = Statistic)) + geom_histogram(bins = 40) + theme_bw() + scale_x_continuous(trans = "log2") + facet_wrap(~Annotation, nrow = 1) + scale_fill_brewer(palette = 3, type = "qual") + labs(x = "Counts", y = "Segments, #") plt2 <- ggplot(Stat_data, aes(x = NegProbe, y = Q3, color = Annotation)) + geom_abline(intercept = 0, slope = 1, lty = "dashed", color = "darkgray") + geom_point() + guides(color = "none") + theme_bw() + scale_x_continuous(trans = "log2") + scale_y_continuous(trans = "log2") + theme(aspect.ratio = 1) + labs(x = "Negative Probe GeoMean, Counts", y = "Q3 Value, Counts") plt3 <- ggplot(Stat_data, aes(x = NegProbe, y = Q3 / NegProbe, color = Annotation)) + geom_hline(yintercept = 1, lty = "dashed", color = "darkgray") + geom_point() + theme_bw() + scale_x_continuous(trans = "log2") + scale_y_continuous(trans = "log2") + theme(aspect.ratio = 1) + labs(x = "Negative Probe GeoMean, Counts", y = "Q3/NegProbe Value, Counts") btm_row <- plot_grid(plt2, plt3, nrow = 1, labels = c("B", ""), rel_widths = c(0.43,0.57)) plot_grid(plt1, btm_row, ncol = 1, labels = c("A", "")) ## ----normalizeObject, eval = TRUE--------------------------------------------- # Q3 norm (75th percentile) for WTA/CTA with or without custom spike-ins target_demoData <- normalize(target_demoData , norm_method = "quant", desiredQuantile = .75, toElt = "q_norm") # Background normalization for WTA/CTA without custom spike-in target_demoData <- normalize(target_demoData , norm_method = "neg", fromElt = "exprs", toElt = "neg_norm") ## ----normplot, fig.small = TRUE----------------------------------------------- # visualize the first 10 segments with each normalization method boxplot(exprs(target_demoData)[,1:10], col = "#9EDAE5", main = "Raw Counts", log = "y", names = 1:10, xlab = "Segment", ylab = "Counts, Raw") boxplot(assayDataElement(target_demoData[,1:10], elt = "q_norm"), col = "#2CA02C", main = "Q3 Norm Counts", log = "y", names = 1:10, xlab = "Segment", ylab = "Counts, Q3 Normalized") boxplot(assayDataElement(target_demoData[,1:10], elt = "neg_norm"), col = "#FF7F0E", main = "Neg Norm Counts", log = "y", names = 1:10, xlab = "Segment", ylab = "Counts, Neg. Normalized") ## ----dimReduction, eval = TRUE------------------------------------------------ library(umap) library(Rtsne) # update defaults for umap to contain a stable random_state (seed) custom_umap <- umap::umap.defaults custom_umap$random_state <- 42 # run UMAP umap_out <- umap(t(log2(assayDataElement(target_demoData , elt = "q_norm"))), config = custom_umap) pData(target_demoData)[, c("UMAP1", "UMAP2")] <- umap_out$layout[, c(1,2)] ggplot(pData(target_demoData), aes(x = UMAP1, y = UMAP2, color = region, shape = class)) + geom_point(size = 3) + theme_bw() # run tSNE set.seed(42) # set the seed for tSNE as well tsne_out <- Rtsne(t(log2(assayDataElement(target_demoData , elt = "q_norm"))), perplexity = ncol(target_demoData)*.15) pData(target_demoData)[, c("tSNE1", "tSNE2")] <- tsne_out$Y[, c(1,2)] ggplot(pData(target_demoData), aes(x = tSNE1, y = tSNE2, color = region, shape = class)) + geom_point(size = 3) + theme_bw() ## ----CVheatmap, eval = TRUE, echo = TRUE, fig.width = 8, fig.height = 6.5, fig.wide = TRUE---- library(pheatmap) # for pheatmap # create a log2 transform of the data for analysis assayDataElement(object = target_demoData, elt = "log_q") <- assayDataApply(target_demoData, 2, FUN = log, base = 2, elt = "q_norm") # create CV function calc_CV <- function(x) {sd(x) / mean(x)} CV_dat <- assayDataApply(target_demoData, elt = "log_q", MARGIN = 1, calc_CV) # show the highest CD genes and their CV values sort(CV_dat, decreasing = TRUE)[1:5] # Identify genes in the top 3rd of the CV values GOI <- names(CV_dat)[CV_dat > quantile(CV_dat, 0.8)] pheatmap(assayDataElement(target_demoData[GOI, ], elt = "log_q"), scale = "row", show_rownames = FALSE, show_colnames = FALSE, border_color = NA, clustering_method = "average", clustering_distance_rows = "correlation", clustering_distance_cols = "correlation", breaks = seq(-3, 3, 0.05), color = colorRampPalette(c("purple3", "black", "yellow2"))(120), annotation_col = pData(target_demoData)[, c("class", "segment", "region")]) ## ----deNativeComplex, eval = TRUE, message = FALSE, warning = FALSE----------- # convert test variables to factors pData(target_demoData)$testRegion <- factor(pData(target_demoData)$region, c("glomerulus", "tubule")) pData(target_demoData)[["slide"]] <- factor(pData(target_demoData)[["slide name"]]) assayDataElement(object = target_demoData, elt = "log_q") <- assayDataApply(target_demoData, 2, FUN = log, base = 2, elt = "q_norm") # run LMM: # formula follows conventions defined by the lme4 package results <- c() for(status in c("DKD", "normal")) { ind <- pData(target_demoData)$class == status mixedOutmc <- mixedModelDE(target_demoData[, ind], elt = "log_q", modelFormula = ~ testRegion + (1 + testRegion | slide), groupVar = "testRegion", nCores = parallel::detectCores(), multiCore = FALSE) # format results as data.frame r_test <- do.call(rbind, mixedOutmc["lsmeans", ]) tests <- rownames(r_test) r_test <- as.data.frame(r_test) r_test$Contrast <- tests # use lapply in case you have multiple levels of your test factor to # correctly associate gene name with it's row in the results table r_test$Gene <- unlist(lapply(colnames(mixedOutmc), rep, nrow(mixedOutmc["lsmeans", ][[1]]))) r_test$Subset <- status r_test$FDR <- p.adjust(r_test$`Pr(>|t|)`, method = "fdr") r_test <- r_test[, c("Gene", "Subset", "Contrast", "Estimate", "Pr(>|t|)", "FDR")] results <- rbind(results, r_test) } ## ----DEtable, echo = TRUE, results = "asis"----------------------------------- kable(subset(results, Gene %in% goi & Subset == "normal"), digits = 3, caption = "DE results for Genes of Interest", align = "lc", row.names = FALSE) ## ----DEsimple, eval = TRUE, echo = TRUE, message = FALSE, warning = FALSE----- # convert test variables to factors pData(target_demoData)$testClass <- factor(pData(target_demoData)$class, c("normal", "DKD")) # run LMM: # formula follows conventions defined by the lme4 package results2 <- c() for(region in c("glomerulus", "tubule")) { ind <- pData(target_demoData)$region == region mixedOutmc <- mixedModelDE(target_demoData[, ind], elt = "log_q", modelFormula = ~ testClass + (1 | slide), groupVar = "testClass", nCores = parallel::detectCores(), multiCore = FALSE) # format results as data.frame r_test <- do.call(rbind, mixedOutmc["lsmeans", ]) tests <- rownames(r_test) r_test <- as.data.frame(r_test) r_test$Contrast <- tests # use lapply in case you have multiple levels of your test factor to # correctly associate gene name with it's row in the results table r_test$Gene <- unlist(lapply(colnames(mixedOutmc), rep, nrow(mixedOutmc["lsmeans", ][[1]]))) r_test$Subset <- region r_test$FDR <- p.adjust(r_test$`Pr(>|t|)`, method = "fdr") r_test <- r_test[, c("Gene", "Subset", "Contrast", "Estimate", "Pr(>|t|)", "FDR")] results2 <- rbind(results2, r_test) } ## ----DEtable2, echo = TRUE, results = "asis"---------------------------------- kable(subset(results2, Gene %in% goi & Subset == "tubule"), digits = 3, caption = "DE results for Genes of Interest", align = "lc", row.names = FALSE) ## ----glomindex, eval = TRUE, echo = FALSE------------------------------------- # since we didn't execute above, ID the glomeruli for later use ## ----volcanoPlot, fig.width = 11, fig.height = 7, fig.wide = TRUE, warning = FALSE, message = FALSE---- library(ggrepel) # Categorize Results based on P-value & FDR for plotting results$Color <- "NS or FC < 0.5" results$Color[results$`Pr(>|t|)` < 0.05] <- "P < 0.05" results$Color[results$FDR < 0.05] <- "FDR < 0.05" results$Color[results$FDR < 0.001] <- "FDR < 0.001" results$Color[abs(results$Estimate) < 0.5] <- "NS or FC < 0.5" results$Color <- factor(results$Color, levels = c("NS or FC < 0.5", "P < 0.05", "FDR < 0.05", "FDR < 0.001")) # pick top genes for either side of volcano to label # order genes for convenience: results$invert_P <- (-log10(results$`Pr(>|t|)`)) * sign(results$Estimate) top_g <- c() for(cond in c("DKD", "normal")) { ind <- results$Subset == cond top_g <- c(top_g, results[ind, 'Gene'][ order(results[ind, 'invert_P'], decreasing = TRUE)[1:15]], results[ind, 'Gene'][ order(results[ind, 'invert_P'], decreasing = FALSE)[1:15]]) } top_g <- unique(top_g) results <- results[, -1*ncol(results)] # remove invert_P from matrix # Graph results ggplot(results, aes(x = Estimate, y = -log10(`Pr(>|t|)`), color = Color, label = Gene)) + geom_vline(xintercept = c(0.5, -0.5), lty = "dashed") + geom_hline(yintercept = -log10(0.05), lty = "dashed") + geom_point() + labs(x = "Enriched in Tubules <- log2(FC) -> Enriched in Glomeruli", y = "Significance, -log10(P)", color = "Significance") + scale_color_manual(values = c(`FDR < 0.001` = "dodgerblue", `FDR < 0.05` = "lightblue", `P < 0.05` = "orange2", `NS or FC < 0.5` = "gray"), guide = guide_legend(override.aes = list(size = 4))) + scale_y_continuous(expand = expansion(mult = c(0,0.05))) + geom_text_repel(data = subset(results, Gene %in% top_g & FDR < 0.001), size = 4, point.padding = 0.15, color = "black", min.segment.length = .1, box.padding = .2, lwd = 2, max.overlaps = 50) + theme_bw(base_size = 16) + theme(legend.position = "bottom") + facet_wrap(~Subset, scales = "free_y") ## ----targetTable, eval = TRUE, as.is = TRUE----------------------------------- kable(subset(results, Gene %in% c('PDHA1','ITGB1')), row.names = FALSE) ## ----targetExprs, eval = TRUE------------------------------------------------- # show expression for a single target: PDHA1 ggplot(pData(target_demoData), aes(x = region, fill = region, y = assayDataElement(target_demoData["PDHA1", ], elt = "q_norm"))) + geom_violin() + geom_jitter(width = .2) + labs(y = "PDHA1 Expression") + scale_y_continuous(trans = "log2") + facet_wrap(~class) + theme_bw() ## ----targetExprs2, fig.width = 8, fig.wide = TRUE, eval = TRUE---------------- glom <- pData(target_demoData)$region == "glomerulus" # show expression of PDHA1 vs ITGB1 ggplot(pData(target_demoData), aes(x = assayDataElement(target_demoData["PDHA1", ], elt = "q_norm"), y = assayDataElement(target_demoData["ITGB1", ], elt = "q_norm"), color = region)) + geom_vline(xintercept = max(assayDataElement(target_demoData["PDHA1", glom], elt = "q_norm")), lty = "dashed", col = "darkgray") + geom_hline(yintercept = max(assayDataElement(target_demoData["ITGB1", !glom], elt = "q_norm")), lty = "dashed", col = "darkgray") + geom_point(size = 3) + theme_bw() + scale_x_continuous(trans = "log2") + scale_y_continuous(trans = "log2") + labs(x = "PDHA1 Expression", y = "ITGB1 Expression") + facet_wrap(~class) ## ----heatmap, eval = TRUE, fig.width = 8, fig.height = 6.5, fig.wide = TRUE---- # select top significant genes based on significance, plot with pheatmap GOI <- unique(subset(results, `FDR` < 0.001)$Gene) pheatmap(log2(assayDataElement(target_demoData[GOI, ], elt = "q_norm")), scale = "row", show_rownames = FALSE, show_colnames = FALSE, border_color = NA, clustering_method = "average", clustering_distance_rows = "correlation", clustering_distance_cols = "correlation", cutree_cols = 2, cutree_rows = 2, breaks = seq(-3, 3, 0.05), color = colorRampPalette(c("purple3", "black", "yellow2"))(120), annotation_col = pData(target_demoData)[, c("region", "class")]) ## ----maPlot, fig.width = 8, fig.height = 12, fig.wide = TRUE, warning = FALSE, message = FALSE---- results$MeanExp <- rowMeans(assayDataElement(target_demoData, elt = "q_norm")) top_g2 <- results$Gene[results$Gene %in% top_g & results$FDR < 0.001 & abs(results$Estimate) > .5 & results$MeanExp > quantile(results$MeanExp, 0.9)] ggplot(subset(results, !Gene %in% neg_probes), aes(x = MeanExp, y = Estimate, size = -log10(`Pr(>|t|)`), color = Color, label = Gene)) + geom_hline(yintercept = c(0.5, -0.5), lty = "dashed") + scale_x_continuous(trans = "log2") + geom_point(alpha = 0.5) + labs(y = "Enriched in Glomeruli <- log2(FC) -> Enriched in Tubules", x = "Mean Expression", color = "Significance") + scale_color_manual(values = c(`FDR < 0.001` = "dodgerblue", `FDR < 0.05` = "lightblue", `P < 0.05` = "orange2", `NS or FC < 0.5` = "gray")) + geom_text_repel(data = subset(results, Gene %in% top_g2), size = 4, point.padding = 0.15, color = "black", min.segment.length = .1, box.padding = .2, lwd = 2) + theme_bw(base_size = 16) + facet_wrap(~Subset, nrow = 2, ncol = 1) ## ----sessInfo----------------------------------------------------------------- sessionInfo()