## ----options, results="hide", include=FALSE, cache=FALSE, results='hide', message=FALSE---- knitr::opts_chunk$set(fig.align="center", cache=FALSE,error=FALSE, #make it stop on error fig.width=7, fig.height=7, autodep=TRUE, out.width="600px", out.height="600px", results="markup", echo=TRUE, eval=TRUE) #knitr::opts_knit$set(stop_on_error = 2L) #really make it stop #knitr::dep_auto() options(getClass.msg=FALSE) #get rid of annoying messages about cache until fixed internally in R set.seed(98883) ## for reproducibility library(bioc2016singlecell) ## for now load individual dependencies library(clusterExperiment) ## use develop for now library(SummarizedExperiment) library(cluster) library(MAST) ## ----datain, eval=TRUE--------------------------------------------------- data(scone_res) data(ws_input) # Summarized Experiment se <- SummarizedExperiment(list(counts=norm_logcounts), colData=data.frame(batch=batch[colnames(norm_logcounts)], time_points=bio[colnames(norm_logcounts)])) ## ----pca----------------------------------------------------------------- pca <- prcomp(t(assay(se)), center=TRUE, scale=TRUE) plot(pca$x, pch=19, col=bigPalette[bio]) legend("topleft", levels(bio), fill=bigPalette) res1 <- pam(pca$x[,1:2], k=3) table(res1$clustering) res2 <- pam(pca$x[,1:3], k=3) table(res2$clustering) plot(pca$sdev^2/sum(pca$sdev^2), xlab="PC", ylab="Percent of explained variance") res3 <- pam(pca$x[,1:6], k=3) table(res3$clustering) ## ----pca_cm-------------------------------------------------------------- cm <- clusterMany(se, dimReduce="PCA", nPCADims=c(2, 3, 6), ks = 3, clusterFunction = "pam") cm apply(clusterMatrix(cm), 2, table) ## ----plot_cm------------------------------------------------------------- defaultMar <- par("mar") plotCMar <- c(1.1,8.1,4.1,1.1) par(mar=plotCMar) plotClusters(cm) ## ----combine_cm---------------------------------------------------------- cm <- combineMany(cm) plotClusters(cm) cm ## ----subsample----------------------------------------------------------- cl3 <- clusterSingle(se, subsample = TRUE, sequential = FALSE, clusterFunction = c("hierarchical01"), clusterDArgs = list(minSize=5, alpha=0.3), subsampleArgs = list("k"=3,"clusterFunction"="pam"), dimReduce = c("PCA"), ndims=10) plotCoClustering(cl3) cl7 <- clusterSingle(se, subsample = TRUE, sequential = FALSE, clusterFunction = c("hierarchical01"), clusterDArgs = list(minSize=5, alpha=0.3), subsampleArgs = list("k"=7,"clusterFunction"="pam"), dimReduce = c("PCA"), ndims=10) plotCoClustering(cl7) ## ----sequential---------------------------------------------------------- cl <- clusterSingle(se, subsample = TRUE, sequential = TRUE, clusterFunction = c("hierarchical01"), clusterDArgs = list(minSize=5, alpha=0.3), subsampleArgs = list("clusterFunction"="pam"), seqArgs = list(k0=5, remain.n=10, top.can=5), dimReduce = c("PCA"), ndims=10) cl ## ----rsec---------------------------------------------------------------- rs <- RSEC(se, k0s = 4:15, dimReduce = "PCA", nPCADims=c(10, 20), alphas=c(0.2, 0.3), clusterFunction="hierarchical01", betas=0.7, combineProportion=0.5, combineMinSize=3, dendroReduce = "mad", dendroNDims = 1000, mergeMethod = "adjP", mergeCutoff=0.01, seqArgs = list(remain.n=10, top.can=5)) rs ## ----plotClusterEx1------------------------------------------------------ par(mar=plotCMar) plotClusters(rs, main="Clusters from RSEC", axisLine=-1, sampleData=c("time_points", "batch")) ## ----plotCoClustering---------------------------------------------------- plotCoClustering(rs) ## ----clusterMatrix------------------------------------------------------- head(clusterMatrix(rs)[,1:3]) table(primaryClusterNamed(rs)) ## ----makeDendrogram------------------------------------------------------ manual <- makeDendrogram(rs, whichCluster = "combineMany", dimReduce="mad", ndims=1000) plotDendrogram(manual) ## ----mergeClustersPlot--------------------------------------------------- mergeClusters(manual, mergeMethod="adjP", plot="adjP", cutoff=0.01) ## ----mergeClusters------------------------------------------------------- manual <- mergeClusters(manual, mergeMethod="adjP", plot="none", cutoff=0.01) par(mar=plotCMar) plotClusters(manual, whichClusters = c("mergeClusters", "combineMany")) plotCoClustering(manual, whichClusters=c("mergeClusters","combineMany")) ## ----dendro_merge-------------------------------------------------------- rs <- makeDendrogram(rs, dimReduce="mad", ndims=1000) ## set good breaks for heatmap colors breaks <- c(min(norm_logcounts), seq(0, quantile(norm_logcounts[norm_logcounts > 0], .99, na.rm = TRUE), length = 50), max(norm_logcounts)) ## ----getBestFeatures----------------------------------------------------- genesF <- getBestFeatures(rs, contrastType="F", number=500, isCount=FALSE) head(genesF) ## ----getBestFeatures_heatmap--------------------------------------------- plotHeatmap(rs, clusterSamplesData="dendrogramValue", clusterFeaturesData=unique(genesF[,"IndexInOriginal"]), main="F statistics", breaks=breaks, sampleData=c("time_points")) ## ----pairwise------------------------------------------------------------ genesPairs <- getBestFeatures(rs, contrastType="Pairs", number=50, isCount=FALSE) plotHeatmap(rs, clusterSamplesData="dendrogramValue", clusterFeaturesData=unique(genesPairs[,"IndexInOriginal"]), main="All pairwise comparisons", breaks=breaks, sampleData=c("time_points")) ## ----one_all------------------------------------------------------------- genesOneAll <- getBestFeatures(rs, contrastType="OneAgainstAll", number=50, isCount=FALSE) plotHeatmap(rs, clusterSamplesData="dendrogramValue", clusterFeaturesData=unique(genesOneAll[,"IndexInOriginal"]), main="One versus All", breaks=breaks, sampleData=c("time_points")) ## ----dendro-------------------------------------------------------------- genesDendro <- getBestFeatures(rs, contrastType="Dendro", number=50, isCount=FALSE) plotHeatmap(rs, clusterSamplesData="dendrogramValue", clusterFeaturesData=unique(genesDendro[,"IndexInOriginal"]), main="Constrasts based on dendrogram", breaks=breaks, sampleData=c("time_points")) ## ----get_contrasts------------------------------------------------------- ## get contrasts from clusterExperiment object contrMat <- clusterContrasts(rs, contrastType="Dendro")$contrastMatrix wh_rm <- which(primaryCluster(rs)==-1) ## ----mast---------------------------------------------------------------- ## threshold data to apply MAST norm <- transform(rs) norm[norm<0] <- 0 ## create MAST object sca <- FromMatrix("SingleCellAssay", t(norm[,-wh_rm]), cData=data.frame(Clusters=primaryClusterNamed(rs)[-wh_rm])) ## model fit fit <- zlm(~0+Clusters, sca) ## hp test cont_levels <- paste("Clustersm", 1:max(primaryCluster(rs)), sep="") mast_res <- lapply(1:ncol(contrMat), function(i) { cont <- gsub("X", "Clustersm", colnames(contrMat))[i] hp <- Hypothesis(cont, cont_levels) wald <- waldTest(fit, hp) retval <- data.frame(Feature=rownames(wald), ContrastName=paste("Node", i, sep=""), Contrast=colnames(contrMat)[i], P.Value=wald[,1,3], stringsAsFactors = FALSE) retval <- retval[order(retval$P.Value),] return(retval[1:50,]) }) mast_res <- do.call(rbind, mast_res) plotHeatmap(rs, clusterSamplesData="dendrogramValue", clusterFeaturesData=unique(mast_res$Feature), main="Constrasts based on dendrogram", breaks=breaks, sampleData=c("time_points")) ## ----session------------------------------------------------------------- sessionInfo()