## ----knitr-options, echo=FALSE, message=FALSE, warning=FALSE------------------ library(knitr) opts_chunk$set(fig.align = 'center', fig.width = 4.5, fig.height = 3, dev = 'png') ## ----Bioconductor install, eval = FALSE--------------------------------------- # if (!requireNamespace("BiocManager")) # install.packages("BiocManager") # BiocManager::install("HGC") ## ----Github install, eval = FALSE--------------------------------------------- # if(!require(devtools)) # install.packages("devtools") # devtools::install_github("XuegongLab/HGC") ## ----message=FALSE, warning=FALSE--------------------------------------------- library(HGC) data(Pollen) Pollen.PCs <- Pollen[["PCs"]] Pollen.Label.Tissue <- Pollen[["Tissue"]] Pollen.Label.CellLine <- Pollen[["CellLine"]] dim(Pollen.PCs) table(Pollen.Label.Tissue) table(Pollen.Label.CellLine) ## ----------------------------------------------------------------------------- Pollen.SNN <- SNN.Construction(mat = Pollen.PCs, k = 25, threshold = 0.15) Pollen.ClusteringTree <- HGC.dendrogram(G = Pollen.SNN) ## ----------------------------------------------------------------------------- cluster.k5 <- cutree(Pollen.ClusteringTree, k = 5) ## ----eval = FALSE------------------------------------------------------------- # library(dplyr) # library(Seurat) # library(patchwork) # library(HGC) # # # Load the PBMC dataset # pbmc.data <- Read10X(data.dir = # "../data/pbmc3k/filtered_gene_bc_matrices/hg19/") # # Initialize the Seurat object with the raw (non-normalized data). # pbmc <- CreateSeuratObject(counts = pbmc.data, project = "pbmc3k", # min.cells = 3, min.features = 200) # # # QC and selecting cells for further analysis # pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-") # pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & # nFeature_RNA < 2500 & percent.mt < 5) # # # Normalizing the data # pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", # scale.factor = 10000) # # # Identification of highly variable features (feature selection) # pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", # nfeatures = 2000) # # # Scaling the data # all.genes <- rownames(pbmc) # pbmc <- ScaleData(pbmc, features = all.genes) # # # Perform linear dimensional reduction # pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc)) # # # Determine the ‘dimensionality’ of the dataset # pbmc <- JackStraw(pbmc, num.replicate = 100) # pbmc <- ScoreJackStraw(pbmc, dims = 1:20) # # # Construct the graph and cluster the cells with HGC # pbmc <- FindNeighbors(pbmc, dims = 1:10) # pbmc <- FindClusteringTree(pbmc, graph.type = "SNN") # # # Output the tree # pbmc.tree <- pbmc@graphs$ClusteringTree ## ----eval = FALSE------------------------------------------------------------- # # Setting up the data # library(scRNAseq) # sce <- GrunPancreasData() # # library(scuttle) # qcstats <- perCellQCMetrics(sce) # qcfilter <- quickPerCellQC(qcstats, # percent_subsets="altexps_ERCC_percent") # sce <- sce[,!qcfilter$discard] # # library(scran) # clusters <- quickCluster(sce) # sce <- computeSumFactors(sce, clusters=clusters) # sce <- logNormCounts(sce) # # # Variance modelling # dec <- modelGeneVar(sce) # plot(dec$mean, dec$total, xlab="Mean log-expression", # ylab="Variance") # curve(metadata(dec)$trend(x), col="blue", add=TRUE) # # # Get the top 10% of genes. # top.hvgs <- getTopHVGs(dec, prop=0.1) # # sce <- fixedPCA(sce, subset.row=top.hvgs) # reducedDimNames(sce) # # # Automated PC choice # output <- getClusteredPCs(reducedDim(sce)) # npcs <- metadata(output)$chosen # reducedDim(sce, "PCAsub") <- # reducedDim(sce, "PCA")[,1:npcs,drop=FALSE] # # # library(HGC) # # Graph construction # g <- buildSNNGraph(sce, use.dimred="PCAsub") # # Graph-based clustering # cluster.tree <- HGC.dendrogram(G = g) # cluster.k12 <- cutree(cluster.tree, k = 12) # # colLabels(sce) <- factor(cluster.k12) # # library(scater) # sce <- runTSNE(sce, dimred="PCAsub") # plotTSNE(sce, colour_by="label", text_by="label") ## ----fig.height = 4.5--------------------------------------------------------- Pollen.ClusteringTree$height = log(Pollen.ClusteringTree$height + 1) Pollen.ClusteringTree$height = log(Pollen.ClusteringTree$height + 1) HGC.PlotDendrogram(tree = Pollen.ClusteringTree, k = 5, plot.label = FALSE) ## ----fig.height = 4.5--------------------------------------------------------- Pollen.labels <- data.frame(Tissue = Pollen.Label.Tissue, CellLine = Pollen.Label.CellLine) HGC.PlotDendrogram(tree = Pollen.ClusteringTree, k = 5, plot.label = TRUE, labels = Pollen.labels) ## ----------------------------------------------------------------------------- ARI.mat <- HGC.PlotARIs(tree = Pollen.ClusteringTree, labels = Pollen.labels) ## ----------------------------------------------------------------------------- Pollen.ParameterRecord <- HGC.parameter(G = Pollen.SNN) HGC.PlotParameter(Pollen.ParameterRecord, parameter = "CL") HGC.PlotParameter(Pollen.ParameterRecord, parameter = "ANN")