## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE) ## ----knitr-options, echo=FALSE, message=FALSE, warning=FALSE------------------ library(knitr) opts_chunk$set(fig.align = 'center', fig.width = 6, fig.height = 6, dev = 'png') ## ----eval=FALSE--------------------------------------------------------------- # library(BiocManager) # BiocManager::install('SGCP') ## ----message=FALSE, warning=FALSE--------------------------------------------- library(SGCP) ## ----message=FALSE, warning=FALSE--------------------------------------------- library(SummarizedExperiment) data(cheng) cheng print("gene expression...") print("rownames and colnames correspond to gene Entrez ids and sample names") head(assay(cheng)) print(" \n gene ids...") print("rownames are the gene Entrez ids") head(rowData(cheng)) ## ----message=TRUE, warning=FALSE---------------------------------------------- message("expData") expData <- assay(cheng) head(expData) dim(expData) message(" \n geneID") geneID <- rowData(cheng) geneID <- geneID$ENTREZID head(geneID) length(geneID) library(org.Hs.eg.db) annotation_db <- "org.Hs.eg.db" ## ----message=TRUE, warning=FALSE---------------------------------------------- # sgcp <- ezSGCP(expData = expData, geneID = geneID, annotation_db = annotation_db, # eff.egs = FALSE , saveOrig = FALSE, sil = TRUE, hm= NULL) data(sgcp) summary(sgcp, show.data=TRUE) ## ----message=TRUE, warning=FALSE---------------------------------------------- ## starting network construction step... ## normalization... ## Gaussian kernel... ## it may take time... ## TOM... ## it may take time... ## network is created, done!... ## starting network clustering step... ## calculating normalized Laplacian ## it may take time... ## calculating eigenvalues/vectors ## it may take time... ## n_egvec is 100 ## number of clusters for relativeGap method is 2 ## number of clusters for secondOrderGap method is 5 ## number of clusters for additiveGap method is 3 ## Gene Ontology Validation... ## method relativeGap.... ## calling GOstats for overBP... ## identifying genes for each GOTerm... ## calling GOstats for overCC... ## identifying genes for each GOTerm... ## calling GOstats for overMF... ## identifying genes for each GOTerm... ## calling GOstats for underBP... ## identifying genes for each GOTerm... ## calling GOstats for underCC... ## identifying genes for each GOTerm... ## calling GOstats for underMF... ## identifying genes for each GOTerm... ## method secondOrderGap... ## calling GOstats for overBP... ## identifying genes for each GOTerm... ## calling GOstats for overCC... ## identifying genes for each GOTerm... ## calling GOstats for overMF... ## identifying genes for each GOTerm... ## calling GOstats for underBP... ## identifying genes for each GOTerm... ## calling GOstats for underCC... ## identifying genes for each GOTerm... ## calling GOstats for underMF... ## identifying genes for each GOTerm... ## method additiveGap.... ## calling GOstats for overBP... ## identifying genes for each GOTerm... ## calling GOstats for overCC... ## identifying genes for each GOTerm... ## calling GOstats for overMF... ## identifying genes for each GOTerm... ## calling GOstats for underBP... ## identifying genes for each GOTerm... ## calling GOstats for underCC... ## identifying genes for each GOTerm... ## calling GOstats for underMF... ## identifying genes for each GOTerm... ## method relativeGap is selected using GO validation and k is 2 ## calculating the Silhouette index ## it may take time... ## network clustering is done... ## starting initial gene ontology enrichment step... ## GOenrichment for cluster 2 ## calling GOstats for overBP... ## identifying genes for each GOTerm... ## calling GOstats for overCC... ## identifying genes for each GOTerm... ## calling GOstats for overMF... ## identifying genes for each GOTerm... ## calling GOstats for underBP... ## identifying genes for each GOTerm... ## calling GOstats for underCC... ## identifying genes for each GOTerm... ## calling GOstats for underMF... ## identifying genes for each GOTerm... ## GOenrichment for cluster 1 ## calling GOstats for overBP... ## identifying genes for each GOTerm... ## calling GOstats for overCC... ## identifying genes for each GOTerm... ## calling GOstats for overMF... ## identifying genes for each GOTerm... ## calling GOstats for underBP... ## identifying genes for each GOTerm... ## calling GOstats for underCC... ## identifying genes for each GOTerm... ## calling GOstats for underMF... ## identifying genes for each GOTerm... ## gene ontology done!.. ## starting semi-labeling stage... ## cutoff value is 9.28130152105493e-05 ## semiLabeling done!.. ## starting semi-supervised step... ## creating train and test sets based on remarkable and unremarkable genes... ## number of remarkable genes is 1165 ## number of unremarkable genes is 335 ## performing knn... ## it may take time ## Loading required package: ggplot2 ## Loading required package: lattice ## Length Class Mode ## learn 2 -none- list ## k 1 -none- numeric ## theDots 0 -none- list ## xNames 4 -none- character ## problemType 1 -none- character ## tuneValue 1 data.frame list ## obsLevels 2 -none- character ## param 0 -none- list ## 24-nearest neighbor model ## Training set outcome distribution: ## 1 2 ## 498 667 ## class assignment for unremarkable genes... ## top class labels, and bottom number of predicted genes ## prediction ## 1 2 ## 130 205 ## semi-supervised done!.. ## starting final gene ontology enrichment step... ## GOenrichment for cluster 2 ## calling GOstats for overBP... ## identifying genes for each GOTerm... ## calling GOstats for overCC... ## identifying genes for each GOTerm... ## calling GOstats for overMF... ## identifying genes for each GOTerm... ## calling GOstats for underBP... ## identifying genes for each GOTerm... ## calling GOstats for underCC... ## identifying genes for each GOTerm... ## calling GOstats for underMF... ## identifying genes for each GOTerm... ## GOenrichment for cluster 1 ## calling GOstats for overBP... ## identifying genes for each GOTerm... ## calling GOstats for overCC... ## identifying genes for each GOTerm... ## calling GOstats for overMF... ## identifying genes for each GOTerm... ## calling GOstats for underBP... ## identifying genes for each GOTerm... ## calling GOstats for underCC... ## identifying genes for each GOTerm... ## calling GOstats for underMF... ## identifying genes for each GOTerm... ## gene ontology done!.. ## ezSGCP done!.. ## ----message=FALSE, warning=FALSE--------------------------------------------- message("PCA of expression data without label") SGCP_ezPLOT(sgcp = sgcp, expreData = expData, silhouette_index = TRUE, keep = FALSE) ## ----message=TRUE, warning=FALSE---------------------------------------------- rm(sgcp) ## ----message=TRUE, warning=FALSE---------------------------------------------- message("Plotting PCA of expression data") pl <- SGCP_plot_pca(m = expData, clusLabs = NULL, tit = "PCA plot", ps = .5) print(pl) ## ----message=TRUE, warning=TRUE----------------------------------------------- resAdja <- adjacencyMatrix(expData = expData, hm = NULL) resAdja[0:10, 0:5] ## ----message=TRUE, warning=FALSE---------------------------------------------- #message("Plotting adjacency heatmap") #pl <- SGCP_plot_heatMap(m = resAdja, tit = "Adjacency Heatmap", # xname = "genes", yname = "genes") #print(pl) ## ----message=TRUE, warning=FALSE---------------------------------------------- # resClus <- clustering(adjaMat = resAdja, geneID = geneID, annotation_db = annotation_db, # eff.egs = FALSE , saveOrig = FALSE, sil = TRUE) data(resClus) summary(resClus) # lets drop noisy genes from expData geneID <- resClus$geneID if(length(resClus$dropped.indices)>0 ){ expData <- expData[-resClus$dropped.indices, ]} # removing the adjacency matrix rm(resAdja) ## ----message=TRUE, warning=FALSE---------------------------------------------- ## calculating normalized Laplacian ## it may take time... ## calculating eigenvalues/vectors ## it may take time... ## n_egvec is 100 ## number of clusters for relativeGap method is 2 ## number of clusters for secondOrderGap method is 5 ## number of clusters for additiveGap method is 3 ## Gene Ontology Validation... ## method relativeGap.... ## calling GOstats for overBP... ## identifying genes for each GOTerm... ## calling GOstats for overCC... ## identifying genes for each GOTerm... ## calling GOstats for overMF... ## identifying genes for each GOTerm... ## calling GOstats for underBP... ## identifying genes for each GOTerm... ## calling GOstats for underCC... ## identifying genes for each GOTerm... ## calling GOstats for underMF... ## identifying genes for each GOTerm... ## method secondOrderGap... ## calling GOstats for overBP... ## identifying genes for each GOTerm... ## calling GOstats for overCC... ## identifying genes for each GOTerm... ## calling GOstats for overMF... ## identifying genes for each GOTerm... ## calling GOstats for underBP... ## identifying genes for each GOTerm... ## calling GOstats for underCC... ## identifying genes for each GOTerm... ## calling GOstats for underMF... ## identifying genes for each GOTerm... ## method additiveGap.... ## calling GOstats for overBP... ## identifying genes for each GOTerm... ## calling GOstats for overCC... ## identifying genes for each GOTerm... ## calling GOstats for overMF... ## identifying genes for each GOTerm... ## calling GOstats for underBP... ## identifying genes for each GOTerm... ## calling GOstats for underCC... ## identifying genes for each GOTerm... ## calling GOstats for underMF... ## identifying genes for each GOTerm... ## method relativeGap is selected using GO validation and k is 2 ## calculating the Silhouette index ## it may take time... ## network clustering is done... ## ----message=TRUE, warning=FALSE---------------------------------------------- message("Plotting PCA of expression data with label") # pl <- SGCP_plot_pca(m = expData, clusLabs = NULL, tit = "PCA plot without label", ps = .5) # print(pl) ## ----message=TRUE, warning=FALSE---------------------------------------------- message("Plptting PCA of transformed data without label") # pl <- SGCP_plot_pca(m = resClus$Y, clusLabs = NULL, tit = "PCA plot without label", ps = .5) # print(pl) ## ----message=TRUE, warning=FALSE---------------------------------------------- message("Plotting PCA of transformed data with label") # pl <- SGCP_plot_pca(m = resClus$Y, clusLabs = resClus$clusterLabels, tit = "PCA plot with label", ps = .5) # print(pl) ## ----message=TRUE, warning=FALSE---------------------------------------------- if(resClus$cv == "cvGO" || resClus$cv == "cvConductance"){ message("plotting relativeGap, secondOrderGap, additiveGap methods ...") print(resClus$clusterNumberPlot$relativeGap) } ## ----message=TRUE, warning=FALSE---------------------------------------------- if(resClus$cv == "cvGO" || resClus$cv == "cvConductance"){ message("plotting relativeGap, secondOrderGap, additiveGap methods ...") print(resClus$clusterNumberPlot$secondOrderGap) } ## ----message=TRUE, warning=FALSE---------------------------------------------- if(resClus$cv == "cvGO" || resClus$cv == "cvConductance"){ message("plotting relativeGap, secondOrderGap, additiveGap methods ...") print(resClus$clusterNumberPlot$additiveGap) } ## ----message=TRUE, warning=FALSE---------------------------------------------- # checking if conductance index is calculated if(resClus$cv == "cvGO" || resClus$cv == "cvConductance" ){ message("plotting cluster conductance index...") pl <- SGCP_plot_conductance(conduct = resClus$conductance, tit = "Cluster Conductance Index", xname = "clusterLabel", yname = "conductance index") print(pl)} ## ----message=TRUE, warning=FALSE---------------------------------------------- # checking if silhouette index is calculated if(resClus$cv == "cvGO" || resClus$cv == "cvConductance" ){ message("plotting gene silhouette index...") pl <- SGCP_plot_silhouette(resClus$silhouette, tit = "Gene Silhouette Index", xname = "genes", yname = "silhouette index") print(pl)} ## ----message=TRUE, warning=TRUE----------------------------------------------- # resInitialGO <- geneOntology(geneUniv = geneID, clusLab = resClus$clusterLabels, # annotation_db = annotation_db) data(resInitialGO) head(resInitialGO$GOresults) ## ----message=TRUE, warning=TRUE----------------------------------------------- ## GOenrichment for cluster 1 ## calling GOstats for overBP... ## identifying genes for each GOTerm... ## calling GOstats for overCC... ## identifying genes for each GOTerm... ## calling GOstats for overMF... ## identifying genes for each GOTerm... ## calling GOstats for underBP... ## identifying genes for each GOTerm... ## calling GOstats for underCC... ## identifying genes for each GOTerm... ## calling GOstats for underMF... ## identifying genes for each GOTerm... ## GOenrichment for cluster 2 ## calling GOstats for overBP... ## identifying genes for each GOTerm... ## calling GOstats for overCC... ## identifying genes for each GOTerm... ## calling GOstats for overMF... ## identifying genes for each GOTerm... ## calling GOstats for underBP... ## identifying genes for each GOTerm... ## calling GOstats for underCC... ## identifying genes for each GOTerm... ## calling GOstats for underMF... ## identifying genes for each GOTerm... ## gene ontology done!.. ## ----message=TRUE, warning=FALSE---------------------------------------------- message("plotting initial GO p-values jitters...") pl <- SGCP_plot_jitter(df = resInitialGO$GOresults, tit = "Initial GO p-values", xname = "cluster", yname = "-log10 p-value", ps = 3) print(pl) ## ----message=TRUE, warning=FALSE---------------------------------------------- message("plotting initial GO p-values density") pl <- SGCP_plot_density(df = resInitialGO$GOresults, tit = "Initial GO p-values Density", xname = "cluster", yname = "-log10 p-value") print(pl) ## ----message=TRUE, warning=FALSE---------------------------------------------- message("plotting initial GO p-values mean") pl <- SGCP_plot_bar(df = resInitialGO$GOresults, tit = "Cluster Performance", xname = "cluster", yname = "mean -log10 p-value") print(pl) ## ----message=TRUE, warning=FALSE---------------------------------------------- message("plotting initial GO p-values pie chart...") pl <- SGCP_plot_pie(df = resInitialGO$GOresults, tit = "Initial GO Analysis", xname = "cluster", yname = "count", posx = 1.8) print(pl) ## ----message=TRUE, warning=TRUE----------------------------------------------- resSemiLabel <- semiLabeling(geneID = geneID, df_GO = resInitialGO$GOresults, GOgenes = resInitialGO$FinalGOTermGenes) print(head(resSemiLabel$geneLabel)) table(resSemiLabel$geneLabel$label) ## ----message=TRUE, warning=TRUE----------------------------------------------- resSemiSupervised <- semiSupervised(specExp = resClus$Y, geneLab = resSemiLabel$geneLabel, model = "knn", kn = NULL) print(head(resSemiSupervised$FinalLabeling)) print(table(resSemiSupervised$FinalLabeling$FinalLabel)) ## ----message=TRUE, warning=FALSE---------------------------------------------- # message("Plotting PCA of transformed data with label") # pl <- SGCP_plot_pca(m = expData, # clusLabs = resSemiSupervised$FinalLabeling$FinalLabel, # tit = "PCA plot with label", ps = .5) print(pl) ## ----message=TRUE, warning=FALSE---------------------------------------------- # message("Plotting PCA of transformed data with label") # pl <- SGCP_plot_pca(m = resClus$Y, # clusLabs = resSemiSupervised$FinalLabeling$FinalLabel, # tit = "PCA plot with label", ps = .5) #print(pl) ## ----message=TRUE, warning=TRUE----------------------------------------------- # resFinalGO <- geneOntology(geneUniv = geneID, clusLab = resSemiSupervised$FinalLabeling$FinalLabel, # annotation_db = annotation_db) data(resFinalGO) head(resFinalGO$GOresults) ## ----message=TRUE, warning=TRUE----------------------------------------------- ## GOenrichment for cluster 1 ## calling GOstats for overBP... ## identifying genes for each GOTerm... ## calling GOstats for overCC... ## identifying genes for each GOTerm... ## calling GOstats for overMF... ## identifying genes for each GOTerm... ## calling GOstats for underBP... ## identifying genes for each GOTerm... ## calling GOstats for underCC... ## identifying genes for each GOTerm... ## calling GOstats for underMF... ## identifying genes for each GOTerm... ## GOenrichment for cluster 2 ## calling GOstats for overBP... ## identifying genes for each GOTerm... ## calling GOstats for overCC... ## identifying genes for each GOTerm... ## calling GOstats for overMF... ## identifying genes for each GOTerm... ## calling GOstats for underBP... ## identifying genes for each GOTerm... ## calling GOstats for underCC... ## identifying genes for each GOTerm... ## calling GOstats for underMF... ## identifying genes for each GOTerm... ## gene ontology done!.. ## ----message=TRUE, warning=FALSE---------------------------------------------- message("plotting final GO p-values jitters...") pl <- SGCP_plot_jitter(df = resFinalGO$GOresults, tit = "Final GO p-values", xname = "module", yname = "-log10 p-value", ps = 3) print(pl) ## ----message=TRUE, warning=FALSE---------------------------------------------- message("plotting final GO p-values density...") pl <- SGCP_plot_density(df = resFinalGO$GOresults, tit = "Final GO p-values Density", xname = "module", yname = "-log10 p-value") print(pl) rm(pl) ## ----message=TRUE, warning=FALSE---------------------------------------------- message("plotting final GO p-values mean...") pl <- SGCP_plot_bar(df = resFinalGO$GOresults, tit = "Module Performance", xname = "module", yname = "mean -log10 p-value") print(pl) rm(pl) ## ----message=TRUE, warning=FALSE---------------------------------------------- message("plotting final GO p-values pie xhart...") pl <- SGCP_plot_pie(df = resFinalGO$GOresults, tit = "Final GO Analysis", xname = "module", yname = "count", posx = 1.9) print(pl) rm(pl) ## ----message=TRUE, warning=FALSE---------------------------------------------- rm(resClus, resInitialGO, resSemiLabel, resSemiSupervised, resFinalGO, pl) ## ----message=TRUE, warning=FALSE---------------------------------------------- sI <- sessionInfo() sI