## ----ky1, load-depenencies, results="hide", message=FALSE, warning=FALSE------ library(sesame) sesameDataCache() ## ----ky2, message=FALSE------------------------------------------------------- query <- KYCG_getDBs("MM285.designGroup")[["PGCMeth"]] head(query) ## ----ky3, fig.width=8, fig.height=5, message=FALSE---------------------------- results_pgc <- testEnrichment(query, platform="MM285") head(results_pgc) ## ----ky4---------------------------------------------------------------------- KYCG_plotEnrichAll(results_pgc) ## ----ky9, echo = FALSE, results="asis"---------------------------------------- library(knitr) df = data.frame( "Continuous DB"=c("Correlation-based","Set Enrichment Analysis"), "Discrete DB"=c("Set Enrichment Analysis","Fisher's Exact Test")) rownames(df) = c("Continuous Query", "Discrete Query") kable(df, caption="Four KnowYourCG Testing Scenarios") ## ----ky10, run-test-single, echo=TRUE, eval=TRUE, message=FALSE--------------- library(SummarizedExperiment) ## prepare a query df <- rowData(sesameDataGet('MM285.tissueSignature')) query <- df$Probe_ID[df$branch == "fetal_brain" & df$type == "Hypo"] results <- testEnrichment(query, "TFBS", platform="MM285") results %>% dplyr::filter(overlap>10) %>% head ## prepare another query query <- df$Probe_ID[df$branch == "fetal_liver" & df$type == "Hypo"] results <- testEnrichment(query, "TFBS", platform="MM285") results %>% dplyr::filter(overlap>10) %>% dplyr::select(dbname, estimate, test, FDR) %>% head ## ----ky5, list-data, eval=TRUE, echo=TRUE------------------------------------- KYCG_listDBGroups("MM285") ## ----ky6, cache-data, eval=TRUE, warning=FALSE-------------------------------- dbs <- KYCG_getDBs("MM285.design") ## ----ky7, view-data1, eval=TRUE, warning=FALSE-------------------------------- str(dbs[["PGCMeth"]]) ## ----ky8, message=FALSE------------------------------------------------------- df <- rowData(sesameDataGet('MM285.tissueSignature')) query <- df$Probe_ID[df$branch == "B_cell"] head(query) ## ----ky16, fig.width=7, fig.height=6, echo=TRUE, warning=FALSE, message=FALSE---- query <- names(sesameData_getProbesByGene("Dnmt3a", "MM285")) results <- testEnrichment(query, KYCG_buildGeneDBs(query, max_distance=100000, platform="MM285"), platform="MM285") results[,c("dbname","estimate","gene_name","FDR", "nQ", "nD", "overlap")] ## ----ky17, fig.width=5, fig.height=4, echo=TRUE------------------------------- KYCG_plotLollipop(results, label="gene_name") ## ----eval=FALSE--------------------------------------------------------------- # KYCG_listDBGroups(path = "~/Downloads/KYCG_knowledgebase_EPICv2") # ## [1] "ABCompartment.20220911.gz" "Blacklist.20220304.gz" # ## [3] "CGI.20220904.gz" "ChromHMM.20220303.gz" # ## [5] "CTCFbind.20220911.gz" "HM.20221013.gz" # ## [7] "ImprintingDMR.20220818.gz" "MetagenePC.20220911.gz" # ## [9] "nFlankCG.20220321.gz" "PMD.20220911.gz" # ## [11] "ProbeType.gz" "REMCChromHMM.20220911.gz" # ## [13] "rmsk1.20220307.gz" "rmsk2.20220321.gz" # ## [15] "Tetranuc2.20220321.gz" "TFBS.20220921.gz" # ## [17] "TFBSrm.20221005.gz" # ## load all database files in the folder # dbs <- KYCG_loadDBs("~/Downloads/KYCG_knowledgebase_EPICv2/") # ## or one database file # dbs <- KYCG_loadDBs("~/Downloads/KYCG_knowledgebase_EPICv2/hg38/CGI.20220904.gz") ## ----ky18, message=FALSE------------------------------------------------------ df <- rowData(sesameDataGet('MM285.tissueSignature')) query <- df$Probe_ID[df$branch == "fetal_liver" & df$type == "Hypo"] regs <- sesameData_getTxnGRanges("mm10", merge2gene = TRUE) genes <- sesameData_annoProbes(query, regs, platform="MM285", return_ov_features=TRUE) genes ## ----ky19, eval = FALSE------------------------------------------------------- # library(gprofiler2) # # ## use gene name # gostres <- gost(genes$gene_name, organism = "mmusculus") # gostres$result[order(gostres$result$p_value),] # gostplot(gostres) # # ## use Ensembl gene ID, note we need to remove the version suffix # gene_ids <- sapply(strsplit(names(genes),"\\."), function(x) x[1]) # gostres <- gost(gene_ids, organism = "mmusculus") # gostres$result[order(gostres$result$p_value),] # gostplot(gostres) ## ----ky21, run-test-data, echo=TRUE, eval=TRUE, message=FALSE----------------- query <- KYCG_getDBs("KYCG.MM285.designGroup")[["TSS"]] ## ----ky22, echo=TRUE, eval=TRUE, message=FALSE-------------------------------- res <- testEnrichmentSEA(query, "MM285.seqContextN") res[, c("dbname", "test", "estimate", "FDR", "nQ", "nD", "overlap")] ## ----ky24, GSEA, fig.width=6, fig.height=6, message=FALSE--------------------- query <- KYCG_getDBs("KYCG.MM285.designGroup")[["TSS"]] db <- KYCG_getDBs("MM285.seqContextN", "distToTSS") res <- testEnrichmentSEA(query, db, prepPlot = TRUE) KYCG_plotSetEnrichment(res[[1]]) ## ----ky23, warning=FALSE, eval=FALSE------------------------------------------ # beta_values <- getBetas(sesameDataGet("MM285.1.SigDF")) # res <- testEnrichmentSEA(beta_values, "MM285.chromHMM") # res[, c("dbname", "test", "estimate", "FDR", "nQ", "nD", "overlap")] ## ----ky25, echo=FALSE--------------------------------------------------------- se <- sesameDataGet("MM285.10.SE.tissue") library(tibble) df_plot <- as.data.frame(t(assay(se)[ c("cg30910045_BC21","cg34861418_TC21"),])) %>% rownames_to_column("IDAT") library(ggplot2) ggplot(df_plot) + geom_line(mapping=aes(IDAT, y=cg30910045_BC21, group=1), color="blue") + geom_line(mapping=aes(IDAT, y=cg34861418_TC21, group=1), color="red") + labs(y="Methylation fraction", x="Sample") + theme_bw() + theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) ## ----------------------------------------------------------------------------- sessionInfo()