## ----echo = FALSE, message=FALSE---------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, # comment = "#>", tidy = FALSE, cache = FALSE, results = 'markup' ) ## ----eval=FALSE--------------------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("SingleMoleculeFootprinting") ## ----eval=FALSE--------------------------------------------------------------- # library(parallel) # library(QuasR) # library(BSgenome.Mmusculus.UCSC.mm10) # # cl <- makeCluster(40) # prj <- QuasR::qAlign(sampleFile = Qinput, # genome = "BSgenome.Mmusculus.UCSC.mm10", # aligner = "Rbowtie", # projectName = "prj", # paired = "fr", # bisulfite = "undir", # alignmentParameter = "-e 70 -X 1000 -k 2 --best -strata", # alignmentsDir = "./", # cacheDir = tempdir(), # clObj = cl) ## ----setup, message=FALSE, eval=TRUE------------------------------------------ library(BSgenome.Mmusculus.UCSC.mm10) library(SingleMoleculeFootprinting) library(SingleMoleculeFootprintingData) library(parallel) ## ----include=FALSE------------------------------------------------------------ # Under the hood: # download and cache SingleMoleculeFootprintingData data # locate bam file and write QuasR sampleSheet # Download and cache CachedBamPath = SingleMoleculeFootprintingData::NRF1pair.bam()[[1]] CachedBaiPath = SingleMoleculeFootprintingData::NRF1pair.bam.bai()[[1]] SingleMoleculeFootprintingData::AllCs.rds() SingleMoleculeFootprintingData::EnrichmentRegions_mm10.rds() SingleMoleculeFootprintingData::ReferenceMethylation.rds() # Create copy of bam and bai files with relevant names CacheDir <- ExperimentHub::getExperimentHubOption(arg = "CACHE") file.copy(from = CachedBamPath, to = paste0(CacheDir, "/", "NRF1pair.bam")) file.copy(from = CachedBaiPath, to = paste0(CacheDir, "/", "NRF1pair.bam.bai")) # Write QuasR input data.frame( FileName = paste0(CacheDir, "/", "NRF1pair.bam"), SampleName = "NRF1pair_DE_" ) -> df readr::write_delim(df, paste0(CacheDir, "/NRF1Pair_Qinput.txt"), delim = "\t") ## ----eval=TRUE---------------------------------------------------------------- Qinput = paste0(CacheDir, "/NRF1Pair_Qinput.txt") suppressMessages(readr::read_delim(Qinput, delim = "\t")) ## ----eval=TRUE---------------------------------------------------------------- BaitRegions <- SingleMoleculeFootprintingData::EnrichmentRegions_mm10.rds() clObj = makeCluster(5) BaitCaptureEfficiency <- BaitCapture(sampleSheet = Qinput, genome = BSgenome.Mmusculus.UCSC.mm10, baits = BaitRegions, clObj = clObj) stopCluster(clObj) BaitCaptureEfficiency ## ----conversion-rate-precision, eval=FALSE, echo = TRUE----------------------- # ConversionRateValue <- ConversionRate(sampleSheet = Qinput, # genome = BSgenome.Mmusculus.UCSC.mm10, # chr = 6) ## ----load-conversion-rate-precision, eval=TRUE, echo = FALSE------------------ ## the code above takes > 10 minutes to run, so we load a pre-computed value here ConversionRateValue <- readRDS(system.file("extdata", "vignette_ConversionRatePrecision.rds", package = "SingleMoleculeFootprinting")) ## ----echo = FALSE------------------------------------------------------------- knitr::kable(data.frame(ExperimentType = c("Single enzyme SMF", "Double enzyme SMF", "No enzyme (endogenous mCpG only)"), substring = c("\\_NO_", "\\_DE_", "\\_SS_"), RelevanContext = c("DGCHN & NWCGW", "GCH + HCG", "CG"), Notes = c("Methylation info is reported separately for each context", "Methylation information is aggregated across the contexts", "-"))) ## ----eval=TRUE---------------------------------------------------------------- MySample <- suppressMessages(readr::read_delim(Qinput, delim = "\t")[[2]]) Region_of_interest <- GRanges(seqnames = "chr6", ranges = IRanges(start = 88106000, end = 88106500), strand = "*") Methylation <- CallContextMethylation(sampleSheet = Qinput, sample = MySample, genome = BSgenome.Mmusculus.UCSC.mm10, range = Region_of_interest, coverage = 20, ConvRate.thr = 0.2) Methylation[[1]] Methylation[[2]][1:10, 1:10] ## ----------------------------------------------------------------------------- n = 1000 readsSubset <- sample(seq(nrow(Methylation[[2]])), n) Methylation[[2]] <- Methylation[[2]][readsSubset,] ## ----eval=TRUE---------------------------------------------------------------- TFBSs <- GenomicRanges::GRanges("chr6", IRanges(c(88106216, 88106253), c(88106226, 88106263)), strand = "-") elementMetadata(TFBSs)$name <- c("NRF1", "NRF1") names(TFBSs) <- c(paste0("TFBS_", c(4305215, 4305216))) TFBSs PlotAvgSMF(MethGR = Methylation[[1]], range = Region_of_interest, TFBSs = TFBSs) ## ----------------------------------------------------------------------------- PlotSM(MethSM = Methylation[[2]], range = Region_of_interest) ## ----eval=TRUE---------------------------------------------------------------- PlotSM(MethSM = Methylation[[2]], range = Region_of_interest, SortedReads = "HC") ## ----eval=TRUE---------------------------------------------------------------- SortedReads_TFCluster <- SortReadsByTFCluster(MethSM = Methylation[[2]], TFBSs = TFBSs) ## ----echo=FALSE--------------------------------------------------------------- message(paste0("Number of retrieved states: ", as.character(length(SortedReads_TFCluster)))) message("States population:") unlist(lapply(SortedReads_TFCluster, length)) ## ----------------------------------------------------------------------------- PlotSM(MethSM = Methylation[[2]], range = Region_of_interest, SortedReads = SortedReads_TFCluster) ## ----------------------------------------------------------------------------- StateQuantificationPlot(SortedReads = SortedReads_TFCluster) ## ----eval=TRUE---------------------------------------------------------------- PlotSingleSiteSMF(ContextMethylation = Methylation, sample = MySample, range = Region_of_interest, SortedReads = SortedReads_TFCluster, TFBSs = TFBSs, saveAs = NULL) ## ----sessionInfo, echo=FALSE-------------------------------------------------- sessionInfo()