## ----eval=F------------------------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("methylscaper") ## ----message=FALSE, eval=F---------------------------------------------------- # if (!requireNamespace("devtools", quietly=TRUE)) # install.packages("devtools") # devtools::install_github("rhondabacher/methylscaper", ref="R4.0") ## ----message = FALSE---------------------------------------------------------- library(methylscaper) ## ----eval=F------------------------------------------------------------------- # methylscaper() ## ----eval=FALSE--------------------------------------------------------------- # filepath <- "~/Downloads/GSE109262_RAW/" # singlecell_subset <- subsetSC(filepath, chromosome=19, startPos = 8937041, endPos = 8997041) # # To save for later, save as an rds file and change the folder location as desired: # saveRDS(singlecell_subset, "~/Downloads/singlecell_subset.rds") ## ----eval=TRUE---------------------------------------------------------------- gse_subset_path <- list(c(("http://methylscaper.com/data/GSE109262_SUBSET/GSM2936197_ESC_A08_CpG-met_processed.tsv.gz"), ("http://methylscaper.com/data/GSE109262_SUBSET/GSM2936196_ESC_A07_CpG-met_processed.tsv.gz"), ("http://methylscaper.com/data/GSE109262_SUBSET/GSM2936192_ESC_A03_CpG-met_processed.tsv.gz")), c(("http://methylscaper.com/data/GSE109262_SUBSET/GSM2936197_ESC_A08_GpC-acc_processed.tsv.gz"), ("http://methylscaper.com/data/GSE109262_SUBSET/GSM2936196_ESC_A07_GpC-acc_processed.tsv.gz"), ("http://methylscaper.com/data/GSE109262_SUBSET/GSM2936192_ESC_A03_GpC-acc_processed.tsv.gz")), c(("GSM2936197_ESC_A08_CpG-met_processed"), ("GSM2936196_ESC_A07_CpG-met_processed"), ("GSM2936192_ESC_A03_CpG-met_processed")), c(("GSM2936197_ESC_A08_GpC-acc_processed"), ("GSM2936196_ESC_A07_GpC-acc_processed"), ("GSM2936192_ESC_A03_GpC-acc_processed"))) # This formatting is a list of 4 objects: the met file urls, the acc file urls, the met file names, and the acc file names. singlecell_subset <- subsetSC(gse_subset_path, chromosome=19, startPos = 8937041, endPos = 8997041) # To save for later, save as an rds file and change the folder location as desired: # saveRDS(singlecell_subset, "~/Downloads/singlecell_subset.rds") ## ----eval=TRUE, fig.width=7, fig.height=3------------------------------------- data("mouse_bm") gene.select <- subset(mouse_bm, mgi_symbol == "Eef1g") startPos <- 8966841 endPos <- 8967541 prepSC.out <- prepSC(singlecell_subset, startPos=startPos, endPos=endPos) orderObj <- initialOrder(prepSC.out) plotSequence(orderObj, Title = "Eef1g gene", plotFast=TRUE, drawKey = FALSE) ## ----eval=TRUE, fig.align='left', fig.width=6, fig.height=5------------------- # If you followed the preprocessing code above, then you can do: # mydata <- readRDS("~/Downloads/singlecell_subset.rds") # Otherwise, we have also included this subset in the package directly: mydata <- system.file("extdata", "singlecell_subset.rds", package = "methylscaper") mydata <- readRDS(mydata) gene <- "Eef1g" data("mouse_bm") # for human use human_bm gene.select <- subset(mouse_bm, mgi_symbol == gene) # We will further subset the region to a narrow region of the gene: from 8966841bp to 8967541bp startPos <- 8966841 endPos <- 8967541 # This subsets the data to a specific region and prepares the data for visualization: prepSC.out <- prepSC(mydata, startPos=startPos, endPos=endPos) # Next the cells are ordered using the PCA approach and plot orderObj <- initialOrder(prepSC.out) plotSequence(orderObj, Title = "Eef1g gene", plotFast=TRUE) # We plot the nucleosome size key by default, however this can be turned off via drawKey = FALSE: # plotSequence(orderObj, Title = "Eef1g gene", plotFast=TRUE, drawKey = FALSE) ## ----eval=TRUE---------------------------------------------------------------- orderObj <- initialOrder(prepSC.out, weightStart = 47, weightEnd = 358, weightFeature = "acc") ## ----eval=TRUE, fig.width=7, fig.height=6------------------------------------- plotSequence(orderObj, Title = "Eef1g gene", plotFast=TRUE) ## ----eval=TRUE, fig.width=7, fig.height=6------------------------------------- orderObj$order1 <- refineFunction(orderObj, refineStart = 1, refineEnd = 20) plotSequence(orderObj, Title = "Eef1g gene", plotFast=TRUE) ## ----eval=FALSE--------------------------------------------------------------- # png("~/save_my_plot.png", width = 4, height = 6, units = 'in', res = 300) # plotSequence(orderObj, Title = "Eef1g gene", plotFast=FASLE) # dev.off() ## ----eval=FALSE--------------------------------------------------------------- # if (!requireNamespace("biomaRt", quietly=TRUE)) # BiocManager::install("biomaRt") # library(biomaRt) # ensembl <- useMart("ensembl") # # Demonstrating how to get the human annotations. # ensembl <- useDataset("hsapiens_gene_ensembl",mart=ensembl) # my_chr <- c(1:22, 'M', 'X', 'Y') # You can choose to omit this or include additional chromosome # # We only need the start, end, and symbol: # human_bm <- getBM(attributes=c('chromosome_name', 'start_position', 'end_position', 'hgnc_symbol'), # filters = 'chromosome_name', # values = my_chr, # mart=ensembl) # # ## Now that we have the biomart object, we can extract start and ends for methylscaper: # gene_select <- subset(human_bm, human_bm$hgnc_symbol == "GeneX") # # # These can then be passed into the prepSC function: # prepSC.out <- prepSC(mydata, startPos=gene_select$startPos, endPos=gene_select$endPos) # # # To continue the analysis: # # Next the cells are ordered using the PCA approach and then plot: # orderObj <- initialOrder(prepSC.out) # plotSequence(orderObj, Title = "Gene X", plotFast=TRUE) ## ----eval = TRUE-------------------------------------------------------------- # This provides the path to the raw datasets located in the methylscaper package seq_file <- system.file("extdata", "seq_file.fasta", package = "methylscaper") ref_file <- system.file("extdata", "reference.fa", package = "methylscaper") # Next we read the data into R using the read.fasta function from the seqinr package: if (!requireNamespace("seqinr", quietly=TRUE)) install.packages("seqinr") fasta <- seqinr::read.fasta(seq_file) ref <- seqinr::read.fasta(ref_file)[[1]] # For the vignette we will only run a subset of the molecules singlemolecule_example <- runAlign(fasta = fasta, ref = ref, fasta_subset = 1:150) # Once complete, we can save the output as an RDS object # saveRDS(singlemolecule_example, file="~/methylscaper_singlemolecule_preprocessed.rds") ## ----eval=TRUE, fig.width=7, fig.height=6------------------------------------- # If skipping the preprocessing steps above, use our pre-aligned data: # data(singlemolecule_example) orderObj <- initialOrder(singlemolecule_example, Method="PCA", weightStart = 308, weightEnd = 475, weightFeature = "red") plotSequence(orderObj, Title = "Ordered by PCA", plotFast = TRUE) ## ----eval=TRUE, fig.align='left', fig.height=8, fig.width=8------------------- par(mfrow=c(2,2)) props <- methyl_proportion(orderObj, type = "met", makePlot=TRUE, main="") props <- methyl_proportion(orderObj, type = "acc", makePlot=TRUE, main="") pcnts <- methyl_percent_sites(orderObj, makePlot = TRUE) avgs <- methyl_average_status(orderObj, makePlot = TRUE, window_length = 25) ## ----sessionInfo, results='markup'-------------------------------------------- sessionInfo()