## ----eval=F-------------------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  
#  BiocManager::install("methylscaper")

## ----message=FALSE, eval=F----------------------------------------------------
#  if (!requireNamespace("devtools", quietly=TRUE))
#      install.packages("devtools")
#  devtools::install_github("rhondabacher/methylscaper", ref="R4.0")

## ----message = FALSE----------------------------------------------------------
library(methylscaper)

## ----eval=F-------------------------------------------------------------------
#  methylscaper()

## ----eval=FALSE---------------------------------------------------------------
#  filepath <- "~/Downloads/GSE109262_RAW/"
#  singlecell_subset <- subsetSC(filepath, chromosome=19, startPos = 8937041, endPos = 8997041)
#  # To save for later, save as an rds file and change the folder location as desired:
#  saveRDS(singlecell_subset, "~/Downloads/singlecell_subset.rds")

## ----eval=TRUE----------------------------------------------------------------
gse_subset_path <- list(c(("http://methylscaper.com/data/GSE109262_SUBSET/GSM2936197_ESC_A08_CpG-met_processed.tsv.gz"),
          ("http://methylscaper.com/data/GSE109262_SUBSET/GSM2936196_ESC_A07_CpG-met_processed.tsv.gz"),
          ("http://methylscaper.com/data/GSE109262_SUBSET/GSM2936192_ESC_A03_CpG-met_processed.tsv.gz")),
        
          c(("http://methylscaper.com/data/GSE109262_SUBSET/GSM2936197_ESC_A08_GpC-acc_processed.tsv.gz"),
            ("http://methylscaper.com/data/GSE109262_SUBSET/GSM2936196_ESC_A07_GpC-acc_processed.tsv.gz"),
            ("http://methylscaper.com/data/GSE109262_SUBSET/GSM2936192_ESC_A03_GpC-acc_processed.tsv.gz")),
          
          c(("GSM2936197_ESC_A08_CpG-met_processed"),
            ("GSM2936196_ESC_A07_CpG-met_processed"),
            ("GSM2936192_ESC_A03_CpG-met_processed")),
          
          c(("GSM2936197_ESC_A08_GpC-acc_processed"),
            ("GSM2936196_ESC_A07_GpC-acc_processed"),
            ("GSM2936192_ESC_A03_GpC-acc_processed")))
# This formatting is a list of 4 objects: the met file urls, the acc file urls, the met file names, and the acc file names.

singlecell_subset <- subsetSC(gse_subset_path, chromosome=19, startPos = 8937041, endPos = 8997041)

# To save for later, save as an rds file and change the folder location as desired:
# saveRDS(singlecell_subset, "~/Downloads/singlecell_subset.rds")

## ----eval=TRUE, fig.width=7, fig.height=3-------------------------------------
data("mouse_bm")
gene.select <- subset(mouse_bm, mgi_symbol == "Eef1g")

startPos <- 8966841 
endPos <- 8967541
prepSC.out <- prepSC(singlecell_subset, startPos=startPos, endPos=endPos)

orderObj <- initialOrder(prepSC.out)
plotSequence(orderObj, Title = "Eef1g gene", plotFast=TRUE, drawKey = FALSE)

## ----eval=TRUE, fig.align='left', fig.width=6, fig.height=5-------------------
# If you followed the preprocessing code above, then you can do:
# mydata <- readRDS("~/Downloads/singlecell_subset.rds")
# Otherwise, we have also included this subset in the package directly:
mydata <- system.file("extdata", "singlecell_subset.rds", package = "methylscaper")
mydata <- readRDS(mydata)
gene <- "Eef1g"
data("mouse_bm") # for human use human_bm
gene.select <- subset(mouse_bm, mgi_symbol == gene)
# We will further subset the region to a narrow region of the gene: from 8966841bp to 8967541bp
startPos <- 8966841 
endPos <- 8967541

# This subsets the data to a specific region and prepares the data for visualization:
prepSC.out <- prepSC(mydata, startPos=startPos, endPos=endPos)

# Next the cells are ordered using the PCA approach and plot
orderObj <- initialOrder(prepSC.out)
plotSequence(orderObj, Title = "Eef1g gene", plotFast=TRUE)
# We plot the nucleosome size key by default, however this can be turned off via drawKey = FALSE:
# plotSequence(orderObj, Title = "Eef1g gene", plotFast=TRUE, drawKey = FALSE)

## ----eval=TRUE----------------------------------------------------------------
orderObj <- initialOrder(prepSC.out,
                         weightStart = 47, weightEnd = 358, weightFeature = "acc")

## ----eval=TRUE, fig.width=7, fig.height=6-------------------------------------
plotSequence(orderObj, Title = "Eef1g gene", plotFast=TRUE)

## ----eval=TRUE, fig.width=7, fig.height=6-------------------------------------
orderObj$order1 <- refineFunction(orderObj, refineStart = 1, refineEnd = 20)
plotSequence(orderObj, Title = "Eef1g gene", plotFast=TRUE)

## ----eval=FALSE---------------------------------------------------------------
#  png("~/save_my_plot.png", width = 4, height = 6, units = 'in', res = 300)
#  plotSequence(orderObj, Title = "Eef1g gene", plotFast=FASLE)
#  dev.off()

## ----eval=FALSE---------------------------------------------------------------
#  if (!requireNamespace("biomaRt", quietly=TRUE))
#      BiocManager::install("biomaRt")
#  library(biomaRt)
#  ensembl <- useMart("ensembl")
#  # Demonstrating how to get the human annotations.
#  ensembl <- useDataset("hsapiens_gene_ensembl",mart=ensembl)
#  my_chr <- c(1:22, 'M', 'X', 'Y') # You can choose to omit this or include additional chromosome
#  # We only need the start, end, and symbol:
#  human_bm <- getBM(attributes=c('chromosome_name', 'start_position', 'end_position', 'hgnc_symbol'),
#               filters = 'chromosome_name',
#               values = my_chr,
#               mart=ensembl)
#  
#  ## Now that we have the biomart object, we can extract start and ends for methylscaper:
#  gene_select <- subset(human_bm, human_bm$hgnc_symbol == "GeneX")
#  
#  # These can then be passed into the prepSC function:
#  prepSC.out <- prepSC(mydata, startPos=gene_select$startPos, endPos=gene_select$endPos)
#  
#  # To continue the analysis:
#  # Next the cells are ordered using the PCA approach and then plot:
#  orderObj <- initialOrder(prepSC.out)
#  plotSequence(orderObj, Title = "Gene X", plotFast=TRUE)

## ----eval = TRUE--------------------------------------------------------------
# This provides the path to the raw datasets located in the methylscaper package
seq_file <- system.file("extdata", "seq_file.fasta", package = "methylscaper")
ref_file <- system.file("extdata", "reference.fa", package = "methylscaper")

# Next we read the data into R using the read.fasta function from the seqinr package:
if (!requireNamespace("seqinr", quietly=TRUE))
    install.packages("seqinr")
fasta <- seqinr::read.fasta(seq_file)
ref <- seqinr::read.fasta(ref_file)[[1]]

# For the vignette we will only run a subset of the molecules
singlemolecule_example <- runAlign(fasta = fasta, ref = ref, fasta_subset = 1:150)

# Once complete, we can save the output as an RDS object
# saveRDS(singlemolecule_example, file="~/methylscaper_singlemolecule_preprocessed.rds")

## ----eval=TRUE, fig.width=7, fig.height=6-------------------------------------
# If skipping the preprocessing steps above, use our pre-aligned data:
# data(singlemolecule_example)
orderObj <- initialOrder(singlemolecule_example, Method="PCA",
                         weightStart = 308, weightEnd = 475, weightFeature = "red")
plotSequence(orderObj, Title = "Ordered by PCA", plotFast = TRUE)

## ----eval=TRUE, fig.align='left', fig.height=8, fig.width=8-------------------
par(mfrow=c(2,2))
props <- methyl_proportion(orderObj, type = "met", makePlot=TRUE, main="")
props <- methyl_proportion(orderObj, type = "acc", makePlot=TRUE, main="")

pcnts <- methyl_percent_sites(orderObj, makePlot = TRUE)
avgs <- methyl_average_status(orderObj, makePlot = TRUE, window_length = 25)

## ----sessionInfo, results='markup'--------------------------------------------
sessionInfo()