## ----setup_knitr, include=FALSE----------------------------------------------- knitr::opts_chunk$set(message = FALSE, warning = FALSE, cache=FALSE, comment = " ") options(timeout = 300) ## ----install_req_packages, eval=FALSE----------------------------------------- # install.packages(c("tidyverse", "impute", "Rcpp")) ## ----install_packages, eval=FALSE--------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("methylclock") ## ----load_package------------------------------------------------------------- library(methylclockData) library(methylclock) ## ----load_others, eval=TRUE--------------------------------------------------- library(Biobase) library(tibble) library(impute) library(ggplot2) library(ggpmisc) library(GEOquery) ## ----check-------------------------------------------------------------------- # Get TestDataset data TestDataset <- get_TestDataset() cpgs.missing <- checkClocks(TestDataset) ## ----checkGA------------------------------------------------------------------ cpgs.missing.GA <- checkClocksGA(TestDataset) ## ----showMissing-------------------------------------------------------------- names(cpgs.missing) ## ----showMissNames------------------------------------------------------------ commonClockCpgs(cpgs.missing, "Hannum" ) commonClockCpgs(cpgs.missing.GA, "Bohlin" ) ## ----load_horvath_example----------------------------------------------------- library(tidyverse) MethylationData <- get_MethylationDataExample() MethylationData ## ----DNAmAge_horvath, warning=TRUE-------------------------------------------- age.example55 <- DNAmAge(MethylationData) age.example55 ## ----show_cpg_miss------------------------------------------------------------ missCpGs <- checkClocks(MethylationData) ## ----DNAmAgemp_horvath, warning=TRUE------------------------------------------ age.example55.2 <- DNAmAge(MethylationData, min.perc = 0.25) age.example55.2 ## ----DNAmAge_horvath_sel, warning=TRUE---------------------------------------- age.example55.sel <- DNAmAge(MethylationData, clocks=c("Horvath", "BNN")) age.example55.sel ## ----covariates_horvath_example----------------------------------------------- library(tidyverse) path <- system.file("extdata", package = "methylclock") covariates <- read_csv(file.path(path, "SampleAnnotationExample55.csv")) covariates ## ----age_horvath_example------------------------------------------------------ age <- covariates$Age head(age) ## ----DNAmAge_horvath_cell, warning=TRUE--------------------------------------- age.example55 <- DNAmAge(MethylationData, age=age, cell.count=TRUE) age.example55 ## ----compare_autistic--------------------------------------------------------- autism <- covariates$diseaseStatus kruskal.test(age.example55$ageAcc.Horvath ~ autism) kruskal.test(age.example55$ageAcc2.Horvath ~ autism) kruskal.test(age.example55$ageAcc3.Horvath ~ autism) ## ----get_gse58045, echo=FALSE------------------------------------------------- # ff <- "c:/juan/CREAL/BayesianPrediction/Bayesian_clock/paper" # load(file.path(ff, "data/GSE58045.Rdata")) ## ----get_geo_gse58045, eval=TRUE---------------------------------------------- # To avoid connection buffer size Sys.setenv("VROOM_CONNECTION_SIZE" = 131072 * 10) # Download data dd <- GEOquery::getGEO("GSE58045") gse58045 <- dd[[1]] # Restore connection buffer size Sys.setenv("VROOM_CONNECTION_SIZE" = 131072) ## ----show_gse58045------------------------------------------------------------ gse58045 ## ----age_gse58045------------------------------------------------------------- pheno <- pData(gse58045) age <- as.numeric(pheno$`age:ch1`) ## ----DNAmAge_gse58045, warning=TRUE------------------------------------------- age.gse58045 <- DNAmAge(gse58045, age=age) age.gse58045 ## ----horvat_age--------------------------------------------------------------- plotDNAmAge(age.gse58045$Horvath, age) ## ----bnn_age------------------------------------------------------------------ plotDNAmAge(age.gse58045$BNN, age, tit="Bayesian Neural Network") ## ----get_gse19711, echo=FALSE------------------------------------------------- # ff <- "c:/juan/CREAL/BayesianPrediction/Bayesian_clock/paper" # load(file.path(ff, "data/GSE19711.Rdata")) ## ----get_geo_gse19711, eval=TRUE---------------------------------------------- # To avoid connection buffer size Sys.setenv("VROOM_CONNECTION_SIZE" = 131072 * 10) # Download data dd <- GEOquery::getGEO("GSE19711") gse19711 <- dd[[1]] # Restore connection buffer size Sys.setenv("VROOM_CONNECTION_SIZE" = 131072) ## ----show_gse19711------------------------------------------------------------ gse19711 ## ----get_case_control--------------------------------------------------------- pheno <- pData(gse19711) age <- as.numeric(pheno$`ageatrecruitment:ch1`) disease <- pheno$`sample type:ch1` table(disease) disease[grep("Control", disease)] <- "Control" disease[grep("Case", disease)] <- "Case" disease <- factor(disease, levels=c("Control", "Case")) table(disease) ## ----DNAmAge_gse19711, warning=TRUE------------------------------------------- age.gse19711 <- DNAmAge(gse19711, age=age) ## ----show_age.gse19711-------------------------------------------------------- age.gse19711 ## ----assoc_hpv---------------------------------------------------------------- mod.horvath1 <- glm(disease ~ ageAcc.Horvath , data=age.gse19711, family="binomial") summary(mod.horvath1) mod.skinHorvath <- glm(disease ~ ageAcc2.Horvath , data=age.gse19711, family="binomial") summary(mod.skinHorvath) mod.horvath3 <- glm(disease ~ ageAcc3.Horvath , data=age.gse19711, family="binomial") summary(mod.horvath3) ## ----mod_levine--------------------------------------------------------------- mod.levine1 <- glm(disease ~ ageAcc.Levine , data=age.gse19711, family="binomial") summary(mod.levine1) mod.levine2 <- glm(disease ~ ageAcc2.Levine , data=age.gse19711, family="binomial") summary(mod.levine2) mod.levine3 <- glm(disease ~ ageAcc3.Levine , data=age.gse19711, family="binomial") summary(mod.levine3) ## ----assoc_cell--------------------------------------------------------------- cell <- attr(age.gse19711, "cell_proportion") mod.cell <- glm(disease ~ ageAcc.Levine + cell, data=age.gse19711, family="binomial") summary(mod.cell) ## ----get_gse109446, echo=FALSE------------------------------------------------ # ff <- "c:/juan/CREAL/BayesianPrediction/Bayesian_clock/paper" # load(file.path(ff, "data/GSE109446.Rdata")) ## ----get_geo_109446, eval=TRUE------------------------------------------------ dd <- GEOquery::getGEO("GSE109446") gse109446 <- dd[[1]] ## ----age_gse109446, warning=TRUE---------------------------------------------- controls <- pData(gse109446)$`diagnosis:ch1`=="control" gse <- gse109446[,controls] age <- as.numeric(pData(gse)$`age:ch1`) age.gse <- DNAmAge(gse, age=age) ## ----plotClocks--------------------------------------------------------------- plotCorClocks(age.gse) ## ----load_3_inds-------------------------------------------------------------- TestDataset[1:5,] ## ----age_test, warning=TRUE--------------------------------------------------- ga.test <- DNAmGA(TestDataset) ga.test ## ----get_progress------------------------------------------------------------- data(progress_data) ## ----progressClin------------------------------------------------------------- data(progress_vars) ## ----age_progress, warning=TRUE----------------------------------------------- ga.progress <- DNAmGA(progress_data) ga.progress ## ----plot_progress------------------------------------------------------------ plotDNAmAge(ga.progress$Knight, progress_vars$EGA, tit="GA Knight's method", clock="GA") ## ----plotAcc------------------------------------------------------------------ library(ggplot2) progress_vars$acc <- ga.progress$Knight - progress_vars$EGA p <- ggplot(data=progress_vars, aes(x = acc, y = birthweight)) + geom_point() + geom_smooth(method = "lm", se=FALSE, color="black") + xlab("GA acceleration") + ylab("Birthweight (kgs.)") p ## ----acccelerated_ga, warning=TRUE-------------------------------------------- accga.progress <- DNAmGA(progress_data, age = progress_vars$EGA, cell.count=FALSE) accga.progress ## ----check_clocks_2----------------------------------------------------------- checkClocksGA(progress_data) ## ----plotCorClockHealth------------------------------------------------------- plotCorClocks(age.gse58045)