## ----style, echo = FALSE, results = 'asis'------------------------------------ BiocStyle::markdown() ## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE, cache=FALSE, eval = TRUE) library(knitr) ## ----echo=FALSE--------------------------------------------------------------- cols <- c("RSID", "CHR", "POS", "REF", "ALT", "SAMP_FREQ_ALT", "SAMP_MAF", "PVALUE", "HR", "HR_lowerCI", "HR_upperCI", "COEF", "SE.COEF", "Z", "N", "NEVENT") desc <- c("SNP ID", "Chromosome number", "Genomic Position (BP)", "Reference Allele", "Alternate Allele", "Alternate Allele frequency in sample being tested", "Minor allele frequency in sample being tested", "P-value of single SNP or interaction term", "Hazard Ratio (HR)", "Lower bound 95% CI of HR", "Upper bound 95% CI of HR", "Estimated coefficient of SNP", "Standard error of coefficient estimate", "Z-statistic", "Number of individuals in sample being tested", "Number of events that occurred in sample being tested") df <- cbind(cols, desc) colnames(df) <- c("Column", "Description") kable(df) ## ----echo=FALSE--------------------------------------------------------------- cols <- c("TYPED", "AF", "MAF", "R2", "ER2") desc <- c("Imputation status: TRUE (SNP IS TYPED)/FALSE (SNP IS IMPUTED)", "Minimac3 output Alternate Allele Frequency", "Minimac3 output of Minor Allele Frequency", "Imputation R2 score (minimac3 $R^2$)", "Minimac3 ouput empirical $R^2$") df <- cbind(cols, desc) colnames(df) <- c("Column", "Description") kable(df) ## ----echo=FALSE--------------------------------------------------------------- cols <- c("TYPED", "RefPanelAF", "INFO") desc <- c("Imputation status: TRUE (SNP IS TYPED)/FALSE (SNP IS IMPUTED)", "HRC Reference Panel Allele Frequency", "Imputation INFO score from PBWT") df <- cbind(cols, desc) colnames(df) <- c("Column", "Description") kable(df) ## ----echo=FALSE--------------------------------------------------------------- cols <- c("TYPED", "A0", "A1", "exp_freq_A1") desc <- c("`---` is imputed, repeated RSID is typed", "Allele coded 0 in IMPUTE2", "Allele coded 1 in IMPUTE2", "Expected allele frequency of alelle code A1") df <- cbind(cols, desc) colnames(df) <- c("Column", "Description") kable(df) ## ----eval=FALSE--------------------------------------------------------------- # devtools::install_github("suchestoncampbelllab/gwasurvivr") ## ----eval=FALSE--------------------------------------------------------------- # if (!requireNamespace("BiocManager", quietly=TRUE)) # install.packages("BiocManager") # BiocManager::install("gwasurvivr", version = "devel") ## ----eval=FALSE--------------------------------------------------------------- # install.packages(c("ncdf4", "matrixStats", "parallel", "survival")) ## ----eval=FALSE--------------------------------------------------------------- # BiocManager::install("GWASTools") # BiocManager::install("VariantAnnotation") # BiocManager::install("SummarizedExperiment") # BiocManager::install("SNPRelate") ## ----------------------------------------------------------------------------- library(gwasurvivr) ## ----eval=FALSE--------------------------------------------------------------- # options("gwasurvivr.cores"=4) ## ----eval=FALSE--------------------------------------------------------------- # library(parallel) # cl <- makeCluster(detectCores()) # # impute2CoxSurv(..., clusterObj=cl) # sangerCoxSurv(..., clusterObj=cl) # michiganCoxSurv(..., clusterObj=cl) ## ----message=FALSE------------------------------------------------------------ vcf.file <- system.file(package="gwasurvivr", "extdata", "michigan.chr14.dose.vcf.gz") pheno.fl <- system.file(package="gwasurvivr", "extdata", "simulated_pheno.txt") pheno.file <- read.table(pheno.fl, sep=" ", header=TRUE, stringsAsFactors = FALSE) head(pheno.file) # recode sex column and remove first column pheno.file$SexFemale <- ifelse(pheno.file$sex=="female", 1L, 0L) # select only experimental group sample.ids sample.ids <- pheno.file[pheno.file$group=="experimental",]$ID_2 head(sample.ids) ## ----eval=FALSE--------------------------------------------------------------- # michiganCoxSurv(vcf.file=vcf.file, # covariate.file=pheno.file, # id.column="ID_2", # sample.ids=sample.ids, # time.to.event="time", # event="event", # covariates=c("age", "SexFemale", "DrugTxYes"), # inter.term=NULL, # print.covs="only", # out.file="michigan_only", # r2.filter=0.3, # maf.filter=0.005, # chunk.size=100, # verbose=TRUE, # clusterObj=NULL) ## ----echo=FALSE--------------------------------------------------------------- michiganCoxSurv(vcf.file=vcf.file, covariate.file=pheno.file, id.column="ID_2", sample.ids=sample.ids, time.to.event="time", event="event", covariates=c("age", "SexFemale", "DrugTxYes"), inter.term=NULL, print.covs="only", out.file=tempfile("michigan_only"), r2.filter=0.3, maf.filter=0.005, chunk.size=100, verbose=TRUE, clusterObj=NULL) ## ----message=FALSE, eval=FALSE------------------------------------------------ # michigan_only <- read.table("michigan_only.coxph", sep="\t", header=TRUE, stringsAsFactors = FALSE) ## ----message=FALSE, echo=FALSE------------------------------------------------ michigan_only <- read.table(dir(tempdir(), pattern="^michigan_only.*\\.coxph$", full.names = TRUE), sep="\t", header=TRUE, stringsAsFactors = FALSE) ## ----------------------------------------------------------------------------- str(head(michigan_only)) ## ----eval=FALSE--------------------------------------------------------------- # michiganCoxSurv(vcf.file=vcf.file, # covariate.file=pheno.file, # id.column="ID_2", # sample.ids=sample.ids, # time.to.event="time", # event="event", # covariates=c("age", "SexFemale", "DrugTxYes"), # inter.term="DrugTxYes", # print.covs="only", # out.file="michigan_intx_only", # r2.filter=0.3, # maf.filter=0.005, # chunk.size=100, # verbose=FALSE, # clusterObj=NULL) ## ----echo=FALSE--------------------------------------------------------------- michiganCoxSurv(vcf.file=vcf.file, covariate.file=pheno.file, id.column="ID_2", sample.ids=sample.ids, time.to.event="time", event="event", covariates=c("age", "SexFemale", "DrugTxYes"), inter.term="DrugTxYes", print.covs="only", out.file=tempfile("michigan_intx_only"), r2.filter=0.3, maf.filter=0.005, chunk.size=100, verbose=FALSE, clusterObj=NULL) ## ----message=FALSE, eval=FALSE------------------------------------------------ # michigan_intx_only <- read.table("michigan_intx_only.coxph", sep="\t", header=TRUE, stringsAsFactors = FALSE) ## ----message=FALSE, echo=FALSE------------------------------------------------ michigan_intx_only <- read.table(dir(tempdir(), pattern="^michigan_intx_only.*\\.coxph$", full.names = TRUE), sep="\t", header=TRUE, stringsAsFactors = FALSE) ## ----------------------------------------------------------------------------- str(head(michigan_intx_only)) ## ----eval=TRUE---------------------------------------------------------------- vcf.file <- system.file(package="gwasurvivr", "extdata", "sanger.pbwt_reference_impute.vcf.gz") pheno.fl <- system.file(package="gwasurvivr", "extdata", "simulated_pheno.txt") pheno.file <- read.table(pheno.fl, sep=" ", header=TRUE, stringsAsFactors = FALSE) head(pheno.file) # recode sex column and remove first column pheno.file$SexFemale <- ifelse(pheno.file$sex=="female", 1L, 0L) # select only experimental group sample.ids sample.ids <- pheno.file[pheno.file$group=="experimental",]$ID_2 head(sample.ids) ## ----eval=FALSE--------------------------------------------------------------- # sangerCoxSurv(vcf.file=vcf.file, # covariate.file=pheno.file, # id.column="ID_2", # sample.ids=sample.ids, # time.to.event="time", # event="event", # covariates=c("age", "SexFemale", "DrugTxYes"), # inter.term=NULL, # print.covs="only", # out.file="sanger_only", # info.filter=0.3, # maf.filter=0.005, # chunk.size=100, # verbose=TRUE, # clusterObj=NULL) ## ----echo=FALSE--------------------------------------------------------------- sangerCoxSurv(vcf.file=vcf.file, covariate.file=pheno.file, id.column="ID_2", sample.ids=sample.ids, time.to.event="time", event="event", covariates=c("age", "SexFemale", "DrugTxYes"), inter.term=NULL, print.covs="only", out.file=tempfile("sanger_only"), info.filter=0.3, maf.filter=0.005, chunk.size=100, verbose=TRUE, clusterObj=NULL) ## ----message=FALSE, echo=FALSE------------------------------------------------ sanger_only <- read.table(dir(tempdir(), pattern="^sanger_only.*\\.coxph$", full.names = TRUE), sep="\t", header=TRUE, stringsAsFactors = FALSE) ## ----------------------------------------------------------------------------- str(head(sanger_only)) ## ----eval=FALSE--------------------------------------------------------------- # sangerCoxSurv(vcf.file=vcf.file, # covariate.file=pheno.file, # id.column="ID_2", # sample.ids=sample.ids, # time.to.event="time", # event="event", # covariates=c("age", "SexFemale", "DrugTxYes"), # inter.term="DrugTxYes", # print.covs="only", # out.file="sanger_intx_only", # info.filter=0.3, # maf.filter=0.005, # chunk.size=100, # verbose=TRUE, # clusterObj=NULL) ## ----echo=FALSE, eval=FALSE--------------------------------------------------- # sangerCoxSurv(vcf.file=vcf.file, # covariate.file=pheno.file, # id.column="ID_2", # sample.ids=sample.ids, # time.to.event="time", # event="event", # covariates=c("age", "SexFemale", "DrugTxYes"), # inter.term="DrugTxYes", # print.covs="only", # out.file=tempfile("sanger_intx_only"), # info.filter=0.3, # maf.filter=0.005, # chunk.size=100, # verbose=TRUE, # clusterObj=NULL) ## ----message=FALSE------------------------------------------------------------ impute.file <- system.file(package="gwasurvivr", "extdata", "impute_example.impute2.gz") sample.file <- system.file(package="gwasurvivr", "extdata", "impute_example.impute2_sample") covariate.file <- system.file(package="gwasurvivr", "extdata", "simulated_pheno.txt") covariate.file <- read.table(covariate.file, sep=" ", header=TRUE, stringsAsFactors = FALSE) covariate.file$SexFemale <- ifelse(covariate.file$sex=="female", 1L, 0L) sample.ids <- covariate.file[covariate.file$group=="experimental",]$ID_2 ## ----eval=FALSE--------------------------------------------------------------- # impute2CoxSurv(impute.file=impute.file, # sample.file=sample.file, # chr=14, # covariate.file=covariate.file, # id.column="ID_2", # sample.ids=sample.ids, # time.to.event="time", # event="event", # covariates=c("age", "SexFemale", "DrugTxYes"), # inter.term=NULL, # print.covs="only", # out.file="impute_example_only", # chunk.size=100, # maf.filter=0.005, # exclude.snps=NULL, # flip.dosage=TRUE, # verbose=TRUE, # clusterObj=NULL) ## ----echo=FALSE--------------------------------------------------------------- impute2CoxSurv(impute.file=impute.file, sample.file=sample.file, chr=14, covariate.file=covariate.file, id.column="ID_2", sample.ids=sample.ids, time.to.event="time", event="event", covariates=c("age", "SexFemale", "DrugTxYes"), inter.term=NULL, print.covs="only", out.file=tempfile("impute_example_only"), chunk.size=100, maf.filter=0.005, exclude.snps=NULL, flip.dosage=TRUE, verbose=TRUE, clusterObj=NULL) ## ----message=FALSE, eval=FALSE------------------------------------------------ # impute2_res_only <- read.table("impute_example_only.coxph", sep="\t", header=TRUE, stringsAsFactors = FALSE) # str(head(impute2_res_only)) ## ----message=FALSE, echo=FALSE------------------------------------------------ impute2_res_only <- read.table(dir(tempdir(), pattern="^impute_example_only.*\\.coxph$", full.names=TRUE), sep="\t", header=TRUE, stringsAsFactors = FALSE) str(head(impute2_res_only)) ## ----eval=FALSE--------------------------------------------------------------- # impute2CoxSurv(impute.file=impute.file, # sample.file=sample.file, # chr=14, # covariate.file=covariate.file, # id.column="ID_2", # sample.ids=sample.ids, # time.to.event="time", # event="event", # covariates=c("age", "SexFemale", "DrugTxYes"), # inter.term="DrugTxYes", # print.covs="only", # out.file="impute_example_intx", # chunk.size=100, # maf.filter=0.005, # flip.dosage=TRUE, # verbose=FALSE, # clusterObj=NULL, # keepGDS=FALSE) ## ----echo=FALSE--------------------------------------------------------------- impute2CoxSurv(impute.file=impute.file, sample.file=sample.file, chr=14, covariate.file=covariate.file, id.column="ID_2", sample.ids=sample.ids, time.to.event="time", event="event", covariates=c("age", "SexFemale", "DrugTxYes"), inter.term="DrugTxYes", print.covs="only", out.file=tempfile("impute_example_intx"), chunk.size=100, maf.filter=0.005, flip.dosage=TRUE, verbose=FALSE, clusterObj=NULL, keepGDS=FALSE) ## ----message=FALSE, eval=FALSE------------------------------------------------ # impute2_res_intx <- read.table("impute_example_intx.coxph", sep="\t", header=TRUE, stringsAsFactors = FALSE) # str(head(impute2_res_intx)) ## ----message=FALSE, echo=FALSE------------------------------------------------ impute2_res_intx <- read.table(dir(tempdir(), pattern="^impute_example_intx.*\\.coxph$", full.names=TRUE), sep="\t", header=TRUE, stringsAsFactors = FALSE) str(head(impute2_res_intx)) ## ----eval=FALSE--------------------------------------------------------------- # ## mysurvivalscript.R # library(gwasurvivr) # library(batch) # # parseCommandArgs(evaluate=TRUE) # this is loaded in the batch package # # options("gwasurvivr.cores"=4) # # vcf.file <- system.file(package="gwasurvivr", # "extdata", # vcf.file) # pheno.fl <- system.file(package="gwasurvivr", # "extdata", # pheno.file) # pheno.file <- read.table(pheno.fl, # sep=" ", # header=TRUE, # stringsAsFactors = FALSE) # # recode sex column and remove first column # pheno.file$SexFemale <- ifelse(pheno.file$sex=="female", 1L, 0L) # # select only experimental group sample.ids # sample.ids <- pheno.file[pheno.file$group=="experimental",]$ID_2 # ## -- unlist the covariates # ## (refer below to the shell script as to why we are doing this) # covariates <- unlist(sapply(covariates, strsplit, "_", 1, "[[")) # # sangerCoxSurv(vcf.file=vcf.file, # covariate.file=pheno.file, # id.column="ID_2", # sample.ids=sample.ids, # time.to.event=time, # event=event, # covariates=covariates, # inter.term=NULL, # print.covs="only", # out.file=out.file, # info.filter=0.3, # maf.filter=0.005, # chunk.size=100, # verbose=TRUE, # clusterObj=NULL) ## ----eval=FALSE--------------------------------------------------------------- # #!/bin/bash # DIRECTORY=/path/to/dir/impute_chr # # module load R # # R --script ${DIRECTORY}/survival/code/mysurvivalscript.R -q --args \ # vcf.file ${DIRECTORY}/sanger.pbwt_reference_impute.vcf.gz \ # pheno.file ${DIRECTORY}/phenotype_data/simulated_pheno.txt \ # covariates DrugTxYes_age_SexFemale\ # time.to.event time \ # event event \ # out.file ${DIRECTORY}/survival/results/sanger_example_output ## ----echo=FALSE--------------------------------------------------------------- sessionInfo()