## ----setup, include=FALSE------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)

# NOTE: this script appears to be the tangled form of an R Markdown document.
# Every chunk flagged eval=F / eval=FALSE below is kept commented out (one
# leading "# " per line), so sourcing this file only runs the setup chunk.
# Strip the leading "# " from a chunk to run it by hand.

## ----eval=F--------------------------------------------------------------
# # Build the sample sheet ("targets") for the TCGA breast (BRCA) cohort.
# library(minfi)
# datadir <- "/Users/Morgan/Documents/methylation_files/breast"
#
# clinicalDir <- file.path(datadir, "Clinical/Biotab")
# sample_tab <- read.delim(
#   file.path(clinicalDir, "nationwidechildrens.org_biospecimen_sample_brca.txt"),
#   sep = "\t", stringsAsFactors = FALSE
# )
# # keep primary tumours and matched solid normal tissue only
# keep <- sample_tab$sample_type %in% c("Primary Tumor", "Solid Tissue Normal")
# sample_tab <- sample_tab[keep, ]
#
# # patient id = first three dash-separated fields of the sample barcode
# patient_id <- unique(vapply(
#   strsplit(sample_tab$bcr_sample_barcode, split = "-"),
#   function(x) paste(x[1:3], collapse = "-"),
#   character(1)
# ))
#
# tumor_sample_id <-
#   sample_tab$bcr_sample_uuid[sample_tab$sample_type == "Primary Tumor"]
# normal_sample_id <-
#   sample_tab$bcr_sample_uuid[sample_tab$sample_type == "Solid Tissue Normal"]
#
# # read tumor data
# tumor_tab <- read.delim(
#   file.path(clinicalDir,
#             "nationwidechildrens.org_biospecimen_tumor_sample_brca.txt"),
#   sep = "\t", stringsAsFactors = FALSE
# )
#
# tab <- merge(sample_tab, tumor_tab, by = "bcr_sample_uuid",
#              suffixes = c(".sample", ".tumor"), all.x = TRUE)
#
# # read normal data
# normal_tab <- read.delim(
#   file.path(clinicalDir,
#             "nationwidechildrens.org_biospecimen_normal_control_brca.txt"),
#   sep = "\t", stringsAsFactors = FALSE
# )
# tab <- merge(tab, normal_tab, by = "bcr_sample_uuid",
#              suffixes = c(".tumor", ".normal"), all.x = TRUE)
#
# # take the patient barcode from the tumor table, falling back to the
# # normal-control table where the tumor value is missing
# tab$bcr_patient_barcode <- tab$bcr_patient_barcode.tumor
#
# ii <- is.na(tab$bcr_patient_barcode)
# tab$bcr_patient_barcode[ii] <- tab$bcr_patient_barcode.normal[ii]
#
# # read patient data
# patient_tab <- read.delim(
#   file.path(clinicalDir, "nationwidechildrens.org_clinical_patient_brca.txt"),
#   sep = "\t", stringsAsFactors = FALSE
# )
# # prefix every patient-level column so it cannot clash with sample columns
# names(patient_tab) <- paste("patient", names(patient_tab), sep = ".")
# tmp <- merge(tab, patient_tab,
#              by.x = "bcr_patient_barcode",
#              by.y = "patient.bcr_patient_barcode",
#              all.x = TRUE, suffixes = c(".sample", ".patient"))
# tab <- tmp
#
# # read meth metadata
# methMetaDir <- file.path(datadir, "METADATA/JHU_USC__HumanMethylation450")
# methMeta_tab <- read.delim(
#   file.path(methMetaDir, "jhu-usc.edu_BRCA.HumanMethylation450.1.9.0.sdrf.txt"),
#   sep = "\t", stringsAsFactors = FALSE
# )
#
# # sample barcode = first four dash-separated fields of the array barcode
# sample_barcode <- vapply(
#   strsplit(methMeta_tab$Comment..TCGA.Barcode., split = "-"),
#   function(x) paste(x[1:4], collapse = "-"),
#   character(1)
# )
# m <- match(tab$bcr_sample_barcode, sample_barcode)
# tab$Basename <- gsub("_Grn\\.idat", "", methMeta_tab$Array.Data.File[m])
# tab <- tab[!is.na(tab$Basename), ]
#
# basedir <- file.path(datadir,
#                      "DNA_Methylation/JHU_USC__HumanMethylation450/Level_1")
# tab$Basename <- file.path(basedir, tab$Basename)
# # this mask is not applied here; rows without an IDAT file are dropped
# # after the three cohorts have been merged
# keep <- file.exists(paste0(tab$Basename, "_Grn.idat"))
# breast_targets <- tab
# # drop the intermediate tables and directory variables
# objs <- grep("tab", ls(), value = TRUE)
# rm(list = objs)
# objs <- grep("dir", ls(), value = TRUE, ignore.case = TRUE)
# rm(list = objs)
#
# nms <- names(breast_targets)
# targets.breast <- breast_targets[nms]
#
# targets.breast$Status <- factor(
#   ifelse(targets.breast$sample_type == "Primary Tumor", "cancer", "normal"),
#   levels = c("normal", "cancer")
# )
# targets.breast$Tissue <- tolower(targets.breast$patient.tumor_tissue_site)
# targets.breast$Sex <- targets.breast$patient.gender

## ----eval=F--------------------------------------------------------------
# # Build the sample sheet ("targets") for the TCGA colon (COAD) cohort.
# datadir <- "/Users/Morgan/Documents/methylation_files/colon"
#
# clinicalDir <- file.path(datadir, "Clinical/Biotab")
# sample_tab <- read.delim(
#   file.path(clinicalDir, "nationwidechildrens.org_biospecimen_sample_coad.txt"),
#   sep = "\t", stringsAsFactors = FALSE
# )
# # keep primary tumours and matched solid normal tissue only
# keep <- sample_tab$sample_type %in% c("Primary Tumor", "Solid Tissue Normal")
# sample_tab <- sample_tab[keep, ]
#
# # patient id = first three dash-separated fields of the sample barcode
# patient_id <- unique(vapply(
#   strsplit(sample_tab$bcr_sample_barcode, split = "-"),
#   function(x) paste(x[1:3], collapse = "-"),
#   character(1)
# ))
#
# tumor_sample_id <-
#   sample_tab$bcr_sample_uuid[sample_tab$sample_type == "Primary Tumor"]
# normal_sample_id <-
#   sample_tab$bcr_sample_uuid[sample_tab$sample_type == "Solid Tissue Normal"]
#
# # read tumor data
# tumor_tab <- read.delim(
#   file.path(clinicalDir,
#             "nationwidechildrens.org_biospecimen_tumor_sample_coad.txt"),
#   sep = "\t", stringsAsFactors = FALSE
# )
#
# tab <- merge(sample_tab, tumor_tab, by = "bcr_sample_uuid",
#              suffixes = c(".sample", ".tumor"), all.x = TRUE)
#
# # read normal data
# normal_tab <- read.delim(
#   file.path(clinicalDir,
#             "nationwidechildrens.org_biospecimen_normal_control_coad.txt"),
#   sep = "\t", stringsAsFactors = FALSE
# )
# tab <- merge(tab, normal_tab, by = "bcr_sample_uuid",
#              suffixes = c(".tumor", ".normal"), all.x = TRUE)
#
# # take the patient barcode from the tumor table, falling back to the
# # normal-control table where the tumor value is missing
# tab$bcr_patient_barcode <- tab$bcr_patient_barcode.tumor
#
# ii <- is.na(tab$bcr_patient_barcode)
# tab$bcr_patient_barcode[ii] <- tab$bcr_patient_barcode.normal[ii]
#
# # read patient data
# patient_tab <- read.delim(
#   file.path(clinicalDir, "nationwidechildrens.org_clinical_patient_coad.txt"),
#   sep = "\t", stringsAsFactors = FALSE
# )
# # prefix every patient-level column so it cannot clash with sample columns
# names(patient_tab) <- paste("patient", names(patient_tab), sep = ".")
# tmp <- merge(tab, patient_tab,
#              by.x = "bcr_patient_barcode",
#              by.y = "patient.bcr_patient_barcode",
#              all.x = TRUE, suffixes = c(".sample", ".patient"))
# tab <- tmp
#
# # read meth metadata
# methMetaDir <- file.path(datadir, "METADATA/JHU_USC__HumanMethylation450")
# methMeta_tab <- read.delim(
#   file.path(methMetaDir, "jhu-usc.edu_COAD.HumanMethylation450.1.9.0.sdrf.txt"),
#   sep = "\t", stringsAsFactors = FALSE
# )
#
# # sample barcode = first four dash-separated fields of the array barcode
# sample_barcode <- vapply(
#   strsplit(methMeta_tab$Comment..TCGA.Barcode., split = "-"),
#   function(x) paste(x[1:4], collapse = "-"),
#   character(1)
# )
# m <- match(tab$bcr_sample_barcode, sample_barcode)
# tab$Basename <- gsub("_Grn\\.idat", "", methMeta_tab$Array.Data.File[m])
# tab <- tab[!is.na(tab$Basename), ]
#
# basedir <- file.path(datadir,
#                      "DNA_Methylation/JHU_USC__HumanMethylation450/Level_1")
# tab$Basename <- file.path(basedir, tab$Basename)
# # this mask is not applied here; rows without an IDAT file are dropped
# # after the three cohorts have been merged
# keep <- file.exists(paste0(tab$Basename, "_Grn.idat"))
# colon_targets <- tab
# # drop the intermediate tables and directory variables
# objs <- grep("tab", ls(), value = TRUE)
# rm(list = objs)
# objs <- grep("dir", ls(), value = TRUE, ignore.case = TRUE)
# rm(list = objs)
#
# nms <- names(colon_targets)
# targets.colon <- colon_targets[nms]
#
# targets.colon$Status <- factor(
#   ifelse(targets.colon$sample_type == "Primary Tumor", "cancer", "normal"),
#   levels = c("normal", "cancer")
# )
# targets.colon$Tissue <- tolower(targets.colon$patient.tumor_tissue_site)
# targets.colon$Sex <- targets.colon$patient.gender

## ----eval=F--------------------------------------------------------------
# # Build the sample sheet ("targets") for the TCGA lung (LUSC) cohort.
# datadir <- "/Users/Morgan/Documents/methylation_files/lung"
#
#
# clinicalDir <- file.path(datadir, "Clinical/Biotab")
# sample_tab <- read.delim(
#   file.path(clinicalDir, "nationwidechildrens.org_biospecimen_sample_lusc.txt"),
#   sep = "\t", stringsAsFactors = FALSE
# )
# # keep primary tumours and matched solid normal tissue only
# keep <- sample_tab$sample_type %in% c("Primary Tumor", "Solid Tissue Normal")
# sample_tab <- sample_tab[keep, ]
#
# # patient id = first three dash-separated fields of the sample barcode
# patient_id <- unique(vapply(
#   strsplit(sample_tab$bcr_sample_barcode, split = "-"),
#   function(x) paste(x[1:3], collapse = "-"),
#   character(1)
# ))
#
# tumor_sample_id <-
#   sample_tab$bcr_sample_uuid[sample_tab$sample_type == "Primary Tumor"]
# normal_sample_id <-
#   sample_tab$bcr_sample_uuid[sample_tab$sample_type == "Solid Tissue Normal"]
#
# # read tumor data
# tumor_tab <- read.delim(
#   file.path(clinicalDir,
#             "nationwidechildrens.org_biospecimen_tumor_sample_lusc.txt"),
#   sep = "\t", stringsAsFactors = FALSE
# )
#
# tab <- merge(sample_tab, tumor_tab, by = "bcr_sample_uuid",
#              suffixes = c(".sample", ".tumor"), all.x = TRUE)
#
# # read normal data
# normal_tab <- read.delim(
#   file.path(clinicalDir,
#             "nationwidechildrens.org_biospecimen_normal_control_lusc.txt"),
#   sep = "\t", stringsAsFactors = FALSE
# )
# tab <- merge(tab, normal_tab, by = "bcr_sample_uuid",
#              suffixes = c(".tumor", ".normal"), all.x = TRUE)
#
# # take the patient barcode from the tumor table, falling back to the
# # normal-control table where the tumor value is missing
# tab$bcr_patient_barcode <- tab$bcr_patient_barcode.tumor
#
# ii <- is.na(tab$bcr_patient_barcode)
# tab$bcr_patient_barcode[ii] <- tab$bcr_patient_barcode.normal[ii]
#
# # read patient data
# patient_tab <- read.delim(
#   file.path(clinicalDir, "nationwidechildrens.org_clinical_patient_lusc.txt"),
#   sep = "\t", stringsAsFactors = FALSE
# )
# # prefix every patient-level column so it cannot clash with sample columns
# names(patient_tab) <- paste("patient", names(patient_tab), sep = ".")
# tmp <- merge(tab, patient_tab,
#              by.x = "bcr_patient_barcode",
#              by.y = "patient.bcr_patient_barcode",
#              all.x = TRUE, suffixes = c(".sample", ".patient"))
# tab <- tmp
#
# # read meth metadata
# methMetaDir <- file.path(datadir, "METADATA/JHU_USC__HumanMethylation450")
# methMeta_tab <- read.delim(
#   file.path(methMetaDir, "jhu-usc.edu_LUSC.HumanMethylation450.1.7.0.sdrf.txt"),
#   sep = "\t", stringsAsFactors = FALSE
# )
#
# # sample barcode = first four dash-separated fields of the array barcode
# sample_barcode <- vapply(
#   strsplit(methMeta_tab$Comment..TCGA.Barcode., split = "-"),
#   function(x) paste(x[1:4], collapse = "-"),
#   character(1)
# )
# m <- match(tab$bcr_sample_barcode, sample_barcode)
# tab$Basename <- gsub("_Grn\\.idat", "", methMeta_tab$Array.Data.File[m])
# tab <- tab[!is.na(tab$Basename), ]
#
# basedir <- file.path(datadir,
#                      "DNA_Methylation/JHU_USC__HumanMethylation450/Level_1")
# tab$Basename <- file.path(basedir, tab$Basename)
# # this mask is not applied here; rows without an IDAT file are dropped
# # after the three cohorts have been merged
# keep <- file.exists(paste0(tab$Basename, "_Grn.idat"))
# lung_targets <- tab
# # drop the intermediate tables and directory variables
# objs <- grep("tab", ls(), value = TRUE)
# rm(list = objs)
# objs <- grep("dir", ls(), value = TRUE, ignore.case = TRUE)
# rm(list = objs)
#
# nms <- names(lung_targets)
# targets.lung <- lung_targets[nms]
#
# targets.lung$Status <- factor(
#   ifelse(targets.lung$sample_type == "Primary Tumor", "cancer", "normal"),
#   levels = c("normal", "cancer")
# )
# targets.lung$Tissue <- tolower(targets.lung$patient.tumor_tissue_site)
# targets.lung$Sex <- targets.lung$patient.gender
#
# # keep only the three per-cohort sample sheets in the workspace
# rm(list = ls()[!(ls() %in% c("targets.breast", "targets.colon", "targets.lung"))])

## ----eval=F,message=F,results="hide"-------------------------------------
# # Combine the three sample sheets and read the raw arrays.
# # (the intermediate result is no longer stored in a variable called
# # `merge`, which shadowed base::merge)
# targets <- merge(targets.breast, targets.colon, all = TRUE)
# targets <- merge(targets, targets.lung, all = TRUE)
# # drop samples whose green-channel IDAT file is not on disk
# targets <- targets[which(file.exists(paste0(targets$Basename, "_Grn.idat"))), ]
#
# # NOTE: memory.limit() only has an effect on Windows, and is no longer
# # supported from R 4.2 onwards (it warns and returns Inf)
# memory.limit(size = 10000)
# rg_set <- read.metharray(targets$Basename, verbose = TRUE)
# # NOTE(review): recent minfi versions may require an S4Vectors::DataFrame
# # here rather than a plain data.frame -- confirm against the installed version
# pData(rg_set) <- targets
#
# table(targets$Tissue, targets$Status)

## ----eval=F,results="hide"-----------------------------------------------
# # Preprocess, map to the genome and convert to ratios.
# # NOTE: memory.limit() only has an effect on Windows, and is no longer
# # supported from R 4.2 onwards (it warns and returns Inf)
# memory.limit(size = 10000)
# methset <- preprocessIllumina(rg_set)
# gen_methset <- mapToGenome(methset)
# gen_ratset <- ratioConvert(gen_methset, type = "Illumina")

## ---- eval=FALSE---------------------------------------------------------
# # Subset both objects to loci on chr10, chr11 and chr20.
# gr <- granges(gen_methset)
# keep <- seqnames(gr) %in% c("chr10", "chr11", "chr20")
# gen_methset_small <- gen_methset[keep, ]
#
# gr <- granges(gen_ratset)
# keep <- seqnames(gr) %in% c("chr10", "chr11", "chr20")
# gen_ratset_small <- gen_ratset[keep, ]