## ----setup, echo=FALSE, messages=FALSE, warnings=FALSE------------------------ suppressPackageStartupMessages({ library(signatureSearchData) }) # knitr::opts_knit$set(root.dir = "~/insync/project/longevityTools_eDRUG/") ## ----install, eval=FALSE------------------------------------------------------ # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("signatureSearchData") ## ----load, eval=FALSE--------------------------------------------------------- # library(signatureSearchData) ## ----eh_explore_ssd1, eval=TRUE, warning=FALSE, message=FALSE----------------- library(ExperimentHub) eh <- ExperimentHub() ssd <- query(eh, c("signatureSearchData")) ssd ## ----eh_explore_ssd2, eval=TRUE, warning=FALSE, message=FALSE----------------- ssd$title ## ----eh_explore_ssd3, eval=TRUE----------------------------------------------- as.list(ssd)[10] ## ----lincs, eval=FALSE-------------------------------------------------------- # library(ExperimentHub); library(rhdf5) # eh <- ExperimentHub() # query(eh, c("signatureSearchData", "lincs")) # lincs_path <- eh[['EH3226']] # rhdf5::h5ls(lincs_path) ## ----filter_meta42, eval=FALSE------------------------------------------------ # meta42 <- readr::read_tsv("./data/GSE92742_Broad_LINCS_sig_info.txt") # dose <- meta42$pert_idose[7] # ## filter rows by 'pert_type' as compound, 10uM concentration, and 24h treatment time # meta42_filter <- sig_filter(meta42, pert_type="trt_cp", dose=dose, time="24 h") # 45956 X 14 ## ----extract_modz, eval=FALSE------------------------------------------------- # library(signatureSearch) # gctx <- "./data/GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx" # gctx2h5(gctx, cid=meta42_filter$sig_id, new_cid=meta42_filter$pert_cell_factor, # h5file="./data/lincs.h5", by_ncol=5000, overwrite=TRUE) # library(HDF5Array) # se <- SummarizedExperiment(HDF5Array("./data/lincs.h5", name="assay")) # rownames(se) <- HDF5Array("./data/lincs.h5", name="rownames") # colnames(se) <- HDF5Array("./data/lincs.h5", name="colnames") ## ----lincs_degs, eval=FALSE--------------------------------------------------- # library(signatureSearch) # # Get up and down 150 DEGs # degs <- getDEGSig(cmp="vorinostat", cell="SKB", refdb="lincs", Nup=150, Ndown=150) # # # Get DEGs by setting cutoffs # degs2 <- getDEGSig(cmp="vorinostat", cell="SKB", refdb="lincs", higher=2, lower=-2) ## ----lincs_degs_db, eval=FALSE------------------------------------------------ # # gCMAP method # gep <- getSig("vorinostat", "SKB", refdb="lincs") # qsig_gcmap <- qSig(query = gep, gess_method = "gCMAP", refdb = "lincs") # gcmap_res <- gess_gcmap(qsig_gcmap, higher=2, lower=-2) # # Fisher method # qsig_fisher <- qSig(query = degs, gess_method = "Fisher", refdb = "lincs") # fisher_res <- gess_fisher(qSig=qsig_fisher, higher=2, lower=-2) ## ----lincs_expr, eval=FALSE--------------------------------------------------- # library(ExperimentHub) # eh <- ExperimentHub() # query(eh, c("signatureSearchData", "lincs_expr")) # lincs_expr_path <- eh[['EH3227']] ## ----filter_expr, eval=FALSE-------------------------------------------------- # inst42 <- readr::read_tsv("./data/GSE92742_Broad_LINCS_inst_info.txt") # inst42_filter <- inst_filter(inst42, pert_type="trt_cp", dose=10, dose_unit="um", # time=24, time_unit="h") # 169,795 X 13 ## ----extract_expr, eval=FALSE------------------------------------------------- # # It takes some time # library(signatureSearch) # meanExpr2h5(gctx="./data/GSE92742_Broad_LINCS_Level3_INF_mlr12k_n1319138x12328.gctx", # inst=inst42_filter, h5file="./data/lincs_expr.h5") # 12328 X 38824 ## ----lincs2, eval=FALSE------------------------------------------------------- # library(ExperimentHub); library(rhdf5) # eh <- ExperimentHub() # query(eh, c("signatureSearchData", "lincs2")) # lincs2_path <- eh[['EH7297']] # rhdf5::h5ls(lincs2_path) ## ----lincs2_hdf5, eval=FALSE-------------------------------------------------- # siginfo_beta <- fread("https://s3.amazonaws.com/macchiato.clue.io/builds/LINCS2020/siginfo_beta.txt") # exemplar <- siginfo_beta %>% filter(pert_type=="trt_cp" & is_exemplar_sig == 1) # new_cid <- paste(exemplar$pert_id, exemplar$cell_iname, rep("trt_cp", length(exemplar$cmap_name)), sep="__") # gctx2h5("level5_beta_trt_cp_n720216x12328.gctx", cid=exemplar$sig_id, new_cid=new_cid, # h5file="lincs2.h5", by_ncol=5000, overwrite=TRUE) # DBpath <- "lincs2.h5" # sedb <- SummarizedExperiment(HDF5Array(DBpath, name="assay")) # rownames(sedb) <- HDF5Array(DBpath, name="rownames") # colnames(sedb) <- HDF5Array(DBpath, name="colnames") ## ----cmap_rank, eval=FALSE---------------------------------------------------- # library(ExperimentHub) # eh <- ExperimentHub() # query(eh, c("signatureSearchData", "cmap_rank")) # cmap_rank_path <- eh[["EH3225"]] # se <- SummarizedExperiment(HDF5Array(cmap_rank_path, name="assay")) # rownames(se) <- HDF5Array(cmap_rank_path, name="rownames") # colnames(se) <- HDF5Array(cmap_rank_path, name="colnames") ## ----filter_rankm, eval=FALSE------------------------------------------------- # path <- system.file("extdata", "cmap_instances_02.txt", package="signatureSearchData") # cmap_inst <- read.delim(path, check.names=FALSE) # inst_id <- cmap_inst$instance_id[!duplicated(paste(cmap_inst$cmap_name, cmap_inst$cell2, sep="_"))] # rankM <- read.delim("./data/rankMatrix.txt", check.names=FALSE, row.names=1) # 22283 X 6100 # rankM_sub <- rankM[, as.character(inst_id)] # colnames(rankM_sub) <- unique(paste(cmap_inst$cmap_name, cmap_inst$cell2, "trt_cp", sep="__")) ## ----affyid_annot, eval=FALSE, message=FALSE---------------------------------- # library(hgu133a.db) # myAnnot <- data.frame(ACCNUM=sapply(contents(hgu133aACCNUM), paste, collapse=", "), # SYMBOL=sapply(contents(hgu133aSYMBOL), paste, collapse=", "), # UNIGENE=sapply(contents(hgu133aUNIGENE), paste, collapse=", "), # ENTREZID=sapply(contents(hgu133aENTREZID), paste, collapse=", "), # ENSEMBL=sapply(contents(hgu133aENSEMBL), paste, collapse=", "), # DESC=sapply(contents(hgu133aGENENAME), paste, collapse=", ")) # saveRDS(myAnnot, "./data/myAnnot.rds") ## ----mr_prob, eval=FALSE------------------------------------------------------ # rankM_sub_gene <- probe2gene(rankM_sub, myAnnot) ## ----cmap_rank2h5, eval=FALSE------------------------------------------------- # matrix2h5(rankM_sub_gene, "./data/cmap_rank.h5", overwrite=TRUE) # 12403 X 3587 # rhdf5::h5ls("./data/cmap_rank.h5") # ## Read in cmap_rank.h5 as SummarizedExperiment object # se <- SummarizedExperiment(HDF5Array("./data/cmap_rank.h5", name="assay")) # rownames(se) <- HDF5Array("./data/cmap_rank.h5", name="rownames") # colnames(se) <- HDF5Array("./data/cmap_rank.h5", name="colnames") ## ----cmap_expr, eval=FALSE---------------------------------------------------- # library(ExperimentHub) # eh <- ExperimentHub() # query(eh, c("signatureSearchData", "cmap_expr")) # cmap_expr_path <- eh[["EH3224"]] ## ----work_envir, eval=FALSE--------------------------------------------------- # dir.create("data"); dir.create("data/CEL"); dir.create("results") ## ----download_cmap, eval=FALSE------------------------------------------------ # getCmapCEL(rerun=FALSE) ## ----get_cel_type, eval=FALSE------------------------------------------------- # library(affxparser) # celfiles <- list.files("./data/CEL", pattern=".CEL$") # chiptype <- sapply(celfiles, function(x) affxparser::readCelHeader(paste0("data/CEL/", x))$chiptype) # saveRDS(chiptype, "./data/chiptype.rds") ## ----normalize_chips, eval=FALSE---------------------------------------------- # chiptype <- readRDS("./data/chiptype.rds") # chiptype_list <- split(names(chiptype), as.character(chiptype)) # normalizeCel(chiptype_list, batchsize=200, rerun=FALSE) ## ----comb_chip_type_data, eval=FALSE------------------------------------------ # chiptype_dir <- unique(chiptype) # combineResults(chiptype_dir, rerun=FALSE) # mas5df <- combineNormRes(chiptype_dir, norm_method="MAS5") ## ----prof2gene, eval=FALSE---------------------------------------------------- # myAnnot <- readRDS("./data/myAnnot.rds") # mas5df <- probe2gene(mas5df, myAnnot) # saveRDS(mas5df,"./data/mas5df.rds") ## ----rma2cmap_expr, eval=FALSE------------------------------------------------ # mas5df <- readRDS("./data/mas5df.rds") # dim: 12403 x 7056 # path <- system.file("extdata", "cmap_instances_02.txt", package="signatureSearchData") # cmap_inst <- read.delim(path, check.names=FALSE) # cmap_drug_cell_expr <- meanExpr(mas5df, cmap_inst) # dim: 12403 X 3587 # saveRDS(cmap_drug_cell_expr, "./data/cmap_drug_cell_expr.rds") ## ----gen_cmap_expr, eval=FALSE------------------------------------------------ # cmap_drug_cell_expr <- readRDS("./data/cmap_drug_cell_expr.rds") # ## match colnames to '(drug)__(cell)__(factor)' format # colnames(cmap_drug_cell_expr) <- gsub("(^.*)_(.*$)", "\\1__\\2__trt_cp", # colnames(cmap_drug_cell_expr)) # matrix2h5(cmap_drug_cell_expr, "./data/cmap_expr.h5", overwrite=TRUE) # h5ls("./data/cmap_expr.h5") ## ----cmap, eval=FALSE--------------------------------------------------------- # library(ExperimentHub) # eh <- ExperimentHub() # query(eh, c("signatureSearchData", "cmap")) # cmap_path <- eh[["EH3223"]] ## ----cel_file_list, eval=FALSE------------------------------------------------ # path <- system.file("extdata", "cmap_instances_02.txt", package="signatureSearchData") # cmap_inst <- read.delim(path, check.names=FALSE) # comp_list <- sampleList(cmap_inst, myby="CMP_CELL") ## ----deg_limma, eval=FALSE---------------------------------------------------- # mas5df <- readRDS("./data/mas5df.rds") # degList <- runLimma(df=log2(mas5df), comp_list, fdr=0.10, foldchange=1, verbose=TRUE) # saveRDS(degList, "./results/degList.rds") # saves entire degList ## ----se, eval=FALSE----------------------------------------------------------- # degList <- readRDS("./results/degList.rds") # logMA <- degList$logFC # ## match colnames of logMA to '(drug)__(cell)__(factor)' format # colnames(logMA) <- gsub("(^.*)_(.*$)", "\\1__\\2__trt_cp", colnames(logMA)) # fdr <- degList$FDR # colnames(fdr) <- gsub("(^.*)_(.*$)", "\\1__\\2__trt_cp", colnames(fdr)) # matrix2h5(logMA, "./data/cmap.h5", name="assay", overwrite=TRUE) # 12403 X 3478 # matrix2h5(fdr, "./data/cmap.h5", name="padj", overwrite=FALSE) # rhdf5::h5ls("./data/cmap.h5") ## ----cmap_degs, eval=FALSE---------------------------------------------------- # library(signatureSearch) # # Get up and down 150 DEGs # degs <- getDEGSig(cmp="vorinostat", cell="PC3", refdb="cmap", Nup=150, Ndown=150) # # # Get DEGs by setting cutoffs # degs2 <- getDEGSig(cmp="vorinostat", cell="PC3", refdb="cmap", # higher=1, lower=-1, padj=0.05) ## ----cmap_degs_db, eval=FALSE------------------------------------------------- # # gCMAP method # gep <- getSig("vorinostat", "PC3", refdb="cmap") # qsig_gcmap <- qSig(query = gep, gess_method = "gCMAP", refdb = "cmap") # gcmap_res <- gess_gcmap(qsig_gcmap, higher=1, lower=-1, padj=0.05) # # Fisher method # qsig_fisher <- qSig(query = degs, gess_method = "Fisher", refdb = "cmap") # fisher_res <- gess_fisher(qSig=qsig_fisher, higher=1, lower=-1, padj=0.05) ## ----sessionInfo-------------------------------------------------------------- sessionInfo()