## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)

## ----install curatedTBData, eval = FALSE--------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# BiocManager::install("curatedTBData")

## ----libraries, message=FALSE, warning=FALSE, results='hide'------------------
library(curatedTBData)
library(dplyr)
library(SummarizedExperiment)
library(sva)

## ----accessing datasummary----------------------------------------------------
# Remove GeographicalRegion, Age, DiagnosisMethod, Notes, Tissue, HIVStatus
# for concise display
data("DataSummary", package = "curatedTBData")
DataSummary |>
  dplyr::select(
    -c(GeographicalRegion, Age, DiagnosisMethod, Notes, Tissue, HIVStatus)
  ) |>
  DT::datatable()

## ----access data with dry.run-------------------------------------------------
curatedTBData("GSE19439", dry.run = TRUE, curated.only = FALSE)

## ----download full data for GSE19439, warning=FALSE---------------------------
GSE19439 <- curatedTBData("GSE19439", dry.run = FALSE, curated.only = FALSE)
GSE19439

## ----warning=FALSE------------------------------------------------------------
GSE79362 <- curatedTBData("GSE79362", dry.run = FALSE, curated.only = FALSE)
GSE79362

## ----access selected curated datasets microarray, warning=FALSE, results='hide'----
myGEO <- c("GSE19435", "GSE19439", "GSE19442", "GSE19444", "GSE22098")
object_list <- curatedTBData(myGEO, dry.run = FALSE, curated.only = TRUE)

## -----------------------------------------------------------------------------
object_list[1:2]

## ----ACS RNA-seq, warning=FALSE, results='hide'-------------------------------
# NOTE(review): GSE79362 was already fetched above with identical arguments;
# this chunk repeats the call (kept so each chunk still runs on its own).
GSE79362 <- curatedTBData("GSE79362", dry.run = FALSE, curated.only = FALSE)

## -----------------------------------------------------------------------------
GSE79362

## -----------------------------------------------------------------------------
# `[[` does exact name matching, whereas `$` partial-matches on lists.
GSE19439 <- object_list[["GSE19439"]]
GSE19439[, GSE19439$TBStatus == "PTB"]["assay_curated"] # 13 samples

## -----------------------------------------------------------------------------
GSE19439[, GSE19439$TBStatus %in% c("PTB", "LTBI")]["assay_curated"] # 30 samples

## -----------------------------------------------------------------------------
GSE19491 <- combine_objects(
  object_list,
  experiment_name = "assay_curated",
  update_genes = TRUE
)
GSE19491

## -----------------------------------------------------------------------------
unique(GSE19491$Study)

## ----combine objects multiple experiment name---------------------------------
exp <- combine_objects(
  c(GSE79362[1], object_list[1]),
  experiment_name = c("assay_reprocess_hg19", "assay_curated"),
  update_genes = TRUE
)
exp

## ----batch correction, message=FALSE------------------------------------------
# Use the Study column of the combined object as the ComBat batch variable and
# store the adjusted matrix as an additional assay on the same object.
batch1 <- colData(GSE19491)$Study
combat_edata1 <- sva::ComBat(dat = assay(GSE19491), batch = batch1)
assays(GSE19491)[["Batch_corrected_assay"]] <- combat_edata1
GSE19491

## ----subset active TB and LTBI, message=FALSE, warning=FALSE------------------
multi_set_PTB_LTBI <- lapply(object_list, function(x) {
  subset_curatedTBData(
    x,
    annotationColName = "TBStatus",
    annotationCondition = c("LTBI", "PTB"),
    assayName = "assay_curated"
  )
})
# Remove NULL from the list.
# vapply() with a logical(1) template always yields a logical vector (also for
# an empty list), whereas sapply() would return list() and make `!` fail.
multi_set_PTB_LTBI <-
  multi_set_PTB_LTBI[!vapply(multi_set_PTB_LTBI, is.null, logical(1))]
multi_set_PTB_LTBI[1:3]

## ----subset patients with HIV, message=FALSE, warning=FALSE-------------------
# NOTE(review): the chunk label says "patients with HIV", but the condition
# below selects HIVStatus "Negative" -- confirm which wording is intended.
multi_set_HIV <- lapply(object_list, function(x) {
  subset_curatedTBData(
    x,
    annotationColName = "HIVStatus",
    annotationCondition = "Negative",
    assayName = "assay_curated"
  )
})
# Remove NULL from the list
multi_set_HIV <- multi_set_HIV[!vapply(multi_set_HIV, is.null, logical(1))]
multi_set_HIV[1:3]

## ----session info-------------------------------------------------------------
sessionInfo()