## ----------------------------------------------------------------------------- knitr::opts_chunk$set(message = FALSE) ## ----------------------------------------------------------------------------- library(HMP16SData) library(phyloseq) library(magrittr) library(ggplot2) library(tibble) library(dplyr) library(dendextend) library(circlize) library(ExperimentHub) library(gridExtra) library(cowplot) library(readr) library(haven) ## ----eval=FALSE--------------------------------------------------------------- # BiocManager::install("HMP16SData") ## ----------------------------------------------------------------------------- V13() V35() ## ----------------------------------------------------------------------------- V13() %>% table_one() %>% head() ## ----------------------------------------------------------------------------- list(V13 = V13(), V35 = V35()) %>% table_one() %>% kable_one() ## ----------------------------------------------------------------------------- V35_stool <- V35() %>% subset(select = HMP_BODY_SUBSITE == "Stool") V35_stool ## ----eval=FALSE--------------------------------------------------------------- # V35_stool_protected <- # V35_stool %>% # attach_dbGaP("~/prj_12146.ngc") ## ----eval=FALSE--------------------------------------------------------------- # colData(V35_stool_protected) ## ----------------------------------------------------------------------------- V13_tonsils <- V13() %>% subset(select = HMP_BODY_SUBSITE == "Palatine Tonsils") V13_stool <- V13() %>% subset(select = HMP_BODY_SUBSITE == "Stool") ## ----------------------------------------------------------------------------- V13_tonsils_phyloseq <- as_phyloseq(V13_tonsils) V13_stool_phyloseq <- as_phyloseq(V13_stool) ## ----------------------------------------------------------------------------- sample_samples <- function(x, size) { sampled_names <- sample_names(x) %>% sample(size) prune_samples(sampled_names, x) } ## ----------------------------------------------------------------------------- V13_tonsils_phyloseq %<>% sample_samples(25) V13_stool_phyloseq %<>% sample_samples(25) ## ----------------------------------------------------------------------------- sample_data(V13_tonsils_phyloseq)$Study <- "Tonsils" sample_data(V13_stool_phyloseq)$Study <- "Stool" ## ----------------------------------------------------------------------------- V13_phyloseq <- merge_phyloseq(V13_tonsils_phyloseq, V13_stool_phyloseq) ## ----------------------------------------------------------------------------- V13_phyloseq %<>% taxa_sums() %>% is_greater_than(0) %>% prune_taxa(V13_phyloseq) ## ----------------------------------------------------------------------------- richness_measures <- c("Observed", "Shannon", "Simpson") ## ----fig.height=5, fig.width=8------------------------------------------------ V13_phyloseq %>% plot_richness(x = "Study", color = "Study", measures = richness_measures) + stat_boxplot(geom ="errorbar") + geom_boxplot() + theme_bw() + theme(axis.title.x = element_blank(), legend.position = "none") ## ----------------------------------------------------------------------------- V13_dendrogram <- distance(V13_phyloseq, method = "bray") %>% hclust() %>% as.dendrogram() ## ----------------------------------------------------------------------------- V13_sample_data <- sample_data(V13_phyloseq) %>% data.frame() ## ----------------------------------------------------------------------------- V13_sample_data %<>% rownames_to_column(var = "PSN") %>% mutate(labels_col = if_else(Study == "Stool", "#F8766D", "#00BFC4")) %>% mutate(leaves_col = if_else(Study == "Stool", "#F8766D", "#00BFC4")) %>% mutate(leaves_pch = if_else(Study == "Stool", 16, 17)) ## ----------------------------------------------------------------------------- V13_sample_order <- labels(V13_dendrogram) %>% match(V13_sample_data$PSN) ## ----------------------------------------------------------------------------- labels_col <- V13_sample_data$labels_col[V13_sample_order] leaves_col <- V13_sample_data$leaves_col[V13_sample_order] leaves_pch <- V13_sample_data$leaves_pch[V13_sample_order] ## ----------------------------------------------------------------------------- V13_dendrogram %<>% set("labels_col", labels_col) %>% set("leaves_col", leaves_col) %>% set("leaves_pch", leaves_pch) ## ----fig.height=8, fig.width=8------------------------------------------------ V13_dendrogram %>% circlize_dendrogram(labels_track_height = 0.2) ## ----------------------------------------------------------------------------- V13_ordination <- ordinate(V13_phyloseq, method = "PCoA", distance = "bray") ## ----fig.height=5, fig.width=8------------------------------------------------ V13_phyloseq %>% plot_ordination(V13_ordination, color = "Study", shape = "Study") + theme_bw() + theme(legend.position = "bottom") ## ----fig.height=5, fig.width=8------------------------------------------------ V13_ordination %>% plot_scree() + theme_bw() ## ----------------------------------------------------------------------------- V35_stool_phyloseq <- V35_stool %>% as_phyloseq() EH <- ExperimentHub() HMP_2012.metaphlan_bugs_list.stool <- EH[["EH426"]] # a modified version of ExpressionSet2phyloseq taken from curatedMetagenomicData # ExpressionSet2phyloseq was removed in curatedMetagenomicData version 3.0.0 ExpressionSet2phyloseq <- function(eset, simplify = TRUE, relab = TRUE) { otu.table <- Biobase::exprs(eset) sample.data <- Biobase::pData(eset) %>% phyloseq::sample_data(., errorIfNULL = FALSE) taxonomic.ranks <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species", "Strain") tax.table <- rownames(otu.table) %>% gsub("[a-z]__", "", .) %>% dplyr::tibble() %>% tidyr::separate(., ".", taxonomic.ranks, sep = "\\|", fill="right") %>% as.matrix() if(simplify) { rownames(otu.table) <- rownames(otu.table) %>% gsub(".+\\|", "", .) } rownames(tax.table) <- rownames(otu.table) if(!relab) { otu.table <- round(sweep(otu.table, 2, eset$number_reads/100, "*")) } otu.table <- phyloseq::otu_table(otu.table, taxa_are_rows = TRUE) tax.table <- phyloseq::tax_table(tax.table) phyloseq::phyloseq(otu.table, sample.data, tax.table) } MGX_stool_phyloseq <- ExpressionSet2phyloseq(HMP_2012.metaphlan_bugs_list.stool) ## ----------------------------------------------------------------------------- MGX_stool_melted <- MGX_stool_phyloseq %>% subset_taxa(is.na(Class) & !is.na(Phylum)) %>% psmelt() ## ----------------------------------------------------------------------------- V35_stool_melted <- V35_stool_phyloseq %>% tax_glom(taxrank = "PHYLUM") %>% psmelt() ## ----------------------------------------------------------------------------- SRS_SAMPLE_IDS <- intersect(V35_stool_melted$SRS_SAMPLE_ID, MGX_stool_melted$NCBI_accession) V35_stool_melted %<>% filter(SRS_SAMPLE_ID %in% SRS_SAMPLE_IDS) %>% rename(Phylum = PHYLUM) %>% mutate(Sample = SRS_SAMPLE_ID) %>% group_by(Sample) %>% mutate(`Relative Abundance` = Abundance / sum(Abundance)) %>% arrange(Phylum, -`Relative Abundance`) %>% group_by(Phylum) %>% mutate(phylum_rank = sum(`Relative Abundance`)) %>% arrange(-phylum_rank) %>% select(Sample, Phylum, `Relative Abundance`) %>% ungroup() MGX_stool_melted %<>% filter(NCBI_accession %in% SRS_SAMPLE_IDS) %>% group_by(Sample) %>% mutate(`Relative Abundance` = Abundance / sum(Abundance)) %>% arrange(Phylum, -`Relative Abundance`) %>% group_by(Phylum) %>% mutate(phylum_rank = sum(`Relative Abundance`)) %>% arrange(-phylum_rank) %>% select(Sample, Phylum, `Relative Abundance`) %>% ungroup() ## ----------------------------------------------------------------------------- V35_top_phyla <- V35_stool_melted %$% unique(Phylum) %>% as.character() MGX_top_phyla <- MGX_stool_melted %$% unique(Phylum) %>% as.character() top_eight_phyla <- intersect(V35_top_phyla, MGX_top_phyla) %>% extract(1:8) V35_stool_melted %<>% filter(Phylum %in% top_eight_phyla) MGX_stool_melted %<>% filter(Phylum %in% top_eight_phyla) ## ----------------------------------------------------------------------------- sample_order <- V35_stool_melted %$% unique(Sample) ## ----------------------------------------------------------------------------- phylum_order <- V35_stool_melted %$% unique(Phylum) ## ----------------------------------------------------------------------------- bang_wong_colors <- c("#CC79A7", "#D55E00", "#0072B2", "#F0E442", "#009E73", "#56B4E9", "#E69F00", "#000000") ## ----------------------------------------------------------------------------- V35_plot <- V35_stool_melted %>% mutate(Sample = factor(Sample, sample_order)) %>% mutate(Phylum = factor(Phylum, phylum_order)) %>% ggplot(aes(Sample, `Relative Abundance`, fill = Phylum)) + geom_bar(stat = "identity", position = "fill", width = 1) + scale_fill_manual(values = bang_wong_colors) + theme_minimal() + theme(axis.text.x = element_blank(), axis.title.x = element_blank(), panel.grid = element_blank(), legend.position = "none", legend.title = element_blank()) + ggtitle("Phylum-Level Relative Abundance", "16S Stool Samples") MGX_plot <- MGX_stool_melted %>% mutate(Sample = factor(Sample, sample_order)) %>% mutate(Phylum = factor(Phylum, phylum_order)) %>% ggplot(aes(Sample, `Relative Abundance`, fill = Phylum)) + geom_bar(stat = "identity", position = "fill", width = 1) + scale_fill_manual(values = bang_wong_colors) + theme_minimal() + theme(axis.text.x = element_blank(), axis.title.x = element_blank(), panel.grid = element_blank(), legend.position = "none", legend.title = element_blank()) + ggtitle("", "MGX Stool Samples") ## ----------------------------------------------------------------------------- plot_legend <- { MGX_plot + theme(legend.position = "bottom") } %>% get_legend() ## ----fig.height=8, fig.width=8------------------------------------------------ grid.arrange(V35_plot, MGX_plot, plot_legend, ncol = 1, heights = c(3, 3, 1)) ## ----eval=FALSE--------------------------------------------------------------- # V35_plot + # theme(text = element_text(size = 19)) + # labs(title = NULL, subtitle = NULL, tag = "A") # # ggsave("~/AJE-00611-2018 Schiffer Figure 2A.eps", device = "eps") # ggsave("~/AJE-00611-2018 Schiffer Figure 2A.pdf", device = "pdf") # # MGX_plot + # theme(text = element_text(size = 19)) + # labs(title = NULL, subtitle = NULL, tag = "B") # # ggsave("~/AJE-00611-2018 Schiffer Figure 2B.eps", device = "eps") # ggsave("~/AJE-00611-2018 Schiffer Figure 2B.pdf", device = "pdf") # # plot_legend <- { # MGX_plot + # theme(legend.position = "bottom", text = element_text(size = 19)) + # guides(fill = guide_legend(byrow = TRUE)) # } %>% # get_legend() # # ggsave("~/AJE-00611-2018 Schiffer Figure 2 Legend 1.eps", plot = plot_legend, # device = "eps") # ggsave("~/AJE-00611-2018 Schiffer Figure 2 Legend 1.pdf", plot = plot_legend, # device = "pdf") # # plot_legend <- { # MGX_plot + # theme(legend.position = "right", text = element_text(size = 19)) # } %>% # get_legend() # # ggsave("~/AJE-00611-2018 Schiffer Figure 2 Legend 2.eps", plot = plot_legend, # device = "eps") # ggsave("~/AJE-00611-2018 Schiffer Figure 2 Legend 2.pdf", plot = plot_legend, # device = "pdf") # # plot_legend <- { # MGX_plot + # theme(legend.position = "right", text = element_text(size = 19)) + # guides(fill = guide_legend(ncol = 2, byrow = TRUE)) # } %>% # get_legend() # # ggsave("~/AJE-00611-2018 Schiffer Figure 2 Legend 3.eps", plot = plot_legend, # device = "eps") # ggsave("~/AJE-00611-2018 Schiffer Figure 2 Legend 3.pdf", plot = plot_legend, # device = "pdf") # # V35_plot <- # V35_plot + # theme(text = element_text(size = 19)) + # labs(title = NULL, subtitle = NULL, tag = "A") # # MGX_plot <- # MGX_plot + # theme(text = element_text(size = 19)) + # labs(title = NULL, subtitle = NULL, tag = "B") # # plot_legend <- { # MGX_plot + # theme(legend.position = "bottom", text = element_text(size = 19)) + # guides(fill = guide_legend(byrow = TRUE)) # } %>% # get_legend() # # grid_object <- # grid.arrange(V35_plot, MGX_plot, plot_legend, ncol = 1, # heights = c(3, 3, 1)) # # ggsave("~/AJE-00611-2018 Schiffer Figure 3.eps", plot = grid_object, # device = "eps", width = 8, height = 8) # ggsave("~/AJE-00611-2018 Schiffer Figure 3.pdf", plot = grid_object, # device = "pdf", width = 8, height = 8) ## ----eval=FALSE--------------------------------------------------------------- # V13_participant_data <- # V13() %>% # colData() %>% # as.data.frame() %>% # rownames_to_column(var = "PSN") ## ----eval=FALSE--------------------------------------------------------------- # V13_OTU_counts <- # V13() %>% # assay() %>% # t() %>% # as.data.frame() %>% # rownames_to_column(var = "PSN") ## ----eval=FALSE--------------------------------------------------------------- # V13_data <- # merge.data.frame(V13_participant_data, V13_OTU_counts, by = "PSN") ## ----eval=FALSE--------------------------------------------------------------- # V13_dictionary <- # V13() %>% # rowData() %>% # t.data.frame() %>% # as.data.frame() ## ----eval=FALSE--------------------------------------------------------------- # colnames(V13_data) <- # colnames(V13_data) %>% # gsub(pattern = "\\.", replacement = "_", x = .) ## ----eval=FALSE--------------------------------------------------------------- # colnames(V13_dictionary) <- # colnames(V13_dictionary) %>% # gsub(pattern = "\\.", replacement = "_", x = .) ## ----eval=FALSE--------------------------------------------------------------- # write_csv(V13_data, "~/V13_data.csv") # write_csv(V13_dictionary, "~/V13_dictionary.csv") ## ----eval=FALSE--------------------------------------------------------------- # write_sas(V13_data, "~/V13_data.sas7bdat") # write_sas(V13_dictionary, "~/V13_dictionary.sas7bdat") ## ----eval=FALSE--------------------------------------------------------------- # write_sav(V13_data, "~/V13_data.sav") # write_sav(V13_dictionary, "~/V13_dictionary.sav") ## ----eval=FALSE--------------------------------------------------------------- # write_dta(V13_data, "~/V13_data.dta", version = 13) # write_dta(V13_dictionary, "~/V13_dictionary.dta", version = 13) ## ----------------------------------------------------------------------------- sessionInfo()