## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----style, echo=FALSE, results='asis'---------------------------------------- BiocStyle::markdown() ## ----setup-------------------------------------------------------------------- knitr::opts_chunk$set( dpi = 300, warning = FALSE, collapse = TRUE, error = FALSE, comment = "#>", echo = TRUE ) library(mbQTL) library(tidyr) data("SnpFile") data("microbeAbund") data("CovFile") data("metagenomeSeqObj") doctype <- knitr::opts_knit$get("rmarkdown.pandoc.to") ## ----------------------------------------------------------------------------- sessionInfo() ## ----eval=TRUE, echo=TRUE----------------------------------------------------- # microbial count file # microbeAbund # SNP file # SnpFile # Covariance file # CovFile #Check out file formats compatible with mbQTL files. #head(microbeAbund) #head(SnpFile) #head(CovFile) metagenomeSeqObj ## ----eval=TRUE, echo=TRUE----------------------------------------------------- compatible_metagenome_seq <- metagenomeSeqToMbqtl(metagenomeSeqObj, norm = TRUE, log = TRUE, aggregate_taxa = "Genus" ) # Check for the class of compatible_metagenome_seq class(compatible_metagenome_seq) ## ----eval=TRUE, echo=TRUE----------------------------------------------------- # perform linear regression analysis between taxa and snps across # samples and produce a data frame of results.(covariate file is # optional but highly recommended to avoid results which might be # prone to batch effects and obtaining optimal biological relevance # for finding snp - taxa relationships.) LinearAnalysisTaxaSNP <- linearTaxaSnp(microbeAbund, SnpFile, Covariate = CovFile ) ## ----eval=TRUE, echo=TRUE, fig.width = 4, fig.height=4, out.width="90%", dpi=300, fig.align="center"---- # Create a histogram of P values across all snp - taxa # linear regression estimations histPvalueLm(LinearAnalysisTaxaSNP) # Create a QQPlot of expected versus estimated P value for all all # snp - taxa linear regression estimations qqPlotLm(microbeAbund, SnpFile, Covariate = CovFile) ## ----eval=TRUE, echo=TRUE----------------------------------------------------- # Estimate taxa SNP - taxa correlation R and estimate R2 # (To what extent variance in one taxa explains the # variance in snp) # First estimate R value for all SNP-Taxa relationships correlationMicrobes <- coringTaxa(microbeAbund) # Estimate the correlation value (R2) between a specific snp and all taxa. one_rs_id <- allToAllProduct(SnpFile, microbeAbund, "chr1.171282963_T") # Estimate the correlation value (R2) between all snps and all taxa. for_all_rsids <- allToAllProduct(SnpFile, microbeAbund) # Estimate rho value for all snps and all taxa present in the dataset. taxa_SNP_Cor <- taxaSnpCor(for_all_rsids, correlationMicrobes) # Estimate rho value for all snps and all taxa present in the # dataset and pick the highest and lowest p values in the # dataset to identify improtant SNP-Taxa relationships. taxa_SNP_Cor_lim <- taxaSnpCor(for_all_rsids, correlationMicrobes, probs = c(0.0001, 0.9999) ) ## ----eval=TRUE, echo=TRUE, fig.width = 9, fig.height=5, out.width="90%", dpi=300, fig.align="center"---- ## Draw heatmap of rho estimates "taxa_SNP_Cor_lim" is the # output of taxaSnpCor(). # One can use other pheatmap() settings for extra annotation mbQtlCorHeatmap(taxa_SNP_Cor_lim, fontsize_col = 5, fontsize_row = 7 ) ## ----eval=TRUE, echo=TRUE----------------------------------------------------- # perform Logistic regression analysis between taxa and snps # across samples microbeAbund is the microbe abundance file # (a dateframe with rownames as taxa and colnames and sample # names) and SnpFile is the snp file (0,1,2) values # (rownames as snps and colnames as samples) log_link_resA <- logRegSnpsTaxa(microbeAbund, SnpFile) # Perform Logistic regression for specific microbe log_link_resB <- logRegSnpsTaxa(microbeAbund, SnpFile, selectmicrobe = c("Haemophilus") ) ## ----eval=TRUE, echo=TRUE, fig.width = 5, fig.height=2, out.width="90%", dpi=300, fig.align="center"---- # Create a barplot with the specific rsID, and microbe of interest, # including the genotype information for the reference versus # alternate versus hetrozygous alleles and and presence absence # of microbe of interest. Note your reference, alternative and # heterozygous genotype values need to match the genotype of # your SNP of interest this information can be obtained # from snpdb data base or GATK/plink files. Logit_plot <- logitPlotSnpTaxa(microbeAbund, SnpFile, selectmicrobe = "Neisseria", rsID = "chr2.241072116_A", ref = "GG", alt = "AA", het = "AG" ) Logit_plot