## ----style, echo = FALSE, results = 'asis', message=FALSE---------------- BiocStyle::markdown() ## ----env, echo=FALSE, warning=FALSE-------------------------------------- .cache <- FALSE suppressPackageStartupMessages(library("ProteomicsBioc2016Workshop")) suppressPackageStartupMessages(library("RforProteomics")) suppressPackageStartupMessages(library("AnnotationHub")) suppressMessages(library("BiocInstaller")) suppressPackageStartupMessages(library("lattice")) suppressPackageStartupMessages(library("gridExtra")) suppressPackageStartupMessages(library("mzID")) suppressPackageStartupMessages(library("msmsTests")) suppressPackageStartupMessages(library("gplots")) ## ----src, eval=FALSE----------------------------------------------------- # source("https://bioconductor.org/biocLite.R") ## ----install, eval=FALSE------------------------------------------------- # library("BiocInstaller") # biocLite("lgatto/ProteomicsBioc2016Workshop", # dependencies = TRUE, build_vignettes=TRUE) ## ----launch, eval=FALSE-------------------------------------------------- # library("ProteomicsBioc2016Workshop") # bioc2016() ## ----pk, echo=FALSE, warning=FALSE, cache=.cache------------------------- biocv <- as.character(biocVersion()) pp <- proteomicsPackages(biocv) msp <- massSpectrometryPackages(biocv) msdp <- massSpectrometryDataPackages(biocv) ## ----pp, eval=FALSE------------------------------------------------------ # library("RforProteomics") # pp <- proteomicsPackages("3.3") # display(pp) ## ---- datatab, results='asis', echo=FALSE-------------------------------- datatab <- data.frame(Type = c("raw", "identification", "quantitation", "peak lists", "other"), Format = c("mzML, mzXML, netCDF, mzData", "mzIdentML", "mzQuantML", "mgf", "mzTab"), Package = c( "[`mzR`](http://bioconductor.org/packages/release/bioc/html/mzR.html) (read)", "[`mzID`](http://bioconductor.org/packages/release/bioc/html/mzID.html) and [`mzR`](http://bioconductor.org/packages/release/bioc/html/mzR.html) (read)", "", "[`MSnbase`](http://bioconductor.org/packages/release/bioc/html/MSnbase.html) (read/write)", "[`MSnbase`](http://bioconductor.org/packages/release/bioc/html/MSnbase.html) (read)")) knitr::kable(datatab) ## ----ah0----------------------------------------------------------------- library("AnnotationHub") ah <- AnnotationHub() query(ah, "proteomics") ## ----ah, message=FALSE--------------------------------------------------- ms <- ah[["AH49008"]] ms ## ----mshd, echo=FALSE---------------------------------------------------- hd <- header(ms) ## ---- rpx---------------------------------------------------------------- library("rpx") pxannounced() ## ---- pxd---------------------------------------------------------------- px <- PXDataset("PXD000001") px pxfiles(px) ## ---- pxvar, eval=FALSE-------------------------------------------------- # pxtax(px) # pxurl(px) # pxref(px) ## ----pxgetfile6, eval=TRUE----------------------------------------------- mzf <- pxget(px, pxfiles(px)[6]) mzf ## ----rawms--------------------------------------------------------------- library("mzR") ms <- openMSfile(mzf) ms ## ----hd------------------------------------------------------------------ hd <- header(ms) dim(hd) names(hd) ## ----peaks--------------------------------------------------------------- head(peaks(ms, 234)) str(peaks(ms, 1:5)) ## ---- ex_raw, echo=FALSE, eval=FALSE, fig.align='center'----------------- # hd2 <- hd[hd$msLevel == 2, ] # i <- which.max(hd2$basePeakIntensity) # hd2[i, ] # pi <- peaks(ms, hd2[i, 1]) # plot(pi, type = "h") # mz <- hd2[i, "basePeakMZ"] # plot(pi, type = "h", xlim = c(mz-0.5, mz+0.5)) # # pj <- peaks(ms, 100) # plot(pj, type = "l") # plot(pj, type = "l", xlim = c(536,540)) ## ----id, cache=.cache---------------------------------------------------- library("mzID") (f <- dir(system.file("extdata", package = "ProteomicsBioc2016Workshop"), pattern = "mzid", full.names = TRUE)) id <- mzID(f) software(id) id ## ----fid----------------------------------------------------------------- head(flatten(id)) ## ----id2----------------------------------------------------------------- library("mzR") id2 <- openIDfile(f) id2 softwareInfo(id2) ## ----fid2---------------------------------------------------------------- head(psms(id2)) ## ---- ex_id, echo = FALSE, eval=FALSE------------------------------------ # fid <- flatten(id) # x <- by(fid, fid$accession, function(x) # c(unique(x$length), # length(unique(x$pepseq)), # mean(x$'ms-gf:specevalue'))) # x <- data.frame(do.call(rbind, x)) # colnames(x) <- c("plength", "npep", "eval") # x$bins <- cut(x$eval, summary(x$eval)) # library("lattice") # xyplot(plength ~ npep | bins, data = x) ## ----rtandem, eval=FALSE------------------------------------------------- # library("rTANDEM") # ?rtandem ## ----msgfplus, eval = FALSE---------------------------------------------- # library("MSGFplus") # parameters <- msgfPar(database = 'milk-proteins.fasta', # tolerance = '20 ppm', # instrument = 'TOF') # runMSGF(parameters, 'msraw.mzML') ## ---- msnset, echo=FALSE, fig.width = 5, fig.height = 7, fig.align='center'---- plot(NA, xlim = c(0, 5), ylim = c(0, 10), axes=FALSE, xlab = NA, ylab = NA) rect(0, 0, 3, 1.9) rect(0, 2, 3, 10) rect(3.05, 2, 5, 10) segments(seq(0, 3, length.out = 7), rep(0, 7), seq(0, 3, length.out = 7), rep(10, 7), lty = "dotted") segments(rep(0, 50), seq(2, 10, length.out = 50), rep(5, 100), seq(2, 10, length.out = 50), lty = "dotted") text(1.5, 1, "sample metadata", cex = 1.5) text(1.5, 6, "assay data", cex = 1.5) text(4, 6, "feature\nmetadata", cex = 1.5) ## ----msnbase------------------------------------------------------------- library("MSnbase") quantFile <- dir(system.file(package = "MSnbase", dir = "extdata"), full.name = TRUE, pattern = "mzXML$") quantFile msexp <- readMSData(quantFile, verbose=FALSE) msexp ## ----addid--------------------------------------------------------------- ## find path to a mzIdentML file identFile <- dir(system.file(package = "MSnbase", dir = "extdata"), full.name = TRUE, pattern = "mzid$") identFile msexp <- addIdentificationData(msexp, identFile) fData(msexp) ## ----specplot------------------------------------------------------------ msexp[[1]] as(msexp[[1]], "data.frame")[100:105, ] ## ----chromato------------------------------------------------------------ chromatogram(ms) ## ----chromato3, eval=FALSE----------------------------------------------- # ## manual download # library("RforProteomics") # url <- "http://proteome.sysbiol.cam.ac.uk/lgatto/files/Thermo-HELA-PRT/" # f1 <- downloadData(file.path(url, "Thermo_Hela_PRTC_1.mzML")) # f2 <- downloadData(file.path(url, "Thermo_Hela_PRTC_2.mzML")) # f3 <- downloadData(file.path(url, "Thermo_Hela_PRTC_3.mzML")) ## ----chromato3plot, eval=FALSE------------------------------------------- # ## plotting # c1 <- chromatogram(f1) # c2 <- chromatogram(f2, plot = FALSE) # lines(c2, col = "steelblue", lty = "dashed", lwd = 2) # c3 <- chromatogram(f3, plot = FALSE) # lines(c2, col = "orange", lty = "dotted") ## ----xic, cache=.cache, fig.width=12------------------------------------- par(mfrow = c(1, 2)) xic(ms, mz = 636.925, width = 0.01) x <- xic(ms, mz = 636.925, width = 0.01, rtlim = c(2120, 2200)) ## ----itraqdata----------------------------------------------------------- data(itraqdata) itraqdata plot(itraqdata[[10]], reporters = iTRAQ4, full=TRUE) ## ----ms1----------------------------------------------------------------- plot(peaks(ms, 3071), type = "h", xlab = "M/Z", ylab = "Intensity", sub = formatRt(hd[3071, "retentionTime"])) ## ----pxd1---------------------------------------------------------------- ## (1) Open raw data file ms <- openMSfile(mzf) ## (2) Extract the header information hd <- header(ms) ## (3) MS1 spectra indices ms1 <- which(hd$msLevel == 1) ## (4) Select MS1 spectra with retention time between 30 and 35 minutes rtsel <- hd$retentionTime[ms1] / 60 > 30 & hd$retentionTime[ms1] / 60 < 35 ## (5) Indices of the 1st and 2nd MS1 spectra after 30 minutes i <- ms1[which(rtsel)][1] j <- ms1[which(rtsel)][2] ## (6) Interleaved MS2 spectra ms2 <- (i+1):(j-1) ## ----visfig01------------------------------------------------------------ chromatogram(ms) ## ----visfig02------------------------------------------------------------ chromatogram(ms) abline(v = hd[i, "retentionTime"], col = "red") ## ----visfig03------------------------------------------------------------ plot(peaks(ms, i), type = "l", xlim = c(400, 1000)) legend("topright", bty = "n", legend = paste0( "Acquisition ", hd[i, "acquisitionNum"], "\n", "Retention time ", formatRt(hd[i, "retentionTime"]))) ## ----visfig04------------------------------------------------------------ plot(peaks(ms, i), type = "l", xlim = c(400, 1000)) legend("topright", bty = "n", legend = paste0( "Acquisition ", hd[i, "acquisitionNum"], "\n", "Retention time ", formatRt(hd[i, "retentionTime"]))) abline(v = hd[ms2, "precursorMZ"], col = c("#FF000080", rep("#12121280", 9))) ## ----visfig05------------------------------------------------------------ plot(peaks(ms, i), type = "l", xlim = c(521, 522.5)) abline(v = hd[ms2, "precursorMZ"], col = "#FF000080") ## ----visfig06, fig.width = 8, fig.height = 10---------------------------- par(mfrow = c(5, 2), mar = c(2, 2, 0, 1)) for (ii in ms2) { p <- peaks(ms, ii) plot(p, xlab = "", ylab = "", type = "h", cex.axis = .6) legend("topright", legend = paste0("Prec M/Z\n", round(hd[ii, "precursorMZ"], 2)), bty = "n", cex = .8) } ## ----mslayout, echo=FALSE------------------------------------------------ ## Preparing the layout (not shown) lout <- matrix(NA, ncol = 10, nrow = 8) lout[1:2, ] <- 1 for (ii in 3:4) lout[ii, ] <- c(2, 2, 2, 2, 2, 2, 3, 3, 3, 3) lout[5, ] <- rep(4:8, each = 2) lout[6, ] <- rep(4:8, each = 2) lout[7, ] <- rep(9:13, each = 2) lout[8, ] <- rep(9:13, each = 2) ## ----visfig-------------------------------------------------------------- layout(lout) par(mar=c(4,2,1,1)) chromatogram(ms) abline(v = hd[i, "retentionTime"], col = "red") par(mar = c(3, 2, 1, 0)) plot(peaks(ms, i), type = "l", xlim = c(400, 1000)) legend("topright", bty = "n", legend = paste0( "Acquisition ", hd[i, "acquisitionNum"], "\n", "Retention time ", formatRt(hd[i, "retentionTime"]))) abline(h = 0) abline(v = hd[ms2, "precursorMZ"], col = c("#FF000080", rep("#12121280", 9))) par(mar = c(3, 0.5, 1, 1)) plot(peaks(ms, i), type = "l", xlim = c(521, 522.5), yaxt = "n") abline(h = 0) abline(v = hd[ms2, "precursorMZ"], col = "#FF000080") par(mar = c(2, 2, 0, 1)) for (ii in ms2) { p <- peaks(ms, ii) plot(p, xlab = "", ylab = "", type = "h", cex.axis = .6) legend("topright", legend = paste0("Prec M/Z\n", round(hd[ii, "precursorMZ"], 2)), bty = "n", cex = .8) } ## ----msmap1, message=FALSE, fig.width=15, echo=TRUE---------------------- ## (1) MS space heaptmap M <- MSmap(ms, ms1[rtsel], 521, 523, .005, hd) ff <- colorRampPalette(c("yellow", "steelblue")) trellis.par.set(regions=list(col=ff(100))) m1 <- plot(M, aspect = 1, allTicks = FALSE) ## (2) Same data as (1), in 3 dimenstion M@map[msMap(M) == 0] <- NA m2 <- plot3D(M, rgl = FALSE) ## (3) The 2 MS1 and 10 interleaved MS2 spectra from above i <- ms1[which(rtsel)][1] j <- ms1[which(rtsel)][2] M2 <- MSmap(ms, i:j, 100, 1000, 1, hd) m3 <- plot3D(M2) grid.arrange(m1, m2, m3, ncol = 3) ## ----two-col-1, results='asis', echo=FALSE, out.extra=''----------------- cat("") cat("") cat("") cat("
") ## ----two-col-2, results='asis', echo=FALSE, out.extra=''----------------- cat("") ## ----two-col-3, results='asis', echo=FALSE, out.extra=''----------------- cat("
") ## ----id1, message=FALSE, fig.width=15, message=FALSE--------------------- par(mfrow = c(1, 2)) itraqdata2 <- pickPeaks(itraqdata, verbose = FALSE) s <- "SIGFEGDSIGR" plot(itraqdata2[[14]], s, main = s) plot(itraqdata2[[25]], itraqdata2[[28]], sequences = rep("IMIDLDGTENK", 2)) ## ----fag----------------------------------------------------------------- calculateFragments("SIGFEGDSIGR") ## ---- quanttab, echo=FALSE, results='asis'------------------------------- qtb <- matrix(c("XIC", "Counting", "SILAC, 15N", "iTRAQ, TMT"), nrow = 2, ncol = 2) dimnames(qtb) <- list( 'MS level' = c("MS1", "MS2"), 'Quantitation' = c("Label-free", "Labelled")) knitr::kable(qtb) ## ---- itraq4plot, fig.align='center'------------------------------------- plot(msexp[[1]], full=TRUE, reporters = iTRAQ4) ## ---- quantitraq--------------------------------------------------------- msset <- quantify(msexp, method = "trap", reporters = iTRAQ4, verbose = FALSE) exprs(msset) processingData(msset) ## ---- lfms2-------------------------------------------------------------- exprs(si <- quantify(msexp, method = "SIn")) exprs(saf <- quantify(msexp, method = "NSAF")) ## ---- mztab-------------------------------------------------------------- mztf <- pxget(px, pxfiles(px)[2]) (mzt <- readMzTabData(mztf, what = "PEP", version = "0.9")) ## ---- readmsnset2-------------------------------------------------------- csv <- dir(system.file ("extdata" , package = "pRolocdata"), full.names = TRUE, pattern = "pr800866n_si_004-rep1.csv") getEcols(csv, split = ",") ecols <- 7:10 res <- readMSnSet2(csv, ecols) head(exprs(res)) head(fData(res)) ## ----msmstest------------------------------------------------------------ library("msmsTests") data(msms.dataset) ## an MSnSet e <- pp.msms.data(msms.dataset) head(exprs(e)) ## ----msmstest2----------------------------------------------------------- pData(e) null.f <- "y~batch" alt.f <- "y~treat+batch" div <- apply(exprs(e),2,sum) res <- msms.edgeR(e,alt.f,null.f,div=div,fnm="treat") head(res) ## ----na------------------------------------------------------------------ data(naset) naplot(naset, col = "black") ## ----filterNA------------------------------------------------------------ flt <- filterNA(naset) processingData(flt) ## ----naheatmap, echo=FALSE----------------------------------------------- x <- impute(naset, "zero") exprs(x)[exprs(x) != 0] <- 1 library("gplots") heatmap.2(exprs(x), col = c("lightgray", "black"), scale = "none", dendrogram = "none", trace = "none", keysize = 0.5, key = FALSE, RowSideColors = ifelse(fData(x)$randna, "orange", "brown"), ColSideColors = rep(c("steelblue", "darkolivegreen"), each = 8)) ## ------------------------------------------------------------------------ data(cvg) hist(cvg$coverage, breaks = 100, xlab = "coverage", main = "") ## ----up------------------------------------------------------------------ data(upens) data(upenst) ## ----idmappingplot, echo=FALSE------------------------------------------- par(mfrow = c(2, 1)) barplot(table(tapply(upens[, 2], upens[, 1], function(x) length(na.omit(x)))), main = "Ensembl genes - UnitProtKB mapping") barplot(table(tapply(upenst[, 2], upenst[, 1], function(x) length(na.omit(x)))), main = "Ensembl transcripts - UnitProtKB mapping") ## ---- si, echo=FALSE----------------------------------------------------- print(sessionInfo(), local = FALSE)