## ----Setup, echo=TRUE, message=FALSE------------------------------------------ library("ALL") library("hgu95av2.db") library("GO.db") library("annotate") library("genefilter") library("GOstats") library("RColorBrewer") library("xtable") library("Rgraphviz") ## ----universeSizeVsPvalue, eval=TRUE, echo=FALSE, results='hide'-------------- hg_tester <-function(size){ numFound <- 10 numDrawn <- 400 numAtCat <- 40 numNotAtCat <- size - numAtCat phyper(numFound-1, numAtCat,numNotAtCat, numDrawn, lower.tail=FALSE) } pv1000 <- hg_tester(1000) pv5000 <- hg_tester(5000) ## ----bcrAblOrNegSubset, results='hide', echo=TRUE, cache=TRUE----------------- data(ALL, package="ALL") ## For this data we can have ALL1/AF4 or BCR/ABL subsetType <- "ALL1/AF4" ## Subset of interest: 37BRC/ABL + 42NEG = 79 samples Bcell <- grep("^B", as.character(ALL$BT)) bcrAblOrNegIdx <- which(as.character(ALL$mol) %in% c("NEG", subsetType)) bcrAblOrNeg <- ALL[, intersect(Bcell, bcrAblOrNegIdx)] bcrAblOrNeg$mol.biol = factor(bcrAblOrNeg$mol.biol) ## ----nsFiltering-noEntrez, results='hide', echo=TRUE, cache=TRUE-------------- ##Remove genes that have no entrezGene id entrezIds <- mget(featureNames(bcrAblOrNeg), envir=hgu95av2ENTREZID) haveEntrezId <- names(entrezIds)[sapply(entrezIds, function(x) !is.na(x))] numNoEntrezId <- length(featureNames(bcrAblOrNeg)) - length(haveEntrezId) bcrAblOrNeg <- bcrAblOrNeg[haveEntrezId, ] ## ----nsFiltering-noGO, results='hide', echo=TRUE, cache=FALSE----------------- ## Remove genes with no GO mapping haveGo <- sapply(mget(featureNames(bcrAblOrNeg), hgu95av2GO), function(x){ if (length(x) == 1 && is.na(x)) FALSE else TRUE }) numNoGO <- sum(!haveGo) bcrAblOrNeg <- bcrAblOrNeg[haveGo, ] ## ----nsFiltering-IQR, results='hide', echo=TRUE, cache=TRUE------------------- ## Non-specific filtering based on IQR iqrCutoff <- 0.5 bcrAblOrNegIqr <- apply(exprs(bcrAblOrNeg), 1, IQR) selected <- bcrAblOrNegIqr > iqrCutoff ## Drop those that are on the Y chromosome ## because there is an imbalance of men and women by group chrN <- mget(featureNames(bcrAblOrNeg), envir=hgu95av2CHR) onY <- sapply(chrN, function(x) any(x=="Y")) onY[is.na(onY)] <- FALSE selected <- selected & !onY nsFiltered <- bcrAblOrNeg[selected, ] ## ----nsFiltering-unique, results='hide', echo=TRUE, cache=TRUE---------------- numNsWithDups <- length(featureNames(nsFiltered)) nsFilteredIqr <- bcrAblOrNegIqr[selected] uniqGenes <- findLargest(featureNames(nsFiltered), nsFilteredIqr, "hgu95av2") nsFiltered <- nsFiltered[uniqGenes, ] numSelected <- length(featureNames(nsFiltered)) ##set up some colors BCRcols = ifelse(nsFiltered$mol == subsetType, "goldenrod", "skyblue") cols = brewer.pal(10, "RdBu") ## ----defineGeneUniverse, echo=TRUE, results='hide', cache=TRUE---------------- ## Define gene universe based on results of non-specific filtering affyUniverse <- featureNames(nsFiltered) entrezUniverse <- unlist(mget(affyUniverse, hgu95av2ENTREZID)) if (any(duplicated(entrezUniverse))) stop("error in gene universe: can't have duplicate Entrez Gene Ids") ## Also define an alternate universe based on the entire chip chipAffyUniverse <- featureNames(bcrAblOrNeg) chipEntrezUniverse <- mget(chipAffyUniverse, hgu95av2ENTREZID) chipEntrezUniverse <- unique(unlist(chipEntrezUniverse)) ## ----parametric1, echo=TRUE, results='hide', cache=TRUE----------------------- ttestCutoff <- 0.05 ttests = rowttests(nsFiltered, "mol.biol") smPV = ttests$p.value < ttestCutoff pvalFiltered <- nsFiltered[smPV, ] selectedEntrezIds <- unlist(mget(featureNames(pvalFiltered), hgu95av2ENTREZID)) ## ----eval=FALSE--------------------------------------------------------------- # resultList <- lapply(lisOfParamObjs, hyperGTest) ## ----withYourData1, eval=FALSE------------------------------------------------ # entrezUniverse <- unlist(mget(featureNames(yourData), # hgu95av2ENTREZID)) # if (any(duplicated(entrezUniverse))) # stop(\"error in gene universe: can't have duplicate Entrez Gene Ids") # # pvalFiltered <- yourData[hasSmallPvalue, ] # selectedEntrezIds <-unlist(mget(featureNames(pvalFiltered), # hgu95av2ENTREZID)) ## ----standardHyperGeo, echo=TRUE, results='hide', cache=TRUE------------------ hgCutoff<- 0.001 params <- new("GOHyperGParams", geneIds=selectedEntrezIds, universeGeneIds=entrezUniverse, annotation="hgu95av2.db", ontology="BP", pvalueCutoff=hgCutoff, conditional=FALSE, testDirection="over") ## ----condHyperGeo, echo=TRUE, results='hide', cache=TRUE---------------------- paramsCond <- params conditional(paramsCond) <- TRUE ## ----standardHGTEST, cache=TRUE, echo=TRUE, results='hide'-------------------- hgOver <- hyperGTest(params) hgCondOver <- hyperGTest(paramsCond) ## ----HGTestAns---------------------------------------------------------------- hgOver hgCondOver ## ----summaryEx---------------------------------------------------------------- df <- summary(hgOver) names(df) # the columns dim(summary(hgOver, pvalue=0.1)) dim(summary(hgOver, categorySize=10)) ## ----resultAccessors---------------------------------------------------------- pvalues(hgOver)[1:3] oddsRatios(hgOver)[1:3] expectedCounts(hgOver)[1:3] geneCounts(hgOver)[1:3] universeCounts(hgOver)[1:3] length(geneIds(hgOver)) length(geneIdUniverse(hgOver)[[3]]) ## GOHyperGResult _only_ ## (NB: edges go from parent to child) goDag(hgOver) geneMappedCount(hgOver) universeMappedCount(hgOver) conditional(hgOver) testDirection(hgOver) testName(hgOver) ## ----htmlReportExample, results='hide', echo=TRUE----------------------------- htmlReport(hgCondOver, file="ALL_hgco.html") ## ----helperFunc, echo=FALSE, results='hide', cache=TRUE----------------------- sigCategories <- function(res, p){ if (missing(p)) p <- pvalueCutoff(res) pv <- pvalues(res) goIds <- names(pv[pv < p]) goIds } ## ----plotFuns, echo=FALSE, results='hide', cache=TRUE------------------------- coloredGoPlot <- function(ccMaxGraph, hgOver, hgCondOver) { nodeColors <- sapply(nodes(ccMaxGraph), function(n) { if (n %in% sigCategories(hgCondOver)) "dark red" else if (n %in% sigCategories(hgOver)) "pink" else "gray" }) nattr <- makeNodeAttrs(ccMaxGraph, label=nodes(ccMaxGraph), shape="ellipse", fillcolor=nodeColors, fixedsize=FALSE) plot(ccMaxGraph, nodeAttrs=nattr) } getMaxConnCompGraph <- function(hgOver, hgCondOver){ ##uGoDagRev \<-ugraph(goDag(hgOver)) sigNodes <- sigCategories(hgOver) ##adjNodes \<- unlist(adj(uGoDagRev, sigNodes)) adjNodes <- unlist(adj(goDag(hgOver),sigNodes)) displayNodes <- unique(c(sigNodes, adjNodes)) displayGraph <- subGraph(displayNodes, goDag(hgOver)) cc <- connComp(displayGraph) ccSizes <- listLen(cc) ccMaxIdx <- which(ccSizes == max(ccSizes)) ccMaxGraph <- subGraph(cc[[ccMaxIdx]], displayGraph) ccMaxGraph } ## ----info--------------------------------------------------------------------- sessionInfo()