basicsAll.R.txt

################################################### ### chunk number 1: load ################################################### library(XML) library(Biobase) library(annotate) library(genefilter) library(golubEsets) ## our data library(ctest) library(MASS) library(cluster)

################################################### ### chunk number 2:

################################################### ### chunk number 3: R ################################################### class(golubTrain) slotNames(golubTrain) golubTrain

################################################### ### chunk number 4: R ################################################### phenoTrain<-phenoData(golubTrain) class(phenoTrain) slotNames(phenoTrain) phenoTrain

################################################### ### chunk number 5: R ################################################### golubTrain[1:3,1:10]

################################################### ### chunk number 6: R ################################################### X <- golubTrain@exprs X <- slot(golubTrain, "exprs") X <- exprs(golubTrain) dim(X)

################################################### ### chunk number 7: R ################################################### slot(golubTrain,"annotation") slot(golubTrain,"annotation")<-"HU6800" golubTrain@annotation

################################################### ### chunk number 8: R ################################################### X<-exprs(golubTrain) X[X<100]<-100 X[X>16000]<-16000

################################################### ### chunk number 9: R ################################################### mmfilt <- function(r=5, d=500, na.rm=TRUE) { function(x) { minval <- min(x, na.rm=na.rm) maxval <- max(x, na.rm=na.rm) (maxval/minval > r) && (maxval-minval > d) } }

mmfun <- mmfilt() ffun <- filterfun(mmfun) good <- genefilter(X, ffun ) sum(good) ## Should get 3051 X <- X[good,]

################################################### ### chunk number 10: R ################################################### X <- log10(X)

################################################### ### chunk number 11: R ################################################### X<-scale(X) golubTrainSub<-golubTrain[good,] slot(golubTrainSub,"exprs")<-X golubTrainSub

################################################### ### chunk number 12: R ################################################### cl <- golubTrainSub$ALL.AML ## ALL and AML class labels table(cl) ffun <- filterfun(ttest(cl,p=0.0001)) smallp<- genefilter(exprs(golubTrainSub), ffun ) sum(smallp) # Should get 163

################################################### ### chunk number 13: R ################################################### X2<-exprs(golubTrainSub[smallp,]) stats<-apply(X2, 1, function(z) { res<-t.test(z ~ cl) c(res$estimate, res$statistic, res$parameter, res$p.value) }) stats<-data.frame(t(stats)) ## put row names on the dataframe names(stats)<-c("ALL mean", "AML mean", "t-statistic", "df", "p-value") ## put the data in order, smallest p-value first stats<-stats[order(stats$p),]

################################################### ### chunk number 14: R ################################################### ll <- read.annotation("hgu68ll") # Read in locus link annotation data gname <- row.names(X2) # Names of genes with small p--value LocusID<-multiget(gname, env=ll) # Locus link IDs locuslinkByID(LocusID) # Query LocusLink

################################################### ### chunk number 15: R ################################################### pubmed("11695507","9923029","11769896",disp="browser")

################################################### ### chunk number 16: R ################################################### x <- pubmed("11695507","9923029","11769896") a <- xmlRoot(x) numAbst <- length(xmlChildren(a)) absts1 <- list() for (i in 1:numAbst) { absts1[[i]] <- buildPubMedAbst(a[[i]]) }

################################################### ### chunk number 17: R ################################################### absts1[[1]] for(i in 1: length(absts1)) print(articleTitle(absts1[[i]]))

################################################### ### chunk number 18: R ################################################### g5<-gname[1:5] absts2<-pm.getabst(g5,"hgu68")

################################################### ### chunk number 19: R ################################################### pm.titles(absts2)

################################################### ### chunk number 20: R ################################################### sapply(absts2, function(x) pm.abstGrep("[Pp]rotein", x))

################################################### ### chunk number 21: R ################################################### chroms <- read.annotation("hgu68chrom") whichC <- multiget(gname, env=chroms, iffail=NA) cc <-as.numeric(unlist(whichC))

################################################### ### chunk number 22: R ################################################### res<-cbind(gname,cc,signif(stats,3)) names(res)<-c("Gene name", "Chromosome",names(stats)) ll.htmlpage(LocusID, filename="golub.html", title="Golub et al. data, genes with p-value < 0.0001", othernames=res, table.head=c("LocusID",names(res)), table.center = TRUE )

################################################### ### chunk number 23: hclust ################################################### dgTr <- dist(t(exprs(golubTrainSub)), method="manhattan") hcgTr <- hclust(dgTr, method="average") plot(hcgTr, labels=golubTrainSub$ALL.AML, main="Hierarchical clustering dendrogram for ALL AML data",sub="Average linkage, Manhattan distance for scaled arrays, G=3,051 genes")

################################################### ### chunk number 24: pam ################################################### set.seed(12345) ##so we always get the same results pmgTr <- pam(dgTr, k=2, diss=TRUE) ##plot(pmgTr) ## Can't do this interactively kmgTr <- kmeans(t(exprs(golubTrainSub)), centers=2) table(pmgTr$clust, golubTrainSub$ALL.AML) table(kmgTr$clust, golubTrainSub$ALL.AML)

################################################### ### chunk number 25: sil ################################################### sils<-pmgTr$silinfo[[1]][,3] ord<-as.numeric(row.names(pmgTr$silinfo[[1]]))

barplot(sils,col=c(2,4)[as.numeric(cl[ord])],main="Silhouette plot for ALL AML data", sub="Manhattan distance for scaled arrays, K=2, G=3,051 genes")

bioconductor.org

Sections

basicsAll.R.txt

Navigation

News