## ----style, echo = FALSE, results = 'asis'-------------------------------
# Emit the BiocStyle markdown set-up and tell knitr not to tidy (re-format)
# the code chunks of this document.
BiocStyle::markdown()
knitr::opts_chunk$set(tidy = FALSE)

## ----setup, echo=FALSE---------------------------------------------------
library(LearnBioconductor)
library(xtable)
# Abort early unless BiocInstaller reports Bioconductor version "3.0".
stopifnot(BiocInstaller::biocVersion() == "3.0")

## ----echo=FALSE, results='asis'------------------------------------------
# Table with one row per technique name and an integer `packages` count,
# printed as an HTML table without the xtable comment header.
biocView_df <- data.frame(
    technique = c(
        "Bayesian",
        "Classification",
        "Clustering",
        "DecisionTree",
        "NeuralNetwork",
        "SupportVectorMachines",
        "DimensionReduction",
        "HiddenMarkovModel",
        "Regression",
        "PrincipalComponent"
    ),
    packages = c(15L, 64L, 89L, 7L, 1L, 1L, 2L, 4L, 7L, 4L)
)
print(xtable(biocView_df), type = "html", comment = FALSE)

## ----message=FALSE-------------------------------------------------------
library(GenomicRanges)
# Locate the NCI60.Rda file bundled with LearnBioconductor and load it;
# the code below relies on load() creating an object named `NCI60`.
sefile <- system.file(
    "extdata", "NCI60.Rda",
    package = "LearnBioconductor"
)
load(sefile)

# Transposed assay matrix and the column annotations of NCI60.
# NOTE(review): presumably the transpose puts samples in rows -- confirm.
nci60data <- t(assay(NCI60))
ncilabels <- colData(NCI60)

## ----message=FALSE-------------------------------------------------------
library(MLSeq)
# Read the cervical.txt table shipped in the MLSeq package; the first line
# of the file is used as column names.
filepath <- system.file("extdata/cervical.txt", package = "MLSeq")
cervical <- read.table(filepath, header = TRUE)


## ------------------------------------------------------------------------
# Principal component analysis of nci60data with every column scaled to unit
# variance, followed by a biplot of the result.
# The scaling argument of prcomp() is named `scale.` (with a trailing dot);
# `scale = TRUE` only worked through partial argument matching, so the name
# is now spelled out in full.
pcaRes <- prcomp(nci60data, scale. = TRUE)
biplot(pcaRes)

## ----fig.width=12--------------------------------------------------------
# Flatten the annotation object into a plain character vector of labels;
# these labels are what the colours are assigned from.
labs <- as.character(
    unlist(as.list(ncilabels))
)

# Assign one rainbow colour to every distinct label in `vec`.
#
# Arguments:
#   vec     vector of labels, one per observation (coerced to a factor).
#
# Returns a list with
#   colvec  the colour of each element of `vec`, in the order of `vec`;
#   cols    the palette, one colour per distinct label;
#   labels  the distinct labels, ordered so that labels[i] is drawn in
#           cols[i] (i.e. usable directly as legend text / text.col).
#
# Colours are indexed by factor level, so the labels must be the factor
# levels too.  Taking them from unique(vec) (order of first appearance)
# paired legend entries with the wrong colours whenever `vec` was not
# already sorted.  `exclude = NULL` keeps NA as a level of its own so the
# palette size still equals the number of distinct values in `vec`.
cellColor <- function(vec)
{
    groups <- factor(vec, exclude = NULL)
    labels <- levels(groups)
    cols <- rainbow(length(labels))
    colvec <- cols[as.integer(groups)]
    list(colvec = colvec, cols = cols, labels = labels)
}

# Two panels side by side: scores on components 1 vs 2 and 1 vs 3, with the
# points coloured through cellColor() and a legend listing the labels.
par(mfrow = c(1, 2))

colres <- cellColor(labs)

plot(
    pcaRes$x[, 1:2],
    col = colres$colvec,
    xlab = "z1", ylab = "z2",
    pch = 19
)
legend(
    "bottomright",
    legend = colres$labels,
    text.col = colres$cols,
    bty = "n",
    cex = 0.80
)

plot(
    pcaRes$x[, c(1, 3)],
    col = colres$colvec,
    xlab = "z1", ylab = "z3",
    pch = 19
)
legend(
    "topright",
    legend = colres$labels,
    text.col = colres$cols,
    bty = "n",
    cex = 0.80
)

# Back to a single-panel layout for the figures that follow.
par(mfrow = c(1, 1))


## ----fig.width=12 , message=FALSE----------------------------------------
library(dendextend)

# Hierarchical clustering with the hclust() default method on the distances
# between rows of the column-scaled data matrix.
sdata <- scale(nci60data)
d <- dist(sdata)
labs <- as.character(
    unlist(as.list(ncilabels))
)
comp_clust <- hclust(d)
dend <- as.dendrogram(comp_clust)

# Reorder the labels to follow the leaves of the dendrogram, then colour
# and relabel the leaves before plotting.
leaves <- labs[order.dendrogram(dend)]
labels_colors(dend, labels = TRUE) <- cellColor(leaves)$colvec
labels(dend) <- leaves
plot(dend, main = "Clustering using Complete Linkage")

## ----fig.width=12, fig.height=6------------------------------------------
# Repeat the clustering of `d` with average and with single linkage and plot
# both trees, labelled with `labs`, for comparison.
plot(hclust(d, method = "average"), labels = labs,
     main = "Clustering using Average Linkage", xlab = "", ylab = "")
# Title typo fixed: "Clusteringg" -> "Clustering".
plot(hclust(d, method = "single"), labels = labs,
     main = "Clustering using Single Linkage", xlab = "", ylab = "")

## ------------------------------------------------------------------------
# Cut the `comp_clust` tree into four groups and cross-tabulate the group
# membership against the labels.
hc <- cutree(comp_clust, k = 4)
table(hc, labs)

## ------------------------------------------------------------------------
# One-column data frame holding a two-level factor: 29 entries coded 0
# followed by 29 entries coded 1.
# NOTE(review): assumes the columns of `cervical` come in this order -- confirm.
class <- data.frame(condition = factor(rep(c(0, 1), each = 29)))

## ------------------------------------------------------------------------
# Small illustration data set: the first two columns of the transposed
# `cervical` table as x1 and x2, with the condition factor as y.
data <- t(cervical)[, 1:2]
df <- cbind(data, class)
colnames(df) <- c("x1", "x2", "y")
rownames(df) <- NULL
head(df)

## ----fig.width=12--------------------------------------------------------
# Scatter plot of x2 against x1; rows with y equal to 1 are drawn in red,
# all other rows in blue.
plot(
    df[, "x1"], df[, "x2"],
    xlab = "x1", ylab = "x2",
    main = "data representation for knn",
    col = ifelse(as.character(df[, "y"]) == 1, "red", "blue")
)

## ------------------------------------------------------------------------
# Draw 20% of the columns of `cervical` (rounded up) at random, without
# replacement, as the test set; the fixed seed makes the split reproducible.
set.seed(9)
nTest <- ceiling(ncol(cervical) * 0.2)
ind <- sample(ncol(cervical), size = nTest, replace = FALSE)

# Training part: remaining columns, every entry increased by one, together
# with the matching rows of `class`.
cervical.train <- as.matrix(cervical[, -ind] + 1)
classtr <- data.frame(condition = class[-ind, ])

# Test part: the sampled columns, treated the same way.
cervical.test <- as.matrix(cervical[, ind] + 1)
classts <- data.frame(condition = class[ind, ])

## ----message=FALSE-------------------------------------------------------
library(class)

# Misclassification rates of a k-nearest-neighbour classifier.
#
# Arguments:
#   testset     test observations, one per row.
#   trainset    training observations, one per row.
#   testclass   true classes of the rows of `testset`.
#   trainclass  true classes of the rows of `trainset`.
#   k           number of neighbours.
#
# `testclass` and `trainclass` may be factors, character vectors or the
# one-row character matrices obtained from t() on a one-column data frame.
#
# Returns a named numeric vector c(train_fit, test_fit):
#   train_fit   proportion of training rows misclassified by knn.cv();
#   test_fit    proportion of test rows misclassified by knn().
newknn <- function( testset, trainset, testclass, trainclass, k)
{
    # Proportion of predictions that disagree with the truth.  Both sides are
    # compared as character vectors: the previous element-wise identical()
    # was FALSE for every element when the truth was a factor, which reported
    # a 100% error rate.  A comparison that yields NA counts as an error.
    error_rate <- function(pred, truth) {
        agree <- as.character(pred) == as.character(truth)
        mean(!(agree %in% TRUE))
    }

    # knn.cv() is called before knn(), as before, so that any random
    # tie-breaking consumes the random number stream in the same order.
    pred.train <- knn.cv(trainset, trainclass, k = k)
    pred.test <- knn(trainset, testset, trainclass, k = k)

    c(train_fit = error_rate(pred.train, trainclass),
      test_fit = error_rate(pred.test, testclass))
}

# Transpose the count matrices and the class data frames before handing
# them to newknn(); t() turns each class data frame into a one-row matrix.
trainset <- t(cervical.train)
testset <- t(cervical.test)
testclass <- t(classts)
trainclass <- t(classtr)

# Training and test error for k = 1, ..., 15 neighbours.
klist <- 1:15
ans <- lapply(klist, function(k) {
    newknn(testset, trainset, testclass, trainclass, k = k)
})

# One row per k, with columns train_fit and test_fit.
resdf <- do.call(rbind, ans)
rownames(resdf) <- NULL

# Both error curves on a common y range.
plot(
    klist, resdf[, "train_fit"],
    col = "blue", type = "b",
    ylim = range(resdf),
    main = "k Nearest Neighbors for Cervical Data",
    xlab = "No of neighbors",
    ylab = "Training and Test Error"
)
points(klist, resdf[, "test_fit"], col = "red", type = "b")
legend(
    "bottomright",
    legend = c("Training error", "Test error"),
    text.col = c("blue", "red"),
    bty = "n"
)


## ------------------------------------------------------------------------
# Build a DESeqDataSet from the training counts and from the test counts,
# each with the formula ~condition, and run DESeq() on both with
# fitType = "local".
cervical.trainS4 <- DESeqDataSetFromMatrix(
    countData = cervical.train,
    colData = classtr,
    formula(~condition)
)
cervical.trainS4 <- DESeq(cervical.trainS4, fitType = "local")

cervical.testS4 <- DESeqDataSetFromMatrix(
    countData = cervical.test,
    colData = classts,
    formula(~condition)
)
cervical.testS4 <- DESeq(cervical.testS4, fitType = "local")


## ------------------------------------------------------------------------
# Fit the "svm" method of classify() on the training DESeqDataSet and print
# the resulting object.
# NOTE(review): cv = 5 / rpt = 3 presumably mean 5-fold cross-validation
# repeated 3 times, and ref = "1" the reference class -- confirm in MLSeq.
svm <- classify(
    data = cervical.trainS4,
    method = "svm",
    normalize = "deseq",
    deseqTransform = "vst",
    cv = 5,
    rpt = 3,
    ref = "1"
)
svm

## ------------------------------------------------------------------------
# Show the slot names of the "MLSeq" class together with their classes.
getSlots("MLSeq")

## ------------------------------------------------------------------------
# Print what trained() returns for the SVM fit.
# NOTE(review): presumably the trained model stored in the object -- confirm.
trained(svm)

## ------------------------------------------------------------------------
# Predict the test set with the SVM fit and tabulate the predictions against
# the test conditions, with the second factor level moved to the front.
pred.svm <- predictClassify(svm, cervical.testS4)
table(pred.svm, relevel(cervical.testS4$condition, ref = 2))

## ------------------------------------------------------------------------
# Same workflow with the "randomforest" method: fit on the training set,
# print what trained() returns, predict the test set and tabulate the
# predictions against the test conditions (second level moved to the front).
rf <- classify(
    data = cervical.trainS4,
    method = "randomforest",
    normalize = "deseq",
    deseqTransform = "vst",
    cv = 5,
    rpt = 3,
    ref = "1"
)
trained(rf)
pred.rf <- predictClassify(rf, cervical.testS4)
table(pred.rf, relevel(cervical.testS4$condition, ref = 2))

## ------------------------------------------------------------------------
# Print the R version and the attached/loaded packages of this session.
sessionInfo()

