##---------------------------------------------------------------
## densities
##---------------------------------------------------------------
library("vsn")
library("geneplotter")
data("lymphoma")
ll=function(x, thr=2)log2(ifelse(x>thr, x, thr))
multidensity(ll(exprs(lymphoma)), col=brewer.pal(9, "Set1")[ifelse(lymphoma$dye=="Cy3", 3, 1)], 
  lwd=3, main="8 arrays from the lymphoma data (Alizadeh 2000)", xlab=expression(log[2]~intensity))

##---------------------------------------------------------------
## quantile normalisation
##---------------------------------------------------------------
library("affy")
library("affydata")
library("preprocessCore")
library("RColorBrewer")
library("geneplotter")

data("Dilution")
nr = apply(exprs(Dilution), 2, rank)
nq = normalize.quantiles(exprs(Dilution))
colnames(nq) = sampleNames(Dilution)
cols = brewer.pal(9,"Set1")
qn1 = function(x, ...) {
  matplot(nr, x, pch=".", log="y", xlab="rank", col=cols)
  legend("topleft", colnames(x), fill=cols)
}
qn2 = function(x, ...)
  multidensity(log2(x), lwd=2, col=cols, ...)
  

png(file="110628-brixen-microarrays-huber-quantilenorm.png", width=1280, height=960, pointsize=24)
par(mfrow=c(2,2))

qn1(exprs(Dilution), main="before")
qn1(nq, main="after quantile normalisation")
qn2(exprs(Dilution))
qn2(nq)

dev.off()


##---------------------------------------------------------------
## robust regression
##---------------------------------------------------------------

x = runif(10)*10
y = 2 + 2*x + rnorm(length(x))
wm = which.min(x)
y[wm] = y[wm]+10
plot(x,y, pch=16)
f1 = lm(y~x)
f2 = rlm(y~x)
cols = c("#E41A1C", "#377EB8")
lines(x, predict(f1), lwd=2, col=cols[1])
lines(x, predict(f2), lwd=2, col=cols[2])
legend("bottomright", c("lm", "rlm"), col=cols, lwd=2, lty=1)

##---------------------------------------------------------------
## two component error model
##---------------------------------------------------------------
pdf(file="110628-brixen-microarrays-huber-twocomponent.pdf", width=8, height=4)
par(mfrow=c(1,2))
mean = seq(0, 1000, length=40)
a = 1000; b=.01
variance = a + b*mean^2
varmax = variance[length(variance)]
plot(mean, variance, type="l", lwd=2, ylim=c(0, varmax))
abline(h=a, col="blue", lty=2)
text(0, varmax, substitute(var==a+b~mean^2, list(a=a,b=b)), adj=c(0,1))
plot(mean, sqrt(variance), type="l", lwd=2, ylim=c(0, sqrt(varmax)))
abline(h=sqrt(a), col="blue", lty=2)
abline(a=0, b=sqrt(b), col="orange", lty=2)
dev.off()
    
##---------------------------------------------------------------
## Tukey Biweight
##---------------------------------------------------------------
x = seq(1, 100, length=100)
m = 50
s = 18
u = (x-m)/s
w = ifelse(abs(u)>1, 0, ((1 - u^2)^2))
plot(x, w, main="Tukey Biweight", type="l")

##---------------------------------------------------------------
## Median Polish
##---------------------------------------------------------------
x1 = c(rnorm(13), 8+rnorm(12))
x2 = x1
x2[1] = x2[1]+8

rg = range(c(x1,x2))
myhist=function(x){
  hist(x, xlim=rg, breaks=12, col="orange", main="")
  med = median(x)
  trm = mean(x, trim=0.15)
  abline(v=c(med, trm), col=cols, lwd=2)
  }
  
par(mfrow=c(1,2))
myhist(x1)
myhist(x2)