Biocpkg("BloodCancerMultiOmics2017")
is a multi-omic dataset comprising genome, transcriptome, DNA methylome data together with data from the ex vivo drug sensitivity screen of the primary blood tumor samples.
In this vignette we present the analysis of the Primary Blood Cancer Encyclopedia (PACE) project and source code for the paper
Drug-perturbation-based stratification of blood cancer
Sascha Dietrich*, Małgorzata Oleś*, Junyan Lu*,
Leopold Sellner, Simon Anders, Britta Velten, Bian Wu, Jennifer Hüllein, Michelle da Silva Liberio, Tatjana Walther, Lena Wagner, Sophie Rabe, Sonja Ghidelli-Disse, Marcus Bantscheff, Andrzej K. Oleś, Mikołaj Słabicki, Andreas Mock, Christopher C. Oakes, Shihui Wang, Sina Oppermann, Marina Lukas, Vladislav Kim, Martin Sill, Axel Benner, Anna Jauch, Lesley Ann Sutton, Emma Young, Richard Rosenquist, Xiyang Liu, Alexander Jethwa, Kwang Seok Lee, Joe Lewis, Kerstin Putzker, Christoph Lutz, Davide Rossi, Andriy Mokhir, Thomas Oellerich, Katja Zirlik, Marco Herling, Florence Nguyen-Khac, Christoph Plass, Emma Andersson, Satu Mustjoki, Christof von Kalle, Anthony D. Ho, Manfred Hensel, Jan Dürig, Ingo Ringshausen, Marc Zapatka,
Wolfgang Huber and Thorsten Zenz
J. Clin. Invest. (2018); 128(1):427–445. doi:10.1172/JCI93801.
The presented analysis was done by Małgorzata Oleś, Sascha Dietrich, Junyan Lu, Britta Velten, Andreas Mock, Vladislav Kim and Wolfgang Huber.
This vignette was put together by Małgorzata Oleś.
This vignette is build from the sub-vignettes, which each can be build separately. The parts are separated by the horizontal lines. Each part finishes with removal of all the created objects.
library("AnnotationDbi")
library("abind")
library("beeswarm")
library("Biobase")
library("biomaRt")
library("broom")
library("colorspace")
library("cowplot")
library("dendsort")
library("DESeq2")
library("doParallel")
library("dplyr")
library("foreach")
library("forestplot")
library("genefilter")
library("ggbeeswarm")
library("ggdendro")
library("ggplot2")
#library("ggtern")
library("glmnet")
library("grid")
library("gridExtra")
library("gtable")
library("hexbin")
library("IHW")
library("ipflasso")
library("knitr")
library("limma")
library("magrittr")
library("maxstat")
library("nat")
library("org.Hs.eg.db")
library("BloodCancerMultiOmics2017")
library("pheatmap")
library("piano")
library("readxl")
library("RColorBrewer")
library("reshape2")
library("Rtsne")
library("scales")
library("SummarizedExperiment")
library("survival")
library("tibble")
library("tidyr")
library("xtable")
options(stringsAsFactors=FALSE)
Loading the data.
data("drpar", "drugs", "patmeta", "mutCOM")
Creating vectors of patient samples and drugs within the drug screen. Within drugs, we omit the statistics for one drug combination, due to lack of possibility to assign its targets.
# PATIENTS
patM = colnames(drpar)
# DRUGS
drM = rownames(drpar)
drM = drM[!drM %in% "D_CHK"] # remove combintation of 2 drugs: D_CHK
General plotting parameters.
bwScale = c("0"="white","1"="black","N.A."="grey90")
lfsize = 16 # legend font size
Categorize the drugs.
drugs$target_category = as.character(drugs$target_category)
drugs$group = NA
drugs$group[which(drugs$approved_042016==1)] = "FDA approved"
drugs$group[which(drugs$devel_042016==1)] = "clinical development/\ntool compound"
Show the characteristics.
tool compound | ||
---|---|---|
DNA damage response | 4 | 12 |
EGFR | 3 | 0 |
Other | 3 | 4 |
ALK | 1 | 1 |
Angiogenesis | 1 | 0 |
Apoptosis (BH3, Survivin) | 1 | 2 |
B-cell receptor | 1 | 5 |
BCR/ABL | 1 | 0 |
Epigenome | 1 | 2 |
Hedgehog signaling | 1 | 0 |
Immune modulation | 1 | 0 |
JAK/STAT | 1 | 2 |
MAPK | 1 | 7 |
PI3K/AKT | 1 | 5 |
Proteasome | 1 | 1 |
mTOR | 1 | 1 |
Cell cycle control | 0 | 6 |
Cytoskeleton | 0 | 2 |
HSP90 | 0 | 1 |
MET | 0 | 1 |
Mitochondrial metabolism | 0 | 2 |
NFkB | 0 | 2 |
NOTCH | 0 | 1 |
PIM | 0 | 1 |
PKC | 0 | 3 |
Reactive oxygen species | 0 | 3 |
Splicing | 0 | 1 |
TGF | 0 | 1 |
WNT | 0 | 1 |
Show number of samples stratified by the diagnosis.
Var1 | Freq |
---|---|
CLL | 184 |
T-PLL | 25 |
MCL | 10 |
MZL | 6 |
AML | 5 |
LPL | 4 |
B-PLL | 3 |
HCL | 3 |
hMNC | 3 |
HCL-V | 2 |
Sezary | 2 |
FL | 1 |
PTCL-NOS | 1 |
Within CLL group, we now show mutations with occurred in at least 4 samples.
# select CLL samples
patM = patM[patmeta[patM,"Diagnosis"]=="CLL"]
ighv = factor(setNames(patmeta[patM,"IGHV"], nm=patM), levels=c("U","M"))
mut1 = c("del17p13", "del11q22.3", "trisomy12", "del13q14_any")
mut2 = c("TP53", "ATM", "SF3B1", "NOTCH1", "MYD88")
mc = assayData(mutCOM)$binary[patM,]
## SELECTION OF MUTATIONS
# # include mutations with at least incidence of 4
mut2plot = names(which(sort(colSums(mc, na.rm=TRUE), decreasing=TRUE)>3))
# remove chromothrypsis
mut2plot = mut2plot[-grep("Chromothripsis", mut2plot)]
# divide mutations into gene mut and cnv
mut2plotSV = mut2plot[grep("[[:lower:]]", mut2plot)]
mut2plotSP = mut2plot[grep("[[:upper:]]", mut2plot)]
# remove some other things (it is quite manual thing, so be careful)
# IF YOU WANT TO REMOVE SOME MORE MUTATIONS JUST ADD THE LINES HERE!
mut2plotSV = mut2plotSV[-grep("del13q14_mono", mut2plotSV)]
mut2plotSV = mut2plotSV[-grep("del13q14_bi", mut2plotSV)]
mut2plotSV = mut2plotSV[-grep("del14q24.3", mut2plotSV)]
# rearrange the top ones to match the order in mut1 and mut2
mut2plotSV = c(mut1, mut2plotSV[!mut2plotSV %in% mut1])
mut2plotSP = c(mut2, mut2plotSP[!mut2plotSP %in% mut2])
factors = data.frame(assayData(mutCOM)$binary[patM, c(mut2plotSV, mut2plotSP)],
check.names=FALSE)
# change del13q14_any to del13q14
colnames(factors)[which(colnames(factors)=="del13q14_any")] = "del13q14"
mut2plotSV = gsub("del13q14_any", "del13q14", mut2plotSV)
# change it to factors
for(i in 1:ncol(factors)) {
factors[,i] = factor(factors[,i], levels=c(1,0))
}
ord = order(factors[,1], factors[,2], factors[,3], factors[,4], factors[,5],
factors[,6], factors[,7], factors[,8], factors[,9], factors[,10],
factors[,11], factors[,12], factors[,13], factors[,14],
factors[,15], factors[,16], factors[,17], factors[,18],
factors[,19], factors[,20], factors[,21], factors[,22],
factors[,23], factors[,24], factors[,25], factors[,26],
factors[,27], factors[,28], factors[,29], factors[,30],
factors[,31], factors[,32])
factorsord = factors[ord,]
patM = patM[ord]
(c(mut2plotSV, mut2plotSP))
## [1] "del17p13" "del11q22.3" "trisomy12" "del13q14" "del8p12"
## [6] "gain2p25.3" "gain8q24" "del6q21" "gain3q26" "del9p21.3"
## [11] "del15q15.1" "del6p21.2" "TP53" "ATM" "SF3B1"
## [16] "NOTCH1" "MYD88" "BRAF" "KRAS" "EGR2"
## [21] "MED12" "PCLO" "MGA" "ACTN2" "BIRC3"
## [26] "CPS1" "FLRT2" "KLHL6" "NFKBIE" "RYR2"
## [31] "XPO1" "ZC3H18"
Let’s now look deeper and for each mutation. We ask how many samples have (1) or don’t have (0) a particular mutation.
plotDF = meltWholeDF(factorsord)
plotDF$Mut =
ifelse(sapply(plotDF$X,
function(x) grep(x, list(mut2plotSV, mut2plotSP)))==1,"SV","SP")
plotDF$Status = "N.A."
plotDF$Status[plotDF$Measure==1 & plotDF$Mut=="SV"] = "1a"
plotDF$Status[plotDF$Measure==1 & plotDF$Mut=="SP"] = "1b"
plotDF$Status[plotDF$Measure==0] = "0"
plotDF$Status = factor(plotDF$Status, levels=c("1a","1b","0","N.A."))
plotDF$Y = factor(plotDF$Y, levels=patM)
plotDF$X = factor(plotDF$X, levels=rev(colnames(factorsord)))
mutPL = ggplotGrob(
ggplot(data=plotDF, aes(x=Y, y=X, fill=Status)) + geom_tile() +
scale_fill_manual(
values=c("0"="white","1a"="forestgreen","1b"="navy","N.A."="grey90"),
name="Mutation", labels=c("CNV","Gene mutation","WT","NA")) +
ylab("") + xlab("") +
geom_vline(xintercept=seq(0.5,length(patM)+1,5), colour="grey60") +
geom_hline(yintercept=seq(0.5,ncol(factorsord)+1,1), colour="grey60") +
scale_y_discrete(expand=c(0,0)) + scale_x_discrete(expand=c(0,0)) +
theme(axis.ticks=element_blank(), axis.text.x=element_blank(),
axis.text.y=element_text(
size=60, face=ifelse(levels(plotDF$X) %in% mut2plotSV,
"plain","italic")),
axis.text=element_text(margin=unit(0.5,"cm"), colour="black"),
legend.key = element_rect(colour = "black"),
legend.text=element_text(size=lfsize),
legend.title=element_text(size=lfsize)))
## Warning: Vectorized input to `element_text()` is not officially supported.
## ℹ Results may be unexpected or may change in future versions of ggplot2.
res = table(plotDF[,c("X","Measure")])
knitr::kable(res[order(res[,2], decreasing=TRUE),])
0 | 1 | |
---|---|---|
del13q14 | 69 | 106 |
TP53 | 142 | 36 |
del11q22.3 | 146 | 28 |
trisomy12 | 146 | 27 |
del17p13 | 150 | 27 |
SF3B1 | 152 | 26 |
NOTCH1 | 160 | 19 |
ATM | 89 | 11 |
BRAF | 169 | 10 |
KRAS | 136 | 8 |
del8p12 | 131 | 8 |
gain8q24 | 162 | 7 |
gain2p25.3 | 132 | 7 |
PCLO | 94 | 6 |
MED12 | 94 | 6 |
EGR2 | 94 | 6 |
del6q21 | 163 | 6 |
MGA | 95 | 5 |
MYD88 | 173 | 5 |
del15q15.1 | 134 | 5 |
del9p21.3 | 134 | 5 |
gain3q26 | 134 | 5 |
ZC3H18 | 96 | 4 |
XPO1 | 96 | 4 |
RYR2 | 96 | 4 |
NFKBIE | 96 | 4 |
KLHL6 | 96 | 4 |
FLRT2 | 96 | 4 |
CPS1 | 96 | 4 |
BIRC3 | 96 | 4 |
ACTN2 | 96 | 4 |
del6p21.2 | 135 | 4 |
In the last part, we characterize samples according to metadata categories.
Age
ageDF = data.frame(Factor="Age",
PatientID=factor(patM, levels=patM),
Value=patmeta[patM,c("Age4Main")])
agePL = ggplotGrob(
ggplot(ageDF, aes(x=PatientID, y=Factor, fill=Value)) + geom_tile() +
scale_fill_gradient(low = "gold", high = "#3D1F00", na.value="grey92",
name="Age", breaks=c(40,60,80)) +
theme(axis.ticks=element_blank(),
axis.text=element_text(size=60, colour="black",
margin=unit(0.5,"cm")),
legend.text=element_text(size=lfsize),
legend.title=element_text(size=lfsize)))
hist(ageDF$Value, col="slategrey", xlab="Age", main="")
Sex
sexDF = data.frame(Factor="Sex", PatientID=factor(patM, levels=patM),
Value=patmeta[patM, "Gender"])
sexPL = ggplotGrob(
ggplot(sexDF, aes(x=PatientID, y=Factor, fill=Value)) + geom_tile() +
scale_fill_manual(values=c("f"="maroon","m"="royalblue4","N.A."="grey90"),
name="Sex", labels=c("Female","Male","NA")) +
theme(axis.ticks=element_blank(),
axis.text=element_text(size=60, colour="black",
margin=unit(0.5,"cm")),
legend.key = element_rect(colour = "black"),
legend.text=element_text(size=lfsize),
legend.title=element_text(size=lfsize)))
table(sexDF$Value)
##
## f m
## 76 108
Treatment
Number of samples treated (1) or not treated (0) before sampling.
treatDF = data.frame(Factor="Treated", PatientID=factor(patM, levels=patM),
Value=ifelse(patmeta[patM, "IC50beforeTreatment"], 0, 1))
treatDF$Value[is.na(treatDF$Value)] = "N.A."
treatDF$Value = factor(treatDF$Value, levels=c("0","1","N.A."))
treatPL = ggplotGrob(
ggplot(treatDF, aes(x=PatientID, y=Factor, fill=Value)) +geom_tile() +
scale_fill_manual(values=bwScale, name="Treated",
labels=c("0"="No","1"="Yes","N.A."="NA")) +
theme(axis.ticks=element_blank(),
axis.text=element_text(size=60, colour="black",
margin=unit(0.5,"cm")),
legend.key = element_rect(colour = "black"),
legend.text=element_text(size=lfsize),
legend.title=element_text(size=lfsize)))
table(treatDF$Value)
##
## 0 1 N.A.
## 131 52 1
IGHV status
Number of samples with (1) and without (0) the IGHV mutation.
ighvDF = data.frame(Factor="IGHV", PatientID=factor(patM, levels=patM),
Value=patmeta[patM, "IGHV"])
ighvDF$Value = ifelse(ighvDF$Value=="M", 1, 0)
ighvDF$Value[is.na(ighvDF$Value)] = "N.A."
ighvDF$Value = factor(ighvDF$Value, levels=c("0","1","N.A."))
ighvPL = ggplotGrob(
ggplot(ighvDF, aes(x=PatientID, y=Factor, fill=Value)) + geom_tile() +
scale_fill_manual(values=bwScale, name="IGHV",
labels=c("0"="Unmutated","1"="Mutated","N.A."="NA")) +
theme(axis.ticks=element_blank(),
axis.text=element_text(size=60, colour="black", margin=unit(0.5,"cm")),
legend.key=element_rect(colour = "black"),
legend.text=element_text(size=lfsize),
legend.title=element_text(size=lfsize)))
table(ighvDF$Value)
##
## 0 1 N.A.
## 74 98 12