## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
  echo = TRUE,
  collapse = TRUE,
  comment = "#>"
)

## ----setup.show, include=FALSE------------------------------------------------
suppressMessages(library(synaptome.db))
suppressMessages(library(dplyr))
library(pander)
library(ggplot2)

## ----pre_histogram,fig.width=8,fig.height=8,fig.cap='Number of identified proteins in Presynaptic datasets',eval=TRUE, echo=FALSE, message=FALSE, warning=FALSE,fig.show='hold'----
# Helpers shared by the presynaptic, postsynaptic and synaptic vesicle
# histograms below.

# Bin a vector of counts into ordered categories for the plot legend.
#
# npmid:        numeric vector of counts (the `Npmid` column; presumably the
#               number of papers reporting a gene -- confirm against the
#               synaptome.db documentation). Assumed to hold whole numbers.
# lower_bounds: increasing lower bounds of the bins; the last bin has no
#               upper limit.
# labels:       one label per bin, in the same order as `lower_bounds`.
#
# Returns an ordered factor with levels `labels`. Counts below the first
# bound and missing counts become NA; a zero-length input yields a
# zero-length factor instead of an error.
bin_paper_counts <- function(npmid, lower_bounds, labels) {
  cut(
    npmid,
    breaks = c(lower_bounds, Inf),
    labels = labels,
    right = FALSE, # bins are [lower, next lower), e.g. [2, 4) for "2-3"
    ordered_result = TRUE
  )
}

# Convert paper labels into an ordered factor whose levels are sorted by the
# number captured by the first group of `pattern` (presumably the publication
# year in labels shaped like "Name_2004" -- confirm against getPapers()).
#
# paper:   vector of paper labels.
# pattern: regular expression whose first capture group is the sort key.
#
# Returns an ordered factor with the same length as `paper`.
order_papers <- function(paper, pattern = "^[^0-9]+_([0-9]+)") {
  ids <- unique(paper)
  sort_key <- as.numeric(sub(pattern, "\\1", ids))
  factor(paper, levels = ids[order(sort_key)], ordered = TRUE)
}

gp <- findGeneByCompartmentPaperCnt(1)

# presynaptic stats
presgp <- gp[gp$Localisation == "Presynaptic", ]
syngp <- gp[gp$Localisation == "Synaptosome", ]
presg <- getGeneInfoByIDs(presgp$GeneID)
mpres <- merge(presgp, presg, by = c("GeneID", "Localisation"))
mmpres <- mpres[, c(
  "GeneID",
  "HumanEntrez.x",
  "HumanName.x",
  "Npmid",
  "PaperPMID",
  "Paper",
  "Year"
)]
papers <- getPapers()
prespap <- papers[papers$Localisation == "Presynaptic", ]
mmmpres <- mmpres[mmpres$PaperPMID %in% prespap$PaperPMID, ]
# The last label covers counts of 10 and above.
mmmpres$found <- bin_paper_counts(
  mmmpres$Npmid,
  lower_bounds = c(1, 2, 4, 10),
  labels = c("1", "2-3", "4-9", ">10")
)
mmmpres$Paper <- order_papers(mmmpres$Paper)

# One row per gene/paper pair so each gene is counted once per paper.
ummpres <- unique(mmmpres[, c("GeneID", "Paper", "found")])
ggplot(ummpres) +
  geom_bar(aes(y = Paper, fill = found))

## ----post_histogram,fig.width=8,fig.height=8,fig.cap='Number of identified proteins in Postsynaptic datasets',eval=TRUE, echo=FALSE, message=FALSE, warning=FALSE,fig.show='hold'----
# postsynaptic stats
pstgp <- gp[gp$Localisation == "Postsynaptic", ]
postg <- getGeneInfoByIDs(pstgp$GeneID)
mpost <- merge(pstgp, postg, by = c("GeneID", "Localisation"))
mmpost <- mpost[, c(
  "GeneID",
  "HumanEntrez.x",
  "HumanName.x",
  "Npmid",
  "PaperPMID",
  "Paper",
  "Year"
)]
postspap <- papers[papers$Localisation == "Postsynaptic", ]
mmmpost <- mmpost[mmpost$PaperPMID %in% postspap$PaperPMID, ]
# The last label covers counts of 10 and above.
mmmpost$found <- bin_paper_counts(
  mmmpost$Npmid,
  lower_bounds = c(1, 2, 4, 10),
  labels = c("1", "2-3", "4-9", ">10")
)
mmmpost$Paper <- order_papers(mmmpost$Paper)
ummpos <- unique(mmmpost[, c("GeneID", "Paper", "found")])
ggplot(ummpos) +
  geom_bar(aes(y = Paper, fill = found))

## ----sv_histogram,fig.width=8,fig.height=8,fig.cap='Number of identified proteins in Synaptic Vesicle datasets (note the difference in the color scale)',eval=TRUE, echo=FALSE, message=FALSE, warning=FALSE,fig.show='hold'----
# synaptic vesicle stats (kept in their own objects so the postsynaptic
# tables built above are not overwritten)
svgp <- gp[gp$Localisation == "Synaptic_Vesicle", ]
svg <- getGeneInfoByIDs(svgp$GeneID)
msv <- merge(svgp, svg, by = c("GeneID", "Localisation"))
# Rows whose Dataset is not "FULL" get an "_SVR" suffix on the paper label.
msv$Paper <- paste0(msv$Paper, ifelse("FULL" == msv$Dataset, "", "_SVR"))
mmsv <- msv[, c(
  "GeneID",
  "HumanEntrez.x",
  "HumanName.x",
  "Npmid",
  "PaperPMID",
  "Paper",
  "Year"
)]
svpap <- papers[papers$Localisation == "Synaptic_Vesicle", ]
mmmsv <- mmsv[mmsv$PaperPMID %in% svpap$PaperPMID, ]
# Narrower bins than the pre/postsynaptic plots; the last label covers
# counts of 6 and above.
mmmsv$found <- bin_paper_counts(
  mmmsv$Npmid,
  lower_bounds = c(1, 2, 4, 6),
  labels = c("1", "2-3", "4-5", ">6")
)
# The pattern also consumes an optional suffix (such as "_SVR") after the
# captured number so that only the number is left as sort key.
mmmsv$Paper <- order_papers(mmmsv$Paper, pattern = "^[^0-9]+_([0-9]+)_?.*")

ummsv <- unique(mmmsv[, c("GeneID", "Paper", "found")])
g <- ggplot(ummsv) +
  geom_bar(aes(y = Paper, fill = found))
g

## ----utot_histogram,fig.width=8,fig.height=8,fig.cap='Number of identified proteins in different Brain Regions(stacked)',eval=TRUE, echo=FALSE, message=FALSE, warning=FALSE,fig.show='hold'----
# brain region statistics
totg <- getGeneInfoByIDs(gp$GeneID)
mtot <- merge(gp, totg, by = c("GeneID", "Localisation"))
mmptot <- mtot[, c(
  "GeneID",
  "HumanEntrez.x",
  "HumanName.x",
  "Localisation",
  "Npmid",
  "Paper",
  "BrainRegion"
)]
untot <- unique(mmptot[, c("GeneID", "BrainRegion", "Localisation")])
loccolors <- c("#3D5B59", "#B5E5CF", "#FCB5AC", "#B99095")
# Keep one colour per localisation present in the data.
loccolors <- loccolors[seq_along(unique(untot$Localisation))]
ggplot(untot) +
  geom_bar(aes(y = BrainRegion, fill = Localisation)) +
  scale_fill_manual(values = loccolors)

## ----grouped_histogram,fig.width=8,fig.height=4,fig.cap='Number of identified proteins in different Brain Regions(grouped)',eval=TRUE, echo=FALSE, message=FALSE, warning=FALSE,fig.show='hold'----
# Cross-tabulate localisation against brain region and plot the counts as
# side-by-side bars.
m <- table(untot$Localisation, untot$BrainRegion)
udf <- as.data.frame(m)
names(udf) <- c("Localisation", "BrainRegion", "value")
ggplot(udf, aes(fill = Localisation, y = value, x = BrainRegion)) +
  geom_bar(position = "dodge", stat = "identity") +
  scale_fill_manual(values = loccolors) +
  theme(
    axis.text.x = element_text(
      face = "plain",
      color = "#993333",
      angle = 45,
      vjust = 1,
      hjust = 1,
      size = rel(1.5)
    )
  )

## ----gene_info----------------------------------------------------------------
# Results are stored in `res` rather than `t` so that base::t() is not masked.
res <- getGeneInfoByEntrez(1742)
pander(head(res))

res <- getGeneInfoByName("CASK")
pander(head(res))

res <- getGeneInfoByName(c("CASK", "DLG2"))
pander(head(res))

## ----findIDs------------------------------------------------------------------
res <- findGenesByEntrez(c(1742, 1741, 1739, 1740))
pander(head(res))

res <- findGenesByName(c("SRC", "SRCIN1", "FYN"))
pander(head(res))

## ----disease------------------------------------------------------------------
res <- getGeneDiseaseByName(c("CASK", "DLG2", "DLG1"))
pander(head(res))

res <- getGeneDiseaseByEntres(c(8573, 1742, 1739))
pander(head(res))

## ----paper--------------------------------------------------------------------
p <- getPapers()
pander(head(p))

## ----gene_count---------------------------------------------------------------
# find all proteins in synaptic proteome identified 2 times or more
gp <- findGeneByPaperCnt(cnt = 2)
pander(head(gp))

## ----gene_papCount------------------------------------------------------------
spg <- findGeneByPapers(p$PaperPMID[1:5], cnt = 1)
pander(head(spg))

## ----gene_comp----------------------------------------------------------------
gcp <- findGeneByCompartmentPaperCnt(cnt = 2)
pander(head(gcp))

# Now user can select the specific compartment and proceed working with
# obtained list of frequently found proteins
presgp <- gcp[gcp$Localisation == "Presynaptic", ]
dim(presgp)
pander(head(presgp))

## ----PPI----------------------------------------------------------------------
res <- getPPIbyName(
  c("CASK", "DLG4", "GRIN2A", "GRIN2B", "GRIN1"),
  type = "limited"
)
pander(head(res))

res <- getPPIbyEntrez(c(1739, 1740, 1742, 1741), type = "induced")
pander(head(res))

# obtain PPIs for the list of frequently found genes in presynaptic compartment
res <- getPPIbyEntrez(presgp$HumanEntrez, type = "induced")
pander(head(res))

## ----compPPI------------------------------------------------------------------
# getting the list of compartments
comp <- getCompartments()
pander(comp)

# getting all genes for postsynaptic compartment
gns <- getAllGenes4Compartment(compartmentID = 1)
pander(head(gns))

# getting full PPI network for postsynaptic compartment
ppi <- getPPIbyIDs4Compartment(
  gns$GeneID,
  compartmentID = 1,
  type = "induced"
)
pander(head(ppi))

## ----regPPI-------------------------------------------------------------------
# getting the full list of brain regions
reg <- getBrainRegions()
pander(reg)

# getting all genes for mouse Striatum
gns <- getAllGenes4BrainRegion(brainRegion = "Striatum", taxID = 10090)
pander(head(gns))

# getting full PPI network for mouse Striatum
ppi <- getPPIbyIDs4BrainRegion(
  gns$GeneID,
  brainRegion = "Striatum",
  taxID = 10090,
  type = "limited"
)
pander(head(ppi))

## ----check_list---------------------------------------------------------------
# check which of the Entrez IDs 1 to 250 are in the database
listG <- findGenesByEntrez(1:250)
dim(listG)
head(listG)

# check which genes from subset identified as synaptic are presynaptic
getCompartments()
presG <- getGenes4Compartment(listG$GeneID, 2)
dim(presG)
head(presG)

# check which genes from subset identified as synaptic are found in
# mouse cerebellum (taxID 10090 is the same ID used for mouse Striatum above)
# NOTE(review): this comment previously said "human"; if human data were
# intended, pass the human taxonomy ID instead -- confirm with the authors.
getBrainRegions()
listR <- getGenes4BrainRegion(
  listG$GeneID,
  brainRegion = "Cerebellum",
  taxID = 10090
)
dim(listR)
head(listR)

## ----PPI_igraph,fig.width=7,fig.height=7,out.width="70%",fig.align = "center"----
library(igraph)
# `type` is spelled out in full instead of relying on the abbreviation "lim".
g <- getIGraphFromPPI(
  getPPIbyIDs(c(48, 129, 975, 4422, 5715, 5835), type = "limited")
)
plot(g, vertex.label = V(g)$RatName, vertex.size = 25)

## ----PPI_table----------------------------------------------------------------
tbl <- getTableFromPPI(getPPIbyIDs(c(48, 585, 710), type = "limited"))
tbl

## ----pre_histogram, eval=FALSE, include=TRUE----------------------------------
# gp<-findGeneByCompartmentPaperCnt(1)
#
# # presynaptic stats
# presgp <- gp[gp$Localisation == "Presynaptic",]
# syngp <- gp[gp$Localisation == "Synaptosome",]
# presg <- getGeneInfoByIDs(presgp$GeneID)
# mpres <- merge(presgp, presg, by = c("GeneID","Localisation"))
# mmpres <- mpres[, c('GeneID',
#                     'HumanEntrez.x',
#                     'HumanName.x',
#                     'Npmid',
#                     'PaperPMID',
#                     'Paper',
#                     'Year')]
# papers <- getPapers()
# prespap <- papers[papers$Localisation == "Presynaptic",]
# mmmpres <- mmpres[mmpres$PaperPMID %in% prespap$PaperPMID,]
# mmmpres$found <- 0
# for(i in 1:dim(mmmpres)[1]) {
#     if (mmmpres$Npmid[i] == 1) {
#         mmmpres$found[i] <- '1'
#     } else if (mmmpres$Npmid[i] > 1 & mmmpres$Npmid[i] < 4) {
#         mmmpres$found[i] <- '2-3'
#     } else if (mmmpres$Npmid[i] >= 4 & mmmpres$Npmid[i] < 10) {
#         mmmpres$found[i] <- '4-9'
#     } else if (mmmpres$Npmid[i] >= 10) {
#         mmmpres$found[i] <- '>10'
#     }
# }
#
# mmmpres$found<- factor(mmmpres$found,
#                        levels = c('1','2-3','4-9','>10'),
#                        ordered=TRUE)
# tp<-unique(mmmpres$Paper)
# mmmpres$Paper<- factor(mmmpres$Paper,
#     levels =tp[order(as.numeric(sub('^[^0-9]+_([0-9]+)',
#                                     '\\1',tp)))],
#     ordered=TRUE)
#
# ummpres<-unique(mmmpres[,c('GeneID','Paper','found')])
# ggplot(ummpres) + geom_bar(aes(y = Paper, fill = found))

## ----post_histogram, eval=FALSE, include=TRUE---------------------------------
# #postsynaptic stats
# pstgp <- gp[gp$Localisation == "Postsynaptic",]
# postg <- getGeneInfoByIDs(pstgp$GeneID)
# mpost <- merge(pstgp, postg, by = c("GeneID","Localisation"))
# mmpost <- mpost[, c('GeneID',
#                     'HumanEntrez.x',
#                     'HumanName.x','Npmid',
#                     'PaperPMID','Paper','Year')]
# postspap <- papers[papers$Localisation == "Postsynaptic",]
# mmmpost <- mmpost[mmpost$PaperPMID %in% postspap$PaperPMID,]
# mmmpost$found <- 0
# for(i in 1:dim(mmmpost)[1]) {
#     if (mmmpost$Npmid[i] == 1) {
#         mmmpost$found[i] <- '1'
#     } else if (mmmpost$Npmid[i] > 1 & mmmpost$Npmid[i] < 4) {
#         mmmpost$found[i] <- '2-3'
#     } else if (mmmpost$Npmid[i] >= 4 & mmmpost$Npmid[i] < 10) {
#         mmmpost$found[i] <- '4-9'
#     } else if (mmmpost$Npmid[i] >= 10) {
#         mmmpost$found[i] <- '>10'
#     }
# }
# mmmpost$found<- factor(mmmpost$found,levels = c('1','2-3','4-9','>10'),
#                        ordered=TRUE)
# tp<-unique(mmmpost$Paper)
# mmmpost$Paper<- factor(mmmpost$Paper,
#     levels =tp[order(as.numeric(sub('^[^0-9]+_([0-9]+)',
#                                     '\\1',tp)))],
#     ordered=TRUE)
# ummpos<-unique(mmmpost[,c('GeneID','Paper','found')])
# ggplot(ummpos) + geom_bar(aes(y = Paper, fill = found))

## ----sv_histogram, eval=FALSE, include=TRUE-----------------------------------
# svgp <- gp[gp$Localisation == "Synaptic_Vesicle",]
# svg <- getGeneInfoByIDs(svgp$GeneID)
# mpost <- merge(svgp, svg, by = c("GeneID","Localisation"))
# mpost$Paper<-paste0(mpost$Paper,ifelse('FULL'==mpost$Dataset,'','_SVR'))
# mmpost <- mpost[, c('GeneID','HumanEntrez.x','HumanName.x','Npmid',
#                     'PaperPMID','Paper','Year')]
# postspap <- papers[papers$Localisation == "Synaptic_Vesicle",]
# mmmpost <- mmpost[mmpost$PaperPMID %in% postspap$PaperPMID,]
# mmmpost$found <- 0
# for(i in 1:dim(mmmpost)[1]) {
#     if (mmmpost$Npmid[i] == 1) {
#         mmmpost$found[i] <- '1'
#     } else if (mmmpost$Npmid[i] > 1 & mmmpost$Npmid[i] < 4) {
#         mmmpost$found[i] <- '2-3'
#     } else if (mmmpost$Npmid[i] >= 4 & mmmpost$Npmid[i] < 6) {
#         mmmpost$found[i] <- '4-5'
#     } else if (mmmpost$Npmid[i] >= 6) {
#         mmmpost$found[i] <- '>6'
#     }
# }
#
# mmmpost$found<- factor(mmmpost$found,levels = c('1','2-3','4-5','>6'),
#                        ordered=TRUE)
# tp<-unique(mmmpost$Paper)
# mmmpost$Paper<- factor(mmmpost$Paper,
#     levels =tp[order(as.numeric(sub('^[^0-9]+_([0-9]+)_?.*',
#                                     '\\1',tp)))],
#     ordered=TRUE)
#
# ummpos<-unique(mmmpost[,c('GeneID','Paper','found')])
# g<-ggplot(ummpos) + geom_bar(aes(y = Paper, fill = found))
# g

## ----utot_histogram, eval=FALSE, include=TRUE---------------------------------
# #brain region statistics
# totg <- getGeneInfoByIDs(gp$GeneID)
# mtot <- merge(gp, totg, by = c("GeneID","Localisation"))
# mmptot <- mtot[, c('GeneID',
#                    'HumanEntrez.x',
#                    'HumanName.x',
#                    'Localisation',
#                    'Npmid',
#                    'Paper',
#                    'BrainRegion')]
# untot<-unique(mmptot[,c('GeneID','BrainRegion','Localisation')])
# loccolors<-c("#3D5B59","#B5E5CF","#FCB5AC", "#B99095")
# loccolors<-loccolors[1:length(unique(untot$Localisation))]
# ggplot(untot) + geom_bar(aes(y = BrainRegion, fill = Localisation)) +
#     scale_fill_manual(values = loccolors)

## ----grouped_histogram, eval=FALSE, include=TRUE------------------------------
# table(untot$Localisation,untot$BrainRegion)-> m
# as.data.frame(m)->udf
# names(udf)<-c('Localisation','BrainRegion','value')
# ggplot(udf, aes(fill=Localisation, y=value, x=BrainRegion)) +
#     geom_bar(position="dodge", stat="identity")+
#     scale_fill_manual(values = loccolors) +
#     theme(axis.text.x = element_text(face="plain",
#                                      color="#993333",
#                                      angle=45,vjust = 1,
#                                      hjust=1,size = rel(1.5)))

## ----sessionInfo, echo=FALSE, results='asis', class='text', warning=FALSE-----
# Session details are kept in `session` rather than `c`, so the calls to
# base::c() below do not share a name with the object they operate on.
session <- devtools::session_info()
# Platform details, turned into a one-column table.
pander::pander(t(data.frame(c(session$platform))))
# Package table without columns 4, 5, 10 and 11.
pander::pander(as.data.frame(session$packages)[, -c(4, 5, 10, 11)])