## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----include=FALSE, echo=FALSE, results="hide", warning=FALSE-----------------
suppressPackageStartupMessages({
  library(rgoslin)
  library(dplyr)
  library(knitr)
  library(kableExtra)
})
scrollBoxWidth <- "650px"

# Render a table with the styling shared by every table in this script.
#
# x         table-like object handed to kable().
# caption   caption string handed to kable().
# height    CSS height of the scroll box, e.g. "300px".
# width     CSS width of the scroll box; defaults to scrollBoxWidth.
# font_size font size handed to kable_styling().
#
# Returns the value of scroll_box(), so a top-level call prints the table.
#
# font_size is passed as its own argument of kable_styling(). It must not be
# placed inside the bootstrap_options vector: there it only becomes one more
# (unrecognised) option string and the font size is never set.
styledScrollTable <- function(x, caption, height,
                              width = scrollBoxWidth, font_size = 7) {
  kable(x, caption = caption) %>%
    kable_styling(
      bootstrap_options = c("striped", "hover", "condensed"),
      font_size = font_size
    ) %>%
    scroll_box(width = width, height = height)
}

## ----eval=FALSE---------------------------------------------------------------
# if (!require("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
#
# BiocManager::install("rgoslin")

## ----eval = FALSE-------------------------------------------------------------
# library(rgoslin)

## -----------------------------------------------------------------------------
listAvailableGrammars()

## -----------------------------------------------------------------------------
isValidLipidName("PC 32:1")

## -----------------------------------------------------------------------------
df <- parseLipidNames("PC 32:1")

## ----echo = FALSE, results = 'asis'-------------------------------------------
styledScrollTable(
  df %>% select(-starts_with("FA"), -starts_with("LCB")),
  caption = "Lipid name parsing results for PC 32:1, FA and LCB columns omitted, since they are unpopulated (`NA`) on the lipid species level.",
  height = "150px"
)

## -----------------------------------------------------------------------------
originalName <- "TG(16:1(5E)/18:0/20:2(3Z,6Z))"
tagDf <- parseLipidNames(originalName, grammar = "LipidMaps")

## ----echo = FALSE, results = 'asis'-------------------------------------------
styledScrollTable(
  tagDf %>% select(-starts_with("FA"), -starts_with("LCB")),
  caption = "Lipid name parsing results for TG isomeric subspecies, FA and LCB columns omitted for brevity.",
  height = "200px"
)

## ----echo = FALSE, results = 'asis'-------------------------------------------
styledScrollTable(
  tagDf %>% select(Normalized.Name, starts_with("FA"), starts_with("LCB")),
  caption = "Lipid name parsing results for TG isomeric subspecies with FA and LCB columns.",
  height = "200px"
)

## -----------------------------------------------------------------------------
multipleLipidNamesDf <- parseLipidNames(
  c("PC 32:1", "LPC 34:1", "TG(18:1_18:0_16:1)")
)

## ----echo = FALSE, results = 'asis'-------------------------------------------
styledScrollTable(
  multipleLipidNamesDf %>% select(-starts_with("FA"), -starts_with("LCB")),
  caption = "Lipid name parsing results for PC 32:1, LPC 34:1, TG(18:1_18:0_16:1), FA and LCB columns omitted for brevity.",
  height = "300px"
)

## ----echo = FALSE, results = 'asis'-------------------------------------------
styledScrollTable(
  multipleLipidNamesDf %>%
    select(Normalized.Name, starts_with("FA"), starts_with("LCB")),
  caption = "Lipid name parsing results for PC 32:1, LPC 34:1, TG(18:1_18:0_16:1) with FA and LCB columns.",
  height = "300px"
)

## -----------------------------------------------------------------------------
originalNames <- c("PC 32:1", "LPC 34:1", "TAG 18:1_18:0_16:1")
multipleLipidNamesWithGrammar <- parseLipidNames(originalNames, grammar = "Goslin")

## ----echo = FALSE, results = 'asis'-------------------------------------------
styledScrollTable(
  multipleLipidNamesWithGrammar %>%
    select(-starts_with("FA"), -starts_with("LCB")),
  caption = "Lipid name parsing results for Goslin grammar and lipids PC 32:1, LPC 34:1, TG(18:1_18:0_16:1), FA and LCB columns omitted for brevity.",
  height = "300px"
)

## ----echo = FALSE, results = 'asis'-------------------------------------------
styledScrollTable(
  multipleLipidNamesWithGrammar %>%
    select(Normalized.Name, starts_with("FA"), starts_with("LCB")),
  caption = "Lipid name parsing results for Goslin grammar and lipids PC 32:1, LPC 34:1, TG(18:1_18:0_16:1) with FA and LCB columns.",
  height = "300px"
)

## -----------------------------------------------------------------------------
originalNames <- c(
  "LMFA01020216" = "5-methyl-octadecanoic acid",
  "LMFA08040030" = "N-((+/-)-8,9-dihydroxy-5Z,11Z,14Z-eicosatrienoyl)-ethanolamine"
)
normalizedFattyAcidsNames <- parseLipidNames(originalNames, "FattyAcids")

## ----echo = FALSE, results = 'asis'-------------------------------------------
styledScrollTable(
  normalizedFattyAcidsNames %>%
    select(-starts_with(c("LCB", "FA2", "FA3", "FA4", "Adduct", "Adduct.Charge"))),
  caption = "Lipid name parsing results for Goslin grammar and fatty acids 5-methyl-octadecanoic acid N-((+/-)-8,9-dihydroxy-5Z,11Z,14Z-eicosatrienoyl)-ethanolamine, some columns omitted for brevity.",
  height = "300px"
)

## -----------------------------------------------------------------------------
originalNames <- c("PC 32:1[M+H]1+", "PC 32:1 [M+H]+", "PC 32:1")
lipidNamesWithAdduct <- parseLipidNames(originalNames, "Goslin")

## ----echo = FALSE, results = 'asis'-------------------------------------------
styledScrollTable(
  lipidNamesWithAdduct %>% select(-starts_with("FA"), -starts_with("LCB")),
  caption = "Lipid name parsing results for Goslin grammar and lipids PC 32:1[M+H]1+, PC 32:1 [M+H]+ and PC 32:1, FA and LCB columns omitted for brevity.",
  height = "300px"
)

## ----eval = FALSE-------------------------------------------------------------
# if (!require("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
#
# BiocManager::install("lipidr")

## ----eval = FALSE-------------------------------------------------------------
# library(rgoslin)
# library(lipidr)
# library(stringr)
# library(ggplot2)

## ----include=FALSE, echo=FALSE, results="hide", warning=FALSE-----------------
suppressPackageStartupMessages({
  library(lipidr)
  library(stringr)
  library(ggplot2)
})

## -----------------------------------------------------------------------------
datadir <- system.file("extdata", package = "lipidr")
filelist <- list.files(datadir, "data.csv", full.names = TRUE) # all csv files
d <- read_skyline(filelist)
clinical_file <- system.file("extdata", "clin.csv", package = "lipidr")
d <- add_sample_annotation(d, clinical_file)

## ----echo = FALSE, results = 'asis'-------------------------------------------
styledScrollTable(
  rowData(d[1:10, 1:10]),
  caption = "Subset of first ten rows of row data.",
  height = "300px"
)

## -----------------------------------------------------------------------------
lipidNames <- parseLipidNames(rowData(d)$clean_name)

## -----------------------------------------------------------------------------
# lipidr stores original lipid names in the Molecule column
old_names <- rowData(d)$Molecule
# split lipid prefix from potential (d7) suffix for labeled internal standards
new_names <- rowData(d)$clean_name %>%
  str_match(pattern = "([\\w :-]+)(\\(\\w+\\))?")
# extract the first match group (the original word is at column index 1)
normalized_new_names <- new_names[, 2] %>%
  str_replace_all(c("Sa1P" = "SPBP", "So1P" = "SPBP")) %>%
  parseLipidNames()
## -----------------------------------------------------------------------------
# Swap the original molecule names in the lipidr object for the normalized
# names returned by parseLipidNames().
updated <- update_molecule_names(
  d,
  old_names,
  normalized_new_names$Normalized.Name
)

## -----------------------------------------------------------------------------
# Copy the parsed annotation columns into the row data of the updated object.
rowData(updated)$Class <- normalized_new_names$Lipid.Maps.Main.Class
rowData(updated)$Category <- normalized_new_names$Lipid.Maps.Category
rowData(updated)$Molecule <- normalized_new_names$Normalized.Name
rowData(updated)$LipidSpecies <- normalized_new_names$Species.Name
rowData(updated)$Mass <- normalized_new_names$Mass
rowData(updated)$SumFormula <- normalized_new_names$Sum.Formula
# select Ceramides, Lyso-Phosphatidylcholines and Phosphatidylcholines
# (includes plasmanyls and plasmenyls)
selected_classes <- c("Cer", "LPC", "PC")
lipid_classes <- rowData(updated)$Class %in% selected_classes
d <- updated[lipid_classes, ]

## -----------------------------------------------------------------------------
# NOTE(review): to_long_format() is reached through `:::`, i.e. it is not
# exported by lipidr -- confirm whether an exported alternative exists.
ddf <- lipidr:::to_long_format(d)
ggplot(ddf, aes(Molecule, Area, fill = Class)) +
  geom_boxplot() +
  facet_wrap(~filename, scales = "free_y") +
  scale_y_log10() +
  coord_flip()

## ----sessioninfo, echo=FALSE--------------------------------------------------
sessionInfo()