## Code extracted from the musicatk vignette, one section per knitr chunk.
## Several musicatk calls below are made without assigning their return value
## (e.g. build_standard_table(), name_signatures(), create_umap()); the script
## relies on the object passed in being usable afterwards.
## NOTE(review): presumably these functions update their argument in the
## caller's environment -- confirm against the musicatk reference manual.

## ----setup, include=FALSE, results = "asis"-----------------------------------
BiocStyle::markdown()
knitr::opts_chunk$set(echo = TRUE, dev = "png")

## ----eval= FALSE--------------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#   install.packages("BiocManager")
# }
# BiocManager::install("musicatk")

## ----eval = FALSE-------------------------------------------------------------
# if (!requireNamespace("devtools", quietly = TRUE)) {
#   install.packages("devtools")
# }
#
# library(devtools)
# install_github("campbio/musicatk")

## ----eval = TRUE, message = FALSE---------------------------------------------
library(musicatk)

## ----extract_variants, message = FALSE----------------------------------------
# Extract variants from a MAF file
lusc_maf <- system.file("extdata", "public_TCGA.LUSC.maf",
                        package = "musicatk")
lusc.variants <- extract_variants_from_maf_file(maf_file = lusc_maf)

# Extract variants from an individual VCF file
luad_vcf <- system.file("extdata", "public_LUAD_TCGA-97-7938.vcf",
                        package = "musicatk")
luad.variants <- extract_variants_from_vcf_file(vcf_file = luad_vcf)

# Extract variants from multiple files and/or objects
melanoma_vcfs <- list.files(system.file("extdata", package = "musicatk"),
                            pattern = glob2rx("*SKCM*vcf"),
                            full.names = TRUE)
variants <- extract_variants(c(lusc_maf, luad_vcf, melanoma_vcfs))

## ----select_genome------------------------------------------------------------
g <- select_genome("hg38")

## ----create_musica------------------------------------------------------------
musica <- create_musica(x = variants, genome = g)

## ----build_tables-------------------------------------------------------------
build_standard_table(musica, g = g, table_name = "SBS96")

## ----combine_tables-----------------------------------------------------------
data(dbs_musica)
build_standard_table(dbs_musica, g, "SBS96", overwrite = TRUE)
build_standard_table(dbs_musica, g, "DBS78", overwrite = TRUE)

# Subset SBS table to DBS samples so they can be combined
count_tables <- tables(dbs_musica)
overlap_samples <- which(colnames(count_tables$SBS96@count_table) %in%
                           colnames(count_tables$DBS78@count_table))
# drop = FALSE keeps the two-dimensional shape even when only a single sample
# overlaps; without it `[` would collapse the result to a plain vector.
count_tables$SBS96@count_table <-
  count_tables$SBS96@count_table[, overlap_samples, drop = FALSE]
tables(dbs_musica) <- count_tables

combine_count_tables(
  musica = dbs_musica,
  to_comb = c("SBS96", "DBS78"),
  name = "sbs_dbs",
  description = "An example combined table, combining SBS96 and DBS",
  overwrite = TRUE
)

## -----------------------------------------------------------------------------
annotate_transcript_strand(musica, "19", build_table = FALSE)
build_custom_table(
  musica = musica,
  variant_annotation = "Transcript_Strand",
  name = "Transcript_Strand",
  description = "A table of transcript strand of variants",
  data_factor = c("T", "U"),
  overwrite = TRUE
)

## ----discover_sigs------------------------------------------------------------
result <- discover_signatures(musica = musica, table_name = "SBS96",
                              num_signatures = 3, algorithm = "nmf",
                              nstart = 10)

## ----result_accessors---------------------------------------------------------
# Extract the exposure matrix
expos <- exposures(result)
expos[1:3, 1:3]

# Extract the signature matrix
sigs <- signatures(result)
sigs[1:3, 1:3]

## ----plot_sigs----------------------------------------------------------------
plot_signatures(result)

## ----name_sigs----------------------------------------------------------------
name_signatures(result, c("Smoking", "APOBEC", "UV"))
plot_signatures(result)

## ----exposures_raw------------------------------------------------------------
plot_exposures(result, plot_type = "bar")

## ----exposures_prop-----------------------------------------------------------
plot_exposures(result, plot_type = "bar", proportional = TRUE)

## ----sample_counts------------------------------------------------------------
samples <- sample_names(musica)
plot_sample_counts(musica, sample_names = samples[c(3, 4, 5)],
                   table_name = "SBS96")

## ----compare_cosmic-----------------------------------------------------------
compare_cosmic_v2(result, threshold = 0.75)

## ----predict_cosmic-----------------------------------------------------------
# Load COSMIC V2 data
data("cosmic_v2_sigs")

# Predict pre-existing exposures using the "lda" method
pred_cosmic <- predict_exposure(musica = musica, table_name = "SBS96",
                                signature_res = cosmic_v2_sigs,
                                signatures_to_use = c(1, 4, 7, 13),
                                algorithm = "lda")

# Plot exposures
plot_exposures(pred_cosmic, plot_type = "bar")

## ----subtype_map--------------------------------------------------------------
cosmic_v2_subtype_map("lung")

## ----predict_previous---------------------------------------------------------
pred_our_sigs <- predict_exposure(musica = musica, table_name = "SBS96",
                                  signature_res = result, algorithm = "lda")

## ----predict_compare----------------------------------------------------------
compare_results(result = pred_cosmic, other_result = pred_our_sigs,
                threshold = 0.60)

## ----annotations--------------------------------------------------------------
annot <- read.table(system.file("extdata", "sample_annotations.txt",
                                package = "musicatk"),
                    sep = "\t", header = TRUE)
samp_annot(result, "Tumor_Subtypes") <- annot$Tumor_Subtypes

## ----sample_names-------------------------------------------------------------
sample_names(result)

## ----plot_exposures_by_subtype------------------------------------------------
plot_exposures(result, plot_type = "bar", group_by = "annotation",
               annotation = "Tumor_Subtypes")

## ----plot_exposures_box_annot-------------------------------------------------
plot_exposures(result, plot_type = "box", group_by = "annotation",
               annotation = "Tumor_Subtypes")

## ----plot_exposures_box_sig---------------------------------------------------
plot_exposures(result, plot_type = "box", group_by = "signature",
               color_by = "annotation", annotation = "Tumor_Subtypes")

## ----umap_create--------------------------------------------------------------
create_umap(result = result)

## ----umap_plot----------------------------------------------------------------
plot_umap(result = result)

## ----umap_plot_same_scale-----------------------------------------------------
plot_umap(result = result, same_scale = FALSE)

## ----umap_plot_annot----------------------------------------------------------
plot_umap(result = result, color_by = "annotation",
          annotation = "Tumor_Subtypes", add_annotation_labels = TRUE)

## ----plotly-------------------------------------------------------------------
plot_signatures(result, plotly = TRUE)
plot_exposures(result, plotly = TRUE)
plot_umap(result, plotly = TRUE)

## ----reproducible_prediction--------------------------------------------------
# Run the prediction under a fixed seed via withr::with_seed().
seed <- 1
reproducible_prediction <- withr::with_seed(
  seed,
  predict_exposure(musica = musica, table_name = "SBS96",
                   signature_res = result, algorithm = "lda")
)

## ----session------------------------------------------------------------------
sessionInfo()