Contents

0.1 Preamble

library(COTAN)
library(zeallot)
library(rlang)
library(data.table)
library(Rtsne)
library(qpdf)
library(GEOquery)

options(parallelly.fork.enable = TRUE)

0.2 Introduction

This tutorial contains the same functionalities as the first release of the COTAN tutorial but done using the new and updated functions.

0.3 Get the data-set

Download the data-set for "mouse cortex E17.5".

dataDir <- tempdir()

GEO <- "GSM2861514"
fName <- "GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz"

dataSetFile <- file.path(dataDir, GEO, fName)

dir.create(file.path(dataDir, GEO), showWarnings = FALSE)

if (!file.exists(dataSetFile)) {
  getGEOSuppFiles(GEO, makeDirectory = TRUE,
                  baseDir = dataDir, fetch_files = TRUE,
                  filter_regex = fName)
}
#>                                                                              size
#> /tmp/RtmpNBS3Tn/GSM2861514/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz 1509523
#>                                                                           isdir
#> /tmp/RtmpNBS3Tn/GSM2861514/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz FALSE
#>                                                                           mode
#> /tmp/RtmpNBS3Tn/GSM2861514/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz  644
#>                                                                                         mtime
#> /tmp/RtmpNBS3Tn/GSM2861514/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz 2024-10-01 17:16:07
#>                                                                                         ctime
#> /tmp/RtmpNBS3Tn/GSM2861514/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz 2024-10-01 17:16:07
#>                                                                                         atime
#> /tmp/RtmpNBS3Tn/GSM2861514/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz 2024-10-01 17:16:06
#>                                                                            uid
#> /tmp/RtmpNBS3Tn/GSM2861514/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz 1002
#>                                                                            gid
#> /tmp/RtmpNBS3Tn/GSM2861514/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz 1002
#>                                                                               uname
#> /tmp/RtmpNBS3Tn/GSM2861514/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz biocbuild
#>                                                                              grname
#> /tmp/RtmpNBS3Tn/GSM2861514/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz biocbuild

sample.dataset <- read.csv(dataSetFile, sep = "\t", row.names = 1L)

Define a directory where the output will be stored.

outDir <- dataDir

# Log-level 2 was chosen to showcase better how the package works
# In normal usage a level of 0 or 1 is more appropriate
setLoggingLevel(2L)
#> Setting new log level to 2

# This file will contain all the logs produced by the package
# as if at the highest logging level
setLoggingFile(file.path(outDir, "vignette_v2.log"))
#> Setting log file to be: /tmp/RtmpNBS3Tn/vignette_v2.log

message("COTAN uses the `torch` library when asked to `optimizeForSpeed`")
#> COTAN uses the `torch` library when asked to `optimizeForSpeed`
message("Run the command 'options(COTAN.UseTorch = FALSE)'",
        " in your session to disable `torch` completely!")
#> Run the command 'options(COTAN.UseTorch = FALSE)' in your session to disable `torch` completely!

# this command does check whether the torch library is properly installed
c(useTorch, deviceStr) %<-% COTAN:::canUseTorch(TRUE, "cuda")
#> While trying to load the `torch` library Error in doTryCatch(return(expr), name, parentenv, handler): The `torch` library is installed but the required additional libraries are not avalable yet
#> Warning in value[[3L]](cond): The `torch` library is installed, but might
#> require further initialization
#> Warning in value[[3L]](cond): Please look at the `torch` package installation
#> guide to complete the installation
#> Warning in COTAN:::canUseTorch(TRUE, "cuda"): Falling back to legacy
#> [non-torch] code.
if (useTorch) {
  message("The `torch` library is available and ready to use")
  if (deviceStr == "cuda") {
    message("The `torch` library can use the `CUDA` GPU")
  } else {
    message("The `torch` library can only use the CPU")
    message("Please ensure you have the `OpenBLAS` libraries",
            " installed on the system")
  }
}

rm(useTorch, deviceStr)

1 Analytical pipeline

Initialize the COTAN object with the row count table and the metadata for the experiment.

cond <- "mouse_cortex_E17.5"

obj <- COTAN(raw = sample.dataset)
obj <- initializeMetaDataset(obj,
                             GEO = GEO,
                             sequencingMethod = "Drop_seq",
                             sampleCondition = cond)
#> Initializing `COTAN` meta-data

logThis(paste0("Condition ", getMetadataElement(obj, datasetTags()[["cond"]])),
        logLevel = 1L)
#> Condition mouse_cortex_E17.5

Before we proceed to the analysis, we need to clean the data. The analysis will use a matrix of raw UMI counts as the input. To obtain this matrix, we have to remove any potential cell doublets or multiplets, as well as any low quality or dying cells.

1.1 Data cleaning

We can check the library size (UMI number) with an empirical cumulative distribution function

plot(ECDPlot(obj))

plot(cellSizePlot(obj))
#> Warning: Removed 1 row containing missing values or values outside the scale range
#> (`geom_point()`).

plot(genesSizePlot(obj))