Contents

1 Install RegionalST

To install this package, start R (version “4.3”) and enter:

# Install BiocManager first when it is not already available,
# then use it to install RegionalST.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
}

BiocManager::install("RegionalST")

2 Preparing your data for RegionalST through BayesSpace

The data input step of the RegionalST package relies on the BayesSpace package. BayesSpace supports three ways of loading data for analysis.

First, reading Visium data through readVisium(): This function takes only the path to the Space Ranger output directory (containing the spatial/ and filtered_feature_bc_matrix/ subdirectories) and returns a SingleCellExperiment.

# Path to the Space Ranger output directory (placeholder; replace with your own).
visium_dir <- "path/to/spaceranger/outs/"
sce <- readVisium(visium_dir)

Second, you can create a SingleCellExperiment object directly from the count matrix:

library(Matrix)

# Gene-level annotation (one row per gene) and spot-level annotation
# (one row per spot; the first CSV column is used as the row names).
rowData <- read.csv("path/to/rowData.csv", stringsAsFactors = FALSE)
colData <- read.csv("path/to/colData.csv", stringsAsFactors = FALSE,
                    row.names = 1)

# Count table; check.names = FALSE keeps the column names exactly as they
# appear in the file instead of converting them to syntactic R names.
counts <- read.csv("path/to/counts.csv.gz",
                   row.names = 1, check.names = FALSE,
                   stringsAsFactors = FALSE)

# read.csv() returns a data.frame, which cannot be coerced to a sparse
# matrix directly, so convert to a dense matrix first.
sparse_counts <- as(as.matrix(counts), "CsparseMatrix")

sce <- SingleCellExperiment(assays = list(counts = sparse_counts),
                            rowData = rowData,
                            colData = colData)

Lastly, you can use the getRDS() function. Please consult the BayesSpace manual if you run into any problems with this step.

3 Analysis with the incorporation of cell type proportions

3.1 Obtain cell deconvolution proportions

On the Visium platform, a single spot usually consists of multiple cells, so analyzing it as a single unit can reduce accuracy. We therefore suggest performing cell deconvolution analysis using CARD, RCTD (spacexr), or cell2location. Below we show example code for obtaining cell type proportions using CARD:

### load the spatial transcriptomics data and preprocess it
library(BayesSpace)
outdir <- "/Dir/To/Data/BreastCancer_10x"
sce <- readVisium(outdir)
sce <- spatialPreprocess(sce, platform = "Visium", log.normalize = TRUE)

### inputs for CARD: the count assay plus a per-spot coordinate table
### (the y coordinate is the image row flipped against its maximum)
spatial_count <- assays(sce)$counts
spatial_location <- data.frame(
    x = sce$imagecol,
    y = max(sce$imagerow) - sce$imagerow
)
rownames(spatial_location) <- colnames(spatial_count)

### the single cell reference data for BRCA is assumed to be loaded already:
###   BRCA_countmat - count matrix of the BRCA single cell reference
###   cellType      - cell types of the BRCA reference data
sc_count <- BRCA_countmat
sc_meta <- data.frame(
    cellID = colnames(BRCA_countmat),
    cellType = cellType
)
rownames(sc_meta) <- colnames(BRCA_countmat)

### build the CARD object and run the deconvolution
### NOTE(review): sc_meta as built above has no "sampleInfo" column, yet
### sample.varname refers to one - confirm against the CARD documentation.
library(CARD)
CARD_obj <- createCARDObject(
    sc_count = sc_count,
    sc_meta = sc_meta,
    spatial_count = spatial_count,
    spatial_location = spatial_location,
    ct.varname = "cellType",
    ct.select = unique(sc_meta$cellType),
    sample.varname = "sampleInfo",
    minCountGene = 100,
    minCountSpot = 5
)
CARD_obj <- CARD_deconvolution(CARD_object = CARD_obj)

## store the estimated proportions in the metadata of the sce object
S4Vectors::metadata(sce)$Proportions <- RegionalST::getProportions(CARD_obj)

3.2 Load example dataset

In our package, we create a small example dataset by subsetting the breast cancer Visium data from 10X. We already added the cell type proportion from deconvolution. In case deconvolution couldn’t be performed or the data is of single cell resolution, we also provided the cell type label for each spot. Note that the Visium data is actually not single cell resolution, so the cell type label indicates the major cell type for each spot.

## seed the random number generator
set.seed(1234)

library(RegionalST)
library(gridExtra)
data(example_sce)

## the proportions live in the object's metadata; show the first 5 rows and columns
S4Vectors::metadata(example_sce)[["Proportions"]][seq_len(5), seq_len(5)]
##                    Cancer Epithelial      CAFs    T-cells Endothelial
## GTAGACAACCGATGAA-1        0.04610997 0.2684636 0.11682355  0.07809853
## ACAGATTAGGTTAGTG-1        0.09078458 0.3380722 0.06542484  0.05633140
## TGGTATCGGTCTGTAT-1        0.05897943 0.4562020 0.02192799  0.09059294
## ATTATCTCGACAGATC-1        0.07374128 0.5029240 0.03206525  0.02884060
## TGAGATCAAATACTCA-1        0.09849148 0.5737657 0.01429222  0.02761076
##                            PVL
## GTAGACAACCGATGAA-1 0.040103174
## ACAGATTAGGTTAGTG-1 0.098474025
## TGGTATCGGTCTGTAT-1 0.056373840
## ATTATCTCGACAGATC-1 0.006084852
## TGAGATCAAATACTCA-1 0.014253759
## the per-spot cell type label is stored in the celltype variable
head(example_sce$celltype)
## [1] "CAFs" "CAFs" "CAFs" "CAFs" "CAFs" "CAFs"

3.3 Identify Regions of Interest (ROIs) with incorporation of proportions

First, we want to preprocess the data using the functions from BayesSpace:

library(BayesSpace)
## keep only the spots whose total count is greater than zero
has_counts <- colSums(counts(example_sce)) > 0
example_sce <- example_sce[, has_counts]
example_sce <- mySpatialPreprocess(example_sce, platform = "Visium")

Second, we assign weights to each cell type and check the entropy at different radii.

## weight table: the i-th weight belongs to the i-th cell type
weight <- data.frame(
    celltype = c(
        "Cancer Epithelial",
        "CAFs",
        "T-cells",
        "Endothelial",
        "PVL",
        "Myeloid",
        "B-cells",
        "Normal Epithelial",
        "Plasmablasts"
    ),
    weight = c(
        0.25,
        0.05,
        0.25,
        0.05,
        0.025,
        0.05,
        0.25,
        0.05,
        0.025
    )
)

## entropy at radius 5, with selectN set to the total number of spots
OneRad <- GetOneRadiusEntropy_withProp(
    example_sce,
    selectN = length(example_sce$spot),
    weight = weight,
    radius = 5,
    doPlot = TRUE,
    mytitle = "Radius 5 weighted entropy"
)

Note: Here GetOneRadiusEntropy_withProp() calculates the entropy for all spots, because selectN is set to length(example_sce$spot), the total number of spots. If this is too slow for a large dataset, you can compute the entropy for only a subset of the spots by lowering the selectN argument, e.g., selectN = round(length(example_sce$spot)/10). One tenth is only an example; depending on the size of your data, you can try 1/3, 1/5, or 1/20 to generate entropy figures with different sparsities. The smaller selectN is, the faster this function will be.

3.3.1 Automatic ROI selection

Then, we can use automatic functions to select ROIs:

## automatic ROI selection; selectN is one fifth of the number of spots
example_sce <- RankCenterByEntropy_withProp(
    example_sce,
    weight,
    selectN = round(length(example_sce$spot) / 5),
    topN = 3,
    min_radius = 10,
    radius_vec = c(5, 10),
    doPlot = TRUE
)