4 Analysis steps

This part contains chapters describing individual analysis steps within computational analysis workflows for spatial transcriptomics data.

Each chapter describes the analysis type, including discussion on statistical issues and available methods, and provides an interactive example with R code and an example dataset.

In the next chapter, we load a dataset in SpatialExperiment format (see Chapter 3), which will be used in several of the subsequent chapters.

For examples of complete analysis workflows for selected datasets and technological platforms, see the next part (Chapter 16).

4.1 Save data objects for re-use in later chapters

Below, we also run some code to generate data objects that will be re-used in the later chapters, in order to speed up the build time for the online version of the book.

All code below is also shown in later chapters, so this section can be skipped when reading the book.

4.1.1 Human DLPFC dataset

# LOAD DATA

library(SpatialExperiment)
library(STexampleData)
spe <- Visium_humanDLPFC()

# save object
library(here)
if (!dir.exists(here("outputs"))) dir.create(here("outputs"))
saveRDS(spe, file = here("outputs/spe_load.rds"))

# QUALITY CONTROL (QC)

library(scater)
# subset to keep only spots over tissue
spe <- spe[, colData(spe)$in_tissue == 1]
# identify mitochondrial genes
is_mito <- grepl("(^MT-)|(^mt-)", rowData(spe)$gene_name)
# calculate per-spot QC metrics
spe <- addPerCellQC(spe, subsets = list(mito = is_mito))
# select QC thresholds
qc_lib_size <- colData(spe)$sum < 600
qc_detected <- colData(spe)$detected < 400
qc_mito <- colData(spe)$subsets_mito_percent > 28
qc_cell_count <- colData(spe)$cell_count > 10
# combined set of discarded spots
discard <- qc_lib_size | qc_detected | qc_mito | qc_cell_count
colData(spe)$discard <- discard
# filter low-quality spots
spe <- spe[, !colData(spe)$discard]

# save object
saveRDS(spe, file = here("outputs/spe_qc.rds"))

# NORMALIZATION

library(scran)
# calculate logcounts using library size factors
spe <- logNormCounts(spe)

# save object
saveRDS(spe, file = here("outputs/spe_logcounts.rds"))

# FEATURE SELECTION

# remove mitochondrial genes
spe <- spe[!is_mito, ]
# fit mean-variance relationship
dec <- modelGeneVar(spe)
# select top HVGs
top_hvgs <- getTopHVGs(dec, prop = 0.1)

# save object
saveRDS(spe, file = here("outputs/spe_hvgs.rds"))
saveRDS(top_hvgs, file = here("outputs/top_hvgs.rds"))

# DIMENSIONALITY REDUCTION

# compute PCA
set.seed(123)
spe <- runPCA(spe, subset_row = top_hvgs)
# compute UMAP on top 50 PCs
set.seed(123)
spe <- runUMAP(spe, dimred = "PCA")
# update column names
colnames(reducedDim(spe, "UMAP")) <- paste0("UMAP", 1:2)

# save object
saveRDS(spe, file = here("outputs/spe_reduceddims.rds"))

# CLUSTERING

# graph-based clustering
set.seed(123)
k <- 10
g <- buildSNNGraph(spe, k = k, use.dimred = "PCA")
g_walk <- igraph::cluster_walktrap(g)
clus <- g_walk$membership
colLabels(spe) <- factor(clus)

# save object
saveRDS(spe, file = here("outputs/spe_cluster.rds"))