# LOAD DATA
library(SpatialExperiment)
library(STexampleData)
<- Visium_humanDLPFC()
spe
# save object
library(here)
saveRDS(spe, file = here("outputs/spe_load.rds"))
4 Analysis steps
This part contains chapters describing individual analysis steps within computational analysis workflows for spatial transcriptomics data.
Each chapter describes the analysis type, including discussion on statistical issues and available methods, and provides an interactive example with R code and an example dataset.
In the next chapter, we load a dataset in SpatialExperiment
format (see Chapter 3), which will be used in several of the subsequent chapters.
For examples of complete analysis workflows for selected datasets and technological platforms, see the next part (Chapter 16).
4.1 Save data objects for re-use in later chapters
Below, we also run some code to generate data objects that will be re-used in the later chapters, in order to speed up the build time for the online version of the book.
All code below is also shown in later chapters, so this section can be skipped when reading the book.
4.1.1 Human DLPFC dataset
# QUALITY CONTROL (QC)
library(scater)
# subset to keep only spots over tissue
<- spe[, colData(spe)$in_tissue == 1]
spe # identify mitochondrial genes
<- grepl("(^MT-)|(^mt-)", rowData(spe)$gene_name)
is_mito # calculate per-spot QC metrics
<- addPerCellQC(spe, subsets = list(mito = is_mito))
spe # select QC thresholds
<- colData(spe)$sum < 600
qc_lib_size <- colData(spe)$detected < 400
qc_detected <- colData(spe)$subsets_mito_percent > 28
qc_mito <- colData(spe)$cell_count > 10
qc_cell_count # combined set of discarded spots
<- qc_lib_size | qc_detected | qc_mito | qc_cell_count
discard colData(spe)$discard <- discard
# filter low-quality spots
<- spe[, !colData(spe)$discard]
spe
# save object
saveRDS(spe, file = here("outputs/spe_qc.rds"))
# NORMALIZATION
library(scran)
# calculate logcounts using library size factors
<- logNormCounts(spe)
spe
# save object
saveRDS(spe, file = here("outputs/spe_logcounts.rds"))
# FEATURE SELECTION
# remove mitochondrial genes
<- spe[!is_mito, ]
spe # fit mean-variance relationship
<- modelGeneVar(spe)
dec # select top HVGs
<- getTopHVGs(dec, prop = 0.1)
top_hvgs
# save object
saveRDS(spe, file = here("outputs/spe_hvgs.rds"))
saveRDS(top_hvgs, file = here("outputs/top_hvgs.rds"))
# DIMENSIONALITY REDUCTION
# compute PCA
set.seed(123)
<- runPCA(spe, subset_row = top_hvgs)
spe # compute UMAP on top 50 PCs
set.seed(123)
<- runUMAP(spe, dimred = "PCA")
spe # update column names
colnames(reducedDim(spe, "UMAP")) <- paste0("UMAP", 1:2)
# save object
saveRDS(spe, file = here("outputs/spe_reduceddims.rds"))
# CLUSTERING
# graph-based clustering
set.seed(123)
<- 10
k <- buildSNNGraph(spe, k = k, use.dimred = "PCA")
g <- igraph::cluster_walktrap(g)
g_walk <- g_walk$membership
clus colLabels(spe) <- factor(clus)
# save object
saveRDS(spe, file = here("outputs/spe_cluster.rds"))