From d0ba22774d5c92a9c0fc31c1453e58864f3fae9e Mon Sep 17 00:00:00 2001 From: Chengxiaojie <44351382+Chengxiaojie@users.noreply.github.com> Date: Mon, 29 Jul 2024 20:07:05 +0800 Subject: [PATCH] Add files via upload --- vignettes/SpaTME_tutorial.html | 764 +++++++++++++++++++++++++++++++++ 1 file changed, 764 insertions(+) create mode 100644 vignettes/SpaTME_tutorial.html diff --git a/vignettes/SpaTME_tutorial.html b/vignettes/SpaTME_tutorial.html new file mode 100644 index 0000000..b752d0c --- /dev/null +++ b/vignettes/SpaTME_tutorial.html @@ -0,0 +1,764 @@ + + + + + + + + + + + + + + + +SpaTME + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +
+

Preprocessing

+
library(SpaTME)
+library(dplyr)
+library(ggplot2)
+# load ST data
+st <- readRDS("~/test_data/st.rds")
+

SpaTME checks if the coordinate information is available in the +meta.data, images, or colnames (formatted as 1x2) of st. Specifically, +for Visium data, the x and y coordinates are adjusted to follow a +hexagonal arrangement.

+
st <- STcoordCheck(st = st, platform = "Visium", hexagon.correct = T,
+                   hexagon.direct = "vertical",verbose = T)
+
## Get coordinate information from @image
+
## Converting coordinate position to hexagon arrangement!
+
## [1] "Completing coordinate check!"
+

Preprocessing includes standard steps in Seurat tool, like SCT +normalization, dimensionality reduction (PCA, UMAP, t-SNE), and +clustering.

+
st <- SePreprocess(se = st, assay = "Spatial", norm.SCT = TRUE, cluster.resolution = 0.8)
+
+
+

Characterization of TME with cellular and molecular features.

+

Creating an STFeature object, which serves as a +container for identified ST features. The STFeature object is +initialized with positional data and cell abundance annotations.

+
# Loading cell abundances deconvoluted using Cell2location.
+cell.abun <- readRDS("~/test_data/cell.abun.rds")
+stf <- CreateStfObj(st = st, assay = "SCT", slot = "data", norm = T,
+                    cell.anno = cell.abun, min.prop = 0.05,
+                    init.fea = c("Position", "CellAnno"))
+
+

Molecular features

+

SpaTME explores the molecular features from three aspects: (1) +Assessing the enrichment of annotated gene sets. (2) Determining gene +co-expression modules. (3) Quantifying the ligand-receptor (LR) +interactions.

+

Firstly, SpaTME enables assessing the enrichment of immune and +tumor-related signatures collected from literatures (CuratedSig +category) or various biomedical ontologies sourced from databases +(MsigDB category). Here, we calculate the enrichment scores of TLS +signatures using the AddModuleScore function in Seurat.

+
expr <- GetAssayData(object = st, assay = "SCT", slot = "data")
+stf = GetGsetSigScore(expr = expr, stf = stf, category = "CuratedSig",
+                      types = c("Immune"),
+                      subtype = "TLS",
+                      method = "AddModuleScore", scale = T)
+
## Calculating Immune-related signatures
+
# In SpaTME, the SpotVisualize() function is used to visualize features.
+SpotVisualize(st, title = "imprint.65sig",
+              legend.name = "Score",size = 1.5,
+              meta = stf@GsetSig$CuratedSig$Immune$TLS$imprint.65sig)
+

+

SpaTME allows identification of gene co-expression modules utilizing +WGCNA. Here, we use the top 2000 Spatially variable genes +(SVGs) identified previously as an example.

+
topSVGs <- readRDS("~/test_data/topSVGs.rds")
+data <- st@assays$SCT@data[topSVGs,]
+# gene co-expression module
+Topgene.net <- BulidCGnet(data = t(as.matrix(data)), outdir = NULL,
+                     power.seq = c(c(1:10), seq(from = 12, to=20, by=2)),
+                     verbose = 0, minModuleSize = 20, reassignThreshold = 0,
+                     mergeCutHeight = 0.15, pamRespectsDendro = FALSE, detectCutHeight = 0.99)
+
## 
+
## The output directory is recommended to save the result files!
+
## Selecting soft threshold...
+
##    Power SFT.R.sq slope truncated.R.sq  mean.k. median.k.  max.k.
+## 1      1    0.461 -2.03         0.9450 1.33e+02  1.20e+02 370.000
+## 2      2    0.871 -2.55         0.9630 1.59e+01  1.12e+01 102.000
+## 3      3    0.377 -3.42         0.3350 2.83e+00  1.36e+00  34.800
+## 4      4    0.374 -2.94         0.3490 6.96e-01  1.98e-01  13.800
+## 5      5    0.353 -2.52         0.3190 2.21e-01  3.26e-02   6.060
+## 6      6    0.247 -1.94         0.0330 8.62e-02  6.06e-03   3.090
+## 7      7    0.278 -1.87         0.0721 3.94e-02  1.22e-03   2.270
+## 8      8    0.380 -2.03         0.2980 2.05e-02  2.65e-04   1.770
+## 9      9    0.381 -1.88         0.2870 1.18e-02  6.10e-05   1.440
+## 10    10    0.389 -1.76         0.2970 7.41e-03  1.34e-05   1.190
+## 11    12    0.326 -1.90         0.1610 3.49e-03  7.80e-07   0.842
+## 12    14    0.361 -2.26         0.2010 1.93e-03  4.68e-08   0.612
+## 13    16    0.314 -1.99         0.1180 1.18e-03  3.01e-09   0.452
+## 14    18    0.320 -1.92         0.1260 7.67e-04  1.96e-10   0.337
+## 15    20    0.328 -1.84         0.1370 5.19e-04  1.35e-11   0.253
+
## soft-thresholding power is selected: 2
+
## Starting module detection...
+

+
## $mar
+## [1] 1 5 0 1
+

Calculating the eigengenes of modules.

+
net_MEs <- WGCNA::moduleEigengenes(expr = t(as.matrix(data)),
+                           colors = Topgene.net$colors)
+net_MEs <- net_MEs$eigengenes
+

Visualizing the distribution of each ME.

+
library(cowplot)
+plot.list = c()
+for(i in colnames(net_MEs)){
+  plot.list[[i]] <- SpotVisualize(st, meta = net_MEs[, i], size = 1.5, return = T,title = i, legend.name = "Expression")
+}
+plot_grid(plotlist = plot.list, ncol = 3)
+

+

SpaTME enables quantification of 1396 LR interactions collected from +the CellPhoneDB database, which includes multi-subunit +information of protein complexes. The BuildLRExprssion() +function recalculates the effect expression of secreted proteins and +membrane proteins separately, taking into account the interaction +distance.

+
data("LR_input")
+expr <- GetAssayData(object = st,
+                     assay = "SCT",
+                     slot = "data")
+LR_expression <- BuildLRExprssion(Expr = expr, st_pos = stf@Position,
+                                  LR_input = LR_input, do.dist = T, r.dist = 4,
+                                  long.dist.method = "weighted",
+                                  adjust = 2, na.rm = T, verbose = F)
+

The CalLRIScore function quantifies the interaction +scores of LR pairs.

+
data(interaction_input)
+LRscore <- CalLRIScore(interaction_input = interaction_input[1:100,],
+                       LR_expression = LR_expression, na.rm = T,
+                       p = 3, verbose = F)
+
+
+

Cellular features

+

SpaTME describes the spatial distribution of cell populations by +calculating co-distribution scores of cell types and measuring the +degree of immune infiltration.

+
# Visualizaition of top three cell types for each spot
+library("scatterpie")
+library("cowplot")
+data <- stf@CellAnno
+data <- apply(data, 1, function(x){
+  th <- sort(x, decreasing = T)[3]
+  x[x < th] = 0
+  return(x/sum(x))
+})
+data <- t(data)
+colors <- LabelMapcolor(labels = unique(colnames(data)),
+                        assgin.col = c("Tumor" = "#ced4da", "Plasma/B.cells" = "#0077b6", "T.cells" = "#b95b13", "Epithelial" = "#6c809a"))
+PlotCellSpot(decon_mtrx = data, st_pos = stf@Position,
+             pie_scale = 0.5, pie_color = colors,
+             separate = F, e.color = NA)
+

+

SpaTME calculates the cell co-distribution scores with the +CalCellCodis() function.

+
stf@CellCodis <- CalCellCodis(stf@CellAnno, sort = T)
+# Distribution of T cells and Plasma/B.cells, and their co-diatribution scores.
+p1 <- SpotVisualize(pos = stf@Position, meta = stf@CellAnno$`Plasma/B.cells`,
+              size = 1.5, return = T, title = "Plasma/B.cells")
+p2 <- SpotVisualize(pos = stf@Position, meta = stf@CellAnno$T.cells,
+              size = 1.5, return = T, title = "T.cells")
+p3 <- SpotVisualize(pos = stf@Position,
+              meta = stf@CellCodis$`Plasma/B.cells_T.cells`,
+              size = 1.5, return = T, title = "Plasma/B.cells_T.cells")
+plot_grid(plotlist = list(p1, p2, p3), ncol = 3)
+

+

The degree of immune infiltration in ST sample.

+
immune.cells <- sort(c("Plasma/B.cells", "T.cells", "Dendritic.cells", "Macrophage", "Monocyte", "NK"))
+data <- stf@CellAnno[, immune.cells]
+## Immune cells with a proportion less than 0.05 will not be considered for immune infiltration
+Imm.infil <- CalImmInfiltration(data, min.prop = 0.05)
+p1 <- SpotVisualize(pos = stf@Position, meta = Imm.infil$Imm.enrichment, return = T,
+               size = 1.5, title = "Imm.enrichment")
+p2 <- SpotVisualize(pos = stf@Position, meta = Imm.infil$Imm.diversity, return = T,
+              size = 1.5, title = "Imm.diversity")
+plot_grid(plotlist = list(p1, p2), ncol = 2)
+

+
+
+
+

Recognition of spatial architectures

+
+

Domain annotation

+

Any domain labels annotated with others tools can be used for SpaTME. +In addition to clusters obtained from Seurat, SpaTME also +implements the BayesSpace tool to annotate the spatial +domains of ST data. If the number of clusters (num_clu) is +not specified, the BayesCluster() function will +automatically select it by running the BayesSpace algorithm for multiple +values specified by the qs parameter. Alternatively, users +could select the point around the elbow of plot if the +show_qplot is set to TRUE. For example, we run the +BayesCluster() with qs = 5:12 and select the +point 9, which around the first elbow.

+
sce <- BayesCluster(st = st, assay = "Spatial",platfrom = "Visium",
+                    qs = 6:12, show_qplot = T)
+
## [1] "Starting clustering with BayesSpace"
+
## Number of cluster is not provided, qTune will used for selecting best q value
+
## Neighbors were identified for 1921 out of 1946 spots.
+
## Fitting model...
+## Fitting model...
+## Fitting model...
+## Fitting model...
+## Fitting model...
+## Fitting model...
+## Fitting model...
+
## [1] "The recommended cluster number is 9"
+
## Neighbors were identified for 1921 out of 1946 spots.
+## Fitting model...
+
## Calculating labels using iterations 1000 through 50000.
+

+
domains <- paste0("Domain_", sce@colData@listData$spatial.cluster)
+stf@Annotation <- data.frame(row.names = rownames(stf@Position))
+stf@Annotation[, "bayes_cluster"] <- domains
+SpotVisualize(pos = stf@Position, size = 1.5,
+                  meta = domains, title = "Domain annotation",
+                  cha.col = LabelMapcolor(unique(domains)))
+

+
+
+

TLS prediction

+

SpaTME employs two TLS signatures, as well as the co-localization of +Plasma/B cells and T cells, to detect TLS. In particular, SpaTME +improves the accuracy of TLS prediction by integrating the neighborhood +information of spots within the same domain. The CalTLSfea +function returns a list containing distinct forms of TLS features and +the final TLS scores.

+
tls.sig <- stf@GsetSig$CuratedSig$Immune$TLS[, c("LC.50sig", "imprint.65sig")]
+cell.fea <- stf@CellCodis$`Plasma/B.cells_T.cells`
+comb.fea <- cbind(tls.sig, cell.fea)
+rownames(comb.fea) <- rownames(stf@Position)
+TLS.fea <- CalTLSfea(data = comb.fea, st_pos = stf@Position,
+                     cluster = stf@Annotation$bayes_cluster,
+                     r.dist = 2, method = "weighted", verbose = F)
+SpotVisualize(pos = stf@Position, size = 1.5, limits = c(0, 1),
+                  meta = TLS.fea$TLS.score, title = "TLS score")
+

+
+
+

Tumor-normal interface (TNI) identification

+

SpaTME identifies the TNI regions by inputted with tumor cell and +domain annotations. Here, we used a RCC slice with clear tumor +boundaries.

+
st2 <- readRDS("~/test_data/st2.rds")
+st2 <- STcoordCheck(st = st2, platform = "Visium", hexagon.correct = T, reset = T,
+                   hexagon.direct = "vertical",verbose = T)
+
## [1] "Completing coordinate check!"
+
st2 <- SePreprocess(se = st2, assay = "Spatial", norm.SCT = TRUE, cluster.resolution = 0.8)
+stf2 <- readRDS("~/test_data/stf2.rds")
+

In SpaTME, the TNIscore and DefineTNIregion +functions are developed to calculate the TNI scores and determine the +final TNI regions respectively. Firstly, we adjust the proportions of +tumor cells using the total abundances of each spot (This step can be +ignored if only proportions are provided).

+
cell.abun <- readRDS("~/test_data/cell.abun_2.rds")
+abun.all <- log2(rowSums(cell.abun)+1)
+abun.all <- abun.all/max(abun.all)
+cell.abun <- cell.abun/rowSums(cell.abun)
+ES <- cell.abun$Tumor*abun.all
+ES <- ES/max(ES)
+score <- TNIscore(ES, st_pos = stf2@Position,
+                  cluster = stf2@Annotation$bayes_cluster,
+                  r.dist = 2)
+SpotVisualize(pos = stf2@Position,
+                meta = score, size = 1.5,
+                title = "TNI score", limits = c(0,0.5),
+              f.color = c("#ced4da", "#e5614b", "#a4161a"))
+

+

In DefineTNIregion, the maxval and +minval directly control the selection of TNI spots.

+
TNIreg <- DefineTNIregion(score, st_pos = stf2@Position,
+                          maxval = 0.08, minval = 0.03,
+                          r.dist = 2, verbose = T)
+
## [1] "Identifing TNI spots from edge scores ..."
+## [1] "removing unconnected TNI regions ..."
+## [1] "Completing the identificaton of TNI spots !"
+
SpotVisualize(pos = stf2@Position,
+              meta = TNIreg, size = 1.5,
+              cha.col = c(TNI = "#e76f51", nTNI = "#ced4da"),
+              title = "TNI regions")
+

+

Visualization of TNI region and tumor cell distribution.

+
AbunTNIPlot(abun = ES, label = TNIreg, pos = stf2@Position,
+            l_nshow = "nTNI", size = 1.5, legend.name = "Abundance",
+            line_col = c("TNI" = "black", "nTNI" = NA))
+

+

SpaTME categorizes the TNI regions into distinct types based on their +spatial domain composition.

+
TNI_types <- GroupTNItypes(TNI_pos = stf2@Position[TNIreg == "TNI",],
+                           cluster = stf2@Annotation$bayes_cluster[TNIreg == "TNI"])
+
## Separating tumor boundary to different type according it's label composition of spot-neighbors
+
## pamk: best number of clusters is 4
+
## Filtering short boundary type and remove discontinuous spots
+
allspots <- rep("others", length(TNIreg))
+names(allspots) <- names(TNIreg)
+allspots[names(TNI_types)] <- TNI_types
+SpotVisualize(pos = stf2@Position,
+              meta = allspots, size = 1.5,
+              cha.col = LabelMapcolor(allspots, assgin.col = c(others = "#ced4da")),
+              title = "TNI types")
+

+

SpaTME further classifies the TNI spots into either tumor or normal +boundaries based on the tumor cell density.

+
TNI_class <- TNIClass(type = allspots, cluster = stf2@Annotation$bayes_cluster,
+                      ES = ES)
+SpotVisualize(pos = stf2@Position,
+              meta = TNI_class, size = 1.5,
+              cha.col = LabelMapcolor(labels = TNI_class,
+                       assgin.col = c(others = "#ced4da")),
+             title = "TNI classes")
+

+
+
+
+

Linking with phentoypes

+

The bulk RNA-seq profiles and paired clinical annotations of KIRC +from TCGA were downloaded and preprocessed. The phenotype-related genes +are identified using PhenoAssoFeatures function with +cox method. As an illustration, we conducted NMF analysis +using survival-negative genes.

+
Bulk_data <- readRDS("~/test_data/TCGA_KIRC.rds")
+bulk_input <- NMF_bulk_input(data = Bulk_data$Expr_TPM, pt_gene_exp = 0.2, totpm = F, base_gene_exp = 1, dolog = F)
+asso.genes <- PhenoAssoFeatures(data = bulk_input, phenotype = Bulk_data$phenotype, 
+                                method = "cox", p.cut = 0.01)
+used.genes <- asso.genes[asso.genes$coef > 0, "features"]
+

In SpaTME, the NMF analysis is completed with the +RunNMFtest function. The number of factors (parameter +rank) can be specified with a single numeric value or +selected from a numeric vector.

+
library(NMF)
+nmf <- RunNMFtest(expr = bulk_input[used.genes,], rank = seq(2, 8), 
+                      min_cophenetic = 0.95, return.all = F)
+
## [1] "2024-07-29 15:58:14 CST"
+## Compute NMF rank= 2  ... + measures ... OK
+## Compute NMF rank= 3  ... + measures ... OK
+## Compute NMF rank= 4  ... + measures ... OK
+## Compute NMF rank= 5  ... + measures ... OK
+## Compute NMF rank= 6  ... + measures ... OK
+## Compute NMF rank= 7  ... + measures ... OK
+## Compute NMF rank= 8  ... + measures ... OK
+
consensusmap(nmf)
+

+
W_type <- HPhenoAsso(nmfobj = nmf, phenotype = Bulk_data$phenotype, method = "cox",
+                       p.cut = 1, cox.test.cut = 0, p.adj = F,verbose = F)
+print(W_type)
+
##  Factor_1  Factor_2  Factor_3  Factor_4  Factor_5 
+## "Neg_***" "Neg_***" "Neg_***" "Neg_***" "Neg_***"
+

Showing the top metagenes of factors.

+
W <- basis(nmf)
+colnames(W) <- paste0(names(W_type), "_", W_type)
+mg_vt <- FactorMetagenes(ref_W = W, top_num = 200)
+library(ComplexHeatmap)
+library(circlize)
+Heatmap(W[names(mg_vt),], show_row_names = F, row_title = NULL, cluster_rows = F,
+               name = "Weight", split = mg_vt, cluster_columns = F, column_names_rot = 45, row_gap = unit(0, "mm"),
+               col = colorRamp2(c(min(W), max(W)), colors = c("#336699", "#c32f27")))
+

+

Enrichment analysis of these metagenes.

+
mg_enrich <- FactorEnrichAnalysis(mg_vt = mg_vt, wrap_width = 50,
+                                    fun = "enrichPathway",
+                                    pAdjustMethod = "none")
+

+

SpaTME recovers the relative expression of these factors in ST +data.

+
NMFpred <- PredNMFinST(st2, W = nmf@fit@W, 
+                       assay = "SCT", slot = "data", 
+                       size = 1.2, numCol = 2)
+

+
+ + + + +
+ + + + + + + + + + + + + + +