update and add tutorials for SpaTrack

STOmics · Nov 6, 2024 · 6a4df89 · 6a4df89
1 parent 06ea05f
commit 6a4df89
Show file tree

Hide file tree

Showing 23 changed files with 8,811 additions and 79 deletions.
diff --git a/.gitignore b/.gitignore
@@ -138,4 +138,7 @@ tests/test_data/
 .vscode/
 
 代码规范.md
-ubuntu_enviroment.txt
+ubuntu_enviroment.txt
+
+docs/source/content/stereo*
+docs/build
diff --git a/docs/build.sh b/docs/build.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+conda activate stereopy-doc
+
+rm -rf ./build/*
+
+make html
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -9,7 +9,7 @@ seaborn==0.12.2
 h5py>=3.7.0
 gefpy>=1.1.9
 # setuptools>=41.0.0,<60.0.0
-setuptools==68.2.2
+setuptools==69.5.1
 #todo python3.8/site-packages/umap/distances.py:1053: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator.
 #The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0.
 #See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
@@ -26,7 +26,7 @@ panel==0.14.4
 holoviews==1.17.1
 param==1.13.0
 hvplot>=0.9.0
-# colorcet==2.0.6
+colorcet>=3.1.0
 datashader>=0.15.2
 anndata>=0.7.5
 phenograph==1.5.7
@@ -64,7 +64,7 @@ hotspotsc==1.1.1
 distributed>=2023.3.2.1
 # keras==2.7.0
 pyarrow>=10.0.1
-tables>=3.6.1
+# tables>=3.6.1
 spatialpandas>=0.4.4
 # selenium>=4.4.3
 loompy==3.0.6
@@ -99,6 +99,8 @@ ipython-genutils==0.2.0
 ipywidgets==8.0.4
 
 mudata>=0.2.3
+pysal<=2.6.0
+pygam>=0.8.0
 
 Sphinx>=7.1.2
 nbsphinx>=0.9.3

diff --git a/.../Tutorials/Apply_spaTrack_on_spatial_data_of_Intrahepatic_cholangiocarcinoma_cancer.ipynb b/.../Tutorials/Apply_spaTrack_on_spatial_data_of_Intrahepatic_cholangiocarcinoma_cancer.ipynb
diff --git a/...Tutorials/Apply_spaTrack_on_spatial_data_of_axolotl_brain_regeneration_after_injury.ipynb b/...Tutorials/Apply_spaTrack_on_spatial_data_of_axolotl_brain_regeneration_after_injury.ipynb
diff --git a/...spatial_transcriptomic_data_from_multiple_time_slices_of_axolotl_brain_regeneration.ipynb b/...spatial_transcriptomic_data_from_multiple_time_slices_of_axolotl_brain_regeneration.ipynb
diff --git a/..._scRNA-seq_data_from__hematopoietic_stem_cells_development_with_multiple_directions.ipynb b/..._scRNA-seq_data_from__hematopoietic_stem_cells_development_with_multiple_directions.ipynb
diff --git a/..._across_multiple_time_points_in_spatial_transcriptomic_data_from_the_mouse_midbrain.ipynb b/..._across_multiple_time_points_in_spatial_transcriptomic_data_from_the_mouse_midbrain.ipynb
diff --git a/docs/source/Tutorials/Cell_Fate_Inference.rst b/docs/source/Tutorials/Cell_Fate_Inference.rst
@@ -5,4 +5,5 @@ This section introduces several functions to infer the cell fate.
 .. nbgallery::
 
     Trajectory_Analysis
-    RNA_Velocity
+    RNA_Velocity
+    SpaTrack
diff --git a/docs/source/Tutorials/SpaTrack.rst b/docs/source/Tutorials/SpaTrack.rst
@@ -0,0 +1,19 @@
+SpaTrack
+====================
+Trajectory inference (TI) provides important insights into cell development and biological processes.
+However, integrating transcriptomic profiles and spatial locations to organize spatiotemporal cell orders remains a challenge.
+Here we introduce spaTrack, which effectively constructs cell trajectories from an optimal-transport matrix at single-cell resolution,
+taking into account both the gene expression profiles and the distance cost of cell transitions in a spatial context.
+
+spaTrack has the potential to capture fine local details of trajectories within a single tissue section of spatial transcriptomics (ST) data,
+as well as to reconstruct cell dynamics across multiple tissue sections in a time series. To capture potential dynamic drivers,
+spaTrack models the fate of a cell as a function of its expression profile across time points, driven by transcription factors,
+which facilitates the identification of key molecular regulators that govern cellular trajectories.
+
+.. nbgallery::
+
+    Apply_spaTrack_on_spatial_data_of_axolotl_brain_regeneration_after_injury
+    Apply_spaTrack_on_spatial_data_of_Intrahepatic_cholangiocarcinoma_cancer
+    Apply_spaTrack_to_infer_a_trajectory_on_spatial_transcriptomic_data_from_multiple_time_slices_of_axolotl_brain_regeneration
+    Apply_spaTrack_to_infer_cell_transitions_across_multiple_time_points_in_spatial_transcriptomic_data_from_the_mouse_midbrain
+    Apply_spaTrack_to_infer_cell_trajectory_in_scRNA-seq_data_from__hematopoietic_stem_cells_development_with_multiple_directions
diff --git a/docs/source/_static/230906.gif b/docs/source/_static/230906.gif
diff --git a/docs/source/_static/SpaTrack.png b/docs/source/_static/SpaTrack.png
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -115,6 +115,7 @@
     "Tutorials/Cell_Community_Detection": "_static/CellCommunityDetection.png",
     "Tutorials/Cell_Segmentation": "_static/CellSegmentation.png",
     "Tutorials/Trajectory_Analysis": "_static/TrajectoryAnalysis.png",
+    "Tutorials/SpaTrack": "_static/SpaTrack.png",
     "Tutorials(Multi-sample)/Batch_Effect": "_static/batches_integration.png",
     "Tutorials(Multi-sample)/3D_Cell_Cell_Communication": "_static/3D_Cell_Cell_Communication.png",
     "Tutorials(Multi-sample)/3D_Gene_Regulatory_Network": "_static/gene_regulatory_network.png",

diff --git a/docs/source/content/032_API_StPipeline.rst b/docs/source/content/032_API_StPipeline.rst
@@ -64,4 +64,27 @@ which is compromised of basic preprocessing, embedding, clustering, and so on.
     core.ms_pipeline.MSDataPipeLine.set_scope_and_mode
     algorithm.spa_seg.SpaSeg.main
     algorithm.score_genes.ScoreGenes.main
-    algorithm.score_genes_cell_cycle.ScoreGenesCellCycle.main
+    algorithm.score_genes_cell_cycle.ScoreGenesCellCycle.main
+    algorithm.spa_track.SpaTrack.main
+    algorithm.spa_track.SpaTrack.assess_start_cluster
+    algorithm.spa_track.SpaTrack.set_start_cells
+    algorithm.spa_track.SpaTrack.auto_estimate_param
+    algorithm.spa_track.SpaTrack.calc_alpha_by_moransI
+    algorithm.spa_track.SpaTrack.get_ot_matrix
+    algorithm.spa_track.SpaTrack.get_ptime
+    algorithm.spa_track.SpaTrack.get_velocity_grid
+    algorithm.spa_track.SpaTrack.get_velocity
+    algorithm.spa_track.SpaTrack.auto_get_start_cluster
+    algorithm.spa_track.SpaTrack.lasso_select
+    algorithm.spa_track.SpaTrack.create_vector_field
+    algorithm.spa_track.SpaTrack.set_lap_endpoints
+    algorithm.spa_track.SpaTrack.least_action
+    algorithm.spa_track.SpaTrack.map_cell_to_LAP
+    algorithm.spa_track.SpaTrack.filter_genes
+    algorithm.spa_track.SpaTrack.ptime_gene_GAM
+    algorithm.spa_track.SpaTrack.order_trajectory_genes
+    algorithm.spa_track.SpaTrack.gr_training
+    algorithm.ms_spa_track.MSSpaTrack.main
+    algorithm.ms_spa_track.MSSpaTrack.transfer_matrix
+    algorithm.ms_spa_track.MSSpaTrack.generate_animate_input
+    algorithm.ms_spa_track.MSSpaTrack.gr_training
diff --git a/stereo/algorithm/ms_spa_track.py b/stereo/algorithm/ms_spa_track.py
@@ -20,6 +20,11 @@ def main(
         self,
         cluster_res_key: str = 'cluster'
     ):
+        """
+        Create an object of SpaTrack for multiple samples.
+
+        :param cluster_res_key: the key of clustering result to be used in cells/obs
+        """
         # if cluster_res_key not in self.pipeline_res:
         #     raise KeyError(f'Cannot find clustering result by key {cluster_res_key}')
         if 'spa_track' not in self.pipeline_res:
@@ -32,7 +37,6 @@ def main(
     def transfer_matrix(
         self,
         data_indices: List[Union[str, int]] = None,
-        layer: str = None,
         spatial_key: str = 'spatial',
         alpha: float = 0.1, 
         epsilon = 0.01,
@@ -41,6 +45,19 @@ def transfer_matrix(
         G_2 = None,
         **kwargs
     ):
+        """
+        Sequentially calculate the transfer matrix between each pair of consecutive time points specified by data_indices.
+
+        :param data_indices: A list of indices or names in the ms_data of the data to calculate transfer matrix, defaults to None
+        :param spatial_key: The key to get position information of cells, defaults to 'spatial'
+        :param alpha: Alignment tuning parameter, defaults to 0.1. Note: 0 <= alpha <= 1.
+                        When ``alpha = 0`` only the gene expression data is taken into account,
+                        while with ``alpha = 1`` only the spatial coordinates are taken into account.
+        :param epsilon: Weight for entropy regularization term, defaults to 0.01
+        :param rho: Weight for KL divergence penalizing unbalanced transport, defaults to np.inf
+        :param G_1: Distance matrix within spatial data 1 (spots, spots), defaults to None
+        :param G_2: Distance matrix within spatial data 2 (spots, spots), defaults to None
+        """
         assert spatial_key is not None, 'spatial_key must be provided'
         if data_indices is None:
             data_indices = self.ms_data.names
@@ -58,7 +75,7 @@ def transfer_matrix(
             data1 = data_list_to_calculate[i]
             data2 = data_list_to_calculate[i + 1]
             transfer_matrices[(data_names[i], data_names[i + 1])] = transfer_matrix(
-                data1, data2, layer=layer, spatial_key=spatial_key, alpha=alpha, epsilon=epsilon,
+                data1, data2, layer=None, spatial_key=spatial_key, alpha=alpha, epsilon=epsilon,
                 rho=rho, G_1=G_1, G_2=G_2, **kwargs
             )
         self.pipeline_res['spa_track']['transfer_spatial_key'] = spatial_key
@@ -69,6 +86,13 @@ def generate_animate_input(
         data_indices: List[Union[str, int]] = None,
         time_key: str = 'batch'
     ):
+        """
+        Generate the input for animating the transfer between two or more time points.
+
+        :param data_indices: A list of indices or names in the ms_data of the data,
+                            must be the same as the data used to calculate transfer matrix.
+        :param time_key: The time key in `data.cells` or `data.adata.obs`, defaults to 'batch'.
+        """
         data_names = [
             self.ms_data.names[di] if isinstance(di, int) else di for di in data_indices
         ]
@@ -117,6 +141,28 @@ def gr_training(
         filename: str = "weights.csv",
         lr_ratio: float = 0.1
     ):
+        """
+        Create and run a trainer for gene regulatory network training in **2_time** mode (two samples).
+
+        :param data1_index: The index in the ms_data of the first data
+        :param data2_index: The index in the ms_data of the second data
+        :param tfs_path: The path of the tf names file, defaults to None
+        :param min_cells_1: The minimum number of cells for filtering the first data
+        :param min_cells_2: The minimum number of cells for filtering the second data
+        :param cell_select_per_time: The number of randomly selected cells at each time point, defaults to 10
+        :param cell_generate_per_time: The number of cells generated at each time point, defaults to 500
+        :param train_ratio: Ratio of training data, defaults to 0.8
+        :param use_gpu: Whether to use the GPU; by default it is used if available.
+        :param random_state: Random seed of numpy and torch, fixed for reproducibility, defaults to 0
+        :param training_times: Number of times to randomly initialize the model and retrain, defaults to 10
+        :param iter_times: The number of iterations for each training model, defaults to 30
+        :param mapping_num: The number of top weight pairs you want to extract, defaults to 3000
+        :param filename: The name of the file to save the weights, defaults to "weights.csv"
+        :param lr_ratio: The learning rate, defaults to 0.1
+
+
+        :return: A trainer object for gene regulatory network training.
+        """
         data_list = [deepcopy(self.ms_data[data1_index]), deepcopy(self.ms_data[data2_index])]
         min_cells = [min_cells_1, min_cells_2]
         cell_mapping = self.map_data(data1_index, data2_index)