
Fully randomized grids; Lazy-loading model dictionary #69

Merged: 31 commits, Oct 27, 2024
- d1b0678: update parameter options for completely random rotation angle of ense… (chenyangkang, Oct 24, 2024)
- 8826ea1: pre-commit syntax correction (chenyangkang, Oct 24, 2024)
- 93ed961: add more pytest on the completely_random_rotation; #59 (chenyangkang, Oct 24, 2024)
- 9556f6e: update badge (chenyangkang, Oct 24, 2024)
- c26e686: add lazyloading model dictionary choices; Save ensembles of models to… (chenyangkang, Oct 25, 2024)
- 5f878da: fix tests (chenyangkang, Oct 25, 2024)
- ec8cc91: fix test (chenyangkang, Oct 25, 2024)
- 99f0bb4: add test for lazyloading (chenyangkang, Oct 26, 2024)
- 26f3271: fix (chenyangkang, Oct 26, 2024)
- da34854: lazy_loading/saving dir name no longer controlled by random_state para… (chenyangkang, Oct 26, 2024)
- 62cdaae: fix (chenyangkang, Oct 26, 2024)
- cf079e6: update (chenyangkang, Oct 26, 2024)
- 55e7dfe: fix (chenyangkang, Oct 26, 2024)
- 3ebe936: change njobs to n_jobs, following the sklearn convention (chenyangkang, Oct 26, 2024)
- 690ac08: add test for Hurdle_for_AdaSTEM (chenyangkang, Oct 26, 2024)
- d573274: update tests to cover more (chenyangkang, Oct 26, 2024)
- a7740f1: fix tests (chenyangkang, Oct 26, 2024)
- 9cf0b70: fix tests; fix AdaSTEM score method (chenyangkang, Oct 27, 2024)
- 045f552: lazy loading pytests (chenyangkang, Oct 27, 2024)
- 07bd202: fix (chenyangkang, Oct 27, 2024)
- 577637d: fix n_jobs (chenyangkang, Oct 27, 2024)
- d0c4b0e: fix (chenyangkang, Oct 27, 2024)
- d38ff40: fix (chenyangkang, Oct 27, 2024)
- 8fdba80: fix (chenyangkang, Oct 27, 2024)
- be81716: update tests (chenyangkang, Oct 27, 2024)
- 522d384: update doc1 (chenyangkang, Oct 27, 2024)
- 7f0c050: update (chenyangkang, Oct 27, 2024)
- 853ca39: fix (chenyangkang, Oct 27, 2024)
- 4f466eb: fix (chenyangkang, Oct 27, 2024)
- df392d1: update version (chenyangkang, Oct 27, 2024)
- 2f6dc8f: update lazyloading docs (chenyangkang, Oct 27, 2024)
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix n_jobs
chenyangkang committed Oct 27, 2024
commit 577637df95a0afc3c300e9c59b548e01905b5fdd
5 changes: 3 additions & 2 deletions docs/A_brief_introduction/A_brief_introduction.md
@@ -37,7 +37,7 @@ In the first case, the classifier and regressor "talk" to each other in each sep
User can define the size of the stixels (spatial temporal grids) in terms of space and time. Larger stixel promotes generalizability but loses precision in fine resolution; Smaller stixel may have better predictability in the exact area but reduced ability of extrapolation for points outside the stixel. See section [Optimizing stixel size](https://chenyangkang.github.io/stemflow/Examples/07.Optimizing_stixel_size.html) for discussion about selecting gridding parameters and [Tips for spatiotemporal indexing](https://chenyangkang.github.io/stemflow/Tips/Tips_for_spatiotemporal_indexing.html).

## A simple demo
- In the demo, we first split the training data using temporal sliding windows with a size of 50 day of year (DOY) and step of 20 DOY (`temporal_start = 1`, `temporal_end=366`, `temporal_step=20`, `temporal_bin_interval=50`). For each temporal slice, a spatial gridding is applied, where we force the stixel to be split into smaller 1/4 pieces if the edge is larger than 25 units (measured in longitude and latitude, `grid_len_upper_threshold=25`), and stop splitting to prevent the edge length being chunked below 5 units (`grid_len_lower_threshold=5`) or containing less than 50 checklists (`points_lower_threshold=50`). Model fitting is run using 1 core (`njobs=1`).
+ In the demo, we first split the training data using temporal sliding windows with a size of 50 day of year (DOY) and step of 20 DOY (`temporal_start = 1`, `temporal_end=366`, `temporal_step=20`, `temporal_bin_interval=50`). For each temporal slice, a spatial gridding is applied, where we force the stixel to be split into smaller 1/4 pieces if the edge is larger than 25 units (measured in longitude and latitude, `grid_len_upper_threshold=25`), and stop splitting to prevent the edge length being chunked below 5 units (`grid_len_lower_threshold=5`) or containing less than 50 checklists (`points_lower_threshold=50`). Model fitting is run using 1 core (`n_jobs=1`).

This process is executed 10 times (`ensemble_fold = 10`), each time with random jitter and random rotation of the gridding, generating 10 ensembles. In the prediction phase, only spatial-temporal points with more than 7 (`min_ensemble_required = 7`) ensembles usable are predicted (otherwise, set as `np.nan`).
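The temporal sliding-window scheme described in this passage can be sketched in plain Python. This is an illustration of the window arithmetic only, using the parameter names from the text, not stemflow's internal implementation:

```python
# Illustrative sketch of the temporal sliding windows described above:
# windows of width `temporal_bin_interval` DOY, starting every
# `temporal_step` DOY between `temporal_start` and `temporal_end`.
# (Parameter names follow the docs; this is not stemflow's own code.)

def temporal_windows(temporal_start=1, temporal_end=366,
                     temporal_step=20, temporal_bin_interval=50):
    """Return (start, end) DOY pairs for each temporal slice."""
    windows = []
    start = temporal_start
    while start < temporal_end:
        windows.append((start, min(start + temporal_bin_interval, temporal_end)))
        start += temporal_step
    return windows

print(temporal_windows()[:3])  # → [(1, 51), (21, 71), (41, 91)]
```

Each stixel-level model is then trained on the points falling inside one such window after spatial gridding.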

@@ -68,7 +68,8 @@ model = AdaSTEMRegressor(
Spatio2='latitude', # spatial coordinates shown in the dataframe
Temporal1='DOY',
use_temporal_to_train=True, # In each stixel, whether 'DOY' should be a predictor
- njobs=1
+ n_jobs=1,
+ random_state=42
)
```

19 changes: 19 additions & 0 deletions docs/Examples/01.AdaSTEM_demo.ipynb
@@ -2985,6 +2985,25 @@
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import tarfile \n",
" \n",
"# open file \n",
"file = tarfile.open('test.tar.gz') \n",
"print(file.getnames()) \n",
" \n",
"# extract files \n",
"file.extractall('./Destination_FolderName') \n",
" \n",
"# close file \n",
"file.close() "
]
},
{
"cell_type": "markdown",
"metadata": {},
19 changes: 19 additions & 0 deletions docs/Examples/08.Lazy_loading.ipynb
@@ -0,0 +1,19 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"language_info": {
"name": "python"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
4 changes: 2 additions & 2 deletions docs/Tips/Tips_for_different_tasks.md
@@ -37,7 +37,7 @@ model = AdaSTEMClassifier(
Spatio2='proj_lat',
Temporal1='DOY',
use_temporal_to_train=True,
- njobs=1
+ n_jobs=1
)
```

@@ -103,7 +103,7 @@ model = AdaSTEMRegressor(
Spatio2='proj_lat',
Temporal1='DOY',
use_temporal_to_train=True,
- njobs=1
+ n_jobs=1
)
```
Correspondingly, you would use a set of metrics for the regression problem:
10 changes: 5 additions & 5 deletions docs/Tips/Tips_for_spatiotemporal_indexing.md
@@ -57,7 +57,7 @@ model = AdaSTEMClassifier(
Spatio2='proj_lat',
Temporal1='Week',
use_temporal_to_train=True, # In each stixel, whether 'Week' should be a predictor
- njobs=1
+ n_jobs=1
)
```

@@ -100,7 +100,7 @@ model = AdaSTEMClassifier(
Spatio2='proj_lat',
Temporal1='Week',
use_temporal_to_train=True,
- njobs=1
+ n_jobs=1
)
```

@@ -132,7 +132,7 @@ model = AdaSTEMClassifier(
Spatio2='proj_lat',
Temporal1='Week',
use_temporal_to_train=True,
- njobs=1
+ n_jobs=1
)
```

@@ -161,7 +161,7 @@ model = STEMClassifier(
Spatio2='proj_lat',
Temporal1='Week',
use_temporal_to_train=True,
- njobs=1
+ n_jobs=1
)
```

@@ -194,7 +194,7 @@ model = SphereAdaSTEMRegressor(
points_lower_threshold=50, # Only stixels with more than 50 samples are trained
Temporal1='DOY',
use_temporal_to_train=True, # In each stixel, whether 'DOY' should be a predictor
- njobs=1
+ n_jobs=1
)
```

3 changes: 2 additions & 1 deletion docs/index.md
@@ -157,7 +157,8 @@ model = AdaSTEMRegressor(
Spatio2='latitude', # spatial coordinates shown in the dataframe
Temporal1='DOY',
use_temporal_to_train=True, # In each stixel, whether 'DOY' should be a predictor
- njobs=1
+ n_jobs=1,
+ random_state=42
)
```

14 changes: 7 additions & 7 deletions stemflow/model/Hurdle.py
@@ -217,7 +217,7 @@ def fit(self, X_train: Union[pd.core.frame.DataFrame, np.ndarray], y_train: Sequ
def predict(
self,
X_test: Union[pd.core.frame.DataFrame, np.ndarray],
- njobs: int = 1,
+ n_jobs: int = 1,
verbosity: int = 1,
return_by_separate_ensembles: bool = False,
) -> np.ndarray:
@@ -226,7 +226,7 @@ def predict(
Args:
X_test:
Test variables
- njobs:
+ n_jobs:
Multi-processing in prediction.
verbosity:
Whether to show progress bar. 0 for No, and Yes other wise.
@@ -238,17 +238,17 @@ def predict(
"""
if verbosity == 0:
cls_res = self.classifier.predict(
- X_test, njobs=njobs, verbosity=0, return_by_separate_ensembles=return_by_separate_ensembles
+ X_test, n_jobs=n_jobs, verbosity=0, return_by_separate_ensembles=return_by_separate_ensembles
)
reg_res = self.regressor.predict(
- X_test, njobs=njobs, verbosity=0, return_by_separate_ensembles=return_by_separate_ensembles
+ X_test, n_jobs=n_jobs, verbosity=0, return_by_separate_ensembles=return_by_separate_ensembles
)
else:
cls_res = self.classifier.predict(
- X_test, njobs=njobs, verbosity=1, return_by_separate_ensembles=return_by_separate_ensembles
+ X_test, n_jobs=n_jobs, verbosity=1, return_by_separate_ensembles=return_by_separate_ensembles
)
reg_res = self.regressor.predict(
- X_test, njobs=njobs, verbosity=1, return_by_separate_ensembles=return_by_separate_ensembles
+ X_test, n_jobs=n_jobs, verbosity=1, return_by_separate_ensembles=return_by_separate_ensembles
)
# reg_res = np.where(reg_res>=0, reg_res, 0) ### we constrain the reg value to be positive
res = np.where(cls_res < 0.5, 0, cls_res)
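This hunk combines the two stages of the hurdle model: classifier outputs below 0.5 are zeroed before the regressor's value is applied. A hedged NumPy sketch of that kind of combination, using the `cls_res`/`reg_res` names from the diff (stemflow's exact combination may differ — the line above thresholds the classifier output first):

```python
import numpy as np

# Illustrative hurdle combination: where the classifier predicts absence
# (score below 0.5) the final prediction is 0; elsewhere the regressor's
# abundance prediction is used. Not stemflow's exact method.
cls_res = np.array([0.1, 0.7, 0.9, 0.3])  # hypothetical occurrence scores
reg_res = np.array([5.0, 2.0, 8.0, 1.0])  # hypothetical abundance values

res = np.where(cls_res < 0.5, 0.0, reg_res)
print(res)  # → [0. 2. 8. 0.]
```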
@@ -267,7 +267,7 @@ def predict_proba(
Args:
X_test:
Testing variables
njobs:
n_jobs:
Multi-processing in prediction.
verbosity:
Whether to show progress bar. 0 for No, and Yes other wise.