Merge remote-tracking branch 'origin/riemann' into riemann
# Conflicts:
#	fedot_ind/api/utils/checkers_collections.py
#	fedot_ind/api/utils/industrial_strategy.py
#	fedot_ind/core/operation/interfaces/industrial_model_strategy.py
#	fedot_ind/core/operation/interfaces/industrial_preprocessing_strategy.py
#	fedot_ind/core/repository/industrial_implementations/optimisation.py
#	fedot_ind/core/tuning/search_space.py
v1docq committed May 2, 2024
2 parents 169e989 + c43113b commit a9d4244
Showing 250 changed files with 507 additions and 569 deletions.
49 changes: 32 additions & 17 deletions .github/workflows/integration_tests.yml
@@ -1,33 +1,48 @@
 name: Integration Tests
 
 on:
-#  push:
-#    branches: [ main ]
-#  pull_request:
-#    branches: [ main ]
   workflow_dispatch:
 
 jobs:
-  build:
+  test:
     runs-on: ubuntu-latest
-    timeout-minutes: 95
+    timeout-minutes: 30
     strategy:
       matrix:
-        python-version: [ 3.9 ]
+        python-version: [3.8, 3.9, '3.10']
 
     steps:
-      - uses: actions/checkout@v2
+      - name: Checkout branch
+        uses: actions/checkout@v3
 
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v3
+      - uses: actions/setup-python@v2
         with:
           python-version: ${{ matrix.python-version }}
 
+      - name: Cache Poetry virtualenv
+        uses: actions/cache@v2
+        with:
+          path: ~/.local/share/poetry/virtualenvs/  # Cache Poetry virtualenvs
+          key: ${{ runner.os }}-poetry-${{ hashFiles('pyproject.toml') }}  # Cache key based on project dependencies
+
+      - name: Install Poetry
+        uses: snok/install-poetry@v1
+        with:
+          version: 1.8.2  # Specify your desired Poetry version (pin it for stability)
+          virtualenvs-create: true
+          virtualenvs-in-project: true  # Create venv within project directory
+
       - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install pytest
-          pip install -r requirements.txt
-          pip install pytest-cov
-      - name: Test with pytest
-        run: |
-          pytest --cov=fedot -s tests/integration
+        run: poetry install
+
+      - name: Run tests with pytest
+        run: poetry run pytest --cov=fedot -s tests/integration
+
+      - name: Codecov-coverage
+        uses: codecov/codecov-action@v3
+        with:
+          file: ./coverage.xml
+          flags: unittests
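
The workflow above switches dependency installation and test execution from pip to Poetry. As a minimal sketch (not part of the commit), the CI steps can be reproduced locally, assuming Poetry 1.8.2 as pinned in the workflow and a pyproject.toml at the repository root:

    pip install poetry==1.8.2                   # match the version pinned in the workflow
    poetry config virtualenvs.in-project true   # create the venv inside the project, as CI does
    poetry install                              # resolve dependencies from pyproject.toml
    poetry run pytest --cov=fedot -s tests/integration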
31 changes: 18 additions & 13 deletions benchmark/benchmark_TSC.py
@@ -70,29 +70,31 @@ def finetune(self):
         self.logger.info('Benchmark finetune started')
         dataset_result = {}
         for dataset_name in self.custom_datasets:
-            path_to_results = PROJECT_PATH + self.path_to_save + f'/{dataset_name}'
+            path_to_results = PROJECT_PATH + \
+                self.path_to_save + f'/{dataset_name}'
             composed_model_path = [path_to_results + f'/{x}' for x in os.listdir(path_to_results)
                                    if x.__contains__('pipeline_saved')]
             metric_result = {}
             for p in composed_model_path:
                 if os.path.isdir(p):
                     try:
                         self.experiment_setup['output_folder'] = PROJECT_PATH + \
-                            self.path_to_save
+                            self.path_to_save
                         experiment_setup = deepcopy(self.experiment_setup)
                         prediction, model = self.finetune_loop(
                             dataset_name, experiment_setup, p)
                         metric_result.update({p:
-                                              {'metric': Accuracy(model.predict_data.target,
-                                                                  prediction.ravel()).metric(),
-                                               'tuned_model': model}})
+                                                  {'metric': Accuracy(model.predict_data.target,
+                                                                      prediction.ravel()).metric(),
+                                                   'tuned_model': model}})
                     except ModuleNotFoundError as ex:
                         print(f'{ex}.OLD VERSION OF PIPELINE. DELETE DIRECTORY')
                         if len(composed_model_path) != 1:
                             print(f'OLD VERSION OF PIPELINE. DELETE DIRECTORY')
                             shutil.rmtree(p)
                         else:
-                            print(f'OLD VERSION OF PIPELINE. IT IS A LAST SAVED MODEL')
+                            print(
+                                f'OLD VERSION OF PIPELINE. IT IS A LAST SAVED MODEL')
                 else:
                     print(f"No composed model for dataset - {dataset_name}")
             dataset_path = os.path.join(self.experiment_setup['output_folder'], f'{dataset_name}',
@@ -102,17 +104,20 @@ def finetune(self):
             best_metric = 0
             for _ in metric_result.keys():
                 if best_metric == 0:
-                    best_metric, best_model, path = metric_result[_]['metric'], metric_result[_]['tuned_model'], _
+                    best_metric, best_model, path = metric_result[_][
+                        'metric'], metric_result[_]['tuned_model'], _
                 elif metric_result[_]['metric'] > best_metric:
-                    best_metric, best_model, path = metric_result[_]['metric'], metric_result[_]['tuned_model'], _
-                fedot_results.loc[dataset_name, 'Fedot_Industrial_finetuned'] = best_metric
+                    best_metric, best_model, path = metric_result[_][
+                        'metric'], metric_result[_]['tuned_model'], _
+                fedot_results.loc[dataset_name,
+                                  'Fedot_Industrial_finetuned'] = best_metric
                 best_model.output_folder = f'{_}_tuned'
                 best_model.save_best_model()
                 fedot_results.to_csv(dataset_path)
             else:
                 fedot_results.to_csv(dataset_path)
             gc.collect()
-            dataset_result.update({dataset_name:metric_result})
+            dataset_result.update({dataset_name: metric_result})
         self.logger.info("Benchmark finetune finished")
 
     def load_local_basic_results(self, path: str = None):
@@ -125,7 +130,7 @@ def load_local_basic_results(self, path: str = None):
             except Exception:
                 results = self.load_web_results()
             self.experiment_setup['output_folder'] = PROJECT_PATH + \
-                self.path_to_save
+                self.path_to_save
             return results
         else:
             return self.results_picker.run(get_metrics_df=True, add_info=True)
@@ -135,14 +140,14 @@ def create_report(self):
         names = []
         for dataset_name in self.custom_datasets:
             model_result_path = PROJECT_PATH + self.path_to_save + \
-                f'/{dataset_name}' + '/metrics_report.csv'
+                f'/{dataset_name}' + '/metrics_report.csv'
             if os.path.isfile(model_result_path):
                 df = pd.read_csv(model_result_path, index_col=0, sep=',')
                 df = df.fillna(0)
                 if 'Fedot_Industrial_finetuned' not in df.columns:
                     df['Fedot_Industrial_finetuned'] = 0
                 metrics = df.loc[dataset_name,
-                                 'Fedot_Industrial':'Fedot_Industrial_finetuned']
+                                 'Fedot_Industrial':'Fedot_Industrial_finetuned']
                 _.append(metrics.T.values)
                 names.append(dataset_name)
         stacked_resutls = np.stack(_, axis=1).T
2 changes: 1 addition & 1 deletion benchmark/benchmark_TSER.py
@@ -102,7 +102,7 @@ def finetune_loop(self, dataset, experiment_setup, composed_model_path):
         model = FedotIndustrial(**experiment_setup)
         model.load(path=composed_model_path)
 
-        model.finetune(train_data,tuning_params)
+        model.finetune(train_data, tuning_params)
         prediction = model.predict(test_data)
         return prediction, model
 
39 changes: 26 additions & 13 deletions benchmark/benchmark_TSF.py
@@ -53,9 +53,12 @@ def __init__(self,
     def evaluate_loop(self, dataset, experiment_setup: dict = None):
         matplotlib.use('TkAgg')
         train_data = DataLoader(dataset_name=dataset).load_forecast_data()
-        experiment_setup['task_params'] = TsForecastingParams(forecast_length=M4_FORECASTING_LENGTH[dataset[0]])
-        target = train_data.iloc[-experiment_setup['task_params'].forecast_length:, :].values.ravel()
-        train_data = train_data.iloc[:-experiment_setup['task_params'].forecast_length, :]
+        experiment_setup['task_params'] = TsForecastingParams(
+            forecast_length=M4_FORECASTING_LENGTH[dataset[0]])
+        target = train_data.iloc[-experiment_setup['task_params']
+                                 .forecast_length:, :].values.ravel()
+        train_data = train_data.iloc[:-
+                                     experiment_setup['task_params'].forecast_length, :]
         model = FedotIndustrial(**experiment_setup)
         model.fit(train_data)
         prediction = model.predict(train_data)
@@ -68,29 +71,35 @@ def run(self):
         metric_dict = {}
         for dataset_name in self.custom_datasets:
             experiment_setup = deepcopy(self.experiment_setup)
-            prediction, target, model = self.evaluate_loop(dataset_name, experiment_setup)
+            prediction, target, model = self.evaluate_loop(
+                dataset_name, experiment_setup)
             metric = SMAPE(prediction, target).metric()
             metric_dict.update({dataset_name: metric})
-            dataset_path = os.path.join(self.experiment_setup['output_folder'], f'{dataset_name}')
+            dataset_path = os.path.join(
+                self.experiment_setup['output_folder'], f'{dataset_name}')
             if not os.path.exists(dataset_path):
                 os.makedirs(dataset_path)
             basic_results.loc[dataset_name, 'Fedot_Industrial'] = metric
-            basic_results.to_csv(os.path.join(dataset_path, 'metrics_report.csv'))
+            basic_results.to_csv(os.path.join(
+                dataset_path, 'metrics_report.csv'))
             pred_df = pd.DataFrame([target, prediction]).T
             pred_df.columns = ['label', 'prediction']
             pred_df.to_csv(os.path.join(dataset_path, 'prediction.csv'))
             model.solver.save(dataset_path)
             gc.collect()
-        basic_path = os.path.join(self.experiment_setup['output_folder'], 'comprasion_metrics_report.csv')
+        basic_path = os.path.join(
+            self.experiment_setup['output_folder'], 'comprasion_metrics_report.csv')
         basic_results.to_csv(basic_path)
         self.logger.info("Benchmark test finished")
 
     def finetune(self):
         self.logger.info('Benchmark finetune started')
         for dataset_name in self.custom_datasets:
-            composed_model_path = PROJECT_PATH + self.path_to_save + f'/{dataset_name}' + '/0_pipeline_saved'
+            composed_model_path = PROJECT_PATH + self.path_to_save + \
+                f'/{dataset_name}' + '/0_pipeline_saved'
             if os.path.isdir(composed_model_path):
-                self.experiment_setup['output_folder'] = PROJECT_PATH + self.path_to_save
+                self.experiment_setup['output_folder'] = PROJECT_PATH + \
+                    self.path_to_save
                 experiment_setup = deepcopy(self.experiment_setup)
                 prediction, target = self.finetune_loop(
                     dataset_name, experiment_setup)
@@ -99,7 +108,8 @@ def finetune(self):
                                         f'{dataset_name}',
                                         'metrics_report.csv')
             fedot_results = pd.read_csv(dataset_path, index_col=0)
-            fedot_results.loc[dataset_name, 'Fedot_Industrial_finetuned'] = metric
+            fedot_results.loc[dataset_name,
+                              'Fedot_Industrial_finetuned'] = metric
 
             fedot_results.to_csv(dataset_path)
         else:
@@ -112,20 +122,23 @@ def load_local_basic_results(self, path: str = None):
             results = pd.read_csv(path, sep=',', index_col=0).T
             results = results.dropna(axis=1, how='all')
             results = results.dropna(axis=0, how='all')
-            self.experiment_setup['output_folder'] = PROJECT_PATH + self.path_to_save
+            self.experiment_setup['output_folder'] = PROJECT_PATH + \
+                self.path_to_save
             return results
 
     def create_report(self):
         _ = []
         names = []
         for dataset_name in self.custom_datasets:
-            model_result_path = PROJECT_PATH + self.path_to_save + f'/{dataset_name}' + '/metrics_report.csv'
+            model_result_path = PROJECT_PATH + self.path_to_save + \
+                f'/{dataset_name}' + '/metrics_report.csv'
             if os.path.isfile(model_result_path):
                 df = pd.read_csv(model_result_path, index_col=0, sep=',')
                 df = df.fillna(0)
                 if 'Fedot_Industrial_finetuned' not in df.columns:
                     df['Fedot_Industrial_finetuned'] = 0
-                metrics = df.loc[dataset_name, 'Fedot_Industrial':'Fedot_Industrial_finetuned']
+                metrics = df.loc[dataset_name,
+                                 'Fedot_Industrial':'Fedot_Industrial_finetuned']
                 _.append(metrics.T.values)
                 names.append(dataset_name)
         stacked_results = np.stack(_, axis=1).T
39 changes: 25 additions & 14 deletions benchmark/feature_utils.py
@@ -52,7 +52,8 @@ def denoise(x, wavelet='haar', level=1):
     sigma = (1 / 0.6745) * maddest(coeff[-level])
 
     uthresh = sigma * np.sqrt(2 * np.log(len(x)))
-    coeff[1:] = (pywt.threshold(i, value=uthresh, mode='hard') for i in coeff[1:])
+    coeff[1:] = (pywt.threshold(i, value=uthresh, mode='hard')
+                 for i in coeff[1:])
 
     ret = pywt.waverec(coeff, wavelet, mode='per')
 
@@ -96,7 +97,8 @@ def spectrogram_from_eeg(parquet_path, display=False):
                                                   n_fft=1024, n_mels=128, fmin=0, fmax=20, win_length=128)
         # LOG TRANSFORM
         width = (mel_spec.shape[1] // 32) * 32
-        mel_spec_db = power_to_db(mel_spec, ref=np.max).astype(np.float32)[:, :width]
+        mel_spec_db = power_to_db(
+            mel_spec, ref=np.max).astype(np.float32)[:, :width]
 
         # STANDARDIZE TO -1 TO 1
         mel_spec_db = (mel_spec_db + 40) / 40
@@ -115,9 +117,11 @@ def __init__(self, is_train=True):
     def _read_data(self, data_type, file_id):
 
         if self.is_train:
-            PATH = PROJECT_PATH + f"/data/hms-harmful-brain-activity-classification/train_{data_type}/{file_id}.parquet"
+            PATH = PROJECT_PATH + \
+                f"/data/hms-harmful-brain-activity-classification/train_{data_type}/{file_id}.parquet"
         else:
-            PATH = PROJECT_PATH + f"/data/hms-harmful-brain-activity-classification/test_{data_type}/{file_id}.parquet"
+            PATH = PROJECT_PATH + \
+                f"/data/hms-harmful-brain-activity-classification/test_{data_type}/{file_id}.parquet"
 
         return pd.read_parquet(PATH)
 
@@ -133,14 +137,16 @@ def read_eeg_built_spectrogram_data(self, eeg_id) -> pd.DataFrame:
         spec = pd.DataFrame()
 
         if self.is_train:
-            _ = PROJECT_PATH + f"/data/hms-harmful-brain-activity-classification/EEG_Spectrograms/{eeg_id}.npy"
+            _ = PROJECT_PATH + \
+                f"/data/hms-harmful-brain-activity-classification/EEG_Spectrograms/{eeg_id}.npy"
             eeg_specs = np.load(_)
         else:
             eeg_specs = spectrogram_from_eeg(
                 f"/kaggle/input/hms-harmful-brain-activity-classification/test_eegs/{eeg_id}.parquet")
 
         for i in range(len(montages)):
-            spec = pd.concat([spec, pd.DataFrame(eeg_specs[:, :, i]).T.add_prefix(f'{montages[i]}_')], axis=1)
+            spec = pd.concat([spec, pd.DataFrame(
+                eeg_specs[:, :, i]).T.add_prefix(f'{montages[i]}_')], axis=1)
 
         return spec
 
@@ -221,9 +227,9 @@ def apply_mask(df):
                 .set_axis(['var_1', 'var_2', 'corr'], axis=1)
                 .query("var_1 != var_2")
                 .assign(
-                row_id=self.row_id,
-                label=lambda x: x.var_1 + "_" + x.var_2
-            )
+                    row_id=self.row_id,
+                    label=lambda x: x.var_1 + "_" + x.var_2
+                )
                 .pivot(columns='label', values='corr', index='row_id')
                 .add_prefix('cor_')
             )
@@ -272,7 +278,8 @@ def format_eeg_data(self, window_sizes={}):
 
         offset_range = self.get_offset()
 
-        df = self.read_eeg_data(self.metadata['eeg_id']).iloc[offset_range[0]:offset_range[1]]
+        df = self.read_eeg_data(
+            self.metadata['eeg_id']).iloc[offset_range[0]:offset_range[1]]
 
         eeg_df = pd.DataFrame()
         for window in window_sizes:
@@ -281,7 +288,8 @@
 
             eeg_df = pd.concat([
                 eeg_df,
-                self.get_features(df.iloc[left_index:right_index], time_id=window)
+                self.get_features(
+                    df.iloc[left_index:right_index], time_id=window)
             ], axis=1)
 
         return eeg_df
@@ -327,7 +335,8 @@ def format_spectrogram_data(self, window_sizes={}):
 
             spec_df = pd.concat([
                 spec_df,
-                self.get_features(df.loc[middle + left_index:middle + right_index], time_id=window)
+                self.get_features(
+                    df.loc[middle + left_index:middle + right_index], time_id=window)
             ], axis=1)
 
         return spec_df
@@ -346,7 +355,8 @@ def get_features(self, df, time_id) -> pd.DataFrame():
 
 class EEGBuiltSpectrogramFeatures(FeatureEngineerData):
     def format_custom_spectrogram(self, window_sizes={()}):
-        df = self.read_eeg_built_spectrogram_data(self.metadata['eeg_id']).copy()
+        df = self.read_eeg_built_spectrogram_data(
+            self.metadata['eeg_id']).copy()
 
         spec_df = pd.DataFrame()
         for window in window_sizes:
@@ -355,7 +365,8 @@ def format_custom_spectrogram(self, window_sizes={()}):
 
             spec_df = pd.concat([
                 spec_df,
-                self.get_features(df.iloc[left_index:right_index], time_id=window)
+                self.get_features(
+                    df.iloc[left_index:right_index], time_id=window)
             ], axis=1)
 
         return spec_df
@@ -23,5 +23,5 @@
 else:
     # tutorial sample of opt history
     opt_hist = PROJECT_PATH + '/examples/data/forecasting/D1679_opt_history/'
-    opt_hist = industrial.vis_optimisation_history(opt_history_path=opt_hist, return_history=True)
-
+    opt_hist = industrial.vis_optimisation_history(
+        opt_history_path=opt_hist, return_history=True)
@@ -12,7 +12,8 @@
 input_train_data = init_input_data(train_data[0], train_data[1])
 input_test_data = init_input_data(test_data[0], test_data[1])
 
-metric_dict = {'accuracy': accuracy_score, 'f1': f1_score, 'roc_auc': roc_auc_score}
+metric_dict = {'accuracy': accuracy_score,
+               'f1': f1_score, 'roc_auc': roc_auc_score}
 with IndustrialModels():
     pipeline = PipelineBuilder().add_node('recurrence_extractor', params={'window_size': 30,
                                                                           'stride': 5,