Autopep8 action #140

Merged
44 commits merged on May 17, 2024
Changes from all commits
44 commits
05ca03c  added pep8 action (technocreep, May 16, 2024)
5c801fb  updated pep8 action (technocreep, May 16, 2024)
ca1f0aa  updated pep8 action (technocreep, May 16, 2024)
642a40d  updated pep8 action (technocreep, May 16, 2024)
a000d52  updated pep8 action (technocreep, May 16, 2024)
9843fc3  updated pep8 action (technocreep, May 16, 2024)
cdae49f  updated pep8 action (technocreep, May 16, 2024)
06bf644  updated pep8 action (technocreep, May 16, 2024)
337e29e  updated pep8 action (technocreep, May 16, 2024)
abd9b82  updated pep8 action (technocreep, May 16, 2024)
18a4bef  updated pep8 action (technocreep, May 16, 2024)
1c59409  updated pep8 action (technocreep, May 16, 2024)
0fb49ce  updated pep8 action (technocreep, May 16, 2024)
118506f  updated pep8 action (technocreep, May 16, 2024)
1380da5  updated pep8 action (technocreep, May 16, 2024)
a635636  updated readme (technocreep, May 17, 2024)
1172976  updated readme (technocreep, May 17, 2024)
b7956fe  updated readme (technocreep, May 17, 2024)
74b885a  Update default params based on tuning search space (#137) (PvtKaefsky, May 17, 2024)
d737a62  major fix (technocreep, May 17, 2024)
f68dd2c  Automated autopep8 fixes (May 17, 2024)
ac53505  added pep8 action (technocreep, May 16, 2024)
0764d0f  updated pep8 action (technocreep, May 16, 2024)
8884475  updated pep8 action (technocreep, May 16, 2024)
26ceb66  updated pep8 action (technocreep, May 16, 2024)
6459b79  updated pep8 action (technocreep, May 16, 2024)
f1d5e4b  updated pep8 action (technocreep, May 16, 2024)
3406eb7  updated pep8 action (technocreep, May 16, 2024)
d0d931d  updated pep8 action (technocreep, May 16, 2024)
65072da  updated pep8 action (technocreep, May 16, 2024)
a0661b3  updated pep8 action (technocreep, May 16, 2024)
d7ae13c  updated pep8 action (technocreep, May 16, 2024)
eddbb8b  updated pep8 action (technocreep, May 16, 2024)
3c9ddf0  updated pep8 action (technocreep, May 16, 2024)
801b78c  updated pep8 action (technocreep, May 16, 2024)
acae79c  updated pep8 action (technocreep, May 16, 2024)
d37924c  updated readme (technocreep, May 17, 2024)
1cebcb1  updated readme (technocreep, May 17, 2024)
5f3af3d  updated readme (technocreep, May 17, 2024)
14b560e  major fix (technocreep, May 17, 2024)
8da15d7  Automated autopep8 fixes (May 17, 2024)
3c6d2af  minor fix (technocreep, May 17, 2024)
d88dbe4  Merge remote-tracking branch 'origin/autopep8_action' into autopep8_a… (technocreep, May 17, 2024)
3500d21  Automated autopep8 fixes (May 17, 2024)
24 changes: 24 additions & 0 deletions .github/workflows/pep8_action.yml
@@ -0,0 +1,24 @@
name: autopep8
on: pull_request
jobs:
  autopep8:
    # Check if the PR is not from a fork
    if: github.event.pull_request.head.repo.full_name == github.repository
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
        with:
          token: ${{ secrets.PEP8_TOKEN }}
          ref: ${{ github.head_ref }}
      - name: autopep8
        id: autopep8
        uses: peter-evans/autopep8@v2
        with:
          args: --exit-code --recursive --in-place --aggressive --aggressive .
      - name: Commit autopep8 changes
        if: steps.autopep8.outputs.exit-code == 2
        run: |
          git config --global user.name 'autopep8 bot'
          git config --global user.email 'autopep8bot@users.noreply.github.com'
          git push
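
The commit step only fires when the autopep8 run reports exit code 2, i.e. when fixes were actually applied. For contributors who want to apply roughly the same formatting before pushing, a minimal local sketch using autopep8's fix_code API is shown below; it is only an approximation of the CI step (which runs recursively over the whole repository), it assumes autopep8 is installed locally, and the target path is a placeholder rather than a file the action singles out.

# Rough local equivalent of the peter-evans/autopep8 step above:
# format one file in place with two levels of aggressiveness,
# mirroring the "--aggressive --aggressive" flags.
import autopep8  # assumes `pip install autopep8`

path = "benchmark/benchmark_TSC.py"  # placeholder: any tracked .py file

with open(path, "r", encoding="utf-8") as src:
    original = src.read()

# 'aggressive': 2 corresponds to passing --aggressive twice on the CLI
fixed = autopep8.fix_code(original, options={"aggressive": 2})

if fixed != original:  # only rewrite the file when autopep8 changed something
    with open(path, "w", encoding="utf-8") as dst:
        dst.write(fixed)
    print(f"reformatted {path}")
else:
    print(f"no changes for {path}")

Because the real CI step walks the entire repository, its results can differ on files this sketch never touches.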
20 changes: 13 additions & 7 deletions benchmark/benchmark_TSC.py
@@ -57,12 +57,15 @@ def run(self):
metric = Accuracy(target, prediction).metric()
metric_dict.update({dataset_name: metric})
basic_results.loc[dataset_name, 'Fedot_Industrial'] = metric
dataset_path = os.path.join(self.experiment_setup['output_folder'], f'{dataset_name}',
'metrics_report.csv')
dataset_path = os.path.join(
self.experiment_setup['output_folder'],
f'{dataset_name}',
'metrics_report.csv')
basic_results.to_csv(dataset_path)
gc.collect()
basic_path = os.path.join(
self.experiment_setup['output_folder'], 'comprasion_metrics_report.csv')
self.experiment_setup['output_folder'],
'comprasion_metrics_report.csv')
basic_results.to_csv(basic_path)
self.logger.info("Benchmark test finished")

@@ -72,8 +75,9 @@ def finetune(self):
for dataset_name in self.custom_datasets:
path_to_results = PROJECT_PATH + \
self.path_to_save + f'/{dataset_name}'
composed_model_path = [path_to_results + f'/{x}' for x in os.listdir(path_to_results)
if x.__contains__('pipeline_saved')]
composed_model_path = [
path_to_results +
f'/{x}' for x in os.listdir(path_to_results) if x.__contains__('pipeline_saved')]
metric_result = {}
for p in composed_model_path:
if os.path.isdir(p):
@@ -97,8 +101,10 @@ def finetune(self):
f'OLD VERSION OF PIPELINE. IT IS A LAST SAVED MODEL')
else:
print(f"No composed model for dataset - {dataset_name}")
dataset_path = os.path.join(self.experiment_setup['output_folder'], f'{dataset_name}',
'metrics_report.csv')
dataset_path = os.path.join(
self.experiment_setup['output_folder'],
f'{dataset_name}',
'metrics_report.csv')
fedot_results = pd.read_csv(dataset_path, index_col=0)
if len(metric_result) != 0:
best_metric = 0
16 changes: 10 additions & 6 deletions benchmark/benchmark_TSER.py
@@ -52,10 +52,13 @@ def run(self):
metric_dict.update({dataset_name: metric})
basic_results.loc[dataset_name, 'Fedot_Industrial'] = metric
dataset_path = os.path.join(
self.experiment_setup['output_folder'], f'{dataset_name}', 'metrics_report.csv')
self.experiment_setup['output_folder'],
f'{dataset_name}',
'metrics_report.csv')
basic_results.to_csv(dataset_path)
basic_path = os.path.join(
self.experiment_setup['output_folder'], 'comprasion_metrics_report.csv')
self.experiment_setup['output_folder'],
'comprasion_metrics_report.csv')
basic_results.to_csv(basic_path)
self.logger.info("Benchmark test finished")

@@ -74,10 +77,11 @@ def load_local_basic_results(self, path: str = None):
def finetune(self):
for dataset_name in self.custom_datasets:
experiment_setup = deepcopy(self.experiment_setup)
path_to_results = PROJECT_PATH + '/benchmark/results/ts_regression' + \
f'/{dataset_name}'
composed_model_path = [path_to_results + f'/{x}' for x in os.listdir(path_to_results)
if x.__contains__('pipeline_saved')]
path_to_results = PROJECT_PATH + \
'/benchmark/results/ts_regression' + f'/{dataset_name}'
composed_model_path = [
path_to_results +
f'/{x}' for x in os.listdir(path_to_results) if x.__contains__('pipeline_saved')]
for p in composed_model_path:
experiment_setup['output_folder'] = path_to_results
prediction, model = self.finetune_loop(
10 changes: 6 additions & 4 deletions benchmark/benchmark_TSF.py
@@ -88,7 +88,8 @@ def run(self):
model.solver.save(dataset_path)
gc.collect()
basic_path = os.path.join(
self.experiment_setup['output_folder'], 'comprasion_metrics_report.csv')
self.experiment_setup['output_folder'],
'comprasion_metrics_report.csv')
basic_results.to_csv(basic_path)
self.logger.info("Benchmark test finished")

@@ -104,9 +105,10 @@ def finetune(self):
prediction, target = self.finetune_loop(
dataset_name, experiment_setup)
metric = RMSE(target, prediction).metric()
dataset_path = os.path.join(self.experiment_setup['output_folder'],
f'{dataset_name}',
'metrics_report.csv')
dataset_path = os.path.join(
self.experiment_setup['output_folder'],
f'{dataset_name}',
'metrics_report.csv')
fedot_results = pd.read_csv(dataset_path, index_col=0)
fedot_results.loc[dataset_name,
'Fedot_Industrial_finetuned'] = metric
26 changes: 20 additions & 6 deletions benchmark/feature_utils.py
@@ -93,8 +93,16 @@ def spectrogram_from_eeg(parquet_path, display=False):
signals.append(x)

# RAW SPECTROGRAM
mel_spec = melspectrogram(y=x, sr=200, hop_length=len(x) // 256,
n_fft=1024, n_mels=128, fmin=0, fmax=20, win_length=128)
mel_spec = melspectrogram(
y=x,
sr=200,
hop_length=len(x) //
256,
n_fft=1024,
n_mels=128,
fmin=0,
fmax=20,
win_length=128)
# LOG TRANSFORM
width = (mel_spec.shape[1] // 32) * 32
mel_spec_db = power_to_db(
@@ -126,7 +134,9 @@ def _read_data(self, data_type, file_id):
return pd.read_parquet(PATH)

def read_spectrogram_data(self, spectrogram_id):
return self._read_data('spectrograms', spectrogram_id).set_index('time')
return self._read_data(
'spectrograms',
spectrogram_id).set_index('time')

def read_eeg_data(self, eeg_id) -> pd.DataFrame:
return self._read_data('eegs', eeg_id)
@@ -160,7 +170,8 @@ def read_train_data(self):

def read_test_data(self):
TEST_PATH = PROJECT_PATH + '/data/hms-harmful-brain-activity-classification/test.csv'
return pd.read_csv("/kaggle/input/hms-harmful-brain-activity-classification/test.csv")
return pd.read_csv(
"/kaggle/input/hms-harmful-brain-activity-classification/test.csv")


class FeatureEngineerData(ReadData):
@@ -238,7 +249,8 @@ def filter_spectrogram_corr(self, corr_df) -> pd.DataFrame:
"""
Returns a dataframe with only the correlation across the same frequency
"""
return corr_df[[col for col in corr_df.columns if col.split('_')[2] == col.split('_')[4]]]
return corr_df[[col for col in corr_df.columns if col.split('_')[
2] == col.split('_')[4]]]

def filter_eegspectrogram_corr(self, corr_df) -> pd.DataFrame:
pass
@@ -272,7 +284,9 @@ def get_offset(self):
if self.metadata.get('right_eeg_index') is None:
return [0, 10000]
else:
return [self.metadata['left_eeg_index'], self.metadata['right_eeg_index']]
return [
self.metadata['left_eeg_index'],
self.metadata['right_eeg_index']]

def format_eeg_data(self, window_sizes={}):

@@ -15,9 +15,8 @@
n_jobs=2,
logging_level=10)

industrial, labels, metrics = industrial_common_modelling_loop(api_config=api_config,
dataset_name=dataset_name,
finetune=finetune)
industrial, labels, metrics = industrial_common_modelling_loop(
api_config=api_config, dataset_name=dataset_name, finetune=finetune)
if return_history:
opt_hist = industrial.save_optimization_history(return_history=True)
else:
@@ -15,12 +15,16 @@
metric_dict = {'accuracy': accuracy_score,
'f1': f1_score, 'roc_auc': roc_auc_score}
with IndustrialModels():
pipeline = PipelineBuilder().add_node('recurrence_extractor', params={'window_size': 30,
'stride': 5,
'image_mode': True}) \
.add_node('resnet_model', params={'epochs': 50,
'model_name': 'ResNet50one'}) \
.build()
pipeline = PipelineBuilder().add_node(
'recurrence_extractor',
params={
'window_size': 30,
'stride': 5,
'image_mode': True}) .add_node(
'resnet_model',
params={
'epochs': 50,
'model_name': 'ResNet50one'}) .build()
pipeline.fit(input_train_data)
output = pipeline.predict(input_test_data)

@@ -14,12 +14,17 @@
metric_dict = {'accuracy': accuracy_score,
'f1': f1_score, 'roc_auc': roc_auc_score}
with IndustrialModels():
pipeline = PipelineBuilder().add_node('recurrence_extractor', params={'window_size': 30,
'stride': 1,
'image_mode': True}) \
.add_node('resnet_model', params={'epochs': 5,
'model_name': 'ResNet18',
'batch_size': 64}).build()
pipeline = PipelineBuilder().add_node(
'recurrence_extractor',
params={
'window_size': 30,
'stride': 1,
'image_mode': True}) .add_node(
'resnet_model',
params={
'epochs': 5,
'model_name': 'ResNet18',
'batch_size': 64}).build()
pipeline.fit(input_train_data)
output = pipeline.predict(input_test_data)

@@ -31,7 +31,9 @@ def predict(self, X):
return self.estimator.predict(init_input_data(X, None)).predict

def predict_proba(self, X):
return self.estimator.predict(init_input_data(X, None), output_mode='probs').predict
return self.estimator.predict(
init_input_data(
X, None), output_mode='probs').predict


if __name__ == "__main__":
@@ -10,21 +10,26 @@
NUM_CLASSES = 21
TASK = 'image_classification'

model_dict = {'basic': FedotIndustrial(task=TASK, num_classes=NUM_CLASSES),
'advanced': FedotIndustrial(task=TASK, num_classes=NUM_CLASSES, optimization='svd',
optimization_params={'energy_thresholds': [0.99]})}
model_dict = {
'basic': FedotIndustrial(
task=TASK,
num_classes=NUM_CLASSES),
'advanced': FedotIndustrial(
task=TASK,
num_classes=NUM_CLASSES,
optimization='svd',
optimization_params={
'energy_thresholds': [0.99]})}


def run_industrial_model(model_type: str = 'basic'):
fed = model_dict[model_type]

trained_model = fed.fit(dataset_path=DATASETS_PATH, transform=Compose([ToTensor(),
Resize((256, 256),
antialias=None)]))
trained_model = fed.fit(dataset_path=DATASETS_PATH, transform=Compose(
[ToTensor(), Resize((256, 256), antialias=None)]))

predict = fed.predict(data_path=TEST_IMAGE_FOLDER, transform=Compose([ToTensor(),
Resize((256, 256),
antialias=None)]))
predict = fed.predict(data_path=TEST_IMAGE_FOLDER, transform=Compose(
[ToTensor(), Resize((256, 256), antialias=None)]))

plt.figure(figsize=(20, 10))
for i in range(1, 7):
@@ -33,7 +33,10 @@ def run_industrial_model(model_type: str = 'basic'):
data_path=os.path.join(DATASETS_PATH, 'test/images'))
image = random.choice(list(predict.keys()))
fig = draw_sample_with_bboxes(
image=image, target=predict[image], prediction=predict_proba[image], threshold=0.2)
image=image,
target=predict[image],
prediction=predict_proba[image],
threshold=0.2)

return trained_model

@@ -12,7 +12,6 @@
n_jobs=2,
logging_level=20)

model, labels, metrics = industrial_common_modelling_loop(api_config=api_config,
dataset_name=dataset_name,
finetune=finetune)
model, labels, metrics = industrial_common_modelling_loop(
api_config=api_config, dataset_name=dataset_name, finetune=finetune)
print(metrics)
@@ -11,12 +11,13 @@
benchmark = 'M4'
finetune = False

forecast_result_path = os.listdir(PROJECT_PATH +
'/examples/automl_example/api_example/time_series/ts_forecasting/forecasts/')
forecast_result_path = os.listdir(
PROJECT_PATH +
'/examples/automl_example/api_example/time_series/ts_forecasting/forecasts/')
forecast_result_path = set([x.split('_')[0] for x in forecast_result_path])

df_forecast, df_metrics = read_results(PROJECT_PATH +
'/examples/automl_example/api_example/time_series/ts_forecasting/forecasts/')
df_forecast, df_metrics = read_results(
PROJECT_PATH + '/examples/automl_example/api_example/time_series/ts_forecasting/forecasts/')
df_comprasion = create_comprasion_df(df_metrics, 'rmse')

if __name__ == "__main__":
@@ -33,7 +34,7 @@
logging_level=40)

for dataset_name in M4_FORECASTING_BENCH:
if dataset_name in industrial_loss and dataset_name.__contains__('W'):#
if dataset_name in industrial_loss and dataset_name.__contains__('W'):
print('Already evaluated, but with bad metrics')
horizon = M4_FORECASTING_LENGTH[dataset_name[0]]
api_config.update(task_params={'forecast_length': horizon})
@@ -59,8 +60,8 @@
model.save_best_model()
model.save_optimization_history()

if metrics_comprasion.T[metrics_comprasion.T['rmse']
== metrics_comprasion.T.min(axis=0).values[0]].index[0] == 'industrial':
if metrics_comprasion.T[metrics_comprasion.T['rmse'] == metrics_comprasion.T.min(
axis=0).values[0]].index[0] == 'industrial':
forecast.to_csv(f'./{dataset_name}_forecast.csv')
metrics_comprasion.to_csv(f'./{dataset_name}_metrics.csv')

@@ -6,7 +6,7 @@

if __name__ == "__main__":
dataset_name = PROJECT_PATH + \
'/examples/data/forecasting\monash_benchmark\MonashBitcoin_30.csv'
'/examples/data/forecasting\\monash_benchmark\\MonashBitcoin_30.csv'
horizon = 60
metric_names = ('smape', 'rmse', 'median_absolute_error')

@@ -9,7 +9,6 @@
n_jobs=2,
logging_level=20)
metric_names = ('r2', 'rmse', 'mae')
model, labels, metrics = industrial_common_modelling_loop(api_config=api_config,
dataset_name=dataset_name,
finetune=finetune)
model, labels, metrics = industrial_common_modelling_loop(
api_config=api_config, dataset_name=dataset_name, finetune=finetune)
print(metrics)