fix after

aimclub · Jan 23, 2024 · aa6de58 · aa6de58
1 parent 65bf338
commit aa6de58
Show file tree

Hide file tree

Showing 2 changed files with 88 additions and 30 deletions.
diff --git a/fedot_ind/api/utils/checkers_collections.py b/fedot_ind/api/utils/checkers_collections.py
@@ -1,39 +1,62 @@
 import logging
+from typing import Union
 
 import pandas as pd
-
-from fedot_ind.api.utils.data import check_multivariate_data
-from fedot_ind.core.architecture.settings.computational import backend_methods as np
 from fedot.core.data.data import InputData
 from fedot.core.repository.dataset_types import DataTypesEnum
-from sklearn.preprocessing import LabelEncoder
 from fedot.core.repository.tasks import Task, TaskTypesEnum
+from sklearn.preprocessing import LabelEncoder
 
+from fedot_ind.api.utils.data import check_multivariate_data
 from fedot_ind.core.architecture.preprocessing.data_convertor import NumpyConverter
+from fedot_ind.core.architecture.settings.computational import backend_methods as np
 
 
 class DataCheck:
+    """Class for checking and preprocessing input data for Fedot AutoML.
+
+    Args:
+        input_data: Input data in tuple format (X, y) or Fedot InputData object.
+        task: Machine learning task, either "classification" or "regression".
+
+    Attributes:
+        logger (logging.Logger): Logger instance for logging messages.
+        input_data (InputData): Preprocessed and initialized Fedot InputData object.
+        task (str): Machine learning task for the dataset.
+        task_dict (dict): Mapping of string task names to Fedot Task objects.
+
+    """
+
     def __init__(self,
-                 input_data,
-                 task):
+                 input_data: Union[tuple, InputData],
+                 task: str):
         self.logger = logging.getLogger(self.__class__.__name__)
         self.input_data = input_data
         self.task = task
         self.task_dict = {'classification': Task(TaskTypesEnum.classification),
                           'regression': Task(TaskTypesEnum.regression)}
 
-    def _init_input_data(self):
+    def _init_input_data(self) -> None:
+        """Initializes the `input_data` attribute based on its type.
+
+        If a tuple (X, y) is provided, it converts it to a Fedot InputData object
+        with appropriate data types and task information. If an existing InputData
+        object is provided, it is left unchanged.
+
+        Raises:
+            ValueError: If the input data format is invalid.
+
+        """
 
-        if type(self.input_data) is tuple:
+        if isinstance(self.input_data, tuple):
             X, y = self.input_data[0], self.input_data[1]
             if type(X) is not pd.DataFrame:
                 X = pd.DataFrame(X)
             is_multivariate_data = check_multivariate_data(X)
 
             if is_multivariate_data:
                 self.input_data = InputData(idx=np.arange(len(X)),
-                                            features=np.array(
-                                                X.values.tolist()).astype(float),
+                                            features=np.array(X.values.tolist()).astype(float),
                                             target=y.reshape(-1, 1),
                                             task=self.task_dict[self.task],
                                             data_type=DataTypesEnum.image)
@@ -45,16 +68,36 @@ def _init_input_data(self):
                                             data_type=DataTypesEnum.image)
         elif type(self.input_data) is InputData:
             return
+        else:
+            raise ValueError(f"Invalid input data format: {type(self.input_data)}")
+
+    def _check_input_data_features(self) -> None:
+        """Checks and preprocesses the features in the input data.
+
+        - Replaces NaN and infinite values with 0.
+        - Converts features to torch format using NumpyConverter.
+
+        """
 
-    def _check_input_data_features(self):
         self.input_data.features = np.where(
             np.isnan(self.input_data.features), 0, self.input_data.features)
         self.input_data.features = np.where(
             np.isinf(self.input_data.features), 0, self.input_data.features)
         self.input_data.features = NumpyConverter(
             data=self.input_data.features).convert_to_torch_format()
 
+        # Target post-processing is deliberately left to
+        # `_check_input_data_target`: squeezing the target here would make its
+        # `target[0][0]` dtype probe fail on a 1-D array, and that method
+        # already maps the -1 class label to 0.
+
     def _check_input_data_target(self):
+        """Checks and preprocesses the target variable in the input data.
+
+        - Encodes labels if the task is classification.
+        - Casts the target variable to float if the task is regression.
+
+        """
         if type(self.input_data.target[0][0]) is np.str_ and self.task == 'classification':
             label_encoder = LabelEncoder()
             self.input_data.target = label_encoder.fit_transform(
@@ -67,7 +110,19 @@ def _check_input_data_target(self):
         elif self.task == 'classification':
             self.input_data.target[self.input_data.target == -1] = 0
 
-    def check_input_data(self):
+    def check_input_data(self) -> InputData:
+        """Checks and preprocesses the input data for Fedot AutoML.
+
+        Performs the following steps:
+            1. Initializes the `input_data` attribute based on its type.
+            2. Checks and preprocesses the features (replacing NaNs, converting to torch format).
+            3. Checks and preprocesses the target variable (encoding labels, casting to float).
+
+        Returns:
+            InputData: The preprocessed and initialized Fedot InputData object.
+
+        """
+
         self._init_input_data()
         self._check_input_data_features()
         self._check_input_data_target()

diff --git a/fedot_ind/core/architecture/settings/computational.py b/fedot_ind/core/architecture/settings/computational.py
@@ -1,16 +1,13 @@
-from itertools import chain
-
 import torch
 from fastcore.basics import defaults
 
 
-
 class BackendMethods:
     def __init__(self, device_type: str = 'CUDA'):
         self.backend = self.define_backend(device_type)
 
     def define_backend(self, device_type: str = 'CUDA'):
-        if device_type == 'CUDA_':
+        if device_type == 'CUDA':
             import cupy, cupyx.scipy.linalg
             return cupy, cupyx.scipy.linalg
         else:
@@ -30,24 +27,22 @@ def _has_mps():
 def global_imports(object_name: str,
                    short_name: str = None,
                    context_module_name: str = None):
-    """import from local function as global import
-
-    Use this statement to import inside a function,
-    but effective as import at the top of the module.
+    """Imports from local function as global import. Use this statement to import inside
+    a function, but effective as import at the top of the module.
 
     Args:
-        object_name: the object name want to import,
-                     could be module or function
+        object_name: the name of the object to import; may be a module or a function
         short_name: the short name for the import
         context_module_name: the context module name in the import
 
-    example usage:
-    import os -> global_imports("os")
-    from fedot_ind.core.architecture.settings.computational import backend_methods as np -> global_imports("numpy", "np")
-    from collections import Counter ->
-        global_imports("Counter", None, "collections")
-    from google.cloud import storage ->
-        global_imports("storage", None, "google.cloud")
+    Examples:
+        Do this::
+            import os -> global_imports("os")
+            import numpy as np -> global_imports("numpy", "np")
+            from collections import Counter ->
+                global_imports("Counter", None, "collections")
+            from google.cloud import storage ->
+                global_imports("storage", None, "google.cloud")
 
     """
 
@@ -62,7 +57,15 @@ def global_imports(object_name: str,
 
 
 def default_device(device_type: str = 'CUDA'):
-    "Return or set default device; `use_cuda`: -1 - CUDA/mps if available; True - error if not available; False - CPU"
+    """Return or set default device. Modified from fastai.
+
+    Args:
+        device_type: 'CUDA' or 'CPU' or None (default: 'CUDA'). If None, use CUDA if available, else CPU.
+
+    Returns:
+        torch.device: The default device: CUDA if available, else CPU.
+
+    """
     if device_type == 'CUDA':
         device_type = defaults.use_cuda
     else: