Skip to content

Commit

Permalink
fix after
Browse files Browse the repository at this point in the history
  • Loading branch information
valer1435 committed Jan 23, 2024
1 parent 65bf338 commit aa6de58
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 30 deletions.
79 changes: 67 additions & 12 deletions fedot_ind/api/utils/checkers_collections.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,62 @@
import logging
from typing import Union

import pandas as pd

from fedot_ind.api.utils.data import check_multivariate_data
from fedot_ind.core.architecture.settings.computational import backend_methods as np
from fedot.core.data.data import InputData
from fedot.core.repository.dataset_types import DataTypesEnum
from sklearn.preprocessing import LabelEncoder
from fedot.core.repository.tasks import Task, TaskTypesEnum
from sklearn.preprocessing import LabelEncoder

from fedot_ind.api.utils.data import check_multivariate_data
from fedot_ind.core.architecture.preprocessing.data_convertor import NumpyConverter
from fedot_ind.core.architecture.settings.computational import backend_methods as np


class DataCheck:
"""Class for checking and preprocessing input data for Fedot AutoML.
Args:
input_data: Input data in tuple format (X, y) or Fedot InputData object.
task: Machine learning task, either "classification" or "regression".
Attributes:
logger (logging.Logger): Logger instance for logging messages.
input_data (InputData): Preprocessed and initialized Fedot InputData object.
task (str): Machine learning task for the dataset.
task_dict (dict): Mapping of string task names to Fedot Task objects.
"""

def __init__(self,
input_data,
task):
input_data: Union[tuple, InputData],
task: str):
self.logger = logging.getLogger(self.__class__.__name__)
self.input_data = input_data
self.task = task
self.task_dict = {'classification': Task(TaskTypesEnum.classification),
'regression': Task(TaskTypesEnum.regression)}

def _init_input_data(self):
def _init_input_data(self) -> None:
"""Initializes the `input_data` attribute based on its type.
If a tuple (X, y) is provided, it converts it to a Fedot InputData object
with appropriate data types and task information. If an existing InputData
object is provided, it checks if it requires further initialization.
Raises:
ValueError: If the input data format is invalid.
"""

if type(self.input_data) is tuple:
if isinstance(self.input_data, tuple):
X, y = self.input_data[0], self.input_data[1]
if type(X) is not pd.DataFrame:
X = pd.DataFrame(X)
is_multivariate_data = check_multivariate_data(X)

if is_multivariate_data:
self.input_data = InputData(idx=np.arange(len(X)),
features=np.array(
X.values.tolist()).astype(float),
features=np.array(X.values.tolist()).astype(np.float),
target=y.reshape(-1, 1),
task=self.task_dict[self.task],
data_type=DataTypesEnum.image)
Expand All @@ -45,16 +68,36 @@ def _init_input_data(self):
data_type=DataTypesEnum.image)
elif type(self.input_data) is InputData:
return
else:
raise ValueError(f"Invalid input data format: {type(self.input_data)}")

def _check_input_data_features(self) -> None:
"""Checks and preprocesses the features in the input data.
- Replaces NaN and infinite values with 0.
- Converts features to torch format using NumpyConverter.
"""

def _check_input_data_features(self):
self.input_data.features = np.where(
np.isnan(self.input_data.features), 0, self.input_data.features)
self.input_data.features = np.where(
np.isinf(self.input_data.features), 0, self.input_data.features)
self.input_data.features = NumpyConverter(
data=self.input_data.features).convert_to_torch_format()

if self.task == 'regression':
self.input_data.target = self.input_data.target.squeeze()
elif self.task == 'classification':
self.input_data.target[self.input_data.target == -1] = 0

def _check_input_data_target(self):
"""Checks and preprocesses the target variable in the input data.
- Encodes labels if the task is classification.
- Casts the target variable to float if the task is regression.
"""
if type(self.input_data.target[0][0]) is np.str_ and self.task == 'classification':
label_encoder = LabelEncoder()
self.input_data.target = label_encoder.fit_transform(
Expand All @@ -67,7 +110,19 @@ def _check_input_data_target(self):
elif self.task == 'classification':
self.input_data.target[self.input_data.target == -1] = 0

def check_input_data(self):
def check_input_data(self) -> InputData:
"""Checks and preprocesses the input data for Fedot AutoML.
Performs the following steps:
1. Initializes the `input_data` attribute based on its type.
2. Checks and preprocesses the features (replacing NaNs, converting to torch format).
3. Checks and preprocesses the target variable (encoding labels, casting to float).
Returns:
InputData: The preprocessed and initialized Fedot InputData object.
"""

self._init_input_data()
self._check_input_data_features()
self._check_input_data_target()
Expand Down
39 changes: 21 additions & 18 deletions fedot_ind/core/architecture/settings/computational.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
from itertools import chain

import torch
from fastcore.basics import defaults



class BackendMethods:
def __init__(self, device_type: str = 'CUDA'):
self.backend = self.define_backend(device_type)

def define_backend(self, device_type: str = 'CUDA'):
if device_type == 'CUDA_':
if device_type == 'CUDA':
import cupy, cupyx.scipy.linalg
return cupy, cupyx.scipy.linalg
else:
Expand All @@ -30,24 +27,22 @@ def _has_mps():
def global_imports(object_name: str,
short_name: str = None,
context_module_name: str = None):
"""import from local function as global import
Use this statement to import inside a function,
but effective as import at the top of the module.
"""Imports a module or object from inside a function as if it were a global import. Use this
statement to import inside a function with the same effect as an import at the top of the module.
Args:
object_name: the object name want to import,
could be module or function
object_name: the name of the object to import; can be a module or a function
short_name: the short name for the import
context_module_name: the context module name in the import
example usage:
import os -> global_imports("os")
from fedot_ind.core.architecture.settings.computational import backend_methods as np -> global_imports("numpy", "np")
from collections import Counter ->
global_imports("Counter", None, "collections")
from google.cloud import storage ->
global_imports("storage", None, "google.cloud")
Examples:
Do this::
import os -> global_imports("os")
import numpy as np -> global_imports("numpy", "np")
from collections import Counter ->
global_imports("Counter", None, "collections")
from google.cloud import storage ->
global_imports("storage", None, "google.cloud")
"""

Expand All @@ -62,7 +57,15 @@ def global_imports(object_name: str,


def default_device(device_type: str = 'CUDA'):
"Return or set default device; `use_cuda`: -1 - CUDA/mps if available; True - error if not available; False - CPU"
"""Return or set default device. Modified from fastai.
Args:
device_type: 'CUDA' or 'CPU' or None (default: 'CUDA'). If None, use CUDA if available, else CPU.
Returns:
torch.device: The default device: CUDA if available, else CPU.
"""
if device_type == 'CUDA':
device_type = defaults.use_cuda
else:
Expand Down

0 comments on commit aa6de58

Please sign in to comment.