Merge remote-tracking branch 'origin/riemann' into riemann
# Conflicts:
#	fedot_ind/api/utils/checkers_collections.py
#	fedot_ind/api/utils/industrial_strategy.py
#	fedot_ind/core/operation/interfaces/industrial_model_strategy.py
#	fedot_ind/core/operation/interfaces/industrial_preprocessing_strategy.py
#	fedot_ind/core/repository/industrial_implementations/optimisation.py
#	fedot_ind/core/tuning/search_space.py
v1docq committed May 2, 2024
2 parents 169e989 + c43113b commit a9d4244
Showing 250 changed files with 507 additions and 569 deletions.
49 changes: 32 additions & 17 deletions .github/workflows/integration_tests.yml
@@ -1,33 +1,48 @@
 name: Integration Tests
 
 on:
-#  push:
-#    branches: [ main ]
-#  pull_request:
-#    branches: [ main ]
   workflow_dispatch:
 
 jobs:
-  build:
+  test:
     runs-on: ubuntu-latest
-    timeout-minutes: 95
+    timeout-minutes: 30
     strategy:
       matrix:
-        python-version: [ 3.9 ]
+        python-version: [3.8, 3.9, '3.10']
 
     steps:
-      - uses: actions/checkout@v2
+      - name: Checkout branch
+        uses: actions/checkout@v3
 
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v3
+      - uses: actions/setup-python@v2
         with:
           python-version: ${{ matrix.python-version }}
 
+      - name: Cache Poetry virtualenv
+        uses: actions/cache@v2
+        with:
+          path: ~/.local/share/poetry/virtualenvs/  # Cache Poetry virtualenvs
+          key: ${{ runner.os }}-poetry-${{ hashFiles('pyproject.toml') }}  # Cache key based on project dependencies
+
+      - name: Install Poetry
+        uses: snok/install-poetry@v1
+        with:
+          version: 1.8.2  # Specify your desired Poetry version (pin it for stability)
+          virtualenvs-create: true
+          virtualenvs-in-project: true  # Create venv within project directory
+
       - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install pytest
-          pip install -r requirements.txt
-          pip install pytest-cov
-      - name: Test with pytest
-        run: |
-          pytest --cov=fedot -s tests/integration
+        run: poetry install
+
+      - name: Run tests with pytest
+        run: poetry run pytest --cov=fedot -s tests/integration
+
+      - name: Codecov-coverage
+        uses: codecov/codecov-action@v3
+        with:
+          file: ./coverage.xml
+          flags: unittests
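
The workflow above switches dependency installation and test execution from pip to Poetry. As a minimal sketch (not part of the commit), the CI steps can be reproduced locally, assuming Poetry 1.8.2 as pinned in the workflow and a pyproject.toml at the repository root:

    pip install poetry==1.8.2                   # match the version pinned in the workflow
    poetry config virtualenvs.in-project true   # create the venv inside the project, as CI does
    poetry install                              # resolve dependencies from pyproject.toml
    poetry run pytest --cov=fedot -s tests/integration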
31 changes: 18 additions & 13 deletions benchmark/benchmark_TSC.py
@@ -70,29 +70,31 @@ def finetune(self):
         self.logger.info('Benchmark finetune started')
         dataset_result = {}
         for dataset_name in self.custom_datasets:
-            path_to_results = PROJECT_PATH + self.path_to_save + f'/{dataset_name}'
+            path_to_results = PROJECT_PATH + \
+                self.path_to_save + f'/{dataset_name}'
             composed_model_path = [path_to_results + f'/{x}' for x in os.listdir(path_to_results)
                                    if x.__contains__('pipeline_saved')]
             metric_result = {}
             for p in composed_model_path:
                 if os.path.isdir(p):
                     try:
                         self.experiment_setup['output_folder'] = PROJECT_PATH + \
-                            self.path_to_save
+                            self.path_to_save
                         experiment_setup = deepcopy(self.experiment_setup)
                         prediction, model = self.finetune_loop(
                             dataset_name, experiment_setup, p)
                         metric_result.update({p:
-                                              {'metric': Accuracy(model.predict_data.target,
-                                                                  prediction.ravel()).metric(),
-                                               'tuned_model': model}})
+                                                  {'metric': Accuracy(model.predict_data.target,
+                                                                      prediction.ravel()).metric(),
+                                                   'tuned_model': model}})
                     except ModuleNotFoundError as ex:
                         print(f'{ex}.OLD VERSION OF PIPELINE. DELETE DIRECTORY')
                         if len(composed_model_path) != 1:
                             print(f'OLD VERSION OF PIPELINE. DELETE DIRECTORY')
                             shutil.rmtree(p)
                         else:
-                            print(f'OLD VERSION OF PIPELINE. IT IS A LAST SAVED MODEL')
+                            print(
+                                f'OLD VERSION OF PIPELINE. IT IS A LAST SAVED MODEL')
                 else:
                     print(f"No composed model for dataset - {dataset_name}")
             dataset_path = os.path.join(self.experiment_setup['output_folder'], f'{dataset_name}',
@@ -102,17 +104,20 @@ def finetune(self):
             best_metric = 0
             for _ in metric_result.keys():
                 if best_metric == 0:
-                    best_metric, best_model, path = metric_result[_]['metric'], metric_result[_]['tuned_model'], _
+                    best_metric, best_model, path = metric_result[_][
+                        'metric'], metric_result[_]['tuned_model'], _
                 elif metric_result[_]['metric'] > best_metric:
-                    best_metric, best_model, path = metric_result[_]['metric'], metric_result[_]['tuned_model'], _
-                fedot_results.loc[dataset_name, 'Fedot_Industrial_finetuned'] = best_metric
+                    best_metric, best_model, path = metric_result[_][
+                        'metric'], metric_result[_]['tuned_model'], _
+                fedot_results.loc[dataset_name,
+                                  'Fedot_Industrial_finetuned'] = best_metric
                 best_model.output_folder = f'{_}_tuned'
                 best_model.save_best_model()
                 fedot_results.to_csv(dataset_path)
             else:
                 fedot_results.to_csv(dataset_path)
             gc.collect()
-            dataset_result.update({dataset_name:metric_result})
+            dataset_result.update({dataset_name: metric_result})
         self.logger.info("Benchmark finetune finished")
 
     def load_local_basic_results(self, path: str = None):
@@ -125,7 +130,7 @@ def load_local_basic_results(self, path: str = None):
             except Exception:
                 results = self.load_web_results()
             self.experiment_setup['output_folder'] = PROJECT_PATH + \
-                self.path_to_save
+                self.path_to_save
             return results
         else:
             return self.results_picker.run(get_metrics_df=True, add_info=True)
@@ -135,14 +140,14 @@ def create_report(self):
         names = []
         for dataset_name in self.custom_datasets:
             model_result_path = PROJECT_PATH + self.path_to_save + \
-                f'/{dataset_name}' + '/metrics_report.csv'
+                f'/{dataset_name}' + '/metrics_report.csv'
             if os.path.isfile(model_result_path):
                 df = pd.read_csv(model_result_path, index_col=0, sep=',')
                 df = df.fillna(0)
                 if 'Fedot_Industrial_finetuned' not in df.columns:
                     df['Fedot_Industrial_finetuned'] = 0
                 metrics = df.loc[dataset_name,
-                                 'Fedot_Industrial':'Fedot_Industrial_finetuned']
+                                 'Fedot_Industrial':'Fedot_Industrial_finetuned']
                 _.append(metrics.T.values)
                 names.append(dataset_name)
         stacked_resutls = np.stack(_, axis=1).T
2 changes: 1 addition & 1 deletion benchmark/benchmark_TSER.py
@@ -102,7 +102,7 @@ def finetune_loop(self, dataset, experiment_setup, composed_model_path):
         model = FedotIndustrial(**experiment_setup)
         model.load(path=composed_model_path)
 
-        model.finetune(train_data,tuning_params)
+        model.finetune(train_data, tuning_params)
         prediction = model.predict(test_data)
         return prediction, model
 
39 changes: 26 additions & 13 deletions benchmark/benchmark_TSF.py
@@ -53,9 +53,12 @@ def __init__(self,
     def evaluate_loop(self, dataset, experiment_setup: dict = None):
         matplotlib.use('TkAgg')
         train_data = DataLoader(dataset_name=dataset).load_forecast_data()
-        experiment_setup['task_params'] = TsForecastingParams(forecast_length=M4_FORECASTING_LENGTH[dataset[0]])
-        target = train_data.iloc[-experiment_setup['task_params'].forecast_length:, :].values.ravel()
-        train_data = train_data.iloc[:-experiment_setup['task_params'].forecast_length, :]
+        experiment_setup['task_params'] = TsForecastingParams(
+            forecast_length=M4_FORECASTING_LENGTH[dataset[0]])
+        target = train_data.iloc[-experiment_setup['task_params']
+                                 .forecast_length:, :].values.ravel()
+        train_data = train_data.iloc[:-
+                                     experiment_setup['task_params'].forecast_length, :]
         model = FedotIndustrial(**experiment_setup)
         model.fit(train_data)
         prediction = model.predict(train_data)
@@ -68,29 +71,35 @@ def run(self):
         metric_dict = {}
         for dataset_name in self.custom_datasets:
             experiment_setup = deepcopy(self.experiment_setup)
-            prediction, target, model = self.evaluate_loop(dataset_name, experiment_setup)
+            prediction, target, model = self.evaluate_loop(
+                dataset_name, experiment_setup)
             metric = SMAPE(prediction, target).metric()
             metric_dict.update({dataset_name: metric})
-            dataset_path = os.path.join(self.experiment_setup['output_folder'], f'{dataset_name}')
+            dataset_path = os.path.join(
+                self.experiment_setup['output_folder'], f'{dataset_name}')
             if not os.path.exists(dataset_path):
                 os.makedirs(dataset_path)
             basic_results.loc[dataset_name, 'Fedot_Industrial'] = metric
-            basic_results.to_csv(os.path.join(dataset_path, 'metrics_report.csv'))
+            basic_results.to_csv(os.path.join(
+                dataset_path, 'metrics_report.csv'))
             pred_df = pd.DataFrame([target, prediction]).T
             pred_df.columns = ['label', 'prediction']
             pred_df.to_csv(os.path.join(dataset_path, 'prediction.csv'))
             model.solver.save(dataset_path)
             gc.collect()
-        basic_path = os.path.join(self.experiment_setup['output_folder'], 'comprasion_metrics_report.csv')
+        basic_path = os.path.join(
+            self.experiment_setup['output_folder'], 'comprasion_metrics_report.csv')
         basic_results.to_csv(basic_path)
         self.logger.info("Benchmark test finished")
 
     def finetune(self):
         self.logger.info('Benchmark finetune started')
         for dataset_name in self.custom_datasets:
-            composed_model_path = PROJECT_PATH + self.path_to_save + f'/{dataset_name}' + '/0_pipeline_saved'
+            composed_model_path = PROJECT_PATH + self.path_to_save + \
+                f'/{dataset_name}' + '/0_pipeline_saved'
             if os.path.isdir(composed_model_path):
-                self.experiment_setup['output_folder'] = PROJECT_PATH + self.path_to_save
+                self.experiment_setup['output_folder'] = PROJECT_PATH + \
+                    self.path_to_save
                 experiment_setup = deepcopy(self.experiment_setup)
                 prediction, target = self.finetune_loop(
                     dataset_name, experiment_setup)
@@ -99,7 +108,8 @@ def finetune(self):
                                         f'{dataset_name}',
                                         'metrics_report.csv')
             fedot_results = pd.read_csv(dataset_path, index_col=0)
-            fedot_results.loc[dataset_name, 'Fedot_Industrial_finetuned'] = metric
+            fedot_results.loc[dataset_name,
+                              'Fedot_Industrial_finetuned'] = metric
 
             fedot_results.to_csv(dataset_path)
         else:
@@ -112,20 +122,23 @@ def load_local_basic_results(self, path: str = None):
             results = pd.read_csv(path, sep=',', index_col=0).T
             results = results.dropna(axis=1, how='all')
             results = results.dropna(axis=0, how='all')
-            self.experiment_setup['output_folder'] = PROJECT_PATH + self.path_to_save
+            self.experiment_setup['output_folder'] = PROJECT_PATH + \
+                self.path_to_save
             return results
 
     def create_report(self):
         _ = []
         names = []
         for dataset_name in self.custom_datasets:
-            model_result_path = PROJECT_PATH + self.path_to_save + f'/{dataset_name}' + '/metrics_report.csv'
+            model_result_path = PROJECT_PATH + self.path_to_save + \
+                f'/{dataset_name}' + '/metrics_report.csv'
             if os.path.isfile(model_result_path):
                 df = pd.read_csv(model_result_path, index_col=0, sep=',')
                 df = df.fillna(0)
                 if 'Fedot_Industrial_finetuned' not in df.columns:
                     df['Fedot_Industrial_finetuned'] = 0
-                metrics = df.loc[dataset_name, 'Fedot_Industrial':'Fedot_Industrial_finetuned']
+                metrics = df.loc[dataset_name,
+                                 'Fedot_Industrial':'Fedot_Industrial_finetuned']
                 _.append(metrics.T.values)
                 names.append(dataset_name)
         stacked_results = np.stack(_, axis=1).T
39 changes: 25 additions & 14 deletions benchmark/feature_utils.py
@@ -52,7 +52,8 @@ def denoise(x, wavelet='haar', level=1):
     sigma = (1 / 0.6745) * maddest(coeff[-level])
 
     uthresh = sigma * np.sqrt(2 * np.log(len(x)))
-    coeff[1:] = (pywt.threshold(i, value=uthresh, mode='hard') for i in coeff[1:])
+    coeff[1:] = (pywt.threshold(i, value=uthresh, mode='hard')
+                 for i in coeff[1:])
 
     ret = pywt.waverec(coeff, wavelet, mode='per')
 
@@ -96,7 +97,8 @@ def spectrogram_from_eeg(parquet_path, display=False):
                                                   n_fft=1024, n_mels=128, fmin=0, fmax=20, win_length=128)
         # LOG TRANSFORM
         width = (mel_spec.shape[1] // 32) * 32
-        mel_spec_db = power_to_db(mel_spec, ref=np.max).astype(np.float32)[:, :width]
+        mel_spec_db = power_to_db(
+            mel_spec, ref=np.max).astype(np.float32)[:, :width]
 
         # STANDARDIZE TO -1 TO 1
         mel_spec_db = (mel_spec_db + 40) / 40
@@ -115,9 +117,11 @@ def __init__(self, is_train=True):
     def _read_data(self, data_type, file_id):
 
         if self.is_train:
-            PATH = PROJECT_PATH + f"/data/hms-harmful-brain-activity-classification/train_{data_type}/{file_id}.parquet"
+            PATH = PROJECT_PATH + \
+                f"/data/hms-harmful-brain-activity-classification/train_{data_type}/{file_id}.parquet"
         else:
-            PATH = PROJECT_PATH + f"/data/hms-harmful-brain-activity-classification/test_{data_type}/{file_id}.parquet"
+            PATH = PROJECT_PATH + \
+                f"/data/hms-harmful-brain-activity-classification/test_{data_type}/{file_id}.parquet"
 
         return pd.read_parquet(PATH)
 
@@ -133,14 +137,16 @@ def read_eeg_built_spectrogram_data(self, eeg_id) -> pd.DataFrame:
         spec = pd.DataFrame()
 
         if self.is_train:
-            _ = PROJECT_PATH + f"/data/hms-harmful-brain-activity-classification/EEG_Spectrograms/{eeg_id}.npy"
+            _ = PROJECT_PATH + \
+                f"/data/hms-harmful-brain-activity-classification/EEG_Spectrograms/{eeg_id}.npy"
             eeg_specs = np.load(_)
         else:
             eeg_specs = spectrogram_from_eeg(
                 f"/kaggle/input/hms-harmful-brain-activity-classification/test_eegs/{eeg_id}.parquet")
 
         for i in range(len(montages)):
-            spec = pd.concat([spec, pd.DataFrame(eeg_specs[:, :, i]).T.add_prefix(f'{montages[i]}_')], axis=1)
+            spec = pd.concat([spec, pd.DataFrame(
+                eeg_specs[:, :, i]).T.add_prefix(f'{montages[i]}_')], axis=1)
 
         return spec
 
@@ -221,9 +227,9 @@ def apply_mask(df):
                 .set_axis(['var_1', 'var_2', 'corr'], axis=1)
                 .query("var_1 != var_2")
                 .assign(
-                row_id=self.row_id,
-                label=lambda x: x.var_1 + "_" + x.var_2
-            )
+                    row_id=self.row_id,
+                    label=lambda x: x.var_1 + "_" + x.var_2
+                )
                 .pivot(columns='label', values='corr', index='row_id')
                 .add_prefix('cor_')
             )
@@ -272,7 +278,8 @@ def format_eeg_data(self, window_sizes={}):
 
         offset_range = self.get_offset()
 
-        df = self.read_eeg_data(self.metadata['eeg_id']).iloc[offset_range[0]:offset_range[1]]
+        df = self.read_eeg_data(
+            self.metadata['eeg_id']).iloc[offset_range[0]:offset_range[1]]
 
         eeg_df = pd.DataFrame()
         for window in window_sizes:
@@ -281,7 +288,8 @@
 
             eeg_df = pd.concat([
                 eeg_df,
-                self.get_features(df.iloc[left_index:right_index], time_id=window)
+                self.get_features(
+                    df.iloc[left_index:right_index], time_id=window)
             ], axis=1)
 
         return eeg_df
@@ -327,7 +335,8 @@ def format_spectrogram_data(self, window_sizes={}):
 
             spec_df = pd.concat([
                 spec_df,
-                self.get_features(df.loc[middle + left_index:middle + right_index], time_id=window)
+                self.get_features(
+                    df.loc[middle + left_index:middle + right_index], time_id=window)
             ], axis=1)
 
         return spec_df
@@ -346,7 +355,8 @@ def get_features(self, df, time_id) -> pd.DataFrame():
 
 class EEGBuiltSpectrogramFeatures(FeatureEngineerData):
     def format_custom_spectrogram(self, window_sizes={()}):
-        df = self.read_eeg_built_spectrogram_data(self.metadata['eeg_id']).copy()
+        df = self.read_eeg_built_spectrogram_data(
+            self.metadata['eeg_id']).copy()
 
         spec_df = pd.DataFrame()
         for window in window_sizes:
@@ -355,7 +365,8 @@ def format_custom_spectrogram(self, window_sizes={()}):
 
             spec_df = pd.concat([
                 spec_df,
-                self.get_features(df.iloc[left_index:right_index], time_id=window)
+                self.get_features(
+                    df.iloc[left_index:right_index], time_id=window)
             ], axis=1)
 
         return spec_df
@@ -23,5 +23,5 @@
 else:
     # tutorial sample of opt history
     opt_hist = PROJECT_PATH + '/examples/data/forecasting/D1679_opt_history/'
-    opt_hist = industrial.vis_optimisation_history(opt_history_path=opt_hist, return_history=True)
-
+    opt_hist = industrial.vis_optimisation_history(
+        opt_history_path=opt_hist, return_history=True)
@@ -12,7 +12,8 @@
 input_train_data = init_input_data(train_data[0], train_data[1])
 input_test_data = init_input_data(test_data[0], test_data[1])
 
-metric_dict = {'accuracy': accuracy_score, 'f1': f1_score, 'roc_auc': roc_auc_score}
+metric_dict = {'accuracy': accuracy_score,
+               'f1': f1_score, 'roc_auc': roc_auc_score}
 with IndustrialModels():
     pipeline = PipelineBuilder().add_node('recurrence_extractor', params={'window_size': 30,
                                                                           'stride': 5,