Adding example for tabular data (#117)

* Adding example for tabular dataset
aimclub · Feb 8, 2024 · bb35305 · bb35305
1 parent 3838686
commit bb35305
Show file tree

Hide file tree

Showing 5 changed files with 20,920 additions and 0 deletions.
diff --git a/examples/tabular/requirements.txt b/examples/tabular/requirements.txt
@@ -0,0 +1,33 @@
+# FEDOT is installed from the fi_exp_prep branch of the git repository;
+# pip only treats the URL as a VCS checkout when it carries the git+ prefix.
+fedot @ git+https://github.com/aimclub/FEDOT.git@fi_exp_prep
+
+# workaround for macos
+catboost==1.1.1; sys_platform == 'darwin'
+
+giotto_tda==0.6.0
+hyperopt==0.2.7
+matplotlib>=3.3.1; python_version >= '3.8'
+MKLpy==0.6
+
+numpy>=1.16.0, !=1.24.0
+pandas>=1.3.0; python_version >='3.8'
+
+Pillow==10.0.0
+PyMonad==2.4.0
+PyWavelets==1.4.1
+PyYAML==6.0.1
+
+ripser==0.6.4
+
+scikit_learn>=1.0.0; python_version >= '3.8'
+
+scipy~=1.7.3
+sktime==0.16.1
+tensorly==0.8.1
+torch==2.0.0
+torchmetrics==0.11.4
+torchvision==0.15.1
+tensorboard>=2.12.0
+statsforecast==1.5.0
+
+chardet
+tqdm
diff --git a/examples/tabular/scoring_prediction.py b/examples/tabular/scoring_prediction.py
@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+
+"""scoring_prediction.ipynb
+
+
+
+## Imports
+
+"""
+
+import pandas as pd
+from fedot_ind import fedot_api
+from sklearn.model_selection import train_test_split
+
+"""## Opening Data"""
+
+# Read the scoring dataset; the first CSV column becomes the row index.
+target = 'target'
+data = pd.read_csv('scoring_train.csv', index_col=0)
+
+# Hold out 30% of the rows for evaluation; every column except the target is a feature.
+X_train, X_test, y_train, y_test = train_test_split(
+    data.drop(columns=[target]),
+    data[target],
+    test_size=0.3,
+)
+
+print(
+    'Shape of train', X_train.shape,
+    'and test', X_test.shape,
+)
+
+"""## Experiments settings"""
+
+# Settings passed to every Fedot(...) call in this script.
+METRIC = 'roc_auc'
+TUNING = False
+N_JOBS = 1
+# NOTE(review): both timeouts are presumably expressed in minutes - confirm against the FEDOT API.
+TIMEOUT = 15
+EARLY_STOPPING_TIMEOUT = 45
+
+"""## Fedot (master)"""
+
+# Baseline run: a classification AutoML search configured only by the
+# experiment constants defined above.
+automl = fedot_api.Fedot(
+    problem='classification',
+    timeout=TIMEOUT,
+    n_jobs=N_JOBS,
+    metric=METRIC,
+    with_tuning=TUNING,
+    early_stopping_timeout=EARLY_STOPPING_TIMEOUT,
+    show_progress=True
+)
+
+automl.fit(features=X_train, target=y_train)
+# The returned prediction is not stored.
+# NOTE(review): get_metrics presumably scores the most recent prediction - confirm.
+automl.predict(features=X_test)
+metric_after_1 = automl.get_metrics(target=y_test)
+print(metric_after_1)
+fedot_industrial_report = automl.return_report()
+# A bare `.head(10)` expression only renders in a notebook cell; print it so
+# the first ten report rows are also shown when this runs as a script.
+print(fedot_industrial_report.head(10))
+
+"""## Fedot with use_auto_preprocessing (master)"""
+
+# Second run: same settings as the baseline, but with automatic preprocessing
+# switched on. Without this flag the call is identical to the baseline run and
+# the comparison announced by the section heading is meaningless.
+# NOTE(review): confirm the installed FEDOT branch accepts `use_auto_preprocessing`.
+automl = fedot_api.Fedot(
+    problem='classification',
+    timeout=TIMEOUT,
+    n_jobs=N_JOBS,
+    metric=METRIC,
+    with_tuning=TUNING,
+    early_stopping_timeout=EARLY_STOPPING_TIMEOUT,
+    show_progress=True,
+    use_auto_preprocessing=True
+)
+
+automl.fit(features=X_train, target=y_train)
+# The returned prediction is not stored.
+# NOTE(review): get_metrics presumably scores the most recent prediction - confirm.
+automl.predict(features=X_test)
+metric_after_2 = automl.get_metrics(target=y_test)
+print(metric_after_2)
+fedot_industrial_report = automl.return_report()
+# A bare `.head(10)` expression only renders in a notebook cell; print it so
+# the first ten report rows are also shown when this runs as a script.
+print(fedot_industrial_report.head(10))
+print(automl.history.get_leaderboard())