Commit: MORE UNIT TESTS
technocreep committed Nov 17, 2023
1 parent e0ed177 commit ffacd2f
Showing 17 changed files with 311 additions and 143 deletions.
22 changes: 9 additions & 13 deletions fedot_ind/api/utils/input_data.py
@@ -11,36 +11,32 @@ def init_input_data(X: pd.DataFrame, y: np.ndarray, task: str = 'classification'
    Args:
        X: pandas DataFrame with features
        y: numpy array with target values
+       task: str, task type, 'classification' or 'regression'
    Returns:
        InputData object convenient for FEDOT framework
    Example:
        To produce input data object::
            rows, cols = 100, 50
            X = pd.DataFrame(np.random.random((rows, cols)))
            y = np.random.randint(0, 2, rows)
            input_data = init_input_data(X, y)
    """
    is_multivariate_data = True if isinstance(X.iloc[0, 0], pd.Series) else False
    if is_multivariate_data:
        input_data = InputData(idx=np.arange(len(X)),
                               features=np.array(X.values.tolist()),
                               target=y.reshape(-1, 1),
-                              # task=Task(TaskTypesEnum.classification),
+                              task=Task(TaskTypesEnum(task)),
                               data_type=DataTypesEnum.image)
    else:
        input_data = InputData(idx=np.arange(len(X)),
                               features=X.values,
                               target=np.ravel(y).reshape(-1, 1),
-                              # task=Task(TaskTypesEnum.classification),
+                              task=Task(TaskTypesEnum(task)),
                               data_type=DataTypesEnum.table)

    return input_data


-if __name__ == '__main__':
-    rows, cols = 100, 50
-
-    X = pd.DataFrame(np.random.random((rows, cols)))
-    y = np.random.randint(0, 2, rows)
-
-    input_data = init_input_data(X, y)
-
-    _ = 1
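Note (not part of the commit): a minimal usage sketch for the new task argument, mirroring the deleted __main__ block; it assumes TaskTypesEnum accepts the string values 'classification' and 'regression'.

    import numpy as np
    import pandas as pd

    from fedot_ind.api.utils.input_data import init_input_data

    rows, cols = 100, 50
    X = pd.DataFrame(np.random.random((rows, cols)))

    # classification target (the default task)
    y_clf = np.random.randint(0, 2, rows)
    clf_data = init_input_data(X, y_clf)

    # regression target, routed through Task(TaskTypesEnum('regression'))
    y_reg = np.random.random(rows)
    reg_data = init_input_data(X, y_reg, task='regression')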
Empty file.

This file was deleted.

25 changes: 9 additions & 16 deletions fedot_ind/core/ensemble/kernel_ensemble.py
@@ -52,8 +52,8 @@ def transform(self, kernel_params_dict: dict = None, feature_generator: str = None
        for specified_params in kernel_params:
            feature_extractor, classificator, lambda_func_dict = self._init_pipeline_nodes(**specified_params)

-           self.feature_matrix_train.append(feature_extractor.extract_features(self.train_features))
-           self.feature_matrix_test.append(feature_extractor.extract_features(self.test_features))
+           self.feature_matrix_train.append(feature_extractor.extract_features(self.train_features, self.train_target))
+           self.feature_matrix_test.append(feature_extractor.extract_features(self.test_features, self.test_target))
        return

def __one_stage_kernel(self, kernel_params_dict: dict = None, feature_generator: str = None):
@@ -109,12 +109,12 @@ def init_kernel_ensemble(train_data,
    metric_dict = {}
    dataset_name = 'Lightning2'
    kernel_list = {'wavelet': [
-       {'feature_generator_type': 'wavelet',
+       {'feature_generator_type': 'signal',
         'feature_hyperparams': {
             'wavelet': "mexh",
             'n_components': 2
         }},
-       {'feature_generator_type': 'wavelet',
+       {'feature_generator_type': 'signal',
         'feature_hyperparams': {
             'wavelet': "morl",
             'n_components': 2
@@ -151,22 +151,15 @@ def init_kernel_ensemble(train_data,
        feature_dict.update({fg_names[rank]: (test_best, test_best)})

    for model_name, feature in feature_dict.items():
-       industrial = Fedot(
-           # available_operations=['fast_ica', 'scaling','normalization',
-           #                       'xgboost',
-           #                       'rf',
-           #                       'logit',
-           #                       'mlp',
-           #                       'knn',
-           #                       'pca'],
-           metric='roc_auc', timeout=5, problem='classification', n_jobs=6)
+       industrial = Fedot(metric='roc_auc', timeout=5, problem='classification', n_jobs=6)

        model = industrial.fit(feature[0], train_target)
        labels = industrial.predict(feature[1])
        proba_dict.update({model_name: industrial.predict_proba(feature[1])})
-       metric_dict.update({model_name: industrial.get_metrics(test_target, metric_names=['roc_auc', 'f1', 'acc'])})
+       metric_dict.update({model_name: industrial.get_metrics(test_target, metric_names=['roc_auc', 'f1', 'accuracy'])})
    rank_ensembler = RankEnsemble(dataset_name=dataset_name,
-                                 proba_dict=proba_dict,
-                                 metric_dict=metric_dict)
+                                 proba_dict={dataset_name: proba_dict},
+                                 metric_dict={dataset_name: metric_dict})

    ensemble_result = rank_ensembler.ensemble()
    _ = 1
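Note (not part of the commit): the RankEnsemble arguments are now nested under the dataset name, so both dicts are keyed per dataset first and per feature generator second. An illustrative sketch of the expected shape, with made-up values:

    probs = industrial.predict_proba(feature[1])
    proba_dict = {'Lightning2': {'wavelet_0': probs}}
    metric_dict = {'Lightning2': {'wavelet_0': {'roc_auc': 0.81, 'f1': 0.78, 'accuracy': 0.80}}}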
23 changes: 0 additions & 23 deletions fedot_ind/core/models/quantile/quantile_extractor.py
@@ -47,29 +47,6 @@ def __init__(self, params: Optional[OperationParameters] = None):
                                             'Stride': self.stride,
                                             'VarTh': self.var_threshold})

-   def _drop_features(self, predict: pd.DataFrame, columns: Index, n_components: int):
-       """
-       Method for dropping features with low variance
-       """
-       # Fill columns names for every extracted ts component
-       predict = pd.DataFrame(predict,
-                              columns=[f'{col}{str(i)}' for i in range(1, n_components + 1) for col in columns])
-
-       if self.relevant_features is None:
-           reduced_df, self.relevant_features = self._filter_by_var(predict, threshold=self.var_threshold)
-           return reduced_df
-       else:
-           return predict[self.relevant_features]
-
-   def _filter_by_var(self, data: pd.DataFrame, threshold: float):
-       cols = data.columns
-       filtrat = {}
-
-       for col in cols:
-           if np.var(data[col].values) > threshold:
-               filtrat.update({col: data[col].values.flatten()})
-
-       return pd.DataFrame(filtrat), list(filtrat.keys())

    def _concatenate_global_and_local_feature(self, global_features: InputData,
                                              window_stat_features: InputData) -> InputData:
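Note (not part of the commit): the deleted _drop_features/_filter_by_var pair kept only columns whose variance exceeded var_threshold. A rough scikit-learn stand-in under that assumption, not necessarily what the extractor does now:

    from sklearn.feature_selection import VarianceThreshold

    # feature_df and var_threshold are hypothetical stand-ins for the extractor's data
    selector = VarianceThreshold(threshold=var_threshold)
    reduced = selector.fit_transform(feature_df)
    kept_columns = feature_df.columns[selector.get_support()]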
@@ -3,7 +3,7 @@


def rq(A):
-   n, m = A.shape()
+   n, m = A.shape
    Q, R = np.linalg.qr(np.flipud(A).T, mode='complete')
    R = np.rot90(R.T, 2)
    Q = np.flipud(Q.T)
@@ -19,18 +19,14 @@ def tls(A, B):
        raise ValueError('Matrices are not conformant.')
    R1 = np.hstack((A, B))
    U, S, V = np.linalg.svd(R1)
-   r = A.shape[1]
-   R = rq(V[:, r:])
+   r = B.shape[1]
+   R, Q = rq(V[:, r:])
    Gamma = R[n:, n - r:]
    Z = R[:n, n - r:]
    Xhat = -np.dot(Z, np.linalg.inv(Gamma))
    return Xhat


-def dmd_decompose():
-    pass
-
-
def exact_dmd_decompose(X, Y, rank):
    Ux, Sx, Vx = svd(X)
    Ux = Ux[:, :rank]
@@ -76,10 +76,10 @@ def symmetric_decompose(X, Y, rank):
    r = np.linalg.matrix_rank(X)
    Ux = Ux[:, :rank]
    Yf = np.zeros((rank, rank))
-   for i in range(r):
-       Yf[i, i] = np.real(C1[i, i]) / S[i, i]
+   for i in range(rank):
+       Yf[i, i] = np.real(C1[i, i]) / S[i]
        for j in range(i + 1, rank):
-           Yf[i, j] = (S[i, i] * np.conj(C1[j, i]) + S[j, j] * C1[i, j]) / (S[i, i] ** 2 + S[j, j] ** 2)
+           Yf[i, j] = (S[i] * np.conj(C1[j, i]) + S[j] * C1[i, j]) / (S[i] ** 2 + S[j] ** 2)
    Yf = Yf + Yf.T - np.diag(np.diag(np.real(Yf)))
    # elif method == 'skewsymmetric':
    #     for i in range(r):
@@ -92,7 +92,7 @@ def symmetric_decompose(X, Y, rank):


def hankel_decompose(X, Y, rank):
-   nx, nt = X.shape()
+   nx, nt = X.shape
    # J = np.eye(nx)
    J = np.fliplr(np.eye(nx))
    # Define the left matrix
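Two notes on these fixes (not part of the commit): numpy.linalg.svd returns the singular values as a 1-D array, which is why S[i] replaces S[i, i]; and ndarray.shape is an attribute, not a callable. Assuming rq returns the pair (R, Q) with A == R @ Q, a quick sanity sketch:

    import numpy as np

    # rq as defined in the module under change; import path omitted here
    A = np.random.random((3, 5))
    R, Q = rq(A)
    assert np.allclose(R @ Q, A)                     # the RQ factors reproduce A
    assert np.allclose(Q @ Q.T, np.eye(Q.shape[0]))  # Q is orthogonal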
13 changes: 11 additions & 2 deletions tests/unit/core/architecture/datasets/test_visualization.py
@@ -17,10 +17,19 @@ def synthetic_coco_dataset():
return COCODataset(coco_img_path, coco_path, transform=transforms.ToTensor())


-def test_draw_sample_with_bboxes(synthetic_coco_dataset):
+@pytest.fixture
+def sample_prediction():
+    return {
+        'boxes': [[0.0, 0.0, 1.0, 1.0]],
+        'labels': [1],
+        'scores': [0.9]
+    }
+
+
+def test_draw_sample_with_bboxes(synthetic_coco_dataset, sample_prediction):
    sample = synthetic_coco_dataset[0]
    image, label = sample
-   figure = draw_sample_with_bboxes(image=image, target=label)
+   figure = draw_sample_with_bboxes(image=image, target=label, prediction=sample_prediction)

    assert isinstance(figure, plt.Figure)

@@ -5,23 +5,33 @@
from fedot_ind.tools.synthetic.ts_datasets_generator import TimeSeriesDatasetsGenerator


-@pytest.fixture
-def dataset():
+def dataset_uni():
    (X_train, y_train), (X_test, y_test) = TimeSeriesDatasetsGenerator(num_samples=30,
                                                                       max_ts_len=50,
                                                                       n_classes=2,
                                                                       test_size=0.5).generate_data()
    return X_train, y_train, X_test, y_test


+def dataset_multi():
+    (X_train, y_train), (X_test, y_test) = TimeSeriesDatasetsGenerator(num_samples=30,
+                                                                       max_ts_len=50,
+                                                                       n_classes=2,
+                                                                       test_size=0.5,
+                                                                       multivariate=True).generate_data()
+    return X_train, y_train, X_test, y_test


@pytest.fixture
def params():
    return dict(branch_nodes=['eigen_basis'],
-               dataset='FordA',
-               model_params={'task': 'classification',
+               dataset='custom',
+               model_params={'problem': 'classification',
                              'n_jobs': -1,
-                             'timeout': 1},
-               output_folder='.')
+                             'timeout': 0.1},
+               output_folder='.',
+               tuning_iterations=1,
+               tuning_timeout=0.1)


@pytest.fixture
Expand All @@ -31,7 +41,20 @@ def classifier(params):

def test_init(classifier):
assert classifier.branch_nodes == ['eigen_basis']
-    assert classifier.tuning_iterations == 30
-    assert classifier.tuning_timeout == 15.0
+    assert classifier.tuning_iterations == 1
+    assert classifier.tuning_timeout == 0.1
assert isinstance(classifier.preprocessing_pipeline, Pipeline)
assert classifier.output_folder == '.'


+@pytest.mark.parametrize('dataset', [dataset_uni(), dataset_multi()])
+def test_fit_predict(classifier, dataset):
+    X_train, y_train, X_test, y_test = dataset
+    model = classifier.fit(features=X_train, target=y_train)
+    labels = classifier.predict(features=X_test, target=y_test)
+    probs = classifier.predict_proba(features=X_test, target=y_test)
+    metrics = classifier.get_metrics(target=y_test, metric_names=['f1', 'roc_auc'])
+    for metric in metrics:
+        assert metric in ['f1', 'roc_auc']
+
+    assert len(labels) == len(y_test)
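Note (not part of the commit): calling dataset_uni()/dataset_multi() inside parametrize generates both datasets at collection time. A sketch of an alternative for this test module that defers generation to test time via a parametrized fixture:

    @pytest.fixture(params=[False, True], ids=['univariate', 'multivariate'])
    def dataset(request):
        (X_train, y_train), (X_test, y_test) = TimeSeriesDatasetsGenerator(
            num_samples=30, max_ts_len=50, n_classes=2,
            test_size=0.5, multivariate=request.param).generate_data()
        return X_train, y_train, X_test, y_test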
70 changes: 70 additions & 0 deletions tests/unit/core/ensemble/test_kernel_ensemble.py
@@ -0,0 +1,70 @@
from fedot.api.main import Fedot

from fedot_ind.core.ensemble.kernel_ensemble import KernelEnsembler, init_kernel_ensemble
import pytest

from fedot_ind.core.ensemble.rank_ensembler import RankEnsemble
from fedot_ind.tools.loader import DataLoader
from fedot_ind.api.utils.path_lib import PROJECT_PATH
import os


@pytest.fixture()
def kernel_dict():
return {'wavelet': [{'feature_generator_type': 'signal',
'feature_hyperparams': {'wavelet': "mexh",
'n_components': 2}
}
],
'quantile': [{'feature_generator_type': 'quantile',
'feature_hyperparams': {'window_mode': True,
'window_size': 25}
}
]
}


@pytest.fixture()
def data():
ds_name = 'ItalyPowerDemand'
folder_path = os.path.join(PROJECT_PATH, 'tests/data/datasets')
return DataLoader(dataset_name=ds_name).load_data()


def test_kernel_ensembler(kernel_dict, data):
train_data, test_data = data
n_best = 2
feature_dict = {}
proba_dict = {}
metric_dict = {}
dataset_name = 'ItalyPowerDemand'

fg_names = []
for key in kernel_dict:
for model_params in kernel_dict[key]:
fg_names.append(f'{key}_{model_params}')

set_of_fg, train_feats, train_target, test_feats, test_target = init_kernel_ensemble(train_data,
test_data,
kernel_list=kernel_dict)
n_best_generators = set_of_fg.T.nlargest(n_best, 0).index
for rank in range(n_best):
fg_rank = n_best_generators[rank]
train_best = train_feats[fg_rank]
test_best = test_feats[fg_rank]
feature_dict.update({fg_names[rank]: (test_best, test_best)})

for model_name, feature in feature_dict.items():
industrial = Fedot(metric='roc_auc', timeout=0.1, problem='classification', n_jobs=6)
model = industrial.fit(feature[0], train_target)
labels = industrial.predict(feature[1])
proba_dict.update({model_name: industrial.predict_proba(feature[1])})
metric_dict.update({model_name: industrial.get_metrics(test_target, metric_names=['roc_auc', 'f1', 'accuracy'])})
rank_ensembler = RankEnsemble(dataset_name=dataset_name,
proba_dict={dataset_name: proba_dict},
metric_dict={dataset_name: metric_dict})

ensemble_result = rank_ensembler.ensemble()
assert ensemble_result is not None


10 changes: 10 additions & 0 deletions tests/unit/core/models/test_classification_models.py
@@ -24,3 +24,13 @@ def test_resnet101_one_channel():
def test_resnet152_one_channel():
model = resnet152_one_channel()
assert isinstance(model, ResNet)


+def test_CLF_MODELS():
+    models = CLF_MODELS
+    assert isinstance(models, dict)
+
+
+def test_CLF_MODELS_ONE_CHANNEL():
+    models = CLF_MODELS_ONE_CHANNEL
+    assert isinstance(models, dict)
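Note (not part of the commit): CLF_MODELS and CLF_MODELS_ONE_CHANNEL presumably map model names to constructors, matching the resnet*_one_channel factories tested above. A hedged usage sketch; the key name is hypothetical and not taken from the repo:

    model_cls = CLF_MODELS.get('resnet18')  # hypothetical key
    if model_cls is not None:
        model = model_cls()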