add Industrial mutation
v1docq committed Dec 14, 2023
1 parent 4fa48bc commit 2a32423
Showing 6 changed files with 289 additions and 75 deletions.
@@ -41,13 +41,15 @@
'dimension_reduction',
'inception_model',
'logit',
'rf',
'xgboost',
'minirocket_extractor',
'normalization',
'omniscale_model',
'pca',
'mlp',
'quantile_extractor',
'resample',
# 'resample',
'scaling',
'signal_extractor',
'topological_features'
@@ -96,33 +98,38 @@

if __name__ == "__main__":
OperationTypesRepository = IndustrialModels().setup_repository()
results = get_averaged_results_from_web(datasets=multivariate_equal_length, classifiers=valid_multi_classifiers)
results = pd.DataFrame(results)
results.columns = valid_multi_classifiers
results.index = multivariate_equal_length
try:
results = pd.read_csv('./multi_ts_res.csv',sep=';',index_col=0)
except Exception:
results = get_averaged_results_from_web(datasets=multivariate_equal_length, classifiers=valid_multi_classifiers)
results = pd.DataFrame(results)
results.columns = valid_multi_classifiers
results.index = multivariate_equal_length
results['Fedot_Ind'] = 0
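The try/except added above makes the reference results cache-first: read the local CSV if present, fetch from the web only otherwise. A minimal standalone sketch of the same pattern, assuming get_averaged_results_from_web and the two benchmark lists are importable as in this script:

import pandas as pd

def load_reference_results(cache_path: str = './multi_ts_res.csv') -> pd.DataFrame:
    """Load cached benchmark results, falling back to a slow web fetch."""
    try:
        # Fast path: a previous run already saved the table locally.
        return pd.read_csv(cache_path, sep=';', index_col=0)
    except Exception:
        # Slow path: rebuild the table and shape it like the cached version.
        raw = get_averaged_results_from_web(datasets=multivariate_equal_length,
                                            classifiers=valid_multi_classifiers)
        results = pd.DataFrame(raw)
        results.columns = valid_multi_classifiers
        results.index = multivariate_equal_length
        return results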
multivariate_equal_length = [
# 'DuckDuckGeese',
# 'MotorImagery',
# 'Heartbeat',
# 'Handwriting',
# 'EigenWorms',
# 'Epilepsy',
# 'EthanolConcentration',
# 'FaceDetection',
'RacketSports',
'LSST',
'SelfRegulationSCP1',
'SelfRegulationSCP2',
'StandWalkJump',
]
# multivariate_equal_length = [
# # 'DuckDuckGeese',
# # 'MotorImagery',
# 'Heartbeat',
# 'Handwriting',
# 'EigenWorms',
# 'Epilepsy',
# 'EthanolConcentration',
# 'FaceDetection',
# 'RacketSports',
# 'LSST',
# 'SelfRegulationSCP1',
# 'SelfRegulationSCP2',
# 'StandWalkJump'
# ]
# error_model = PipelineBuilder().add_node('resample').add_node('resample', branch_idx=1) \
# .add_node('minirocket_extractor', branch_idx=1).add_node('quantile_extractor', branch_idx=1).join_branches(
# 'logit').build()
#error_model = PipelineBuilder().add_node('logit').add_node('logit').build()
# error_model = PipelineBuilder().add_node('pca').add_node('resample', branch_idx=1).add_node('quantile_extractor', branch_idx=1).join_branches(
# 'logit').build()
#error_model = PipelineBuilder().add_node('pca').add_node('logit').build()
error_model = PipelineBuilder().add_node('signal_extractor').add_node('dimension_reduction').add_node('logit').build()
# fourier_basis; quantile_extractor; rf
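The commented-out variants above use FEDOT's branching API; the active model is a linear signal_extractor -> dimension_reduction -> logit chain. As a hedged usage sketch (assuming FEDOT's usual PipelineBuilder import path), a two-branch pipeline joined by a classifier looks like:

from fedot.core.pipelines.pipeline_builder import PipelineBuilder

# Branch 0 extracts minirocket features, branch 1 extracts quantile features;
# join_branches feeds both feature sets into a single logit head.
branched_model = (PipelineBuilder()
                  .add_node('minirocket_extractor')
                  .add_node('quantile_extractor', branch_idx=1)
                  .join_branches('logit')
                  .build())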
for dataset in multivariate_equal_length:
train_data, test_data = DataLoader(dataset_name=dataset).load_data()
input_data = init_input_data(train_data[0], train_data[1])
@@ -138,12 +145,12 @@
timeout=30,
with_tuning=False
)
#model = error_model
model = error_model
model.fit(input_data)
features = model.predict(val_data)
metric = evaluate_metric(target=val_data.target, prediction=features)
try:
acc = accuracy_score(y_true=val_data.target, y_pred=features.predict)
acc = accuracy_score(y_true=val_data.target, y_pred=features)
except Exception:
acc = accuracy_score(y_true=val_data.target, y_pred=np.argmax(features, axis=1))
metric_dict.update({model: metric})
@@ -155,5 +162,5 @@
show_fitness=True, dpi=100)

results.loc[dataset, 'Fedot_Ind'] = acc
results.to_csv('./multi_ts_clf_run4.csv')
results.to_csv('./multi_ts_clf_run7.csv')
_ = 1
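The try/except around accuracy_score above handles models that return class probabilities instead of hard labels; a self-contained sketch of that fallback (standard numpy/sklearn only):

import numpy as np
from sklearn.metrics import accuracy_score

def safe_accuracy(y_true, prediction):
    """Accept hard labels or an (n_samples, n_classes) probability matrix."""
    try:
        return accuracy_score(y_true=y_true, y_pred=prediction)
    except Exception:
        # Probabilities: reduce to labels via the most likely class.
        return accuracy_score(y_true=y_true, y_pred=np.argmax(prediction, axis=1))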
14 changes: 10 additions & 4 deletions fedot_ind/core/operation/filtration/feature_filtration.py
@@ -30,7 +30,8 @@ def _transform(self, operation):
if operation.task.task_params is None:
operation_name = operation.task.task_params
else:
operation_name = operation.task.task_params.feature_filter
operation_name = operation.task.task_params.feature_filter if 'feature_filter' \
in operation.task.task_params else operation.task.task_params
if operation_name is None:
return operation.features
elif operation_name in self.method_dict.keys():
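The new branch above (hunk truncated at the elif) falls back to the raw task_params when no 'feature_filter' entry is present; a generic sketch of the lookup, assuming task_params supports membership tests and attribute access:

def resolve_filter_name(task_params):
    """Return the configured feature filter, or the params object as a fallback."""
    if task_params is None:
        return None
    # Membership test first, so params without the key do not raise.
    return task_params.feature_filter if 'feature_filter' in task_params else task_params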
@@ -43,8 +44,10 @@ def filter_dimension_num(self, data):
else:
grouped_components = list(map(self._compute_component_corr, data.features))
dimension_distrib = [x.shape[0] for x in grouped_components]
minimal_dim = min(dimension_distrib)
dominant_dim = stats.mode(dimension_distrib).mode
grouped_predict = [x[:dominant_dim, :] for x in grouped_components]
reduction_dim = min(minimal_dim, dominant_dim)
grouped_predict = [x[:reduction_dim, :] for x in grouped_components]
return np.stack(grouped_predict) if len(grouped_predict) > 1 else grouped_predict[0]
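The added minimal_dim/reduction_dim lines keep the slice bound from exceeding the smallest component count: the mode of the dimension distribution can be larger than some sample's number of components, which would make the final np.stack fail on unequal shapes. A small sketch of the fix (scalar .mode as in recent SciPy):

import numpy as np
from scipy import stats

# Component counts per sample: the mode is 3, but one sample has only 2 rows.
grouped_components = [np.ones((3, 10)), np.ones((2, 10)), np.ones((3, 10))]
dimension_distrib = [x.shape[0] for x in grouped_components]

dominant_dim = stats.mode(dimension_distrib).mode           # 3
reduction_dim = min(min(dimension_distrib), dominant_dim)   # 2, safe for every sample

grouped_predict = [x[:reduction_dim, :] for x in grouped_components]
stacked = np.stack(grouped_predict)                         # (3, 2, 10), shapes now agree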

def _compute_component_corr(self, sample):
@@ -56,7 +59,7 @@ def _compute_component_corr(self, sample):
grouped_predict = sample[0, :].reshape(1, -1)
tmp = pd.DataFrame(sample[1:, :])
component_list = []
correlation_matrix = cdist(metric='cosine', XA=tmp.values, XB=tmp.values)
correlation_matrix = cdist(metric='correlation', XA=tmp.values, XB=tmp.values)
if (correlation_matrix > self.grouping_level).sum() > 0:
for index in component_idx_list:
if len(component_idx_list) == 0:
@@ -71,7 +74,10 @@
if cor_level > self.grouping_level:
component_idx = np.where(correlation_level == cor_level)[0][0] + 1
grouped_v = grouped_v + sample[component_idx, :]
component_idx_list.remove(component_idx)
if component_idx in component_idx_list:
component_idx_list.remove(component_idx)
else:
continue
component_list.append(grouped_v)
component_list = [x.reshape(1, -1) for x in component_list]
grouped_predict = np.concatenate([grouped_predict, *component_list], axis=0)
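Two fixes in this hunk: the pairwise metric switches from 'cosine' to 'correlation', and remove() is guarded so an index that was already merged does not raise ValueError. A short sketch of both, using scipy's cdist as in the diff:

import numpy as np
from scipy.spatial.distance import cdist

components = np.random.rand(4, 128)   # four decomposed components
# Pairwise correlation distance (0 means perfectly positively correlated).
correlation_matrix = cdist(XA=components, XB=components, metric='correlation')

remaining = [1, 2, 3]
component_idx = 2
# Guarded removal: grouping may try to consume the same index twice.
if component_idx in remaining:
    remaining.remove(component_idx)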
@@ -31,7 +31,12 @@ def __init__(self, params: Optional[OperationParameters] = None):
self.logging_params = {'jobs': self.n_processes}

def _get_basis(self, data):
basis = Either.insert(data).then(self._get_1d_basis if type(data) != list else self._get_multidim_basis).value

if type(data) is list or all([type(data) is np.ndarray and len(data.shape) > 1]):
func = self._get_multidim_basis
else:
func = self._get_1d_basis
basis = Either.insert(data).then(func).value
return basis
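The rewritten _get_basis replaces the one-line Either dispatch with an explicit type check: lists and multidimensional arrays go to the multivariate routine, plain 1-D series to the 1-D one. A plain-Python sketch of the intended routing:

import numpy as np

def pick_basis_routine(data, get_1d_basis, get_multidim_basis):
    """Route input to the 1-D or multidimensional decomposition."""
    is_multidim = isinstance(data, list) or \
        (isinstance(data, np.ndarray) and data.ndim > 1)
    return get_multidim_basis(data) if is_multidim else get_1d_basis(data)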

def fit(self, data):
@@ -72,7 +77,7 @@ def _transform(self, input_data: Union[InputData, pd.DataFrame]) -> np.array:
features = np.array(ListMonad(*input_data.features.tolist()).value)
else:
features = np.array(ListMonad(*input_data.tolist()).value)
features = np.array([series[~np.isnan(series)] for series in features])
#features = np.array([series[~np.isnan(series)] for series in features])
if len(features.shape) == 2 and features.shape[1] == 1:
features = features.reshape(1, -1)
parallel = Parallel(n_jobs=self.n_processes, verbose=0, pre_dispatch="2*n_jobs")
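The surrounding _transform (truncated here) fans per-series work out with joblib; a minimal sketch of that Parallel/delayed pattern with the same arguments as the diff (the per-series transform is a placeholder):

import numpy as np
from joblib import Parallel, delayed

def transform_one_sample(series: np.ndarray) -> np.ndarray:
    return series - series.mean()   # placeholder for the real basis transform

features = np.random.rand(8, 100)   # eight series of length 100
parallel = Parallel(n_jobs=2, verbose=0, pre_dispatch="2*n_jobs")
transformed = parallel(delayed(transform_one_sample)(sample) for sample in features)
result = np.array(transformed)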
18 changes: 5 additions & 13 deletions fedot_ind/core/operation/transformation/basis/eigen_basis.py
@@ -62,12 +62,14 @@ def _transform(self, input_data: InputData) -> np.array:
self.SV_threshold = max(self.get_threshold(data=features), 2)
self.logging_params.update({'SV_thr': self.SV_threshold})


predict = []
for dimension in range(features.shape[1]):
parallel = Parallel(n_jobs=self.n_processes, verbose=0, pre_dispatch="2*n_jobs")
v = parallel(delayed(self._transform_one_sample)(sample) for sample in features[:,dimension,:])
v = parallel(delayed(self._transform_one_sample)(sample) for sample in features[:, dimension, :])
predict.append(np.array(v) if len(v) > 1 else v[0])
self.predict = np.concatenate(predict,axis=1)
self.predict = np.concatenate(predict, axis=1)

if input_data.task.task_params is None:
input_data.task.task_params = self.__repr__()
else:
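The reworked loop above (hunk truncated at the else branch) decomposes each channel of the (samples x dimensions x time) tensor separately and concatenates the per-channel results along axis 1; a shape-level sketch with a placeholder transform:

import numpy as np

features = np.random.rand(16, 3, 64)   # samples x dimensions x time_steps

def transform_one_sample(sample: np.ndarray) -> np.ndarray:
    return sample[np.newaxis, :]       # placeholder: real code returns basis components

predict = []
for dimension in range(features.shape[1]):
    v = [transform_one_sample(sample) for sample in features[:, dimension, :]]
    predict.append(np.array(v) if len(v) > 1 else v[0])

result = np.concatenate(predict, axis=1)   # (16, 3, 64): channels side by side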
Expand Down Expand Up @@ -103,7 +105,7 @@ def _transform_one_sample(self, series: np.array, svd_flag: bool = False):
if svd_flag:
return rank
else:
return self._get_basis(data)
return self._get_1d_basis(data)

def estimate_singular_values(self, data):
svd = lambda x: ListMonad(self.svd_estimator.rsvd(tensor=x, approximation=self.low_rank_approximation))
@@ -160,13 +162,3 @@ def _get_multidim_basis(self, data):

return basis

def evaluate_derivative(self: class_type,
coefs: np.array,
order: int = 1) -> Tuple[class_type, np.array]:
basis = type(self)(
domain_range=self.domain_range,
n_basis=self.n_basis - order,
)
derivative_coefs = np.array([np.polyder(x[::-1], order)[::-1] for x in coefs])

return basis, derivative_coefs
12 changes: 3 additions & 9 deletions fedot_ind/core/operation/transformation/basis/fourier.py
@@ -32,19 +32,13 @@ def __init__(self, params: Optional[OperationParameters] = None):
def _decompose_signal(self, input_data):
fourier_coef = np.fft.rfft(input_data)
frequencies = np.fft.rfftfreq(input_data.size, d=2e-3 / input_data.size)
ind_of_main_freq = np.where(frequencies == self.threshold)
if self.approximation == 'exact':
fourier_coef[frequencies != frequencies[self.threshold]] = 0
fourier_coef[frequencies != frequencies[ind_of_main_freq]] = 0
else:
fourier_coef[frequencies > frequencies[self.threshold]] = 0
fourier_coef[frequencies > frequencies[ind_of_main_freq]] = 0
return np.fft.irfft(fourier_coef)
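The fix above looks the main frequency up by value with np.where instead of using self.threshold as a raw index; 'exact' keeps a single frequency bin, the default keeps everything up to it. A standalone sketch of the same rfft round trip (cutoff chosen by index here for simplicity; the diff matches self.threshold by value):

import numpy as np

def filter_signal(signal: np.ndarray, main_freq_idx: int, exact: bool = False) -> np.ndarray:
    """Keep only the main frequency (exact) or low-pass up to it."""
    fourier_coef = np.fft.rfft(signal)
    frequencies = np.fft.rfftfreq(signal.size, d=2e-3 / signal.size)
    cutoff = frequencies[main_freq_idx]
    if exact:
        fourier_coef[frequencies != cutoff] = 0
    else:
        fourier_coef[frequencies > cutoff] = 0
    return np.fft.irfft(fourier_coef)

signal = np.sin(np.linspace(0, 8 * np.pi, 256))
smoothed = filter_signal(signal, main_freq_idx=4)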

def _transform_one_sample(self, series: np.array):
return self._get_basis(series)

def evaluate_derivative(self, order):
"""Evaluates the derivative of the Fourier decomposition of the given data.
Returns:
np.array: The derivative of the Fourier decomposition of the given data.
"""
return np.fft.ifft(1j * np.arange(len(self.data_range)) * self.decomposed)