diff --git a/examples/pipeline_example/time_series/ts_classification/basic_example.py b/examples/pipeline_example/time_series/ts_classification/basic_example.py index 8acbf2601..85e79e261 100644 --- a/examples/pipeline_example/time_series/ts_classification/basic_example.py +++ b/examples/pipeline_example/time_series/ts_classification/basic_example.py @@ -41,13 +41,15 @@ 'dimension_reduction', 'inception_model', 'logit', + 'rf', + 'xgboost', 'minirocket_extractor', 'normalization', 'omniscale_model', 'pca', 'mlp', 'quantile_extractor', - 'resample', + # 'resample', 'scaling', 'signal_extractor', 'topological_features' @@ -96,26 +98,29 @@ if __name__ == "__main__": OperationTypesRepository = IndustrialModels().setup_repository() - results = get_averaged_results_from_web(datasets=multivariate_equal_length, classifiers=valid_multi_classifiers) - results = pd.DataFrame(results) - results.columns = valid_multi_classifiers - results.index = multivariate_equal_length + try: + results = pd.read_csv('./multi_ts_res.csv',sep=';',index_col=0) + except Exception: + results = get_averaged_results_from_web(datasets=multivariate_equal_length, classifiers=valid_multi_classifiers) + results = pd.DataFrame(results) + results.columns = valid_multi_classifiers + results.index = multivariate_equal_length results['Fedot_Ind'] = 0 - multivariate_equal_length = [ - # 'DuckDuckGeese', - # 'MotorImagery', - # 'Heartbeat', - # 'Handwriting', - # 'EigenWorms', - # 'Epilepsy', - # 'EthanolConcentration', - # 'FaceDetection', - 'RacketSports', - 'LSST', - 'SelfRegulationSCP1', - 'SelfRegulationSCP2', - 'StandWalkJump', - ] + # multivariate_equal_length = [ + # # 'DuckDuckGeese', + # # 'MotorImagery', + # 'Heartbeat', + # 'Handwriting', + # 'EigenWorms', + # 'Epilepsy', + # 'EthanolConcentration', + # 'FaceDetection', + # 'RacketSports', + # 'LSST', + # 'SelfRegulationSCP1', + # 'SelfRegulationSCP2', + # 'StandWalkJump' + # ] # error_model = PipelineBuilder().add_node('resample').add_node('resample', branch_idx=1) \ # .add_node('minirocket_extractor', branch_idx=1).add_node('quantile_extractor', branch_idx=1).join_branches( # 'logit').build() @@ -123,6 +128,8 @@ # error_model = PipelineBuilder().add_node('pca').add_node('resample', branch_idx=1).add_node('quantile_extractor', branch_idx=1).join_branches( # 'logit').build() #error_model = PipelineBuilder().add_node('pca').add_node('logit').build() + error_model = PipelineBuilder().add_node('signal_extractor').add_node('dimension_reduction').add_node('logit').build() + #(/n_fourier_basis;)/n_quantile_extractor;)/n_rf for dataset in multivariate_equal_length: train_data, test_data = DataLoader(dataset_name=dataset).load_data() input_data = init_input_data(train_data[0], train_data[1]) @@ -138,12 +145,12 @@ timeout=30, with_tuning=False ) - #model = error_model + model = error_model model.fit(input_data) features = model.predict(val_data) metric = evaluate_metric(target=val_data.target, prediction=features) try: - acc = accuracy_score(y_true=val_data.target, y_pred=features.predict) + acc = accuracy_score(y_true=val_data.target, y_pred=features) except Exception: acc = accuracy_score(y_true=val_data.target, y_pred=np.argmax(features, axis=1)) metric_dict.update({model: metric}) @@ -155,5 +162,5 @@ show_fitness=True, dpi=100) results.loc[dataset, 'Fedot_Ind'] = acc - results.to_csv('./multi_ts_clf_run4.csv') + results.to_csv('./multi_ts_clf_run7.csv') _ = 1 diff --git a/fedot_ind/core/operation/filtration/feature_filtration.py b/fedot_ind/core/operation/filtration/feature_filtration.py index 201c62b49..5dfb49f97 100644 --- a/fedot_ind/core/operation/filtration/feature_filtration.py +++ b/fedot_ind/core/operation/filtration/feature_filtration.py @@ -30,7 +30,8 @@ def _transform(self, operation): if operation.task.task_params is None: operation_name = operation.task.task_params else: - operation_name = operation.task.task_params.feature_filter + operation_name = operation.task.task_params.feature_filter if 'feature_filter' \ + in operation.task.task_params else operation.task.task_params if operation_name is None: return operation.features elif operation_name in self.method_dict.keys(): @@ -43,8 +44,10 @@ def filter_dimension_num(self, data): else: grouped_components = list(map(self._compute_component_corr, data.features)) dimension_distrib = [x.shape[0] for x in grouped_components] + minimal_dim = min(dimension_distrib) dominant_dim = stats.mode(dimension_distrib).mode - grouped_predict = [x[:dominant_dim, :] for x in grouped_components] + reduction_dim = min(minimal_dim, dominant_dim) + grouped_predict = [x[:reduction_dim, :] for x in grouped_components] return np.stack(grouped_predict) if len(grouped_predict) > 1 else grouped_predict[0] def _compute_component_corr(self, sample): @@ -56,7 +59,7 @@ def _compute_component_corr(self, sample): grouped_predict = sample[0, :].reshape(1, -1) tmp = pd.DataFrame(sample[1:, :]) component_list = [] - correlation_matrix = cdist(metric='cosine', XA=tmp.values, XB=tmp.values) + correlation_matrix = cdist(metric='correlation', XA=tmp.values, XB=tmp.values) if (correlation_matrix > self.grouping_level).sum() > 0: for index in component_idx_list: if len(component_idx_list) == 0: @@ -71,7 +74,10 @@ def _compute_component_corr(self, sample): if cor_level > self.grouping_level: component_idx = np.where(correlation_level == cor_level)[0][0] + 1 grouped_v = grouped_v + sample[component_idx, :] - component_idx_list.remove(component_idx) + if component_idx in component_idx_list: + component_idx_list.remove(component_idx) + else: + continue component_list.append(grouped_v) component_list = [x.reshape(1, -1) for x in component_list] grouped_predict = np.concatenate([grouped_predict, *component_list], axis=0) diff --git a/fedot_ind/core/operation/transformation/basis/abstract_basis.py b/fedot_ind/core/operation/transformation/basis/abstract_basis.py index 8af5b3aa1..e291ce35c 100644 --- a/fedot_ind/core/operation/transformation/basis/abstract_basis.py +++ b/fedot_ind/core/operation/transformation/basis/abstract_basis.py @@ -31,7 +31,12 @@ def __init__(self, params: Optional[OperationParameters] = None): self.logging_params = {'jobs': self.n_processes} def _get_basis(self, data): - basis = Either.insert(data).then(self._get_1d_basis if type(data) != list else self._get_multidim_basis).value + + if type(data) is list or all([type(data) is np.ndarray and len(data.shape) > 1]): + func = self._get_multidim_basis + else: + func = self._get_1d_basis + basis = Either.insert(data).then(func).value return basis def fit(self, data): @@ -72,7 +77,7 @@ def _transform(self, input_data: Union[InputData, pd.DataFrame]) -> np.array: features = np.array(ListMonad(*input_data.features.tolist()).value) else: features = np.array(ListMonad(*input_data.tolist()).value) - features = np.array([series[~np.isnan(series)] for series in features]) + #features = np.array([series[~np.isnan(series)] for series in features]) if len(features.shape) == 2 and features.shape[1] == 1: features = features.reshape(1, -1) parallel = Parallel(n_jobs=self.n_processes, verbose=0, pre_dispatch="2*n_jobs") diff --git a/fedot_ind/core/operation/transformation/basis/eigen_basis.py b/fedot_ind/core/operation/transformation/basis/eigen_basis.py index 33d261f81..746a51d91 100644 --- a/fedot_ind/core/operation/transformation/basis/eigen_basis.py +++ b/fedot_ind/core/operation/transformation/basis/eigen_basis.py @@ -62,12 +62,14 @@ def _transform(self, input_data: InputData) -> np.array: self.SV_threshold = max(self.get_threshold(data=features), 2) self.logging_params.update({'SV_thr': self.SV_threshold}) + predict = [] for dimension in range(features.shape[1]): parallel = Parallel(n_jobs=self.n_processes, verbose=0, pre_dispatch="2*n_jobs") - v = parallel(delayed(self._transform_one_sample)(sample) for sample in features[:,dimension,:]) + v = parallel(delayed(self._transform_one_sample)(sample) for sample in features[:, dimension, :]) predict.append(np.array(v) if len(v) > 1 else v[0]) - self.predict = np.concatenate(predict,axis=1) + self.predict = np.concatenate(predict, axis=1) + if input_data.task.task_params is None: input_data.task.task_params = self.__repr__() else: @@ -103,7 +105,7 @@ def _transform_one_sample(self, series: np.array, svd_flag: bool = False): if svd_flag: return rank else: - return self._get_basis(data) + return self._get_1d_basis(data) def estimate_singular_values(self, data): svd = lambda x: ListMonad(self.svd_estimator.rsvd(tensor=x, approximation=self.low_rank_approximation)) @@ -160,13 +162,3 @@ def _get_multidim_basis(self, data): return basis - def evaluate_derivative(self: class_type, - coefs: np.array, - order: int = 1) -> Tuple[class_type, np.array]: - basis = type(self)( - domain_range=self.domain_range, - n_basis=self.n_basis - order, - ) - derivative_coefs = np.array([np.polyder(x[::-1], order)[::-1] for x in coefs]) - - return basis, derivative_coefs diff --git a/fedot_ind/core/operation/transformation/basis/fourier.py b/fedot_ind/core/operation/transformation/basis/fourier.py index 10b943092..a93c3fb8c 100644 --- a/fedot_ind/core/operation/transformation/basis/fourier.py +++ b/fedot_ind/core/operation/transformation/basis/fourier.py @@ -32,19 +32,13 @@ def __init__(self, params: Optional[OperationParameters] = None): def _decompose_signal(self, input_data): fourier_coef = np.fft.rfft(input_data) frequencies = np.fft.rfftfreq(input_data.size, d=2e-3 / input_data.size) + ind_of_main_freq = np.where(frequencies == self.threshold) if self.approximation == 'exact': - fourier_coef[frequencies != frequencies[self.threshold]] = 0 + fourier_coef[frequencies != frequencies[ind_of_main_freq]] = 0 else: - fourier_coef[frequencies > frequencies[self.threshold]] = 0 + fourier_coef[frequencies > frequencies[ind_of_main_freq]] = 0 return np.fft.irfft(fourier_coef) def _transform_one_sample(self, series: np.array): return self._get_basis(series) - def evaluate_derivative(self, order): - """Evaluates the derivative of the Fourier decomposition of the given data. - - Returns: - np.array: The derivative of the Fourier decomposition of the given data. - """ - return np.fft.ifft(1j * np.arange(len(self.data_range)) * self.decomposed) diff --git a/fedot_ind/core/repository/initializer_industrial_models.py b/fedot_ind/core/repository/initializer_industrial_models.py index 67437b9a4..4c913f5a9 100644 --- a/fedot_ind/core/repository/initializer_industrial_models.py +++ b/fedot_ind/core/repository/initializer_industrial_models.py @@ -2,52 +2,262 @@ import random from enum import Enum from typing import Sequence +from random import choice, sample from fedot.api.api_utils.api_composer import ApiComposer +from fedot.api.api_utils.api_params_repository import ApiParamsRepository from fedot.core.composer.gp_composer.specific_operators import parameter_change_mutation, boosting_mutation +from fedot.core.pipelines.adapters import PipelineAdapter from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline from fedot.core.pipelines.tuning.search_space import PipelineSearchSpace from fedot.core.pipelines.verification import class_rules, common_rules from fedot.core.repository.operation_types_repository import OperationTypesRepository, get_operations_for_task from fedot.core.repository.tasks import Task, TaskTypesEnum +from golem.core.dag.graph import ReconnectType +from golem.core.dag.graph_utils import graph_has_cycle from golem.core.dag.verification_rules import ERROR_PREFIX +from golem.core.optimisers.advisor import RemoveType from golem.core.optimisers.genetic.operators.base_mutations import MutationStrengthEnum from golem.core.optimisers.genetic.operators.mutation import MutationTypesEnum +from golem.core.optimisers.graph import OptGraph, OptNode +from golem.core.optimisers.opt_node_factory import OptNodeFactory +from golem.core.optimisers.optimization_parameters import GraphRequirements +from golem.core.optimisers.optimizer import GraphGenerationParams, AlgorithmParameters from fedot_ind.api.utils.path_lib import PROJECT_PATH +from fedot_ind.core.repository.model_repository import INDUSTRIAL_PREPROC_MODEL, AtomizedModel from fedot_ind.core.tuning.search_space import get_industrial_search_space + class MutationStrengthEnumIndustrial(Enum): weak = 1.0 mean = 3.0 strong = 5.0 -def add_preprocessing(pipeline: Pipeline, **kwargs) -> Pipeline: - task = Task(TaskTypesEnum.classification) - basis_models = get_operations_for_task(task=task, mode='data_operation', tags=["basis"]) - extractors = get_operations_for_task(task=task, mode='data_operation', tags=["extractor"]) - models = get_operations_for_task(task=task, mode='model') - basis_model = PipelineNode(random.choice(basis_models)) - extractor_model = PipelineNode(random.choice(extractors), nodes_from=[basis_model]) - node_to_mutate = list(filter(lambda x: x.name in models, pipeline.nodes))[0] - if node_to_mutate.nodes_from: - node_to_mutate.nodes_from.append(extractor_model) - else: - node_to_mutate.nodes_from = [extractor_model] - pipeline.nodes.append(basis_model) - pipeline.nodes.append(extractor_model) - - return pipeline - - -def _get_default_industrial_mutations(task_type: TaskTypesEnum) -> Sequence[MutationTypesEnum]: - mutations = [parameter_change_mutation, - MutationTypesEnum.single_change, +class IndustrialMutations: + def __init__(self): + self.node_adapter = PipelineAdapter() + self.industrial_data_operations = [*list(AtomizedModel.INDUSTRIAL_PREPROC_MODEL.value.keys()), + *list(AtomizedModel.INDUSTRIAL_CLF_PREPROC_MODEL.value.keys()), + *list(AtomizedModel.FEDOT_PREPROC_MODEL.value.keys())] + + def transform_to_pipeline_node(self, node): + return self.node_adapter._transform_to_pipeline_node(node) + + def transform_to_opt_node(self, node): + return self.node_adapter._transform_to_opt_node(node) + + def single_edge_mutation(self, + graph: OptGraph, + requirements: GraphRequirements, + graph_gen_params: GraphGenerationParams, + parameters: 'GPAlgorithmParameters' + ) -> OptGraph: + """ + This mutation adds new edge between two random nodes in graph. + + :param graph: graph to mutate + """ + + def nodes_not_cycling(source_node: OptNode, target_node: OptNode): + parents = source_node.nodes_from + while parents: + if target_node not in parents: + grandparents = [] + for parent in parents: + grandparents.extend(parent.nodes_from) + parents = grandparents + else: + return False + return True + + for _ in range(parameters.max_num_of_operator_attempts): + if len(graph.nodes) < 2 or graph.depth > requirements.max_depth: + return graph + + source_node, target_node = sample(graph.nodes, 2) + if source_node not in target_node.nodes_from: + if graph_has_cycle(graph): + graph.connect_nodes(source_node, target_node) + break + else: + if nodes_not_cycling(source_node, target_node): + graph.connect_nodes(source_node, target_node) + break + return graph + + def add_intermediate_node(self, + graph: OptGraph, + node_to_mutate: OptNode, + node_factory: OptNodeFactory) -> OptGraph: + # add between node and parent + new_node = node_factory.get_parent_node(self.transform_to_opt_node(node_to_mutate), is_primary=False) + new_node = self.transform_to_pipeline_node(new_node) + if not new_node: + return graph + + # rewire old children to new parent + new_node.nodes_from = node_to_mutate.nodes_from + node_to_mutate.nodes_from = [new_node] + + # add new node to graph + graph.add_node(new_node) + return graph + + def add_separate_parent_node(self, + graph: OptGraph, + node_to_mutate: PipelineNode, + node_factory: OptNodeFactory) -> OptGraph: + # add as separate parent + new_node = node_factory.get_parent_node(self.transform_to_opt_node(node_to_mutate), is_primary=True) + new_node = self.transform_to_pipeline_node(new_node) + if not new_node: + # there is no possible operators + return graph + if node_to_mutate.nodes_from: + node_to_mutate.nodes_from.append(new_node) + else: + node_to_mutate.nodes_from = [new_node] + graph.nodes.append(new_node) + return graph + + def add_as_child(self, + graph: OptGraph, + node_to_mutate: OptNode, + node_factory: OptNodeFactory) -> OptGraph: + # add as child + old_node_children = graph.node_children(node_to_mutate) + new_node_child = choice(old_node_children) if old_node_children else None + + while True: + new_node = node_factory.get_node(is_primary=False) + if new_node.name not in self.industrial_data_operations: + break + if not new_node: + return graph + new_node = self.transform_to_pipeline_node(new_node) + graph.add_node(new_node) + graph.connect_nodes(node_parent=node_to_mutate, node_child=new_node) + if new_node_child: + graph.connect_nodes(node_parent=new_node, node_child=new_node_child) + graph.disconnect_nodes(node_parent=node_to_mutate, node_child=new_node_child, + clean_up_leftovers=True) + + return graph - add_preprocessing - ] + def single_add(self, + graph: OptGraph, + requirements: GraphRequirements, + graph_gen_params: GraphGenerationParams, + parameters: AlgorithmParameters + ) -> OptGraph: + """ + Add new node between two sequential existing modes + + :param graph: graph to mutate + """ + + if graph.depth >= requirements.max_depth: + # add mutation is not possible + return graph + + node_to_mutate = choice(graph.nodes) + + single_add_strategies = [ + self.add_as_child, + self.add_separate_parent_node + ] + if node_to_mutate.nodes_from: + single_add_strategies.append(self.add_intermediate_node) + strategy = choice(single_add_strategies) + + result = strategy(graph, node_to_mutate, graph_gen_params.node_factory) + return result + + def single_change(self, + graph: OptGraph, + requirements: GraphRequirements, + graph_gen_params: GraphGenerationParams, + parameters: AlgorithmParameters + ) -> OptGraph: + """ + Change node between two sequential existing modes. + + :param graph: graph to mutate + """ + node = choice(graph.nodes) + new_node = graph_gen_params.node_factory.exchange_node(self.transform_to_opt_node(node)) + if not new_node: + return graph + graph.update_node(node, self.transform_to_pipeline_node(new_node)) + return graph + + def single_drop(self, + graph: OptGraph, + requirements: GraphRequirements, + graph_gen_params: GraphGenerationParams, + parameters: AlgorithmParameters + ) -> OptGraph: + """ + Drop single node from graph. + + :param graph: graph to mutate + """ + if len(graph.nodes) < 2: + return graph + node_to_del = choice(graph.nodes) + node_name = node_to_del.name + removal_type = graph_gen_params.advisor.can_be_removed(node_to_del) + if removal_type == RemoveType.with_direct_children: + # TODO refactor workaround with data_source + graph.delete_node(node_to_del) + nodes_to_delete = \ + [n for n in graph.nodes + if n.descriptive_id.count('data_source') == 1 and node_name in n.descriptive_id] + for child_node in nodes_to_delete: + graph.delete_node(child_node, reconnect=ReconnectType.all) + elif removal_type == RemoveType.with_parents: + graph.delete_subtree(node_to_del) + elif removal_type == RemoveType.node_rewire: + graph.delete_node(node_to_del, reconnect=ReconnectType.all) + elif removal_type == RemoveType.node_only: + graph.delete_node(node_to_del, reconnect=ReconnectType.none) + elif removal_type == RemoveType.forbidden: + pass + else: + raise ValueError("Unknown advice (RemoveType) returned by Advisor ") + return graph + + def add_preprocessing(self, + pipeline: Pipeline, **kwargs) -> Pipeline: + task = Task(TaskTypesEnum.classification) + basis_models = get_operations_for_task(task=task, mode='data_operation', tags=["basis"]) + extractors = get_operations_for_task(task=task, mode='data_operation', tags=["extractor"]) + models = get_operations_for_task(task=task, mode='model') + models = [x for x in models if x != 'fedot_cls'] + basis_model = PipelineNode(random.choice(basis_models)) + extractor_model = PipelineNode(random.choice(extractors), nodes_from=[basis_model]) + node_to_mutate = list(filter(lambda x: x.name in models, pipeline.nodes))[0] + if node_to_mutate.nodes_from: + node_to_mutate.nodes_from.append(extractor_model) + else: + node_to_mutate.nodes_from = [extractor_model] + pipeline.nodes.append(basis_model) + pipeline.nodes.append(extractor_model) + + return pipeline + + +def _get_default_industrial_mutations(task_type: TaskTypesEnum, params) -> Sequence[MutationTypesEnum]: + mutations = [ + parameter_change_mutation, + IndustrialMutations().single_change, + IndustrialMutations().add_preprocessing, + #IndustrialMutations().single_drop, + IndustrialMutations().single_add + ] return mutations @@ -127,7 +337,7 @@ def setup_repository(self): OperationTypesRepository.assign_repo('model', self.industrial_model_path) setattr(PipelineSearchSpace, "get_parameters_dict", get_industrial_search_space) - setattr(ApiComposer, "_get_default_mutations", _get_default_industrial_mutations) + setattr(ApiParamsRepository, "_get_default_mutations", _get_default_industrial_mutations) class_rules.append(has_no_data_flow_conflicts_in_industrial_pipeline) MutationStrengthEnum = MutationStrengthEnumIndustrial # common_rules.append(has_no_data_flow_conflicts_in_industrial_pipeline)