From 9f5a936480e1a6e0067d19604b6ad10ffe263b8d Mon Sep 17 00:00:00 2001 From: technocreep Date: Sat, 27 Apr 2024 11:01:50 +0200 Subject: [PATCH] resolve conflicts --- fedot_ind/api/main.py | 85 +++++++++++++------ fedot_ind/api/utils/industrial_strategy.py | 41 ++++++--- fedot_ind/core/ensemble/kernel_ensemble.py | 39 ++++++--- .../models/nn/network_impl/base_nn_model.py | 4 +- 4 files changed, 112 insertions(+), 57 deletions(-) diff --git a/fedot_ind/api/main.py b/fedot_ind/api/main.py index 69701843e..2244d7a84 100644 --- a/fedot_ind/api/main.py +++ b/fedot_ind/api/main.py @@ -72,7 +72,8 @@ def __init__(self, **kwargs): # init Fedot and Industrial hyperparams and path to results self.output_folder = kwargs.get('output_folder', None) - self.industrial_strategy_params = kwargs.get('industrial_strategy_params', None) + self.industrial_strategy_params = kwargs.get( + 'industrial_strategy_params', None) self.industrial_strategy = kwargs.get('industrial_strategy', None) self.path_to_composition_results = kwargs.get('history_dir', None) self.backend_method = kwargs.get('backend', 'cpu') @@ -119,16 +120,20 @@ def __init__(self, **kwargs): self.config_dict['history_dir'] = prefix self.config_dict['available_operations'] = kwargs.get( 'available_operations', - default_industrial_availiable_operation(self.config_dict['problem']) + default_industrial_availiable_operation( + self.config_dict['problem']) ) - self.config_dict['optimizer'] = kwargs.get('optimizer', IndustrialEvoOptimizer) + self.config_dict['optimizer'] = kwargs.get( + 'optimizer', IndustrialEvoOptimizer) self.config_dict['initial_assumption'] = kwargs.get('initial_assumption', FEDOT_ASSUMPTIONS[self.config_dict['problem']]) - self.config_dict['use_input_preprocessing'] = kwargs.get('use_input_preprocessing', False) + self.config_dict['use_input_preprocessing'] = kwargs.get( + 'use_input_preprocessing', False) if self.task_params is not None and self.config_dict['problem'] == 'ts_forecasting': - self.config_dict['task_params'] = TsForecastingParams(forecast_length=self.task_params['forecast_length']) + self.config_dict['task_params'] = TsForecastingParams( + forecast_length=self.task_params['forecast_length']) # create API subclasses for side task self.__init_experiment_setup() @@ -143,11 +148,13 @@ def __init_experiment_setup(self): # industrial_params = [p for p in self.config_dict.keys() if p not in list(FEDOT_API_PARAMS.keys())] # [self.config_dict.pop(x, None) for x in industrial_params] - industrial_params = set(self.config_dict.keys()) - set(FEDOT_API_PARAMS.keys()) + industrial_params = set(self.config_dict.keys()) - \ + set(FEDOT_API_PARAMS.keys()) for param in industrial_params: self.config_dict.pop(param, None) - backend_method_current, backend_scipy_current = BackendMethods(self.backend_method).backend + backend_method_current, backend_scipy_current = BackendMethods( + self.backend_method).backend globals()['backend_methods'] = backend_method_current globals()['backend_scipy'] = backend_scipy_current @@ -177,7 +184,8 @@ def fit(self, **kwargs: additional parameters """ - self.train_data = deepcopy(input_data) # we do not want to make inplace changes + self.train_data = deepcopy( + input_data) # we do not want to make inplace changes input_preproc = DataCheck(input_data=self.train_data, task=self.config_dict['problem'], task_params=self.task_params, industrial_task_params=self.industrial_strategy_params) self.train_data = input_preproc.check_input_data() @@ -204,35 +212,43 @@ def predict(self, the array with prediction values """ - self.predict_data = deepcopy(predict_data) # we do not want to make inplace changes + self.predict_data = deepcopy( + predict_data) # we do not want to make inplace changes self.predict_data = DataCheck(input_data=self.predict_data, task=self.config_dict['problem'], task_params=self.task_params, industrial_task_params=self.industrial_strategy_params).check_input_data() if self.industrial_strategy is not None and not self.is_finetuned: if predict_mode == 'ensemble': - predict = self.industrial_strategy_class.predict(self.predict_data, 'probs') + predict = self.industrial_strategy_class.predict( + self.predict_data, 'probs') ensemble_strat = self.industrial_strategy_class.ensemble_strategy predict = { - strategy: np.argmax(self.industrial_strategy_class.ensemble_predictions(predict, strategy), axis=1) + strategy: np.argmax(self.industrial_strategy_class.ensemble_predictions( + predict, strategy), axis=1) for strategy in ensemble_strat} else: - predict = self.industrial_strategy_class.predict(self.predict_data, 'labels') + predict = self.industrial_strategy_class.predict( + self.predict_data, 'labels') self.predicted_labels = predict else: if self.condition_check.solver_is_fedot_class(self.solver): predict = self.solver.predict(self.predict_data) else: - predict = self.solver.predict(self.predict_data, 'labels').predict + predict = self.solver.predict( + self.predict_data, 'labels').predict if self.condition_check.is_multiclf_with_labeling_problem(self.config_dict['problem'], self.predict_data.target, predict): - predict = predict + (self.predict_data.target.min() - predict.min()) + predict = predict + \ + (self.predict_data.target.min() - predict.min()) if self.condition_check.solver_have_target_encoder(self.target_encoder): - self.predicted_labels = self.target_encoder.inverse_transform(predict) - self.predict_data.target = self.target_encoder.inverse_transform(self.predict_data.target) + self.predicted_labels = self.target_encoder.inverse_transform( + predict) + self.predict_data.target = self.target_encoder.inverse_transform( + self.predict_data.target) else: self.predicted_labels = predict return self.predicted_labels @@ -245,6 +261,7 @@ def predict_proba(self, Method to obtain prediction probabilities from trained Industrial model. Args: + predict_mode: ``default='default'``. Defines the mode of prediction. Could be 'default' or 'probs'. predict_data: tuple with test_features and test_target Returns: @@ -258,7 +275,8 @@ def predict_proba(self, task_params=self.task_params, industrial_task_params=self.industrial_strategy_params).check_input_data() if self.industrial_strategy is not None and not self.is_finetuned: - predict = self.industrial_strategy_class.predict(self.predict_data, 'probs') + predict = self.industrial_strategy_class.predict( + self.predict_data, 'probs') if predict_mode == 'ensemble': ensemble_strat = self.industrial_strategy_class.ensemble_strategy predict = {strategy: self.industrial_strategy_class.ensemble_predictions(predict, strategy) @@ -267,11 +285,13 @@ def predict_proba(self, if self.condition_check.solver_is_fedot_class(self.solver): predict = self.solver.predict_proba(self.predict_data) else: - predict = self.solver.predict(self.predict_data, 'probs').predict + predict = self.solver.predict( + self.predict_data, 'probs').predict if self.condition_check.is_multiclf_with_labeling_problem(self.config_dict['problem'], self.predict_data.target, predict): - predict = predict + (self.predict_data.target.min() - predict.min()) + predict = predict + \ + (self.predict_data.target.min() - predict.min()) self.predicted_probs = predict return self.predicted_probs @@ -304,9 +324,11 @@ def finetune(self, elif not self.condition_check.solver_is_none(model_to_tune): model_to_tune = model_to_tune else: - model_to_tune = deepcopy(self.config_dict['initial_assumption']).build() + model_to_tune = deepcopy( + self.config_dict['initial_assumption']).build() tuning_params['tuner'] = tuner_type - pipeline_tuner, model_to_tune = build_tuner(self, model_to_tune, tuning_params, train_data, mode) + pipeline_tuner, model_to_tune = build_tuner( + self, model_to_tune, tuning_params, train_data, mode) if abs(pipeline_tuner.obtained_metric) > tuned_metric: tuned_metric = abs(pipeline_tuner.obtained_metric) self.solver = model_to_tune @@ -322,7 +344,8 @@ def _metric_evaluation_loop(self, valid_shape = target.shape if self.condition_check.solver_have_target_encoder(self.target_encoder): new_target = self.target_encoder.transform(target.flatten()) - labels = self.target_encoder.transform(predicted_labels).reshape(valid_shape) + labels = self.target_encoder.transform( + predicted_labels).reshape(valid_shape) else: new_target = target.flatten() labels = predicted_labels.reshape(valid_shape) @@ -357,7 +380,8 @@ def get_metrics(self, """ problem = self.config_dict['problem'] if problem == 'classification' and self.predicted_probs is None and 'roc_auc' in metric_names: - self.logger.info('Predicted probabilities are not available. Use `predict_proba()` method first') + self.logger.info( + 'Predicted probabilities are not available. Use `predict_proba()` method first') if isinstance(self.predicted_probs, dict): metric_dict = {strategy: self._metric_evaluation_loop(target=target, problem=problem, @@ -427,7 +451,8 @@ def load(self, path): def save_optimization_history(self, return_history: bool = False): """Plot prediction of the model""" - self.solver.history.save(f"{self.output_folder}/optimization_history.json") + self.solver.history.save( + f"{self.output_folder}/optimization_history.json") if return_history: return self.solver.history @@ -440,9 +465,12 @@ def save_best_model(self): is_datetime_in_path=True) else: for idx, p in enumerate(self.solver.ensemble_branches): - Pipeline(p).save(f'./raf_ensemble/{idx}_ensemble_branch', create_subdir=True) - Pipeline(self.solver.ensemble_head).save(f'./raf_ensemble/ensemble_head', create_subdir=True) - self.solver.current_pipeline.save(f'./raf_ensemble/ensemble_composed', create_subdir=True) + Pipeline(p).save( + f'./raf_ensemble/{idx}_ensemble_branch', create_subdir=True) + Pipeline(self.solver.ensemble_head).save( + f'./raf_ensemble/ensemble_head', create_subdir=True) + self.solver.current_pipeline.save( + f'./raf_ensemble/ensemble_composed', create_subdir=True) def explain(self, **kwargs): """ Explain model's prediction via time series points perturbation @@ -482,7 +510,8 @@ def vis_optimisation_history(self, opt_history_path: str = None, # Gather pipeline and history. matplotlib.use('TkAgg') if isinstance(opt_history_path, str): - history = OptHistory.load(opt_history_path + 'optimization_history.json') + history = OptHistory.load( + opt_history_path + 'optimization_history.json') else: history = opt_history_path history_visualizer = PipelineHistoryVisualizer(history) diff --git a/fedot_ind/api/utils/industrial_strategy.py b/fedot_ind/api/utils/industrial_strategy.py index 76366fb0f..10e4dc308 100644 --- a/fedot_ind/api/utils/industrial_strategy.py +++ b/fedot_ind/api/utils/industrial_strategy.py @@ -65,16 +65,20 @@ def _federated_strategy(self, input_data): if self.RAF_workers is None: batch_size = FEDOT_WORKER_NUM else: - batch_size = round(input_data.features.shape[0] / self.RAF_workers) + batch_size = round( + input_data.features.shape[0] / self.RAF_workers) # batch_size = round(input_data.features.shape[0] / self.RAF_workers if self.RAF_workers # is not None else FEDOT_WORKER_NUM) - batch_timeout = round(self.config_dict['timeout'] / FEDOT_WORKER_TIMEOUT_PARTITION) + batch_timeout = round( + self.config_dict['timeout'] / FEDOT_WORKER_TIMEOUT_PARTITION) self.config_dict['timeout'] = batch_timeout - self.logger.info(f'Batch_size - {batch_size}. Number of batches - {self.RAF_workers}') + self.logger.info( + f'Batch_size - {batch_size}. Number of batches - {self.RAF_workers}') self.solver = RAFensembler(composing_params=self.config_dict, n_splits=self.RAF_workers, batch_size=batch_size) - self.logger.info(f'Number of AutoMl models in ensemble - {self.solver.n_splits}') + self.logger.info( + f'Number of AutoMl models in ensemble - {self.solver.n_splits}') def _forecasting_strategy(self, input_data): self.logger.info('TS forecasting algorithm was applied') @@ -140,21 +144,25 @@ def _finetune_loop(self, return tuned_kernels def _kernel_strategy(self, input_data): - self.kernel_ensembler = KernelEnsembler(self.industrial_strategy_params) - kernel_ensemble, kernel_data = self.kernel_ensembler.transform(input_data).predict + self.kernel_ensembler = KernelEnsembler( + self.industrial_strategy_params) + kernel_ensemble, kernel_data = self.kernel_ensembler.transform( + input_data).predict self.solver = self._finetune_loop(kernel_ensemble, kernel_data) # tuning_params = {'metric': FEDOT_TUNING_METRICS[self.config_dict['problem']], 'tuner': OptunaTuner} # self.solver # self.solver = build_tuner(self, self.solver, tuning_params, input_data, 'head') + def _federated_predict(self, input_data, mode: str = 'labels'): self.predicted_branch_probs = [x.predict(input_data).predict for x in self.solver.root_node.nodes_from] - self.predicted_branch_labels = [np.argmax(x, axis=1) for x in self.predicted_branch_probs] + self.predicted_branch_labels = [ + np.argmax(x, axis=1) for x in self.predicted_branch_probs] n_samples, n_channels, n_classes = self.predicted_branch_probs[0].shape[0], \ - len(self.predicted_branch_probs), \ - self.predicted_branch_probs[0].shape[1] + len(self.predicted_branch_probs), \ + self.predicted_branch_probs[0].shape[1] head_model = deepcopy(self.solver.root_node) head_model.nodes_from = [] input_data.features = np.hstack(self.predicted_branch_labels).reshape(n_samples, @@ -168,13 +176,15 @@ def _federated_predict(self, def _forecasting_predict(self, input_data, mode: str = 'labels'): - labels_dict = {k: v.predict(input_data, mode) for k, v in self.solver.items()} + labels_dict = {k: v.predict(input_data, mode) + for k, v in self.solver.items()} return labels_dict def _kernel_predict(self, input_data, mode: str = 'labels'): - labels_dict = {k: v.predict(input_data, mode).predict for k, v in self.solver.items()} + labels_dict = {k: v.predict( + input_data, mode).predict for k, v in self.solver.items()} return labels_dict def _check_predictions(self, predictions): @@ -197,10 +207,12 @@ def _check_predictions(self, predictions): transformed = [] if self.random_label is None: self.random_label = { - class_by_gen: np.random.choice(self.kernel_ensembler.classes_misses_by_generator[class_by_gen]) + class_by_gen: np.random.choice( + self.kernel_ensembler.classes_misses_by_generator[class_by_gen]) for class_by_gen in self.kernel_ensembler.classes_described_by_generator} for prob_by_gen, class_by_gen in zip(list_proba, self.kernel_ensembler.classes_described_by_generator): - converted_probs = np.zeros((prob_by_gen.shape[0], len(self.kernel_ensembler.all_classes))) + converted_probs = np.zeros( + (prob_by_gen.shape[0], len(self.kernel_ensembler.all_classes))) for true_class, map_class in self.kernel_ensembler.mapper_dict[class_by_gen].items(): converted_probs[:, true_class] = prob_by_gen[:, map_class] random_label = self.random_label[class_by_gen] @@ -211,7 +223,8 @@ def _check_predictions(self, predictions): def ensemble_predictions(self, prediction_dict, strategy): transformed_predictions = self._check_predictions(prediction_dict) - average_proba_predictions = self.ensemble_strategy_dict[strategy](transformed_predictions, axis=1) + average_proba_predictions = self.ensemble_strategy_dict[strategy]( + transformed_predictions, axis=1) if average_proba_predictions.shape[1] == 1: average_proba_predictions = np.concatenate([average_proba_predictions, 1 - average_proba_predictions], diff --git a/fedot_ind/core/ensemble/kernel_ensemble.py b/fedot_ind/core/ensemble/kernel_ensemble.py index 342e26ad7..6e3be55db 100644 --- a/fedot_ind/core/ensemble/kernel_ensemble.py +++ b/fedot_ind/core/ensemble/kernel_ensemble.py @@ -24,8 +24,10 @@ def __init__(self, params: Optional[OperationParameters] = None): super().__init__(params) self.distance_metric = params.get('distance_metric', 'cosine') self.kernel_strategy = params.get('kernel_strategy ', 'one_step_cka') - self.feature_extractor = params.get('feature_extractor', list(KERNEL_BASELINE_FEATURE_GENERATORS.keys())) - self._mapping_dict = {k: v for k, v in enumerate(self.feature_extractor)} + self.feature_extractor = params.get('feature_extractor', list( + KERNEL_BASELINE_FEATURE_GENERATORS.keys())) + self._mapping_dict = {k: v for k, + v in enumerate(self.feature_extractor)} self.lr = params.get('learning_rate', 0.1) self.patience = params.get('patience', 5) self.epoch = params.get('epoch', 500) @@ -46,7 +48,8 @@ def __convert_weights(self, kernel_model): kernel_model.solution.weights.cpu().detach().numpy())) else: for n_class in self.n_classes: - kernels_weights_by_class.append(abs(kernel_model.solution[n_class].weights.cpu().detach().numpy())) + kernels_weights_by_class.append( + abs(kernel_model.solution[n_class].weights.cpu().detach().numpy())) kernel_df = pd.DataFrame(kernels_weights_by_class) # kernel_df.columns = self.feature_extractor return kernel_df @@ -65,7 +68,8 @@ def _select_top_feature_generators(self, kernel_weight_matrix): kernel_weight_matrix['best_generator_by_class'] = kernel_weight_matrix.apply( lambda row: self._mapping_dict[np.where(np.isclose(row.values, max(row)))[0][0]], axis=1) - top_n_generators = kernel_weight_matrix['best_generator_by_class'].value_counts().head(2).index.values.tolist() + top_n_generators = kernel_weight_matrix['best_generator_by_class'].value_counts( + ).head(2).index.values.tolist() self.classes_described_by_generator = {gen: kernel_weight_matrix[kernel_weight_matrix['best_generator_by_class'] == gen].index.values.tolist() @@ -87,11 +91,15 @@ def _create_kernel_ensemble(self, input_data, top_n_generators, classes_describe kernel_data = {} for i, gen in enumerate(top_n_generators): train_fold = deepcopy(input_data) - described_idx, _ = np.where(train_fold.target == classes_described_by_generator[gen]) - not_described_idx = [i for i in np.arange(0, train_fold.target.shape[0]) if i not in described_idx] + described_idx, _ = np.where( + train_fold.target == classes_described_by_generator[gen]) + not_described_idx = [i for i in np.arange( + 0, train_fold.target.shape[0]) if i not in described_idx] mp = np.vectorize(self._map_target_for_generator) - train_fold.target = mp(entry=train_fold.target, mapper_dict=self.mapper_dict[gen]) - train_fold.target[not_described_idx] = max(list(self.mapper_dict[gen].values()))+1 + train_fold.target = mp( + entry=train_fold.target, mapper_dict=self.mapper_dict[gen]) + train_fold.target[not_described_idx] = max( + list(self.mapper_dict[gen].values()))+1 basis, generator = KERNEL_BASELINE_NODE_LIST[gen] if basis is None: kernel_ensemble.update( @@ -109,11 +117,15 @@ def _transform(self, input_data: InputData) -> np.array: self.__multiclass_check(input_data.target) grammian_list = self.generate_grammian(input_data) if self.kernel_strategy.__contains__('one'): - kernel_weight_matrix = self.__one_stage_kernel(grammian_list, input_data.target) + kernel_weight_matrix = self.__one_stage_kernel( + grammian_list, input_data.target) else: - kernel_weight_matrix = self.__two_stage_kernel(grammian_list, input_data.target) - top_n_generators, classes_described_by_generator = self._select_top_feature_generators(kernel_weight_matrix) - self.predict = self._create_kernel_ensemble(input_data, top_n_generators, classes_described_by_generator) + kernel_weight_matrix = self.__two_stage_kernel( + grammian_list, input_data.target) + top_n_generators, classes_described_by_generator = self._select_top_feature_generators( + kernel_weight_matrix) + self.predict = self._create_kernel_ensemble( + input_data, top_n_generators, classes_described_by_generator) return self.predict def generate_grammian(self, input_data) -> list[Any]: @@ -127,7 +139,8 @@ def generate_grammian(self, input_data) -> list[Any]: return KLtr def __one_stage_kernel(self, grammian_list, target): - mkl = KERNEL_ALGO[self.kernel_strategy](multiclass_strategy=self.multiclass_strategy).fit(grammian_list, target) + mkl = KERNEL_ALGO[self.kernel_strategy]( + multiclass_strategy=self.multiclass_strategy).fit(grammian_list, target) kernel_weight_matrix = self.__convert_weights(mkl) return kernel_weight_matrix diff --git a/fedot_ind/core/models/nn/network_impl/base_nn_model.py b/fedot_ind/core/models/nn/network_impl/base_nn_model.py index c4549facf..2b8988c33 100644 --- a/fedot_ind/core/models/nn/network_impl/base_nn_model.py +++ b/fedot_ind/core/models/nn/network_impl/base_nn_model.py @@ -143,9 +143,9 @@ def _train_loop(self, train_loader, val_loader, loss_fn, optimizer): for batch in val_loader: inputs, targets = batch output = self.model(inputs) - + loss = loss_fn(output, targets.float()) - + valid_loss += loss.data.item() * inputs.size(0) total += targets.size(0) correct += (torch.argmax(output, 1) ==