From 460115de5f7a65d593171451a0b1f0be946f022d Mon Sep 17 00:00:00 2001
From: engelharddirk
Date: Tue, 28 Jan 2025 13:38:17 +0100
Subject: [PATCH] make analyzer class true static

---
 apps/analyzer/metadata_analyzer/analyzer.py | 180 ++++++++++----------
 apps/analyzer/metadata_analyzer/main.py     |   7 +-
 2 files changed, 97 insertions(+), 90 deletions(-)

diff --git a/apps/analyzer/metadata_analyzer/analyzer.py b/apps/analyzer/metadata_analyzer/analyzer.py
index bf94ee01..1e3964d4 100644
--- a/apps/analyzer/metadata_analyzer/analyzer.py
+++ b/apps/analyzer/metadata_analyzer/analyzer.py
@@ -2,28 +2,30 @@


 class Analyzer:
-    def init(
+    @classmethod
+    def __init__(
+        cls,
         database,
         backend,
         simple_rule_based_analyzer,
         time_series_analyzer,
         schedule_based_analyzer,
     ):
-        Analyzer.database = database
-        Analyzer.backend = backend
-        Analyzer.simple_rule_based_analyzer = simple_rule_based_analyzer
-        Analyzer.time_series_analyzer = time_series_analyzer
-        Analyzer.schedule_based_analyzer = schedule_based_analyzer
-        Analyzer.series_loaded = False
-
-    # Convert a result from the database into the format used by the backend
+        cls.database = database
+        cls.backend = backend
+        cls.simple_rule_based_analyzer = simple_rule_based_analyzer
+        cls.time_series_analyzer = time_series_analyzer
+        cls.schedule_based_analyzer = schedule_based_analyzer
+        cls.series_loaded = False
+
+    @staticmethod
     def _convert_result(result):
         backup_type = {
             "F": "FULL",
             "I": "INCREMENTAL",
             "D": "DIFFERENTIAL",
             "C": "COPY"
-        }.get(result.fdi_type, "UNKNOWN")  # Use .get() to handle unexpected types
+        }.get(result.fdi_type, "UNKNOWN")
         return {
             "id": result.uuid,
             "saveset": result.saveset,
@@ -31,18 +33,19 @@ def _convert_result(result):
             "creationDate": result.start_time.isoformat(),
             "type": backup_type,
             "taskId": result.task_uuid,
-            "scheduledTime": result.scheduledTime.isoformat() if result.scheduledTime else None,  # Handle None value
+            "scheduledTime": result.scheduledTime.isoformat() if result.scheduledTime else None,
         }

-    # Convert a task from the database into the format used by the backend
+    @staticmethod
     def _convert_task(task):
         return {
             "id": task.uuid,
             "displayName": task.task,
         }

-    def _get_start_date(data, alert_type, backup_type):
-        latest_id = Analyzer.backend.get_latest_alert_id(alert_type, backup_type)
+    @classmethod
+    def _get_start_date(cls, data, alert_type, backup_type):
+        latest_id = cls.backend.get_latest_alert_id(alert_type, backup_type)
         if latest_id == "":
             return datetime.datetime.min
         else:
@@ -52,60 +55,56 @@ def _get_start_date(data, alert_type, backup_type):
             assert len(latest_alerts) == 1
             return latest_alerts[0]

-    def _get_latest_backup_date_from_backend():
-        latest_backup = Analyzer.backend.get_latest_backup_date()
+    @classmethod
+    def _get_latest_backup_date_from_backend(cls):
+        latest_backup = cls.backend.get_latest_backup_date()
         if latest_backup is None:
             return None
         else:
             return latest_backup['creationDate']

-    def _send_Backups():
+    @classmethod
+    def _send_Backups(cls):
         try:
-            latest_backup_date = Analyzer._get_latest_backup_date_from_backend()
+            latest_backup_date = cls._get_latest_backup_date_from_backend()
         except Exception as e:
             print(f"Error getting latest backup date: {e}")
             latest_backup_date = None

-        results = list(Analyzer.database.get_results(latest_backup_date))
+        results = list(cls.database.get_results(latest_backup_date))

-        schedules = list(Analyzer.database.get_schedules())
-        Analyzer.simple_rule_based_analyzer.analyze_creation_dates(results, schedules, None, latest_backup_date,
-                                                                   "ONLY_SCHEDULES")
+        schedules = list(cls.database.get_schedules())
+        cls.simple_rule_based_analyzer.analyze_creation_dates(results, schedules, None, latest_backup_date,
+                                                              "ONLY_SCHEDULES")

-        # Batch the api calls to the backend for improved efficiency
         batch = []
         count = 0

         for result in results:
-            # Only send real backups
             if (result.is_backup is not None) and (result.is_backup <= 0):
                 continue

-            # Don't send subtasks
             if result.subtask_flag != "0":
                 continue

-            # Only send backups where the relevant data is not null
             if result.data_size is None or result.start_time is None:
                 continue

-            batch.append(Analyzer._convert_result(result))
+            batch.append(cls._convert_result(result))
             count += 1

-            # Send a full batch
             if len(batch) == 100:
-                Analyzer.backend.send_backup_data_batched(batch)
+                cls.backend.send_backup_data_batched(batch)
                 batch = []

-        # Send the remaining results
         if len(batch) > 0:
-            Analyzer.backend.send_backup_data_batched(batch)
+            cls.backend.send_backup_data_batched(batch)

         return count

-    def _send_Tasks():
-        tasks = list(Analyzer.database.get_tasks())
+    @classmethod
+    def _send_Tasks(cls):
+        tasks = list(cls.database.get_tasks())

-        # Batch the api calls to the backend for improved efficiency
         batch = []
         count = 0

@@ -114,22 +113,21 @@ def _send_Tasks():
             if task.uuid is None or task.task is None:
                 continue

-            batch.append(Analyzer._convert_task(task))
+            batch.append(cls._convert_task(task))
             count += 1

-            # Send a full batch
             if len(batch) == 100:
-                Analyzer.backend.send_task_data_batched(batch)
+                cls.backend.send_task_data_batched(batch)
                 batch = []

-        # Send the remaining results
         if len(batch) > 0:
-            Analyzer.backend.send_task_data_batched(batch)
+            cls.backend.send_task_data_batched(batch)

         return count

-    def _send_Storage():
-        storages = list(Analyzer.database.get_data_stores())
+    @classmethod
+    def _send_Storage(cls):
+        storages = list(cls.database.get_data_stores())

         for storage in storages:

@@ -144,86 +142,96 @@ def _send_Storage():
                 "filled": storage.filled,
             }

-            Analyzer.backend.send_storage_data(storage_data)
+            cls.backend.send_storage_data(storage_data)

         return len(storages)

-    def update_data():
-        num_Storage = Analyzer._send_Storage()
-        num_Tasks = Analyzer._send_Tasks()
-        num_Backups = Analyzer._send_Backups()
+    @classmethod
+    def update_data(cls):
+        num_Storage = cls._send_Storage()
+        num_Tasks = cls._send_Tasks()
+        num_Backups = cls._send_Backups()

-        # Return the number of items sent to the backend
         return {
             "storage": num_Storage,
             "tasks": num_Tasks,
             "backups": num_Backups,
         }

-    def simple_rule_based_analysis(alert_limit):
-        data = list(Analyzer.database.get_results())
-        start_date = Analyzer._get_start_date(data, "SIZE_ALERT", "FULL")
-        result = Analyzer.simple_rule_based_analyzer.analyze(
+    @classmethod
+    def simple_rule_based_analysis(cls, alert_limit):
+        data = list(cls.database.get_results())
+        start_date = cls._get_start_date(data, "SIZE_ALERT", "FULL")
+        result = cls.simple_rule_based_analyzer.analyze(
             data, alert_limit, start_date
         )
         return result

-    def simple_rule_based_analysis_diff(alert_limit):
-        data = list(Analyzer.database.get_results())
-        start_date = Analyzer._get_start_date(data, "SIZE_ALERT", "DIFFERENTIAL")
-        result = Analyzer.simple_rule_based_analyzer.analyze_diff(
+    @classmethod
+    def simple_rule_based_analysis_diff(cls, alert_limit):
+        data = list(cls.database.get_results())
+        start_date = cls._get_start_date(data, "SIZE_ALERT", "DIFFERENTIAL")
+        result = cls.simple_rule_based_analyzer.analyze_diff(
             data, alert_limit, start_date
         )
         return result

-    def simple_rule_based_analysis_inc(alert_limit):
-        data = list(Analyzer.database.get_results())
-        start_date = Analyzer._get_start_date(data, "SIZE_ALERT", "INCREMENTAL")
-        result = Analyzer.simple_rule_based_analyzer.analyze_inc(
+    @classmethod
+    def simple_rule_based_analysis_inc(cls, alert_limit):
+        data = list(cls.database.get_results())
+        start_date = cls._get_start_date(data, "SIZE_ALERT", "INCREMENTAL")
+        result = cls.simple_rule_based_analyzer.analyze_inc(
             data, alert_limit, start_date
         )
         return result

+    @classmethod
     def simple_time_series_analysis(
+        cls,
         variable, task_id, frequency, backup_type, window_size
     ):
-        if not Analyzer.series_loaded:
-            Analyzer.load_time_series_data()
+        if not cls.series_loaded:
+            cls.load_time_series_data()

-        return Analyzer.time_series_analyzer.k_means_analyze(
+        return cls.time_series_analyzer.k_means_analyze(
             variable, task_id, frequency, backup_type, window_size
         )

-    def time_series_get_frequencies(task_id, backup_type, variable):
-        if not Analyzer.series_loaded:
-            Analyzer.load_time_series_data()
-        return Analyzer.time_series_analyzer.get_frequencies(
+    @classmethod
+    def time_series_get_frequencies(cls, task_id, backup_type, variable):
+        if not cls.series_loaded:
+            cls.load_time_series_data()
+        return cls.time_series_analyzer.get_frequencies(
             task_id, backup_type, variable
         )

-    def time_series_get_task_ids():
-        if not Analyzer.series_loaded:
-            Analyzer.load_time_series_data()
-        return Analyzer.time_series_analyzer.get_task_ids()
-
-    def load_time_series_data():
-        data = list(Analyzer.database.get_results())
-        Analyzer.time_series_analyzer.preload_data(data)
-        Analyzer.series_loaded = True
-
-    def schedule_based_analysis(alert_limit, stop_date):
-        results = list(Analyzer.database.get_results())
-        schedules = list(Analyzer.database.get_schedules())
-        task_events = list(Analyzer.database.get_task_events())
+    @classmethod
+    def time_series_get_task_ids(cls):
+        if not cls.series_loaded:
+            cls.load_time_series_data()
+        return cls.time_series_analyzer.get_task_ids()
+
+    @classmethod
+    def load_time_series_data(cls):
+        data = list(cls.database.get_results())
+        cls.time_series_analyzer.preload_data(data)
+        cls.series_loaded = True
+
+    @classmethod
+    def schedule_based_analysis(cls, alert_limit, stop_date):
+        results = list(cls.database.get_results())
+        schedules = list(cls.database.get_schedules())
+        task_events = list(cls.database.get_task_events())
         start_date = max(
-            Analyzer._get_start_date(results, "CREATION_DATE_ALERT", None),
-            Analyzer._get_start_date(results, "ADDITIONAL_BACKUP_ALERT", None),
+            cls._get_start_date(results, "CREATION_DATE_ALERT", None),
+            cls._get_start_date(results, "ADDITIONAL_BACKUP_ALERT", None),
         )
-        return Analyzer.schedule_based_analyzer.analyze(results, schedules, task_events, alert_limit, start_date, stop_date)
+        return cls.schedule_based_analyzer.analyze(results, schedules, task_events, alert_limit, start_date, stop_date)

-    def simple_rule_based_analysis_storage_capacity(alert_limit):
-        data = list(Analyzer.database.get_data_stores())
-        result = Analyzer.simple_rule_based_analyzer.analyze_storage_capacity(
+    @classmethod
+    def simple_rule_based_analysis_storage_capacity(cls, alert_limit):
+        data = list(cls.database.get_data_stores())
+        result = cls.simple_rule_based_analyzer.analyze_storage_capacity(
             data, alert_limit
         )
         return result
diff --git a/apps/analyzer/metadata_analyzer/main.py b/apps/analyzer/metadata_analyzer/main.py
index cbf3f2ef..430699b6 100644
--- a/apps/analyzer/metadata_analyzer/main.py
+++ b/apps/analyzer/metadata_analyzer/main.py
@@ -18,7 +18,6 @@
 load_dotenv(dotenv_path=".env")
 path = app.root_path

-
 @app.route("/")
 def hello_world():
     return "Hello, world!"
@@ -26,7 +25,7 @@ def hello_world():
 @app.route("/updating/basicBackupData", methods=["POST"])
 @swag_from(os.path.join(path, "swagger", "updating", "basicBackupData.yaml"), validation=False)
 def update_data():
-    return jsonify(Analyzer.update_data())
+    return jsonify(analyzer.update_data())


 @app.route("/alerting/size/fullBackups", methods=["POST"])
@@ -36,7 +35,7 @@ def simple_rule_based_analysis():
     try:
         int(alert_limit)
-        return jsonify(Analyzer.simple_rule_based_analysis(int(alert_limit)))
+        return jsonify(analyzer.simple_rule_based_analysis(int(alert_limit)))
     except ValueError:
         return "Invalid value for alert limit", 400

@@ -243,7 +242,7 @@ def main():
     time_series_analyzer = Time_series_analyzer(parameters)
     simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2)
     schedule_based_analyzer = ScheduleBasedAnalyzer(backend)
-    Analyzer.init(
+    Analyzer.__init__(
        database,
         backend,
         simple_rule_based_analyzer,
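
The pattern this patch converges on is a class that is configured once and never instantiated: __init__ is declared as a classmethod and invoked explicitly (Analyzer.__init__(database, backend, ...) in main()), so all state lives on the class object and the other classmethods read it through cls. Below is a minimal, self-contained sketch of that pattern; StaticAnalyzer and its single database argument are hypothetical stand-ins, not code from this repository.

class StaticAnalyzer:
    # State lives on the class itself; no instances are ever created.

    @classmethod
    def __init__(cls, database):
        # Unusual but valid: because this is a classmethod, it configures
        # class-level attributes when called explicitly on the class, e.g.
        # StaticAnalyzer.__init__(my_database), rather than during normal
        # instantiation.
        cls.database = database
        cls.series_loaded = False

    @classmethod
    def update_data(cls):
        # Reads the class-level state set up above.
        return {"backups": len(cls.database)}


# Explicit one-time setup, mirroring Analyzer.__init__(database, backend, ...) in main().
StaticAnalyzer.__init__(["backup-1", "backup-2"])
print(StaticAnalyzer.update_data())  # {'backups': 2}

Because __init__ here is a classmethod, it only takes effect when called explicitly on the class as shown; the class then behaves as a configured singleton rather than something callers instantiate.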