Commit: make analyzer class true static

engelharddirk committed Jan 28, 2025
1 parent c5986dc commit 460115d
Showing 2 changed files with 97 additions and 90 deletions.
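The change turns Analyzer into a true static class: every stateful method becomes a @classmethod that reads and writes shared state on cls, pure helpers become @staticmethods, and the class is configured once and never instantiated. A minimal self-contained sketch of that pattern (Registry, configure, _convert, and send are illustrative names, not code from this repo):

# Hypothetical miniature of the class-level singleton pattern this commit applies.
class Registry:
    @classmethod
    def configure(cls, backend):
        # State is stored on the class object itself, so every caller
        # shares it and no instance is ever created.
        cls.backend = backend

    @staticmethod
    def _convert(item):
        # Pure helper: touches no class state, so @staticmethod suffices.
        return {"id": item}

    @classmethod
    def send(cls, item):
        return cls.backend.send(cls._convert(item))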
apps/analyzer/metadata_analyzer/analyzer.py: 180 changes (94 additions, 86 deletions)
@@ -2,47 +2,50 @@
 
 
 class Analyzer:
-    def init(
+    @classmethod
+    def __init__(
+        cls,
         database,
         backend,
         simple_rule_based_analyzer,
         time_series_analyzer,
         schedule_based_analyzer,
     ):
-        Analyzer.database = database
-        Analyzer.backend = backend
-        Analyzer.simple_rule_based_analyzer = simple_rule_based_analyzer
-        Analyzer.time_series_analyzer = time_series_analyzer
-        Analyzer.schedule_based_analyzer = schedule_based_analyzer
-        Analyzer.series_loaded = False
-
-    # Convert a result from the database into the format used by the backend
+        cls.database = database
+        cls.backend = backend
+        cls.simple_rule_based_analyzer = simple_rule_based_analyzer
+        cls.time_series_analyzer = time_series_analyzer
+        cls.schedule_based_analyzer = schedule_based_analyzer
+        cls.series_loaded = False
+
+    @staticmethod
     def _convert_result(result):
         backup_type = {
             "F": "FULL",
             "I": "INCREMENTAL",
             "D": "DIFFERENTIAL",
             "C": "COPY"
-        }.get(result.fdi_type, "UNKNOWN")  # Use .get() to handle unexpected types
+        }.get(result.fdi_type, "UNKNOWN")
         return {
             "id": result.uuid,
             "saveset": result.saveset,
             "sizeMB": result.data_size / 1_000_000,
             "creationDate": result.start_time.isoformat(),
             "type": backup_type,
             "taskId": result.task_uuid,
-            "scheduledTime": result.scheduledTime.isoformat() if result.scheduledTime else None,  # Handle None value
+            "scheduledTime": result.scheduledTime.isoformat() if result.scheduledTime else None,
         }
 
     # Convert a task from the database into the format used by the backend
+    @staticmethod
     def _convert_task(task):
         return {
             "id": task.uuid,
             "displayName": task.task,
         }
 
-    def _get_start_date(data, alert_type, backup_type):
-        latest_id = Analyzer.backend.get_latest_alert_id(alert_type, backup_type)
+    @classmethod
+    def _get_start_date(cls, data, alert_type, backup_type):
+        latest_id = cls.backend.get_latest_alert_id(alert_type, backup_type)
         if latest_id == "":
             return datetime.datetime.min
         else:
@@ -52,60 +55,56 @@ def _get_start_date(data, alert_type, backup_type):
         assert len(latest_alerts) == 1
         return latest_alerts[0]
 
-    def _get_latest_backup_date_from_backend():
-        latest_backup = Analyzer.backend.get_latest_backup_date()
+    @classmethod
+    def _get_latest_backup_date_from_backend(cls):
+        latest_backup = cls.backend.get_latest_backup_date()
         if latest_backup is None:
             return None
         else:
             return latest_backup['creationDate']
 
-    def _send_Backups():
+    @classmethod
+    def _send_Backups(cls):
         try:
-            latest_backup_date = Analyzer._get_latest_backup_date_from_backend()
+            latest_backup_date = cls._get_latest_backup_date_from_backend()
         except Exception as e:
             print(f"Error getting latest backup date: {e}")
             latest_backup_date = None
-        results = list(Analyzer.database.get_results(latest_backup_date))
+        results = list(cls.database.get_results(latest_backup_date))
 
-        schedules = list(Analyzer.database.get_schedules())
-        Analyzer.simple_rule_based_analyzer.analyze_creation_dates(results, schedules, None, latest_backup_date,
-                                                                   "ONLY_SCHEDULES")
+        schedules = list(cls.database.get_schedules())
+        cls.simple_rule_based_analyzer.analyze_creation_dates(results, schedules, None, latest_backup_date,
+                                                              "ONLY_SCHEDULES")
 
-        # Batch the api calls to the backend for improved efficiency
         batch = []
         count = 0
 
         for result in results:
-            # Only send real backups
             if (result.is_backup is not None) and (result.is_backup <= 0):
                 continue
 
-            # Don't send subtasks
             if result.subtask_flag != "0":
                 continue
 
-            # Only send backups where the relevant data is not null
             if result.data_size is None or result.start_time is None:
                 continue
 
-            batch.append(Analyzer._convert_result(result))
+            batch.append(cls._convert_result(result))
             count += 1
 
-            # Send a full batch
             if len(batch) == 100:
-                Analyzer.backend.send_backup_data_batched(batch)
+                cls.backend.send_backup_data_batched(batch)
                 batch = []
 
-        # Send the remaining results
         if len(batch) > 0:
-            Analyzer.backend.send_backup_data_batched(batch)
+            cls.backend.send_backup_data_batched(batch)
 
         return count
 
-    def _send_Tasks():
-        tasks = list(Analyzer.database.get_tasks())
+    @classmethod
+    def _send_Tasks(cls):
+        tasks = list(cls.database.get_tasks())
 
-        # Batch the api calls to the backend for improved efficiency
         batch = []
         count = 0
 
@@ -114,22 +113,21 @@ def _send_Tasks():
             if task.uuid is None or task.task is None:
                 continue
 
-            batch.append(Analyzer._convert_task(task))
+            batch.append(cls._convert_task(task))
             count += 1
 
-            # Send a full batch
             if len(batch) == 100:
-                Analyzer.backend.send_task_data_batched(batch)
+                cls.backend.send_task_data_batched(batch)
                 batch = []
 
-        # Send the remaining results
         if len(batch) > 0:
-            Analyzer.backend.send_task_data_batched(batch)
+            cls.backend.send_task_data_batched(batch)
 
         return count
 
-    def _send_Storage():
-        storages = list(Analyzer.database.get_data_stores())
+    @classmethod
+    def _send_Storage(cls):
+        storages = list(cls.database.get_data_stores())
 
         for storage in storages:
 
@@ -144,86 +142,96 @@ def _send_Storage():
                 "filled": storage.filled,
             }
 
-            Analyzer.backend.send_storage_data(storage_data)
+            cls.backend.send_storage_data(storage_data)
 
         return len(storages)
 
-    def update_data():
-        num_Storage = Analyzer._send_Storage()
-        num_Tasks = Analyzer._send_Tasks()
-        num_Backups = Analyzer._send_Backups()
+    @classmethod
+    def update_data(cls):
+        num_Storage = cls._send_Storage()
+        num_Tasks = cls._send_Tasks()
+        num_Backups = cls._send_Backups()
 
-        # Return the number of items sent to the backend
         return {
             "storage": num_Storage,
             "tasks": num_Tasks,
             "backups": num_Backups,
         }
 
-    def simple_rule_based_analysis(alert_limit):
-        data = list(Analyzer.database.get_results())
-        start_date = Analyzer._get_start_date(data, "SIZE_ALERT", "FULL")
-        result = Analyzer.simple_rule_based_analyzer.analyze(
+    @classmethod
+    def simple_rule_based_analysis(cls, alert_limit):
+        data = list(cls.database.get_results())
+        start_date = cls._get_start_date(data, "SIZE_ALERT", "FULL")
+        result = cls.simple_rule_based_analyzer.analyze(
             data, alert_limit, start_date
         )
         return result
 
-    def simple_rule_based_analysis_diff(alert_limit):
-        data = list(Analyzer.database.get_results())
-        start_date = Analyzer._get_start_date(data, "SIZE_ALERT", "DIFFERENTIAL")
-        result = Analyzer.simple_rule_based_analyzer.analyze_diff(
+    @classmethod
+    def simple_rule_based_analysis_diff(cls, alert_limit):
+        data = list(cls.database.get_results())
+        start_date = cls._get_start_date(data, "SIZE_ALERT", "DIFFERENTIAL")
+        result = cls.simple_rule_based_analyzer.analyze_diff(
             data, alert_limit, start_date
        )
         return result
 
-    def simple_rule_based_analysis_inc(alert_limit):
-        data = list(Analyzer.database.get_results())
-        start_date = Analyzer._get_start_date(data, "SIZE_ALERT", "INCREMENTAL")
-        result = Analyzer.simple_rule_based_analyzer.analyze_inc(
+    @classmethod
+    def simple_rule_based_analysis_inc(cls, alert_limit):
+        data = list(cls.database.get_results())
+        start_date = cls._get_start_date(data, "SIZE_ALERT", "INCREMENTAL")
+        result = cls.simple_rule_based_analyzer.analyze_inc(
             data, alert_limit, start_date
         )
         return result
 
+    @classmethod
     def simple_time_series_analysis(
+        cls,
         variable, task_id, frequency, backup_type, window_size
     ):
-        if not Analyzer.series_loaded:
-            Analyzer.load_time_series_data()
+        if not cls.series_loaded:
+            cls.load_time_series_data()
 
-        return Analyzer.time_series_analyzer.k_means_analyze(
+        return cls.time_series_analyzer.k_means_analyze(
             variable, task_id, frequency, backup_type, window_size
         )
 
-    def time_series_get_frequencies(task_id, backup_type, variable):
-        if not Analyzer.series_loaded:
-            Analyzer.load_time_series_data()
-        return Analyzer.time_series_analyzer.get_frequencies(
+    @classmethod
+    def time_series_get_frequencies(cls, task_id, backup_type, variable):
+        if not cls.series_loaded:
+            cls.load_time_series_data()
+        return cls.time_series_analyzer.get_frequencies(
             task_id, backup_type, variable
         )
 
-    def time_series_get_task_ids():
-        if not Analyzer.series_loaded:
-            Analyzer.load_time_series_data()
-        return Analyzer.time_series_analyzer.get_task_ids()
-
-    def load_time_series_data():
-        data = list(Analyzer.database.get_results())
-        Analyzer.time_series_analyzer.preload_data(data)
-        Analyzer.series_loaded = True
-
-    def schedule_based_analysis(alert_limit, stop_date):
-        results = list(Analyzer.database.get_results())
-        schedules = list(Analyzer.database.get_schedules())
-        task_events = list(Analyzer.database.get_task_events())
+    @classmethod
+    def time_series_get_task_ids(cls):
+        if not cls.series_loaded:
+            cls.load_time_series_data()
+        return cls.time_series_analyzer.get_task_ids()
+
+    @classmethod
+    def load_time_series_data(cls):
+        data = list(cls.database.get_results())
+        cls.time_series_analyzer.preload_data(data)
+        cls.series_loaded = True
+
+    @classmethod
+    def schedule_based_analysis(cls, alert_limit, stop_date):
+        results = list(cls.database.get_results())
+        schedules = list(cls.database.get_schedules())
+        task_events = list(cls.database.get_task_events())
         start_date = max(
-            Analyzer._get_start_date(results, "CREATION_DATE_ALERT", None),
-            Analyzer._get_start_date(results, "ADDITIONAL_BACKUP_ALERT", None),
+            cls._get_start_date(results, "CREATION_DATE_ALERT", None),
+            cls._get_start_date(results, "ADDITIONAL_BACKUP_ALERT", None),
         )
-        return Analyzer.schedule_based_analyzer.analyze(results, schedules, task_events, alert_limit, start_date, stop_date)
+        return cls.schedule_based_analyzer.analyze(results, schedules, task_events, alert_limit, start_date, stop_date)
 
-    def simple_rule_based_analysis_storage_capacity(alert_limit):
-        data = list(Analyzer.database.get_data_stores())
-        result = Analyzer.simple_rule_based_analyzer.analyze_storage_capacity(
+    @classmethod
+    def simple_rule_based_analysis_storage_capacity(cls, alert_limit):
+        data = list(cls.database.get_data_stores())
+        result = cls.simple_rule_based_analyzer.analyze_storage_capacity(
             data, alert_limit
         )
         return result
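Because __init__ itself is now a @classmethod, it can be invoked directly on the class, which is what main.py does below; Python binds cls automatically. A tiny self-contained illustration of that binding (Static and value are made-up names, not from the repo):

# Hypothetical demo: calling a @classmethod __init__ on the class itself
# configures class-level state without creating an instance.
class Static:
    @classmethod
    def __init__(cls, value):
        cls.value = value

Static.__init__(42)   # cls=Static is bound automatically
print(Static.value)   # prints 42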
apps/analyzer/metadata_analyzer/main.py: 7 changes (3 additions, 4 deletions)
@@ -18,15 +18,14 @@
 load_dotenv(dotenv_path=".env")
 path = app.root_path
 
-
 @app.route("/")
 def hello_world():
     return "Hello, world!"
 
 @app.route("/updating/basicBackupData", methods=["POST"])
 @swag_from(os.path.join(path, "swagger", "updating", "basicBackupData.yaml"), validation=False)
 def update_data():
-    return jsonify(Analyzer.update_data())
+    return jsonify(analyzer.update_data())
 
 
 @app.route("/alerting/size/fullBackups", methods=["POST"])
@@ -36,7 +35,7 @@ def simple_rule_based_analysis():
 
     try:
         int(alert_limit)
-        return jsonify(Analyzer.simple_rule_based_analysis(int(alert_limit)))
+        return jsonify(analyzer.simple_rule_based_analysis(int(alert_limit)))
     except ValueError:
         return "Invalid value for alert limit", 400
 
@@ -243,7 +242,7 @@ def main():
     time_series_analyzer = Time_series_analyzer(parameters)
     simple_rule_based_analyzer = SimpleRuleBasedAnalyzer(backend, 0.2, 0.2, 0.2, 0.2)
     schedule_based_analyzer = ScheduleBasedAnalyzer(backend)
-    Analyzer.init(
+    Analyzer.__init__(
         database,
         backend,
         simple_rule_based_analyzer,

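Taken together, main() configures the shared class state once and later call sites reach it through the class. A sketch of the resulting flow, assuming the database, backend, and analyzer helper objects constructed in main() above; the dict keys mirror update_data's return value in the diff:

# Sketch of the runtime flow after this commit (names taken from the diff;
# the objects passed in are assumed to be built earlier in main()).
Analyzer.__init__(database, backend, simple_rule_based_analyzer,
                  time_series_analyzer, schedule_based_analyzer)

counts = Analyzer.update_data()  # pushes storage, task, and backup data
# counts == {"storage": ..., "tasks": ..., "backups": ...}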