Skip to content

Commit

Permalink
Removing Luigi-based alerts.
Browse files Browse the repository at this point in the history
  • Loading branch information
anjackson committed Jul 5, 2022
1 parent 8a7f300 commit 9741ee6
Showing 1 changed file with 0 additions and 36 deletions.
36 changes: 0 additions & 36 deletions monitor/prometheus/alert.rules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,33 +10,6 @@ groups:
summary: "Database backup instance {{ $labels.instance }} failed to run"
description: "{{ $labels.instance }} of job {{ $labels.job }} failed to run."

# Alert when no ukwa_task_event_timestamp series exists at all for the
# DailyAccessTasks job.
- alert: daily_access_task_is_missing
# absent() returns a result only when the selector matches no series; the
# result carries the labels of the equality matchers, so $labels.job below
# resolves to "DailyAccessTasks".
# NOTE(review): the selector does not filter on status, so a series with any
# status value keeps this alert silent - confirm that is intended.
expr: absent(ukwa_task_event_timestamp{job="DailyAccessTasks"})
# The series must stay absent for a full 24h before the alert fires.
for: 24h
labels:
severity: severe
annotations:
summary: "Task {{ $labels.job }} failed to run"
description: "Job {{ $labels.job }} failed to run successfully."

# Alert when no ukwa_task_event_timestamp series exists at all for the
# DailyIngestTasks job.
- alert: daily_ingest_task_is_missing
# absent() returns a result only when the selector matches no series; the
# result carries the labels of the equality matchers, so $labels.job below
# resolves to "DailyIngestTasks".
# NOTE(review): the selector does not filter on status, so a series with any
# status value keeps this alert silent - confirm that is intended.
expr: absent(ukwa_task_event_timestamp{job="DailyIngestTasks"})
# The series must stay absent for a full 24h before the alert fires.
for: 24h
labels:
severity: severe
annotations:
summary: "Task {{ $labels.job }} failed to run"
description: "Job {{ $labels.job }} failed to run successfully."

# Alert when the last recorded success of either daily task is too old.
- alert: daily_task_has_not_run
# The expression subtracts the metric value from time() (seconds since the
# epoch), i.e. it treats the value as a Unix timestamp in seconds. Dividing
# by 60*60 converts the age to hours; the alert condition is an age above 24.
# Only series with status="event.core.success" are considered, so if no such
# series exists for a job this expression yields nothing for it.
expr: (time() - ukwa_task_event_timestamp{job=~"DailyAccessTasks|DailyIngestTasks", status="event.core.success"} ) / (60*60) > 24
# The age must stay above the threshold for 2h before the alert fires.
for: 2h
labels:
severity: severe
annotations:
summary: "Task {{ $labels.job }} failed to run"
description: "Job {{ $labels.job }} failed to run successfully."

# Commenting this out until we have an agreement in place:
# - alert: nominet_task_has_not_run
# expr: absent(ukwa_task_event_timestamp{job="NominetDomainListToHDFS"}) or (time() - ukwa_task_event_timestamp{job="NominetDomainListToHDFS", status="event.core.success"} ) / (60*60*24) > 31
Expand All @@ -47,15 +20,6 @@ groups:
# summary: "Task {{ $labels.job }} failed to run"
# description: "Job {{ $labels.job }} failed to run successfully."

# Alert when the crawl.LaunchCrawls task has either never reported or has not
# reported a success within the last hour.
- alert: crawl_launcher_has_not_run
# Two conditions joined with `or`:
#   1. absent(...): no series exists for job "crawl.LaunchCrawls".
#   2. the last success timestamp is more than 3600 seconds old.
# The `>` comparison binds more tightly than `or`, so the threshold applies
# only to the second branch.
# NOTE(review): the absent() selector has no status matcher, so if only
# non-success series exist neither branch produces a result - confirm.
expr: absent(ukwa_task_event_timestamp{job="crawl.LaunchCrawls"}) or (time() - ukwa_task_event_timestamp{job='crawl.LaunchCrawls', status="event.core.success"}) > 3600
# The condition must hold for 10m before the alert fires.
for: 10m
labels:
severity: severe
annotations:
summary: "Task {{ $labels.job }} failed to run"
description: "Job {{ $labels.job }} failed to run successfully."

- alert: low_crawler_activity
# Explicitly not monitoring the by-permission crawl at present:
expr: increase(heritrix3_crawl_job_uris_total{kind="finished", job!="bypm-heritrix-workers"}[4h]) < 10
Expand Down

0 comments on commit 9741ee6

Please sign in to comment.