Skip to content

Commit

Permalink
Tidy alerts and add SSL alert, for #54.
Browse files Browse the repository at this point in the history
  • Loading branch information
anjackson committed Jul 3, 2023
1 parent 16219eb commit a5ad0a3
Showing 1 changed file with 14 additions and 4 deletions.
18 changes: 14 additions & 4 deletions monitor/prometheus/alert.rules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,15 @@ groups:
summary: "The FC output volume is filling up with WARCs"
description: "The number of WARCs on the FC Gluster volume appears to be increasing: check move-to-hdfs is working as expected."

# Fires when the crawl.log size metric has been flat, or missing altogether,
# for a sustained hour.
- alert: tidy-logs_no_new_crawl_logs
  # delta(...[1h]) == 0 : the reported crawl.log size did not change over the
  #                       last hour.
  # absent(...)         : no series exists at all for log='crawl.log'.
  expr: delta(ukwa_crawler_log_size_bytes{log='crawl.log'}[1h]) == 0 or absent(ukwa_crawler_log_size_bytes{log='crawl.log'})
  for: 1h
  labels:
    severity: severe
  annotations:
    summary: "No new crawl logs from tidy-logs"
    # absent() returns a series that carries only the equality-matcher label
    # (log="crawl.log"), so instance/job are empty on that branch. Word the two
    # cases separately instead of rendering " of job  failed to run.".
    description: >-
      {{ if $labels.instance }}The crawl.log size reported by
      {{ $labels.instance }} (job {{ $labels.job }}) has not changed in the
      last hour.{{ else }}The ukwa_crawler_log_size_bytes metric for crawl.log
      is absent.{{ end }} Check that the crawler and tidy-logs are running.


- name: Generic metrics
rules:
Expand Down Expand Up @@ -180,12 +189,13 @@ groups:
summary: "CPU running too hot?"
description: "The CPU on {{ $labels.instance }} is running hot (>70C for 30mins)."

# NOTE(review): diff +/- markers were lost in extraction. The next two lines
# appear to be the REMOVED side of this hunk (the same tidy-logs rule is shown
# as newly added in the earlier hunk); the ssl_certs lines after them are the
# ADDED side. Confirm against the commit before treating this as valid YAML.
- alert: tidy-logs_no_new_crawl_logs
expr: delta(ukwa_crawler_log_size_bytes{log='crawl.log'}[1h]) == 0 or absent(ukwa_crawler_log_size_bytes{log='crawl.log'})
# New rule: (expiry - now) divided by 60*60*24 gives the remaining lifetime in
# days, compared against a 30-day threshold.
# Presumably probe_ssl_earliest_cert_expiry is a Unix timestamp in seconds
# (as time() is) - TODO confirm against the exporter that provides it.
- alert: ssl_certs_nearing_expiration
expr: (probe_ssl_earliest_cert_expiry - time())/(60*60*24) < 30
# The condition has to hold continuously for an hour before the alert fires.
for: 1h
labels:
severity: severe
annotations:
# NOTE(review): the next two annotation lines appear to be the REMOVED
# tidy-logs text; the two SSL lines that follow replace them.
summary: "No new crawl logs from tidy-logs"
description: "{{ $labels.instance }} of job {{ $labels.job }} failed to run."
summary: "SSL certificate for {{ $labels.instance }} will expire soon!"
description: "The SSL certificate for {{ $labels.instance }} (part of {{ $labels.job }}) will expire in less than 30 days."


0 comments on commit a5ad0a3

Please sign in to comment.