diff --git a/examples/observability/adot-amp-grafana-for-haproxy/main.tf b/examples/observability/adot-amp-grafana-for-haproxy/main.tf index ac2e1a9125..40ce9ed677 100644 --- a/examples/observability/adot-amp-grafana-for-haproxy/main.tf +++ b/examples/observability/adot-amp-grafana-for-haproxy/main.tf @@ -144,3 +144,78 @@ resource "grafana_dashboard" "haproxy_dashboards" { folder = grafana_folder.haproxy_dashboards.id config_json = file("${path.module}/dashboards/default.json") } + +# Configure AWS Managed Prometheus rule groups. The HTTP 4xx/5xx error-rate alerts compute (error responses / all responses) * 100 per backend or server and fire when that percentage stays above 5 for 1m. +resource "aws_prometheus_rule_group_namespace" "haproxy" { + name = "haproxy_rules" + workspace_id = module.eks_blueprints.amazon_prometheus_workspace_id + data = < 5 + for: 1m + labels: + severity: critical + annotations: + summary: HAProxy high HTTP 4xx error rate backend (instance {{ $labels.instance }}) + description: "Too many HTTP requests with status 4xx (> 5%) on backend {{ $labels.fqdn }}/{{ $labels.backend }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - name: obsa-haproxy-http5xx-error-alert + rules: + - alert: Ha_proxy_High_Http5xx_ErrorRate_Backend + expr: (sum by (backend) (rate(haproxy_server_http_responses_total{code="5xx"}[1m])) / sum by (backend) (rate(haproxy_server_http_responses_total[1m]))) * 100 > 5 + for: 1m + labels: + severity: critical + annotations: + summary: HAProxy high HTTP 5xx error rate backend (instance {{ $labels.instance }}) + description: "Too many HTTP requests with status 5xx (> 5%) on backend {{ $labels.fqdn }}/{{ $labels.backend }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - name: obsa-haproxy-Http4xx-ErrorRate-Server-alert + rules: + - alert: Ha_proxy_High_Http4xx_ErrorRate_Server + expr: (sum by (server) (rate(haproxy_server_http_responses_total{code="4xx"}[1m])) / sum by (server) (rate(haproxy_server_http_responses_total[1m]))) * 100 > 5 + for: 1m + labels: + severity: critical + annotations: + summary: HAProxy high HTTP 4xx error rate server (instance {{ $labels.instance }}) + 
description: "Too many HTTP requests with status 4xx (> 5%) on server {{ $labels.server }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - name: obsa-haproxy-Http5xx-ErrorRate-Server-alert + rules: + - alert: Ha_proxy_High_Http5xx_ErrorRate_Server + expr: (sum by (server) (rate(haproxy_server_http_responses_total{code="5xx"}[1m])) / sum by (server) (rate(haproxy_server_http_responses_total[1m]))) * 100 > 5 + for: 1m + labels: + severity: critical + annotations: + summary: HAProxy high HTTP 5xx error rate server (instance {{ $labels.instance }}) + description: "Too many HTTP requests with status 5xx (> 5%) on server {{ $labels.server }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + EOF +} + +#Configure AWS Managed Prometheus alert manager +resource "aws_prometheus_alert_manager_definition" "haproxy" { + workspace_id = module.eks_blueprints.amazon_prometheus_workspace_id + definition = <