Skip to content

Commit

Permalink
added managed prometheus alert manager config for memcached and hapro…
Browse files Browse the repository at this point in the history
…xy (#564)

Co-authored-by: Bryant Biggs <bryantbiggs@gmail.com>
  • Loading branch information
rameshv29 and bryantbiggs authored May 18, 2022
1 parent 14275d9 commit afe7184
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 0 deletions.
75 changes: 75 additions & 0 deletions examples/observability/adot-amp-grafana-for-haproxy/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,78 @@ resource "grafana_dashboard" "haproxy_dashboards" {
folder = grafana_folder.haproxy_dashboards.id
config_json = file("${path.module}/dashboards/default.json")
}

# Configure AWS Managed Prometheus rule groups for HAProxy.
#
# One alerting rule group per condition: exporter down, and high HTTP 4xx/5xx
# response ratios aggregated per backend and per server.
#
# The error-rate expressions compute (errors / total) * 100 so that the "> 5"
# threshold is a percentage, matching the "(> 5%)" wording in the annotations.
# Each group name is unique, since Prometheus rejects a rule file in which a
# group name is repeated.
#
# NOTE(review): the error-rate rules aggregate with `sum by (...)`, so only the
# grouping label survives on the result; `$labels.instance` / `$labels.fqdn` in
# those annotations will presumably render empty - confirm and adjust.
resource "aws_prometheus_rule_group_namespace" "haproxy" {
  name         = "haproxy_rules"
  workspace_id = module.eks_blueprints.amazon_prometheus_workspace_id
  data         = <<EOF
groups:
  - name: obsa-haproxy-down-alert
    rules:
      - alert: HA_proxy_down
        expr: haproxy_up == 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: HAProxy down (instance {{ $labels.instance }})
          description: "HAProxy down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
  - name: obsa-haproxy-http4xx-error-alert
    rules:
      - alert: Ha_proxy_High_Http4xx_ErrorRate_Backend
        expr: (sum by (backend) (rate(haproxy_server_http_responses_total{code="4xx"}[1m])) / sum by (backend) (rate(haproxy_server_http_responses_total[1m]))) * 100 > 5
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: HAProxy high HTTP 4xx error rate backend (instance {{ $labels.instance }})
          description: "Too many HTTP requests with status 4xx (> 5%) on backend {{ $labels.fqdn }}/{{ $labels.backend }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
  - name: obsa-haproxy-http5xx-error-alert
    rules:
      - alert: Ha_proxy_High_Http5xx_ErrorRate_Backend
        expr: (sum by (backend) (rate(haproxy_server_http_responses_total{code="5xx"}[1m])) / sum by (backend) (rate(haproxy_server_http_responses_total[1m]))) * 100 > 5
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: HAProxy high HTTP 5xx error rate backend (instance {{ $labels.instance }})
          description: "Too many HTTP requests with status 5xx (> 5%) on backend {{ $labels.fqdn }}/{{ $labels.backend }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
  - name: obsa-haproxy-Http4xx-ErrorRate-Server-alert
    rules:
      - alert: Ha_proxy_High_Http4xx_ErrorRate_Server
        expr: (sum by (server) (rate(haproxy_server_http_responses_total{code="4xx"}[1m])) / sum by (server) (rate(haproxy_server_http_responses_total[1m]))) * 100 > 5
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: HAProxy high HTTP 4xx error rate server (instance {{ $labels.instance }})
          description: "Too many HTTP requests with status 4xx (> 5%) on server {{ $labels.server }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
  - name: obsa-haproxy-Http5xx-ErrorRate-Server-alert
    rules:
      - alert: Ha_proxy_High_Http5xx_ErrorRate_Server
        expr: (sum by (server) (rate(haproxy_server_http_responses_total{code="5xx"}[1m])) / sum by (server) (rate(haproxy_server_http_responses_total[1m]))) * 100 > 5
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: HAProxy high HTTP 5xx error rate server (instance {{ $labels.instance }})
          description: "Too many HTTP requests with status 5xx (> 5%) on server {{ $labels.server }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
EOF
}

# Alertmanager definition for the Amazon Managed Prometheus workspace used by
# the HAProxy example. Routes every alert to a single receiver named
# 'default', which has no notification integrations configured.
resource "aws_prometheus_alert_manager_definition" "haproxy" {
  workspace_id = module.eks_blueprints.amazon_prometheus_workspace_id

  # Indented heredoc: the common leading whitespace is trimmed, so the
  # rendered document starts at column 0.
  definition = <<-EOT
    alertmanager_config: |
      route:
        receiver: 'default'
      receivers:
        - name: 'default'
  EOT
}
31 changes: 31 additions & 0 deletions examples/observability/adot-amp-grafana-for-memcached/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,34 @@ resource "grafana_dashboard" "memchached_dashboards" {
folder = grafana_folder.memchached_dashboards.id
config_json = file("${path.module}/dashboards/default.json")
}

# Rule group namespace for memcached in the Amazon Managed Prometheus
# workspace. Contains a single alerting rule that fires immediately
# (`for: 0m`) with critical severity when `memcached_up == 0`.
resource "aws_prometheus_rule_group_namespace" "memcached" {
  workspace_id = module.eks_blueprints.amazon_prometheus_workspace_id
  name         = "memcached_rules"

  # Indented heredoc: the common leading whitespace is trimmed, so the
  # rendered rules document starts at column 0.
  data = <<-EOT
    groups:
      - name: obsa-memcached-down-alert
        rules:
          - alert: memcached-down
            expr: memcached_up == 0
            for: 0m
            labels:
              severity: critical
            annotations:
              summary: memcached down (instance {{ $labels.instance }})
              description: "memcached instance is down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
  EOT
}

# Alertmanager definition for the Amazon Managed Prometheus workspace used by
# the memcached example. Routes every alert to a single receiver named
# 'default', which has no notification integrations configured.
resource "aws_prometheus_alert_manager_definition" "memcached" {
  workspace_id = module.eks_blueprints.amazon_prometheus_workspace_id

  # Indented heredoc: the common leading whitespace is trimmed, so the
  # rendered document starts at column 0.
  definition = <<-EOT
    alertmanager_config: |
      route:
        receiver: 'default'
      receivers:
        - name: 'default'
  EOT
}

0 comments on commit afe7184

Please sign in to comment.