diff --git a/.gitignore b/.gitignore index 2678db8..f6b5c6a 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,12 @@ ENV/ *.pyc *out +*log +*pid +*lock +stat-pusher/dev[0-9]-*-[0-9]* +stat-pusher/beta[0-9]-*-[0-9]* +stat-pusher/prod[0-9]-*-[0-9]* */alertmanager/config.yml */grafana/grafana.ini diff --git a/monitor/dev/start_monitor.sh b/monitor/dev/start_monitor.sh index 7d7e471..b28e4a8 100755 --- a/monitor/dev/start_monitor.sh +++ b/monitor/dev/start_monitor.sh @@ -9,7 +9,7 @@ export VISUALIZER_PORT=8081 export GRAFANA_PORT=3000 export PROMETHEUS_PORT=9090 export ALERTMANAGER_PORT=9093 -export DATA_PREFIX=/mnt/nfs/data/monitor +export DATA_PREFIX=/mnt/nfs/data/gilh export DATA_GRAFANA=${DATA_PREFIX}/ukwa-monitor/grafana export DATA_PROMETHEUS=${DATA_PREFIX}/ukwa-monitor/prometheus export DATA_ALERTMANAGER=${DATA_PREFIX}/ukwa-monitor/alertmanager diff --git a/monitor/grafana/provisioning/_dashboard_generator/generate_wa-status_dashboard.py b/monitor/grafana/provisioning/_dashboard_generator/generate_wa-status_dashboard.py index 67a09e3..dd90da9 100755 --- a/monitor/grafana/provisioning/_dashboard_generator/generate_wa-status_dashboard.py +++ b/monitor/grafana/provisioning/_dashboard_generator/generate_wa-status_dashboard.py @@ -147,6 +147,9 @@ def replace_output_single(outHandle, **kwargs): elif kwargs['title'] == 'Used': expr = 'round(100 - hadoop_hdfs_namenode_nninfo_percent_remaining{instance=\\"${HDFS3_EXPORTER}\\"})' templateCode = templateCode.replace('', expr) + elif kwargs['title'] == 'LDLs': + expr = 'recent_connections{instance=\\"ldl_connection_count\\", job=\\"ldl_rr\\"}' + templateCode = templateCode.replace('', expr) # add last comma if not last panel if 'lastPanel' not in kwargs: @@ -207,6 +210,7 @@ def main(): replace_output_single(outHandle, pnl=panelStat, job='infrastructure', title='CPU', h=2, w=2, x=2, y=9) replace_output_single(outHandle, pnl=panelStat, job='infrastructure', title='Dsk', h=2, w=2, x=4, y=9) replace_output_single(outHandle, pnl=panelStat, job = 'infrastructure', title = 'Mem', h=2, w=2, x=6, y=9) + replace_output_single(outHandle, pnl=panelStat, job='infrastructure', title='LDLs', h=2, w=2, x=0, y=11, textmode='value', colour1='#D44A3A', colour3='#299C46', threshold2='8.5', threshold3='9.5') # general services replace_output_title(outHandle, pnl=panelTitle, job='services', title='Services', h=1, w=8, x=8, y=8) replace_output_single(outHandle, pnl=panelStat, job='services', title='Up', h=2, w=2, x=8, y=9) diff --git a/monitor/grafana/provisioning/dashboards/daily_dashboard.json b/monitor/grafana/provisioning/dashboards/daily_dashboard.json index 328829f..3d1322c 100644 --- a/monitor/grafana/provisioning/dashboards/daily_dashboard.json +++ b/monitor/grafana/provisioning/dashboards/daily_dashboard.json @@ -436,7 +436,7 @@ "tableColumn": "", "targets": [ { - "expr": "hdfs_under_replicated_block_count{instance='hdfs-exporter.bapi.wa.bl.uk:80'}", + "expr": "hdfs_under_replicated_block_count{instance='hdfs-exporter.dapi.wa.bl.uk:80'}", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -521,7 +521,7 @@ "tableColumn": "", "targets": [ { - "expr": "hdfs_used_percent{instance='hdfs-exporter.bapi.wa.bl.uk:80'}", + "expr": "hdfs_used_percent{instance='hdfs-exporter.dapi.wa.bl.uk:80'}", "format": "time_series", "intervalFactor": 1, "legendFormat": "", @@ -602,7 +602,7 @@ "tableColumn": "", "targets": [ { - "expr": "hdfs_node_count{status='dead',instance='hdfs-exporter.bapi.wa.bl.uk:80'}", + "expr": "hdfs_node_count{status='dead',instance='hdfs-exporter.dapi.wa.bl.uk:80'}", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -683,7 +683,7 @@ "tableColumn": "", "targets": [ { - "expr": "hdfs_node_count{status='live',instance='hdfs-exporter.bapi.wa.bl.uk:80'}", + "expr": "hdfs_node_count{status='live',instance='hdfs-exporter.dapi.wa.bl.uk:80'}", "format": "time_series", "instant": true, "intervalFactor": 1, diff --git a/monitor/grafana/provisioning/dashboards/wa_status.json b/monitor/grafana/provisioning/dashboards/wa_status.json deleted file mode 100644 index 53b282a..0000000 --- a/monitor/grafana/provisioning/dashboards/wa_status.json +++ /dev/null @@ -1,3589 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "id": 5, - "links": [], - "panels": [ - { - "type": "text", - "gridPos": { - "h": 2, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 1, - "options": { - "mode": "markdown", - "content": "# Storage" - }, - "transparent": true, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "datasource": null, - "pluginVersion": "7.5.7" - }, - { - "content": "", - "gridPos": { - "h": 1, - "w": 8, - "x": 0, - "y": 2 - }, - "id": 2, - "links": [], - "mode": "markdown", - "title": "Hadoop 0.20", - "type": "text" - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 0, - "y": 3 - }, - "id": 3, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "sum(1 - up{job=\"hadoop\"})", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Up", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 2, - "y": 3 - }, - "id": 4, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count(sum(irate(node_cpu_seconds_total{job=\"hadoop\",mode=\"idle\"}[5m]) < 0.1) by (instance)) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "CPU", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 4, - "y": 3 - }, - "id": 5, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "hdfs_node_count{status=\"dead\",instance=\"hdfs-exporter.bapi.wa.bl.uk:80\"}", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Nodes", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "gridPos": { - "h": 4, - "w": 2, - "x": 6, - "y": 3 - }, - "id": 6, - "links": [], - "maxDataPoints": 100, - "targets": [ - { - "expr": "hdfs_used_percent{instance=\"hdfs-exporter.bapi.wa.bl.uk:80\"}", - "format": "time_series", - "intervalFactor": 1, - "refId": "A" - } - ], - "title": "Used", - "type": "stat", - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "auto", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "percent", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 85, - "color": "#ED8027" - }, - { - "value": 90, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "pluginVersion": "7.1.3", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 0, - "y": 5 - }, - "id": 7, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count((node_filesystem_avail_bytes{job=\"hadoop\",fstype!~\"tmpfs|rootfs|cifs\"} / node_filesystem_size_bytes{job=\"hadoop\",fstype!~\"tmpfs|rootfs|cifs\"}) < 0.04) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Dsk", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1.1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 2, - "y": 5 - }, - "id": 8, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count(sum(node_memory_MemFree_bytes{job=\"hadoop\"} + node_memory_Buffers_bytes{job=\"hadoop\"} + node_memory_Cached_bytes{job=\"hadoop\"}) by (instance) / sum(node_memory_MemTotal_bytes{job=\"hadoop\"}) by (instance) < 0.05) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Mem", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 4, - "y": 5 - }, - "id": 9, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "hdfs_under_replicated_block_count{instance=\"hdfs-exporter.bapi.wa.bl.uk:80\"}", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Under-rep", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "content": "", - "gridPos": { - "h": 1, - "w": 8, - "x": 8, - "y": 2 - }, - "id": 10, - "links": [], - "mode": "markdown", - "title": "Gluster", - "type": "text" - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 8, - "y": 3 - }, - "id": 11, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "sum(1 - up{job=\"gluster\"})", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Up", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 10, - "y": 3 - }, - "id": 12, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count(sum(irate(node_cpu_seconds_total{job=\"gluster\",mode=\"idle\"}[5m]) < 0.1) by (instance)) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "CPU", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 12, - "y": 3 - }, - "id": 13, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count((node_filesystem_avail_bytes{job=\"gluster\",fstype!~\"tmpfs|rootfs|cifs\"} / node_filesystem_size_bytes{job=\"gluster\",fstype!~\"tmpfs|rootfs|cifs\"}) < 0.04) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Dsk", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 14, - "y": 3 - }, - "id": 14, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count(sum(node_memory_MemFree_bytes{job=\"gluster\"} + node_memory_Buffers_bytes{job=\"gluster\"} + node_memory_Cached_bytes{job=\"gluster\"}) by (instance) / sum(node_memory_MemTotal_bytes{job=\"gluster\"}) by (instance) < 0.05) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Mem", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "content": "", - "gridPos": { - "h": 1, - "w": 8, - "x": 16, - "y": 2 - }, - "id": 15, - "links": [], - "mode": "markdown", - "title": "Hadoop 3", - "type": "text" - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 16, - "y": 3 - }, - "id": 16, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "sum(1 - up{job=\"hadoop3\"})", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Up", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 18, - "y": 3 - }, - "id": 17, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count(sum(irate(node_cpu_seconds_total{job=\"hadoop3\",mode=\"idle\"}[5m]) < 0.1) by (instance)) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "CPU", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 20, - "y": 3 - }, - "id": 18, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "hadoop_hdfs_namenode_nninfo_dead_nodes_count{instance=\"h3exporter.bapi.wa.bl.uk:80\"}", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Dead Nodes", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "gridPos": { - "h": 4, - "w": 2, - "x": 22, - "y": 3 - }, - "id": 19, - "links": [], - "maxDataPoints": 100, - "targets": [ - { - "expr": "round(100 - hadoop_hdfs_namenode_nninfo_percent_remaining{instance=\"h3exporter.bapi.wa.bl.uk:80\"})", - "format": "time_series", - "intervalFactor": 1, - "refId": "A" - } - ], - "title": "Used", - "type": "stat", - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "auto", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "percent", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 85, - "color": "#ED8027" - }, - { - "value": 90, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "pluginVersion": "7.1.3", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 16, - "y": 5 - }, - "id": 20, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count((node_filesystem_avail_bytes{job=\"hadoop3\",fstype!~\"tmpfs|rootfs|cifs\"} / node_filesystem_size_bytes{job=\"hadoop3\",fstype!~\"tmpfs|rootfs|cifs\"}) < 0.04) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Dsk", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1.1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 18, - "y": 5 - }, - "id": 21, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count(sum(node_memory_MemFree_bytes{job=\"hadoop3\"} + node_memory_Buffers_bytes{job=\"hadoop3\"} + node_memory_Cached_bytes{job=\"hadoop3\"}) by (instance) / sum(node_memory_MemTotal_bytes{job=\"hadoop3\"}) by (instance) < 0.05) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Mem", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 20, - "y": 5 - }, - "id": 22, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "value", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "hadoop_hdfs_namenode_fsname_system_under_replicated_blocks{instance=\"h3exporter.bapi.wa.bl.uk:80\"}", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Under-rep", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "type": "text", - "gridPos": { - "h": 2, - "w": 24, - "x": 0, - "y": 6 - }, - "id": 23, - "options": { - "mode": "markdown", - "content": "# WA Systems" - }, - "transparent": true, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "datasource": null, - "pluginVersion": "7.5.7" - }, - { - "content": "", - "gridPos": { - "h": 1, - "w": 8, - "x": 0, - "y": 8 - }, - "id": 24, - "links": [], - "mode": "markdown", - "title": "Infrastructure", - "type": "text" - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 0, - "y": 9 - }, - "id": 25, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "sum(1 - up{job=\"infrastructure\"})", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Up", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 2, - "y": 9 - }, - "id": 26, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count(sum(irate(node_cpu_seconds_total{job=\"infrastructure\",mode=\"idle\"}[5m]) < 0.1) by (instance)) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "CPU", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 4, - "y": 9 - }, - "id": 27, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count((node_filesystem_avail_bytes{job=\"infrastructure\",fstype!~\"tmpfs|rootfs|cifs\"} / node_filesystem_size_bytes{job=\"infrastructure\",fstype!~\"tmpfs|rootfs|cifs\"}) < 0.04) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Dsk", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 6, - "y": 9 - }, - "id": 28, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count(sum(node_memory_MemFree_bytes{job=\"infrastructure\"} + node_memory_Buffers_bytes{job=\"infrastructure\"} + node_memory_Cached_bytes{job=\"infrastructure\"}) by (instance) / sum(node_memory_MemTotal_bytes{job=\"infrastructure\"}) by (instance) < 0.05) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Mem", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "content": "", - "gridPos": { - "h": 1, - "w": 8, - "x": 8, - "y": 8 - }, - "id": 29, - "links": [], - "mode": "markdown", - "title": "Services", - "type": "text" - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 8, - "y": 9 - }, - "id": 30, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "sum(1 - up{job=\"services\"})", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Up", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 10, - "y": 9 - }, - "id": 31, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count(sum(irate(node_cpu_seconds_total{job=\"services\",mode=\"idle\"}[5m]) < 0.1) by (instance)) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "CPU", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 12, - "y": 9 - }, - "id": 32, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count((node_filesystem_avail_bytes{job=\"services\",fstype!~\"tmpfs|rootfs|cifs\"} / node_filesystem_size_bytes{job=\"services\",fstype!~\"tmpfs|rootfs|cifs\"}) < 0.04) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Dsk", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 14, - "y": 9 - }, - "id": 33, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count(sum(node_memory_MemFree_bytes{job=\"services\"} + node_memory_Buffers_bytes{job=\"services\"} + node_memory_Cached_bytes{job=\"services\"}) by (instance) / sum(node_memory_MemTotal_bytes{job=\"services\"}) by (instance) < 0.05) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Mem", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "content": "", - "gridPos": { - "h": 1, - "w": 8, - "x": 16, - "y": 8 - }, - "id": 34, - "links": [], - "mode": "markdown", - "title": "Solr", - "type": "text" - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 16, - "y": 9 - }, - "id": 35, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "sum(1 - up{job=\"solr\"})", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Up", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 18, - "y": 9 - }, - "id": 36, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count(sum(irate(node_cpu_seconds_total{job=\"solr\",mode=\"idle\"}[5m]) < 0.1) by (instance)) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "CPU", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 20, - "y": 9 - }, - "id": 37, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count((node_filesystem_avail_bytes{job=\"solr\",fstype!~\"tmpfs|rootfs|cifs\"} / node_filesystem_size_bytes{job=\"solr\",fstype!~\"tmpfs|rootfs|cifs\"}) < 0.04) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Dsk", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 22, - "y": 9 - }, - "id": 38, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count(sum(node_memory_MemFree_bytes{job=\"solr\"} + node_memory_Buffers_bytes{job=\"solr\"} + node_memory_Cached_bytes{job=\"solr\"}) by (instance) / sum(node_memory_MemTotal_bytes{job=\"solr\"}) by (instance) < 0.05) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Mem", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 16, - "y": 11 - }, - "id": 39, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count(probe_http_status_code{job=\"solr-query\"} != 200) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Query", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "type": "text", - "gridPos": { - "h": 2, - "w": 24, - "x": 0, - "y": 12 - }, - "id": 40, - "options": { - "mode": "markdown", - "content": "# WA Services" - }, - "transparent": true, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "datasource": null, - "pluginVersion": "7.5.7" - }, - { - "content": "", - "gridPos": { - "h": 1, - "w": 8, - "x": 0, - "y": 14 - }, - "id": 41, - "links": [], - "mode": "markdown", - "title": "Ingest & Metadata", - "type": "text" - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 0, - "y": 15 - }, - "id": 42, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "sum(1 - up{job=\"ingest_metadata\"})", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Up", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ba43a9" - }, - { - "value": 1.1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 2, - "y": 15 - }, - "id": 43, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count(sum(irate(node_cpu_seconds_total{job=\"ingest_metadata\",mode=\"idle\"}[5m]) < 0.1) by (instance)) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "CPU", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 4, - "y": 15 - }, - "id": 44, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count((node_filesystem_avail_bytes{job=\"ingest_metadata\",fstype!~\"tmpfs|rootfs|cifs\"} / node_filesystem_size_bytes{job=\"ingest_metadata\",fstype!~\"tmpfs|rootfs|cifs\"}) < 0.04) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Dsk", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 6, - "y": 15 - }, - "id": 45, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count(sum(node_memory_MemFree_bytes{job=\"ingest_metadata\"} + node_memory_Buffers_bytes{job=\"ingest_metadata\"} + node_memory_Cached_bytes{job=\"ingest_metadata\"}) by (instance) / sum(node_memory_MemTotal_bytes{job=\"ingest_metadata\"}) by (instance) < 0.05) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Mem", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 0, - "y": 17 - }, - "id": 46, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count(probe_http_status_code{job=\"im-access-http\"} != 200) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "WWW", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "content": "", - "gridPos": { - "h": 1, - "w": 8, - "x": 8, - "y": 14 - }, - "id": 47, - "links": [], - "mode": "markdown", - "title": "TrackDB", - "type": "text" - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 22, - "color": "#ED8027" - }, - { - "value": 26, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 4, - "x": 8, - "y": 15 - }, - "id": 48, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "value", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "(time() - trackdb_refresh_timestamp) / (60*60)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "trackdb refresh", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#D44A3A" - }, - { - "value": 10, - "color": "#ED8027" - }, - { - "value": 100, - "color": "#299C46" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 4, - "x": 12, - "y": 15 - }, - "id": 49, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "value", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "sum(trackdb_numFound - (trackdb_numFound offset 1d))", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "trackdb numFound", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "content": "", - "gridPos": { - "h": 1, - "w": 8, - "x": 16, - "y": 14 - }, - "id": 50, - "links": [], - "mode": "markdown", - "title": "Discovery & Access", - "type": "text" - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 16, - "y": 15 - }, - "id": 51, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "sum(1 - up{job=\"discovery_access\"})", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Up", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 18, - "y": 15 - }, - "id": 52, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count(sum(irate(node_cpu_seconds_total{job=\"discovery_access\",mode=\"idle\"}[5m]) < 0.1) by (instance)) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "CPU", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 20, - "y": 15 - }, - "id": 53, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count((node_filesystem_avail_bytes{job=\"discovery_access\",fstype!~\"tmpfs|rootfs|cifs\"} / node_filesystem_size_bytes{job=\"discovery_access\",fstype!~\"tmpfs|rootfs|cifs\"}) < 0.04) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Dsk", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ba43a9" - }, - { - "value": 1.1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 22, - "y": 15 - }, - "id": 54, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count(sum(node_memory_MemFree_bytes{job=\"discovery_access\"} + node_memory_Buffers_bytes{job=\"discovery_access\"} + node_memory_Cached_bytes{job=\"discovery_access\"}) by (instance) / sum(node_memory_MemTotal_bytes{job=\"discovery_access\"}) by (instance) < 0.05) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "Mem", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 2.1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 16, - "y": 17 - }, - "id": 55, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count(uptimerobot_monitor_up==0) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "UTR", - "cacheTimeout": null, - "interval": null, - "datasource": null - }, - { - "fieldConfig": { - "defaults": { - "custom": {}, - "unit": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "#299C46" - }, - { - "value": 0.1, - "color": "#ED8027" - }, - { - "value": 1.1, - "color": "#D44A3A" - } - ] - }, - "mappings": [ - { - "op": "=", - "text": "N/A", - "value": "null", - "id": 0, - "type": 1 - } - ], - "nullValueMode": "connected" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 18, - "y": 17 - }, - "id": 56, - "links": [], - "maxDataPoints": 100, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "orientation": "horizontal", - "textMode": "none", - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto" - }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "count(probe_http_status_code{job=\"da-access-http\"} != 200) OR vector(0)", - "legendFormat": "", - "interval": "", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "type": "stat", - "title": "WWW", - "cacheTimeout": null, - "interval": null, - "datasource": null - } - ], - "refresh": "1m", - "schemaVersion": 16, - "style": "dark", - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "WA Status", - "uid": "wast", - "version": 3 -} diff --git a/monitor/grafana/provisioning/dashboards/wa_status.json-template b/monitor/grafana/provisioning/dashboards/wa_status.json-template index de48008..32d7bd7 100644 --- a/monitor/grafana/provisioning/dashboards/wa_status.json-template +++ b/monitor/grafana/provisioning/dashboards/wa_status.json-template @@ -1782,6 +1782,82 @@ "interval": null, "datasource": null }, + { + "fieldConfig": { + "defaults": { + "custom": {}, + "unit": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "value": null, + "color": "#D44A3A" + }, + { + "value": 8.5, + "color": "#ED8027" + }, + { + "value": 9.5, + "color": "#299C46" + } + ] + }, + "mappings": [ + { + "op": "=", + "text": "N/A", + "value": "null", + "id": 0, + "type": 1 + } + ], + "nullValueMode": "connected" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 0, + "y": 11 + }, + "id": 29, + "links": [], + "maxDataPoints": 100, + "options": { + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ], + "fields": "" + }, + "orientation": "horizontal", + "textMode": "value", + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto" + }, + "pluginVersion": "7.1.3", + "targets": [ + { + "expr": "recent_connections{instance=\"ldl_connection_count\", job=\"ldl_rr\"}", + "legendFormat": "", + "interval": "", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "type": "stat", + "title": "LDLs", + "cacheTimeout": null, + "interval": null, + "datasource": null + }, { "content": "", "gridPos": { @@ -1790,7 +1866,7 @@ "x": 8, "y": 8 }, - "id": 29, + "id": 30, "links": [], "mode": "markdown", "title": "Services", @@ -1837,7 +1913,7 @@ "x": 8, "y": 9 }, - "id": 30, + "id": 31, "links": [], "maxDataPoints": 100, "options": { @@ -1913,7 +1989,7 @@ "x": 10, "y": 9 }, - "id": 31, + "id": 32, "links": [], "maxDataPoints": 100, "options": { @@ -1989,7 +2065,7 @@ "x": 12, "y": 9 }, - "id": 32, + "id": 33, "links": [], "maxDataPoints": 100, "options": { @@ -2065,7 +2141,7 @@ "x": 14, "y": 9 }, - "id": 33, + "id": 34, "links": [], "maxDataPoints": 100, "options": { @@ -2108,7 +2184,7 @@ "x": 16, "y": 8 }, - "id": 34, + "id": 35, "links": [], "mode": "markdown", "title": "Solr", @@ -2155,7 +2231,7 @@ "x": 16, "y": 9 }, - "id": 35, + "id": 36, "links": [], "maxDataPoints": 100, "options": { @@ -2231,7 +2307,7 @@ "x": 18, "y": 9 }, - "id": 36, + "id": 37, "links": [], "maxDataPoints": 100, "options": { @@ -2307,7 +2383,7 @@ "x": 20, "y": 9 }, - "id": 37, + "id": 38, "links": [], "maxDataPoints": 100, "options": { @@ -2383,7 +2459,7 @@ "x": 22, "y": 9 }, - "id": 38, + "id": 39, "links": [], "maxDataPoints": 100, "options": { @@ -2459,7 +2535,7 @@ "x": 16, "y": 11 }, - "id": 39, + "id": 40, "links": [], "maxDataPoints": 100, "options": { @@ -2502,7 +2578,7 @@ "x": 0, "y": 12 }, - "id": 40, + "id": 41, "options": { "mode": "markdown", "content": "# WA Services" @@ -2523,7 +2599,7 @@ "x": 0, "y": 14 }, - "id": 41, + "id": 42, "links": [], "mode": "markdown", "title": "Ingest & Metadata", @@ -2570,7 +2646,7 @@ "x": 0, "y": 15 }, - "id": 42, + "id": 43, "links": [], "maxDataPoints": 100, "options": { @@ -2646,7 +2722,7 @@ "x": 2, "y": 15 }, - "id": 43, + "id": 44, "links": [], "maxDataPoints": 100, "options": { @@ -2722,7 +2798,7 @@ "x": 4, "y": 15 }, - "id": 44, + "id": 45, "links": [], "maxDataPoints": 100, "options": { @@ -2798,7 +2874,7 @@ "x": 6, "y": 15 }, - "id": 45, + "id": 46, "links": [], "maxDataPoints": 100, "options": { @@ -2874,7 +2950,7 @@ "x": 0, "y": 17 }, - "id": 46, + "id": 47, "links": [], "maxDataPoints": 100, "options": { @@ -2917,7 +2993,7 @@ "x": 8, "y": 14 }, - "id": 47, + "id": 48, "links": [], "mode": "markdown", "title": "TrackDB", @@ -2964,7 +3040,7 @@ "x": 8, "y": 15 }, - "id": 48, + "id": 49, "links": [], "maxDataPoints": 100, "options": { @@ -3040,7 +3116,7 @@ "x": 12, "y": 15 }, - "id": 49, + "id": 50, "links": [], "maxDataPoints": 100, "options": { @@ -3083,7 +3159,7 @@ "x": 16, "y": 14 }, - "id": 50, + "id": 51, "links": [], "mode": "markdown", "title": "Discovery & Access", @@ -3130,7 +3206,7 @@ "x": 16, "y": 15 }, - "id": 51, + "id": 52, "links": [], "maxDataPoints": 100, "options": { @@ -3206,7 +3282,7 @@ "x": 18, "y": 15 }, - "id": 52, + "id": 53, "links": [], "maxDataPoints": 100, "options": { @@ -3282,7 +3358,7 @@ "x": 20, "y": 15 }, - "id": 53, + "id": 54, "links": [], "maxDataPoints": 100, "options": { @@ -3358,7 +3434,7 @@ "x": 22, "y": 15 }, - "id": 54, + "id": 55, "links": [], "maxDataPoints": 100, "options": { @@ -3434,7 +3510,7 @@ "x": 16, "y": 17 }, - "id": 55, + "id": 56, "links": [], "maxDataPoints": 100, "options": { @@ -3510,7 +3586,7 @@ "x": 18, "y": 17 }, - "id": 56, + "id": 57, "links": [], "maxDataPoints": 100, "options": { diff --git a/stat-pusher/ldl-pusher.py b/stat-pusher/ldl-pusher.py new file mode 100755 index 0000000..ea7022d --- /dev/null +++ b/stat-pusher/ldl-pusher.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python +''' +Pushes LDL monitoring curls into prometheus +''' + +import os, sys, logging +import socket, re +import configparser +import daemon, lockfile +from http.server import BaseHTTPRequestHandler, HTTPServer +import datetime +from prometheus_client import CollectorRegistry, Gauge, push_to_gateway + +from common import log + +# globals +logger = logging.getLogger(__name__) + +PIDFILE = f"{__file__}.pid" +LOCKFILE = f"{PIDFILE}.lock" +SETTINGSFILE = 'settings' +REQUEST = re.compile("^\w+\s+(/.+)\s+HTTP/\d.\d$") +HOSTREQ = re.compile("^/wa/monitor\?host=(.+)$") +LDLHOST = re.compile("^DLS-(BSP|LON|NLS|NLW)-WB0[1-4]$") +YMDHM = '%Y%m%d%H%M' +INSTANCE = 'ldl_connection_count' + +# environ settings +eset = '' +# dldl - dictionary of latest LDL connections +dldl = {'DLS-BSP-WB01':0, 'DLS-BSP-WB02':0, 'DLS-BSP-WB03':0, 'DLS-BSP-WB04':0, 'DLS-LON-WB01':0, 'DLS-LON-WB02':0, 'DLS-LON-WB03':0, 'DLS-LON-WB04':0, 'DLS-NLS-WB01':0, 'DLS-NLW-WB01':0} +# last YYYYMMDDHHMM push to gateway happened +pushymdhm = 0 + + +# classes and functions ----------------------- +def _read_settings(environ): + cfg = configparser.ConfigParser() + if os.path.isfile(SETTINGSFILE): + cfg.read(SETTINGSFILE) + if environ in cfg.sections(): + return cfg[environ] + else: + print(f"Section [{environ}] missing from [{SETTINGSFILE}] settings file") + sys.exit(1) + else: + print(f"Settings file [{SETTINGSFILE}] missing") + sys.exit(1) + +class webServer(BaseHTTPRequestHandler): + global REQUEST + def _set_headers(self): + self.send_response(200) + self.send_header("Content-type", "text/html") + self.end_headers() + + def do_HEAD(self): + self._set_headers() + + def do_GET(self): + self._set_headers() + + # grab request + try: + reqMatch = REQUEST.match(self.requestline) + request = reqMatch.group(1) + except Exception as e: + logger.warning(f"Failed to match request in [{self.requestline}]") + + # process request + _process_request(request) + +def _process_request(request): + global HOSTREQ + global LDLHOST + global YMDHM + global INSTANCE + global eset + global dldl + global pushymdhm + logger.debug(f"Received request: {request}") + + # get hostname, skip further processing if fail + hostReqMatch = HOSTREQ.match(request) + if hostReqMatch: + hostReq = hostReqMatch.group(1) + else: + logger.warning(f"Failed to get hostname from [{request}]") + return + + # check hostname is LDL VM + ldlHostMatch = LDLHOST.match(hostReq) + if ldlHostMatch: + ldlHost = hostReq + else: + logger.warning(f"Skipping non LDL DLS VM hostname [{hostReq}]") + return + + # get current time + nowymdhm = int((datetime.datetime.now()).strftime(YMDHM)) + + # update LDL in dldl + dldl[ldlHost] = nowymdhm + + # on schedule, report LDL connection status to pushgateway + schedule = int(eset['schedule']) + logger.debug(f"dldl {dldl}") + logger.debug(f"Schedule: [{nowymdhm} - {pushymdhm}] = [{nowymdhm - pushymdhm}], schedule [{schedule}]") + if (nowymdhm - pushymdhm) > schedule: + # count LDLs responded in last schedule period + up = 0 + for _ldl in dldl: + if (nowymdhm - dldl[_ldl]) < schedule: up += 1 + else: logger.debug(f"LDL [{_ldl}] hasn't curled in {schedule} minutes") + + # set pushgateway values and push to prometheus service + registry = CollectorRegistry() + g = Gauge(eset['metric'], eset['desc'], labelnames=['instance'], registry=registry) + g.labels(instance=INSTANCE).set(up) + push_to_gateway(eset['pushgtw'], registry=registry, job=eset['job']) + logger.debug(f"Pushed to gateway:\tjob={eset['job']}, instance={INSTANCE}, recent_connections={up}\n") + + # write latest push to output file (done via output rather than log so log doesn't + # become huge over time) + with open(eset['output'], 'w') as out: + out.write(f"Output datestamp:\t{nowymdhm}\n") + out.write(f"Pushed to gateway:\tjob={eset['job']}, instance={INSTANCE}, recent_connections={up}\n") + for _ldl in dldl: out.write(f"\t{_ldl}:\t{dldl[_ldl]}\tRecent [{(nowymdhm - dldl[_ldl]) < schedule}]\n") + out.write("\n") + out.close() + + # store push time + pushymdhm = nowymdhm + +# script -------------------------------------- +def script(eset): + global pushymdhm + log.configure_file(eset) + + # create web service + monitorServer = HTTPServer((eset['hostname'], int(eset['port'])), webServer) + logger.info(f"Started LDL monitoring web server: {eset['hostname']}:{eset['port']}") + logger.debug(f"Pushing to gateway every [{eset['schedule']}] minutes") + try: + monitorServer.serve_forever() + except Exception as e: + logger.warning(f"LDL monitoring web server exiting") + logger.warning(f"Message: [{e}]") + + # close and end + monitorServer.server_close() + logger.warning(f"//////////////////// RUNNING AS DAEMON - SHOULD NEVER FINISH /////////////////////\n") + +# main ---------------------------------------- +if __name__ == '__main__': + # check for lockfile + if os.path.exists(LOCKFILE): + print(f"Exiting as [{LOCKFILE}] exists, service already be running") + sys.exit(1) + + # get swarm environment + senvMatch = re.match('^(dev|beta|prod)', socket.gethostname()) + if senvMatch: + environ = senvMatch.group(1) + else: + print(f"Swarm environment not identified from [{socket.gethostname()}]") + sys.exit(1) + + # read environment settings + eset = _read_settings(environ) + + # run daemon + with daemon.DaemonContext( + stdout = sys.stdout, + stderr = sys.stderr, + uid = int(eset['uid']), + gid = int(eset['gid']), + pidfile = lockfile.FileLock(PIDFILE) + ): + script(eset) diff --git a/stat-pusher/ldl-requirements.txt b/stat-pusher/ldl-requirements.txt new file mode 100644 index 0000000..4dd84ad --- /dev/null +++ b/stat-pusher/ldl-requirements.txt @@ -0,0 +1,4 @@ +python-daemon +lockfile +datetime +prometheus_client diff --git a/stat-pusher/run_ldl_pusher.sh b/stat-pusher/run_ldl_pusher.sh new file mode 100755 index 0000000..84c0ef6 --- /dev/null +++ b/stat-pusher/run_ldl_pusher.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +#### Swarm environment determined in script, from hostname + +# setup venv +export PYTHONPATH=~/github/ukwa-monitor/stat-pusher +source $PYTHONPATH/venv/bin/activate +cd $PYTHONPATH + +# ensure log directory exists +[[ -d logs/ ]] || mkdir logs + +# ensure python libraries installed +pip install -r ldl-requirements.txt + +# run stat-pusher script +if [[ ${HOSTNAME} =~ ^prod ]]; then + nohup python ldl-pusher.py > /dev/null & # disable generation of large logs over time +else + nohup python ldl-pusher.py & +fi diff --git a/stat-pusher/settings b/stat-pusher/settings index e4ed99c..883a484 100644 --- a/stat-pusher/settings +++ b/stat-pusher/settings @@ -3,15 +3,17 @@ pushgtw = http://monitor-pushgateway.dapi.wa.bl.uk statsfile = dev.stats # ldl server connection testing -logfpfn = /home/monitor/github/ukwa-monitor/stat-pusher/logs/ldl-pusher.log +logfpfn = /home/gilh/github/ukwa-monitor/stat-pusher/logs/ldl-pusher.log +output = /home/gilh/github/ukwa-monitor/stat-pusher/logs/ldl-pusher.out loglevel = DEBUG -uid = 1000 -gid = 1000 +uid = 1004 +gid = 1004 hostname = 0.0.0.0 port = 9119 # prometheus settings -job = ldl_rr_connections -metric = up +schedule = 5 +job = ldl_rr +metric = recent_connections desc = Curl requests from LDL VMs, indicating LDL to WA connectivity @@ -23,3 +25,18 @@ statsfile = beta.stats [prod] pushgtw = http://monitor-pushgateway.api.wa.bl.uk statsfile = prod.stats + +# ldl server connection testing +logfpfn = /home/monitor/github/ukwa-monitor/stat-pusher/logs/ldl-pusher.log +output = /home/monitor/github/ukwa-monitor/stat-pusher/logs/ldl-pusher.out +loglevel = INFO +uid = 1000 +gid = 1000 +hostname = 0.0.0.0 +port = 9119 +# prometheus settings +schedule = 20 +job = ldl_rr +metric = recent_connections +desc = Curl requests from LDL VMs, indicating LDL to WA connectivity +