diff --git a/docs/metrics/prometheus/grafana/README.md b/docs/metrics/prometheus/grafana/README.md new file mode 100644 index 00000000..b8a931d9 --- /dev/null +++ b/docs/metrics/prometheus/grafana/README.md @@ -0,0 +1,15 @@ +# How to monitor KES server with Grafana [![Slack](https://slack.min.io/slack?type=svg)](https://slack.min.io) + +[Grafana](https://grafana.com/) allows you to query, visualize, alert on and understand your metrics no matter where they are stored. Create, explore, and share dashboards with your team and foster a data driven culture. + +## Prerequisites + +- Prometheus and KES configured as explained in [document here](https://github.com/minio/kes/wiki/Monitoring). +- Grafana installed as explained [here](https://grafana.com/grafana/download). + +## KES Grafana Dashboard + +Visualize KES metrics with the dashboard [json file here](https://raw.githubusercontent.com/minio/kes/master/docs/metrics/prometheus/grafana/kes-dashboard.json). + +![Grafana](https://raw.githubusercontent.com/minio/minio/master/docs/metrics/prometheus/grafana/grafana-kes.png) + diff --git a/docs/metrics/prometheus/grafana/grafana-kes.png b/docs/metrics/prometheus/grafana/grafana-kes.png new file mode 100644 index 00000000..b6dc91ec Binary files /dev/null and b/docs/metrics/prometheus/grafana/grafana-kes.png differ diff --git a/docs/metrics/prometheus/grafana/kes-dashboard.json b/docs/metrics/prometheus/grafana/kes-dashboard.json new file mode 100644 index 00000000..00376d72 --- /dev/null +++ b/docs/metrics/prometheus/grafana/kes-dashboard.json @@ -0,0 +1,1109 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "panel", + "id": "bargauge", + "name": "Bar gauge", + "version": "" + }, + { + "type": "panel", + "id": "gauge", + "name": "Gauge", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.3.1" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph (old)", + "version": "" + }, + { + "type": "panel", + "id": "piechart", + "name": "Pie chart", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "KES Grafana Dashboard - https://min.io/", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 13502, + "graphTooltip": 0, + "id": null, + "links": [ + { + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "minio" + ], + "type": "dashboards" + } + ], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "dtdurations", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 0, + "y": 0 + }, + "id": 1, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "max(kes_system_up_time{job=~\"$scrape_jobs\"})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "process_start_time_seconds", + "refId": "A", + "step": 60 + } + ], + "title": "Uptime", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "dark-yellow", + "value": 75000000 + }, + { + "color": "dark-red", + "value": 100000000 + } + ] + }, + "unit": "short", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 3, + "y": 0 + }, + "id": 2, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(kes_system_num_cpu{job=~\"$scrape_jobs\"})", + "format": "time_series", + "instant": false, + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "total CPUs", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "dark-yellow", + "value": 75000000 + }, + { + "color": "dark-red", + "value": 100000000 + } + ] + }, + "unit": "short", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 3, + "y": 3 + }, + "id": 3, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(kes_system_num_cpu_used{job=~\"$scrape_jobs\"})", + "format": "time_series", + "instant": false, + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Used CPUs", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "dark-yellow", + "value": 75000000 + }, + { + "color": "dark-red", + "value": 100000000 + } + ] + }, + "unit": "short", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 6, + "y": 0 + }, + "id": 4, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(kes_system_num_threads{job=~\"$scrape_jobs\"})", + "format": "time_series", + "instant": false, + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Number of Threads", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "dark-yellow", + "value": 75000000 + }, + { + "color": "dark-red", + "value": 100000000 + } + ] + }, + "unit": "short", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 9, + "y": 0 + }, + "id": 5, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(kes_http_request_active{job=~\"$scrape_jobs\"})", + "format": "time_series", + "instant": false, + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Active Requests", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "dark-yellow", + "value": 75000000 + }, + { + "color": "dark-red", + "value": 100000000 + } + ] + }, + "unit": "short", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 6, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(kes_system_mem_heap_objects{job=~\"$scrape_jobs\"})", + "format": "time_series", + "instant": false, + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Number of Heap Objects", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "dark-yellow", + "value": 75000000 + }, + { + "color": "dark-red", + "value": 100000000 + } + ] + }, + "unit": "bytes", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 7, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(kes_system_mem_heap_used{job=~\"$scrape_jobs\"})", + "format": "time_series", + "instant": false, + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Heap Usage", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "dark-yellow", + "value": 75000000 + }, + { + "color": "dark-red", + "value": 100000000 + } + ] + }, + "unit": "bytes", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 6 + }, + "id": 8, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(kes_system_mem_stack_used{job=~\"$scrape_jobs\"})", + "format": "time_series", + "instant": false, + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Stack Usage", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "dark-yellow", + "value": 75000000 + }, + { + "color": "dark-red", + "value": 100000000 + } + ] + }, + "unit": "short", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 6 + }, + "id": 9, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(kes_log_audit_events{job=~\"$scrape_jobs\"})", + "format": "time_series", + "instant": false, + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Log Audit Events", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "dark-yellow", + "value": 75000000 + }, + { + "color": "dark-red", + "value": 100000000 + } + ] + }, + "unit": "short", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 6 + }, + "id": 10, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(kes_log_error_events{job=~\"$scrape_jobs\"})", + "format": "time_series", + "instant": false, + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Log Error Events", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 6 + }, + "id": 11, + "links": [], + "options": { + "displayMode": "lcd", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "10.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": true, + "expr": "kes_http_response_time_bucket{job=~\"$scrape_jobs\"}", + "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "refId": "A", + "step": 300, + "useBackend": false + } + ], + "title": "Request / Response Times", + "type": "bargauge" + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": [ + "minio" + ], + "templating": { + "list": [ + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(job)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "scrape_jobs", + "options": [], + "query": { + "query": "label_values(job)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "KES Dashboard", + "uid": "TgmJnnnqk", + "version": 54, + "weekStart": "" +}