Skip to content

Commit

Permalink
Add prometheus stack
Browse files Browse the repository at this point in the history
Closes #16
  • Loading branch information
abelfodil committed Dec 16, 2024
1 parent eb92ee7 commit 216d305
Show file tree
Hide file tree
Showing 6 changed files with 147 additions and 3 deletions.
127 changes: 127 additions & 0 deletions kubernetes/core/templates/kube-prometheus-stack.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
---
# Secret named by grafana.admin.existingSecret in the kube-prometheus-stack
# values defined in this file.
apiVersion: v1
kind: Secret
metadata:
  name: prometheus
  namespace: kube-system
type: Opaque
# Example payload, left commented out. The key names match the userKey and
# passwordKey that the Grafana values in this file look up in this Secret.
# stringData:
#   grafana-admin-user: "someuser"
#   grafana-admin-password: "somepass"

---
# k3s HelmChart resource that installs the kube-prometheus-stack chart
# (Prometheus operator, Prometheus, Grafana and exporters) into kube-system.
# Everything under valuesContent is passed to the chart as its values; this
# file is itself a Helm template, so the .Values.* expressions are rendered
# before the manifest is applied.
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
  name: kube-prometheus-stack
  namespace: kube-system
spec:
  chart: kube-prometheus-stack
  repo: https://prometheus-community.github.io/helm-charts
  version: 65.8.0
  targetNamespace: kube-system
  valuesContent: |-
    # No Windows nodes to alert on.
    defaultRules:
      rules:
        windows: false
    grafana:
      # Admin credentials are read from the "prometheus" Secret.
      admin:
        existingSecret: "prometheus"
        userKey: grafana-admin-user
        passwordKey: grafana-admin-password
      ingress:
        enabled: true
        hosts:
          - grafana.{{ .Values.fqdn }}
        tls:
          - secretName: {{ .Values.fqdn }}-tls
            hosts:
              - grafana.{{ .Values.fqdn }}
      persistence:
        enabled: true
        storageClassName: "local-path-retain"
        accessModes:
          - ReadWriteOnce
        size: 20Gi
        finalizers:
          - kubernetes.io/pvc-protection
      affinity:
        nodeAffinity:
          # Pin to amd64 nodes so the pod never lands on a Raspberry Pi and
          # wears out its SD card.
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: "kubernetes.io/arch"
                    operator: In
                    values:
                      - amd64
    # prometheusSpec must be nested under the chart's "prometheus" key;
    # without this parent the retention, storage and affinity settings are
    # not picked up by the chart.
    prometheus:
      prometheusSpec:
        retention: 365d
        storageSpec:
          volumeClaimTemplate:
            spec:
              storageClassName: "local-path-retain"
              accessModes: ["ReadWriteOnce"]
              resources:
                requests:
                  storage: 50Gi
        affinity:
          nodeAffinity:
            # Pin to amd64 nodes so the pod never lands on a Raspberry Pi and
            # wears out its SD card.
            requiredDuringSchedulingIgnoredDuringExecution:
              nodeSelectorTerms:
                - matchExpressions:
                    - key: "kubernetes.io/arch"
                      operator: In
                      values:
                        - amd64
    prometheus-windows-exporter:
      prometheus:
        monitor:
          enabled: false
    # The "keep" allow-lists below follow
    # https://github.com/prometheus-community/helm-charts/issues/2865#issuecomment-1490318965
    kubelet:
      serviceMonitor:
        metricRelabelings:
          # Remove duplicate metrics
          - sourceLabels: ["__name__"]
            regex: "(apiserver_audit|apiserver_client|apiserver_delegated|apiserver_envelope|apiserver_storage|apiserver_webhooks|authentication_token|cadvisor_version|container_blkio|container_cpu|container_fs|container_last|container_memory|container_network|container_oom|container_processes|container|csi_operations|disabled_metric|get_token|go|hidden_metric|kubelet_certificate|kubelet_cgroup|kubelet_container|kubelet_containers|kubelet_cpu|kubelet_device|kubelet_graceful|kubelet_http|kubelet_lifecycle|kubelet_managed|kubelet_node|kubelet_pleg|kubelet_pod|kubelet_run|kubelet_running|kubelet_runtime|kubelet_server|kubelet_started|kubelet_volume|kubernetes_build|kubernetes_feature|machine_cpu|machine_memory|machine_nvm|machine_scrape|node_namespace|plugin_manager|prober_probe|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|registered_metric|rest_client|scrape_duration|scrape_samples|scrape_series|storage_operation|volume_manager|volume_operation|workqueue)_(.+)"
            action: keep
          # Copy the "node" label into "instance"
          - sourceLabels: ["node"]
            targetLabel: instance
            action: replace
    kubeApiServer:
      serviceMonitor:
        metricRelabelings:
          # Remove duplicate metrics
          - sourceLabels: ["__name__"]
            regex: "(aggregator_openapi|aggregator_unavailable|apiextensions_openapi|apiserver_admission|apiserver_audit|apiserver_cache|apiserver_cel|apiserver_client|apiserver_crd|apiserver_current|apiserver_envelope|apiserver_flowcontrol|apiserver_init|apiserver_kube|apiserver_longrunning|apiserver_request|apiserver_requested|apiserver_response|apiserver_selfrequest|apiserver_storage|apiserver_terminated|apiserver_tls|apiserver_watch|apiserver_webhooks|authenticated_user|authentication|disabled_metric|etcd_bookmark|etcd_lease|etcd_request|field_validation|get_token|go|grpc_client|hidden_metric|kube_apiserver|kubernetes_build|kubernetes_feature|node_authorizer|pod_security|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|registered_metric|rest_client|scrape_duration|scrape_samples|scrape_series|serviceaccount_legacy|serviceaccount_stale|serviceaccount_valid|watch_cache|workqueue)_(.+)"
            action: keep
          # Remove high cardinality metrics
          - sourceLabels: ["__name__"]
            regex: (apiserver|etcd|rest_client)_request(|_sli|_slo)_duration_seconds_bucket
            action: drop
          - sourceLabels: ["__name__"]
            regex: (apiserver_response_sizes_bucket|apiserver_watch_events_sizes_bucket)
            action: drop
    # nodes_endpoints is substituted verbatim, so it has to render as a YAML
    # list (the chart default of [] does).
    # NOTE(review): confirm the rendering when several endpoints are supplied.
    kubeControllerManager:
      endpoints: {{ .Values.nodes_endpoints }}
      serviceMonitor:
        metricRelabelings:
          # Remove duplicate metrics
          - sourceLabels: ["__name__"]
            regex: "(apiserver_audit|apiserver_client|apiserver_delegated|apiserver_envelope|apiserver_storage|apiserver_webhooks|attachdetach_controller|authenticated_user|authentication|cronjob_controller|disabled_metric|endpoint_slice|ephemeral_volume|garbagecollector_controller|get_token|go|hidden_metric|job_controller|kubernetes_build|kubernetes_feature|leader_election|node_collector|node_ipam|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|pv_collector|registered_metric|replicaset_controller|rest_client|retroactive_storageclass|root_ca|running_managed|scrape_duration|scrape_samples|scrape_series|service_controller|storage_count|storage_operation|ttl_after|volume_operation|workqueue)_(.+)"
            action: keep
    kubeEtcd:
      endpoints: {{ .Values.nodes_endpoints }}
    kubeProxy:
      enabled: false  # Disabled because eBPF
      endpoints: {{ .Values.nodes_endpoints }}
    kubeScheduler:
      endpoints: {{ .Values.nodes_endpoints }}
      serviceMonitor:
        metricRelabelings:
          # Remove duplicate metrics
          - sourceLabels: ["__name__"]
            regex: "(apiserver_audit|apiserver_client|apiserver_delegated|apiserver_envelope|apiserver_storage|apiserver_webhooks|authenticated_user|authentication|disabled_metric|go|hidden_metric|kubernetes_build|kubernetes_feature|leader_election|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|registered_metric|rest_client|scheduler|scrape_duration|scrape_samples|scrape_series|workqueue)_(.+)"
            action: keep
3 changes: 3 additions & 0 deletions kubernetes/core/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@

# Base domain name to be used for all services
fqdn: example.org

# Endpoints of the Kubernetes cluster's nodes. Used as the `endpoints` value of
# kubeControllerManager, kubeEtcd, kubeProxy and kubeScheduler in the
# kube-prometheus-stack values; the Ansible helm_template task overrides it
# with the list fetched from the cluster.
nodes_endpoints: []
6 changes: 3 additions & 3 deletions kubernetes/services/templates/homer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,9 @@ data:
- name: "Monitoring"
icon: "fas fa-heartbeat"
items:
- name: "Netdata"
logo: "https://raw.githubusercontent.com/netdata/netdata/483d8481a5a5edf72630068534feb1a4d228ed0b/web/gui/v1/images/favicon-196x196.png"
url: "https://netdata.{{ .Values.fqdn }}"
- name: "Grafana"
logo: "https://upload.wikimedia.org/wikipedia/commons/thumb/a/a1/Grafana_logo.svg/1024px-Grafana_logo.svg.png"
url: "https://grafana.{{ .Values.fqdn }}"
- name: "Speedtest Tracker"
logo: "https://raw.githubusercontent.com/henrywhitaker3/Speedtest-Tracker/8cb2e8a3236850b4a07e887ac376c0d4d5e804f4/public/icons/fav/ms-icon-310x310.png"
url: "https://speedtest.{{ .Values.fqdn }}"
Expand Down
1 change: 1 addition & 0 deletions roles/kubernetes_apps/tasks/apply_folder_manifests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
chart_ref: "{{ chart_folder }}"
set_values:
- value: "fqdn={{ fqdn }}"
- value: nodes_endpoints={{ k3s_nodes_endpoints.stdout }}
delegate_to: localhost
register: helm_template

Expand Down
12 changes: 12 additions & 0 deletions roles/kubernetes_apps/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,18 @@
- { to_port: 30778, proto: any, rule: allow, comment: "Allow Minecraft Bedrock" }
when: not (skip_firewall_setup or manifest_only_setup)

# Collects each node's "k3s.io/external-ip" annotation into a sorted, compact
# JSON array and registers the command result as k3s_nodes_endpoints.
# Read-only query, hence changed_when: false.
- name: Fetch list of nodes
  become: true
  run_once: true
  when:
    - k3s_control_node is defined
    - k3s_control_node
  ansible.builtin.shell:
    # Folded scalar: the lines below are joined with single spaces into one
    # command line. bash is required for `set -o pipefail`.
    cmd: >-
      set -o pipefail &&
      kubectl get nodes -o json |
      jq -c '[.items[].metadata.annotations."k3s.io/external-ip"] | sort'
    executable: /usr/bin/bash
  changed_when: false
  register: k3s_nodes_endpoints

- name: Deploy manifests
when:
- k3s_control_node is defined
Expand Down
1 change: 1 addition & 0 deletions roles/system_setup/tasks/packages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
pkg:
- curl
- ddclient
- jq
- rasdaemon
- rsync
- smartmontools
Expand Down

0 comments on commit 216d305

Please sign in to comment.