From 414d044909b8f7bb3d452794313f562ce12aa1b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lauri=20V=C3=B5sandi?= Date: Sat, 24 Dec 2022 21:11:00 +0200 Subject: [PATCH] prometheus-operator: Less noisy alerting from node-exporter --- prometheus-operator/node-exporter.yml | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/prometheus-operator/node-exporter.yml b/prometheus-operator/node-exporter.yml index 1442b67..598b6bc 100644 --- a/prometheus-operator/node-exporter.yml +++ b/prometheus-operator/node-exporter.yml @@ -4,6 +4,7 @@ kind: Probe metadata: name: nodes-proxmox spec: + scrapeTimeout: 30s targets: staticConfig: static: @@ -86,37 +87,37 @@ spec: summary: Host memory under memory pressure (instance {{ $labels.instance }}) description: The node is under heavy memory pressure. High rate of major page faults - alert: HostUnusualNetworkThroughputIn - expr: sum by (instance) (rate(node_network_receive_bytes_total[2m])) > 160e+06 + expr: sum by (instance) (rate(node_network_receive_bytes_total[2m])) > 800e+06 for: 1h labels: severity: warning annotations: summary: Host unusual network throughput in (instance {{ $labels.instance }}) - description: Host network interfaces are probably receiving too much data (> 160 MB/s) + description: Host network interfaces are probably receiving too much data (> 800 MB/s) - alert: HostUnusualNetworkThroughputOut - expr: sum by (instance) (rate(node_network_transmit_bytes_total[2m])) > 160e+06 + expr: sum by (instance) (rate(node_network_transmit_bytes_total[2m])) > 800e+06 for: 1h labels: severity: warning annotations: summary: Host unusual network throughput out (instance {{ $labels.instance }}) - description: Host network interfaces are probably sending too much data (> 160 MB/s) + description: Host network interfaces are probably sending too much data (> 800 MB/s) - alert: HostUnusualDiskReadRate - expr: sum by (instance) (rate(node_disk_read_bytes_total[2m])) > 50000000 + expr: sum by (instance) (rate(node_disk_read_bytes_total[2m])) > 500e+06 for: 1h labels: severity: warning annotations: summary: Host unusual disk read rate (instance {{ $labels.instance }}) - description: Disk is probably reading too much data (> 50 MB/s) + description: Disk is probably reading too much data (> 500 MB/s) - alert: HostUnusualDiskWriteRate - expr: sum by (instance) (rate(node_disk_written_bytes_total[2m])) > 50000000 + expr: sum by (instance) (rate(node_disk_written_bytes_total[2m])) > 500e+06 for: 1h labels: severity: warning annotations: summary: Host unusual disk write rate (instance {{ $labels.instance }}) - description: Disk is probably writing too much data (> 50 MB/s) + description: Disk is probably writing too much data (> 500 MB/s) # Please add ignored mountpoints in node_exporter parameters like # "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|run)($|/)". # Same rule using "node_filesystem_free_bytes" will fire when disk fills for non-root users. @@ -361,11 +362,13 @@ kind: PodMonitor metadata: name: node-exporter spec: + selector: matchLabels: app: node-exporter podMetricsEndpoints: - port: web + scrapeTimeout: 30s relabelings: - sourceLabels: [__meta_kubernetes_pod_node_name] targetLabel: node