prometheus-operator: Less noisy alerting from node-exporter

This commit is contained in:
Lauri Võsandi 2022-12-24 21:11:00 +02:00
parent ea23a52d6b
commit 414d044909

View File

@ -4,6 +4,7 @@ kind: Probe
metadata: metadata:
name: nodes-proxmox name: nodes-proxmox
spec: spec:
scrapeTimeout: 30s
targets: targets:
staticConfig: staticConfig:
static: static:
@ -86,37 +87,37 @@ spec:
summary: Host memory under memory pressure (instance {{ $labels.instance }}) summary: Host memory under memory pressure (instance {{ $labels.instance }})
description: The node is under heavy memory pressure. High rate of major page faults description: The node is under heavy memory pressure. High rate of major page faults
- alert: HostUnusualNetworkThroughputIn - alert: HostUnusualNetworkThroughputIn
expr: sum by (instance) (rate(node_network_receive_bytes_total[2m])) > 160e+06 expr: sum by (instance) (rate(node_network_receive_bytes_total[2m])) > 800e+06
for: 1h for: 1h
labels: labels:
severity: warning severity: warning
annotations: annotations:
summary: Host unusual network throughput in (instance {{ $labels.instance }}) summary: Host unusual network throughput in (instance {{ $labels.instance }})
description: Host network interfaces are probably receiving too much data (> 160 MB/s) description: Host network interfaces are probably receiving too much data (> 800 MB/s)
- alert: HostUnusualNetworkThroughputOut - alert: HostUnusualNetworkThroughputOut
expr: sum by (instance) (rate(node_network_transmit_bytes_total[2m])) > 160e+06 expr: sum by (instance) (rate(node_network_transmit_bytes_total[2m])) > 800e+06
for: 1h for: 1h
labels: labels:
severity: warning severity: warning
annotations: annotations:
summary: Host unusual network throughput out (instance {{ $labels.instance }}) summary: Host unusual network throughput out (instance {{ $labels.instance }})
description: Host network interfaces are probably sending too much data (> 160 MB/s) description: Host network interfaces are probably sending too much data (> 800 MB/s)
- alert: HostUnusualDiskReadRate - alert: HostUnusualDiskReadRate
expr: sum by (instance) (rate(node_disk_read_bytes_total[2m])) > 50000000 expr: sum by (instance) (rate(node_disk_read_bytes_total[2m])) > 500e+06
for: 1h for: 1h
labels: labels:
severity: warning severity: warning
annotations: annotations:
summary: Host unusual disk read rate (instance {{ $labels.instance }}) summary: Host unusual disk read rate (instance {{ $labels.instance }})
description: Disk is probably reading too much data (> 50 MB/s) description: Disk is probably reading too much data (> 500 MB/s)
- alert: HostUnusualDiskWriteRate - alert: HostUnusualDiskWriteRate
expr: sum by (instance) (rate(node_disk_written_bytes_total[2m])) > 50000000 expr: sum by (instance) (rate(node_disk_written_bytes_total[2m])) > 500e+06
for: 1h for: 1h
labels: labels:
severity: warning severity: warning
annotations: annotations:
summary: Host unusual disk write rate (instance {{ $labels.instance }}) summary: Host unusual disk write rate (instance {{ $labels.instance }})
description: Disk is probably writing too much data (> 50 MB/s) description: Disk is probably writing too much data (> 500 MB/s)
# Please add ignored mountpoints in node_exporter parameters like # Please add ignored mountpoints in node_exporter parameters like
# "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|run)($|/)". # "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|run)($|/)".
# Same rule using "node_filesystem_free_bytes" will fire when disk fills for non-root users. # Same rule using "node_filesystem_free_bytes" will fire when disk fills for non-root users.
@ -361,11 +362,13 @@ kind: PodMonitor
metadata: metadata:
name: node-exporter name: node-exporter
spec: spec:
selector: selector:
matchLabels: matchLabels:
app: node-exporter app: node-exporter
podMetricsEndpoints: podMetricsEndpoints:
- port: web - port: web
scrapeTimeout: 30s
relabelings: relabelings:
- sourceLabels: [__meta_kubernetes_pod_node_name] - sourceLabels: [__meta_kubernetes_pod_node_name]
targetLabel: node targetLabel: node