prometheus-operator: Less noisy alerting from node-exporter
This commit is contained in:
parent
ea23a52d6b
commit
414d044909
@ -4,6 +4,7 @@ kind: Probe
|
|||||||
metadata:
|
metadata:
|
||||||
name: nodes-proxmox
|
name: nodes-proxmox
|
||||||
spec:
|
spec:
|
||||||
|
scrapeTimeout: 30s
|
||||||
targets:
|
targets:
|
||||||
staticConfig:
|
staticConfig:
|
||||||
static:
|
static:
|
||||||
@ -86,37 +87,37 @@ spec:
|
|||||||
summary: Host memory under memory pressure (instance {{ $labels.instance }})
|
summary: Host memory under memory pressure (instance {{ $labels.instance }})
|
||||||
description: The node is under heavy memory pressure. High rate of major page faults
|
description: The node is under heavy memory pressure. High rate of major page faults
|
||||||
- alert: HostUnusualNetworkThroughputIn
|
- alert: HostUnusualNetworkThroughputIn
|
||||||
expr: sum by (instance) (rate(node_network_receive_bytes_total[2m])) > 160e+06
|
expr: sum by (instance) (rate(node_network_receive_bytes_total[2m])) > 800e+06
|
||||||
for: 1h
|
for: 1h
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
summary: Host unusual network throughput in (instance {{ $labels.instance }})
|
summary: Host unusual network throughput in (instance {{ $labels.instance }})
|
||||||
description: Host network interfaces are probably receiving too much data (> 160 MB/s)
|
description: Host network interfaces are probably receiving too much data (> 800 MB/s)
|
||||||
- alert: HostUnusualNetworkThroughputOut
|
- alert: HostUnusualNetworkThroughputOut
|
||||||
expr: sum by (instance) (rate(node_network_transmit_bytes_total[2m])) > 160e+06
|
expr: sum by (instance) (rate(node_network_transmit_bytes_total[2m])) > 800e+06
|
||||||
for: 1h
|
for: 1h
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
summary: Host unusual network throughput out (instance {{ $labels.instance }})
|
summary: Host unusual network throughput out (instance {{ $labels.instance }})
|
||||||
description: Host network interfaces are probably sending too much data (> 160 MB/s)
|
description: Host network interfaces are probably sending too much data (> 800 MB/s)
|
||||||
- alert: HostUnusualDiskReadRate
|
- alert: HostUnusualDiskReadRate
|
||||||
expr: sum by (instance) (rate(node_disk_read_bytes_total[2m])) > 50000000
|
expr: sum by (instance) (rate(node_disk_read_bytes_total[2m])) > 500e+06
|
||||||
for: 1h
|
for: 1h
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
summary: Host unusual disk read rate (instance {{ $labels.instance }})
|
summary: Host unusual disk read rate (instance {{ $labels.instance }})
|
||||||
description: Disk is probably reading too much data (> 50 MB/s)
|
description: Disk is probably reading too much data (> 500 MB/s)
|
||||||
- alert: HostUnusualDiskWriteRate
|
- alert: HostUnusualDiskWriteRate
|
||||||
expr: sum by (instance) (rate(node_disk_written_bytes_total[2m])) > 50000000
|
expr: sum by (instance) (rate(node_disk_written_bytes_total[2m])) > 500e+06
|
||||||
for: 1h
|
for: 1h
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
summary: Host unusual disk write rate (instance {{ $labels.instance }})
|
summary: Host unusual disk write rate (instance {{ $labels.instance }})
|
||||||
description: Disk is probably writing too much data (> 50 MB/s)
|
description: Disk is probably writing too much data (> 500 MB/s)
|
||||||
# Please add ignored mountpoints in node_exporter parameters like
|
# Please add ignored mountpoints in node_exporter parameters like
|
||||||
# "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|run)($|/)".
|
# "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|run)($|/)".
|
||||||
# Same rule using "node_filesystem_free_bytes" will fire when disk fills for non-root users.
|
# Same rule using "node_filesystem_free_bytes" will fire when disk fills for non-root users.
|
||||||
@ -361,11 +362,13 @@ kind: PodMonitor
|
|||||||
metadata:
|
metadata:
|
||||||
name: node-exporter
|
name: node-exporter
|
||||||
spec:
|
spec:
|
||||||
|
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
app: node-exporter
|
app: node-exporter
|
||||||
podMetricsEndpoints:
|
podMetricsEndpoints:
|
||||||
- port: web
|
- port: web
|
||||||
|
scrapeTimeout: 30s
|
||||||
relabelings:
|
relabelings:
|
||||||
- sourceLabels: [__meta_kubernetes_pod_node_name]
|
- sourceLabels: [__meta_kubernetes_pod_node_name]
|
||||||
targetLabel: node
|
targetLabel: node
|
||||||
|
Loading…
Reference in New Issue
Block a user