kube/monitoring/snmp-exporter.yaml
2024-09-12 21:54:46 +03:00

187 lines
4.4 KiB
YAML

# Deployment of the Prometheus SNMP exporter: two replicas scheduled on
# control-plane nodes, configured from the `snmp-exporter` ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: snmp-exporter
spec:
  replicas: 2
  # Keep no superseded ReplicaSets after a rollout.
  revisionHistoryLimit: 0
  selector:
    matchLabels:
      app: snmp-exporter
  template:
    metadata:
      labels:
        app: snmp-exporter
    spec:
      # Schedule only onto control-plane nodes and tolerate their
      # NoSchedule taint.
      nodeSelector:
        node-role.kubernetes.io/control-plane: ''
      tolerations:
        - key: node-role.kubernetes.io/control-plane
          operator: Equal
          value: ''
          effect: NoSchedule
      # Spread the replicas across zones; a pod that would push the
      # per-zone skew above 1 is left unscheduled.
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: topology.kubernetes.io/zone
          whenUnsatisfiable: DoNotSchedule
          labelSelector:
            matchLabels:
              app: snmp-exporter
      containers:
        - name: snmp-exporter
          image: mirror.gcr.io/prom/snmp-exporter:v0.22.0
          imagePullPolicy: IfNotPresent
          # Unprivileged user, immutable root filesystem.
          securityContext:
            runAsNonRoot: true
            runAsUser: 1000
            readOnlyRootFilesystem: true
          ports:
            - name: exporter
              containerPort: 9116
          # Liveness and readiness both poll the same HTTP path on the
          # named `exporter` port.
          livenessProbe:
            httpGet:
              path: /health
              port: exporter
          readinessProbe:
            httpGet:
              path: /health
              port: exporter
          volumeMounts:
            - name: snmp-exporter
              mountPath: /etc/snmp_exporter
      volumes:
        # Exporter configuration, provided by the ConfigMap of the same name.
        - name: snmp-exporter
          configMap:
            name: snmp-exporter
---
# Cluster-internal Service in front of the snmp-exporter pods.
apiVersion: v1
kind: Service
metadata:
  name: snmp-exporter
spec:
  type: ClusterIP
  # Routes to every pod labelled app=snmp-exporter.
  selector:
    app: snmp-exporter
  ports:
    - name: exporter
      protocol: TCP
      port: 9116
---
# Probe that scrapes the UPS units through the snmp-exporter Service using
# the `rfc1628_ups` exporter module, once per minute.
kind: Probe
apiVersion: monitoring.coreos.com/v1
metadata:
  name: ups
spec:
  interval: 60s
  module: rfc1628_ups
  prober:
    url: snmp-exporter:9116
    path: /snmp
  # Prefix every scraped metric name with `snmp_`.
  metricRelabelings:
    - sourceLabels: [__name__]
      regex: '(.*)'
      replacement: 'snmp_${1}'
      targetLabel: __name__
  targets:
    staticConfig:
      # Each UPS is listed exactly once. The UPSPowerLost alert in the `ups`
      # PrometheusRule compares against the number of entries here, so keep
      # the two in sync when adding or removing a UPS.
      static:
        - ups-4.mgmt.k-space.ee
        - ups-6.mgmt.k-space.ee
        - ups-7.mgmt.k-space.ee
        - ups-9.mgmt.k-space.ee
---
# Alerting rules for the UPS metrics collected by the `ups` Probe.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: ups
spec:
  groups:
    - name: ups
      rules:
        # Fires when any battery-status series other than `batteryNormal`
        # has a value above zero for one minute.
        - alert: UPSBatteryLost
          annotations:
            summary: One or more UPS-es have degraded batteries.
          expr: snmp_upsBatteryStatus{upsBatteryStatus!="batteryNormal"} > 0
          for: 1m
          labels:
            severity: critical
        # Fires when the sum over the `normal` output-source series is not
        # exactly 4. The constant must match the number of targets in the
        # `ups` Probe.
        - alert: UPSPowerLost
          annotations:
            summary: >-
              One or more UPS-es is not in normal operation mode. This either means
              power is lost or UPS was loaded and it's now in bypass mode.
          expr: sum(snmp_upsOutputSource { upsOutputSource = 'normal' }) != 4
          for: 1m
          labels:
            severity: critical
        # Fires when a UPS reports more than 80% output load for an hour.
        # NOTE(review): the summary previously said "more than 50%" while the
        # expression used 80; the text now states the threshold that actually
        # triggers the alert. Confirm 80 is the intended value.
        - alert: UPSExcessivelyLoaded
          annotations:
            summary: >-
              One or more UPS-es is loaded more than 80%. Make sure load on UPS-es
              is balanced and load for no UPS stays above 50%.
          expr: snmp_upsOutputPercentLoad > 80
          for: 1h
          labels:
            severity: critical
---
# Probe that scrapes the printer through the snmp-exporter Service using the
# `printer_mib` exporter module.
apiVersion: monitoring.coreos.com/v1
kind: Probe
metadata:
  name: printer
spec:
  module: printer_mib
  # Scrape once a minute, allowing up to 50 seconds per scrape.
  interval: 60s
  scrapeTimeout: 50s
  prober:
    url: snmp-exporter:9116
    path: /snmp
  targets:
    staticConfig:
      static:
        - mfp-chaos.pub.k-space.ee
  # Prefix every scraped metric name with `snmp_`.
  metricRelabelings:
    - sourceLabels:
        - __name__
      regex: '(.*)'
      replacement: 'snmp_${1}'
      targetLabel: __name__
---
# Alerting rule for the printer metrics collected by the `printer` Probe.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: printer
spec:
  groups:
    - name: printer
      rules:
        # Fires immediately (for: 0m) when the detected-error-state metric
        # equals 1.
        - alert: PrinterNeedsAttention
          annotations:
            summary: >-
              Printer is in error state. If the underlying reason is 'low on paper'
              make sure there is enough paper near the printer. If not, drop a line at
              accounting@k-space.ee to order more office supplies.
          expr: snmp_hrPrinterDetectedErrorState == 1
          for: 0m
          labels:
            severity: warning
---
# Probe that scrapes the projector through the snmp-exporter Service using
# the `epson_beamer` exporter module, once per minute.
apiVersion: monitoring.coreos.com/v1
kind: Probe
metadata:
  name: beamer
spec:
  module: epson_beamer
  interval: 60s
  prober:
    url: snmp-exporter:9116
    path: /snmp
  targets:
    staticConfig:
      static:
        - beamer-cyber.sec.k-space.ee
  # Prefix every scraped metric name with `snmp_`.
  metricRelabelings:
    - sourceLabels:
        - __name__
      regex: '(.*)'
      replacement: 'snmp_${1}'
      targetLabel: __name__