# Blackbox exporter monitoring: Probe targets, alert rules, Deployment, Service and ConfigMap.
---
# Probe: HTTP(S) checks of the URLs listed below, executed by the
# blackbox exporter using its http_2xx module.
apiVersion: monitoring.coreos.com/v1
kind: Probe
metadata:
  name: websites
spec:
  module: http_2xx
  prober:
    url: blackbox-exporter
    path: /probe
  targets:
    staticConfig:
      static:
        - https://git.k-space.ee/
        - https://grafana.k-space.ee/
        - https://wiki.k-space.ee/
        - https://pad.k-space.ee/
        - https://members.k-space.ee/
        - https://nextcloud.k-space.ee/
        - http://minio.infra.k-space.ee:9001/login
---
# Probe: DNS checks sent to the two IP addresses listed below, executed by
# the blackbox exporter using its dns_check_traefik module.
# NOTE(review): the blackbox-exporter-config ConfigMap also defines a
# dns_check_k6 module that no Probe in this file references; given this
# Probe's name, confirm that dns_check_traefik is the intended module.
apiVersion: monitoring.coreos.com/v1
kind: Probe
metadata:
  name: k6.ee
spec:
  module: dns_check_traefik
  prober:
    url: blackbox-exporter
    path: /probe
  targets:
    staticConfig:
      static:
        - 193.40.103.2
        - 62.65.250.2
---
# Probe: TCP connect checks against port 636 of the three dc*.ad hosts,
# executed by the blackbox exporter using its tcp_connect module.
apiVersion: monitoring.coreos.com/v1
kind: Probe
metadata:
  name: samba-cluster
spec:
  prober:
    url: blackbox-exporter
    # Must be /probe: that is the endpoint which runs the selected module
    # against the target. /metrics only exposes the exporter's own metrics,
    # so the targets would never actually be probed and probe_success-based
    # alerts could never fire for them.
    path: /probe
  module: tcp_connect
  targets:
    staticConfig:
      static:
        - dc1.ad.k-space.ee:636
        - dc2.ad.k-space.ee:636
        - dc3.ad.k-space.ee:636
---
# Probe: TCP connect checks against the host:port pairs listed below,
# executed by the blackbox exporter using its tcp_connect module.
apiVersion: monitoring.coreos.com/v1
kind: Probe
metadata:
  name: misc
spec:
  prober:
    url: blackbox-exporter
    # Must be /probe: that is the endpoint which runs the selected module
    # against the target. /metrics only exposes the exporter's own metrics,
    # so the targets would never actually be probed and probe_success-based
    # alerts could never fire for them.
    path: /probe
  module: tcp_connect
  targets:
    staticConfig:
      static:
        - mail.k-space.ee:465
        - dev.k-space.ee:10648
        - mariadb.infra.k-space.ee:3306
---
# PrometheusRule: alerting rules over the probe_* metrics produced by the
# blackbox exporter probes.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: blackbox-exporter
spec:
  # https://awesome-prometheus-alerts.grep.to/rules#blackbox
  groups:
    - name: blackbox
      rules:
        # Probe reported failure for 2 minutes.
        - alert: BlackboxProbeFailed
          expr: probe_success == 0
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: 'Blackbox probe failed (instance {{ $labels.instance }})'
            description: Probe failed

        # 1-minute average of the total probe duration above 1s for 5 minutes.
        - alert: BlackboxSlowProbe
          expr: avg_over_time(probe_duration_seconds[1m]) > 1
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: 'Blackbox slow probe (instance {{ $labels.instance }})'
            description: Blackbox probe took more than 1s to complete

        # 1-minute average of the DNS lookup time above 1s for 5 minutes.
        - alert: BlackboxSlowDNS
          expr: avg_over_time(probe_dns_lookup_time_seconds[1m]) > 1
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: 'Blackbox slow DNS lookup (instance {{ $labels.instance }})'
            description: >-
              Blackbox DNS lookup took more than 1s to complete.
              It seemed using IPv6 DNS servers in conjunction with
              Docker resulted in odd 5s latency bump.
              For now we're using 8.8.8.8 because of that

        # HTTP status code outside the 200-399 range for 5 minutes.
        - alert: BlackboxProbeHttpFailure
          expr: probe_http_status_code <= 199 OR probe_http_status_code >= 400
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: 'Blackbox probe HTTP failure (instance {{ $labels.instance }})'
            description: HTTP status code is not 200-399

        # Less than 30 days until the earliest certificate expiry (warning).
        # This expression also matches whenever the 3-day and expired rules
        # below match.
        - alert: BlackboxSslCertificateWillExpireSoon
          expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 30
          for: 0m
          labels:
            severity: warning
          annotations:
            summary: 'Blackbox SSL certificate will expire soon (instance {{ $labels.instance }})'
            description: SSL certificate expires in 30 days

        # Less than 3 days until the earliest certificate expiry (critical).
        # Shares its alert name with the 30-day rule; only severity differs.
        - alert: BlackboxSslCertificateWillExpireSoon
          expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 3
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: 'Blackbox SSL certificate will expire soon (instance {{ $labels.instance }})'
            description: SSL certificate expires in 3 days

        # Earliest certificate expiry is now or in the past.
        - alert: BlackboxSslCertificateExpired
          expr: probe_ssl_earliest_cert_expiry - time() <= 0
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: 'Blackbox SSL certificate expired (instance {{ $labels.instance }})'
            description: SSL certificate has expired already

        # 1-minute average of the HTTP duration above 1s for 1 minute.
        - alert: BlackboxProbeSlowHttp
          expr: avg_over_time(probe_http_duration_seconds[1m]) > 1
          for: 1m
          labels:
            severity: warning
          annotations:
            summary: 'Blackbox probe slow HTTP (instance {{ $labels.instance }})'
            description: HTTP request took more than 1s

        # 1-minute average of the ICMP duration above 1s for 1 minute.
        - alert: BlackboxProbeSlowPing
          expr: avg_over_time(probe_icmp_duration_seconds[1m]) > 1
          for: 1m
          labels:
            severity: warning
          annotations:
            summary: 'Blackbox probe slow ping (instance {{ $labels.instance }})'
            description: Blackbox ping took more than 1s
---
# Deployment: two blackbox-exporter replicas, scheduled only onto nodes
# labelled dedicated=monitoring and never two on the same hostname.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: blackbox-exporter
spec:
  replicas: 2
  # Retain no old ReplicaSets after a rollout.
  revisionHistoryLimit: 0
  selector:
    matchLabels:
      app: blackbox-exporter
  template:
    metadata:
      labels:
        app: blackbox-exporter
    spec:
      # TODO: Results in odd 6s connection lag if scheduled in VLAN20
      nodeSelector:
        dedicated: monitoring
      # Tolerate the dedicated=monitoring:NoSchedule taint.
      tolerations:
        - key: dedicated
          operator: Equal
          value: monitoring
          effect: NoSchedule
      affinity:
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            # A single matchLabels entry is equivalent to a matchExpressions
            # element using operator In with that one value.
            - topologyKey: kubernetes.io/hostname
              labelSelector:
                matchLabels:
                  app: blackbox-exporter
      containers:
        - name: blackbox-exporter
          image: prom/blackbox-exporter:v0.20.0
          volumeMounts:
            # Exposes the ConfigMap keys as files under this directory.
            - name: blackbox-exporter-config
              mountPath: /etc/blackbox_exporter
      volumes:
        - name: blackbox-exporter-config
          configMap:
            name: blackbox-exporter-config
---
# Service: cluster-internal endpoint for the blackbox-exporter pods;
# port 80 of the Service forwards to port 9115 of the pods.
apiVersion: v1
kind: Service
metadata:
  name: blackbox-exporter
spec:
  type: ClusterIP
  selector:
    app: blackbox-exporter
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 9115
---
# ConfigMap: blackbox exporter module definitions, mounted by the
# blackbox-exporter Deployment under /etc/blackbox_exporter.
# Every module sets preferred_ip_protocol to ip4 with fallback disabled.
# The two DNS modules query an A record and fail unless an answer matches
# the expected name with an address in 193.40.103.x.
# NOTE(review): http_post_2xx, icmp and dns_check_k6 are not referenced by
# any Probe visible in this file - confirm whether they are still needed.
apiVersion: v1
kind: ConfigMap
metadata:
  name: blackbox-exporter-config
data:
  config.yml: |-
    modules:
      http_2xx:
        prober: http
        http:
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false
      http_post_2xx:
        prober: http
        http:
          method: POST
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false
      tcp_connect:
        prober: tcp
        tcp:
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false
      icmp:
        prober: icmp
        icmp:
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false
      dns_check_traefik:
        prober: dns
        dns:
          query_name: "traefik.k-space.ee"
          query_type: "A"
          validate_answer_rrs:
            fail_if_not_matches_regexp:
              - "traefik\\.k-space\\.ee\\.\\t.*\\tIN\\tA\\t193\\.40\\.103\\.[1-9][0-9]*"
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false
      dns_check_k6:
        prober: dns
        dns:
          query_name: "k6.ee"
          query_type: "A"
          validate_answer_rrs:
            fail_if_not_matches_regexp:
              - "k6\\.ee\\.\\t.*\\tIN\\tA\\t193\\.40\\.103\\.[1-9][0-9]*"
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false