# Blackbox exporter monitoring: Probes, alert rules, Deployment, Service and ConfigMap.
---
# Probe "websites": checks a fixed list of web URLs through the
# blackbox-exporter Service, using its `http_2xx` module.
apiVersion: monitoring.coreos.com/v1
kind: Probe
metadata:
  name: websites
spec:
  module: http_2xx
  prober:
    url: blackbox-exporter
    path: /probe
  targets:
    staticConfig:
      static:
        - 'https://git.k-space.ee/'
        - 'https://grafana.k-space.ee/'
        - 'https://wiki.k-space.ee/'
        - 'https://pad.k-space.ee/'
        - 'https://members.k-space.ee/'
        - 'https://nextcloud.k-space.ee/'
        # Plain HTTP on a non-default port, unlike the HTTPS entries above.
        - 'http://minio.infra.k-space.ee:9001/login'
---
# Probe "k6.ee": sends the DNS query defined by the `dns_check_traefik`
# module to each of the listed server addresses.
# NOTE(review): this Probe is named k6.ee but selects dns_check_traefik,
# while the exporter config also defines a dns_check_k6 module that no
# Probe in this file references — confirm which module was intended.
apiVersion: monitoring.coreos.com/v1
kind: Probe
metadata:
  name: k6.ee
spec:
  module: dns_check_traefik
  prober:
    url: blackbox-exporter
    path: /probe
  targets:
    staticConfig:
      static:
        - '193.40.103.2'
        - '62.65.250.2'
---
# Probe "samba-cluster": TCP connect checks against port 636 of the three
# dc*.ad.k-space.ee hosts, using the exporter's `tcp_connect` module.
apiVersion: monitoring.coreos.com/v1
kind: Probe
metadata:
  name: samba-cluster
spec:
  prober:
    url: blackbox-exporter
    # Probes must be requested via /probe. /metrics only exposes the
    # exporter's own internal metrics and ignores module/target, so no
    # probe_success series would be produced for these targets.
    path: /probe
  module: tcp_connect
  targets:
    staticConfig:
      static:
        - dc1.ad.k-space.ee:636
        - dc2.ad.k-space.ee:636
        - dc3.ad.k-space.ee:636
---
# Probe "misc": TCP connect checks for assorted host:port endpoints,
# using the exporter's `tcp_connect` module.
apiVersion: monitoring.coreos.com/v1
kind: Probe
metadata:
  name: misc
spec:
  prober:
    url: blackbox-exporter
    # Probes must be requested via /probe. /metrics only exposes the
    # exporter's own internal metrics and ignores module/target, so no
    # probe_success series would be produced for these targets.
    path: /probe
  module: tcp_connect
  targets:
    staticConfig:
      static:
        - mail.k-space.ee:465
        - dev.k-space.ee:10648
        - mariadb.infra.k-space.ee:3306
---
# Alerting rules evaluated over the probe_* metrics.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: blackbox-exporter
spec:
  # https://awesome-prometheus-alerts.grep.to/rules#blackbox
  groups:
    - name: blackbox
      rules:
        # --- Generic probe health ---
        - alert: BlackboxProbeFailed
          expr: 'probe_success == 0'
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: 'Blackbox probe failed (instance {{ $labels.instance }})'
            description: 'Probe failed'

        - alert: BlackboxSlowProbe
          expr: 'avg_over_time(probe_duration_seconds[1m]) > 1'
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: 'Blackbox slow probe (instance {{ $labels.instance }})'
            description: 'Blackbox probe took more than 1s to complete'

        - alert: BlackboxSlowDNS
          expr: 'avg_over_time(probe_dns_lookup_time_seconds[1m]) > 1'
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: 'Blackbox slow DNS lookup (instance {{ $labels.instance }})'
            # Folded scalar: the lines below are joined with single spaces.
            description: >-
              Blackbox DNS lookup took more than 1s to complete.
              It seemed using IPv6 DNS servers in conjunction with Docker resulted
              in odd 5s latency bump. For now we're using 8.8.8.8 because of that

        # --- HTTP ---
        - alert: BlackboxProbeHttpFailure
          expr: 'probe_http_status_code <= 199 OR probe_http_status_code >= 400'
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: 'Blackbox probe HTTP failure (instance {{ $labels.instance }})'
            description: 'HTTP status code is not 200-399'

        # --- TLS certificate expiry ---
        # The next two rules intentionally share one alert name and differ
        # only in threshold (30 days vs 3 days) and severity.
        - alert: BlackboxSslCertificateWillExpireSoon
          expr: 'probe_ssl_earliest_cert_expiry - time() < 86400 * 30'
          for: 0m
          labels:
            severity: warning
          annotations:
            summary: 'Blackbox SSL certificate will expire soon (instance {{ $labels.instance }})'
            description: 'SSL certificate expires in 30 days'

        - alert: BlackboxSslCertificateWillExpireSoon
          expr: 'probe_ssl_earliest_cert_expiry - time() < 86400 * 3'
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: 'Blackbox SSL certificate will expire soon (instance {{ $labels.instance }})'
            description: 'SSL certificate expires in 3 days'

        - alert: BlackboxSslCertificateExpired
          expr: 'probe_ssl_earliest_cert_expiry - time() <= 0'
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: 'Blackbox SSL certificate expired (instance {{ $labels.instance }})'
            description: 'SSL certificate has expired already'

        # --- Latency per protocol ---
        - alert: BlackboxProbeSlowHttp
          expr: 'avg_over_time(probe_http_duration_seconds[1m]) > 1'
          for: 1m
          labels:
            severity: warning
          annotations:
            summary: 'Blackbox probe slow HTTP (instance {{ $labels.instance }})'
            description: 'HTTP request took more than 1s'

        - alert: BlackboxProbeSlowPing
          expr: 'avg_over_time(probe_icmp_duration_seconds[1m]) > 1'
          for: 1m
          labels:
            severity: warning
          annotations:
            summary: 'Blackbox probe slow ping (instance {{ $labels.instance }})'
            description: 'Blackbox ping took more than 1s'
---
# Deployment running two blackbox-exporter replicas, with the exporter
# configuration mounted from the blackbox-exporter-config ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: blackbox-exporter
spec:
  replicas: 2
  # Keep no old ReplicaSets around.
  revisionHistoryLimit: 0
  selector:
    matchLabels:
      app: blackbox-exporter
  template:
    metadata:
      labels:
        app: blackbox-exporter
    spec:
      # TODO: Results in odd 6s connection lag if scheduled in VLAN20
      nodeSelector:
        dedicated: monitoring
      tolerations:
        - key: dedicated
          operator: Equal
          value: monitoring
          effect: NoSchedule
      affinity:
        podAntiAffinity:
          # Hard rule: never place two pods labelled app=blackbox-exporter
          # on the same node (topology key is the node hostname).
          requiredDuringSchedulingIgnoredDuringExecution:
            - topologyKey: 'kubernetes.io/hostname'
              labelSelector:
                matchExpressions:
                  - key: app
                    operator: In
                    values:
                      - blackbox-exporter
      containers:
        - name: blackbox-exporter
          image: prom/blackbox-exporter:v0.20.0
          volumeMounts:
            # The ConfigMap's config.yml key appears as a file in this directory.
            - name: blackbox-exporter-config
              mountPath: /etc/blackbox_exporter
      volumes:
        - name: blackbox-exporter-config
          configMap:
            name: blackbox-exporter-config
---
# ClusterIP Service for pods labelled app=blackbox-exporter:
# Service port 80 forwards to container port 9115.
apiVersion: v1
kind: Service
metadata:
  name: blackbox-exporter
spec:
  type: ClusterIP
  selector:
    app: blackbox-exporter
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 9115
---
# Exporter configuration, stored under the single key config.yml.
# Every module pins preferred_ip_protocol to "ip4" with fallback disabled.
# Modules: http_2xx, http_post_2xx, tcp_connect, icmp, dns_check_traefik,
# dns_check_k6. The two DNS modules require an A record in 193.40.103.x.
# Do not add comments inside the block below: it is a literal string and
# anything written there becomes part of the stored value.
kind: ConfigMap
apiVersion: v1
metadata:
  name: blackbox-exporter-config
data:
  config.yml: |-
    modules:
      http_2xx:
        prober: http
        http:
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false
      http_post_2xx:
        prober: http
        http:
          method: POST
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false
      tcp_connect:
        prober: tcp
        tcp:
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false
      icmp:
        prober: icmp
        icmp:
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false
      dns_check_traefik:
        prober: dns
        dns:
          query_name: "traefik.k-space.ee"
          query_type: "A"
          validate_answer_rrs:
            fail_if_not_matches_regexp:
              - "traefik\\.k-space\\.ee\\.\\t.*\\tIN\\tA\\t193\\.40\\.103\\.[1-9][0-9]*"
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false
      dns_check_k6:
        prober: dns
        dns:
          query_name: "k6.ee"
          query_type: "A"
          validate_answer_rrs:
            fail_if_not_matches_regexp:
              - "k6\\.ee\\.\\t.*\\tIN\\tA\\t193\\.40\\.103\\.[1-9][0-9]*"
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false