forked from k-space/kube
Move Prometheus instance to monitoring namespace
This commit is contained in:
258
monitoring/blackbox-exporter.yaml
Normal file
258
monitoring/blackbox-exporter.yaml
Normal file
@@ -0,0 +1,258 @@
|
||||
---
# HTTP availability probe for the public web services listed below.
# Each target is fetched through the blackbox exporter using its `http_2xx`
# module.
apiVersion: monitoring.coreos.com/v1
kind: Probe
metadata:
  name: websites
spec:
  module: http_2xx
  prober:
    # Address of the blackbox exporter and the endpoint that runs a probe.
    url: blackbox-exporter
    path: /probe
  targets:
    staticConfig:
      static:
        - 'https://git.k-space.ee/'
        - 'https://grafana.k-space.ee/'
        - 'https://wiki.k-space.ee/'
        - 'https://pad.k-space.ee/'
        - 'https://members.k-space.ee/'
        - 'https://nextcloud.k-space.ee/'
        - 'http://minio.infra.k-space.ee:9001/login'
---
# DNS probe for the k6.ee record. With the blackbox `dns` prober each static
# target is the DNS server that receives the query.
apiVersion: monitoring.coreos.com/v1
kind: Probe
metadata:
  name: k6.ee
spec:
  prober:
    url: blackbox-exporter
    path: /probe
  # Use the k6.ee-specific module from the blackbox-exporter-config ConfigMap.
  # This probe previously referenced `dns_check_traefik`, which queries
  # traefik.k-space.ee and left `dns_check_k6` unused.
  # NOTE(review): confirm that both servers are expected to answer k6.ee with
  # an A record in 193.40.103.x, as the module's regexp requires.
  module: dns_check_k6
  targets:
    staticConfig:
      static:
        - '193.40.103.2'
        - '62.65.250.2'
---
# TCP connect probe against port 636 of the three dc*.ad.k-space.ee hosts.
apiVersion: monitoring.coreos.com/v1
kind: Probe
metadata:
  name: samba-cluster
spec:
  prober:
    url: blackbox-exporter
    # Must be the exporter's probe endpoint. `/metrics` only serves the
    # exporter's own telemetry and ignores the module/target parameters, so
    # no probe_success series would be produced for the targets below and
    # the BlackboxProbeFailed alert could never fire for them.
    path: /probe
  module: tcp_connect
  targets:
    staticConfig:
      static:
        - 'dc1.ad.k-space.ee:636'
        - 'dc2.ad.k-space.ee:636'
        - 'dc3.ad.k-space.ee:636'
---
# TCP connect probe for assorted host:port endpoints (mail, dev, mariadb).
apiVersion: monitoring.coreos.com/v1
kind: Probe
metadata:
  name: misc
spec:
  prober:
    url: blackbox-exporter
    # Must be the exporter's probe endpoint. `/metrics` only serves the
    # exporter's own telemetry and ignores the module/target parameters, so
    # the targets below would never actually be probed.
    path: /probe
  module: tcp_connect
  targets:
    staticConfig:
      static:
        - 'mail.k-space.ee:465'
        - 'dev.k-space.ee:10648'
        - 'mariadb.infra.k-space.ee:3306'
---
# Alerting rules evaluated over the probe_* series produced by the blackbox
# exporter probes.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: blackbox-exporter
spec:
  # https://awesome-prometheus-alerts.grep.to/rules#blackbox
  groups:
    - name: blackbox
      rules:
        # Probe reported failure for 2 minutes.
        - alert: BlackboxProbeFailed
          expr: probe_success == 0
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: 'Blackbox probe failed (instance {{ $labels.instance }})'
            description: Probe failed

        # One-minute average of total probe duration above 1s, held for 5m.
        - alert: BlackboxSlowProbe
          expr: avg_over_time(probe_duration_seconds[1m]) > 1
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: 'Blackbox slow probe (instance {{ $labels.instance }})'
            description: Blackbox probe took more than 1s to complete

        # One-minute average of DNS lookup time above 1s, held for 5m.
        - alert: BlackboxSlowDNS
          expr: avg_over_time(probe_dns_lookup_time_seconds[1m]) > 1
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: 'Blackbox slow DNS lookup (instance {{ $labels.instance }})'
            description: >-
              Blackbox DNS lookup took more than 1s to complete.
              It seemed using IPv6 DNS servers in conjunction with Docker resulted
              in odd 5s latency bump. For now we're using 8.8.8.8 because of that

        # HTTP status outside the 200-399 range for 5 minutes.
        - alert: BlackboxProbeHttpFailure
          expr: probe_http_status_code <= 199 OR probe_http_status_code >= 400
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: 'Blackbox probe HTTP failure (instance {{ $labels.instance }})'
            description: HTTP status code is not 200-399

        # The next two rules share one alert name and differ only in threshold
        # (30 days vs 3 days) and severity (warning vs critical).
        - alert: BlackboxSslCertificateWillExpireSoon
          expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 30
          for: 0m
          labels:
            severity: warning
          annotations:
            summary: 'Blackbox SSL certificate will expire soon (instance {{ $labels.instance }})'
            description: SSL certificate expires in 30 days

        - alert: BlackboxSslCertificateWillExpireSoon
          expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 3
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: 'Blackbox SSL certificate will expire soon (instance {{ $labels.instance }})'
            description: SSL certificate expires in 3 days

        # Earliest certificate expiry is at or before the current time.
        - alert: BlackboxSslCertificateExpired
          expr: probe_ssl_earliest_cert_expiry - time() <= 0
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: 'Blackbox SSL certificate expired (instance {{ $labels.instance }})'
            description: SSL certificate has expired already

        # One-minute average of HTTP request duration above 1s, held for 1m.
        - alert: BlackboxProbeSlowHttp
          expr: avg_over_time(probe_http_duration_seconds[1m]) > 1
          for: 1m
          labels:
            severity: warning
          annotations:
            summary: 'Blackbox probe slow HTTP (instance {{ $labels.instance }})'
            description: HTTP request took more than 1s

        # One-minute average of ICMP round trip above 1s, held for 1m.
        - alert: BlackboxProbeSlowPing
          expr: avg_over_time(probe_icmp_duration_seconds[1m]) > 1
          for: 1m
          labels:
            severity: warning
          annotations:
            summary: 'Blackbox probe slow ping (instance {{ $labels.instance }})'
            description: Blackbox ping took more than 1s
---
# Runs two blackbox exporter replicas on nodes labelled
# `dedicated: monitoring`, never two on the same host.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: blackbox-exporter
spec:
  replicas: 2
  revisionHistoryLimit: 0
  selector:
    matchLabels:
      app: blackbox-exporter
  template:
    metadata:
      labels:
        app: blackbox-exporter
    spec:
      # TODO: Results in odd 6s connection lag if scheduled in VLAN20
      nodeSelector:
        dedicated: monitoring
      # Allows scheduling onto nodes tainted dedicated=monitoring:NoSchedule.
      tolerations:
        - key: dedicated
          operator: Equal
          value: monitoring
          effect: NoSchedule
      # Hard requirement: no two pods labelled app=blackbox-exporter may share
      # a kubernetes.io/hostname topology domain.
      affinity:
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            - topologyKey: "kubernetes.io/hostname"
              labelSelector:
                matchExpressions:
                  - key: app
                    operator: In
                    values:
                      - blackbox-exporter
      containers:
        - name: blackbox-exporter
          image: prom/blackbox-exporter:v0.20.0
          volumeMounts:
            # Exposes the ConfigMap's config.yml under /etc/blackbox_exporter.
            # NOTE(review): presumably the image reads its configuration from
            # this directory by default - confirm, since no args are set.
            - name: blackbox-exporter-config
              mountPath: /etc/blackbox_exporter
      volumes:
        - name: blackbox-exporter-config
          configMap:
            name: blackbox-exporter-config
---
# Cluster-internal Service for the blackbox exporter pods: port 80 forwards to
# container port 9115 on pods labelled app=blackbox-exporter.
apiVersion: v1
kind: Service
metadata:
  name: blackbox-exporter
spec:
  type: ClusterIP
  selector:
    app: blackbox-exporter
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 9115
---
# Blackbox exporter module definitions, mounted into the exporter pods by the
# blackbox-exporter Deployment. Every module is pinned to IPv4 with fallback
# disabled.
#
# Modules defined in config.yml:
#   http_2xx, http_post_2xx - HTTP GET / POST probes
#   tcp_connect             - plain TCP connect probe
#   icmp                    - ICMP probe
#   dns_check_traefik       - A lookup of traefik.k-space.ee, answer must
#                             match an address in 193.40.103.x
#   dns_check_k6            - A lookup of k6.ee, answer must match an address
#                             in 193.40.103.x
apiVersion: v1
kind: ConfigMap
metadata:
  name: blackbox-exporter-config
data:
  config.yml: |-
    modules:
      http_2xx:
        prober: http
        http:
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false
      http_post_2xx:
        prober: http
        http:
          method: POST
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false
      tcp_connect:
        prober: tcp
        tcp:
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false
      icmp:
        prober: icmp
        icmp:
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false
      dns_check_traefik:
        prober: dns
        dns:
          query_name: "traefik.k-space.ee"
          query_type: "A"
          validate_answer_rrs:
            fail_if_not_matches_regexp:
              - "traefik\\.k-space\\.ee\\.\\t.*\\tIN\\tA\\t193\\.40\\.103\\.[1-9][0-9]*"
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false
      dns_check_k6:
        prober: dns
        dns:
          query_name: "k6.ee"
          query_type: "A"
          validate_answer_rrs:
            fail_if_not_matches_regexp:
              - "k6\\.ee\\.\\t.*\\tIN\\tA\\t193\\.40\\.103\\.[1-9][0-9]*"
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false
Reference in New Issue
Block a user