---
apiVersion: monitoring.coreos.com/v1
kind: Probe
metadata:
  name: websites
# HTTP checks for the web endpoints listed below. Every target is probed
# through the `blackbox-exporter` prober using its `http_2xx` module.
spec:
  module: http_2xx
  prober:
    path: /probe
    url: blackbox-exporter
  targets:
    staticConfig:
      static:
        # Served over HTTPS.
        - 'https://git.k-space.ee/'
        - 'https://grafana.k-space.ee/'
        - 'https://wiki.k-space.ee/'
        - 'https://pad.k-space.ee/'
        - 'https://nextcloud.k-space.ee/'
        # Probed over plain HTTP at the /login path.
        - 'http://external-console.minio-clusters.k-space.ee/login'
        - 'http://shared-console.minio-clusters.k-space.ee/login'
---
apiVersion: monitoring.coreos.com/v1
kind: Probe
metadata:
  name: bind
# DNS checks: each address below is handed to the `dns_check_traefik`
# blackbox module, which uses the `dns` prober to look up the A record of
# traefik.k-space.ee and validates the answer against a regexp.
spec:
  module: dns_check_traefik
  prober:
    path: /probe
    url: blackbox-exporter
  targets:
    staticConfig:
      static:
        - '193.40.103.2'
        - '62.65.250.2'
        - '172.20.53.1'
        - '172.20.53.2'
        - '172.20.53.3'
---
apiVersion: monitoring.coreos.com/v1
kind: Probe
metadata:
  name: misc
# TCP reachability checks for miscellaneous infrastructure endpoints,
# executed through the `blackbox-exporter` prober with the `tcp_connect`
# module (a plain TCP connect, forced to IPv4 by the module config).
spec:
  prober:
    url: blackbox-exporter
    path: /probe
  module: tcp_connect
  targets:
    staticConfig:
      static:
        # mail.k-space.ee:465 is deliberately not listed here: the `wildduck`
        # Probe already checks it with the same module, and probing it from
        # two Probes makes every blackbox alert for that endpoint fire twice.
        - mariadb.infra.k-space.ee:3306
---
apiVersion: monitoring.coreos.com/v1
kind: Probe
metadata:
  name: wildduck
# TCP connect checks against the mail host, run through the
# `blackbox-exporter` prober with the `tcp_connect` module.
# NOTE(review): ports are presumably SMTP (25), SMTPS (465) and IMAPS (993);
# confirm against the mail server configuration.
spec:
  module: tcp_connect
  prober:
    path: /probe
    url: blackbox-exporter
  targets:
    staticConfig:
      static:
        - 'mail.k-space.ee:25'
        - 'mail.k-space.ee:465'
        - 'mail.k-space.ee:993'
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: blackbox-exporter
spec:
  # Alerting rules over the probe_* metrics produced by blackbox-exporter.
  # Adapted from https://awesome-prometheus-alerts.grep.to/rules#blackbox
  groups:
  - name: blackbox
    rules:
    # Any probe reporting failure for two minutes straight.
    - alert: BlackboxProbeFailed
      expr: probe_success == 0
      for: 2m
      labels:
        severity: critical
      annotations:
        summary: Blackbox probe failed (instance {{ $labels.instance }})
        description: Probe failed
    # Whole-probe duration averaged over 1m stays above 1s for 5m.
    - alert: BlackboxSlowProbe
      expr: avg_over_time(probe_duration_seconds[1m]) > 1
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: Blackbox slow probe (instance {{ $labels.instance }})
        description: Blackbox probe took more than 1s to complete
    # DNS lookup phase averaged over 1m stays above 1s for 5m.
    - alert: BlackboxSlowDNS
      expr: avg_over_time(probe_dns_lookup_time_seconds[1m]) > 1
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: Blackbox slow DNS lookup (instance {{ $labels.instance }})
        # Folded scalar: the three lines are joined with single spaces.
        description: >-
          Blackbox DNS lookup took more than 1s to complete.
          It seemed using IPv6 DNS servers in conjunction with Docker resulted
          in odd 5s latency bump. For now we're using 8.8.8.8 because of that
    # HTTP status outside 200-399 (PromQL keywords are case-insensitive).
    - alert: BlackboxProbeHttpFailure
      expr: probe_http_status_code <= 199 OR probe_http_status_code >= 400
      for: 5m
      labels:
        severity: critical
      annotations:
        summary: Blackbox probe HTTP failure (instance {{ $labels.instance }})
        description: HTTP status code is not 200-399
    # The three certificate alerts below cover disjoint ranges of remaining
    # lifetime so that a single certificate raises exactly one of them:
    #   [3d, 30d) -> warning, (0, 3d) -> critical, <= 0 -> expired.
    # The chained comparison keeps the vector's value, so $value is the
    # remaining lifetime in seconds.
    - alert: BlackboxSslCertificateWillExpireSoon
      expr: 86400 * 3 <= probe_ssl_earliest_cert_expiry - time() < 86400 * 30
      for: 0m
      labels:
        severity: warning
      annotations:
        summary: Blackbox SSL certificate will expire soon (instance {{ $labels.instance }})
        description: 'SSL certificate expires in less than 30 days (remaining: {{ $value | humanizeDuration }})'
    - alert: BlackboxSslCertificateWillExpireSoon
      expr: 0 < probe_ssl_earliest_cert_expiry - time() < 86400 * 3
      for: 0m
      labels:
        severity: critical
      annotations:
        summary: Blackbox SSL certificate will expire soon (instance {{ $labels.instance }})
        description: 'SSL certificate expires in less than 3 days (remaining: {{ $value | humanizeDuration }})'
    - alert: BlackboxSslCertificateExpired
      expr: probe_ssl_earliest_cert_expiry - time() <= 0
      for: 0m
      labels:
        severity: critical
      annotations:
        summary: Blackbox SSL certificate expired (instance {{ $labels.instance }})
        description: SSL certificate has expired already
    # HTTP request duration averaged over 1m stays above 1s for 1m.
    - alert: BlackboxProbeSlowHttp
      expr: avg_over_time(probe_http_duration_seconds[1m]) > 1
      for: 1m
      labels:
        severity: warning
      annotations:
        summary: Blackbox probe slow HTTP (instance {{ $labels.instance }})
        description: HTTP request took more than 1s
    # ICMP round trip averaged over 1m stays above 1s for 1m.
    - alert: BlackboxProbeSlowPing
      expr: avg_over_time(probe_icmp_duration_seconds[1m]) > 1
      for: 1m
      labels:
        severity: warning
      annotations:
        summary: Blackbox probe slow ping (instance {{ $labels.instance }})
        description: Blackbox ping took more than 1s
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: blackbox-exporter
# Runs two blackbox-exporter replicas on the dedicated monitoring nodes,
# never two on the same host, with the probe module configuration mounted
# from the `blackbox-exporter-config` ConfigMap.
spec:
  revisionHistoryLimit: 0
  replicas: 2
  selector:
    matchLabels:
      app: blackbox-exporter
  template:
    metadata:
      labels:
        app: blackbox-exporter
    spec:
      containers:
        - name: blackbox-exporter
          image: prom/blackbox-exporter:v0.24.0
          ports:
            - name: http
              containerPort: 9115
          # Keep the pod out of the Service endpoints until the exporter is
          # actually serving, and restart it if it stops answering.
          readinessProbe:
            httpGet:
              path: /-/healthy
              port: http
          livenessProbe:
            httpGet:
              path: /-/healthy
              port: http
          volumeMounts:
            - name: blackbox-exporter-config
              mountPath: /etc/blackbox_exporter
      volumes:
        - name: blackbox-exporter-config
          configMap:
            name: blackbox-exporter-config
      # TODO: Results in odd 6s connection lag if scheduled in VLAN20
      nodeSelector:
        dedicated: monitoring
      tolerations:
        - key: dedicated
          operator: Equal
          value: monitoring
          effect: NoSchedule
      affinity:
        # Hard requirement: at most one replica per node.
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchExpressions:
                  - key: app
                    operator: In
                    values:
                      - blackbox-exporter
              topologyKey: "kubernetes.io/hostname"
---
apiVersion: v1
kind: Service
metadata:
  name: blackbox-exporter
# Cluster-internal entry point for the exporter pods: port 80 of this
# Service forwards to container port 9115 of pods labelled
# `app: blackbox-exporter`.
spec:
  type: ClusterIP
  selector:
    app: blackbox-exporter
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 9115
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: blackbox-exporter-config
# Probe module definitions for blackbox-exporter. The Deployment in this file
# mounts this ConfigMap at /etc/blackbox_exporter, so the key below appears
# there as config.yml.
#
# Modules referenced by the Probe resources in this file: http_2xx,
# tcp_connect and dns_check_traefik. http_post_2xx, icmp and dns_check_k6 are
# defined but not referenced by any Probe visible here.
#
# Every module sets preferred_ip_protocol "ip4" with ip_protocol_fallback
# false. Both DNS modules query an A record and list a
# fail_if_not_matches_regexp pattern expecting an address in 193.40.103.x.
data:
  # `|-` is a literal block scalar with the trailing newline stripped: every
  # byte below is part of the file content, so anything added inside it
  # (including `#` lines) changes the mounted configuration.
  config.yml: |-
    modules:
      http_2xx:
        prober: http
        http:
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false
      http_post_2xx:
        prober: http
        http:
          method: POST
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false
      tcp_connect:
        prober: tcp
        tcp:
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false
      icmp:
        prober: icmp
        icmp:
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false
      dns_check_traefik:
        prober: dns
        dns:
          query_name: "traefik.k-space.ee"
          query_type: "A"
          validate_answer_rrs:
            fail_if_not_matches_regexp:
             - "traefik\\.k-space\\.ee\\.\\t.*\\tIN\\tA\\t193\\.40\\.103\\.[1-9][0-9]*"
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false
      dns_check_k6:
        prober: dns
        dns:
          query_name: "k6.ee"
          query_type: "A"
          validate_answer_rrs:
            fail_if_not_matches_regexp:
             - "k6\\.ee\\.\\t.*\\tIN\\tA\\t193\\.40\\.103\\.[1-9][0-9]*"
          preferred_ip_protocol: "ip4"
          ip_protocol_fallback: false