diff --git a/monitoring/README.md b/monitoring/README.md new file mode 100644 index 0000000..f15edd9 --- /dev/null +++ b/monitoring/README.md @@ -0,0 +1,28 @@ +## Monitoring + +This namespace is managed by +[ArgoCD](https://argocd.k-space.ee/applications/argocd/monitoring) + +To reconfigure SNMP targets etc: + +``` +kubectl delete -n monitoring configmap snmp-exporter +kubectl create -n monitoring configmap snmp-exporter --from-file=snmp.yml=snmp-configs.yaml +``` + +To set Slack secrets: + +``` + kubectl create -n monitoring secret generic slack-secrets \ + --from-literal=webhook-url=https://hooks.slack.com/services/... +``` + +To set Mikrotik secrets: + +``` + kubectl create -n monitoring secret generic mikrotik-exporter \ + --from-literal=MIKROTIK_PASSWORD='f7W!H*Pu' \ + --from-literal=PROMETHEUS_BEARER_TOKEN=$(cat /dev/urandom | base64 | head -c 30) +``` + + diff --git a/monitoring/alertmanager.yaml b/monitoring/alertmanager.yaml new file mode 100644 index 0000000..3bb061b --- /dev/null +++ b/monitoring/alertmanager.yaml @@ -0,0 +1,62 @@ +--- +apiVersion: monitoring.coreos.com/v1alpha1 +kind: AlertmanagerConfig +metadata: + name: alertmanager + labels: + app.kubernetes.io/name: alertmanager +spec: + route: + routes: + - continue: false + receiver: slack-notifications + matchers: + - matchType: "=" + name: severity + value: critical + receiver: 'null' + receivers: + - name: 'null' + - name: 'slack-notifications' + slackConfigs: + - channel: '#kube-prod' + sendResolved: true + apiURL: + name: slack-secrets + key: webhook-url + +--- +apiVersion: monitoring.coreos.com/v1 +kind: Alertmanager +metadata: + name: alertmanager +spec: + alertmanagerConfigMatcherStrategy: + type: None + alertmanagerConfigNamespaceSelector: {} + alertmanagerConfigSelector: {} + alertmanagerConfiguration: + name: alertmanager + secrets: + - slack-secrets + nodeSelector: + dedicated: monitoring + tolerations: + - key: dedicated + operator: Equal + value: monitoring + effect: NoSchedule + replicas: 3 + serviceAccountName: alertmanager + externalUrl: http://am.k-space.ee/ + routePrefix: "/" + securityContext: + fsGroup: 2000 + runAsGroup: 2000 + runAsNonRoot: true + runAsUser: 1000 +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: alertmanager diff --git a/prometheus-operator/blackbox-exporter.yml b/monitoring/blackbox-exporter.yaml similarity index 99% rename from prometheus-operator/blackbox-exporter.yml rename to monitoring/blackbox-exporter.yaml index 8a26943..e16b3a3 100644 --- a/prometheus-operator/blackbox-exporter.yml +++ b/monitoring/blackbox-exporter.yaml @@ -156,7 +156,7 @@ metadata: name: blackbox-exporter spec: revisionHistoryLimit: 0 - replicas: 3 + replicas: 2 selector: matchLabels: app: blackbox-exporter diff --git a/prometheus-operator/mikrotik-exporter.yml b/monitoring/mikrotik-exporter.yaml similarity index 100% rename from prometheus-operator/mikrotik-exporter.yml rename to monitoring/mikrotik-exporter.yaml diff --git a/prometheus-operator/node-exporter.yml b/monitoring/node-exporter.yaml similarity index 100% rename from prometheus-operator/node-exporter.yml rename to monitoring/node-exporter.yaml diff --git a/prometheus-operator/application.yml b/monitoring/prometheus.yaml similarity index 90% rename from prometheus-operator/application.yml rename to monitoring/prometheus.yaml index 27d1e5b..26b93fc 100644 --- a/prometheus-operator/application.yml +++ b/monitoring/prometheus.yaml @@ -1,30 +1,4 @@ --- -apiVersion: monitoring.coreos.com/v1alpha1 -kind: AlertmanagerConfig -metadata: - name: alertmanager - labels: - app.kubernetes.io/name: alertmanager -spec: - route: - routes: - - continue: false - receiver: slack-notifications - matchers: - - matchType: "=" - name: severity - value: critical - receiver: 'null' - receivers: - - name: 'null' - - name: 'slack-notifications' - slackConfigs: - - channel: '#kube-prod' - sendResolved: true - apiURL: - name: slack-secrets - key: webhook-url ---- apiVersion: monitoring.coreos.com/v1 kind: PodMonitor metadata: @@ -37,41 +11,6 @@ spec: - port: metrics --- apiVersion: monitoring.coreos.com/v1 -kind: Alertmanager -metadata: - name: alertmanager -spec: - alertmanagerConfigMatcherStrategy: - type: None - alertmanagerConfigNamespaceSelector: {} - alertmanagerConfigSelector: {} - alertmanagerConfiguration: - name: alertmanager - secrets: - - slack-secrets - nodeSelector: - dedicated: monitoring - tolerations: - - key: dedicated - operator: Equal - value: monitoring - effect: NoSchedule - replicas: 3 - serviceAccountName: alertmanager - externalUrl: http://am.k-space.ee/ - routePrefix: "/" - securityContext: - fsGroup: 2000 - runAsGroup: 2000 - runAsNonRoot: true - runAsUser: 1000 ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: alertmanager ---- -apiVersion: monitoring.coreos.com/v1 kind: Prometheus metadata: name: prometheus @@ -172,7 +111,7 @@ spec: description: "A Prometheus job has disappeared\n VALUE = {{ $value }}\n \ \ LABELS = {{ $labels }}" summary: Prometheus job missing (instance {{ $labels.instance }}) - expr: absent(up{job="prometheus-operator/prometheus"}) + expr: absent(up{job="monitoring/prometheus"}) for: 0m labels: severity: warning @@ -221,7 +160,7 @@ spec: \ $value }}\n LABELS = {{ $labels }}" summary: Prometheus AlertManager job missing (instance {{ $labels.instance }}) - expr: absent(up{job="prometheus-operator/alertmanager"}) + expr: absent(up{job="monitoring/alertmanager"}) for: 0m labels: severity: warning @@ -413,7 +352,7 @@ metadata: traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" external-dns.alpha.kubernetes.io/target: traefik.k-space.ee - traefik.ingress.kubernetes.io/router.middlewares: prometheus-operator-prometheus@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: monitoring-prometheus@kubernetescrd spec: rules: - host: prom.k-space.ee @@ -438,7 +377,7 @@ metadata: traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" external-dns.alpha.kubernetes.io/target: traefik.k-space.ee - traefik.ingress.kubernetes.io/router.middlewares: prometheus-operator-alertmanager@kubernetescrd + traefik.ingress.kubernetes.io/router.middlewares: monitoring-alertmanager@kubernetescrd spec: rules: - host: am.k-space.ee diff --git a/prometheus-operator/snmp.yml b/monitoring/snmp-configs.yaml similarity index 100% rename from prometheus-operator/snmp.yml rename to monitoring/snmp-configs.yaml diff --git a/prometheus-operator/snmp-exporter.yml b/monitoring/snmp-exporter.yaml similarity index 100% rename from prometheus-operator/snmp-exporter.yml rename to monitoring/snmp-exporter.yaml diff --git a/prometheus-operator/README.md b/prometheus-operator/README.md index ef8e8be..ec28a2d 100644 --- a/prometheus-operator/README.md +++ b/prometheus-operator/README.md @@ -1,28 +1,11 @@ # Prometheus operator +To deploy Prometheus operator: + ``` curl -L https://github.com/prometheus-operator/prometheus-operator/releases/download/v0.61.1/bundle.yaml | sed -e 's/namespace: default/namespace: prometheus-operator/g' > bundle.yml kubectl create namespace prometheus-operator kubectl apply --server-side -n prometheus-operator -f bundle.yml -kubectl delete -n prometheus-operator configmap snmp-exporter -kubectl create -n prometheus-operator configmap snmp-exporter --from-file=snmp.yml -kubectl apply -n prometheus-operator -f application.yml -f node-exporter.yml -f blackbox-exporter.yml -f snmp-exporter.yml -f mikrotik-exporter.yml -``` - - -# Slack - -``` - kubectl create -n prometheus-operator secret generic slack-secrets \ - --from-literal=webhook-url=https://hooks.slack.com/services/... -``` - - -# Mikrotik exporter - -``` - kubectl create -n prometheus-operator secret generic mikrotik-exporter \ - --from-literal=MIKROTIK_PASSWORD='f7W!H*Pu' \ - --from-literal=PROMETHEUS_BEARER_TOKEN=$(cat /dev/urandom | base64 | head -c 30) ``` +Note: Do not put any Prometheus instances or exporters in this namespace, instead have them in `monitoring` namespace