From d7287018ace83e6ce595b96f865780641718b024 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lauri=20V=C3=B5sandi?= Date: Sat, 24 Aug 2024 12:36:37 +0300 Subject: [PATCH] monitoring: Specify resource limits --- monitoring/README.md | 8 +++- monitoring/alertmanager.yaml | 11 +++++- monitoring/prometheus.yaml | 72 ++++++++++++++++++++++++++---------- 3 files changed, 68 insertions(+), 23 deletions(-) diff --git a/monitoring/README.md b/monitoring/README.md index 3309fb1..c7c2ac3 100644 --- a/monitoring/README.md +++ b/monitoring/README.md @@ -1,15 +1,17 @@ -## Monitoring namespace +# Monitoring namespace Prometheus is accessible at [prom.k-space.ee](https://prom.k-space.ee/) and the corresponding AlertManager is accessible at [am.k-space.ee](https://am.k-space.ee/). Both are [deployed by ArgoCD](https://argocd.k-space.ee/applications/monitoring) from this Git repo directory using Prometheus operator. -Note that Prometheus and other monitoring stack components should appropriate +Note that Prometheus and other monitoring stack components should use an appropriate node selector to make sure the components get scheduled on nodes which are hosted in a privileged VLAN where they have access to UPS SNMP targets, Mikrotik router/switch API-s etc. +## For users + To add monitoring targets inside the Kubernetes cluster make use of [PodMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/getting-started.md#using-podmonitors) or ServiceMonitor custom resource definitions.
@@ -30,6 +32,8 @@ Sample queries: * [Disk space left](https://prom.k-space.ee/graph?g0.range_input=1h&g0.expr=node_filesystem_avail_bytes&g0.tab=1) * Minio [s3 egress](https://prom.k-space.ee/graph?g0.expr=rate(minio_s3_traffic_sent_bytes%5B3m%5D)&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=6h), [internode egress](https://prom.k-space.ee/graph?g0.expr=rate(minio_inter_node_traffic_sent_bytes%5B2m%5D)&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=6h), [storage used](https://prom.k-space.ee/graph?g0.expr=minio_node_disk_used_bytes&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=6h) +## For administrators + To reconfigure SNMP targets etc: ``` diff --git a/monitoring/alertmanager.yaml b/monitoring/alertmanager.yaml index 8d25cb0..6dd7e5b 100644 --- a/monitoring/alertmanager.yaml +++ b/monitoring/alertmanager.yaml @@ -3,6 +3,7 @@ apiVersion: monitoring.coreos.com/v1alpha1 kind: AlertmanagerConfig metadata: name: alertmanager + namespace: monitoring labels: app.kubernetes.io/name: alertmanager spec: @@ -24,12 +25,12 @@ spec: apiURL: name: slack-secrets key: webhook-url - --- apiVersion: monitoring.coreos.com/v1 kind: Alertmanager metadata: name: alertmanager + namespace: monitoring spec: topologySpreadConstraints: - maxSkew: 1 @@ -55,6 +56,13 @@ spec: value: '' effect: NoSchedule replicas: 3 + resources: + limits: + cpu: 100m + memory: 100Mi + requests: + cpu: 8m + memory: 35Mi serviceAccountName: alertmanager externalUrl: http://am.k-space.ee/ routePrefix: "/" @@ -68,3 +76,4 @@ apiVersion: v1 kind: ServiceAccount metadata: name: alertmanager + namespace: monitoring diff --git a/monitoring/prometheus.yaml b/monitoring/prometheus.yaml index aca37d9..11571ab 100644 --- a/monitoring/prometheus.yaml +++ b/monitoring/prometheus.yaml @@ -3,6 +3,7 @@ apiVersion: monitoring.coreos.com/v1 kind: PodMonitor metadata: name: metrics + namespace: monitoring spec: namespaceSelector: {} selector: {} @@ -14,6 +15,7 @@
apiVersion: monitoring.coreos.com/v1 kind: Prometheus metadata: name: prometheus + namespace: monitoring spec: topologySpreadConstraints: - maxSkew: 1 @@ -53,11 +55,18 @@ spec: ruleNamespaceSelector: {} ruleSelector: {} retentionSize: 8GB + resources: + limits: + cpu: 500m + memory: 2Gi + requests: + cpu: 100m + memory: 700Mi storage: volumeClaimTemplate: spec: accessModes: - - ReadWriteOnce + - ReadWriteOnce resources: requests: storage: 10Gi @@ -67,36 +76,50 @@ apiVersion: v1 kind: ServiceAccount metadata: name: prometheus + namespace: monitoring --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: prometheus + namespace: monitoring rules: -- apiGroups: [""] - resources: - - nodes - - nodes/metrics - - services - - endpoints - - pods - verbs: ["get", "list", "watch"] -- apiGroups: [""] - resources: - - configmaps - verbs: ["get"] -- apiGroups: - - networking.k8s.io - resources: - - ingresses - verbs: ["get", "list", "watch"] -- nonResourceURLs: ["/metrics"] - verbs: ["get"] + - resources: + - nodes + - nodes/metrics + - services + - endpoints + - pods + apiGroups: + - "" + verbs: + - get + - list + - watch + - resources: + - configmaps + apiGroups: + - "" + verbs: + - get + - resources: + - ingresses + apiGroups: + - networking.k8s.io + verbs: + - get + - list + - watch + - nonResourceURLs: + - /metrics + verbs: + - get --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: name: prometheus + namespace: monitoring roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole @@ -110,6 +133,7 @@ apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: prometheus + namespace: monitoring spec: groups: - name: prometheus @@ -356,6 +380,7 @@ apiVersion: networking.k8s.io/v1 kind: Ingress metadata: name: prometheus + namespace: monitoring annotations: traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" @@ -381,6 +406,7 @@ apiVersion: 
networking.k8s.io/v1 kind: Ingress metadata: name: alertmanager + namespace: monitoring annotations: traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.tls: "true" @@ -406,6 +432,7 @@ apiVersion: monitoring.coreos.com/v1 kind: PodMonitor metadata: name: prometheus + namespace: monitoring spec: selector: matchLabels: @@ -417,6 +444,7 @@ apiVersion: monitoring.coreos.com/v1 kind: PodMonitor metadata: name: alertmanager + namespace: monitoring spec: selector: matchLabels: @@ -428,6 +456,7 @@ apiVersion: monitoring.coreos.com/v1 kind: PodMonitor metadata: name: operator + namespace: monitoring spec: selector: matchLabels: @@ -439,6 +468,7 @@ apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: kubelet + namespace: monitoring spec: endpoints: - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token @@ -467,6 +497,7 @@ apiVersion: codemowers.cloud/v1beta1 kind: OIDCMiddlewareClient metadata: name: prometheus + namespace: monitoring spec: displayName: Prometheus uri: 'https://prom.k-space.ee' @@ -482,6 +513,7 @@ apiVersion: codemowers.cloud/v1beta1 kind: OIDCMiddlewareClient metadata: name: alertmanager + namespace: monitoring spec: displayName: AlertManager uri: 'https://am.k-space.ee'