monitoring: Specify resource limits

This commit is contained in:
Lauri Võsandi 2024-08-24 12:36:37 +03:00
parent 3fbecab179
commit d7287018ac
3 changed files with 68 additions and 23 deletions

View File

@ -1,15 +1,17 @@
## Monitoring namespace # Monitoring namespace
Prometheus is accessible at [prom.k-space.ee](https://prom.k-space.ee/) Prometheus is accessible at [prom.k-space.ee](https://prom.k-space.ee/)
and the corresponding AlertManager is accessible at [am.k-space.ee](https://am.k-space.ee/). and the corresponding AlertManager is accessible at [am.k-space.ee](https://am.k-space.ee/).
Both are [deployed by ArgoCD](https://argocd.k-space.ee/applications/monitoring) Both are [deployed by ArgoCD](https://argocd.k-space.ee/applications/monitoring)
from this Git repo directory using Prometheus operator. from this Git repo directory using Prometheus operator.
Note that Prometheus and other monitoring stack components should appropriate Note that Prometheus and other monitoring stack components should use an appropriate
node selector to make sure the components get scheduled on nodes which are node selector to make sure the components get scheduled on nodes which are
hosted in a privileged VLAN where they have access to UPS SNMP targets, hosted in a privileged VLAN where they have access to UPS SNMP targets,
Mikrotik router/switch APIs etc. Mikrotik router/switch APIs etc.
## For users
To add monitoring targets inside the Kubernetes cluster make use of To add monitoring targets inside the Kubernetes cluster make use of
[PodMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/getting-started.md#using-podmonitors) or ServiceMonitor custom [PodMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/getting-started.md#using-podmonitors) or ServiceMonitor custom
resource definitions. resource definitions.
@ -30,6 +32,8 @@ Sample queries:
* [Disk space left](https://prom.k-space.ee/graph?g0.range_input=1h&g0.expr=node_filesystem_avail_bytes&g0.tab=1) * [Disk space left](https://prom.k-space.ee/graph?g0.range_input=1h&g0.expr=node_filesystem_avail_bytes&g0.tab=1)
* Minio [s3 egress](https://prom.k-space.ee/graph?g0.expr=rate(minio_s3_traffic_sent_bytes%5B3m%5D)&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=6h), [internode egress](https://prom.k-space.ee/graph?g0.expr=rate(minio_inter_node_traffic_sent_bytes%5B2m%5D)&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=6h), [storage used](https://prom.k-space.ee/graph?g0.expr=minio_node_disk_used_bytes&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=6h) * Minio [s3 egress](https://prom.k-space.ee/graph?g0.expr=rate(minio_s3_traffic_sent_bytes%5B3m%5D)&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=6h), [internode egress](https://prom.k-space.ee/graph?g0.expr=rate(minio_inter_node_traffic_sent_bytes%5B2m%5D)&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=6h), [storage used](https://prom.k-space.ee/graph?g0.expr=minio_node_disk_used_bytes&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=6h)
## For administrators
To reconfigure SNMP targets etc: To reconfigure SNMP targets etc:
``` ```

View File

@ -3,6 +3,7 @@ apiVersion: monitoring.coreos.com/v1alpha1
kind: AlertmanagerConfig kind: AlertmanagerConfig
metadata: metadata:
name: alertmanager name: alertmanager
namespace: monitoring
labels: labels:
app.kubernetes.io/name: alertmanager app.kubernetes.io/name: alertmanager
spec: spec:
@ -24,12 +25,12 @@ spec:
apiURL: apiURL:
name: slack-secrets name: slack-secrets
key: webhook-url key: webhook-url
--- ---
apiVersion: monitoring.coreos.com/v1 apiVersion: monitoring.coreos.com/v1
kind: Alertmanager kind: Alertmanager
metadata: metadata:
name: alertmanager name: alertmanager
namespace: monitoring
spec: spec:
topologySpreadConstraints: topologySpreadConstraints:
- maxSkew: 1 - maxSkew: 1
@ -55,6 +56,13 @@ spec:
value: '' value: ''
effect: NoSchedule effect: NoSchedule
replicas: 3 replicas: 3
resources:
limits:
cpu: 100m
memory: 100Mi
requests:
cpu: 8m
memory: 35Mi
serviceAccountName: alertmanager serviceAccountName: alertmanager
externalUrl: http://am.k-space.ee/ externalUrl: http://am.k-space.ee/
routePrefix: "/" routePrefix: "/"
@ -68,3 +76,4 @@ apiVersion: v1
kind: ServiceAccount kind: ServiceAccount
metadata: metadata:
name: alertmanager name: alertmanager
namespace: monitoring

View File

@ -3,6 +3,7 @@ apiVersion: monitoring.coreos.com/v1
kind: PodMonitor kind: PodMonitor
metadata: metadata:
name: metrics name: metrics
namespace: monitoring
spec: spec:
namespaceSelector: {} namespaceSelector: {}
selector: {} selector: {}
@ -14,6 +15,7 @@ apiVersion: monitoring.coreos.com/v1
kind: Prometheus kind: Prometheus
metadata: metadata:
name: prometheus name: prometheus
namespace: monitoring
spec: spec:
topologySpreadConstraints: topologySpreadConstraints:
- maxSkew: 1 - maxSkew: 1
@ -53,6 +55,13 @@ spec:
ruleNamespaceSelector: {} ruleNamespaceSelector: {}
ruleSelector: {} ruleSelector: {}
retentionSize: 8GB retentionSize: 8GB
resources:
limits:
cpu: 500m
memory: 2Gi
requests:
cpu: 100m
memory: 700Mi
storage: storage:
volumeClaimTemplate: volumeClaimTemplate:
spec: spec:
@ -67,36 +76,50 @@ apiVersion: v1
kind: ServiceAccount kind: ServiceAccount
metadata: metadata:
name: prometheus name: prometheus
namespace: monitoring
--- ---
apiVersion: rbac.authorization.k8s.io/v1 apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole kind: ClusterRole
metadata: metadata:
name: prometheus name: prometheus
namespace: monitoring
rules: rules:
- apiGroups: [""] - resources:
resources:
- nodes - nodes
- nodes/metrics - nodes/metrics
- services - services
- endpoints - endpoints
- pods - pods
verbs: ["get", "list", "watch"] apiGroups:
- apiGroups: [""] - ""
resources: verbs:
- get
- list
- watch
- resources:
- configmaps - configmaps
verbs: ["get"] apiGroups:
- apiGroups: - ""
- networking.k8s.io verbs:
resources: - get
- resources:
- ingresses - ingresses
verbs: ["get", "list", "watch"] apiGroups:
- nonResourceURLs: ["/metrics"] - networking.k8s.io
verbs: ["get"] verbs:
- get
- list
- watch
- nonResourceURLs:
- /metrics
verbs:
- get
--- ---
apiVersion: rbac.authorization.k8s.io/v1 apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding kind: ClusterRoleBinding
metadata: metadata:
name: prometheus name: prometheus
namespace: monitoring
roleRef: roleRef:
apiGroup: rbac.authorization.k8s.io apiGroup: rbac.authorization.k8s.io
kind: ClusterRole kind: ClusterRole
@ -110,6 +133,7 @@ apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule kind: PrometheusRule
metadata: metadata:
name: prometheus name: prometheus
namespace: monitoring
spec: spec:
groups: groups:
- name: prometheus - name: prometheus
@ -356,6 +380,7 @@ apiVersion: networking.k8s.io/v1
kind: Ingress kind: Ingress
metadata: metadata:
name: prometheus name: prometheus
namespace: monitoring
annotations: annotations:
traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.entrypoints: websecure
traefik.ingress.kubernetes.io/router.tls: "true" traefik.ingress.kubernetes.io/router.tls: "true"
@ -381,6 +406,7 @@ apiVersion: networking.k8s.io/v1
kind: Ingress kind: Ingress
metadata: metadata:
name: alertmanager name: alertmanager
namespace: monitoring
annotations: annotations:
traefik.ingress.kubernetes.io/router.entrypoints: websecure traefik.ingress.kubernetes.io/router.entrypoints: websecure
traefik.ingress.kubernetes.io/router.tls: "true" traefik.ingress.kubernetes.io/router.tls: "true"
@ -406,6 +432,7 @@ apiVersion: monitoring.coreos.com/v1
kind: PodMonitor kind: PodMonitor
metadata: metadata:
name: prometheus name: prometheus
namespace: monitoring
spec: spec:
selector: selector:
matchLabels: matchLabels:
@ -417,6 +444,7 @@ apiVersion: monitoring.coreos.com/v1
kind: PodMonitor kind: PodMonitor
metadata: metadata:
name: alertmanager name: alertmanager
namespace: monitoring
spec: spec:
selector: selector:
matchLabels: matchLabels:
@ -428,6 +456,7 @@ apiVersion: monitoring.coreos.com/v1
kind: PodMonitor kind: PodMonitor
metadata: metadata:
name: operator name: operator
namespace: monitoring
spec: spec:
selector: selector:
matchLabels: matchLabels:
@ -439,6 +468,7 @@ apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor kind: ServiceMonitor
metadata: metadata:
name: kubelet name: kubelet
namespace: monitoring
spec: spec:
endpoints: endpoints:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
@ -467,6 +497,7 @@ apiVersion: codemowers.cloud/v1beta1
kind: OIDCMiddlewareClient kind: OIDCMiddlewareClient
metadata: metadata:
name: prometheus name: prometheus
namespace: monitoring
spec: spec:
displayName: Prometheus displayName: Prometheus
uri: 'https://prom.k-space.ee' uri: 'https://prom.k-space.ee'
@ -482,6 +513,7 @@ apiVersion: codemowers.cloud/v1beta1
kind: OIDCMiddlewareClient kind: OIDCMiddlewareClient
metadata: metadata:
name: alertmanager name: alertmanager
namespace: monitoring
spec: spec:
displayName: AlertManager displayName: AlertManager
uri: 'https://am.k-space.ee' uri: 'https://am.k-space.ee'