monitoring: Specify resource limits
This commit is contained in:
parent
3fbecab179
commit
d7287018ac
@ -1,15 +1,17 @@
|
||||
## Monitoring namespace
|
||||
# Monitoring namespace
|
||||
|
||||
Prometheus is accessible at [prom.k-space.ee](https://prom.k-space.ee/)
|
||||
and the corresponding AlertManager is accessible at [am.k-space.ee](https://am.k-space.ee/).
|
||||
Both are [deployed by ArgoCD](https://argocd.k-space.ee/applications/monitoring)
|
||||
from this Git repo directory using Prometheus operator.
|
||||
|
||||
Note that Prometheus and other monitoring stack components should appropriate
|
||||
Note that Prometheus and other monitoring stack components should use an appropriate
|
||||
node selector to make sure the components get scheduled on nodes which are
|
||||
hosted in a privileged VLAN where they have access to UPS SNMP targets,
|
||||
Mikrotik router/switch APIs, etc.
|
||||
|
||||
## For users
|
||||
|
||||
To add monitoring targets inside the Kubernetes cluster make use of
|
||||
[PodMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/getting-started.md#using-podmonitors) or ServiceMonitor custom
|
||||
resource definitions.
|
||||
@ -30,6 +32,8 @@ Sample queries:
|
||||
* [Disk space left](https://prom.k-space.ee/graph?g0.range_input=1h&g0.expr=node_filesystem_avail_bytes&g0.tab=1)
|
||||
* Minio [s3 egress](https://prom.k-space.ee/graph?g0.expr=rate(minio_s3_traffic_sent_bytes%5B3m%5D)&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=6h), [internode egress](https://prom.k-space.ee/graph?g0.expr=rate(minio_inter_node_traffic_sent_bytes%5B2m%5D)&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=6h), [storage used](https://prom.k-space.ee/graph?g0.expr=minio_node_disk_used_bytes&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=6h)
|
||||
|
||||
# For administrators
|
||||
|
||||
To reconfigure SNMP targets etc:
|
||||
|
||||
```
|
||||
|
@ -3,6 +3,7 @@ apiVersion: monitoring.coreos.com/v1alpha1
|
||||
kind: AlertmanagerConfig
|
||||
metadata:
|
||||
name: alertmanager
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app.kubernetes.io/name: alertmanager
|
||||
spec:
|
||||
@ -24,12 +25,12 @@ spec:
|
||||
apiURL:
|
||||
name: slack-secrets
|
||||
key: webhook-url
|
||||
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: Alertmanager
|
||||
metadata:
|
||||
name: alertmanager
|
||||
namespace: monitoring
|
||||
spec:
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
@ -55,6 +56,13 @@ spec:
|
||||
value: ''
|
||||
effect: NoSchedule
|
||||
replicas: 3
|
||||
resources:
|
||||
limits:
|
||||
cpu: 100m
|
||||
memory: 100Mi
|
||||
requests:
|
||||
cpu: 8m
|
||||
memory: 35Mi
|
||||
serviceAccountName: alertmanager
|
||||
externalUrl: http://am.k-space.ee/
|
||||
routePrefix: "/"
|
||||
@ -68,3 +76,4 @@ apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: alertmanager
|
||||
namespace: monitoring
|
||||
|
@ -3,6 +3,7 @@ apiVersion: monitoring.coreos.com/v1
|
||||
kind: PodMonitor
|
||||
metadata:
|
||||
name: metrics
|
||||
namespace: monitoring
|
||||
spec:
|
||||
namespaceSelector: {}
|
||||
selector: {}
|
||||
@ -14,6 +15,7 @@ apiVersion: monitoring.coreos.com/v1
|
||||
kind: Prometheus
|
||||
metadata:
|
||||
name: prometheus
|
||||
namespace: monitoring
|
||||
spec:
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
@ -53,11 +55,18 @@ spec:
|
||||
ruleNamespaceSelector: {}
|
||||
ruleSelector: {}
|
||||
retentionSize: 8GB
|
||||
resources:
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 2Gi
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 700Mi
|
||||
storage:
|
||||
volumeClaimTemplate:
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 10Gi
|
||||
@ -67,36 +76,50 @@ apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: prometheus
|
||||
namespace: monitoring
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: prometheus
|
||||
namespace: monitoring
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- nodes
|
||||
- nodes/metrics
|
||||
- services
|
||||
- endpoints
|
||||
- pods
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- configmaps
|
||||
verbs: ["get"]
|
||||
- apiGroups:
|
||||
- networking.k8s.io
|
||||
resources:
|
||||
- ingresses
|
||||
verbs: ["get", "list", "watch"]
|
||||
- nonResourceURLs: ["/metrics"]
|
||||
verbs: ["get"]
|
||||
- resources:
|
||||
- nodes
|
||||
- nodes/metrics
|
||||
- services
|
||||
- endpoints
|
||||
- pods
|
||||
apiGroups:
|
||||
- ""
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- resources:
|
||||
- configmaps
|
||||
apiGroups:
|
||||
- ""
|
||||
verbs:
|
||||
- get
|
||||
- resources:
|
||||
- ingresses
|
||||
apiGroups:
|
||||
- networking.k8s.io
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- nonResourceURLs:
|
||||
- /metrics
|
||||
verbs:
|
||||
- get
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: prometheus
|
||||
namespace: monitoring
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
@ -110,6 +133,7 @@ apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
name: prometheus
|
||||
namespace: monitoring
|
||||
spec:
|
||||
groups:
|
||||
- name: prometheus
|
||||
@ -356,6 +380,7 @@ apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: prometheus
|
||||
namespace: monitoring
|
||||
annotations:
|
||||
traefik.ingress.kubernetes.io/router.entrypoints: websecure
|
||||
traefik.ingress.kubernetes.io/router.tls: "true"
|
||||
@ -381,6 +406,7 @@ apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: alertmanager
|
||||
namespace: monitoring
|
||||
annotations:
|
||||
traefik.ingress.kubernetes.io/router.entrypoints: websecure
|
||||
traefik.ingress.kubernetes.io/router.tls: "true"
|
||||
@ -406,6 +432,7 @@ apiVersion: monitoring.coreos.com/v1
|
||||
kind: PodMonitor
|
||||
metadata:
|
||||
name: prometheus
|
||||
namespace: monitoring
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
@ -417,6 +444,7 @@ apiVersion: monitoring.coreos.com/v1
|
||||
kind: PodMonitor
|
||||
metadata:
|
||||
name: alertmanager
|
||||
namespace: monitoring
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
@ -428,6 +456,7 @@ apiVersion: monitoring.coreos.com/v1
|
||||
kind: PodMonitor
|
||||
metadata:
|
||||
name: operator
|
||||
namespace: monitoring
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
@ -439,6 +468,7 @@ apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: kubelet
|
||||
namespace: monitoring
|
||||
spec:
|
||||
endpoints:
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
@ -467,6 +497,7 @@ apiVersion: codemowers.cloud/v1beta1
|
||||
kind: OIDCMiddlewareClient
|
||||
metadata:
|
||||
name: prometheus
|
||||
namespace: monitoring
|
||||
spec:
|
||||
displayName: Prometheus
|
||||
uri: 'https://prom.k-space.ee'
|
||||
@ -482,6 +513,7 @@ apiVersion: codemowers.cloud/v1beta1
|
||||
kind: OIDCMiddlewareClient
|
||||
metadata:
|
||||
name: alertmanager
|
||||
namespace: monitoring
|
||||
spec:
|
||||
displayName: AlertManager
|
||||
uri: 'https://am.k-space.ee'
|
||||
|
Loading…
Reference in New Issue
Block a user