monitoring: Specify resource limits
This commit is contained in:
parent
3fbecab179
commit
d7287018ac
@ -1,15 +1,17 @@
|
|||||||
## Monitoring namespace
|
# Monitoring namespace
|
||||||
|
|
||||||
Prometheus is accessible at [prom.k-space.ee](https://prom.k-space.ee/)
|
Prometheus is accessible at [prom.k-space.ee](https://prom.k-space.ee/)
|
||||||
and the corresponding AlertManager is accessible at [am.k-space.ee](https://am.k-space.ee/).
|
and the corresponding AlertManager is accessible at [am.k-space.ee](https://am.k-space.ee/).
|
||||||
Both are [deployed by ArgoCD](https://argocd.k-space.ee/applications/monitoring)
|
Both are [deployed by ArgoCD](https://argocd.k-space.ee/applications/monitoring)
|
||||||
from this Git repo directory using Prometheus operator.
|
from this Git repo directory using Prometheus operator.
|
||||||
|
|
||||||
Note that Prometheus and other monitoring stack components should appropriate
|
Note that Prometheus and other monitoring stack components should use appropriate
|
||||||
node selector to make sure the components get scheduled on nodes which are
|
node selector to make sure the components get scheduled on nodes which are
|
||||||
hosted in a privileged VLAN where they have access to UPS SNMP targets,
|
hosted in a privileged VLAN where they have access to UPS SNMP targets,
|
||||||
Mikrotik router/switch APIs etc.
|
Mikrotik router/switch APIs etc.
|
||||||
|
|
||||||
|
## For users
|
||||||
|
|
||||||
To add monitoring targets inside the Kubernetes cluster make use of
|
To add monitoring targets inside the Kubernetes cluster make use of
|
||||||
[PodMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/getting-started.md#using-podmonitors) or ServiceMonitor custom
|
[PodMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/getting-started.md#using-podmonitors) or ServiceMonitor custom
|
||||||
resource definitions.
|
resource definitions.
|
||||||
@ -30,6 +32,8 @@ Sample queries:
|
|||||||
* [Disk space left](https://prom.k-space.ee/graph?g0.range_input=1h&g0.expr=node_filesystem_avail_bytes&g0.tab=1)
|
* [Disk space left](https://prom.k-space.ee/graph?g0.range_input=1h&g0.expr=node_filesystem_avail_bytes&g0.tab=1)
|
||||||
* Minio [s3 egress](https://prom.k-space.ee/graph?g0.expr=rate(minio_s3_traffic_sent_bytes%5B3m%5D)&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=6h), [internode egress](https://prom.k-space.ee/graph?g0.expr=rate(minio_inter_node_traffic_sent_bytes%5B2m%5D)&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=6h), [storage used](https://prom.k-space.ee/graph?g0.expr=minio_node_disk_used_bytes&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=6h)
|
* Minio [s3 egress](https://prom.k-space.ee/graph?g0.expr=rate(minio_s3_traffic_sent_bytes%5B3m%5D)&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=6h), [internode egress](https://prom.k-space.ee/graph?g0.expr=rate(minio_inter_node_traffic_sent_bytes%5B2m%5D)&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=6h), [storage used](https://prom.k-space.ee/graph?g0.expr=minio_node_disk_used_bytes&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=6h)
|
||||||
|
|
||||||
|
## For administrators
|
||||||
|
|
||||||
To reconfigure SNMP targets etc:
|
To reconfigure SNMP targets etc:
|
||||||
|
|
||||||
```
|
```
|
||||||
|
@ -3,6 +3,7 @@ apiVersion: monitoring.coreos.com/v1alpha1
|
|||||||
kind: AlertmanagerConfig
|
kind: AlertmanagerConfig
|
||||||
metadata:
|
metadata:
|
||||||
name: alertmanager
|
name: alertmanager
|
||||||
|
namespace: monitoring
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/name: alertmanager
|
app.kubernetes.io/name: alertmanager
|
||||||
spec:
|
spec:
|
||||||
@ -24,12 +25,12 @@ spec:
|
|||||||
apiURL:
|
apiURL:
|
||||||
name: slack-secrets
|
name: slack-secrets
|
||||||
key: webhook-url
|
key: webhook-url
|
||||||
|
|
||||||
---
|
---
|
||||||
apiVersion: monitoring.coreos.com/v1
|
apiVersion: monitoring.coreos.com/v1
|
||||||
kind: Alertmanager
|
kind: Alertmanager
|
||||||
metadata:
|
metadata:
|
||||||
name: alertmanager
|
name: alertmanager
|
||||||
|
namespace: monitoring
|
||||||
spec:
|
spec:
|
||||||
topologySpreadConstraints:
|
topologySpreadConstraints:
|
||||||
- maxSkew: 1
|
- maxSkew: 1
|
||||||
@ -55,6 +56,13 @@ spec:
|
|||||||
value: ''
|
value: ''
|
||||||
effect: NoSchedule
|
effect: NoSchedule
|
||||||
replicas: 3
|
replicas: 3
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 100m
|
||||||
|
memory: 100Mi
|
||||||
|
requests:
|
||||||
|
cpu: 8m
|
||||||
|
memory: 35Mi
|
||||||
serviceAccountName: alertmanager
|
serviceAccountName: alertmanager
|
||||||
externalUrl: http://am.k-space.ee/
|
externalUrl: http://am.k-space.ee/
|
||||||
routePrefix: "/"
|
routePrefix: "/"
|
||||||
@ -68,3 +76,4 @@ apiVersion: v1
|
|||||||
kind: ServiceAccount
|
kind: ServiceAccount
|
||||||
metadata:
|
metadata:
|
||||||
name: alertmanager
|
name: alertmanager
|
||||||
|
namespace: monitoring
|
||||||
|
@ -3,6 +3,7 @@ apiVersion: monitoring.coreos.com/v1
|
|||||||
kind: PodMonitor
|
kind: PodMonitor
|
||||||
metadata:
|
metadata:
|
||||||
name: metrics
|
name: metrics
|
||||||
|
namespace: monitoring
|
||||||
spec:
|
spec:
|
||||||
namespaceSelector: {}
|
namespaceSelector: {}
|
||||||
selector: {}
|
selector: {}
|
||||||
@ -14,6 +15,7 @@ apiVersion: monitoring.coreos.com/v1
|
|||||||
kind: Prometheus
|
kind: Prometheus
|
||||||
metadata:
|
metadata:
|
||||||
name: prometheus
|
name: prometheus
|
||||||
|
namespace: monitoring
|
||||||
spec:
|
spec:
|
||||||
topologySpreadConstraints:
|
topologySpreadConstraints:
|
||||||
- maxSkew: 1
|
- maxSkew: 1
|
||||||
@ -53,6 +55,13 @@ spec:
|
|||||||
ruleNamespaceSelector: {}
|
ruleNamespaceSelector: {}
|
||||||
ruleSelector: {}
|
ruleSelector: {}
|
||||||
retentionSize: 8GB
|
retentionSize: 8GB
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 2Gi
|
||||||
|
requests:
|
||||||
|
cpu: 100m
|
||||||
|
memory: 700Mi
|
||||||
storage:
|
storage:
|
||||||
volumeClaimTemplate:
|
volumeClaimTemplate:
|
||||||
spec:
|
spec:
|
||||||
@ -67,36 +76,50 @@ apiVersion: v1
|
|||||||
kind: ServiceAccount
|
kind: ServiceAccount
|
||||||
metadata:
|
metadata:
|
||||||
name: prometheus
|
name: prometheus
|
||||||
|
namespace: monitoring
|
||||||
---
|
---
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRole
|
kind: ClusterRole
|
||||||
metadata:
|
metadata:
|
||||||
name: prometheus
|
name: prometheus
|
||||||
|
namespace: monitoring
|
||||||
rules:
|
rules:
|
||||||
- apiGroups: [""]
|
- resources:
|
||||||
resources:
|
|
||||||
- nodes
|
- nodes
|
||||||
- nodes/metrics
|
- nodes/metrics
|
||||||
- services
|
- services
|
||||||
- endpoints
|
- endpoints
|
||||||
- pods
|
- pods
|
||||||
verbs: ["get", "list", "watch"]
|
apiGroups:
|
||||||
- apiGroups: [""]
|
- ""
|
||||||
resources:
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
|
- resources:
|
||||||
- configmaps
|
- configmaps
|
||||||
verbs: ["get"]
|
apiGroups:
|
||||||
- apiGroups:
|
- ""
|
||||||
- networking.k8s.io
|
verbs:
|
||||||
resources:
|
- get
|
||||||
|
- resources:
|
||||||
- ingresses
|
- ingresses
|
||||||
verbs: ["get", "list", "watch"]
|
apiGroups:
|
||||||
- nonResourceURLs: ["/metrics"]
|
- networking.k8s.io
|
||||||
verbs: ["get"]
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
|
- nonResourceURLs:
|
||||||
|
- /metrics
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
---
|
---
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRoleBinding
|
kind: ClusterRoleBinding
|
||||||
metadata:
|
metadata:
|
||||||
name: prometheus
|
name: prometheus
|
||||||
|
namespace: monitoring
|
||||||
roleRef:
|
roleRef:
|
||||||
apiGroup: rbac.authorization.k8s.io
|
apiGroup: rbac.authorization.k8s.io
|
||||||
kind: ClusterRole
|
kind: ClusterRole
|
||||||
@ -110,6 +133,7 @@ apiVersion: monitoring.coreos.com/v1
|
|||||||
kind: PrometheusRule
|
kind: PrometheusRule
|
||||||
metadata:
|
metadata:
|
||||||
name: prometheus
|
name: prometheus
|
||||||
|
namespace: monitoring
|
||||||
spec:
|
spec:
|
||||||
groups:
|
groups:
|
||||||
- name: prometheus
|
- name: prometheus
|
||||||
@ -356,6 +380,7 @@ apiVersion: networking.k8s.io/v1
|
|||||||
kind: Ingress
|
kind: Ingress
|
||||||
metadata:
|
metadata:
|
||||||
name: prometheus
|
name: prometheus
|
||||||
|
namespace: monitoring
|
||||||
annotations:
|
annotations:
|
||||||
traefik.ingress.kubernetes.io/router.entrypoints: websecure
|
traefik.ingress.kubernetes.io/router.entrypoints: websecure
|
||||||
traefik.ingress.kubernetes.io/router.tls: "true"
|
traefik.ingress.kubernetes.io/router.tls: "true"
|
||||||
@ -381,6 +406,7 @@ apiVersion: networking.k8s.io/v1
|
|||||||
kind: Ingress
|
kind: Ingress
|
||||||
metadata:
|
metadata:
|
||||||
name: alertmanager
|
name: alertmanager
|
||||||
|
namespace: monitoring
|
||||||
annotations:
|
annotations:
|
||||||
traefik.ingress.kubernetes.io/router.entrypoints: websecure
|
traefik.ingress.kubernetes.io/router.entrypoints: websecure
|
||||||
traefik.ingress.kubernetes.io/router.tls: "true"
|
traefik.ingress.kubernetes.io/router.tls: "true"
|
||||||
@ -406,6 +432,7 @@ apiVersion: monitoring.coreos.com/v1
|
|||||||
kind: PodMonitor
|
kind: PodMonitor
|
||||||
metadata:
|
metadata:
|
||||||
name: prometheus
|
name: prometheus
|
||||||
|
namespace: monitoring
|
||||||
spec:
|
spec:
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
@ -417,6 +444,7 @@ apiVersion: monitoring.coreos.com/v1
|
|||||||
kind: PodMonitor
|
kind: PodMonitor
|
||||||
metadata:
|
metadata:
|
||||||
name: alertmanager
|
name: alertmanager
|
||||||
|
namespace: monitoring
|
||||||
spec:
|
spec:
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
@ -428,6 +456,7 @@ apiVersion: monitoring.coreos.com/v1
|
|||||||
kind: PodMonitor
|
kind: PodMonitor
|
||||||
metadata:
|
metadata:
|
||||||
name: operator
|
name: operator
|
||||||
|
namespace: monitoring
|
||||||
spec:
|
spec:
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
@ -439,6 +468,7 @@ apiVersion: monitoring.coreos.com/v1
|
|||||||
kind: ServiceMonitor
|
kind: ServiceMonitor
|
||||||
metadata:
|
metadata:
|
||||||
name: kubelet
|
name: kubelet
|
||||||
|
namespace: monitoring
|
||||||
spec:
|
spec:
|
||||||
endpoints:
|
endpoints:
|
||||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||||
@ -467,6 +497,7 @@ apiVersion: codemowers.cloud/v1beta1
|
|||||||
kind: OIDCMiddlewareClient
|
kind: OIDCMiddlewareClient
|
||||||
metadata:
|
metadata:
|
||||||
name: prometheus
|
name: prometheus
|
||||||
|
namespace: monitoring
|
||||||
spec:
|
spec:
|
||||||
displayName: Prometheus
|
displayName: Prometheus
|
||||||
uri: 'https://prom.k-space.ee'
|
uri: 'https://prom.k-space.ee'
|
||||||
@ -482,6 +513,7 @@ apiVersion: codemowers.cloud/v1beta1
|
|||||||
kind: OIDCMiddlewareClient
|
kind: OIDCMiddlewareClient
|
||||||
metadata:
|
metadata:
|
||||||
name: alertmanager
|
name: alertmanager
|
||||||
|
namespace: monitoring
|
||||||
spec:
|
spec:
|
||||||
displayName: AlertManager
|
displayName: AlertManager
|
||||||
uri: 'https://am.k-space.ee'
|
uri: 'https://am.k-space.ee'
|
||||||
|
Loading…
Reference in New Issue
Block a user