forked from k-space/kube
		
	monitoring: Specify resource limits
This commit is contained in:
		@@ -1,15 +1,17 @@
 | 
			
		||||
## Monitoring namespace
 | 
			
		||||
# Monitoring namespace
 | 
			
		||||
 | 
			
		||||
Prometheus is accessible at [prom.k-space.ee](https://prom.k-space.ee/)
 | 
			
		||||
and the corresponding AlertManager is accessible at [am.k-space.ee](https://am.k-space.ee/).
 | 
			
		||||
Both are [deployed by ArgoCD](https://argocd.k-space.ee/applications/monitoring)
 | 
			
		||||
from this Git repo directory using Prometheus operator.
 | 
			
		||||
 | 
			
		||||
Note that Prometheus and other monitoring stack components should appropriate
 | 
			
		||||
Note that Prometheus and other monitoring stack components should use appropriate
 | 
			
		||||
node selector to make sure the components get scheduled on nodes which are
 | 
			
		||||
hosted in a privileged VLAN where they have access to UPS SNMP targets,
 | 
			
		||||
Mikrotik router/switch APIs etc.
 | 
			
		||||
 | 
			
		||||
## For users
 | 
			
		||||
 | 
			
		||||
To add monitoring targets inside the Kubernetes cluster make use of
 | 
			
		||||
[PodMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/getting-started.md#using-podmonitors) or ServiceMonitor custom
 | 
			
		||||
resource definitions.
 | 
			
		||||
@@ -30,6 +32,8 @@ Sample queries:
 | 
			
		||||
* [Disk space left](https://prom.k-space.ee/graph?g0.range_input=1h&g0.expr=node_filesystem_avail_bytes&g0.tab=1)
 | 
			
		||||
* Minio [s3 egress](https://prom.k-space.ee/graph?g0.expr=rate(minio_s3_traffic_sent_bytes%5B3m%5D)&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=6h), [internode egress](https://prom.k-space.ee/graph?g0.expr=rate(minio_inter_node_traffic_sent_bytes%5B2m%5D)&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=6h), [storage used](https://prom.k-space.ee/graph?g0.expr=minio_node_disk_used_bytes&g0.tab=0&g0.display_mode=lines&g0.show_exemplars=0&g0.range_input=6h)
 | 
			
		||||
 | 
			
		||||
## For administrators
 | 
			
		||||
 | 
			
		||||
To reconfigure SNMP targets etc:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
 
 | 
			
		||||
@@ -3,6 +3,7 @@ apiVersion: monitoring.coreos.com/v1alpha1
 | 
			
		||||
kind: AlertmanagerConfig
 | 
			
		||||
metadata:
 | 
			
		||||
  name: alertmanager
 | 
			
		||||
  namespace: monitoring
 | 
			
		||||
  labels:
 | 
			
		||||
    app.kubernetes.io/name: alertmanager
 | 
			
		||||
spec:
 | 
			
		||||
@@ -24,12 +25,12 @@ spec:
 | 
			
		||||
      apiURL:
 | 
			
		||||
        name: slack-secrets
 | 
			
		||||
        key: webhook-url
 | 
			
		||||
 | 
			
		||||
---
 | 
			
		||||
apiVersion: monitoring.coreos.com/v1
 | 
			
		||||
kind: Alertmanager
 | 
			
		||||
metadata:
 | 
			
		||||
  name: alertmanager
 | 
			
		||||
  namespace: monitoring
 | 
			
		||||
spec:
 | 
			
		||||
  topologySpreadConstraints:
 | 
			
		||||
    - maxSkew: 1
 | 
			
		||||
@@ -55,6 +56,13 @@ spec:
 | 
			
		||||
      value: ''
 | 
			
		||||
      effect: NoSchedule
 | 
			
		||||
  replicas: 3
 | 
			
		||||
  resources:
 | 
			
		||||
    limits:
 | 
			
		||||
      cpu: 100m
 | 
			
		||||
      memory: 100Mi
 | 
			
		||||
    requests:
 | 
			
		||||
      cpu: 8m
 | 
			
		||||
      memory: 35Mi
 | 
			
		||||
  serviceAccountName: alertmanager
 | 
			
		||||
  externalUrl: http://am.k-space.ee/
 | 
			
		||||
  routePrefix: "/"
 | 
			
		||||
@@ -68,3 +76,4 @@ apiVersion: v1
 | 
			
		||||
kind: ServiceAccount
 | 
			
		||||
metadata:
 | 
			
		||||
  name: alertmanager
 | 
			
		||||
  namespace: monitoring
 | 
			
		||||
 
 | 
			
		||||
@@ -3,6 +3,7 @@ apiVersion: monitoring.coreos.com/v1
 | 
			
		||||
kind: PodMonitor
 | 
			
		||||
metadata:
 | 
			
		||||
  name: metrics
 | 
			
		||||
  namespace: monitoring
 | 
			
		||||
spec:
 | 
			
		||||
  namespaceSelector: {}
 | 
			
		||||
  selector: {}
 | 
			
		||||
@@ -14,6 +15,7 @@ apiVersion: monitoring.coreos.com/v1
 | 
			
		||||
kind: Prometheus
 | 
			
		||||
metadata:
 | 
			
		||||
  name: prometheus
 | 
			
		||||
  namespace: monitoring
 | 
			
		||||
spec:
 | 
			
		||||
  topologySpreadConstraints:
 | 
			
		||||
    - maxSkew: 1
 | 
			
		||||
@@ -53,11 +55,18 @@ spec:
 | 
			
		||||
  ruleNamespaceSelector: {}
 | 
			
		||||
  ruleSelector: {}
 | 
			
		||||
  retentionSize: 8GB
 | 
			
		||||
  resources:
 | 
			
		||||
    limits:
 | 
			
		||||
      cpu: 500m
 | 
			
		||||
      memory: 2Gi
 | 
			
		||||
    requests:
 | 
			
		||||
      cpu: 100m
 | 
			
		||||
      memory: 700Mi
 | 
			
		||||
  storage:
 | 
			
		||||
    volumeClaimTemplate:
 | 
			
		||||
      spec:
 | 
			
		||||
        accessModes:
 | 
			
		||||
        - ReadWriteOnce
 | 
			
		||||
          - ReadWriteOnce
 | 
			
		||||
        resources:
 | 
			
		||||
          requests:
 | 
			
		||||
            storage: 10Gi
 | 
			
		||||
@@ -67,36 +76,50 @@ apiVersion: v1
 | 
			
		||||
kind: ServiceAccount
 | 
			
		||||
metadata:
 | 
			
		||||
  name: prometheus
 | 
			
		||||
  namespace: monitoring
 | 
			
		||||
---
 | 
			
		||||
apiVersion: rbac.authorization.k8s.io/v1
 | 
			
		||||
kind: ClusterRole
 | 
			
		||||
metadata:
 | 
			
		||||
  name: prometheus
 | 
			
		||||
  namespace: monitoring
 | 
			
		||||
rules:
 | 
			
		||||
- apiGroups: [""]
 | 
			
		||||
  resources:
 | 
			
		||||
  - nodes
 | 
			
		||||
  - nodes/metrics
 | 
			
		||||
  - services
 | 
			
		||||
  - endpoints
 | 
			
		||||
  - pods
 | 
			
		||||
  verbs: ["get", "list", "watch"]
 | 
			
		||||
- apiGroups: [""]
 | 
			
		||||
  resources:
 | 
			
		||||
  - configmaps
 | 
			
		||||
  verbs: ["get"]
 | 
			
		||||
- apiGroups:
 | 
			
		||||
  - networking.k8s.io
 | 
			
		||||
  resources:
 | 
			
		||||
  - ingresses
 | 
			
		||||
  verbs: ["get", "list", "watch"]
 | 
			
		||||
- nonResourceURLs: ["/metrics"]
 | 
			
		||||
  verbs: ["get"]
 | 
			
		||||
  - resources:
 | 
			
		||||
      - nodes
 | 
			
		||||
      - nodes/metrics
 | 
			
		||||
      - services
 | 
			
		||||
      - endpoints
 | 
			
		||||
      - pods
 | 
			
		||||
    apiGroups:
 | 
			
		||||
      - ""
 | 
			
		||||
    verbs:
 | 
			
		||||
      - get
 | 
			
		||||
      - list
 | 
			
		||||
      - watch
 | 
			
		||||
  - resources:
 | 
			
		||||
      - configmaps
 | 
			
		||||
    apiGroups:
 | 
			
		||||
      - ""
 | 
			
		||||
    verbs:
 | 
			
		||||
      - get
 | 
			
		||||
  - resources:
 | 
			
		||||
      - ingresses
 | 
			
		||||
    apiGroups:
 | 
			
		||||
      - networking.k8s.io
 | 
			
		||||
    verbs:
 | 
			
		||||
      - get
 | 
			
		||||
      - list
 | 
			
		||||
      - watch
 | 
			
		||||
  - nonResourceURLs:
 | 
			
		||||
      - /metrics
 | 
			
		||||
    verbs:
 | 
			
		||||
      - get
 | 
			
		||||
---
 | 
			
		||||
apiVersion: rbac.authorization.k8s.io/v1
 | 
			
		||||
kind: ClusterRoleBinding
 | 
			
		||||
metadata:
 | 
			
		||||
  name: prometheus
 | 
			
		||||
  namespace: monitoring
 | 
			
		||||
roleRef:
 | 
			
		||||
  apiGroup: rbac.authorization.k8s.io
 | 
			
		||||
  kind: ClusterRole
 | 
			
		||||
@@ -110,6 +133,7 @@ apiVersion: monitoring.coreos.com/v1
 | 
			
		||||
kind: PrometheusRule
 | 
			
		||||
metadata:
 | 
			
		||||
  name: prometheus
 | 
			
		||||
  namespace: monitoring
 | 
			
		||||
spec:
 | 
			
		||||
  groups:
 | 
			
		||||
  - name: prometheus
 | 
			
		||||
@@ -356,6 +380,7 @@ apiVersion: networking.k8s.io/v1
 | 
			
		||||
kind: Ingress
 | 
			
		||||
metadata:
 | 
			
		||||
  name: prometheus
 | 
			
		||||
  namespace: monitoring
 | 
			
		||||
  annotations:
 | 
			
		||||
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
 | 
			
		||||
    traefik.ingress.kubernetes.io/router.tls: "true"
 | 
			
		||||
@@ -381,6 +406,7 @@ apiVersion: networking.k8s.io/v1
 | 
			
		||||
kind: Ingress
 | 
			
		||||
metadata:
 | 
			
		||||
  name: alertmanager
 | 
			
		||||
  namespace: monitoring
 | 
			
		||||
  annotations:
 | 
			
		||||
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
 | 
			
		||||
    traefik.ingress.kubernetes.io/router.tls: "true"
 | 
			
		||||
@@ -406,6 +432,7 @@ apiVersion: monitoring.coreos.com/v1
 | 
			
		||||
kind: PodMonitor
 | 
			
		||||
metadata:
 | 
			
		||||
  name: prometheus
 | 
			
		||||
  namespace: monitoring
 | 
			
		||||
spec:
 | 
			
		||||
  selector:
 | 
			
		||||
    matchLabels:
 | 
			
		||||
@@ -417,6 +444,7 @@ apiVersion: monitoring.coreos.com/v1
 | 
			
		||||
kind: PodMonitor
 | 
			
		||||
metadata:
 | 
			
		||||
  name: alertmanager
 | 
			
		||||
  namespace: monitoring
 | 
			
		||||
spec:
 | 
			
		||||
  selector:
 | 
			
		||||
    matchLabels:
 | 
			
		||||
@@ -428,6 +456,7 @@ apiVersion: monitoring.coreos.com/v1
 | 
			
		||||
kind: PodMonitor
 | 
			
		||||
metadata:
 | 
			
		||||
  name: operator
 | 
			
		||||
  namespace: monitoring
 | 
			
		||||
spec:
 | 
			
		||||
  selector:
 | 
			
		||||
    matchLabels:
 | 
			
		||||
@@ -439,6 +468,7 @@ apiVersion: monitoring.coreos.com/v1
 | 
			
		||||
kind: ServiceMonitor
 | 
			
		||||
metadata:
 | 
			
		||||
  name: kubelet
 | 
			
		||||
  namespace: monitoring
 | 
			
		||||
spec:
 | 
			
		||||
  endpoints:
 | 
			
		||||
  - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
 | 
			
		||||
@@ -467,6 +497,7 @@ apiVersion: codemowers.cloud/v1beta1
 | 
			
		||||
kind: OIDCMiddlewareClient
 | 
			
		||||
metadata:
 | 
			
		||||
  name: prometheus
 | 
			
		||||
  namespace: monitoring
 | 
			
		||||
spec:
 | 
			
		||||
  displayName: Prometheus
 | 
			
		||||
  uri: 'https://prom.k-space.ee'
 | 
			
		||||
@@ -482,6 +513,7 @@ apiVersion: codemowers.cloud/v1beta1
 | 
			
		||||
kind: OIDCMiddlewareClient
 | 
			
		||||
metadata:
 | 
			
		||||
  name: alertmanager
 | 
			
		||||
  namespace: monitoring
 | 
			
		||||
spec:
 | 
			
		||||
  displayName: AlertManager
 | 
			
		||||
  uri: 'https://am.k-space.ee'
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user