Initial commit

This commit is contained in:
2022-08-16 12:40:54 +03:00
commit 7c5cad55e1
122 changed files with 51731 additions and 0 deletions

1
logging/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
mongoexpress.yml

52
logging/README.md Normal file
View File

@@ -0,0 +1,52 @@
# Logging infrastructure
## Background
Fluent Bit picks up the logs from Kubernetes workers and sends them to Graylog
using GELF over TCP 12201.
Graylog ingests the logs and stores them in Elasticsearch.
## Deployment
To deploy:
```
kubectl create namespace logging
kubectl apply -n logging -f mongodb-support.yml -f application.yml -f networkpolicy-base.yml
kubectl rollout restart -n logging daemonset/fluent-bit
```
To set secrets:
```
GRAYLOG_ROOT_PASSWORD=$(cat /dev/urandom | base64 | head -c 30)
echo "Graylog admin password: $GRAYLOG_ROOT_PASSWORD"
kubectl create secret generic -n logging graylog-secrets \
--from-literal=GRAYLOG_ROOT_PASSWORD_SHA2=$(echo -en $GRAYLOG_ROOT_PASSWORD | sha256sum | cut -d" " -f1) \
--from-literal=GRAYLOG_PASSWORD_SECRET=$(cat /dev/urandom | base64 | head -c 30)
kubectl create secret generic -n logging mongodb-application-readwrite-password --from-literal="password=$(cat /dev/urandom | base64 | head -c 30)"
kubectl create secret generic -n logging mongodb-application-readonly-password --from-literal="password=$(cat /dev/urandom | base64 | head -c 30)"
```
## Graylog setup
Note that Graylog is running without disk journal to
prevent SSD thrashing and to save some disk space.
This will be problematic when there are loads for logs coming in and
ElasticSearch is unable to process the entries in timely manner.
ElasticSearch default index is tuned to match the persistent volume allocated
on Longhorn to prevent running out disk space on that PV.
After Graylog deployment following steps were manually performed via web interface:
* Add Syslog TCP input for external Linux hosts
* Add Syslog UDP input for Mikrotik networking gear
* Add GELF TCP input for Kubernetes workers
* Trusted header authentication was enabled and set to `Remote-User`
https://graylog.k-space.ee/system/authentication/authenticator/edit
Note that user accounts are not provisioned automatically.
Users need to be manually created in Graylog with matching `Username`.
Automatic user account provisioning is supported in Graylog Enterprise version

634
logging/application.yml Normal file
View File

@@ -0,0 +1,634 @@
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: fluent-bit
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: fluent-bit-read
rules:
- apiGroups: [""]
resources:
- namespaces
- pods
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: fluent-bit-read
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: fluent-bit-read
subjects:
- kind: ServiceAccount
name: fluent-bit
namespace: logging
---
apiVersion: v1
kind: ConfigMap
metadata:
name: fluent-bit-config
namespace: logging
labels:
app: fluent-bit
annotations:
reloader.stakater.com/match: "true"
data:
fluent-bit.conf: |
[SERVICE]
Flush 1
Log_Level warn
Daemon off
Parsers_File parsers.conf
HTTP_Server On
HTTP_Listen 0.0.0.0
HTTP_Port 2020
@INCLUDE input-kubernetes.conf
@INCLUDE filter-kubernetes.conf
@INCLUDE output-graylog.conf
input-kubernetes.conf: |
# Following assembles the log fragments of the Kubernetes runtime
# https://github.com/fluent/fluent-bit/blob/d3c71f2ed4ff3625b85715aaefe6bc76b2ac3c2e/src/multiline/flb_ml_parser_docker.c#L57
[INPUT]
name tail
tag kube.*
path /var/log/containers/*.log
multiline.parser cri
db /var/log/flb_kube.db
mem_buf_limit 5MB
skip_long_lines on
refresh_interval 10
filter-kubernetes.conf: |
# Following reassembles stack traces
[FILTER]
name multiline
match *
multiline.key_content log
multiline.parser go,python,java
# Following annotates the Kubernetes logs using Kubernetes API-s
[FILTER]
Name kubernetes
Match kube.*
Kube_URL https://kubernetes.default.svc:443
Kube_CA_File /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
Kube_Token_File /var/run/secrets/kubernetes.io/serviceaccount/token
Kube_Tag_Prefix kube.var.log.containers.
Merge_Log On
K8S-Logging.Parser On
K8S-Logging.Exclude Off
# Following unnests the kubernetes map
[FILTER]
Name nest
Match kube.*
Operation lift
Nested_under kubernetes
Add_prefix kubernetes_
output-graylog.conf: |
[OUTPUT]
Name gelf
Match *
Host graylog-gelf-tcp
Port 12201
Mode tcp
Gelf_Host_Key kubernetes_host
Gelf_Short_Message_Key log
Retry_Limit no_limits
parsers.conf: |
# http://rubular.com/r/tjUt3Awgg4
[PARSER]
Name cri
Format regex
Regex ^(?<time>[^ ]+) (?<stream>stdout|stderr) (?<logtag>[^ ]*) (?<message>.*)$
Time_Key time
Time_Format %Y-%m-%dT%H:%M:%S.%L%z
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: fluent-bit
namespace: logging
annotations:
keel.sh/policy: patch
keel.sh/trigger: poll
keel.sh/pollSchedule: "@midnight"
annotations:
reloader.stakater.com/search: "true"
spec:
revisionHistoryLimit: 0
selector:
matchLabels:
app: fluent-bit
template:
metadata:
labels:
app: fluent-bit
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "2020"
prometheus.io/path: /api/v1/metrics/prometheus
spec:
containers:
- name: fluent-bit
image: fluent/fluent-bit:1.9
imagePullPolicy: Always
ports:
- containerPort: 2020
volumeMounts:
- name: varlog
mountPath: /var/log
- name: varlibdockercontainers
mountPath: /var/lib/docker/containers
readOnly: true
- name: fluent-bit-config
mountPath: /fluent-bit/etc/
volumes:
- name: varlog
hostPath:
path: /var/log
- name: varlibdockercontainers
hostPath:
path: /var/lib/docker/containers
- name: fluent-bit-config
configMap:
name: fluent-bit-config
serviceAccountName: fluent-bit
tolerations:
- key: node-role.kubernetes.io/master
operator: Exists
effect: NoSchedule
- operator: "Exists"
effect: "NoExecute"
- operator: "Exists"
effect: "NoSchedule"
---
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: fluent-bit
spec:
podSelector:
matchLabels:
app: fluent-bit
policyTypes:
- Ingress
- Egress
ingress:
- from:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: monitoring
podSelector:
matchLabels:
app: prometheus
ports:
- port: 2020
egress:
- to:
- podSelector:
matchLabels:
app: graylog
ports:
- protocol: TCP
port: 12201
- # Kubernetes API endpoint kubernetes.default.svc.cluster.local
# Determine IP-s and ports with: kubectl get ep -n default kubernetes
to:
- ipBlock:
cidr: 172.21.3.0/24
ports:
- port: 6443
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: elasticsearch
labels:
app: elasticsearch
spec:
serviceName: elasticsearch
revisionHistoryLimit: 0
replicas: 1
selector:
matchLabels:
app: elasticsearch
template:
metadata:
labels:
app: elasticsearch
spec:
securityContext:
fsGroup: 1000
containers:
- name: elasticsearch
image: elasticsearch:7.17.3
securityContext:
runAsNonRoot: true
runAsUser: 1000
env:
- name: discovery.type
value: single-node
- name: xpack.security.enabled
value: "false"
ports:
- containerPort: 9200
readinessProbe:
httpGet:
path: /_cluster/health
port: 9200
initialDelaySeconds: 5
periodSeconds: 10
failureThreshold: 3
successThreshold: 1
timeoutSeconds: 5
resources:
limits:
memory: "2147483648"
volumeMounts:
- name: elasticsearch-data
mountPath: /usr/share/elasticsearch/data
- name: elasticsearch-tmp
mountPath: /tmp/
volumes:
- emptyDir: {}
name: elasticsearch-keystore
- emptyDir: {}
name: elasticsearch-tmp
- emptyDir: {}
name: elasticsearch-logs
volumeClaimTemplates:
- metadata:
name: elasticsearch-data
spec:
accessModes:
- "ReadWriteOnce"
resources:
requests:
storage: "10Gi"
storageClassName: longhorn
---
apiVersion: v1
kind: Service
metadata:
name: elasticsearch
labels:
app: elasticsearch
spec:
ports:
- name: api
port: 80
targetPort: 9200
selector:
app: elasticsearch
---
apiVersion: v1
kind: Service
metadata:
name: graylog-gelf-tcp
labels:
app: graylog
spec:
ports:
- name: graylog-gelf-tcp
port: 12201
protocol: TCP
targetPort: 12201
selector:
app: graylog
---
apiVersion: v1
kind: Service
metadata:
name: graylog-syslog-tcp
labels:
app: graylog
annotations:
external-dns.alpha.kubernetes.io/hostname: syslog.k-space.ee
metallb.universe.tf/allow-shared-ip: syslog.k-space.ee
spec:
type: LoadBalancer
externalTrafficPolicy: Local
loadBalancerIP: 172.20.51.4
ports:
- name: graylog-syslog
port: 514
protocol: TCP
selector:
app: graylog
---
apiVersion: v1
kind: Service
metadata:
name: graylog-syslog-udp
labels:
app: graylog
annotations:
external-dns.alpha.kubernetes.io/hostname: syslog.k-space.ee
metallb.universe.tf/allow-shared-ip: syslog.k-space.ee
spec:
type: LoadBalancer
externalTrafficPolicy: Local
loadBalancerIP: 172.20.51.4
ports:
- name: graylog-syslog
port: 514
protocol: UDP
selector:
app: graylog
---
apiVersion: v1
kind: Service
metadata:
name: graylog
labels:
app: graylog
spec:
ports:
- name: graylog
port: 9000
protocol: TCP
selector:
app: graylog
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: graylog
labels:
app: graylog
annotations:
keel.sh/policy: minor
keel.sh/trigger: poll
keel.sh/pollSchedule: "@midnight"
spec:
serviceName: graylog
revisionHistoryLimit: 0
replicas: 1
selector:
matchLabels:
app: graylog
template:
metadata:
labels:
app: graylog
annotations:
prometheus.io/port: "9833"
prometheus.io/scrape: "true"
spec:
securityContext:
fsGroup: 1100
volumes:
- name: graylog-config
downwardAPI:
items:
- path: id
fieldRef:
fieldPath: metadata.name
containers:
- name: graylog
image: graylog/graylog:4.3
env:
- name: GRAYLOG_MONGODB_URI
valueFrom:
secretKeyRef:
name: mongodb-application-readwrite
key: connectionString.standard
- name: GRAYLOG_PROMETHEUS_EXPORTER_ENABLED
value: "true"
- name: GRAYLOG_PROMETHEUS_EXPORTER_BIND_ADDRESS
value: "0.0.0.0:9833"
- name: GRAYLOG_NODE_ID_FILE
value: /config/id
- name: GRAYLOG_HTTP_EXTERNAL_URI
value: "https://graylog.k-space.ee/"
- name: GRAYLOG_TRUSTED_PROXIES
value: "0.0.0.0/0"
- name: GRAYLOG_ELASTICSEARCH_HOSTS
value: "http://elasticsearch"
- name: GRAYLOG_MESSAGE_JOURNAL_ENABLED
value: "false"
- name: GRAYLOG_ROTATION_STRATEGY
value: "size"
- name: GRAYLOG_ELASTICSEARCH_MAX_SIZE_PER_INDEX
value: "268435456"
- name: GRAYLOG_ELASTICSEARCH_MAX_NUMBER_OF_INDICES
value: "16"
envFrom:
- secretRef:
name: graylog-secrets
securityContext:
runAsNonRoot: true
runAsUser: 1100
ports:
- containerPort: 9000
name: graylog
- containerPort: 9833
name: graylog-metrics
livenessProbe:
httpGet:
path: /api/system/lbstatus
port: 9000
initialDelaySeconds: 5
periodSeconds: 30
failureThreshold: 3
successThreshold: 1
timeoutSeconds: 5
readinessProbe:
httpGet:
path: /api/system/lbstatus
port: 9000
initialDelaySeconds: 5
periodSeconds: 10
failureThreshold: 3
successThreshold: 1
timeoutSeconds: 5
volumeMounts:
- name: graylog-config
mountPath: /config
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: graylog
annotations:
cert-manager.io/cluster-issuer: default
traefik.ingress.kubernetes.io/router.entrypoints: websecure
traefik.ingress.kubernetes.io/router.tls: "true"
external-dns.alpha.kubernetes.io/target: traefik.k-space.ee
traefik.ingress.kubernetes.io/router.middlewares: traefik-sso@kubernetescrd
spec:
rules:
- host: graylog.k-space.ee
http:
paths:
- pathType: Prefix
path: "/"
backend:
service:
name: graylog
port:
number: 9000
tls:
- hosts:
- graylog.k-space.ee
secretName: graylog-tls
---
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: graylog
spec:
podSelector:
matchLabels:
app: graylog
policyTypes:
- Ingress
- Egress
egress:
- to:
- podSelector:
matchLabels:
app: elasticsearch
ports:
- port: 9200
- to:
- podSelector:
matchLabels:
app: mongodb-svc
ports:
- port: 27017
ingress:
- from:
- ipBlock:
cidr: 172.23.0.0/16
- ipBlock:
cidr: 172.21.0.0/16
- ipBlock:
cidr: 100.102.0.0/16
ports:
- protocol: UDP
port: 514
- protocol: TCP
port: 514
- from:
- podSelector:
matchLabels:
app: fluent-bit
ports:
- protocol: TCP
port: 12201
- from:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: monitoring
podSelector:
matchLabels:
app: prometheus
ports:
- port: 9833
- from:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: traefik
podSelector:
matchLabels:
app.kubernetes.io/name: traefik
ports:
- protocol: TCP
port: 9000
---
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: elasticsearch
spec:
podSelector:
matchLabels:
app: elasticsearch
policyTypes:
- Ingress
- Egress
ingress:
- from:
- podSelector:
matchLabels:
app: graylog
egress:
- to:
- ipBlock:
# geoip.elastic.co updates
cidr: 0.0.0.0/0
ports:
- port: 443
---
apiVersion: mongodbcommunity.mongodb.com/v1
kind: MongoDBCommunity
metadata:
name: mongodb
spec:
members: 3
type: ReplicaSet
version: "5.0.9"
security:
authentication:
modes: ["SCRAM"]
users:
- name: readwrite
db: application
passwordSecretRef:
name: mongodb-application-readwrite-password
roles:
- name: readWrite
db: application
scramCredentialsSecretName: mongodb-application-readwrite
- name: readonly
db: application
passwordSecretRef:
name: mongodb-application-readonly-password
roles:
- name: readOnly
db: application
scramCredentialsSecretName: mongodb-application-readonly
statefulSet:
spec:
template:
spec:
affinity:
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchExpressions:
- key: app
operator: In
values:
- mongodb-svc
topologyKey: kubernetes.io/hostname
nodeSelector:
dedicated: storage
tolerations:
- key: dedicated
operator: Equal
value: storage
effect: NoSchedule
volumeClaimTemplates:
- metadata:
name: logs-volume
spec:
storageClassName: local-path
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 512Mi
- metadata:
name: data-volume
spec:
storageClassName: local-path
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 2Gi

1
logging/mongodb-support.yml Symbolic link
View File

@@ -0,0 +1 @@
../mongodb-operator/mongodb-support.yml

View File

@@ -0,0 +1 @@
../shared/networkpolicy-base.yml