From 66034d2463f6168bb3a67e02725c6a76c94c6563 Mon Sep 17 00:00:00 2001 From: rasmus Date: Tue, 30 Jul 2024 10:51:34 +0300 Subject: [PATCH] docs: mega refactor Also bunch of edits at wiki.k-space.ee --- CLUSTER.md | 170 +++++++++++++++++++++++++ README.md | 249 +++++-------------------------------- ansible/inventory.yml | 19 ++- ansible/zrepl/playbook.yml | 2 + monitoring/README.md | 1 + 5 files changed, 219 insertions(+), 222 deletions(-) create mode 100644 CLUSTER.md diff --git a/CLUSTER.md b/CLUSTER.md new file mode 100644 index 0000000..ba89edd --- /dev/null +++ b/CLUSTER.md @@ -0,0 +1,170 @@ +# Kubernetes cluster +Kubernetes hosts run on [PVE Cluster](https://wiki.k-space.ee/en/hosting/proxmox). Hosts are listed in Ansible [inventory](ansible/inventory.yml). + +## `kubectl` +- Authorization [ACLs](cluster-role-bindings.yml) +- [Troubleshooting `no such host`](#systemd-resolved-issues) + +Authenticate to auth.k-space.ee: +```bash +kubectl krew install oidc-login +mkdir -p ~/.kube + +cat << EOF > ~/.kube/config +apiVersion: v1 +clusters: +- cluster: + certificate-authority-data: 
LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUMvakNDQWVhZ0F3SUJBZ0lCQURBTkJna3Foa2lHOXcwQkFRc0ZBREFWTVJNd0VRWURWUVFERXdwcmRXSmwKY201bGRHVnpNQjRYRFRJeU1EVXdNakEzTXpVMU1Wb1hEVE15TURReU9UQTNNelUxTVZvd0ZURVRNQkVHQTFVRQpBeE1LYTNWaVpYSnVaWFJsY3pDQ0FTSXdEUVlKS29aSWh2Y05BUUVCQlFBRGdnRVBBRENDQVFvQ2dnRUJBS2J2CjY3UFlXVHJMc3ZCQTZuWHUvcm55SlVhNnppTnNWTVN6N2w4ekhxM2JuQnhqWVNPUDJhN1RXTnpUTmZDanZBWngKTmlNbXJya1hpb2dYQWpVVkhSUWZlYm81TFIrb0JBOTdLWlcrN01UMFVJRXBuWVVaaTdBRHlaS01vcEJFUXlMNwp1SlU5UDhnNUR1T29FRHZieGJSMXFuV1JZRXpteFNmSFpocllpMVA3bFd4emkxR243eGRETFZaMjZjNm0xR3Y1CnViRjZyaFBXK1JSVkhiQzFKakJGeTBwRXdhYlUvUTd0Z2dic0JQUjk5NVZvMktCeElBelRmbHhVanlYVkJ3MjEKU2d3ZGI1amlpemxEM0NSbVdZZ0ZrRzd0NTVZeGF3ZmpaQjh5bW4xYjhUVjkwN3dRcG8veU8zM3RaaEE3L3BFUwpBSDJYeDk5bkpMbFVGVUtSY1A4Q0F3RUFBYU5aTUZjd0RnWURWUjBQQVFIL0JBUURBZ0trTUE4R0ExVWRFd0VCCi93UUZNQU1CQWY4d0hRWURWUjBPQkJZRUZKNnZKeVk1UlJ1aklQWGxIK2ZvU3g2QzFRT2RNQlVHQTFVZEVRUU8KTUF5Q0NtdDFZbVZ5Ym1WMFpYTXdEUVlKS29aSWh2Y05BUUVMQlFBRGdnRUJBQ04zcGtCTVM3ekkrbUhvOWdTZQp6SzdXdjl3bXlCTVE5Q3crQXBSNnRBQXg2T1VIN0d1enc5TTV2bXNkYjkrYXBKMHBlZFB4SUg3YXZ1aG9SUXNMCkxqTzRSVm9BMG9aNDBZV3J3UStBR0dvdkZuaWNleXRNcFVSNEZjRXc0ZDRmcGl6V3d0TVNlRlRIUXR6WG84V2MKNFJGWC9xUXNVR1NWa01PaUcvcVVrSFpXQVgyckdhWXZ1Tkw2eHdSRnh5ZHpsRTFSUk56TkNvQzVpTXhjaVRNagpackEvK0pqVEFWU2FuNXZnODFOSmthZEphbmNPWmEwS3JEdkZzd1JJSG5CMGpMLzh3VmZXSTV6czZURU1VZUk1ClF6dU01QXUxUFZ4VXZJUGhlMHl6UXZjWDV5RlhnMkJGU3MzKzJBajlNcENWVTZNY2dSSTl5TTRicitFTUlHL0kKY0pjPQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== + server: https://master.kube.k-space.ee:6443 + name: kubernetes +contexts: +- context: + cluster: kubernetes + user: oidc + name: default +current-context: default +kind: Config +preferences: {} +users: +- name: oidc + user: + exec: + apiVersion: client.authentication.k8s.io/v1beta1 + args: + - oidc-login + - get-token + - --oidc-issuer-url=https://auth.k-space.ee/ + - --oidc-client-id=passmower.kubelogin + - --oidc-use-pkce + - --oidc-extra-scope=profile,email,groups + - --listen-address=127.0.0.1:27890 + command: kubectl 
+ env: null + provideClusterInfo: false +EOF + +# Test it: +kubectl get nodes # opens browser for authentication +``` + +### systemd-resolved issues +```sh +Unable to connect to the server: dial tcp: lookup master.kube.k-space.ee on 127.0.0.53:53: no such host +``` +``` +Network → VPN → `IPv4` → Other nameservers (Muud nimeserverid): `172.21.0.1` +Network → VPN → `IPv6` → Other nameservers (Muud nimeserverid): `2001:bb8:4008:21::1` +Network → VPN → `IPv4` → Search domains (Otsingudomeenid): `kube.k-space.ee` +Network → VPN → `IPv6` → Search domains (Otsingudomeenid): `kube.k-space.ee` +``` + +## Cluster formation +Created Ubuntu 22.04 VM-s on Proxmox with local storage. +Added some ARM64 workers by using Ubuntu 22.04 server on Raspberry Pi. + +After machines have booted up and you can reach them via SSH: + +``` +# Disable Ubuntu caching DNS resolver +systemctl disable systemd-resolved.service +systemctl stop systemd-resolved +rm -fv /etc/resolv.conf +cat > /etc/resolv.conf << EOF +nameserver 1.1.1.1 +nameserver 8.8.8.8 +EOF + +# Disable multipathd as Longhorn handles that itself +systemctl mask multipathd snapd +systemctl disable --now multipathd snapd bluetooth ModemManager hciuart wpa_supplicant packagekit + +# Permit root login +sed -i -e 's/PermitRootLogin no/PermitRootLogin without-password/' /etc/ssh/sshd_config +systemctl reload ssh +cat ~ubuntu/.ssh/authorized_keys > /root/.ssh/authorized_keys +userdel -f ubuntu +apt-get install -yqq linux-image-generic +apt-get remove -yq cloud-init linux-image-*-kvm +``` + +On master: + +``` +kubeadm init --token-ttl=120m --pod-network-cidr=10.244.0.0/16 --control-plane-endpoint "master.kube.k-space.ee:6443" --upload-certs --apiserver-cert-extra-sans master.kube.k-space.ee --node-name master1.kube.k-space.ee +``` + +For the `kubeadm join` command specify FQDN via `--node-name $(hostname -f)`. 
+ +Set AZ labels: + +``` +for j in $(seq 1 9); do + for t in master mon worker storage; do + kubectl label nodes ${t}${j}.kube.k-space.ee topology.kubernetes.io/zone=node${j} + done +done +``` + +After forming the cluster add taints: + +```bash +for j in $(seq 1 9); do + kubectl label nodes worker${j}.kube.k-space.ee node-role.kubernetes.io/worker='' +done + +for j in $(seq 1 4); do + kubectl taint nodes mon${j}.kube.k-space.ee dedicated=monitoring:NoSchedule + kubectl label nodes mon${j}.kube.k-space.ee dedicated=monitoring +done + +for j in $(seq 1 4); do + kubectl taint nodes storage${j}.kube.k-space.ee dedicated=storage:NoSchedule + kubectl label nodes storage${j}.kube.k-space.ee dedicated=storage +done +``` + +For `arm64` nodes add suitable taint to prevent scheduling non-multiarch images on them: + +```bash +kubectl taint nodes worker9.kube.k-space.ee arch=arm64:NoSchedule +``` + +For door controllers: +``` +for j in ground front back; do + kubectl taint nodes door-${j}.kube.k-space.ee dedicated=door:NoSchedule + kubectl label nodes door-${j}.kube.k-space.ee dedicated=door + kubectl taint nodes door-${j}.kube.k-space.ee arch=arm64:NoSchedule +done +``` + +To reduce wear on storage: +``` +echo StandardOutput=null >> /etc/systemd/system/kubelet.service.d/10-kubeadm.conf +systemctl daemon-reload +systemctl restart kubelet +``` + +## Technology mapping +Our self-hosted Kubernetes stack compared to AWS based deployments: + +| Hipster startup | Self-hosted hackerspace | Purpose | +|-------------------|-------------------------------------|---------------------------------------------------------------------| +| AWS ALB | Traefik | Reverse proxy also known as ingress controller in Kubernetes jargon | +| AWS AMP | Prometheus Operator | Monitoring and alerting | +| AWS CloudTrail | ECK Operator | Log aggregation | +| AWS DocumentDB | MongoDB Community Operator | Highly available NoSQL database | +| AWS EBS | Longhorn | Block storage for arbitrary applications needing 
persistent storage | +| AWS EC2 | Proxmox | Virtualization layer | +| AWS ECR | Harbor | Docker registry | +| AWS EKS | kubeadm | Provision Kubernetes master nodes | +| AWS NLB | MetalLB | L2/L3 level load balancing | +| AWS RDS for MySQL | MySQL Operator | Provision highly available relational databases | +| AWS Route53 | Bind and RFC2136 | DNS records and Let's Encrypt DNS validation | +| AWS S3 | Minio Operator | Highly available object storage | +| AWS VPC | Calico | Overlay network | +| Dex | Passmower | ACL mapping and OIDC provider which integrates with GitHub/Samba | +| GitHub Actions | Drone | Build Docker images | +| GitHub | Gitea | Source code management, issue tracking | +| GitHub OAuth2 | Samba (Active Directory compatible) | Source of truth for authentication and authorization | +| Gmail | Wildduck | E-mail | diff --git a/README.md b/README.md index 739499a..05b0192 100644 --- a/README.md +++ b/README.md @@ -1,230 +1,41 @@ -# Kubernetes cluster manifests +# k-space.ee infrastructure +Kubernetes manifests, Ansible [playbooks](ansible/README.md), and documentation for K-SPACE services. -## Introduction +- Repo is deployed with [ArgoCD](https://argocd.k-space.ee). For `kubectl` access, see [CLUSTER.md](CLUSTER.md#kubectl). +- Debugging Kubernetes [on Wiki](https://wiki.k-space.ee/en/hosting/debugging-kubernetes) +- Need help? → [`#kube`](https://k-space-ee.slack.com/archives/C02EYV1NTM2) -This is the Kubernetes manifests of services running on k-space.ee domains. -The applications are listed on https://auth2.k-space.ee for authenticated users. 
+Jump to docs: [inventory-app](hackerspace/README.md) / [cameras](camtiler/README.md) / [doors](https://wiki.k-space.ee/en/hosting/doors) / [list of apps](https://auth.k-space.ee) // [all infra](ansible/inventory.yml) / [network](https://wiki.k-space.ee/en/hosting/network/sensitive) / [retro](https://wiki.k-space.ee/en/hosting/retro) / [non-infra](https://wiki.k-space.ee) +## Supporting services +- Build [Git](https://git.k-space.ee) repositories with [Woodpecker](https://woodpecker.k-space.ee). +- Passmower: Authz with `kind: OIDCMiddlewareClient` (or `kind: OIDCClient`[^authz]). +- Traefik[^nonginx]: Expose services with `kind: Service` + `kind: Ingress` (TLS and DNS **included**). -## Cluster access +### Additional +- bind: Manage _additional_ DNS records with `kind: DNSEndpoint`. +- [Prometheus](https://wiki.k-space.ee/en/hosting/monitoring): Collect metrics with `kind: PodMonitor` (alerts with `kind: PrometheusRule`). +- [Slack bots](SLACK.md) and Kubernetes [CLUSTER.md](CLUSTER.md) itself. -General discussion is happening in the `#kube` Slack channel. +[^nonginx]: No nginx annotations! Use `kind: Ingress` instead. `IngressRoute` is not used as it doesn't support [`external-dns`](bind/README.md) out of the box. +[^authz]: Applications should prefer `Remote-User` (`kind: OIDCGWMiddlewareClient`), which gates app exposure to the public internet. Where not applicable or possible, use OpenID Connect (`kind: OIDCClient`) for authentication. -
Bootstrapping access -For bootstrap access obtain `/etc/kubernetes/admin.conf` from one of the master -nodes and place it under `~/.kube/config` on your machine. +### Databases / -stores: +- KeyDB: `kind: KeydbClaim` (replaces Redis[^redisdead]) +- Dragonfly: `kind: Dragonfly` (replaces Redis[^redisdead]) +- Longhorn: `storageClassName: longhorn` (filesystem storage) +- Mongo[^mongoproblems]: `kind: MongoDBCommunity` (NAS* `inventory-mongodb`) +- Minio S3: `kind: MinioBucketClaim` with `class: dedicated` (NAS*: `class: external`) +- MariaDB*: search for `mysql`, `mariadb`[^mariadb] (replaces MySQL) +- Postgres*: hardcoded to [harbor/application.yml](harbor/application.yml) -Once Passmower is working, OIDC access for others can be enabled with -running following on Kubernetes masters: +\* External, hosted directly on [nas.k-space.ee](https://wiki.k-space.ee/en/hosting/storage) -```bash -patch /etc/kubernetes/manifests/kube-apiserver.yaml - << EOF -@@ -23,6 +23,10 @@ - - --etcd-certfile=/etc/kubernetes/pki/apiserver-etcd-client.crt - - --etcd-keyfile=/etc/kubernetes/pki/apiserver-etcd-client.key - - --etcd-servers=https://127.0.0.1:2379 -+ - --oidc-issuer-url=https://auth.k-space.ee/ -+ - --oidc-client-id=oidc-gateway.kubelogin -+ - --oidc-username-claim=sub -+ - --oidc-groups-claim=groups - - --kubelet-client-certificate=/etc/kubernetes/pki/apiserver-kubelet-client.crt - - --kubelet-client-key=/etc/kubernetes/pki/apiserver-kubelet-client.key - - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname -EOF -sudo systemctl daemon-reload -systemctl restart kubelet -``` -
+[^mariadb]: As of 2024-07-30 used by auth, authelia, bitwarden, etherpad, freescout, git, grafana, nextcloud, wiki, woodpecker -The following can be used to talk to the Kubernetes cluster using OIDC credentials: +[^redisdead]: Redis has been replaced as redis-operatori couldn't handle itself: didn't reconcile after reboots, master URI was empty, and clients complained about missing masters. ArgoCD still hosts its own Redis. -```bash -kubectl krew install oidc-login -mkdir -p ~/.kube -cat << EOF > ~/.kube/config -apiVersion: v1 -clusters: -- cluster: - certificate-authority-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUMvakNDQWVhZ0F3SUJBZ0lCQURBTkJna3Foa2lHOXcwQkFRc0ZBREFWTVJNd0VRWURWUVFERXdwcmRXSmwKY201bGRHVnpNQjRYRFRJeU1EVXdNakEzTXpVMU1Wb1hEVE15TURReU9UQTNNelUxTVZvd0ZURVRNQkVHQTFVRQpBeE1LYTNWaVpYSnVaWFJsY3pDQ0FTSXdEUVlKS29aSWh2Y05BUUVCQlFBRGdnRVBBRENDQVFvQ2dnRUJBS2J2CjY3UFlXVHJMc3ZCQTZuWHUvcm55SlVhNnppTnNWTVN6N2w4ekhxM2JuQnhqWVNPUDJhN1RXTnpUTmZDanZBWngKTmlNbXJya1hpb2dYQWpVVkhSUWZlYm81TFIrb0JBOTdLWlcrN01UMFVJRXBuWVVaaTdBRHlaS01vcEJFUXlMNwp1SlU5UDhnNUR1T29FRHZieGJSMXFuV1JZRXpteFNmSFpocllpMVA3bFd4emkxR243eGRETFZaMjZjNm0xR3Y1CnViRjZyaFBXK1JSVkhiQzFKakJGeTBwRXdhYlUvUTd0Z2dic0JQUjk5NVZvMktCeElBelRmbHhVanlYVkJ3MjEKU2d3ZGI1amlpemxEM0NSbVdZZ0ZrRzd0NTVZeGF3ZmpaQjh5bW4xYjhUVjkwN3dRcG8veU8zM3RaaEE3L3BFUwpBSDJYeDk5bkpMbFVGVUtSY1A4Q0F3RUFBYU5aTUZjd0RnWURWUjBQQVFIL0JBUURBZ0trTUE4R0ExVWRFd0VCCi93UUZNQU1CQWY4d0hRWURWUjBPQkJZRUZKNnZKeVk1UlJ1aklQWGxIK2ZvU3g2QzFRT2RNQlVHQTFVZEVRUU8KTUF5Q0NtdDFZbVZ5Ym1WMFpYTXdEUVlKS29aSWh2Y05BUUVMQlFBRGdnRUJBQ04zcGtCTVM3ekkrbUhvOWdTZQp6SzdXdjl3bXlCTVE5Q3crQXBSNnRBQXg2T1VIN0d1enc5TTV2bXNkYjkrYXBKMHBlZFB4SUg3YXZ1aG9SUXNMCkxqTzRSVm9BMG9aNDBZV3J3UStBR0dvdkZuaWNleXRNcFVSNEZjRXc0ZDRmcGl6V3d0TVNlRlRIUXR6WG84V2MKNFJGWC9xUXNVR1NWa01PaUcvcVVrSFpXQVgyckdhWXZ1Tkw2eHdSRnh5ZHpsRTFSUk56TkNvQzVpTXhjaVRNagpackEvK0pqVEFWU2FuNXZnODFOSmthZEphbmNPWmEwS3JEdkZzd1JJSG5CMGpMLzh3VmZXSTV6czZURU1VZUk1ClF6dU01QXUxUFZ4VXZJUGhlMHl6UXZjWDV5RlhnMkJGU3MzKzJBajlNcENWVTZNY2dSSTl5TTRic
itFTUlHL0kKY0pjPQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== - server: https://master.kube.k-space.ee:6443 - name: kubernetes -contexts: -- context: - cluster: kubernetes - user: oidc - name: default -current-context: default -kind: Config -preferences: {} -users: -- name: oidc - user: - exec: - apiVersion: client.authentication.k8s.io/v1beta1 - args: - - oidc-login - - get-token - - --oidc-issuer-url=https://auth.k-space.ee/ - - --oidc-client-id=passmower.kubelogin - - --oidc-use-pkce - - --oidc-extra-scope=profile,email,groups - - --listen-address=127.0.0.1:27890 - command: kubectl - env: null - provideClusterInfo: false -EOF -``` +[^mongoproblems]: Mongo problems: Incompatible with rawfile csi (wiredtiger.wt corrupts), complicated resizing (PVCs from statefulset PVC template). -For access control mapping see [cluster-role-bindings.yml](cluster-role-bindings.yml) - -### systemd-resolved issues on access -```sh -Unable to connect to the server: dial tcp: lookup master.kube.k-space.ee on 127.0.0.53:53: no such host -``` -``` -Network → VPN → `IPv4` → Other nameservers (Muud nimeserverid): `172.21.0.1` -Network → VPN → `IPv6` → Other nameservers (Muud nimeserverid): `2001:bb8:4008:21::1` -Network → VPN → `IPv4` → Search domains (Otsingudomeenid): `kube.k-space.ee` -Network → VPN → `IPv6` → Search domains (Otsingudomeenid): `kube.k-space.ee` -``` - -# Technology mapping - -Our self-hosted Kubernetes stack compared to AWS based deployments: - -| Hipster startup | Self-hosted hackerspace | Purpose | -|-------------------|-------------------------------------|---------------------------------------------------------------------| -| AWS ALB | Traefik | Reverse proxy also known as ingress controller in Kubernetes jargon | -| AWS AMP | Prometheus Operator | Monitoring and alerting | -| AWS CloudTrail | ECK Operator | Log aggregation | -| AWS DocumentDB | MongoDB Community Operator | Highly available NoSQL database | -| AWS EBS | Longhorn | Block storage for arbitrary 
applications needing persistent storage | -| AWS EC2 | Proxmox | Virtualization layer | -| AWS ECR | Harbor | Docker registry | -| AWS EKS | kubeadm | Provision Kubernetes master nodes | -| AWS NLB | MetalLB | L2/L3 level load balancing | -| AWS RDS for MySQL | MySQL Operator | Provision highly available relational databases | -| AWS Route53 | Bind and RFC2136 | DNS records and Let's Encrypt DNS validation | -| AWS S3 | Minio Operator | Highly available object storage | -| AWS VPC | Calico | Overlay network | -| Dex | Passmower | ACL mapping and OIDC provider which integrates with GitHub/Samba | -| GitHub Actions | Drone | Build Docker images | -| GitHub | Gitea | Source code management, issue tracking | -| GitHub OAuth2 | Samba (Active Directory compatible) | Source of truth for authentication and authorization | -| Gmail | Wildduck | E-mail | - - -External dependencies running as classic virtual machines: - -- Bind as DNS server - - -## Adding applications - -Deploy applications via [ArgoCD](https://argocd.k-space.ee) - -We use Treafik with Passmower for Ingress. -Applications where possible and where applicable should use `Remote-User` -authentication. This prevents application exposure on public Internet. -Otherwise use OpenID Connect for authentication, -see Argo itself as an example how that is done. - -See `camtiler/ingress.yml` for commented Ingress example. - -Note that we do not use IngressRoute objects because they don't -support `external-dns` out of the box. -Do NOT add nginx annotations, we use Traefik. -Do NOT manually add DNS records, they are added by `external-dns`. -Do NOT manually create Certificate objects, -these should be handled by `tls:` section in Ingress. - - -## Cluster formation - -Created Ubuntu 22.04 VM-s on Proxmox with local storage. -Added some ARM64 workers by using Ubuntu 22.04 server on Raspberry Pi. 
- -After machines have booted up and you can reach them via SSH: - -``` -# Disable Ubuntu caching DNS resolver -systemctl disable systemd-resolved.service -systemctl stop systemd-resolved -rm -fv /etc/resolv.conf -cat > /etc/resolv.conf << EOF -nameserver 1.1.1.1 -nameserver 8.8.8.8 -EOF - -# Disable multipathd as Longhorn handles that itself -systemctl mask multipathd snapd -systemctl disable --now multipathd snapd bluetooth ModemManager hciuart wpa_supplicant packagekit - -# Permit root login -sed -i -e 's/PermitRootLogin no/PermitRootLogin without-password/' /etc/ssh/sshd_config -systemctl reload ssh -cat ~ubuntu/.ssh/authorized_keys > /root/.ssh/authorized_keys -userdel -f ubuntu -apt-get install -yqq linux-image-generic -apt-get remove -yq cloud-init linux-image-*-kvm -``` - -On master: - -``` -kubeadm init --token-ttl=120m --pod-network-cidr=10.244.0.0/16 --control-plane-endpoint "master.kube.k-space.ee:6443" --upload-certs --apiserver-cert-extra-sans master.kube.k-space.ee --node-name master1.kube.k-space.ee -``` - -For the `kubeadm join` command specify FQDN via `--node-name $(hostname -f)`. 
- -Set AZ labels: - -``` -for j in $(seq 1 9); do - for t in master mon worker storage; do - kubectl label nodes ${t}${j}.kube.k-space.ee topology.kubernetes.io/zone=node${j} - done -done -``` - -After forming the cluster add taints: - -```bash -for j in $(seq 1 9); do - kubectl label nodes worker${j}.kube.k-space.ee node-role.kubernetes.io/worker='' -done - -for j in $(seq 1 4); do - kubectl taint nodes mon${j}.kube.k-space.ee dedicated=monitoring:NoSchedule - kubectl label nodes mon${j}.kube.k-space.ee dedicated=monitoring -done - -for j in $(seq 1 4); do - kubectl taint nodes storage${j}.kube.k-space.ee dedicated=storage:NoSchedule - kubectl label nodes storage${j}.kube.k-space.ee dedicated=storage -done -``` - -For `arm64` nodes add suitable taint to prevent scheduling non-multiarch images on them: - -```bash -kubectl taint nodes worker9.kube.k-space.ee arch=arm64:NoSchedule -``` - -For door controllers: - -``` -for j in ground front back; do - kubectl taint nodes door-${j}.kube.k-space.ee dedicated=door:NoSchedule - kubectl label nodes door-${j}.kube.k-space.ee dedicated=door - kubectl taint nodes door-${j}.kube.k-space.ee arch=arm64:NoSchedule -done -``` - -To reduce wear on storage: - -``` -echo StandardOutput=null >> /etc/systemd/system/kubelet.service.d/10-kubeadm.conf -systemctl daemon-reload -systemctl restart kubelet -``` +*** +_This page is referenced by wiki [front page](https://wiki.k-space.ee) as **the** technical documentation for infra._ diff --git a/ansible/inventory.yml b/ansible/inventory.yml index 6ae543a..b42e502 100644 --- a/ansible/inventory.yml +++ b/ansible/inventory.yml @@ -1,3 +1,8 @@ +# This file is linked from /README.md as 'all infra'. 
+##### Not otherwise linked: + # Homepage: https://git.k-space.ee/k-space/homepage (on GitLab) + # Slack: https://k-space-ee.slack.com + all: vars: admins: @@ -6,6 +11,7 @@ all: extra_admins: [] children: + # https://wiki.k-space.ee/en/hosting/storage nasgroup: hosts: nas.k-space.ee: { ansible_host: 172.23.0.7 } @@ -14,13 +20,16 @@ all: ansible_port: 10648 vars: offsite_dataset: offsite/backup_zrepl + misc: children: nasgroup: hosts: + # https://git.k-space.ee/k-space/kube: bind/README.md (primary DNS, PVE VM) ns1.k-space.ee: { ansible_host: 172.20.0.2 } - proxmox: + # https://wiki.k-space.ee/hosting/proxmox (depends on nas.k-space.ee) + proxmox: # aka PVE, Proxmox Virtualization Environment vars: extra_admins: - rasmus @@ -30,6 +39,8 @@ all: pve8: { ansible_host: 172.21.20.8 } pve9: { ansible_host: 172.21.20.9 } + # https://git.k-space.ee/k-space/kube: README.md + # CLUSTER.md (PVE VMs + external nas.k-space.ee) kubernetes: children: masters: @@ -39,12 +50,12 @@ all: master3.kube.k-space.ee: { ansible_host: 172.21.3.53 } kubelets: children: - mon: + mon: # they sit in a privileged VLAN hosts: mon1.kube.k-space.ee: { ansible_host: 172.21.3.61 } mon2.kube.k-space.ee: { ansible_host: 172.21.3.62 } mon3.kube.k-space.ee: { ansible_host: 172.21.3.63 } - storage: + storage: # longhorn, to be replaced with a more direct CSI hosts: storage1.kube.k-space.ee: { ansible_host: 172.21.3.71 } storage2.kube.k-space.ee: { ansible_host: 172.21.3.72 } @@ -58,6 +69,8 @@ all: worker4.kube.k-space.ee: { ansible_host: 172.20.3.84 } worker9.kube.k-space.ee: { ansible_host: 172.20.3.89 } # Raspberry Pi 400 + # https://wiki.k-space.ee/en/hosting/doors + # See also: https://git.k-space.ee/k-space/kube: camtiler/README.md doors: vars: extra_admins: diff --git a/ansible/zrepl/playbook.yml b/ansible/zrepl/playbook.yml index 857b303..b4a74a0 100644 --- a/ansible/zrepl/playbook.yml +++ b/ansible/zrepl/playbook.yml @@ -1,3 +1,5 @@ +# Referenced and documented by 
https://wiki.k-space.ee/en/hosting/storage#zrepl + - name: zrepl hosts: nasgroup tasks: diff --git a/monitoring/README.md b/monitoring/README.md index f15edd9..69e3ead 100644 --- a/monitoring/README.md +++ b/monitoring/README.md @@ -1,4 +1,5 @@ ## Monitoring +Additional docs: https://wiki.k-space.ee/en/hosting/monitoring This namespace is managed by [ArgoCD](https://argocd.k-space.ee/applications/argocd/monitoring)