From d1c0d49cf04a473de64d739d03c4d9b745c8305b Mon Sep 17 00:00:00 2001 From: Mehran Kholdi Date: Fri, 12 Jun 2020 16:42:49 +0430 Subject: [PATCH] Support online volume expansion Summary: Online volume expansion is a two-phase process: 1. The backing storage, in this case the raw file, needs to be resized (i.e. `truncate -s`). 2. The node should be notified, so that it can both refresh its device capacity (i.e. `losetup -c`) and resize the filesystem (`resize2fs`) accordingly. Although in our case both steps could be performed on the node itself, to follow the semantics of how volume expansion works, we perform step 1 from the controller and step 2 from the node. Also, the `external-resizer` component is added, which watches for PVC size updates and notifies the CSI controller about them. Test Plan: Setup: - Deploy - Create a rawfile-backed PVC, and attach a Deployment to it - Keep an eye on the `rawfile` pod logs in the `kube-system` namespace to see if any errors pop up during any of the scenarios Scenario 1: - Increase the size of the PVC - Exec into the pod and verify that the volume has indeed been resized (using `df`) Scenario 2: - Decrease the deployment's replica count to 0 - Increase the size of the PVC. Wait for a couple of minutes. - Increase the deployment's replica count to 1 - Exec into the pod and verify that the volume has indeed been resized. 
Reviewers: bghadiri, mhyousefi, h.marvi, sina_rad Reviewed By: bghadiri, mhyousefi, sina_rad Differential Revision: https://phab.hamravesh.ir/D817 --- README.md | 3 +- deploy/charts/rawfile-csi/Chart.yaml | 2 +- .../charts/rawfile-csi/templates/00-rbac.yaml | 37 ++++++++++++++++ .../templates/01-controller-plugin.yaml | 11 +++++ rawfile_servicer.py | 42 +++++++++++++++++-- remote.py | 12 ++++++ 6 files changed, 102 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index fe8a193..11e0874 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ metadata: provisioner: rawfile.hamravesh.com reclaimPolicy: Delete volumeBindingMode: WaitForFirstConsumer +allowVolumeExpansion: true ``` Features @@ -38,7 +39,7 @@ Features - [ ] `Block` mode - [x] Volume metrics - [ ] Supports fsTypes -- [ ] Online expansion: If fs supports it (e.g. ext4, btrfs) +- [x] Online expansion: If fs supports it (e.g. ext4, btrfs) - [ ] Online shrinking: If fs supports it (e.g. btrfs) - [ ] Offline expansion/shrinking - [ ] Ephemeral inline volume diff --git a/deploy/charts/rawfile-csi/Chart.yaml b/deploy/charts/rawfile-csi/Chart.yaml index 9d8ff24..3af7a54 100644 --- a/deploy/charts/rawfile-csi/Chart.yaml +++ b/deploy/charts/rawfile-csi/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: rawfile-csi description: RawFile Driver Container Storage Interface type: application -version: 0.1.3 +version: 0.1.4 appVersion: 0.0.1 diff --git a/deploy/charts/rawfile-csi/templates/00-rbac.yaml b/deploy/charts/rawfile-csi/templates/00-rbac.yaml index 513b4c7..00c0d18 100644 --- a/deploy/charts/rawfile-csi/templates/00-rbac.yaml +++ b/deploy/charts/rawfile-csi/templates/00-rbac.yaml @@ -75,3 +75,40 @@ roleRef: kind: ClusterRole name: {{ include "rawfile-csi.fullname" . }}-broker apiGroup: rbac.authorization.k8s.io +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ include "rawfile-csi.fullname" . 
}}-resizer +rules: + - apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["persistentvolumes"] + verbs: ["get", "list", "watch", "patch"] + - apiGroups: [""] + resources: ["persistentvolumeclaims"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["persistentvolumeclaims/status"] + verbs: ["patch"] + - apiGroups: [""] + resources: ["events"] + verbs: ["list", "watch", "create", "update", "patch"] +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ include "rawfile-csi.fullname" . }}-resizer +subjects: + - kind: ServiceAccount + name: {{ include "rawfile-csi.fullname" . }}-driver + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ include "rawfile-csi.fullname" . }}-resizer + apiGroup: rbac.authorization.k8s.io diff --git a/deploy/charts/rawfile-csi/templates/01-controller-plugin.yaml b/deploy/charts/rawfile-csi/templates/01-controller-plugin.yaml index d4cd428..e243a6e 100644 --- a/deploy/charts/rawfile-csi/templates/01-controller-plugin.yaml +++ b/deploy/charts/rawfile-csi/templates/01-controller-plugin.yaml @@ -64,3 +64,14 @@ spec: volumeMounts: - name: socket-dir mountPath: /csi + - name: csi-resizer + image: quay.io/k8scsi/csi-resizer:v0.5.0 + imagePullPolicy: IfNotPresent + args: + - "--csi-address=$(ADDRESS)" + env: + - name: ADDRESS + value: /csi/csi.sock + volumeMounts: + - name: socket-dir + mountPath: /csi diff --git a/rawfile_servicer.py b/rawfile_servicer.py index b5e0e02..45d70ae 100644 --- a/rawfile_servicer.py +++ b/rawfile_servicer.py @@ -7,7 +7,7 @@ import rawfile_util from csi import csi_pb2, csi_pb2_grpc from orchestrator.k8s import volume_to_node, run_on_node from rawfile_util import attach_loop, detach_loops -from remote import init_rawfile, scrub +from remote import init_rawfile, scrub, expand_rawfile from util import 
log_grpc_request, run NODE_NAME_TOPOLOGY_KEY = "hostname" @@ -31,6 +31,11 @@ class RawFileIdentityServicer(csi_pb2_grpc.IdentityServicer): type=Cap.Service.VOLUME_ACCESSIBILITY_CONSTRAINTS ) ), + Cap( + volume_expansion=Cap.VolumeExpansion( + type=Cap.VolumeExpansion.ONLINE + ) + ), ] ) @@ -47,7 +52,10 @@ class RawFileNodeServicer(csi_pb2_grpc.NodeServicer): def NodeGetCapabilities(self, request, context): Cap = csi_pb2.NodeServiceCapability return csi_pb2.NodeGetCapabilitiesResponse( - capabilities=[Cap(rpc=Cap.RPC(type=Cap.RPC.STAGE_UNSTAGE_VOLUME))] + capabilities=[ + Cap(rpc=Cap.RPC(type=Cap.RPC.STAGE_UNSTAGE_VOLUME)), + Cap(rpc=Cap.RPC(type=Cap.RPC.EXPAND_VOLUME)), + ] ) @log_grpc_request @@ -100,13 +108,28 @@ class RawFileNodeServicer(csi_pb2_grpc.NodeServicer): detach_loops(img_file) return csi_pb2.NodeUnstageVolumeResponse() + @log_grpc_request + def NodeExpandVolume(self, request, context): + volume_id = request.volume_id + size = request.capacity_range.required_bytes + img_file = rawfile_util.img_file(volume_id) + for dev in rawfile_util.attached_loops(img_file): + run(f"losetup -c {dev}") + if True: # TODO: is ext2/ext3/ext4 + run(f"resize2fs {dev}") + break + return csi_pb2.NodeExpandVolumeResponse(capacity_bytes=size) + class RawFileControllerServicer(csi_pb2_grpc.ControllerServicer): @log_grpc_request def ControllerGetCapabilities(self, request, context): Cap = csi_pb2.ControllerServiceCapability return csi_pb2.ControllerGetCapabilitiesResponse( - capabilities=[Cap(rpc=Cap.RPC(type=Cap.RPC.CREATE_DELETE_VOLUME))] + capabilities=[ + Cap(rpc=Cap.RPC(type=Cap.RPC.CREATE_DELETE_VOLUME)), + Cap(rpc=Cap.RPC(type=Cap.RPC.EXPAND_VOLUME)), + ] ) @log_grpc_request @@ -177,3 +200,16 @@ class RawFileControllerServicer(csi_pb2_grpc.ControllerServicer): node_name = volume_to_node(request.volume_id) run_on_node(scrub.as_cmd(volume_id=request.volume_id), node=node_name) return csi_pb2.DeleteVolumeResponse() + + @log_grpc_request + def ControllerExpandVolume(self, 
request, context): + volume_id = request.volume_id + node_name = volume_to_node(volume_id) + size = request.capacity_range.required_bytes + run_on_node( + expand_rawfile.as_cmd(volume_id=volume_id, size=size), node=node_name + ) + + return csi_pb2.ControllerExpandVolumeResponse( + capacity_bytes=size, node_expansion_required=True, + ) diff --git a/remote.py b/remote.py index 270b302..6e0c909 100644 --- a/remote.py +++ b/remote.py @@ -29,3 +29,15 @@ def init_rawfile(volume_id, size): ) run(f"truncate -s {size} {img_file}") run(f"mkfs.ext4 {img_file}") + + +@remote_fn +def expand_rawfile(volume_id, size): + import rawfile_util + from util import run + + img_file = rawfile_util.img_file(volume_id) + rawfile_util.patch_metadata( + volume_id, {"size": size}, + ) + run(f"truncate -s {size} {img_file}")