From 63c8eb44ba8de04e3a8b5a15e98c34b2369efe02 Mon Sep 17 00:00:00 2001 From: Mehran Kholdi Date: Fri, 19 Nov 2021 18:59:49 +0330 Subject: [PATCH] Fix race condition that was causing dangling loop devices Apparently it is wrong to assume that `DeleteVolume` gets called only after `UnstageVolume` returns success. This was causing the disk image file to be deleted while the volume was still mounted. This would prevent the loop device from getting detached and, in turn, the disk space from being reclaimed. --- consts.py | 1 + rawfile_servicer.py | 15 +++++++++++++-- remote.py | 8 ++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/consts.py b/consts.py index ac9b857..30686cc 100644 --- a/consts.py +++ b/consts.py @@ -5,3 +5,4 @@ PROVISIONER_VERSION = "0.7.0" DATA_DIR = "/data" CONFIG = {} RESOURCE_EXHAUSTED_EXIT_CODE = 101 +VOLUME_IN_USE_EXIT_CODE = 102 diff --git a/rawfile_servicer.py b/rawfile_servicer.py index 14074d5..378c863 100644 --- a/rawfile_servicer.py +++ b/rawfile_servicer.py @@ -5,7 +5,12 @@ import grpc from google.protobuf.wrappers_pb2 import BoolValue import rawfile_util -from consts import PROVISIONER_VERSION, PROVISIONER_NAME, RESOURCE_EXHAUSTED_EXIT_CODE +from consts import ( + PROVISIONER_VERSION, + PROVISIONER_NAME, + RESOURCE_EXHAUSTED_EXIT_CODE, + VOLUME_IN_USE_EXIT_CODE, +) from csi import csi_pb2, csi_pb2_grpc from declarative import be_symlink, be_absent from fs_util import device_stats, mountpoint_to_dev @@ -208,7 +213,13 @@ class RawFileControllerServicer(csi_pb2_grpc.ControllerServicer): @log_grpc_request def DeleteVolume(self, request, context): - scrub(volume_id=request.volume_id) + try: + scrub(volume_id=request.volume_id) + except CalledProcessError as exc: + if exc.returncode == VOLUME_IN_USE_EXIT_CODE: + context.abort(grpc.StatusCode.FAILED_PRECONDITION, "Volume in use") + else: + raise exc return csi_pb2.DeleteVolumeResponse() @log_grpc_request diff --git a/remote.py b/remote.py index b3b9412..b77d75c 100644 --- 
a/remote.py +++ b/remote.py @@ -3,12 +3,20 @@ from util import remote_fn def scrub(volume_id): import time + from subprocess import CalledProcessError + import rawfile_util + from consts import VOLUME_IN_USE_EXIT_CODE img_dir = rawfile_util.img_dir(volume_id) if not img_dir.exists(): return + img_file = rawfile_util.img_file(volume_id) + loops = rawfile_util.attached_loops(img_file) + if len(loops) > 0: + raise CalledProcessError(returncode=VOLUME_IN_USE_EXIT_CODE, cmd="") + now = time.time() deleted_at = now gc_at = now # TODO: GC sensitive PVCs later