diff --git a/rawfile.py b/rawfile.py index 5d657ec..1eae50f 100755 --- a/rawfile.py +++ b/rawfile.py @@ -10,7 +10,7 @@ import rawfile_servicer from consts import CONFIG from csi import csi_pb2_grpc from metrics import expose_metrics -from rawfile_util import migrate_all_volume_schemas +from rawfile_util import migrate_all_volume_schemas, gc_all_volumes @click.group() @@ -46,6 +46,12 @@ def csi_driver(endpoint, nodeid, enable_metrics): server.wait_for_termination() +@cli.command() +@click.option("--dry-run/--seriously", default=True) +def gc(dry_run): + gc_all_volumes(dry_run=dry_run) + + if __name__ == "__main__": logging.basicConfig() cli() diff --git a/rawfile_util.py b/rawfile_util.py index 985de77..3577a3b 100644 --- a/rawfile_util.py +++ b/rawfile_util.py @@ -2,8 +2,10 @@ import glob import json from os.path import basename, dirname from pathlib import Path +import time from consts import DATA_DIR +from declarative import be_absent from volume_schema import migrate_to, LATEST_SCHEMA_VERSION from util import run, run_out @@ -27,6 +29,29 @@ def img_file(volume_id): return Path(metadata(volume_id)["img_file"]) +def destroy(volume_id, dry_run=True): + print(f"Destroying {volume_id}") + if not dry_run: + be_absent(img_file(volume_id)) + be_absent(meta_file(volume_id)) + be_absent(img_dir(volume_id)) + + +def gc_if_needed(volume_id, dry_run=True): + meta = metadata(volume_id) + + deleted_at = meta.get("deleted_at", None) + gc_at = meta.get("gc_at", None) + if deleted_at is None or gc_at is None: + return False + + now = time.time() + if gc_at <= now: + destroy(volume_id, dry_run=dry_run) + + return False + + def update_metadata(volume_id: str, obj: dict) -> dict: meta_file(volume_id).write_text(json.dumps(obj)) return obj @@ -83,3 +108,8 @@ def migrate_all_volume_schemas(): target_version = LATEST_SCHEMA_VERSION for volume_id in list_all_volumes(): migrate_metadata(volume_id, target_version) + + +def gc_all_volumes(dry_run=True): + for volume_id in list_all_volumes(): + gc_if_needed(volume_id, dry_run=dry_run) diff --git a/remote.py b/remote.py index 12c60da..6e3f138 100644 --- a/remote.py +++ b/remote.py @@ -6,7 +6,11 @@ def scrub(volume_id): import time import rawfile_util - rawfile_util.patch_metadata(volume_id, {"deleted_at": time.time()}) + now = time.time() + deleted_at = now + gc_at = now # TODO: GC sensitive PVCs later + rawfile_util.patch_metadata(volume_id, {"deleted_at": deleted_at, "gc_at": gc_at}) + rawfile_util.gc_if_needed(volume_id, dry_run=False) @remote_fn diff --git a/volume_schema.py b/volume_schema.py index 7765ba2..e65f1d5 100644 --- a/volume_schema.py +++ b/volume_schema.py @@ -1,6 +1,6 @@ import sys -LATEST_SCHEMA_VERSION = 2 # type: int +LATEST_SCHEMA_VERSION = 3 # type: int def migrate_0_to_1(data: dict) -> dict: @@ -14,6 +14,15 @@ def migrate_1_to_2(data: dict) -> dict: return data +def migrate_2_to_3(data: dict) -> dict: + data["schema_version"] = 3 + deleted_at = data.get("deleted_at", None) + if deleted_at is not None: + gc_at = deleted_at + 7 * 24 * 60 * 60 + data["gc_at"] = gc_at + return data + + def migrate_to(data: dict, version: int) -> dict: current = data.get("schema_version", 0) if current > version: