diff --git a/plugins/disk-cleanup/disk_cleanup.py b/plugins/disk-cleanup/disk_cleanup.py index 8f70631ea..11eb46ca9 100755 --- a/plugins/disk-cleanup/disk_cleanup.py +++ b/plugins/disk-cleanup/disk_cleanup.py @@ -166,9 +166,11 @@ def _is_protected_cron_path(p: Path) -> bool: """Return True if *p* is a cron control-plane file/directory that must never be deleted. - This only matches the directory itself and known control-plane files - (``jobs.json``, ``.tick.lock``) — it does NOT blanket-protect - everything under ``cron/`` because ``cron/output/`` is disposable. + This only matches the directory itself, known control-plane files + (``jobs.json``, ``.tick.lock``), and the ``output/`` root directory. + It does NOT blanket-protect files under ``cron/output/`` because those + run artifacts are disposable, but deleting the output root itself is too + broad and can erase every job's retained run history at once. """ # Lazily build the set once per process so HERMES_HOME is resolved # exactly once. @@ -177,6 +179,7 @@ def _is_protected_cron_path(p: Path) -> bool: for parent in ("cron", "cronjobs"): base = hermes_home / parent _PROTECTED_CRON_PATHS.add(str(base)) + _PROTECTED_CRON_PATHS.add(str(base / "output")) _PROTECTED_CRON_PATHS.add(str(base / "jobs.json")) _PROTECTED_CRON_PATHS.add(str(base / ".tick.lock")) resolved = str(p.resolve()) @@ -566,7 +569,7 @@ def guess_category(path: Path) -> Optional[str]: # (e.g. ``jobs.json``, ``.tick.lock``) must never be # auto-tracked — deleting it wipes the live scheduler # registry. See issue #32164. - if len(rel.parts) >= 2 and rel.parts[1] == "output": + if len(rel.parts) >= 3 and rel.parts[1] == "output": return "cron-output" return None if top == "cache": diff --git a/tests/plugins/test_disk_cleanup_plugin.py b/tests/plugins/test_disk_cleanup_plugin.py index 52afddc9c..03aca293c 100644 --- a/tests/plugins/test_disk_cleanup_plugin.py +++ b/tests/plugins/test_disk_cleanup_plugin.py @@ -136,6 +136,13 @@ class TestGuessCategory: p.write_text("x") assert dg.guess_category(p) == "cron-output" + def test_cron_output_root_not_tracked(self, _isolate_env): + """The cron/output root is durable container state, not an artifact.""" + dg = _load_lib() + output_root = _isolate_env / "cron" / "output" + output_root.mkdir(parents=True) + assert dg.guess_category(output_root) is None + def test_cron_jobs_json_not_tracked(self, _isolate_env): """Regression for #32164: the cron registry must never be tracked.""" dg = _load_lib() @@ -228,6 +235,29 @@ class TestStaleCronEntryMigration: assert summary["deleted"] == 0, "cron/ dir must not be deleted" assert cron_dir.exists() + def test_quick_skips_stale_cron_output_for_output_root(self, _isolate_env): + """Stale entry for cron/output itself must not delete all job output.""" + dg = _load_lib() + output_root = _isolate_env / "cron" / "output" + job_dir = output_root / "job_1" + job_dir.mkdir(parents=True) + run_md = job_dir / "run.md" + run_md.write_text("x") + + tracked_file = _isolate_env / "disk-cleanup" / "tracked.json" + tracked_file.parent.mkdir(parents=True, exist_ok=True) + tracked_file.write_text(json.dumps([{ + "path": str(output_root), + "category": "cron-output", + "timestamp": "2025-01-01T00:00:00+00:00", + "size": 0, + }])) + + summary = dg.quick() + assert summary["deleted"] == 0, "cron/output root must not be deleted" + assert output_root.exists() + assert run_md.exists() + def test_quick_skips_protected_cron_paths_defense_in_depth(self, _isolate_env): """Defense-in-depth: even if guess_category returned cron-output (hypothetically), protected cron paths are never deleted."""