fix(backup): stage SQLite snapshots alongside output zip and stop excluding nested hermes-agent skill dirs

Two bugs in the backup routine:

1. The SQLite safe-copy used tempfile.NamedTemporaryFile(), which defaults
   to the system temp directory (/tmp).  When /tmp is a small tmpfs and
   the database is large, the copy fails silently and the resulting zip
   is missing state.db, kanban.db, and response_store.db.

   Fix: pass dir=out_path.parent so the temp file is staged alongside the
   output zip on the same filesystem.

2. _EXCLUDED_DIRS contained "hermes-agent", which matched at ANY path
   depth, accidentally excluding the Hermes Agent skill directory at
   skills/autonomous-ai-agents/hermes-agent/.

   Fix: special-case "hermes-agent" to only match when it is the first
   path component (the root-level code checkout).  All other excluded dir
   names continue to match at any depth.

Regression tests added for both fixes.
This commit is contained in:
liuhao1024 2026-05-30 22:08:04 +08:00 committed by kshitijk4poor
parent 4829f8d2c5
commit dd40600e0a
2 changed files with 59 additions and 5 deletions

View file

@ -31,6 +31,9 @@ logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Directory names to skip entirely (matched against each path component).
# Exception: ``hermes-agent`` is matched at the root level only (see
# ``_should_exclude``) so that skill directories like
# ``skills/autonomous-ai-agents/hermes-agent/`` are not accidentally excluded.
_EXCLUDED_DIRS = {
"hermes-agent", # the codebase repo — re-clone instead
"__pycache__", # bytecode caches — regenerated on import
@ -69,10 +72,15 @@ def _should_exclude(rel_path: Path) -> bool:
"""Return True if *rel_path* (relative to hermes root) should be skipped."""
parts = rel_path.parts
# Exclude if any path component is an excluded dir name (``hermes-agent``: root only)
for part in parts:
if part in _EXCLUDED_DIRS:
return True
if part not in _EXCLUDED_DIRS:
continue
# ``hermes-agent`` only matches at the root level (first component).
# Nested directories with the same name — e.g.
# ``skills/autonomous-ai-agents/hermes-agent/`` — must be preserved.
if part == "hermes-agent" and part != parts[0]:
continue
return True
name = rel_path.name
@ -177,10 +185,13 @@ def run_backup(args) -> None:
rel_dir = dp.relative_to(hermes_root)
# Prune excluded directories in-place so os.walk doesn't descend into them.
# ``hermes-agent`` is only pruned at the root level; nested dirs
# with the same name (e.g. in skills/) must be preserved.
is_root = rel_dir == Path(".")
orig_dirnames = dirnames[:]
dirnames[:] = [
d for d in dirnames
if d not in _EXCLUDED_DIRS
if d not in _EXCLUDED_DIRS or (d == "hermes-agent" and not is_root)
]
for removed in set(orig_dirnames) - set(dirnames):
skipped_dirs.add(str(rel_dir / removed))
@ -211,7 +222,13 @@ def run_backup(args) -> None:
try:
# Safe copy for SQLite databases (handles WAL mode)
if abs_path.suffix == ".db":
with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
# Stage the snapshot alongside the output zip so that the
# temp file lives on the same filesystem. The system
# default (/tmp) may be a small tmpfs that cannot hold
# large databases, which would silently leave them out of the backup.
with tempfile.NamedTemporaryFile(
suffix=".db", delete=False, dir=str(out_path.parent)
) as tmp:
tmp_db = Path(tmp.name)
if _safe_copy_db(abs_path, tmp_db):
zf.write(tmp_db, arcname=str(rel_path))

View file

@ -146,6 +146,12 @@ class TestShouldExclude:
from hermes_cli.backup import _should_exclude
assert not _should_exclude(Path("logs/agent.log"))
def test_includes_nested_hermes_agent_in_skills(self):
"""skills/autonomous-ai-agents/hermes-agent/ must NOT be excluded —
only the root-level hermes-agent/ repo is skipped."""
from hermes_cli.backup import _should_exclude
assert not _should_exclude(Path("skills/autonomous-ai-agents/hermes-agent/SKILL.md"))
assert not _should_exclude(Path("skills/autonomous-ai-agents/hermes-agent/sub/item.txt"))
# ---------------------------------------------------------------------------
# Backup tests
@ -206,6 +212,37 @@ class TestBackup:
agent_files = [n for n in names if "hermes-agent" in n]
assert agent_files == [], f"hermes-agent files leaked into backup: {agent_files}"
def test_includes_nested_hermes_agent_in_skills(self, tmp_path, monkeypatch):
"""Backup includes skills/.../hermes-agent/ but NOT root hermes-agent/."""
hermes_home = tmp_path / ".hermes"
hermes_home.mkdir()
_make_hermes_tree(hermes_home)
# Add a nested hermes-agent directory inside skills (like the real layout)
nested = hermes_home / "skills" / "autonomous-ai-agents" / "hermes-agent"
nested.mkdir(parents=True)
(nested / "SKILL.md").write_text("# Hermes Agent Skill\n")
(nested / "sub").mkdir()
(nested / "sub" / "item.txt").write_text("nested content\n")
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
monkeypatch.setattr(Path, "home", lambda: tmp_path)
out_zip = tmp_path / "backup.zip"
args = Namespace(output=str(out_zip))
from hermes_cli.backup import run_backup
run_backup(args)
with zipfile.ZipFile(out_zip, "r") as zf:
names = zf.namelist()
# Root hermes-agent must be excluded
root_agent = [n for n in names if n.startswith("hermes-agent/")]
assert root_agent == [], f"root hermes-agent leaked: {root_agent}"
# Nested skill hermes-agent must be included
assert "skills/autonomous-ai-agents/hermes-agent/SKILL.md" in names
assert "skills/autonomous-ai-agents/hermes-agent/sub/item.txt" in names
def test_excludes_pycache(self, tmp_path, monkeypatch):
"""Backup does NOT include __pycache__ dirs."""
hermes_home = tmp_path / ".hermes"