fix: keep persisted tool results inside their storage directory

Tool call ids are used to name persisted large-result files. Treating that id as a raw path segment allowed traversal-like ids to resolve outside hermes-results even though the shell command quoted metacharacters.

Convert ids to single filename stems, preserve normal ids, and add a short hash when normalization is needed so unsafe ids do not collide silently.

Constraint: Avoid new dependencies and preserve existing tool-result paths for normal tool call ids
Rejected: Quote only the path | shell quoting does not prevent ../ path traversal
Confidence: high
Scope-risk: narrow
Reversibility: clean
Tested: source /Users/peter/hermes-agent/venv/bin/activate && pytest tests/tools/test_tool_result_storage.py -q
Tested: source /Users/peter/hermes-agent/venv/bin/activate && python -m compileall tools/tool_result_storage.py tests/tools/test_tool_result_storage.py
Tested: git diff --check
This commit is contained in:
WuKongAI-CMU 2026-04-15 01:36:29 -04:00 committed by Teknium
parent caa2034f88
commit 0ea3861b33
2 changed files with 59 additions and 1 deletions

View file

@ -16,6 +16,7 @@ from tools.tool_result_storage import (
_build_persisted_message,
_heredoc_marker,
_resolve_storage_dir,
_safe_result_filename,
_write_to_sandbox,
enforce_turn_budget,
generate_preview,
@ -165,6 +166,19 @@ class TestResolveStorageDir:
assert _resolve_storage_dir(env) == "/data/data/com.termux/files/usr/tmp/hermes-results"
class TestSafeResultFilename:
def test_preserves_normal_tool_call_id(self):
assert _safe_result_filename("tc_456") == "tc_456.txt"
def test_replaces_path_and_shell_metacharacters(self):
filename = _safe_result_filename("../outside/$(whoami);x")
assert filename.startswith("outside_whoami_x_")
assert filename.endswith(".txt")
assert "/" not in filename
assert "$" not in filename
assert ";" not in filename
# ── _build_persisted_message ──────────────────────────────────────────
class TestBuildPersistedMessage:
@ -376,6 +390,28 @@ class TestMaybePersistToolResult:
)
assert "unique_id_abc.txt" in result
def test_tool_use_id_cannot_escape_storage_dir(self):
env = MagicMock()
env.execute.return_value = {"output": "", "returncode": 0}
env.get_temp_dir.return_value = ""
content = "x" * 60_000
result = maybe_persist_tool_result(
content=content,
tool_name="terminal",
tool_use_id="../outside/$(whoami);x",
env=env,
threshold=30_000,
)
cmd = env.execute.call_args[0][0]
target = cmd.split("cat > ", 1)[1].split(" <<", 1)[0]
assert "Full output saved to: /tmp/hermes-results/outside_whoami_x_" in result
assert "/tmp/hermes-results/../" not in result
assert target.startswith("/tmp/hermes-results/outside_whoami_x_")
assert "/../" not in target
assert "$(whoami)" not in target
assert ";" not in target
def test_preview_included_in_persisted_output(self):
env = MagicMock()
env.execute.return_value = {"output": "", "returncode": 0}

View file

@ -22,8 +22,10 @@ Defense against context-window overflow operates at three levels:
where many medium-sized results combine to overflow context.
"""
import hashlib
import logging
import os
import re
import shlex
import uuid
@ -39,6 +41,8 @@ PERSISTED_OUTPUT_CLOSING_TAG = "</persisted-output>"
STORAGE_DIR = "/tmp/hermes-results"
HEREDOC_MARKER = "HERMES_PERSIST_EOF"
_BUDGET_TOOL_NAME = "__budget_enforcement__"
_UNSAFE_RESULT_FILENAME_CHARS = re.compile(r"[^A-Za-z0-9_.-]+")
_MAX_RESULT_FILENAME_STEM = 120
def _resolve_storage_dir(env) -> str:
@ -57,6 +61,24 @@ def _resolve_storage_dir(env) -> str:
return STORAGE_DIR
def _safe_result_filename(tool_use_id: str) -> str:
"""Return a single safe filename for a tool result id."""
raw_id = str(tool_use_id or "tool_result")
safe_stem = _UNSAFE_RESULT_FILENAME_CHARS.sub("_", raw_id).strip("._-")
changed = safe_stem != raw_id
if not safe_stem:
safe_stem = "tool_result"
changed = True
if changed or len(safe_stem) > _MAX_RESULT_FILENAME_STEM:
digest = hashlib.sha256(raw_id.encode("utf-8")).hexdigest()[:12]
safe_stem = safe_stem[:_MAX_RESULT_FILENAME_STEM].rstrip("._-") or "tool_result"
safe_stem = f"{safe_stem}_{digest}"
return f"{safe_stem}.txt"
def generate_preview(content: str, max_chars: int = DEFAULT_PREVIEW_SIZE_CHARS) -> tuple[str, bool]:
"""Truncate at last newline within max_chars. Returns (preview, has_more)."""
if len(content) <= max_chars:
@ -153,7 +175,7 @@ def maybe_persist_tool_result(
return content
storage_dir = _resolve_storage_dir(env)
remote_path = f"{storage_dir}/{tool_use_id}.txt"
remote_path = f"{storage_dir}/{_safe_result_filename(tool_use_id)}"
preview, has_more = generate_preview(content, max_chars=config.preview_size)
if env is not None: