diff --git a/tests/tools/test_tool_result_storage.py b/tests/tools/test_tool_result_storage.py index 0d80581dc..319a52208 100644 --- a/tests/tools/test_tool_result_storage.py +++ b/tests/tools/test_tool_result_storage.py @@ -16,6 +16,7 @@ from tools.tool_result_storage import ( _build_persisted_message, _heredoc_marker, _resolve_storage_dir, + _safe_result_filename, _write_to_sandbox, enforce_turn_budget, generate_preview, @@ -165,6 +166,19 @@ class TestResolveStorageDir: assert _resolve_storage_dir(env) == "/data/data/com.termux/files/usr/tmp/hermes-results" +class TestSafeResultFilename: + def test_preserves_normal_tool_call_id(self): + assert _safe_result_filename("tc_456") == "tc_456.txt" + + def test_replaces_path_and_shell_metacharacters(self): + filename = _safe_result_filename("../outside/$(whoami);x") + assert filename.startswith("outside_whoami_x_") + assert filename.endswith(".txt") + assert "/" not in filename + assert "$" not in filename + assert ";" not in filename + + # ── _build_persisted_message ────────────────────────────────────────── class TestBuildPersistedMessage: @@ -376,6 +390,28 @@ class TestMaybePersistToolResult: ) assert "unique_id_abc.txt" in result + def test_tool_use_id_cannot_escape_storage_dir(self): + env = MagicMock() + env.execute.return_value = {"output": "", "returncode": 0} + env.get_temp_dir.return_value = "" + content = "x" * 60_000 + result = maybe_persist_tool_result( + content=content, + tool_name="terminal", + tool_use_id="../outside/$(whoami);x", + env=env, + threshold=30_000, + ) + cmd = env.execute.call_args[0][0] + target = cmd.split("cat > ", 1)[1].split(" <<", 1)[0] + + assert "Full output saved to: /tmp/hermes-results/outside_whoami_x_" in result + assert "/tmp/hermes-results/../" not in result + assert target.startswith("/tmp/hermes-results/outside_whoami_x_") + assert "/../" not in target + assert "$(whoami)" not in target + assert ";" not in target + def test_preview_included_in_persisted_output(self): env = MagicMock() env.execute.return_value = {"output": "", "returncode": 0} diff --git a/tools/tool_result_storage.py b/tools/tool_result_storage.py index fed8621ee..b9ceccf75 100644 --- a/tools/tool_result_storage.py +++ b/tools/tool_result_storage.py @@ -22,8 +22,10 @@ Defense against context-window overflow operates at three levels: where many medium-sized results combine to overflow context. """ +import hashlib import logging import os +import re import shlex import uuid @@ -39,6 +41,8 @@ PERSISTED_OUTPUT_CLOSING_TAG = "" STORAGE_DIR = "/tmp/hermes-results" HEREDOC_MARKER = "HERMES_PERSIST_EOF" _BUDGET_TOOL_NAME = "__budget_enforcement__" +_UNSAFE_RESULT_FILENAME_CHARS = re.compile(r"[^A-Za-z0-9_.-]+") +_MAX_RESULT_FILENAME_STEM = 120 def _resolve_storage_dir(env) -> str: @@ -57,6 +61,24 @@ def _resolve_storage_dir(env) -> str: return STORAGE_DIR +def _safe_result_filename(tool_use_id: str) -> str: + """Return a single safe filename for a tool result id.""" + raw_id = str(tool_use_id or "tool_result") + safe_stem = _UNSAFE_RESULT_FILENAME_CHARS.sub("_", raw_id).strip("._-") + changed = safe_stem != raw_id + + if not safe_stem: + safe_stem = "tool_result" + changed = True + + if changed or len(safe_stem) > _MAX_RESULT_FILENAME_STEM: + digest = hashlib.sha256(raw_id.encode("utf-8")).hexdigest()[:12] + safe_stem = safe_stem[:_MAX_RESULT_FILENAME_STEM].rstrip("._-") or "tool_result" + safe_stem = f"{safe_stem}_{digest}" + + return f"{safe_stem}.txt" + + def generate_preview(content: str, max_chars: int = DEFAULT_PREVIEW_SIZE_CHARS) -> tuple[str, bool]: """Truncate at last newline within max_chars. Returns (preview, has_more).""" if len(content) <= max_chars: @@ -153,7 +175,7 @@ def maybe_persist_tool_result( return content storage_dir = _resolve_storage_dir(env) - remote_path = f"{storage_dir}/{tool_use_id}.txt" + remote_path = f"{storage_dir}/{_safe_result_filename(tool_use_id)}" preview, has_more = generate_preview(content, max_chars=config.preview_size) if env is not None: