fix(file-tools): preserve container paths for docker file ops (#56637)

This commit is contained in:
Gille 2026-07-02 22:18:20 -06:00 committed by GitHub
parent 3e21cfdebb
commit e9ce250374
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 156 additions and 10 deletions

View file

@ -16,7 +16,7 @@ Core invariant these tests pin:
"""
import os
from pathlib import Path
from pathlib import Path, PurePosixPath
import pytest
@ -100,6 +100,73 @@ def test_absolute_input_path_ignores_base(_isolated_cwd, monkeypatch):
assert resolved == Path(abs_target).resolve()
def test_container_absolute_input_path_does_not_follow_host_symlink(tmp_path, monkeypatch):
"""Docker paths are sandbox-local and must not be host-dereferenced.
A user may have a host symlink at a container-looking path such as
``/workspace/projects``. For Docker file ops, resolving that symlink on the
host rewrites the path before Docker sees it, making file tools and terminal
disagree about where the file lives.
"""
host_project = tmp_path / "host-project"
host_project.mkdir()
container_mount = tmp_path / "workspace-projects"
container_mount.symlink_to(host_project, target_is_directory=True)
monkeypatch.setattr(terminal_tool, "_get_env_config", lambda: {"env_type": "docker"})
monkeypatch.setattr(terminal_tool, "_active_environments", {})
container_path = container_mount / "oilsands-sim" / "README.md"
resolved = ft._resolve_path_for_task(str(container_path), task_id="default")
assert resolved == container_path
assert resolved != (host_project / "oilsands-sim" / "README.md")
def test_container_path_normalization_uses_posix_path_syntax():
resolved = ft._normalize_without_host_deref("/workspace/projects/foo/../bar")
assert resolved == PurePosixPath("/workspace/projects/bar")
assert str(resolved) == "/workspace/projects/bar"
def test_container_relative_path_keeps_container_cwd_symlink(tmp_path, monkeypatch):
"""Relative Docker paths should stay under the container cwd textually."""
host_project = tmp_path / "host-project"
host_project.mkdir()
container_mount = tmp_path / "workspace-projects"
container_mount.symlink_to(host_project, target_is_directory=True)
monkeypatch.setattr(terminal_tool, "_get_env_config", lambda: {"env_type": "docker"})
monkeypatch.setattr(terminal_tool, "_active_environments", {})
monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": str(container_mount))
resolved = ft._resolve_path_for_task("oilsands-sim/README.md", task_id="default")
assert resolved == container_mount / "oilsands-sim" / "README.md"
assert resolved != host_project / "oilsands-sim" / "README.md"
class _DummyDockerEnvironment:
cwd = "/workspace"
cwd_owner = "default"
def test_container_path_detection_uses_live_docker_environment(monkeypatch):
"""A live DockerEnvironment-shaped env should beat config fallback."""
monkeypatch.setattr(
terminal_tool,
"_active_environments",
{"default": _DummyDockerEnvironment()},
)
monkeypatch.setattr(
terminal_tool,
"_get_env_config",
lambda: (_ for _ in ()).throw(AssertionError("should not read config")),
)
monkeypatch.delenv("TERMINAL_ENV", raising=False)
assert ft._uses_container_paths("default") is True
def test_resolution_base_always_absolute_no_terminal_cwd(_isolated_cwd, monkeypatch):
"""With TERMINAL_CWD unset, the base falls back to an ABSOLUTE process cwd."""
workspace, decoy = _isolated_cwd

View file

@ -5,8 +5,9 @@ import errno
import json
import logging
import os
import posixpath
import threading
from pathlib import Path
from pathlib import Path, PurePosixPath
from agent.file_safety import get_read_block_error
from tools.binary_extensions import has_binary_extension
@ -101,7 +102,7 @@ _BLOCKED_DEVICE_PATHS = frozenset({
})
def _resolve_path(filepath: str, task_id: str = "default") -> Path:
def _resolve_path(filepath: str, task_id: str = "default") -> Path | PurePosixPath:
"""Resolve a path relative to TERMINAL_CWD (the worktree base directory)
instead of the main repository root.
"""
@ -117,6 +118,62 @@ def _resolve_path(filepath: str, task_id: str = "default") -> Path:
# (gateway/run.py); the file/terminal-tool layer must do likewise so CLI
# sessions get the same protection. See references/worktree-cwd-discipline.md.
_TERMINAL_CWD_SENTINELS = frozenset({"", ".", "./", "auto", "cwd"})
_CONTAINER_PATH_BACKENDS_FALLBACK = frozenset({"docker", "singularity", "modal", "daytona"})
def _terminal_env_type_for_task(task_id: str = "default") -> str:
"""Best-effort terminal backend type for path-resolution decisions."""
try:
from tools.terminal_tool import (
_active_environments,
_env_lock,
_get_env_config,
_resolve_container_task_id,
)
try:
container_key = _resolve_container_task_id(task_id)
except Exception:
container_key = task_id
with _env_lock:
env = _active_environments.get(container_key) or _active_environments.get(task_id)
if env is not None:
name = env.__class__.__name__.lower()
if "local" in name:
return "local"
if "ssh" in name:
return "ssh"
if "docker" in name:
return "docker"
if "singularity" in name:
return "singularity"
if "modal" in name:
return "modal"
if "daytona" in name:
return "daytona"
cfg = _get_env_config()
return str(cfg.get("env_type") or os.getenv("TERMINAL_ENV") or "local").lower()
except Exception:
return str(os.getenv("TERMINAL_ENV") or "local").lower()
def _uses_container_paths(task_id: str = "default") -> bool:
try:
from tools.terminal_tool import _CONTAINER_BACKENDS
container_backends = _CONTAINER_BACKENDS
except Exception:
container_backends = _CONTAINER_PATH_BACKENDS_FALLBACK
return _terminal_env_type_for_task(task_id) in container_backends
def _normalize_without_host_deref(path: str | Path | PurePosixPath) -> PurePosixPath:
"""Normalize path syntax without following host symlinks.
Container backends use paths that are meaningful inside the sandbox. Calling
``Path.resolve()`` on the host can dereference a host-side symlink such as
``/workspace`` and rewrite the path before Docker sees it.
"""
return PurePosixPath(posixpath.normpath(str(path)))
def _sentinel_free_abs_cwd(raw: str | None) -> str | None:
@ -266,7 +323,11 @@ def _authoritative_workspace_root(task_id: str = "default") -> str | None:
return _configured_terminal_cwd()
def _resolve_base_dir(task_id: str = "default") -> Path:
def _resolve_base_dir(
task_id: str = "default",
*,
container_paths: bool | None = None,
) -> Path | PurePosixPath:
"""Return the ABSOLUTE base directory for resolving relative paths.
Resolution order:
@ -290,10 +351,17 @@ def _resolve_base_dir(task_id: str = "default") -> Path:
the process cwd only as a last resort, deterministically.
"""
root = _authoritative_workspace_root(task_id)
if container_paths is None:
container_paths = _uses_container_paths(task_id)
if root:
base = Path(_expand_tilde(root))
base_text = _expand_tilde(root)
else:
base = Path(os.getcwd())
base_text = os.getcwd()
if container_paths:
if not posixpath.isabs(base_text):
base_text = posixpath.join(os.getcwd(), base_text)
return _normalize_without_host_deref(base_text)
base = Path(base_text)
if not base.is_absolute():
# Last-resort anchoring: a live cwd should already be absolute, but if a
# terminal backend ever reports a relative cwd, anchor it to the process
@ -302,16 +370,24 @@ def _resolve_base_dir(task_id: str = "default") -> Path:
return base.resolve()
def _resolve_path_for_task(filepath: str, task_id: str = "default") -> Path:
def _resolve_path_for_task(filepath: str, task_id: str = "default") -> Path | PurePosixPath:
"""Resolve *filepath* against the task's absolute base directory.
See :func:`_resolve_base_dir` for how the base is chosen. Absolute input
paths are returned resolved-but-unanchored.
"""
p = Path(_expand_tilde(filepath))
container_paths = _uses_container_paths(task_id)
expanded = _expand_tilde(filepath)
if container_paths:
if posixpath.isabs(expanded):
return _normalize_without_host_deref(expanded)
resolved = _resolve_base_dir(task_id, container_paths=True) / expanded
return _normalize_without_host_deref(resolved)
p = Path(expanded)
if p.is_absolute():
return p.resolve()
return (_resolve_base_dir(task_id) / p).resolve()
resolved = _resolve_base_dir(task_id, container_paths=False) / p
return resolved.resolve()
def _path_resolution_warning(filepath: str, resolved: Path, task_id: str = "default") -> str | None:
@ -335,7 +411,10 @@ def _path_resolution_warning(filepath: str, resolved: Path, task_id: str = "defa
workspace_root = _authoritative_workspace_root(task_id)
if not workspace_root:
return None # No authoritative workspace root to compare against.
root = Path(_expand_tilde(workspace_root)).resolve()
if _uses_container_paths(task_id):
root = _normalize_without_host_deref(Path(_expand_tilde(workspace_root)))
else:
root = Path(_expand_tilde(workspace_root)).resolve()
# Is `resolved` inside `root`?
try:
resolved.relative_to(root)