diff --git a/tests/tools/test_file_tools_cwd_resolution.py b/tests/tools/test_file_tools_cwd_resolution.py index 88d6265b6..530fd9690 100644 --- a/tests/tools/test_file_tools_cwd_resolution.py +++ b/tests/tools/test_file_tools_cwd_resolution.py @@ -16,7 +16,7 @@ Core invariant these tests pin: """ import os -from pathlib import Path +from pathlib import Path, PurePosixPath import pytest @@ -100,6 +100,73 @@ def test_absolute_input_path_ignores_base(_isolated_cwd, monkeypatch): assert resolved == Path(abs_target).resolve() +def test_container_absolute_input_path_does_not_follow_host_symlink(tmp_path, monkeypatch): + """Docker paths are sandbox-local and must not be host-dereferenced. + + A user may have a host symlink at a container-looking path such as + ``/workspace/projects``. For Docker file ops, resolving that symlink on the + host rewrites the path before Docker sees it, making file tools and terminal + disagree about where the file lives. + """ + host_project = tmp_path / "host-project" + host_project.mkdir() + container_mount = tmp_path / "workspace-projects" + container_mount.symlink_to(host_project, target_is_directory=True) + monkeypatch.setattr(terminal_tool, "_get_env_config", lambda: {"env_type": "docker"}) + monkeypatch.setattr(terminal_tool, "_active_environments", {}) + + container_path = container_mount / "oilsands-sim" / "README.md" + resolved = ft._resolve_path_for_task(str(container_path), task_id="default") + + assert resolved == container_path + assert resolved != (host_project / "oilsands-sim" / "README.md") + + +def test_container_path_normalization_uses_posix_path_syntax(): + resolved = ft._normalize_without_host_deref("/workspace/projects/foo/../bar") + + assert resolved == PurePosixPath("/workspace/projects/bar") + assert str(resolved) == "/workspace/projects/bar" + + +def test_container_relative_path_keeps_container_cwd_symlink(tmp_path, monkeypatch): + """Relative Docker paths should stay under the container cwd textually.""" + host_project = tmp_path / "host-project" + host_project.mkdir() + container_mount = tmp_path / "workspace-projects" + container_mount.symlink_to(host_project, target_is_directory=True) + monkeypatch.setattr(terminal_tool, "_get_env_config", lambda: {"env_type": "docker"}) + monkeypatch.setattr(terminal_tool, "_active_environments", {}) + monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": str(container_mount)) + + resolved = ft._resolve_path_for_task("oilsands-sim/README.md", task_id="default") + + assert resolved == container_mount / "oilsands-sim" / "README.md" + assert resolved != host_project / "oilsands-sim" / "README.md" + + +class _DummyDockerEnvironment: + cwd = "/workspace" + cwd_owner = "default" + + +def test_container_path_detection_uses_live_docker_environment(monkeypatch): + """A live DockerEnvironment-shaped env should beat config fallback.""" + monkeypatch.setattr( + terminal_tool, + "_active_environments", + {"default": _DummyDockerEnvironment()}, + ) + monkeypatch.setattr( + terminal_tool, + "_get_env_config", + lambda: (_ for _ in ()).throw(AssertionError("should not read config")), + ) + monkeypatch.delenv("TERMINAL_ENV", raising=False) + + assert ft._uses_container_paths("default") is True + + def test_resolution_base_always_absolute_no_terminal_cwd(_isolated_cwd, monkeypatch): """With TERMINAL_CWD unset, the base falls back to an ABSOLUTE process cwd.""" workspace, decoy = _isolated_cwd diff --git a/tools/file_tools.py b/tools/file_tools.py index 7bf51aa90..0ebded2f6 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -5,8 +5,9 @@ import errno import json import logging import os +import posixpath import threading -from pathlib import Path +from pathlib import Path, PurePosixPath from agent.file_safety import get_read_block_error from tools.binary_extensions import has_binary_extension @@ -101,7 +102,7 @@ _BLOCKED_DEVICE_PATHS = frozenset({ }) -def _resolve_path(filepath: str, task_id: str = "default") -> Path: +def _resolve_path(filepath: str, task_id: str = "default") -> Path | PurePosixPath: """Resolve a path relative to TERMINAL_CWD (the worktree base directory) instead of the main repository root. """ @@ -117,6 +118,62 @@ def _resolve_path(filepath: str, task_id: str = "default") -> Path: # (gateway/run.py); the file/terminal-tool layer must do likewise so CLI # sessions get the same protection. See references/worktree-cwd-discipline.md. _TERMINAL_CWD_SENTINELS = frozenset({"", ".", "./", "auto", "cwd"}) +_CONTAINER_PATH_BACKENDS_FALLBACK = frozenset({"docker", "singularity", "modal", "daytona"}) + + +def _terminal_env_type_for_task(task_id: str = "default") -> str: + """Best-effort terminal backend type for path-resolution decisions.""" + try: + from tools.terminal_tool import ( + _active_environments, + _env_lock, + _get_env_config, + _resolve_container_task_id, + ) + + try: + container_key = _resolve_container_task_id(task_id) + except Exception: + container_key = task_id + with _env_lock: + env = _active_environments.get(container_key) or _active_environments.get(task_id) + if env is not None: + name = env.__class__.__name__.lower() + if "local" in name: + return "local" + if "ssh" in name: + return "ssh" + if "docker" in name: + return "docker" + if "singularity" in name: + return "singularity" + if "modal" in name: + return "modal" + if "daytona" in name: + return "daytona" + cfg = _get_env_config() + return str(cfg.get("env_type") or os.getenv("TERMINAL_ENV") or "local").lower() + except Exception: + return str(os.getenv("TERMINAL_ENV") or "local").lower() + + +def _uses_container_paths(task_id: str = "default") -> bool: + try: + from tools.terminal_tool import _CONTAINER_BACKENDS + container_backends = _CONTAINER_BACKENDS + except Exception: + container_backends = _CONTAINER_PATH_BACKENDS_FALLBACK + return _terminal_env_type_for_task(task_id) in container_backends + + +def _normalize_without_host_deref(path: str | Path | PurePosixPath) -> PurePosixPath: + """Normalize path syntax without following host symlinks. + + Container backends use paths that are meaningful inside the sandbox. Calling + ``Path.resolve()`` on the host can dereference a host-side symlink such as + ``/workspace`` and rewrite the path before Docker sees it. + """ + return PurePosixPath(posixpath.normpath(str(path))) def _sentinel_free_abs_cwd(raw: str | None) -> str | None: @@ -266,7 +323,11 @@ def _authoritative_workspace_root(task_id: str = "default") -> str | None: return _configured_terminal_cwd() -def _resolve_base_dir(task_id: str = "default") -> Path: +def _resolve_base_dir( + task_id: str = "default", + *, + container_paths: bool | None = None, +) -> Path | PurePosixPath: """Return the ABSOLUTE base directory for resolving relative paths. Resolution order: @@ -290,10 +351,17 @@ def _resolve_base_dir(task_id: str = "default") -> Path: the process cwd only as a last resort, deterministically. """ root = _authoritative_workspace_root(task_id) + if container_paths is None: + container_paths = _uses_container_paths(task_id) if root: - base = Path(_expand_tilde(root)) + base_text = _expand_tilde(root) else: - base = Path(os.getcwd()) + base_text = os.getcwd() + if container_paths: + if not posixpath.isabs(base_text): + base_text = posixpath.join(os.getcwd(), base_text) + return _normalize_without_host_deref(base_text) + base = Path(base_text) if not base.is_absolute(): # Last-resort anchoring: a live cwd should already be absolute, but if a # terminal backend ever reports a relative cwd, anchor it to the process @@ -302,16 +370,24 @@ def _resolve_base_dir(task_id: str = "default") -> Path: return base.resolve() -def _resolve_path_for_task(filepath: str, task_id: str = "default") -> Path: +def _resolve_path_for_task(filepath: str, task_id: str = "default") -> Path | PurePosixPath: """Resolve *filepath* against the task's absolute base directory. See :func:`_resolve_base_dir` for how the base is chosen. Absolute input paths are returned resolved-but-unanchored. """ - p = Path(_expand_tilde(filepath)) + container_paths = _uses_container_paths(task_id) + expanded = _expand_tilde(filepath) + if container_paths: + if posixpath.isabs(expanded): + return _normalize_without_host_deref(expanded) + resolved = _resolve_base_dir(task_id, container_paths=True) / expanded + return _normalize_without_host_deref(resolved) + p = Path(expanded) if p.is_absolute(): return p.resolve() - return (_resolve_base_dir(task_id) / p).resolve() + resolved = _resolve_base_dir(task_id, container_paths=False) / p + return resolved.resolve() def _path_resolution_warning(filepath: str, resolved: Path, task_id: str = "default") -> str | None: @@ -335,7 +411,10 @@ def _path_resolution_warning(filepath: str, resolved: Path, task_id: str = "defa workspace_root = _authoritative_workspace_root(task_id) if not workspace_root: return None # No authoritative workspace root to compare against. - root = Path(_expand_tilde(workspace_root)).resolve() + if _uses_container_paths(task_id): + root = _normalize_without_host_deref(Path(_expand_tilde(workspace_root))) + else: + root = Path(_expand_tilde(workspace_root)).resolve() # Is `resolved` inside `root`? try: resolved.relative_to(root)