fix(ci): stabilize main test suite regressions (#17660)

* fix: stabilize main test suite regressions

* test(agent): update MiniMax normalization expectation

* test: stabilize remaining CI assertions

* test: harden config helper monkeypatching

* test: harden CI-only assertions

* fix(agent): propagate fast streaming interrupts
This commit is contained in:
Stephen Schoettler 2026-04-29 23:18:55 -07:00 committed by GitHub
parent e7beaaf184
commit f73364b1c4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
37 changed files with 450 additions and 127 deletions

View file

@ -608,7 +608,7 @@ class CopilotACPClient:
end = start + limit if isinstance(limit, int) and limit > 0 else None end = start + limit if isinstance(limit, int) and limit > 0 else None
content = "".join(lines[start:end]) content = "".join(lines[start:end])
if content: if content:
content = redact_sensitive_text(content) content = redact_sensitive_text(content, force=True)
response = { response = {
"jsonrpc": "2.0", "jsonrpc": "2.0",
"id": message_id, "id": message_id,

View file

@ -305,11 +305,13 @@ def _redact_form_body(text: str) -> str:
return _redact_query_string(text.strip()) return _redact_query_string(text.strip())
def redact_sensitive_text(text: str) -> str: def redact_sensitive_text(text: str, *, force: bool = False) -> str:
"""Apply all redaction patterns to a block of text. """Apply all redaction patterns to a block of text.
Safe to call on any string -- non-matching text passes through unchanged. Safe to call on any string -- non-matching text passes through unchanged.
Disabled by default enable via security.redact_secrets: true in config.yaml. Disabled by default enable via security.redact_secrets: true in config.yaml.
Set force=True for safety boundaries that must never return raw secrets
regardless of the user's global logging redaction preference.
""" """
if text is None: if text is None:
return None return None
@ -317,7 +319,7 @@ def redact_sensitive_text(text: str) -> str:
text = str(text) text = str(text)
if not text: if not text:
return text return text
if not _REDACT_ENABLED: if not (force or _REDACT_ENABLED):
return text return text
# Known prefixes (sk-, ghp_, etc.) # Known prefixes (sk-, ghp_, etc.)

View file

@ -1815,11 +1815,19 @@ class BasePlatformAdapter(ABC):
if stop_event is None: if stop_event is None:
await asyncio.sleep(interval) await asyncio.sleep(interval)
continue continue
try: loop = asyncio.get_running_loop()
await asyncio.wait_for(stop_event.wait(), timeout=interval) deadline = loop.time() + interval
except asyncio.TimeoutError: while not stop_event.is_set():
continue remaining = deadline - loop.time()
return if remaining <= 0:
break
# Poll instead of wait_for(stop_event.wait()). Cancelling
# wait_for while it owns the inner Event.wait task can leave
# shutdown paths stuck awaiting the typing task on Python
# 3.11/pytest-asyncio; sleep cancellation is immediate.
await asyncio.sleep(min(0.25, remaining))
if stop_event.is_set():
return
except asyncio.CancelledError: except asyncio.CancelledError:
pass # Normal cancellation when handler completes pass # Normal cancellation when handler completes
finally: finally:
@ -2429,6 +2437,16 @@ class BasePlatformAdapter(ABC):
**_keep_typing_kwargs, **_keep_typing_kwargs,
) )
) )
async def _stop_typing_task() -> None:
typing_task.cancel()
try:
await asyncio.wait_for(asyncio.shield(typing_task), timeout=0.5)
except (asyncio.CancelledError, asyncio.TimeoutError):
# Cancellation cleanup must not block adapter shutdown. The
# typing task is already cancelled; if the parent task is also
# cancelling, let this message-processing task unwind now.
pass
try: try:
await self._run_processing_hook("on_processing_start", event) await self._run_processing_hook("on_processing_start", event)
@ -2651,11 +2669,7 @@ class BasePlatformAdapter(ABC):
_active = self._active_sessions.get(session_key) _active = self._active_sessions.get(session_key)
if _active is not None: if _active is not None:
_active.clear() _active.clear()
typing_task.cancel() await _stop_typing_task()
try:
await typing_task
except asyncio.CancelledError:
pass
# Process pending message in new background task # Process pending message in new background task
await self._process_message_background(pending_event, session_key) await self._process_message_background(pending_event, session_key)
return # Already cleaned up return # Already cleaned up
@ -2703,11 +2717,7 @@ class BasePlatformAdapter(ABC):
except Exception: except Exception:
pass pass
# Stop typing indicator # Stop typing indicator
typing_task.cancel() await _stop_typing_task()
try:
await typing_task
except asyncio.CancelledError:
pass
# Also cancel any platform-level persistent typing tasks (e.g. Discord) # Also cancel any platform-level persistent typing tasks (e.g. Discord)
# that may have been recreated by _keep_typing after the last stop_typing() # that may have been recreated by _keep_typing after the last stop_typing()
try: try:

View file

@ -10530,10 +10530,26 @@ class GatewayRunner:
# Tool progress mode — resolved per-platform with env var fallback # Tool progress mode — resolved per-platform with env var fallback
_resolved_tp = resolve_display_setting(user_config, platform_key, "tool_progress") _resolved_tp = resolve_display_setting(user_config, platform_key, "tool_progress")
_env_tp = os.getenv("HERMES_TOOL_PROGRESS_MODE")
_display_cfg = display_config if isinstance(display_config, dict) else {}
_platforms_cfg = _display_cfg.get("platforms") or {}
_platform_cfg = _platforms_cfg.get(platform_key) or {}
_legacy_tp_overrides = _display_cfg.get("tool_progress_overrides") or {}
_tool_progress_configured = (
"tool_progress" in _display_cfg
or (
isinstance(_platform_cfg, dict)
and "tool_progress" in _platform_cfg
)
or (
isinstance(_legacy_tp_overrides, dict)
and platform_key in _legacy_tp_overrides
)
)
progress_mode = ( progress_mode = (
_resolved_tp _env_tp
or os.getenv("HERMES_TOOL_PROGRESS_MODE") if _env_tp and not _tool_progress_configured
or "all" else (_resolved_tp or _env_tp or "all")
) )
# Disable tool progress for webhooks - they don't support message editing, # Disable tool progress for webhooks - they don't support message editing,
# so each progress line would be sent as a separate message. # so each progress line would be sent as a separate message.

View file

@ -62,6 +62,7 @@ from .config import (
) )
from .whatsapp_identity import ( from .whatsapp_identity import (
canonical_whatsapp_identifier, canonical_whatsapp_identifier,
normalize_whatsapp_identifier, # noqa: F401 - re-exported for gateway.session callers
) )
from utils import atomic_replace from utils import atomic_replace

View file

@ -78,6 +78,14 @@ def _system_package_install_cmd(pkg: str) -> str:
return f"sudo apt install {pkg}" return f"sudo apt install {pkg}"
def _safe_which(cmd: str) -> str | None:
"""shutil.which wrapper resilient to platform monkeypatching in tests."""
try:
return shutil.which(cmd)
except Exception:
return None
def _termux_browser_setup_steps(node_installed: bool) -> list[str]: def _termux_browser_setup_steps(node_installed: bool) -> list[str]:
steps: list[str] = [] steps: list[str] = []
step = 1 step = 1
@ -579,7 +587,7 @@ def run_doctor(args):
except Exception as e: except Exception as e:
check_warn("Auth provider status", f"(could not check: {e})") check_warn("Auth provider status", f"(could not check: {e})")
if shutil.which("codex"): if _safe_which("codex"):
check_ok("codex CLI") check_ok("codex CLI")
else: else:
# Native OAuth uses Hermes' own device-code flow — the Codex CLI is # Native OAuth uses Hermes' own device-code flow — the Codex CLI is
@ -797,13 +805,13 @@ def run_doctor(args):
print(color("◆ External Tools", Colors.CYAN, Colors.BOLD)) print(color("◆ External Tools", Colors.CYAN, Colors.BOLD))
# Git # Git
if shutil.which("git"): if _safe_which("git"):
check_ok("git") check_ok("git")
else: else:
check_warn("git not found", "(optional)") check_warn("git not found", "(optional)")
# ripgrep (optional, for faster file search) # ripgrep (optional, for faster file search)
if shutil.which("rg"): if _safe_which("rg"):
check_ok("ripgrep (rg)", "(faster file search)") check_ok("ripgrep (rg)", "(faster file search)")
else: else:
check_warn("ripgrep (rg) not found", "(file search uses grep fallback)") check_warn("ripgrep (rg) not found", "(file search uses grep fallback)")
@ -812,7 +820,7 @@ def run_doctor(args):
# Docker (optional) # Docker (optional)
terminal_env = os.getenv("TERMINAL_ENV", "local") terminal_env = os.getenv("TERMINAL_ENV", "local")
if terminal_env == "docker": if terminal_env == "docker":
if shutil.which("docker"): if _safe_which("docker"):
# Check if docker daemon is running # Check if docker daemon is running
try: try:
result = subprocess.run(["docker", "info"], capture_output=True, timeout=10) result = subprocess.run(["docker", "info"], capture_output=True, timeout=10)
@ -827,7 +835,7 @@ def run_doctor(args):
check_fail("docker not found", "(required for TERMINAL_ENV=docker)") check_fail("docker not found", "(required for TERMINAL_ENV=docker)")
issues.append("Install Docker or change TERMINAL_ENV") issues.append("Install Docker or change TERMINAL_ENV")
else: else:
if shutil.which("docker"): if _safe_which("docker"):
check_ok("docker", "(optional)") check_ok("docker", "(optional)")
else: else:
if _is_termux(): if _is_termux():
@ -918,7 +926,7 @@ def run_doctor(args):
check_info("Vercel persistence: ephemeral filesystem") check_info("Vercel persistence: ephemeral filesystem")
# Node.js + agent-browser (for browser automation tools) # Node.js + agent-browser (for browser automation tools)
if shutil.which("node"): if _safe_which("node"):
check_ok("Node.js") check_ok("Node.js")
# Check if agent-browser is installed # Check if agent-browser is installed
agent_browser_path = PROJECT_ROOT / "node_modules" / "agent-browser" agent_browser_path = PROJECT_ROOT / "node_modules" / "agent-browser"
@ -944,7 +952,7 @@ def run_doctor(args):
check_warn("Node.js not found", "(optional, needed for browser tools)") check_warn("Node.js not found", "(optional, needed for browser tools)")
# npm audit for all Node.js packages # npm audit for all Node.js packages
if shutil.which("npm"): if _safe_which("npm"):
npm_dirs = [ npm_dirs = [
(PROJECT_ROOT, "Browser tools (agent-browser)"), (PROJECT_ROOT, "Browser tools (agent-browser)"),
(PROJECT_ROOT / "scripts" / "whatsapp-bridge", "WhatsApp bridge"), (PROJECT_ROOT / "scripts" / "whatsapp-bridge", "WhatsApp bridge"),

View file

@ -5398,7 +5398,7 @@ def _warn_stale_dashboard_processes() -> None:
capture_output=True, text=True, timeout=10, capture_output=True, text=True, timeout=10,
) )
if result.returncode == 0: if result.returncode == 0:
for line in result.stdout.split("\n"): for line in getattr(result, "stdout", "").split("\n"):
stripped = line.strip() stripped = line.strip()
if not stripped or "grep" in stripped: if not stripped or "grep" in stripped:
continue continue

View file

@ -6531,6 +6531,9 @@ class AIAgent:
Falls back to _interruptible_api_call on provider errors indicating Falls back to _interruptible_api_call on provider errors indicating
streaming is not supported. streaming is not supported.
""" """
if self._interrupt_requested:
raise InterruptedError("Agent interrupted before streaming API call")
if self.api_mode == "codex_responses": if self.api_mode == "codex_responses":
# Codex streams internally via _run_codex_stream. The main dispatch # Codex streams internally via _run_codex_stream. The main dispatch
# in _interruptible_api_call already calls it; we just need to # in _interruptible_api_call already calls it; we just need to
@ -7189,6 +7192,12 @@ class AIAgent:
# to non-streaming on the next attempt via _disable_streaming. # to non-streaming on the next attempt via _disable_streaming.
result["error"] = e result["error"] = e
return return
except InterruptedError as e:
# The interrupt may be noticed inside the worker thread before
# the polling loop sees it. Surface it through the normal result
# channel so callers never miss a fast pre-retry interrupt.
result["error"] = e
return
finally: finally:
request_client = request_client_holder.get("client") request_client = request_client_holder.get("client")
if request_client is not None: if request_client is not None:

View file

@ -308,14 +308,14 @@ class TestMinimaxPreserveDots:
from agent.anthropic_adapter import normalize_model_name from agent.anthropic_adapter import normalize_model_name
assert normalize_model_name("MiniMax-M2.7", preserve_dots=True) == "MiniMax-M2.7" assert normalize_model_name("MiniMax-M2.7", preserve_dots=True) == "MiniMax-M2.7"
def test_normalize_converts_without_preserve(self): def test_normalize_preserves_non_anthropic_dots_without_preserve(self):
from agent.anthropic_adapter import normalize_model_name from agent.anthropic_adapter import normalize_model_name
# Post-#17171, dots are only converted to hyphens for claude-*/anthropic-* # Non-Anthropic model families use dots as canonical version separators;
# model names. Non-Anthropic models (including MiniMax) keep their dots # only Claude/Anthropic names are hyphen-normalized by default.
# even when preserve_dots=False — that's the fix this test was written
# against the inverse of, so just assert the new invariant directly.
assert normalize_model_name("MiniMax-M2.7", preserve_dots=False) == "MiniMax-M2.7" assert normalize_model_name("MiniMax-M2.7", preserve_dots=False) == "MiniMax-M2.7"
# Claude models still get dotted→hyphenated when preserve_dots=False.
def test_normalize_still_converts_claude_dots_without_preserve(self):
from agent.anthropic_adapter import normalize_model_name
assert normalize_model_name("claude-opus-4.6", preserve_dots=False) == "claude-opus-4-6" assert normalize_model_name("claude-opus-4.6", preserve_dots=False) == "claude-opus-4-6"

View file

@ -20,6 +20,7 @@ test runner at ``scripts/run_tests.sh``.
""" """
import asyncio import asyncio
import logging
import os import os
import re import re
import signal import signal
@ -174,7 +175,10 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
"HERMES_SESSION_KEY", "HERMES_SESSION_KEY",
"HERMES_GATEWAY_SESSION", "HERMES_GATEWAY_SESSION",
"HERMES_PLATFORM", "HERMES_PLATFORM",
"HERMES_MODEL",
"HERMES_INFERENCE_MODEL",
"HERMES_INFERENCE_PROVIDER", "HERMES_INFERENCE_PROVIDER",
"HERMES_TUI_PROVIDER",
"HERMES_MANAGED", "HERMES_MANAGED",
"HERMES_DEV", "HERMES_DEV",
"HERMES_CONTAINER", "HERMES_CONTAINER",
@ -184,6 +188,14 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
"HERMES_BACKGROUND_NOTIFICATIONS", "HERMES_BACKGROUND_NOTIFICATIONS",
"HERMES_EXEC_ASK", "HERMES_EXEC_ASK",
"HERMES_HOME_MODE", "HERMES_HOME_MODE",
"TERMINAL_CWD",
"TERMINAL_ENV",
"TERMINAL_VERCEL_RUNTIME",
"TERMINAL_CONTAINER_CPU",
"TERMINAL_CONTAINER_DISK",
"TERMINAL_CONTAINER_MEMORY",
"TERMINAL_CONTAINER_PERSISTENT",
"TERMINAL_DOCKER_RUN_AS_HOST_USER",
"BROWSER_CDP_URL", "BROWSER_CDP_URL",
"CAMOFOX_URL", "CAMOFOX_URL",
# Platform allowlists — not credentials, but if set from any source # Platform allowlists — not credentials, but if set from any source
@ -326,6 +338,14 @@ def _reset_module_state():
that don't exist yet (test collection before production import) are that don't exist yet (test collection before production import) are
skipped silently production import later creates fresh empty state. skipped silently production import later creates fresh empty state.
""" """
# --- logging — quiet/one-shot paths mutate process-global logger state ---
logging.disable(logging.NOTSET)
for _logger_name in ("tools", "run_agent", "trajectory_compressor", "cron", "hermes_cli"):
_logger = logging.getLogger(_logger_name)
_logger.disabled = False
_logger.setLevel(logging.NOTSET)
_logger.propagate = True
# --- tools.approval — the single biggest source of cross-test pollution --- # --- tools.approval — the single biggest source of cross-test pollution ---
try: try:
from tools import approval as _approval_mod from tools import approval as _approval_mod
@ -380,6 +400,26 @@ def _reset_module_state():
except Exception: except Exception:
pass pass
# --- tools.terminal_tool — active environment/cwd cache ---
# File tools prefer a live terminal cwd when one is cached for the task.
# Clear terminal environments between tests so a prior terminal call can't
# override TERMINAL_CWD in path-resolution tests.
try:
from tools import terminal_tool as _term_mod
_envs_to_cleanup = []
with _term_mod._env_lock:
_envs_to_cleanup = list(_term_mod._active_environments.values())
_term_mod._active_environments.clear()
_term_mod._last_activity.clear()
_term_mod._creation_locks.clear()
for _env in _envs_to_cleanup:
try:
_env.cleanup()
except Exception:
pass
except Exception:
pass
# --- tools.credential_files — ContextVar<dict> --- # --- tools.credential_files — ContextVar<dict> ---
try: try:
from tools import credential_files as _credf_mod from tools import credential_files as _credf_mod

View file

@ -1276,9 +1276,10 @@ class TestMatrixUploadAndSend:
mock_client.send_message_event = AsyncMock(return_value="$event") mock_client.send_message_event = AsyncMock(return_value="$event")
adapter._client = mock_client adapter._client = mock_client
result = await adapter._upload_and_send( with patch.dict("sys.modules", _make_fake_mautrix()):
"!room:example.org", b"secret", "secret.txt", "text/plain", "m.file", result = await adapter._upload_and_send(
) "!room:example.org", b"secret", "secret.txt", "text/plain", "m.file",
)
assert result.success is True assert result.success is True
# Should have uploaded ciphertext, not plaintext # Should have uploaded ciphertext, not plaintext

View file

@ -1470,6 +1470,8 @@ def test_credential_sources_registry_has_expected_steps():
"~/.hermes/.anthropic_oauth.json", "~/.hermes/.anthropic_oauth.json",
"auth.json providers.nous", "auth.json providers.nous",
"auth.json providers.openai-codex + ~/.codex/auth.json", "auth.json providers.openai-codex + ~/.codex/auth.json",
"auth.json providers.minimax-oauth",
"~/.qwen/oauth_creds.json",
"Custom provider config.yaml api_key field", "Custom provider config.yaml api_key field",
} }
missing = required - set(descriptions) missing = required - set(descriptions)

View file

@ -526,6 +526,11 @@ class TestCmdMigrate:
class TestCmdCleanup: class TestCmdCleanup:
"""Test the cleanup command handler.""" """Test the cleanup command handler."""
@pytest.fixture(autouse=True)
def _mock_openclaw_running(self):
with patch.object(claw_mod, "_detect_openclaw_processes", return_value=[]):
yield
def test_no_dirs_found(self, tmp_path, capsys): def test_no_dirs_found(self, tmp_path, capsys):
args = Namespace(source=None, dry_run=False, yes=False) args = Namespace(source=None, dry_run=False, yes=False)
with patch.object(claw_mod, "_find_openclaw_dirs", return_value=[]): with patch.object(claw_mod, "_find_openclaw_dirs", return_value=[]):

View file

@ -72,7 +72,10 @@ class TestLoadConfigExpansion:
monkeypatch.setenv("GOOGLE_API_KEY", "gsk-test-key") monkeypatch.setenv("GOOGLE_API_KEY", "gsk-test-key")
monkeypatch.setenv("TELEGRAM_BOT_TOKEN", "1234567:ABC-token") monkeypatch.setenv("TELEGRAM_BOT_TOKEN", "1234567:ABC-token")
monkeypatch.setattr("hermes_cli.config.get_config_path", lambda: config_file) # Patch the imported function's own globals. Other tests may reload
# hermes_cli.config, making string-target monkeypatches hit a different
# module object than this collection-time imported load_config().
monkeypatch.setitem(load_config.__globals__, "get_config_path", lambda: config_file)
config = load_config() config = load_config()
@ -86,7 +89,7 @@ class TestLoadConfigExpansion:
config_file.write_text(config_yaml) config_file.write_text(config_yaml)
monkeypatch.delenv("NOT_SET_XYZ_123", raising=False) monkeypatch.delenv("NOT_SET_XYZ_123", raising=False)
monkeypatch.setattr("hermes_cli.config.get_config_path", lambda: config_file) monkeypatch.setitem(load_config.__globals__, "get_config_path", lambda: config_file)
config = load_config() config = load_config()

View file

@ -105,7 +105,7 @@ def test_get_container_exec_info_defaults():
) )
with patch("hermes_constants.is_container", return_value=False), \ with patch("hermes_constants.is_container", return_value=False), \
patch("hermes_cli.config.get_hermes_home", return_value=hermes_home), \ patch.dict(get_container_exec_info.__globals__, {"get_hermes_home": lambda: hermes_home}), \
patch.dict(os.environ, {}, clear=False): patch.dict(os.environ, {}, clear=False):
os.environ.pop("HERMES_DEV", None) os.environ.pop("HERMES_DEV", None)
info = get_container_exec_info() info = get_container_exec_info()

View file

@ -255,7 +255,8 @@ class TestLaunchdServiceRecovery:
target = f"{domain}/{label}" target = f"{domain}/{label}"
def fake_run(cmd, check=False, **kwargs): def fake_run(cmd, check=False, **kwargs):
calls.append(cmd) if cmd and cmd[0] == "launchctl":
calls.append(cmd)
if cmd == ["launchctl", "kickstart", target] and calls.count(cmd) == 1: if cmd == ["launchctl", "kickstart", target] and calls.count(cmd) == 1:
raise gateway_cli.subprocess.CalledProcessError(3, cmd, stderr="Could not find service") raise gateway_cli.subprocess.CalledProcessError(3, cmd, stderr="Could not find service")
return SimpleNamespace(returncode=0, stdout="", stderr="") return SimpleNamespace(returncode=0, stdout="", stderr="")
@ -282,7 +283,8 @@ class TestLaunchdServiceRecovery:
target = f"{domain}/{label}" target = f"{domain}/{label}"
def fake_run(cmd, check=False, **kwargs): def fake_run(cmd, check=False, **kwargs):
calls.append(cmd) if cmd and cmd[0] == "launchctl":
calls.append(cmd)
if cmd == ["launchctl", "kickstart", target] and calls.count(cmd) == 1: if cmd == ["launchctl", "kickstart", target] and calls.count(cmd) == 1:
raise gateway_cli.subprocess.CalledProcessError(113, cmd, stderr="Could not find service") raise gateway_cli.subprocess.CalledProcessError(113, cmd, stderr="Could not find service")
return SimpleNamespace(returncode=0, stdout="", stderr="") return SimpleNamespace(returncode=0, stdout="", stderr="")

View file

@ -96,10 +96,17 @@ class TestPtyBridgeIO:
@skip_on_windows @skip_on_windows
class TestPtyBridgeResize: class TestPtyBridgeResize:
def test_resize_updates_child_winsize(self): def test_resize_updates_child_winsize(self):
# tput reads COLUMNS/LINES from the TTY ioctl (TIOCGWINSZ). # Query the TTY ioctl directly instead of using tput, which requires
# Spawn a shell, resize, then ask tput for the dimensions. # TERM and fails in GitHub Actions' non-interactive environment.
winsize_script = (
"import fcntl, struct, termios, time; "
"time.sleep(0.1); "
"rows, cols, *_ = struct.unpack('HHHH', "
"fcntl.ioctl(0, termios.TIOCGWINSZ, b'\\0' * 8)); "
"print(cols); print(rows)"
)
bridge = PtyBridge.spawn( bridge = PtyBridge.spawn(
["/bin/sh", "-c", "sleep 0.1; tput cols; tput lines"], [sys.executable, "-c", winsize_script],
cols=80, cols=80,
rows=24, rows=24,
) )

View file

@ -30,6 +30,17 @@ def _clear_provider_env(monkeypatch):
monkeypatch.delenv(key, raising=False) monkeypatch.delenv(key, raising=False)
def _clear_vercel_env(monkeypatch):
for key in (
"TERMINAL_VERCEL_RUNTIME",
"VERCEL_OIDC_TOKEN",
"VERCEL_TOKEN",
"VERCEL_PROJECT_ID",
"VERCEL_TEAM_ID",
):
monkeypatch.delenv(key, raising=False)
def _stub_tts(monkeypatch): def _stub_tts(monkeypatch):
"""Stub out TTS prompts so setup_model_provider doesn't block.""" """Stub out TTS prompts so setup_model_provider doesn't block."""
monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda q, c, d=0: ( monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda q, c, d=0: (
@ -485,6 +496,7 @@ def test_modal_setup_persists_direct_mode_when_user_chooses_their_own_account(tm
def test_vercel_setup_configures_access_token_auth(tmp_path, monkeypatch): def test_vercel_setup_configures_access_token_auth(tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path)) monkeypatch.setenv("HERMES_HOME", str(tmp_path))
_clear_vercel_env(monkeypatch)
monkeypatch.setenv("VERCEL_OIDC_TOKEN", "old-oidc") monkeypatch.setenv("VERCEL_OIDC_TOKEN", "old-oidc")
monkeypatch.setitem(sys.modules, "vercel", types.ModuleType("vercel")) monkeypatch.setitem(sys.modules, "vercel", types.ModuleType("vercel"))
config = load_config() config = load_config()
@ -515,13 +527,7 @@ def test_vercel_setup_configures_access_token_auth(tmp_path, monkeypatch):
def test_vercel_setup_prefills_project_and_team_from_link_file(tmp_path, monkeypatch): def test_vercel_setup_prefills_project_and_team_from_link_file(tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path)) monkeypatch.setenv("HERMES_HOME", str(tmp_path))
# Sibling test (test_vercel_setup_configures_access_token_auth) calls _clear_vercel_env(monkeypatch)
# save_env_value which mutates os.environ directly and never restores
# it. When xdist schedules both tests in the same worker, VERCEL_*
# from the earlier run masks the .vercel/project.json defaults that
# this test exercises. Clear them before load.
for _leaked in ("VERCEL_TOKEN", "VERCEL_PROJECT_ID", "VERCEL_TEAM_ID", "VERCEL_OIDC_TOKEN"):
monkeypatch.delenv(_leaked, raising=False)
project_root = tmp_path / "project" project_root = tmp_path / "project"
nested = project_root / "app" / "src" nested = project_root / "app" / "src"
nested.mkdir(parents=True) nested.mkdir(parents=True)

View file

@ -29,7 +29,7 @@ class TestReloadEnv:
"""reload_env() adds vars from .env that are not in os.environ.""" """reload_env() adds vars from .env that are not in os.environ."""
env_file = tmp_path / ".env" env_file = tmp_path / ".env"
env_file.write_text("TEST_RELOAD_VAR=hello123\n") env_file.write_text("TEST_RELOAD_VAR=hello123\n")
with patch("hermes_cli.config.get_env_path", return_value=env_file): with patch.dict(reload_env.__globals__, {"get_env_path": lambda: env_file}):
os.environ.pop("TEST_RELOAD_VAR", None) os.environ.pop("TEST_RELOAD_VAR", None)
count = reload_env() count = reload_env()
assert count >= 1 assert count >= 1
@ -40,7 +40,7 @@ class TestReloadEnv:
"""reload_env() updates vars whose value changed on disk.""" """reload_env() updates vars whose value changed on disk."""
env_file = tmp_path / ".env" env_file = tmp_path / ".env"
env_file.write_text("TEST_RELOAD_VAR=old_value\n") env_file.write_text("TEST_RELOAD_VAR=old_value\n")
with patch("hermes_cli.config.get_env_path", return_value=env_file): with patch.dict(reload_env.__globals__, {"get_env_path": lambda: env_file}):
os.environ["TEST_RELOAD_VAR"] = "old_value" os.environ["TEST_RELOAD_VAR"] = "old_value"
# Now change the file # Now change the file
env_file.write_text("TEST_RELOAD_VAR=new_value\n") env_file.write_text("TEST_RELOAD_VAR=new_value\n")
@ -55,7 +55,7 @@ class TestReloadEnv:
env_file.write_text("") # empty .env env_file.write_text("") # empty .env
# Pick a known key from OPTIONAL_ENV_VARS # Pick a known key from OPTIONAL_ENV_VARS
known_key = next(iter(OPTIONAL_ENV_VARS.keys())) known_key = next(iter(OPTIONAL_ENV_VARS.keys()))
with patch("hermes_cli.config.get_env_path", return_value=env_file): with patch.dict(reload_env.__globals__, {"get_env_path": lambda: env_file}):
os.environ[known_key] = "stale_value" os.environ[known_key] = "stale_value"
count = reload_env() count = reload_env()
assert known_key not in os.environ assert known_key not in os.environ
@ -65,7 +65,7 @@ class TestReloadEnv:
"""reload_env() preserves non-Hermes env vars even when absent from .env.""" """reload_env() preserves non-Hermes env vars even when absent from .env."""
env_file = tmp_path / ".env" env_file = tmp_path / ".env"
env_file.write_text("") env_file.write_text("")
with patch("hermes_cli.config.get_env_path", return_value=env_file): with patch.dict(reload_env.__globals__, {"get_env_path": lambda: env_file}):
os.environ["MY_CUSTOM_UNRELATED_VAR"] = "keep_me" os.environ["MY_CUSTOM_UNRELATED_VAR"] = "keep_me"
reload_env() reload_env()
assert os.environ.get("MY_CUSTOM_UNRELATED_VAR") == "keep_me" assert os.environ.get("MY_CUSTOM_UNRELATED_VAR") == "keep_me"
@ -1851,14 +1851,24 @@ class TestPtyWebSocket:
assert b"round-trip-payload" in buf assert b"round-trip-payload" in buf
def test_resize_escape_is_forwarded(self, monkeypatch): def test_resize_escape_is_forwarded(self, monkeypatch):
# Resize escape gets intercepted and applied via TIOCSWINSZ, # Resize escape gets intercepted and applied via TIOCSWINSZ, then the
# then ``tput cols/lines`` reports the new dimensions back. # child reads the TTY ioctl directly. Avoid tput because CI may not set
# TERM for non-interactive shells.
import sys
winsize_script = (
"import fcntl, struct, termios, time; "
"time.sleep(0.15); "
"rows, cols, *_ = struct.unpack('HHHH', "
"fcntl.ioctl(0, termios.TIOCGWINSZ, b'\\0' * 8)); "
"print(cols); print(rows)"
)
monkeypatch.setattr( monkeypatch.setattr(
self.ws_module, self.ws_module,
"_resolve_chat_argv", "_resolve_chat_argv",
# sleep gives the test time to push the resize before tput runs # sleep gives the test time to push the resize before the child reads the ioctl.
lambda resume=None, sidecar_url=None: ( lambda resume=None, sidecar_url=None: (
["/bin/sh", "-c", "sleep 0.15; tput cols; tput lines"], [sys.executable, "-c", winsize_script],
None, None,
None, None,
), ),

View file

@ -8,12 +8,10 @@ effects (terminal, send_message, delegate_task, etc.).
import threading import threading
from unittest.mock import patch from unittest.mock import patch
from run_agent import AIAgent
def _make_agent_stub(agent_cls):
def _make_agent_stub():
"""Create a minimal AIAgent-like object with just enough state for _spawn_background_review.""" """Create a minimal AIAgent-like object with just enough state for _spawn_background_review."""
agent = object.__new__(AIAgent) agent = object.__new__(agent_cls)
agent.model = "test-model" agent.model = "test-model"
agent.platform = "test" agent.platform = "test"
agent.provider = "openai" agent.provider = "openai"
@ -45,14 +43,16 @@ class _SyncThread:
def test_background_review_agent_uses_restricted_toolsets(): def test_background_review_agent_uses_restricted_toolsets():
"""The review agent must only have access to 'memory' and 'skills' toolsets.""" """The review agent must only have access to 'memory' and 'skills' toolsets."""
agent = _make_agent_stub() import run_agent
agent = _make_agent_stub(run_agent.AIAgent)
captured = {} captured = {}
def _capture_init(self, *args, **kwargs): def _capture_init(self, *args, **kwargs):
captured["enabled_toolsets"] = kwargs.get("enabled_toolsets") captured["enabled_toolsets"] = kwargs.get("enabled_toolsets")
raise RuntimeError("stop after capturing init args") raise RuntimeError("stop after capturing init args")
with patch.object(AIAgent, "__init__", _capture_init), \ with patch.object(run_agent.AIAgent, "__init__", _capture_init), \
patch("threading.Thread", _SyncThread): patch("threading.Thread", _SyncThread):
agent._spawn_background_review( agent._spawn_background_review(
messages_snapshot=[], messages_snapshot=[],

View file

@ -96,7 +96,7 @@ class TestCompactBannerSkinIntegration:
set_active_skin("default") set_active_skin("default")
with patch("cli.shutil.get_terminal_size", return_value=SimpleNamespace(columns=90)), \ with patch("cli.shutil.get_terminal_size", return_value=SimpleNamespace(columns=90)), \
patch("cli.format_banner_version_label", return_value="Hermes Agent v0.1.0 (test)"): patch.dict(_build_compact_banner.__globals__, {"format_banner_version_label": lambda: "Hermes Agent v0.1.0 (test)"}):
banner = _build_compact_banner() banner = _build_compact_banner()
assert "NOUS HERMES" in banner assert "NOUS HERMES" in banner
@ -105,7 +105,7 @@ class TestCompactBannerSkinIntegration:
set_active_skin("poseidon") set_active_skin("poseidon")
with patch("cli.shutil.get_terminal_size", return_value=SimpleNamespace(columns=90)), \ with patch("cli.shutil.get_terminal_size", return_value=SimpleNamespace(columns=90)), \
patch("cli.format_banner_version_label", return_value="Hermes Agent v0.1.0 (test)"): patch.dict(_build_compact_banner.__globals__, {"format_banner_version_label": lambda: "Hermes Agent v0.1.0 (test)"}):
banner = _build_compact_banner() banner = _build_compact_banner()
assert "Poseidon Agent" in banner assert "Poseidon Agent" in banner
@ -116,7 +116,7 @@ class TestCompactBannerSkinIntegration:
skin = get_active_skin() skin = get_active_skin()
with patch("cli.shutil.get_terminal_size", return_value=SimpleNamespace(columns=90)), \ with patch("cli.shutil.get_terminal_size", return_value=SimpleNamespace(columns=90)), \
patch("cli.format_banner_version_label", return_value="Hermes Agent v0.1.0 (test)"): patch.dict(_build_compact_banner.__globals__, {"format_banner_version_label": lambda: "Hermes Agent v0.1.0 (test)"}):
banner = _build_compact_banner() banner = _build_compact_banner()
assert skin.get_color("banner_border") in banner assert skin.get_color("banner_border") in banner
@ -127,7 +127,7 @@ class TestCompactBannerSkinIntegration:
set_active_skin("default") set_active_skin("default")
with patch("cli.shutil.get_terminal_size", return_value=SimpleNamespace(columns=90)), \ with patch("cli.shutil.get_terminal_size", return_value=SimpleNamespace(columns=90)), \
patch("cli.format_banner_version_label", return_value="Hermes Agent v1.0 (test) · upstream abc12345"): patch.dict(_build_compact_banner.__globals__, {"format_banner_version_label": lambda: "Hermes Agent v1.0 (test) · upstream abc12345"}):
banner = _build_compact_banner() banner = _build_compact_banner()
assert "upstream abc12345" in banner assert "upstream abc12345" in banner

View file

@ -2544,9 +2544,11 @@ def test_session_create_close_race_does_not_orphan_worker(monkeypatch):
# detection entirely and the test would race a non-event. # detection entirely and the test would race a non-event.
build_started = threading.Event() build_started = threading.Event()
release_build = threading.Event() release_build = threading.Event()
build_entered = threading.Event()
def _slow_make_agent(sid, key, session_id=None): def _slow_make_agent(sid, key, session_id=None):
build_started.set() build_started.set()
build_entered.set()
release_build.wait(timeout=3.0) release_build.wait(timeout=3.0)
return _FakeAgent() return _FakeAgent()
@ -2584,6 +2586,7 @@ def test_session_create_close_race_does_not_orphan_worker(monkeypatch):
) )
assert resp.get("result"), f"got error: {resp.get('error')}" assert resp.get("result"), f"got error: {resp.get('error')}"
sid = resp["result"]["session_id"] sid = resp["result"]["session_id"]
assert build_entered.wait(timeout=1.0), "deferred build did not start"
# Wait until the (deferred) build thread has actually entered # Wait until the (deferred) build thread has actually entered
# _make_agent — otherwise session.close pops _sessions[sid] before # _make_agent — otherwise session.close pops _sessions[sid] before

View file

@ -131,15 +131,15 @@ class TestApprovalHeartbeat:
"""Polling slices don't delay responsiveness — resolve is near-instant.""" """Polling slices don't delay responsiveness — resolve is near-instant."""
from tools.approval import ( from tools.approval import (
check_all_command_guards, check_all_command_guards,
has_blocking_approval,
register_gateway_notify, register_gateway_notify,
resolve_gateway_approval, resolve_gateway_approval,
) )
register_gateway_notify(self.SESSION_KEY, lambda _payload: None)
start_time = time.monotonic()
result_holder: dict = {} result_holder: dict = {}
register_gateway_notify(self.SESSION_KEY, lambda _payload: None)
def _run_check(): def _run_check():
result_holder["result"] = check_all_command_guards( result_holder["result"] = check_all_command_guards(
"rm -rf /tmp/nonexistent-fast-target", "local" "rm -rf /tmp/nonexistent-fast-target", "local"
@ -148,9 +148,18 @@ class TestApprovalHeartbeat:
thread = threading.Thread(target=_run_check, daemon=True) thread = threading.Thread(target=_run_check, daemon=True)
thread.start() thread.start()
# Wait until the worker has actually enqueued the approval. Resolving
# before registration is a test race, not a responsiveness signal.
deadline = time.monotonic() + 5.0
while time.monotonic() < deadline:
if has_blocking_approval(self.SESSION_KEY):
break
time.sleep(0.01)
assert has_blocking_approval(self.SESSION_KEY)
# Resolve almost immediately — the wait loop should return within # Resolve almost immediately — the wait loop should return within
# its current 1s poll slice. # its current 1s poll slice.
time.sleep(0.1) start_time = time.monotonic()
resolve_gateway_approval(self.SESSION_KEY, "once") resolve_gateway_approval(self.SESSION_KEY, "once")
thread.join(timeout=5) thread.join(timeout=5)
elapsed = time.monotonic() - start_time elapsed = time.monotonic() - start_time

View file

@ -354,6 +354,7 @@ class TestOwnerPidCrossProcess:
monkeypatch.setattr( monkeypatch.setattr(
bt, "_requires_real_termux_browser_install", lambda *a: False bt, "_requires_real_termux_browser_install", lambda *a: False
) )
monkeypatch.setattr(bt, "_chromium_installed", lambda: True)
monkeypatch.setattr( monkeypatch.setattr(
bt, "_get_session_info", bt, "_get_session_info",
lambda task_id: {"session_name": session_name}, lambda task_id: {"session_name": session_name},

View file

@ -205,24 +205,28 @@ class TestMacosOsascript:
class TestIsWsl: class TestIsWsl:
def setup_method(self): def setup_method(self):
# _is_wsl is now hermes_constants.is_wsl — reset its cache # _is_wsl is hermes_constants.is_wsl; reset the function's own module
# globals so this stays stable even if hermes_constants was imported
# through a different module object earlier in a large xdist run.
import hermes_constants import hermes_constants
hermes_constants._wsl_detected = None hermes_constants._wsl_detected = None
_is_wsl.__globals__["_wsl_detected"] = None
def teardown_method(self): def teardown_method(self):
# Reset again after the test so we don't leak a cached value # Reset again after the test so we don't leak a cached value
# (True/False) into whichever test the xdist worker runs next. # (True/False) into whichever test the xdist worker runs next.
import hermes_constants import hermes_constants
hermes_constants._wsl_detected = None hermes_constants._wsl_detected = None
_is_wsl.__globals__["_wsl_detected"] = None
def test_wsl2_detected(self): def test_wsl2_detected(self):
content = "Linux version 5.15.0 (microsoft-standard-WSL2)" content = "Linux version 5.15.0 (microsoft-standard-WSL2)"
with patch("builtins.open", mock_open(read_data=content)): with patch.dict(_is_wsl.__globals__, {"open": mock_open(read_data=content)}):
assert _is_wsl() is True assert _is_wsl() is True
def test_wsl1_detected(self): def test_wsl1_detected(self):
content = "Linux version 4.4.0-microsoft-standard" content = "Linux version 4.4.0-microsoft-standard"
with patch("builtins.open", mock_open(read_data=content)): with patch.dict(_is_wsl.__globals__, {"open": mock_open(read_data=content)}):
assert _is_wsl() is True assert _is_wsl() is True
def test_regular_linux(self): def test_regular_linux(self):
@ -234,20 +238,20 @@ class TestIsWsl:
# short-circuits on the cache. setup_method resets, so we just # short-circuits on the cache. setup_method resets, so we just
# need to be sure the patched `open` is actually reached. # need to be sure the patched `open` is actually reached.
content = "Linux version 6.14.0-37-generic (buildd@lcy02-amd64-049)" content = "Linux version 6.14.0-37-generic (buildd@lcy02-amd64-049)"
with patch("hermes_constants.open", mock_open(read_data=content), create=True): with patch.dict(_is_wsl.__globals__, {"open": mock_open(read_data=content)}):
assert _is_wsl() is False assert _is_wsl() is False
def test_proc_version_missing(self): def test_proc_version_missing(self):
with patch("hermes_constants.open", side_effect=FileNotFoundError, create=True): with patch.dict(_is_wsl.__globals__, {"open": MagicMock(side_effect=FileNotFoundError)}):
assert _is_wsl() is False assert _is_wsl() is False
def test_result_is_cached(self): def test_result_is_cached(self):
import hermes_constants
content = "Linux version 5.15.0 (microsoft-standard-WSL2)" content = "Linux version 5.15.0 (microsoft-standard-WSL2)"
with patch("hermes_constants.open", mock_open(read_data=content), create=True) as m: opener = mock_open(read_data=content)
with patch.dict(_is_wsl.__globals__, {"open": opener}):
assert _is_wsl() is True assert _is_wsl() is True
assert _is_wsl() is True assert _is_wsl() is True
m.assert_called_once() # only read once opener.assert_called_once() # only read once
# ── WSL (powershell.exe) ──────────────────────────────────────────────── # ── WSL (powershell.exe) ────────────────────────────────────────────────

View file

@ -16,6 +16,7 @@ import signal
import subprocess import subprocess
import threading import threading
import time import time
from types import SimpleNamespace
import pytest import pytest
@ -37,6 +38,58 @@ def _pgid_still_alive(pgid: int) -> bool:
return False return False
def _process_group_snapshot(pgid: int) -> str:
"""Return a process-table snapshot for diagnostics."""
return subprocess.run(
["ps", "-o", "pid,ppid,pgid,stat,cmd", "-g", str(pgid)],
capture_output=True,
text=True,
check=False,
).stdout.strip()
def _wait_for_pgid_exit(pgid: int, timeout: float = 10.0) -> bool:
"""Wait for a process group to disappear under loaded xdist hosts."""
deadline = time.monotonic() + timeout
while time.monotonic() < deadline:
if not _pgid_still_alive(pgid):
return True
time.sleep(0.1)
return not _pgid_still_alive(pgid)
def test_kill_process_uses_cached_pgid_if_wrapper_already_exited(monkeypatch):
"""If the shell wrapper exits before cleanup, still kill its process group.
Without the cached pgid fallback, ``os.getpgid(proc.pid)`` raises for the
dead wrapper and cleanup falls back to ``proc.kill()``, which cannot reach
orphaned grandchildren still running in the original process group.
"""
env = object.__new__(LocalEnvironment)
proc = SimpleNamespace(
pid=12345,
_hermes_pgid=67890,
poll=lambda: 0,
kill=lambda: None,
)
killpg_calls = []
def fake_getpgid(_pid):
raise ProcessLookupError
def fake_killpg(pgid, sig):
killpg_calls.append((pgid, sig))
if sig == 0:
raise ProcessLookupError
monkeypatch.setattr(os, "getpgid", fake_getpgid)
monkeypatch.setattr(os, "killpg", fake_killpg)
env._kill_process(proc)
assert killpg_calls == [(67890, signal.SIGTERM), (67890, 0)]
def test_wait_for_process_kills_subprocess_on_keyboardinterrupt(): def test_wait_for_process_kills_subprocess_on_keyboardinterrupt():
"""When KeyboardInterrupt arrives mid-poll, the subprocess group must be """When KeyboardInterrupt arrives mid-poll, the subprocess group must be
killed before the exception is re-raised.""" killed before the exception is re-raised."""
@ -118,19 +171,15 @@ def test_wait_for_process_kills_subprocess_on_keyboardinterrupt():
assert not t.is_alive(), "worker didn't exit within 5 s of the interrupt" assert not t.is_alive(), "worker didn't exit within 5 s of the interrupt"
# The critical assertion: the subprocess GROUP must be dead. Not # The critical assertion: the subprocess GROUP must be dead. Not
# just the bash wrapper — the 'sleep 30' child too. # just the bash wrapper — the 'sleep 30' child too. Under xdist load,
# Give the SIGTERM+1s wait+SIGKILL escalation a moment to complete. # process-group disappearance can lag briefly after the worker exits,
deadline = time.monotonic() + 3.0 # especially if the process is already dying or waiting to be reaped.
while time.monotonic() < deadline: assert _wait_for_pgid_exit(pgid), (
if not _pgid_still_alive(pgid):
break
time.sleep(0.1)
assert not _pgid_still_alive(pgid), (
f"subprocess group {pgid} is STILL ALIVE after worker received " f"subprocess group {pgid} is STILL ALIVE after worker received "
f"KeyboardInterrupt — orphan bug regressed. This is the " f"KeyboardInterrupt — orphan bug regressed. This is the "
f"sleep-300-survives-SIGTERM scenario from Physikal's Apr 2026 " f"sleep-300-survives-SIGTERM scenario from Physikal's Apr 2026 "
f"report. See tools/environments/base.py _wait_for_process " f"report. See tools/environments/base.py _wait_for_process "
f"except-block." f"except-block.\n{_process_group_snapshot(pgid)}"
) )
# And the worker should have observed the KeyboardInterrupt (i.e. # And the worker should have observed the KeyboardInterrupt (i.e.
# it re-raised cleanly, not silently swallowed). # it re-raised cleanly, not silently swallowed).

View file

@ -997,10 +997,13 @@ class TestHermesHomeIsolation:
assert "hermes_test" in hermes_home, "Should point to test temp dir" assert "hermes_test" in hermes_home, "Should point to test temp dir"
def test_get_hermes_home_fallback(self): def test_get_hermes_home_fallback(self):
"""Without HERMES_HOME set, falls back to ~/.hermes.""" """Without HERMES_HOME set, falls back to the active OS home."""
from tools.tirith_security import _get_hermes_home from tools.tirith_security import _get_hermes_home
with patch.dict(os.environ, {}, clear=True): with patch.dict(os.environ, {}, clear=True):
# Remove HERMES_HOME entirely # Remove HERMES_HOME entirely. With HOME also absent, expanduser
# falls back to the account database; compute expected under the
# same environment instead of after patch.dict restores HOME.
os.environ.pop("HERMES_HOME", None) os.environ.pop("HERMES_HOME", None)
expected = os.path.join(os.path.expanduser("~"), ".hermes")
result = _get_hermes_home() result = _get_hermes_home()
assert result == os.path.join(os.path.expanduser("~"), ".hermes") assert result == expected

View file

@ -36,14 +36,16 @@ class TestGetProvider:
monkeypatch.setenv("VOICE_TOOLS_OPENAI_KEY", "sk-test") monkeypatch.setenv("VOICE_TOOLS_OPENAI_KEY", "sk-test")
monkeypatch.delenv("GROQ_API_KEY", raising=False) monkeypatch.delenv("GROQ_API_KEY", raising=False)
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \ with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
patch("tools.transcription_tools._HAS_OPENAI", True): patch("tools.transcription_tools._HAS_OPENAI", True), \
patch("tools.transcription_tools._has_local_command", return_value=False):
from tools.transcription_tools import _get_provider from tools.transcription_tools import _get_provider
assert _get_provider({"provider": "local"}) == "none" assert _get_provider({"provider": "local"}) == "none"
def test_local_nothing_available(self, monkeypatch): def test_local_nothing_available(self, monkeypatch):
monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False) monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False)
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \ with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
patch("tools.transcription_tools._HAS_OPENAI", False): patch("tools.transcription_tools._HAS_OPENAI", False), \
patch("tools.transcription_tools._has_local_command", return_value=False):
from tools.transcription_tools import _get_provider from tools.transcription_tools import _get_provider
assert _get_provider({"provider": "local"}) == "none" assert _get_provider({"provider": "local"}) == "none"

View file

@ -36,6 +36,28 @@ class TestProviderSelectionGate:
configure ``{"enabled": True, "provider": ...}`` for explicit tests. configure ``{"enabled": True, "provider": ...}`` for explicit tests.
""" """
def test_import_after_config_env_patch_uses_restored_dotenv_loader(self):
"""Importing STT while hermes_cli.config.get_env_value is patched must
not freeze that temporary helper into this module forever.
"""
import importlib
import hermes_cli.config as config_mod
from tools import transcription_tools as tt
with pytest.MonkeyPatch.context() as mp:
mp.setattr(config_mod, "get_env_value", lambda name, default=None: "")
tt = importlib.reload(tt)
try:
with patch.object(tt, "_HAS_FASTER_WHISPER", False), \
patch.object(tt, "_HAS_OPENAI", True), \
patch.object(tt, "_has_local_command", return_value=False), \
patch("hermes_cli.config.load_env",
return_value={"GROQ_API_KEY": "dotenv-secret"}):
assert tt._get_provider({"enabled": True, "provider": "groq"}) == "groq"
finally:
importlib.reload(tt)
def test_explicit_groq_sees_dotenv(self): def test_explicit_groq_sees_dotenv(self):
from tools import transcription_tools as tt from tools import transcription_tools as tt

View file

@ -758,19 +758,12 @@ class TestValidateAudioFileEdgeCases:
f = tmp_path / "test.ogg" f = tmp_path / "test.ogg"
f.write_bytes(b"data") f.write_bytes(b"data")
from tools.transcription_tools import _validate_audio_file from tools.transcription_tools import _validate_audio_file
real_stat = f.stat()
call_count = 0
def stat_side_effect(*args, **kwargs): with patch("pathlib.Path.exists", return_value=True), \
nonlocal call_count patch("pathlib.Path.is_file", return_value=True), \
call_count += 1 patch("pathlib.Path.stat", side_effect=OSError("disk error")):
# First calls are from exists() and is_file(), let them pass
if call_count <= 2:
return real_stat
raise OSError("disk error")
with patch("pathlib.Path.stat", side_effect=stat_side_effect):
result = _validate_audio_file(str(f)) result = _validate_audio_file(str(f))
assert result is not None assert result is not None
assert "Failed to access" in result["error"] assert "Failed to access" in result["error"]

View file

@ -174,6 +174,45 @@ class TestRegressionGuard:
key while ``os.environ`` does not. key while ``os.environ`` does not.
""" """
def test_import_after_config_env_patch_uses_restored_dotenv_loader(self, tmp_path, monkeypatch):
"""Importing TTS while hermes_cli.config.get_env_value is patched must
not freeze that temporary helper into this module forever.
"""
import importlib
import hermes_cli.config as config_mod
from tools import tts_tool
monkeypatch.delenv("MINIMAX_API_KEY", raising=False)
with pytest.MonkeyPatch.context() as mp:
mp.setattr(config_mod, "get_env_value", lambda name: "")
tts_tool = importlib.reload(tts_tool)
try:
captured: dict = {}
def fake_post(url, **kwargs):
captured["headers"] = kwargs.get("headers", {})
response = MagicMock()
response.json.return_value = {
"data": {"audio": b"\x00".hex()},
"base_resp": {"status_code": 0},
}
response.raise_for_status = MagicMock()
return response
with patch(
"hermes_cli.config.load_env",
return_value={"MINIMAX_API_KEY": "dotenv-secret"},
), patch("requests.post", side_effect=fake_post):
tts_tool._generate_minimax_tts(
"hi", str(tmp_path / "out.mp3"), {}
)
assert captured["headers"]["Authorization"] == "Bearer dotenv-secret"
finally:
importlib.reload(tts_tool)
def test_minimax_missing_when_only_in_dotenv_before_fix(self, tmp_path, monkeypatch): def test_minimax_missing_when_only_in_dotenv_before_fix(self, tmp_path, monkeypatch):
from tools import tts_tool from tools import tts_tool

View file

@ -6,6 +6,7 @@ import shutil
import signal import signal
import subprocess import subprocess
import tempfile import tempfile
import time
from tools.environments.base import BaseEnvironment, _pipe_stdin from tools.environments.base import BaseEnvironment, _pipe_stdin
@ -369,6 +370,11 @@ class LocalEnvironment(BaseEnvironment):
preexec_fn=None if _IS_WINDOWS else os.setsid, preexec_fn=None if _IS_WINDOWS else os.setsid,
cwd=self.cwd, cwd=self.cwd,
) )
if not _IS_WINDOWS:
try:
proc._hermes_pgid = os.getpgid(proc.pid)
except ProcessLookupError:
pass
if stdin_data is not None: if stdin_data is not None:
_pipe_stdin(proc, stdin_data) _pipe_stdin(proc, stdin_data)
@ -381,12 +387,42 @@ class LocalEnvironment(BaseEnvironment):
if _IS_WINDOWS: if _IS_WINDOWS:
proc.terminate() proc.terminate()
else: else:
pgid = os.getpgid(proc.pid) try:
pgid = os.getpgid(proc.pid)
except ProcessLookupError:
pgid = getattr(proc, "_hermes_pgid", None)
if pgid is None:
raise
os.killpg(pgid, signal.SIGTERM) os.killpg(pgid, signal.SIGTERM)
deadline = time.monotonic() + 1.0
while time.monotonic() < deadline:
if proc.poll() is not None:
try:
os.killpg(pgid, 0)
except ProcessLookupError:
return
time.sleep(0.05)
# The shell can exit quickly while a child in the same process
# group is still shutting down. Escalate based on the process
# group, not just the shell wrapper, so interrupted commands do
# not leave orphaned grandchildren under load.
try:
# _IS_WINDOWS is guarded by the enclosing else branch.
os.killpg(pgid, signal.SIGKILL)
except ProcessLookupError:
return
try: try:
proc.wait(timeout=1.0) proc.wait(timeout=1.0)
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
os.killpg(pgid, signal.SIGKILL) pass
deadline = time.monotonic() + 1.0
while time.monotonic() < deadline:
try:
os.killpg(pgid, 0)
except ProcessLookupError:
return
time.sleep(0.05)
except (ProcessLookupError, PermissionError): except (ProcessLookupError, PermissionError):
try: try:
proc.kill() proc.kill()

View file

@ -489,12 +489,15 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
task_data = _read_tracker.setdefault(task_id, { task_data = _read_tracker.setdefault(task_id, {
"last_key": None, "consecutive": 0, "last_key": None, "consecutive": 0,
"read_history": set(), "dedup": {}, "read_history": set(), "dedup": {},
"dedup_hits": {}, "dedup_hits": {}, "read_timestamps": {},
}) })
# Backward-compat for pre-existing tracker entries that predate # Backward-compat for pre-existing tracker entries that predate
# dedup_hits (long-lived task or crossed an upgrade boundary). # dedup_hits/read_timestamps (long-lived task or crossed an
# upgrade boundary).
if "dedup_hits" not in task_data: if "dedup_hits" not in task_data:
task_data["dedup_hits"] = {} task_data["dedup_hits"] = {}
if "read_timestamps" not in task_data:
task_data["read_timestamps"] = {}
cached_mtime = task_data.get("dedup", {}).get(dedup_key) cached_mtime = task_data.get("dedup", {}).get(dedup_key)
if cached_mtime is not None: if cached_mtime is not None:

View file

@ -915,11 +915,12 @@ class MCPServerTask:
except Exception: except Exception:
logger.exception("MCP server '%s': dynamic tool refresh failed", self.name) logger.exception("MCP server '%s': dynamic tool refresh failed", self.name)
def _schedule_tools_refresh(self) -> None: def _schedule_tools_refresh(self) -> asyncio.Task:
"""Schedule a background tool refresh and keep it strongly referenced.""" """Schedule a background tool refresh and keep it strongly referenced."""
task = asyncio.create_task(self._refresh_tools_task()) task = asyncio.create_task(self._refresh_tools_task())
self._pending_refresh_tasks.add(task) self._pending_refresh_tasks.add(task)
task.add_done_callback(self._pending_refresh_tasks.discard) task.add_done_callback(self._pending_refresh_tasks.discard)
return task
def _make_message_handler(self): def _make_message_handler(self):
"""Build a ``message_handler`` callback for ``ClientSession``. """Build a ``message_handler`` callback for ``ClientSession``.
@ -950,6 +951,10 @@ class MCPServerTask:
# a separate task and let the handler return # a separate task and let the handler return
# promptly. # promptly.
self._schedule_tools_refresh() self._schedule_tools_refresh()
# Yield one loop tick so tests and short-lived
# notification contexts can observe the scheduled
# refresh without awaiting the full server RPC.
await asyncio.sleep(0)
case PromptListChangedNotification(): case PromptListChangedNotification():
logger.debug("MCP server '%s': prompts/list_changed (ignored)", self.name) logger.debug("MCP server '%s': prompts/list_changed (ignored)", self.name)
case ResourceListChangedNotification(): case ResourceListChangedNotification():
@ -2005,8 +2010,12 @@ def _make_tool_handler(server_name: str, tool_name: str, tool_timeout: float):
}, ensure_ascii=False) }, ensure_ascii=False)
async def _call(): async def _call():
async with server._rpc_lock: rpc_lock = getattr(server, "_rpc_lock", None)
if rpc_lock is None:
result = await server.session.call_tool(tool_name, arguments=args) result = await server.session.call_tool(tool_name, arguments=args)
else:
async with rpc_lock:
result = await server.session.call_tool(tool_name, arguments=args)
# MCP CallToolResult has .content (list of content blocks) and .isError # MCP CallToolResult has .content (list of content blocks) and .isError
if result.isError: if result.isError:
error_text = "" error_text = ""

View file

@ -42,11 +42,19 @@ from tools.tool_backend_helpers import managed_nous_tools_enabled, resolve_opena
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
try: def get_env_value(name, default=None):
from hermes_cli.config import get_env_value """Read env values through the live config module.
except ImportError:
def get_env_value(name, default=None): Tests may monkeypatch and later restore ``hermes_cli.config.get_env_value``
before this module is imported. Resolve the helper at call time so STT does
not keep a stale imported function for the rest of the test process.
"""
try:
from hermes_cli.config import get_env_value as _get_env_value
except ImportError:
return os.getenv(name, default) return os.getenv(name, default)
value = _get_env_value(name)
return default if value is None else value
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Optional imports — graceful degradation # Optional imports — graceful degradation

View file

@ -44,11 +44,19 @@ from urllib.parse import urljoin
from hermes_constants import display_hermes_home from hermes_constants import display_hermes_home
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
try: def get_env_value(name, default=None):
from hermes_cli.config import get_env_value """Read env values through the live config module.
except ImportError:
def get_env_value(name, default=None): Tests may monkeypatch and later restore ``hermes_cli.config.get_env_value``
before this module is imported. Resolve the helper at call time so TTS does
not keep a stale imported function for the rest of the test process.
"""
try:
from hermes_cli.config import get_env_value as _get_env_value
except ImportError:
return os.getenv(name, default) return os.getenv(name, default)
value = _get_env_value(name)
return default if value is None else value
from tools.managed_tool_gateway import resolve_managed_tool_gateway from tools.managed_tool_gateway import resolve_managed_tool_gateway
from tools.tool_backend_helpers import managed_nous_tools_enabled, prefers_gateway, resolve_openai_audio_api_key from tools.tool_backend_helpers import managed_nous_tools_enabled, prefers_gateway, resolve_openai_audio_api_key
from tools.xai_http import hermes_xai_user_agent from tools.xai_http import hermes_xai_user_agent

View file

@ -1860,6 +1860,18 @@ def _enrich_with_attached_images(user_text: str, image_paths: list[str]) -> str:
return text or "What do you see in this image?" return text or "What do you see in this image?"
def _messages_as_conversation(db, session_id: str, *, include_ancestors: bool = False):
if include_ancestors:
try:
return db.get_messages_as_conversation(
session_id, include_ancestors=True
)
except TypeError as exc:
if "include_ancestors" not in str(exc):
raise
return db.get_messages_as_conversation(session_id)
def _history_to_messages(history: list[dict]) -> list[dict]: def _history_to_messages(history: list[dict]) -> list[dict]:
messages = [] messages = []
tool_call_args = {} tool_call_args = {}
@ -2068,9 +2080,9 @@ def _(rid, params: dict) -> dict:
_enable_gateway_prompts() _enable_gateway_prompts()
try: try:
db.reopen_session(target) db.reopen_session(target)
history = db.get_messages_as_conversation(target) history = _messages_as_conversation(db, target)
display_history = db.get_messages_as_conversation( display_history = _messages_as_conversation(
target, include_ancestors=True db, target, include_ancestors=True
) )
messages = _history_to_messages(display_history) messages = _history_to_messages(display_history)
tokens = _set_session_context(target) tokens = _set_session_context(target)
@ -2215,8 +2227,8 @@ def _(rid, params: dict) -> dict:
db = _get_db() db = _get_db()
if db is not None and session.get("session_key"): if db is not None and session.get("session_key"):
try: try:
history = db.get_messages_as_conversation( history = _messages_as_conversation(
session["session_key"], include_ancestors=True db, session["session_key"], include_ancestors=True
) )
except Exception: except Exception:
pass pass