fix(mcp): suppress interactive OAuth stdin prompts during background discovery (#35927)

When an MCP server requires OAuth, the interactive `hermes` TUI froze on
startup: background MCP discovery hit the OAuth flow, which on an interactive
TTY spawns a daemon thread doing a blocking `sys.stdin.readline()` (the
"paste the redirect URL" fallback in mcp_oauth._wait_for_callback). That
thread competes with the TUI's own stdin reader for the same terminal, so
keystrokes get swallowed and the TUI appears frozen (up to the 300s OAuth
timeout). Reported symptom: "MCP OAuth: authorization required / Open this URL
... the tui is freezing, not respond to typing."

Add a thread-local `suppress_interactive_oauth()` context manager in
tools/mcp_oauth.py; `_is_interactive()` returns False while it's active, so the
stdin paste-thread and prompt are never created. Background discovery
(hermes_cli/mcp_startup.py, tui_gateway/entry.py) now runs discovery inside
that context, so OAuth-requiring servers soft-skip (raise
OAuthNonInteractiveError, already handled) instead of stealing the TUI's stdin.
A real `hermes mcp login` on the main thread is unaffected (thread-local).

Salvaged from #35945 by @zapabob (authorship preserved via cherry-pick;
resolved a conflict against main's new mcp_discovery_timeout / wait_for_mcp_
discovery refactor, keeping both). Verified E2E: with suppression the paste
prompt is NOT printed and no stdin thread spawns (raises OAuthNonInteractive
soft-skip); without it the prompt shows (the freeze). Mutation-verified
(removing the suppress check in _is_interactive fails the regression test).
76 tests pass, ruff clean.

Closes #35927.

SELF-REVIEW FIX: the original #35945 used threading.local(), which does NOT
propagate to the dedicated mcp-event-loop thread where OAuth actually runs
(discover_mcp_tools dispatches the connect via run_coroutine_threadsafe), so
the suppression was a NO-OP in production (the tests passed only by stubbing
out the cross-thread dispatch). Converted to a contextvars.ContextVar, which
asyncio copies onto the scheduled coroutine — empirically verified suppression
now holds on the mcp-event-loop thread through the real _run_on_mcp_loop path.
Added a cross-thread regression test (fails on threading.local, passes on the
ContextVar) so the no-op can't regress.
This commit is contained in:
zapabob 2026-06-27 03:36:48 +05:30 committed by kshitij
parent 2d8c44ac87
commit e55ddc3e33
5 changed files with 173 additions and 5 deletions

View file

@ -3,6 +3,7 @@
from __future__ import annotations
import threading
from contextlib import nullcontext
from typing import Optional
_mcp_discovery_lock = threading.Lock()
@ -36,9 +37,7 @@ def start_background_mcp_discovery(*, logger, thread_name: str) -> None:
def _discover() -> None:
try:
from tools.mcp_tool import discover_mcp_tools
discover_mcp_tools()
_discover_mcp_tools_without_interactive_oauth()
except Exception:
logger.debug("Background MCP tool discovery failed", exc_info=True)
@ -72,6 +71,19 @@ def _resolve_discovery_timeout(explicit: "float | None") -> float:
return 1.5
def _discover_mcp_tools_without_interactive_oauth() -> None:
"""Run MCP discovery without letting OAuth read from the user's stdin."""
try:
from tools.mcp_oauth import suppress_interactive_oauth
except Exception:
suppress_interactive_oauth = nullcontext
with suppress_interactive_oauth():
from tools.mcp_tool import discover_mcp_tools
discover_mcp_tools()
def wait_for_mcp_discovery(timeout: "float | None" = None) -> None:
"""Wait for background MCP discovery before the first tool snapshot.

View file

@ -81,6 +81,9 @@ def test_prepare_agent_startup_backgrounds_blocking_mcp_for_chat(monkeypatch):
main_mod._prepare_agent_startup(_agent_args())
elapsed = time.monotonic() - start
assert elapsed < 0.2
deadline = time.monotonic() + 1.0
while calls["mcp"] == 0 and time.monotonic() < deadline:
time.sleep(0.01)
assert calls["mcp"] == 1
assert mcp_startup._mcp_discovery_thread is not None
assert mcp_startup._mcp_discovery_thread.is_alive()
@ -88,6 +91,50 @@ def test_prepare_agent_startup_backgrounds_blocking_mcp_for_chat(monkeypatch):
stop.set()
def test_background_mcp_discovery_suppresses_interactive_oauth(monkeypatch):
state = {"active": False, "during_discover": None}
class SuppressInteractiveOAuth:
def __enter__(self):
state["active"] = True
def __exit__(self, *_exc):
state["active"] = False
def _discover():
state["during_discover"] = state["active"]
monkeypatch.setitem(
sys.modules,
"hermes_cli.config",
types.SimpleNamespace(
read_raw_config=lambda: {"mcp_servers": {"demo": {"url": "https://mcp.example.test/mcp"}}},
),
)
monkeypatch.setitem(
sys.modules,
"tools.mcp_oauth",
types.SimpleNamespace(
suppress_interactive_oauth=lambda: SuppressInteractiveOAuth(),
),
)
monkeypatch.setitem(
sys.modules,
"tools.mcp_tool",
types.SimpleNamespace(discover_mcp_tools=_discover),
)
mcp_startup.start_background_mcp_discovery(
logger=types.SimpleNamespace(debug=lambda *_a, **_k: None),
thread_name="test-mcp-discovery",
)
assert mcp_startup._mcp_discovery_thread is not None
mcp_startup._mcp_discovery_thread.join(timeout=1.0)
assert state["during_discover"] is True
assert state["active"] is False
def test_prepare_agent_startup_skips_mcp_bootstrap_for_tui_chat(monkeypatch):
calls = {"mcp": 0}

View file

@ -466,6 +466,63 @@ class TestIsInteractive:
monkeypatch.setattr("tools.mcp_oauth.sys.stdin", mock_stdin)
assert _is_interactive() is False
def test_suppress_interactive_oauth_disables_stdin_prompts(self, monkeypatch):
import tools.mcp_oauth as mod
mock_stdin = MagicMock()
mock_stdin.isatty.return_value = True
monkeypatch.setattr("tools.mcp_oauth.sys.stdin", mock_stdin)
assert _is_interactive() is True
with mod.suppress_interactive_oauth():
assert _is_interactive() is False
assert _is_interactive() is True
def test_suppression_propagates_across_run_coroutine_threadsafe(self, monkeypatch):
"""#35927 core: suppression set on the discovery thread MUST reach the
coroutine asyncio runs on a *different* (event-loop) thread that is
where the OAuth callback / _is_interactive() actually executes via
run_coroutine_threadsafe. A threading.local would NOT propagate here
(the original fix's defect); a ContextVar does."""
import asyncio
import threading
import tools.mcp_oauth as mod
mock_stdin = MagicMock()
mock_stdin.isatty.return_value = True
monkeypatch.setattr("tools.mcp_oauth.sys.stdin", mock_stdin)
loop = asyncio.new_event_loop()
loop_thread = threading.Thread(target=loop.run_forever, daemon=True)
loop_thread.start()
result = {}
try:
async def _probe_on_loop_thread():
# runs on the loop thread, NOT the one that set suppression
return (threading.current_thread() is not discovery_thread,
_is_interactive())
discovery_thread = None
def _discovery():
nonlocal discovery_thread
discovery_thread = threading.current_thread()
with mod.suppress_interactive_oauth():
fut = asyncio.run_coroutine_threadsafe(
_probe_on_loop_thread(), loop
)
result["cross_thread"], result["interactive"] = fut.result(timeout=5)
dt = threading.Thread(target=_discovery)
dt.start()
dt.join()
finally:
loop.call_soon_threadsafe(loop.stop)
assert result["cross_thread"] is True, "probe must run on the loop thread"
# The whole point: suppression must hold on the loop thread.
assert result["interactive"] is False
class TestWaitForCallbackNoBlocking:
"""_wait_for_callback() must never call input() — it raises instead."""
@ -753,6 +810,26 @@ class TestWaitForCallbackPasteIntegration:
err = capsys.readouterr().err
assert "paste the redirect URL" not in err
def test_paste_prompt_NOT_shown_when_interactivity_suppressed(self, monkeypatch, capsys):
"""Background MCP discovery must not race the CLI/TUI stdin reader."""
import tools.mcp_oauth as mod
mod._oauth_port = _find_free_port()
mock_stdin = MagicMock()
mock_stdin.isatty.return_value = True
monkeypatch.setattr(mod.sys, "stdin", mock_stdin)
async def instant_sleep(_):
pass
with patch.object(mod.asyncio, "sleep", instant_sleep):
with mod.suppress_interactive_oauth():
with pytest.raises(OAuthNonInteractiveError):
asyncio.run(_wait_for_callback())
err = capsys.readouterr().err
assert "paste the redirect URL" not in err
mock_stdin.readline.assert_not_called()
class TestPasteCallbackSkipToken:
"""User can type `skip` (or similar) at the paste prompt to bail out."""

View file

@ -33,6 +33,7 @@ Configuration in config.yaml::
"""
import asyncio
import contextvars
import json
import logging
import os
@ -44,6 +45,7 @@ import sys
import threading
import time
import webbrowser
from contextlib import contextmanager
from http.server import BaseHTTPRequestHandler, HTTPServer
from pathlib import Path
from typing import Any
@ -92,6 +94,15 @@ class OAuthNonInteractiveError(RuntimeError):
# Port used by the most recent build_oauth_auth() call. Exposed so that
# tests can verify the callback server and the redirect_uri share a port.
_oauth_port: int | None = None
# Interactivity gate for OAuth stdin prompts. A ContextVar (NOT threading.local)
# is required: background MCP discovery sets this on the discovery thread, but
# the actual connect+OAuth runs on the dedicated `mcp-event-loop` thread via
# run_coroutine_threadsafe. asyncio copies the *calling context* into the
# scheduled coroutine, so a ContextVar propagates across that boundary while a
# threading.local would not — see #35927. Default True (interactive allowed).
_oauth_interactive_enabled: "contextvars.ContextVar[bool]" = contextvars.ContextVar(
"_oauth_interactive_enabled", default=True
)
# Skip tokens accepted at the paste prompt — exit OAuth without auth.
@ -137,12 +148,30 @@ def _find_free_port() -> int:
def _is_interactive() -> bool:
"""Return True if we can reasonably expect to interact with a user."""
if not _oauth_interactive_enabled.get():
return False
try:
return sys.stdin.isatty()
except (AttributeError, ValueError):
return False
@contextmanager
def suppress_interactive_oauth():
"""Disable stdin-based OAuth prompts for the current execution context.
Uses a ContextVar so the suppression propagates from a background-discovery
thread onto the coroutine scheduled (via run_coroutine_threadsafe) on the
dedicated MCP event-loop thread where the OAuth callback actually runs
(#35927). A threading.local would not cross that thread boundary.
"""
token = _oauth_interactive_enabled.set(False)
try:
yield
finally:
_oauth_interactive_enabled.reset(token)
def _can_open_browser() -> bool:
"""Return True if opening a browser is likely to work."""
# Explicit SSH session → no local display

View file

@ -293,8 +293,11 @@ def main():
if _has_mcp_servers:
def _discover_mcp_background() -> None:
try:
from tools.mcp_tool import discover_mcp_tools
discover_mcp_tools()
from hermes_cli.mcp_startup import (
_discover_mcp_tools_without_interactive_oauth,
)
_discover_mcp_tools_without_interactive_oauth()
except Exception:
logger.warning(
"Background MCP tool discovery failed", exc_info=True