fix(mcp): suppress interactive OAuth stdin prompts during background discovery (#35927)
When an MCP server requires OAuth, the interactive `hermes` TUI froze on startup: background MCP discovery hit the OAuth flow, which on an interactive TTY spawns a daemon thread doing a blocking `sys.stdin.readline()` (the "paste the redirect URL" fallback in mcp_oauth._wait_for_callback). That thread competes with the TUI's own stdin reader for the same terminal, so keystrokes get swallowed and the TUI appears frozen (up to the 300s OAuth timeout). Reported symptom: "MCP OAuth: authorization required / Open this URL ... the tui is freezing, not respond to typing." Add a thread-local `suppress_interactive_oauth()` context manager in tools/mcp_oauth.py; `_is_interactive()` returns False while it's active, so the stdin paste-thread and prompt are never created. Background discovery (hermes_cli/mcp_startup.py, tui_gateway/entry.py) now runs discovery inside that context, so OAuth-requiring servers soft-skip (raise OAuthNonInteractiveError, already handled) instead of stealing the TUI's stdin. A real `hermes mcp login` on the main thread is unaffected (thread-local). Salvaged from #35945 by @zapabob (authorship preserved via cherry-pick; resolved a conflict against main's new mcp_discovery_timeout / wait_for_mcp_ discovery refactor, keeping both). Verified E2E: with suppression the paste prompt is NOT printed and no stdin thread spawns (raises OAuthNonInteractive soft-skip); without it the prompt shows (the freeze). Mutation-verified (removing the suppress check in _is_interactive fails the regression test). 76 tests pass, ruff clean. Closes #35927. SELF-REVIEW FIX: the original #35945 used threading.local(), which does NOT propagate to the dedicated mcp-event-loop thread where OAuth actually runs (discover_mcp_tools dispatches the connect via run_coroutine_threadsafe), so the suppression was a NO-OP in production (the tests passed only by stubbing out the cross-thread dispatch). Converted to a contextvars.ContextVar, which asyncio copies onto the scheduled coroutine — empirically verified suppression now holds on the mcp-event-loop thread through the real _run_on_mcp_loop path. Added a cross-thread regression test (fails on threading.local, passes on the ContextVar) so the no-op can't regress.
This commit is contained in:
parent
2d8c44ac87
commit
e55ddc3e33
5 changed files with 173 additions and 5 deletions
|
|
@ -3,6 +3,7 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
from contextlib import nullcontext
|
||||
from typing import Optional
|
||||
|
||||
_mcp_discovery_lock = threading.Lock()
|
||||
|
|
@ -36,9 +37,7 @@ def start_background_mcp_discovery(*, logger, thread_name: str) -> None:
|
|||
|
||||
def _discover() -> None:
|
||||
try:
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
|
||||
discover_mcp_tools()
|
||||
_discover_mcp_tools_without_interactive_oauth()
|
||||
except Exception:
|
||||
logger.debug("Background MCP tool discovery failed", exc_info=True)
|
||||
|
||||
|
|
@ -72,6 +71,19 @@ def _resolve_discovery_timeout(explicit: "float | None") -> float:
|
|||
return 1.5
|
||||
|
||||
|
||||
def _discover_mcp_tools_without_interactive_oauth() -> None:
|
||||
"""Run MCP discovery without letting OAuth read from the user's stdin."""
|
||||
try:
|
||||
from tools.mcp_oauth import suppress_interactive_oauth
|
||||
except Exception:
|
||||
suppress_interactive_oauth = nullcontext
|
||||
|
||||
with suppress_interactive_oauth():
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
|
||||
discover_mcp_tools()
|
||||
|
||||
|
||||
def wait_for_mcp_discovery(timeout: "float | None" = None) -> None:
|
||||
"""Wait for background MCP discovery before the first tool snapshot.
|
||||
|
||||
|
|
|
|||
|
|
@ -81,6 +81,9 @@ def test_prepare_agent_startup_backgrounds_blocking_mcp_for_chat(monkeypatch):
|
|||
main_mod._prepare_agent_startup(_agent_args())
|
||||
elapsed = time.monotonic() - start
|
||||
assert elapsed < 0.2
|
||||
deadline = time.monotonic() + 1.0
|
||||
while calls["mcp"] == 0 and time.monotonic() < deadline:
|
||||
time.sleep(0.01)
|
||||
assert calls["mcp"] == 1
|
||||
assert mcp_startup._mcp_discovery_thread is not None
|
||||
assert mcp_startup._mcp_discovery_thread.is_alive()
|
||||
|
|
@ -88,6 +91,50 @@ def test_prepare_agent_startup_backgrounds_blocking_mcp_for_chat(monkeypatch):
|
|||
stop.set()
|
||||
|
||||
|
||||
def test_background_mcp_discovery_suppresses_interactive_oauth(monkeypatch):
|
||||
state = {"active": False, "during_discover": None}
|
||||
|
||||
class SuppressInteractiveOAuth:
|
||||
def __enter__(self):
|
||||
state["active"] = True
|
||||
|
||||
def __exit__(self, *_exc):
|
||||
state["active"] = False
|
||||
|
||||
def _discover():
|
||||
state["during_discover"] = state["active"]
|
||||
|
||||
monkeypatch.setitem(
|
||||
sys.modules,
|
||||
"hermes_cli.config",
|
||||
types.SimpleNamespace(
|
||||
read_raw_config=lambda: {"mcp_servers": {"demo": {"url": "https://mcp.example.test/mcp"}}},
|
||||
),
|
||||
)
|
||||
monkeypatch.setitem(
|
||||
sys.modules,
|
||||
"tools.mcp_oauth",
|
||||
types.SimpleNamespace(
|
||||
suppress_interactive_oauth=lambda: SuppressInteractiveOAuth(),
|
||||
),
|
||||
)
|
||||
monkeypatch.setitem(
|
||||
sys.modules,
|
||||
"tools.mcp_tool",
|
||||
types.SimpleNamespace(discover_mcp_tools=_discover),
|
||||
)
|
||||
|
||||
mcp_startup.start_background_mcp_discovery(
|
||||
logger=types.SimpleNamespace(debug=lambda *_a, **_k: None),
|
||||
thread_name="test-mcp-discovery",
|
||||
)
|
||||
assert mcp_startup._mcp_discovery_thread is not None
|
||||
mcp_startup._mcp_discovery_thread.join(timeout=1.0)
|
||||
|
||||
assert state["during_discover"] is True
|
||||
assert state["active"] is False
|
||||
|
||||
|
||||
def test_prepare_agent_startup_skips_mcp_bootstrap_for_tui_chat(monkeypatch):
|
||||
calls = {"mcp": 0}
|
||||
|
||||
|
|
|
|||
|
|
@ -466,6 +466,63 @@ class TestIsInteractive:
|
|||
monkeypatch.setattr("tools.mcp_oauth.sys.stdin", mock_stdin)
|
||||
assert _is_interactive() is False
|
||||
|
||||
def test_suppress_interactive_oauth_disables_stdin_prompts(self, monkeypatch):
|
||||
import tools.mcp_oauth as mod
|
||||
|
||||
mock_stdin = MagicMock()
|
||||
mock_stdin.isatty.return_value = True
|
||||
monkeypatch.setattr("tools.mcp_oauth.sys.stdin", mock_stdin)
|
||||
|
||||
assert _is_interactive() is True
|
||||
with mod.suppress_interactive_oauth():
|
||||
assert _is_interactive() is False
|
||||
assert _is_interactive() is True
|
||||
|
||||
def test_suppression_propagates_across_run_coroutine_threadsafe(self, monkeypatch):
|
||||
"""#35927 core: suppression set on the discovery thread MUST reach the
|
||||
coroutine asyncio runs on a *different* (event-loop) thread — that is
|
||||
where the OAuth callback / _is_interactive() actually executes via
|
||||
run_coroutine_threadsafe. A threading.local would NOT propagate here
|
||||
(the original fix's defect); a ContextVar does."""
|
||||
import asyncio
|
||||
import threading
|
||||
import tools.mcp_oauth as mod
|
||||
|
||||
mock_stdin = MagicMock()
|
||||
mock_stdin.isatty.return_value = True
|
||||
monkeypatch.setattr("tools.mcp_oauth.sys.stdin", mock_stdin)
|
||||
|
||||
loop = asyncio.new_event_loop()
|
||||
loop_thread = threading.Thread(target=loop.run_forever, daemon=True)
|
||||
loop_thread.start()
|
||||
result = {}
|
||||
try:
|
||||
async def _probe_on_loop_thread():
|
||||
# runs on the loop thread, NOT the one that set suppression
|
||||
return (threading.current_thread() is not discovery_thread,
|
||||
_is_interactive())
|
||||
|
||||
discovery_thread = None
|
||||
|
||||
def _discovery():
|
||||
nonlocal discovery_thread
|
||||
discovery_thread = threading.current_thread()
|
||||
with mod.suppress_interactive_oauth():
|
||||
fut = asyncio.run_coroutine_threadsafe(
|
||||
_probe_on_loop_thread(), loop
|
||||
)
|
||||
result["cross_thread"], result["interactive"] = fut.result(timeout=5)
|
||||
|
||||
dt = threading.Thread(target=_discovery)
|
||||
dt.start()
|
||||
dt.join()
|
||||
finally:
|
||||
loop.call_soon_threadsafe(loop.stop)
|
||||
|
||||
assert result["cross_thread"] is True, "probe must run on the loop thread"
|
||||
# The whole point: suppression must hold on the loop thread.
|
||||
assert result["interactive"] is False
|
||||
|
||||
|
||||
class TestWaitForCallbackNoBlocking:
|
||||
"""_wait_for_callback() must never call input() — it raises instead."""
|
||||
|
|
@ -753,6 +810,26 @@ class TestWaitForCallbackPasteIntegration:
|
|||
err = capsys.readouterr().err
|
||||
assert "paste the redirect URL" not in err
|
||||
|
||||
def test_paste_prompt_NOT_shown_when_interactivity_suppressed(self, monkeypatch, capsys):
|
||||
"""Background MCP discovery must not race the CLI/TUI stdin reader."""
|
||||
import tools.mcp_oauth as mod
|
||||
|
||||
mod._oauth_port = _find_free_port()
|
||||
mock_stdin = MagicMock()
|
||||
mock_stdin.isatty.return_value = True
|
||||
monkeypatch.setattr(mod.sys, "stdin", mock_stdin)
|
||||
|
||||
async def instant_sleep(_):
|
||||
pass
|
||||
|
||||
with patch.object(mod.asyncio, "sleep", instant_sleep):
|
||||
with mod.suppress_interactive_oauth():
|
||||
with pytest.raises(OAuthNonInteractiveError):
|
||||
asyncio.run(_wait_for_callback())
|
||||
err = capsys.readouterr().err
|
||||
assert "paste the redirect URL" not in err
|
||||
mock_stdin.readline.assert_not_called()
|
||||
|
||||
|
||||
class TestPasteCallbackSkipToken:
|
||||
"""User can type `skip` (or similar) at the paste prompt to bail out."""
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ Configuration in config.yaml::
|
|||
"""
|
||||
|
||||
import asyncio
|
||||
import contextvars
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
|
@ -44,6 +45,7 @@ import sys
|
|||
import threading
|
||||
import time
|
||||
import webbrowser
|
||||
from contextlib import contextmanager
|
||||
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
|
@ -92,6 +94,15 @@ class OAuthNonInteractiveError(RuntimeError):
|
|||
# Port used by the most recent build_oauth_auth() call. Exposed so that
|
||||
# tests can verify the callback server and the redirect_uri share a port.
|
||||
_oauth_port: int | None = None
|
||||
# Interactivity gate for OAuth stdin prompts. A ContextVar (NOT threading.local)
|
||||
# is required: background MCP discovery sets this on the discovery thread, but
|
||||
# the actual connect+OAuth runs on the dedicated `mcp-event-loop` thread via
|
||||
# run_coroutine_threadsafe. asyncio copies the *calling context* into the
|
||||
# scheduled coroutine, so a ContextVar propagates across that boundary while a
|
||||
# threading.local would not — see #35927. Default True (interactive allowed).
|
||||
_oauth_interactive_enabled: "contextvars.ContextVar[bool]" = contextvars.ContextVar(
|
||||
"_oauth_interactive_enabled", default=True
|
||||
)
|
||||
|
||||
|
||||
# Skip tokens accepted at the paste prompt — exit OAuth without auth.
|
||||
|
|
@ -137,12 +148,30 @@ def _find_free_port() -> int:
|
|||
|
||||
def _is_interactive() -> bool:
|
||||
"""Return True if we can reasonably expect to interact with a user."""
|
||||
if not _oauth_interactive_enabled.get():
|
||||
return False
|
||||
try:
|
||||
return sys.stdin.isatty()
|
||||
except (AttributeError, ValueError):
|
||||
return False
|
||||
|
||||
|
||||
@contextmanager
|
||||
def suppress_interactive_oauth():
|
||||
"""Disable stdin-based OAuth prompts for the current execution context.
|
||||
|
||||
Uses a ContextVar so the suppression propagates from a background-discovery
|
||||
thread onto the coroutine scheduled (via run_coroutine_threadsafe) on the
|
||||
dedicated MCP event-loop thread — where the OAuth callback actually runs
|
||||
(#35927). A threading.local would not cross that thread boundary.
|
||||
"""
|
||||
token = _oauth_interactive_enabled.set(False)
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
_oauth_interactive_enabled.reset(token)
|
||||
|
||||
|
||||
def _can_open_browser() -> bool:
|
||||
"""Return True if opening a browser is likely to work."""
|
||||
# Explicit SSH session → no local display
|
||||
|
|
|
|||
|
|
@ -293,8 +293,11 @@ def main():
|
|||
if _has_mcp_servers:
|
||||
def _discover_mcp_background() -> None:
|
||||
try:
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
discover_mcp_tools()
|
||||
from hermes_cli.mcp_startup import (
|
||||
_discover_mcp_tools_without_interactive_oauth,
|
||||
)
|
||||
|
||||
_discover_mcp_tools_without_interactive_oauth()
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Background MCP tool discovery failed", exc_info=True
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue