fix(browser): surface launch diagnostics when debug browser never opens the CDP port

Follow-up to the salvaged early-exit retry fix (#35617): the debug-browser
launch path was fire-and-forget (stderr to DEVNULL, no logging), so every
platform failure — Windows singleton forward to an existing instance, bad
profile dir, missing shared libraries, policy blocks — collapsed into the
same unactionable 'port 9222 isn't responding yet' message and debug
reports contained nothing.

- launch_chrome_debug() returns a structured ChromeDebugLaunch with
  per-candidate attempts (state, exit code, stderr tail)
- browser stderr is captured to <hermes_home>/chrome-debug/launch-stderr.log
- clean exit (code 0) without the port opening is detected as Chromium's
  single-instance forward and produces a targeted user hint to close all
  running instances of that browser
- crash exits surface the stderr tail (e.g. missing libnspr4.so)
- every spawn/exit is logged to agent.log so hermes debug share captures it
- CLI (/browser connect) and TUI/desktop (browser.manage) both print the hint
This commit is contained in:
teknium1 2026-07-03 00:37:30 -07:00 committed by Teknium
parent c74f093523
commit eb99f82ce4
4 changed files with 197 additions and 18 deletions

View file

@ -2,15 +2,19 @@
from __future__ import annotations
import logging
import os
import platform
import shlex
import shutil
import subprocess
import time
from dataclasses import dataclass, field
from hermes_constants import get_hermes_home
# Module-level logger; the launch diagnostics in this module are emitted through it.
logger = logging.getLogger(__name__)
# Default Chrome DevTools Protocol (CDP) remote-debugging port used when callers pass none.
DEFAULT_BROWSER_CDP_PORT = 9222
# Loopback HTTP endpoint built from the default port.
DEFAULT_BROWSER_CDP_URL = f"http://127.0.0.1:{DEFAULT_BROWSER_CDP_PORT}"
@ -222,24 +226,124 @@ def _wait_for_browser_debug_ready_or_exit(
return "starting"
def try_launch_chrome_debug(port: int = DEFAULT_BROWSER_CDP_PORT, system: str | None = None) -> bool:
# File name, inside the chrome-debug data directory, that receives the launched browser's stderr.
_LAUNCH_STDERR_LOG = "launch-stderr.log"
# Number of trailing bytes of that log kept as the diagnostic stderr tail.
_STDERR_TAIL_LIMIT = 2000
@dataclass
class LaunchAttempt:
    """Record of what happened when one candidate browser binary was launched.

    ``state`` is one of ``"ready"``, ``"starting"``, ``"exited"`` or
    ``"spawn-failed"``. ``returncode`` and ``stderr_tail`` stay at their
    defaults unless the launcher fills them in for a process that exited.
    """

    binary: str
    state: str
    returncode: int | None = None
    stderr_tail: str = ""
@dataclass
class ChromeDebugLaunch:
    """Structured result of ``launch_chrome_debug``.

    ``launched`` mirrors the legacy boolean contract: a launch command was
    executed and the browser is ready or still starting (it does NOT
    guarantee the CDP port ever opens). ``attempts`` carries per-candidate
    diagnostics so callers can explain *why* nothing came up.
    """

    launched: bool = False
    attempts: list[LaunchAttempt] = field(default_factory=list)

    @property
    def hint(self) -> str | None:
        """Best user-facing explanation for a failed/soft launch, if any.

        Only attempts whose state is ``"exited"`` are considered, in this
        priority order:

        1. exit code 0 -- reported as a single-instance forward;
        2. any captured stderr -- its last non-blank line is quoted;
        3. otherwise the bare exit code, so a silent crash still gets a
           specific message.

        Returns ``None`` when no attempt exited.
        """
        exited = [attempt for attempt in self.attempts if attempt.state == "exited"]

        for attempt in exited:
            if attempt.returncode == 0:
                name = os.path.basename(attempt.binary)
                return (
                    f"{name} exited immediately without opening the debug port — an already-running "
                    f"{name} instance likely absorbed the launch (Chromium's single-instance "
                    "behavior). Close ALL of its processes (including background/tray instances) "
                    "and retry /browser connect."
                )

        for attempt in exited:
            # Skip blank lines so trailing whitespace never yields an empty reason.
            lines = [line.strip() for line in attempt.stderr_tail.splitlines() if line.strip()]
            if lines:
                return (
                    f"{os.path.basename(attempt.binary)} exited before the debug port opened: "
                    f"{lines[-1]}"
                )

        # Nothing on stderr: fall back to the exit code of the first exited attempt.
        for attempt in exited:
            code = "unknown" if attempt.returncode is None else attempt.returncode
            return (
                f"{os.path.basename(attempt.binary)} exited with code {code} before the debug "
                "port opened and produced no error output."
            )
        return None
def _read_stderr_tail(path: str, limit: int | None = None) -> str:
    """Return the decoded, stripped tail of the file at *path*.

    Only the last ``limit`` bytes are read (seeking from the end) so the
    rest of the log is never loaded into memory.

    Args:
        path: File to read.
        limit: Maximum number of trailing bytes to keep; defaults to
            ``_STDERR_TAIL_LIMIT``. A non-positive limit yields ``""``.

    Returns:
        The tail decoded as UTF-8 (undecodable bytes replaced) with
        surrounding whitespace stripped, or ``""`` if the file cannot be read.
    """
    if limit is None:
        limit = _STDERR_TAIL_LIMIT
    if limit <= 0:
        return ""
    try:
        with open(path, "rb") as fh:
            fh.seek(0, os.SEEK_END)
            # Clamp at 0 so files shorter than the limit are read from the start.
            fh.seek(max(fh.tell() - limit, 0))
            data = fh.read(limit)
    except OSError:
        return ""
    return data.decode("utf-8", errors="replace").strip()
def _open_launch_stderr_log(path: str):
    """Open the stderr capture file for writing, or return ``None`` if that fails."""
    try:
        return open(path, "wb")
    except OSError as exc:
        logger.info(
            "browser debug launch: cannot open %s for stderr capture (%s); discarding browser stderr",
            path,
            exc,
        )
        return None


def launch_chrome_debug(
    port: int = DEFAULT_BROWSER_CDP_PORT, system: str | None = None
) -> ChromeDebugLaunch:
    """Launch a Chromium-family browser with remote debugging, with diagnostics.

    Tries each detected candidate binary in turn. A candidate that exits
    before the CDP port opens (crash, singleton forward to an existing
    instance, bad profile dir) is logged with exit code and a stderr tail
    and the next candidate is tried.

    Args:
        port: Remote-debugging port passed to the browser.
        system: Platform name; defaults to ``platform.system()``.

    Returns:
        A ``ChromeDebugLaunch`` whose ``launched`` flag is set as soon as one
        candidate is ready or still starting, and whose ``attempts`` list
        records every candidate tried. With no candidates the result is
        empty and ``launched`` stays ``False``.
    """
    system = system or platform.system()
    result = ChromeDebugLaunch()
    candidates = get_chrome_debug_candidates(system)
    if not candidates:
        logger.info("browser debug launch: no Chromium-family binary found (system=%s)", system)
        return result

    data_dir = chrome_debug_data_dir()
    os.makedirs(data_dir, exist_ok=True)
    stderr_path = os.path.join(data_dir, _LAUNCH_STDERR_LOG)

    for candidate in candidates:
        # The log is truncated per candidate; failing to open it must not block the launch.
        stderr_file = _open_launch_stderr_log(stderr_path)
        try:
            proc = subprocess.Popen(
                [candidate, *_chrome_debug_args(port)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL if stderr_file is None else stderr_file,
                **_detach_kwargs(system),
            )
        except Exception as exc:
            result.attempts.append(LaunchAttempt(binary=candidate, state="spawn-failed"))
            logger.info("browser debug launch: failed to spawn %s: %s", candidate, exc)
            continue
        finally:
            # Our handle is only needed while spawning; the child writes through its own copy.
            if stderr_file is not None:
                stderr_file.close()

        logger.info(
            "browser debug launch: spawned %s (pid=%s) with --remote-debugging-port=%d",
            candidate,
            getattr(proc, "pid", None),
            port,
        )
        state = _wait_for_browser_debug_ready_or_exit(proc, port)
        attempt = LaunchAttempt(binary=candidate, state=state)
        result.attempts.append(attempt)
        if state != "exited":
            result.launched = True
            return result

        attempt.returncode = getattr(proc, "returncode", None)
        if stderr_file is not None:
            # Only read the log when this attempt wrote it, never stale content from an earlier run.
            attempt.stderr_tail = _read_stderr_tail(stderr_path)
        logger.warning(
            "browser debug launch: %s exited (code=%s) before port %d opened%s",
            candidate,
            attempt.returncode,
            port,
            f"; stderr tail: {attempt.stderr_tail}" if attempt.stderr_tail else "",
        )
    return result
def try_launch_chrome_debug(port: int = DEFAULT_BROWSER_CDP_PORT, system: str | None = None) -> bool:
    """Legacy boolean wrapper: run ``launch_chrome_debug`` and report only its ``launched`` flag."""
    outcome = launch_chrome_debug(port, system)
    return outcome.launched

View file

@ -31,6 +31,7 @@ from hermes_constants import display_hermes_home, is_termux as _is_termux_enviro
from hermes_cli.browser_connect import (
DEFAULT_BROWSER_CDP_URL,
is_browser_debug_ready,
launch_chrome_debug,
manual_chrome_debug_command,
)
@ -1850,8 +1851,8 @@ class CLICommandsMixin:
elif cdp_url == _DEFAULT_CDP:
# Try to auto-launch a Chromium-family browser with remote debugging
print(" Chromium-family browser isn't running with remote debugging — attempting to launch...")
_launched = self._try_launch_chrome_debug(_port, _plat.system())
if _launched:
_launch = launch_chrome_debug(_port, _plat.system())
if _launch.launched:
# Wait for the DevTools discovery endpoint to come up
for _wait in range(10):
if is_browser_debug_ready(cdp_url, timeout=1.0):
@ -1865,6 +1866,9 @@ class CLICommandsMixin:
print(" Try again in a few seconds — the debug instance may still be starting")
else:
print(" ⚠ Could not auto-launch a Chromium-family browser")
_hint = _launch.hint
if _hint:
print(f" {_hint}")
sys_name = _plat.system()
chrome_cmd = manual_chrome_debug_command(_port, sys_name)
if chrome_cmd:

View file

@ -12,6 +12,7 @@ from hermes_cli.browser_connect import (
_wait_for_browser_debug_ready_or_exit,
get_chrome_debug_candidates,
is_browser_debug_ready,
launch_chrome_debug,
manual_chrome_debug_command,
)
@ -197,6 +198,7 @@ class TestChromeDebugLaunch:
return object()
with patch("hermes_cli.browser_connect.get_chrome_debug_candidates", return_value=[brave, chrome]), \
patch("hermes_cli.browser_connect._wait_for_browser_debug_ready_or_exit", return_value="ready"), \
patch("subprocess.Popen", side_effect=fake_popen):
assert HermesCLI._try_launch_chrome_debug(9222, "Linux") is True
@ -236,6 +238,71 @@ class TestChromeDebugLaunch:
assert attempts == [brave, chrome]
def test_launch_result_hints_singleton_forward_on_clean_exit(self, tmp_path, monkeypatch):
    """Exit code 0 with the port still closed is reported as a single-instance forward.

    The fake process has already exited cleanly, so the launch must not
    count as launched and the hint must name the binary and point at an
    already-running instance.
    """
    chrome = r"C:\Program Files\Google\Chrome\Application\chrome.exe"

    class _CleanExitProc:
        pid = 1234
        returncode = 0

        def poll(self):
            return 0

    monkeypatch.setattr(
        "hermes_cli.browser_connect.chrome_debug_data_dir", lambda: str(tmp_path)
    )

    with patch("hermes_cli.browser_connect.get_chrome_debug_candidates", return_value=[chrome]), \
         patch("hermes_cli.browser_connect.is_browser_debug_ready", return_value=False), \
         patch("subprocess.Popen", return_value=_CleanExitProc()):
        outcome = launch_chrome_debug(9222, "Windows")

    first = outcome.attempts[0]
    assert outcome.launched is False
    assert first.state == "exited"
    assert first.returncode == 0
    assert outcome.hint is not None
    assert "already-running" in outcome.hint
    assert "chrome.exe" in outcome.hint
def test_launch_result_surfaces_stderr_tail_on_crash(self, tmp_path, monkeypatch):
    """A crashing candidate's stderr reaches both the attempt record and the hint."""
    chrome = "/usr/bin/google-chrome"
    log_path = tmp_path / "launch-stderr.log"

    class _CrashedProc:
        pid = 4321
        returncode = 127

        def __init__(self, stderr_path):
            # Simulate the browser writing to the redirected stderr file.
            with open(stderr_path, "w", encoding="utf-8") as fh:
                fh.write("error while loading shared libraries: libnspr4.so\n")

        def poll(self):
            return 127

    def fake_popen(*args, **kwargs):
        return _CrashedProc(log_path)

    monkeypatch.setattr(
        "hermes_cli.browser_connect.chrome_debug_data_dir", lambda: str(tmp_path)
    )

    with patch("hermes_cli.browser_connect.get_chrome_debug_candidates", return_value=[chrome]), \
         patch("hermes_cli.browser_connect.is_browser_debug_ready", return_value=False), \
         patch("subprocess.Popen", side_effect=fake_popen):
        outcome = launch_chrome_debug(9222, "Linux")

    crashed = outcome.attempts[0]
    assert outcome.launched is False
    assert crashed.returncode == 127
    assert "libnspr4.so" in crashed.stderr_tail
    assert outcome.hint is not None
    assert "libnspr4.so" in outcome.hint
def test_launch_result_no_hint_when_no_candidates(self):
    """With no candidate binaries, nothing is attempted and no hint is produced."""
    no_binaries = patch(
        "hermes_cli.browser_connect.get_chrome_debug_candidates", return_value=[]
    )
    with no_binaries:
        outcome = launch_chrome_debug(9222, "Linux")

    assert outcome.launched is False
    assert outcome.attempts == []
    assert outcome.hint is None
def test_manual_command_uses_wsl_windows_chrome_when_available(self):
chrome = "/mnt/c/Program Files/Google/Chrome/Application/chrome.exe"

View file

@ -13217,13 +13217,14 @@ def _browser_connect(rid, params: dict) -> dict:
ok = any(_http_ok(p, timeout=2.0) for p in probes)
if not ok and _is_default_local_cdp(parsed):
from hermes_cli.browser_connect import try_launch_chrome_debug
from hermes_cli.browser_connect import launch_chrome_debug
announce(
"Chromium-family browser isn't running with remote debugging — attempting to launch..."
)
if try_launch_chrome_debug(port, system):
launch = launch_chrome_debug(port, system)
if launch.launched:
for _ in range(20):
time.sleep(0.5)
if any(_http_ok(p, timeout=1.0) for p in probes):
@ -13233,6 +13234,9 @@ def _browser_connect(rid, params: dict) -> dict:
if ok:
announce(f"Chromium-family browser launched and listening on port {port}")
else:
hint = launch.hint
if hint:
announce(hint, level="error")
for line in _failure_messages(url, port, system)[1:]:
announce(line, level="error")
return _ok(