diff --git a/apps/bootstrap-installer/src-tauri/src/update.rs b/apps/bootstrap-installer/src-tauri/src/update.rs index c085ef60a..28597600e 100644 --- a/apps/bootstrap-installer/src-tauri/src/update.rs +++ b/apps/bootstrap-installer/src-tauri/src/update.rs @@ -230,6 +230,14 @@ async fn run_update(app: AppHandle) -> Result<()> { // us, and wait_for_install_locks_free below force-kills any straggler — so by the // time `hermes update` runs there is no legitimate hermes.exe to protect, // and the guard would only produce a false "Hermes is still running" stop. + // + // NOTE: --force does NOT bypass the venv-python holder guard (that needs + // an explicit `--force-venv`, which we deliberately do not pass). Our lock + // probe only checks the hermes.exe shim and app.asar, so an external venv + // python holding a native .pyd (a user terminal, an unmanaged gateway) + // could still be alive here — mutating the venv under it would strand the + // install half-updated. If that guard fires, it exits 2 and the match arm + // below surfaces the correct "close all Hermes windows" message. update_args.push("--force".into()); update_args.push("--branch".into()); update_args.push(update_branch); diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 29b1ac1de..4400ee9a2 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -8797,6 +8797,20 @@ def _venv_core_imports_healthy() -> tuple[bool, str]: bin_dir = "Scripts" if _is_windows() else "bin" venv_python = venv_dir / bin_dir / python_name if not venv_python.exists(): + # No venv interpreter at all. In a dev checkout that's normal (the + # dev may run hermes from any interpreter), so report healthy to + # avoid forcing reinstalls. But on a MANAGED install (the Windows + # installer / desktop bootstrap stamps `.hermes-bootstrap-complete`, + # and an interrupted update leaves `.update-incomplete`), the venv + # IS the install — its absence means a repair got interrupted after + # the old venv was moved aside, and "Already up to date!" would + # gaslight the user while nothing can run. + managed_markers = ( + PROJECT_ROOT / ".hermes-bootstrap-complete", + _update_marker_path(), + ) + if any(m.exists() for m in managed_markers): + return False, f"venv python missing ({venv_python})" return True, "" # Core web/serve imports plus their newest transitive deps. Import (not @@ -8864,6 +8878,10 @@ def _detect_venv_python_processes( venv_prefix = str(venv_dir.resolve()).lower().rstrip(os.sep) + os.sep except OSError: venv_prefix = str(venv_dir).lower().rstrip(os.sep) + os.sep + try: + root_prefix = str(PROJECT_ROOT.resolve()).lower().rstrip(os.sep) + os.sep + except OSError: + root_prefix = str(PROJECT_ROOT).lower().rstrip(os.sep) + os.sep skip: set[int] = set(exclude_pids or set()) skip.add(os.getpid()) @@ -8875,7 +8893,7 @@ def _detect_venv_python_processes( matches: list[tuple[int, str, str]] = [] try: - proc_iter = psutil.process_iter(["pid", "exe", "name", "cmdline"]) + proc_iter = psutil.process_iter(["pid", "exe", "name", "cmdline", "cwd"]) except Exception: return [] for proc in proc_iter: @@ -8891,11 +8909,27 @@ def _detect_venv_python_processes( exe_norm = str(Path(exe).resolve()).lower() except (OSError, ValueError): exe_norm = str(exe).lower() - if not exe_norm.startswith(venv_prefix): + cmdline_raw = " ".join(info.get("cmdline") or []) + cmdline_low = cmdline_raw.lower() + cwd_low = str(info.get("cwd") or "").lower().rstrip(os.sep) + os.sep + + # Primary match: the executable itself lives under this venv + # (venv\Scripts\python(w).exe — the desktop backend / gateway case). + is_holder = exe_norm.startswith(venv_prefix) + # Fallback: uv/base-interpreter trampolines run a python whose exe is + # OUTSIDE the venv but which still imports from it and holds its .pyd + # files. Catch those by what they're running: a cmdline that references + # this venv's path, or a `-m hermes_cli.main ...` invocation tied to + # this install (install root in the cmdline or as the working dir). + if not is_holder and venv_prefix in cmdline_low: + is_holder = True + if not is_holder and "hermes_cli.main" in cmdline_low: + if root_prefix in cmdline_low or cwd_low.startswith(root_prefix): + is_holder = True + if not is_holder: continue name = info.get("name") or Path(exe).name - cmdline = " ".join(info.get("cmdline") or [])[:120] - matches.append((int(pid), str(name), cmdline)) + matches.append((int(pid), str(name), cmdline_raw[:120])) return matches @@ -8925,7 +8959,7 @@ def _format_venv_python_holders_message(matches: list[tuple[int, str, str]]) -> " Close the Hermes desktop app / other Hermes terminals, then re-run:" ) lines.append(" hermes update") - lines.append(" (or use `hermes update --force` to proceed anyway at your own risk)") + lines.append(" (or use `hermes update --force-venv` to proceed anyway at your own risk)") return "\n".join(lines) @@ -9392,9 +9426,13 @@ def _cmd_update_impl(args, gateway_mode: bool): # (most commonly the Desktop app's `hermes serve` backend) will keep .pyd # files locked and corrupt the dependency sync below. Refuse rather than # race: killing the desktop backend is futile (the app supervises and - # respawns it), so the user must close the app. --force preserves the old - # behavior for users who know what they're doing. - if _is_windows() and not getattr(args, "force", False): + # respawns it), so the user must close the app. Deliberately NOT bypassed + # by plain --force: the desktop bootstrap updater passes --force to skip + # the hermes.exe shim guard above, but its lock probe only checks the shim + # and app.asar — a non-desktop venv python holding a .pyd would sail + # through and corrupt the sync (the exact failure this guard exists for). + # --force-venv is the explicit escape hatch. + if _is_windows() and not getattr(args, "force_venv", False): _venv_holders = _detect_venv_python_processes() if _venv_holders: print(_format_venv_python_holders_message(_venv_holders)) @@ -9619,6 +9657,22 @@ def _cmd_update_impl(args, gateway_mode: bool): from hermes_cli.managed_uv import ensure_uv repair_uv = ensure_uv() + # A managed install whose venv is gone entirely (interrupted + # repair after the old venv was moved aside) needs the venv + # recreated before dependencies can be installed into it. + venv_python_missing = not ( + PROJECT_ROOT + / "venv" + / ("Scripts" if _is_windows() else "bin") + / ("python.exe" if _is_windows() else "python") + ).exists() + if venv_python_missing and repair_uv: + print("→ Recreating virtual environment...") + subprocess.run( + [repair_uv, "venv", "venv"], + cwd=PROJECT_ROOT, + check=False, + ) if repair_uv: repair_env = {**os.environ, "VIRTUAL_ENV": str(PROJECT_ROOT / "venv")} _install_python_dependencies_with_optional_fallback( diff --git a/hermes_cli/subcommands/update.py b/hermes_cli/subcommands/update.py index b2a632f20..bbd5e43e0 100644 --- a/hermes_cli/subcommands/update.py +++ b/hermes_cli/subcommands/update.py @@ -65,6 +65,12 @@ def build_update_parser(subparsers, *, cmd_update: Callable) -> None: "--force", action="store_true", default=False, - help="Windows: proceed with the update even when another hermes.exe is detected. The concurrent process will likely cause WinError 32 warnings and may leave a reboot-deferred .exe replacement.", + help="Windows: proceed with the update even when another hermes.exe is detected. The concurrent process will likely cause WinError 32 warnings and may leave a reboot-deferred .exe replacement. Does NOT bypass the venv-process guard (see --force-venv).", + ) + update_parser.add_argument( + "--force-venv", + action="store_true", + default=False, + help="Windows: mutate the venv even while other processes are running from its interpreter (desktop backend, gateway, terminals). Those processes keep native .pyd files locked, so the dependency sync will likely fail partway and strand the install half-updated. Use only if you know the detected holders are false positives.", ) update_parser.set_defaults(func=cmd_update) diff --git a/scripts/install.ps1 b/scripts/install.ps1 index e090dcb5c..19df6d313 100644 --- a/scripts/install.ps1 +++ b/scripts/install.ps1 @@ -1602,7 +1602,12 @@ function Install-Venv { Write-Info "Creating virtual environment with Python $PythonVersion..." Push-Location $InstallDir - + + # Tasks we disabled below and must re-enable no matter how this stage + # exits. Populated only with tasks that were ENABLED before we touched + # them, so a task the user deliberately disabled is never re-armed. + $gatewayTasksDisabled = @() + try { if (Test-Path "venv") { Write-Info "Virtual environment already exists, recreating..." # On Windows, native Python extensions (e.g. _bcrypt.pyd, tornado's @@ -1614,18 +1619,23 @@ function Install-Venv { if ($env:OS -eq "Windows_NT") { $myPid = $PID Write-Info "Stopping any running hermes processes before recreating venv..." - # Disarm respawners FIRST: the gateway autostart Scheduled Task and - # the Startup-folder entry both relaunch a killed gateway within - # seconds, and losing that race re-locks the venv's .pyd files - # between our kill sweep and Remove-Item (the July 2026 - # _brotlicffi.pyd incident). schtasks /End stops a running task - # instance; /Change /DISABLE stops it from re-firing mid-install. - # Re-enabled after the venv is recreated (below). Best-effort: a - # missing task just errors quietly. - $gatewayTasksDisabled = @() + # Disarm the respawner FIRST: the gateway autostart Scheduled Task + # relaunches a killed gateway within seconds, and losing that race + # re-locks the venv's .pyd files between our kill sweep and + # Remove-Item (the July 2026 _brotlicffi.pyd incident). schtasks + # /End stops a running task instance; /Change /DISABLE stops it + # from re-firing mid-install. (The Startup-folder .vbs fallback is + # NOT touched: it only fires at logon, so it cannot respawn a + # gateway mid-install.) Re-enabled in the finally below — including + # on failure — but only for tasks that were enabled to begin with. + # Best-effort: a missing task just errors quietly. try { schtasks /Query /FO CSV 2>$null | ConvertFrom-Csv | Where-Object { $_.TaskName -like '*Hermes_Gateway*' } | ForEach-Object { $tn = $_.TaskName + if ($_.Status -eq 'Disabled') { + Write-Info " gateway autostart task $tn is already disabled; leaving it that way" + return + } schtasks /End /TN $tn 2>$null | Out-Null schtasks /Change /TN $tn /DISABLE 2>$null | Out-Null $gatewayTasksDisabled += $tn @@ -1727,7 +1737,6 @@ function Install-Venv { # ok=true) when the venv was never created. $venvExitCode = $LASTEXITCODE if ($venvExitCode -ne 0) { - Pop-Location throw "Failed to create virtual environment (uv venv exited with $venvExitCode)" } @@ -1742,19 +1751,21 @@ function Install-Venv { if (Test-Path $venvPythonExe) { $env:UV_PYTHON = $venvPythonExe } - - Pop-Location - - # Re-arm the gateway autostart tasks disabled during the venv teardown. - # Same function scope, so the list survives even under the stage-per- - # process bootstrap. Deliberately NOT started here — dependencies aren't - # installed yet; the task fires normally on next logon and `hermes update` - # / the gateway resume path handles the immediate restart. - if ($gatewayTasksDisabled -and $gatewayTasksDisabled.Count -gt 0) { - foreach ($tn in $gatewayTasksDisabled) { - schtasks /Change /TN $tn /ENABLE 2>$null | Out-Null + } finally { + Pop-Location + # Re-arm the gateway autostart tasks disabled during the venv teardown + # — in a finally so a failed teardown/creation can never strand the + # user's gateway autostart in the disabled state. Same function scope, + # so the list survives even under the stage-per-process bootstrap. + # Deliberately NOT started here — dependencies aren't installed yet; + # the task fires normally on next logon and `hermes update` / the + # gateway resume path handles the immediate restart. + if ($gatewayTasksDisabled -and $gatewayTasksDisabled.Count -gt 0) { + foreach ($tn in $gatewayTasksDisabled) { + schtasks /Change /TN $tn /ENABLE 2>$null | Out-Null + } + Write-Info "Re-enabled gateway autostart task(s): $($gatewayTasksDisabled -join ', ')" } - Write-Info "Re-enabled gateway autostart task(s): $($gatewayTasksDisabled -join ', ')" } Write-Success "Virtual environment ready (Python $PythonVersion)" diff --git a/tests/hermes_cli/test_update_venv_health.py b/tests/hermes_cli/test_update_venv_health.py index b6660a8c1..16a9a1428 100644 --- a/tests/hermes_cli/test_update_venv_health.py +++ b/tests/hermes_cli/test_update_venv_health.py @@ -32,13 +32,33 @@ from hermes_cli import main as cli_main def test_venv_health_reports_healthy_when_no_venv(tmp_path): - """No venv python → nothing to probe → healthy (never blocks update).""" + """No venv python in a DEV checkout → nothing to probe → healthy.""" with patch.object(cli_main, "PROJECT_ROOT", tmp_path): healthy, detail = cli_main._venv_core_imports_healthy() assert healthy is True assert detail == "" +def test_venv_health_missing_venv_unhealthy_on_managed_install(tmp_path): + """On a managed install (bootstrap marker) the venv IS the install — + its absence must be reported unhealthy so the repair lane runs instead + of 'Already up to date!'.""" + (tmp_path / ".hermes-bootstrap-complete").write_text("done") + with patch.object(cli_main, "PROJECT_ROOT", tmp_path): + healthy, detail = cli_main._venv_core_imports_healthy() + assert healthy is False + assert "venv python missing" in detail + + +def test_venv_health_missing_venv_unhealthy_with_interrupted_marker(tmp_path): + """An interrupted-update breadcrumb also flips missing-venv to unhealthy.""" + (tmp_path / ".update-incomplete").write_text("started=1\npid=1\n") + with patch.object(cli_main, "PROJECT_ROOT", tmp_path): + healthy, detail = cli_main._venv_core_imports_healthy() + assert healthy is False + assert "venv python missing" in detail + + def _fake_venv_python(tmp_path, *, windows: bool = False): bin_dir = tmp_path / "venv" / ("Scripts" if windows else "bin") bin_dir.mkdir(parents=True) @@ -104,9 +124,15 @@ def test_venv_health_probe_failure_reports_healthy(tmp_path): # --------------------------------------------------------------------------- -def _proc(pid: int, exe: str, name: str, cmdline: list[str] | None = None): +def _proc(pid: int, exe: str, name: str, cmdline: list[str] | None = None, cwd: str = ""): proc = MagicMock() - proc.info = {"pid": pid, "exe": exe, "name": name, "cmdline": cmdline or []} + proc.info = { + "pid": pid, + "exe": exe, + "name": name, + "cmdline": cmdline or [], + "cwd": cwd, + } return proc @@ -182,4 +208,139 @@ def test_format_venv_holders_message_flags_desktop_backend(tmp_path): assert "desktop app" in msg.lower() assert "gateway" in msg assert "hermes update" in msg - assert "--force" in msg + assert "--force-venv" in msg + + +@patch.object(cli_main, "_is_windows", return_value=True) +def test_detect_venv_python_catches_outside_venv_trampoline(_winp, tmp_path): + """uv/base-interpreter trampoline: exe OUTSIDE the venv, but the cmdline + clearly runs Hermes from this install → must still be flagged as a holder + (it imports from the venv and holds its .pyd files).""" + base_py = "C:\\Python311\\python.exe" + venv_path = str(tmp_path / "venv" / "Scripts" / "python.exe") + + me = MagicMock() + me.parents.return_value = [] + fake_psutil = types.SimpleNamespace( + process_iter=lambda attrs: iter( + [ + # cmdline references the venv path directly + _proc(201, base_py, "python.exe", [base_py, venv_path, "-m", "x"]), + # `-m hermes_cli.main serve` with the install root as cwd + _proc( + 202, + base_py, + "python.exe", + [base_py, "-m", "hermes_cli.main", "serve"], + cwd=str(tmp_path), + ), + # unrelated base-interpreter python → NOT a holder + _proc(203, base_py, "python.exe", [base_py, "somescript.py"], cwd="C:\\other"), + ] + ), + Process=lambda *a, **k: me, + ) + with patch.object(cli_main, "PROJECT_ROOT", tmp_path), patch.dict( + sys.modules, {"psutil": fake_psutil} + ): + matches = cli_main._detect_venv_python_processes() + + assert sorted(m[0] for m in matches) == [201, 202] + + +@patch.object(cli_main, "_is_windows", return_value=True) +def test_detect_venv_hermes_cli_cmdline_outside_install_not_matched(_winp, tmp_path): + """A hermes_cli.main process belonging to a DIFFERENT install (neither + install root in cmdline nor cwd under it) must not be flagged.""" + base_py = "C:\\Python311\\python.exe" + me = MagicMock() + me.parents.return_value = [] + fake_psutil = types.SimpleNamespace( + process_iter=lambda attrs: iter( + [ + _proc( + 301, + base_py, + "python.exe", + [base_py, "-m", "hermes_cli.main", "serve"], + cwd="C:\\other-install", + ), + ] + ), + Process=lambda *a, **k: me, + ) + with patch.object(cli_main, "PROJECT_ROOT", tmp_path), patch.dict( + sys.modules, {"psutil": fake_psutil} + ): + assert cli_main._detect_venv_python_processes() == [] + + +# --------------------------------------------------------------------------- +# --force vs --force-venv gating of the venv-holder guard +# --------------------------------------------------------------------------- + + +def _update_args(**overrides): + defaults = dict( + gateway=False, + check=False, + no_backup=True, + backup=False, + yes=True, + branch=None, + force=False, + force_venv=False, + ) + defaults.update(overrides) + return SimpleNamespace(**defaults) + + +def _run_update_until_guard(args): + """Drive _cmd_update_impl just far enough to hit the venv-holder guard. + + Everything before the guard is stubbed; the guard firing is observed via + SystemExit(2). The first statement AFTER the guard is + ``git_dir = PROJECT_ROOT / ".git"`` — a PROJECT_ROOT sentinel whose + ``__truediv__`` raises marks 'guard passed'.""" + + class _PastGuard(Exception): + pass + + class _RootSentinel: + def __truediv__(self, _other): + raise _PastGuard + + with patch.object(cli_main, "_is_windows", return_value=True), patch.object( + cli_main, "_venv_scripts_dir", return_value=None + ), patch.object(cli_main, "_run_pre_update_backup"), patch.object( + cli_main, "_pause_windows_gateways_for_update", return_value=None + ), patch.object( + cli_main, "_resume_windows_gateways_after_update" + ), patch.object( + cli_main, + "_detect_venv_python_processes", + return_value=[(101, "python.exe", "python.exe -m hermes_cli.main serve")], + ), patch.object( + cli_main, "PROJECT_ROOT", _RootSentinel() + ): + try: + cli_main._cmd_update_impl(args, gateway_mode=False) + except _PastGuard: + return "past_guard" + except SystemExit as exc: + return f"exit_{exc.code}" + return "returned" + + +@pytest.mark.parametrize( + "force,force_venv,expected", + [ + (False, False, "exit_2"), # guard fires + (True, False, "exit_2"), # plain --force does NOT bypass the venv guard + (False, True, "past_guard"), # --force-venv is the explicit escape hatch + (True, True, "past_guard"), + ], +) +def test_venv_holder_guard_force_semantics(force, force_venv, expected, capsys): + result = _run_update_until_guard(_update_args(force=force, force_venv=force_venv)) + assert result == expected, capsys.readouterr().out diff --git a/website/docs/getting-started/updating.md b/website/docs/getting-started/updating.md index 1d42519d3..7e7ea5b03 100644 --- a/website/docs/getting-started/updating.md +++ b/website/docs/getting-started/updating.md @@ -102,6 +102,8 @@ $ hermes update Close the listed processes and re-run. If you're sure the concurrent process won't interfere (rare — usually only useful when an antivirus shim is mis-attributed), pass `--force` to skip the check. In that case the updater will still retry the `.exe` rename with exponential backoff and, on stubborn locks, schedule the replacement for next reboot via `MoveFileEx(MOVEFILE_DELAY_UNTIL_REBOOT)` so the update can complete. +A second, separate guard refuses to touch the venv while any process is running from its Python interpreter (the Desktop app's backend, a gateway, a Python REPL). Those processes keep native extension files (`.pyd`) locked, and a dependency sync that dies partway on an access-denied error strands the install between versions. This guard is **not** bypassed by `--force`; if you're certain the detected holders are false positives, use the explicit `hermes update --force-venv`. + Expected output looks like: ```