diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 218040e67..fdd099bfa 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -1926,6 +1926,28 @@ def _nous_inference_env_override() -> Optional[str]: return _optional_base_url(os.getenv("NOUS_INFERENCE_BASE_URL")) +def _nous_portal_env_override() -> Optional[str]: + """Return the user/deployment-set Portal base URL override, if any. + + Mirrors ``_nous_inference_env_override()``: ``HERMES_PORTAL_BASE_URL`` / + ``NOUS_PORTAL_BASE_URL`` are the documented dev/staging escape hatch for + pointing Hermes at a non-production Nous Portal (e.g. a hosted agent + provisioned on nous-account-service's `staging` environment, which stamps + ``HERMES_PORTAL_BASE_URL=https://portal.staging-nousresearch.com`` into + the container env). The env source is trusted (the OS user/deployment + set it themselves), so — like the inference override — it must NOT be + gated by ``_NOUS_PORTAL_ALLOWED_HOSTS``: that allowlist exists to reject + an untrusted NETWORK-provided value (a poisoned portal_base_url + persisted to auth.json), not a value the operator explicitly configured. + + Returns a trailing-slash-stripped non-empty string, or ``None`` when + neither env var is set/blank. + """ + return _optional_base_url( + os.getenv("HERMES_PORTAL_BASE_URL") or os.getenv("NOUS_PORTAL_BASE_URL") + ) + + def _decode_jwt_claims(token: Any) -> Dict[str, Any]: if not isinstance(token, str) or token.count(".") != 2: return {} @@ -5386,20 +5408,29 @@ def resolve_nous_access_token( relogin_required=True, ) - portal_base_url = ( - _optional_base_url(state.get("portal_base_url")) - or os.getenv("HERMES_PORTAL_BASE_URL") - or os.getenv("NOUS_PORTAL_BASE_URL") - or DEFAULT_NOUS_PORTAL_URL - ).rstrip("/") + # HERMES_PORTAL_BASE_URL / NOUS_PORTAL_BASE_URL is the trusted + # operator/deployment override (mirrors NOUS_INFERENCE_BASE_URL) and + # must win OUTRIGHT — including over a stored value — and bypass the + # host allowlist entirely, since the allowlist exists to reject an + # untrusted network-provided value, not one the operator configured. + # Only fall through to the stored/default value + allowlist gate when + # no override is set. + env_portal_override = _nous_portal_env_override() + if env_portal_override: + portal_base_url = env_portal_override.rstrip("/") + else: + portal_base_url = ( + _optional_base_url(state.get("portal_base_url")) + or DEFAULT_NOUS_PORTAL_URL + ).rstrip("/") - parsed_portal_url = urlparse(portal_base_url) - if parsed_portal_url.hostname and parsed_portal_url.hostname not in _NOUS_PORTAL_ALLOWED_HOSTS: - logger.warning( - "auth: ignoring invalid portal_base_url %r (host %r not in allowlist), using default", - portal_base_url, parsed_portal_url.hostname, - ) - portal_base_url = DEFAULT_NOUS_PORTAL_URL + parsed_portal_url = urlparse(portal_base_url) + if parsed_portal_url.hostname and parsed_portal_url.hostname not in _NOUS_PORTAL_ALLOWED_HOSTS: + logger.warning( + "auth: ignoring invalid portal_base_url %r (host %r not in allowlist), using default", + portal_base_url, parsed_portal_url.hostname, + ) + portal_base_url = DEFAULT_NOUS_PORTAL_URL client_id = str(state.get("client_id") or DEFAULT_NOUS_CLIENT_ID) verify = _resolve_verify(insecure=insecure, ca_bundle=ca_bundle, auth_state=state) @@ -5718,13 +5749,22 @@ def resolve_nous_runtime_credentials( # A persisted/stale portal_base_url is where the refresh token gets # POSTed on refresh — reject any host outside the allowlist so a # poisoned value can't exfiltrate the bearer, healing to the default. - parsed_portal_url = urlparse(portal_base_url) - if parsed_portal_url.hostname and parsed_portal_url.hostname not in _NOUS_PORTAL_ALLOWED_HOSTS: - logger.warning( - "auth: ignoring invalid portal_base_url %r (host %r not in allowlist), using default", - portal_base_url, parsed_portal_url.hostname, - ) - portal_base_url = DEFAULT_NOUS_PORTAL_URL + # The trusted operator/deployment env override (HERMES_PORTAL_BASE_URL / + # NOUS_PORTAL_BASE_URL) bypasses this gate entirely — mirrors + # NOUS_INFERENCE_BASE_URL's treatment below; the allowlist exists to + # reject an untrusted NETWORK-provided value, not one the operator + # explicitly configured. + env_portal_override = _nous_portal_env_override() + if env_portal_override: + portal_base_url = env_portal_override.rstrip("/") + else: + parsed_portal_url = urlparse(portal_base_url) + if parsed_portal_url.hostname and parsed_portal_url.hostname not in _NOUS_PORTAL_ALLOWED_HOSTS: + logger.warning( + "auth: ignoring invalid portal_base_url %r (host %r not in allowlist), using default", + portal_base_url, parsed_portal_url.hostname, + ) + portal_base_url = DEFAULT_NOUS_PORTAL_URL # Persisted value: validated network-provenance only. The stored # inference_base_url is re-validated on read so a poisoned/stale diff --git a/tests/hermes_cli/test_nous_portal_staging_allowlist.py b/tests/hermes_cli/test_nous_portal_staging_allowlist.py new file mode 100644 index 000000000..3153db4ec --- /dev/null +++ b/tests/hermes_cli/test_nous_portal_staging_allowlist.py @@ -0,0 +1,198 @@ +"""Regression tests for the Nous Portal env-override bypassing the host +allowlist, mirroring the existing NOUS_INFERENCE_BASE_URL / +_ALLOWED_NOUS_INFERENCE_HOSTS treatment. + +Real incident (2026-07): a hosted agent provisioned by nous-account-service +on the `staging` Vercel environment is stamped with +``HERMES_PORTAL_BASE_URL=https://portal.staging-nousresearch.com`` in its +container env (the documented dev/staging override), while its bootstrap +``auth.json`` ALSO persists ``portal_base_url`` to the same staging host. + +Before this fix, ``resolve_nous_access_token`` / ``resolve_nous_runtime_ +credentials`` read ``state.get("portal_base_url")`` FIRST via a plain ``or`` +chain, so whenever the stored state had ANY value the env vars were never +even consulted — and whichever value won (state or env) was then run through +``_NOUS_PORTAL_ALLOWED_HOSTS``, which only recognised the production host. +The staging host was silently rewritten back to prod on every refresh, so a +staging-issued refresh token got replayed against the PROD token endpoint. +Prod correctly rejected that with ``invalid_grant``, which triggered +``_quarantine_nous_oauth_state`` and wiped the entire credential pool. + +The correct fix (mirroring ``_nous_inference_env_override()``): the env +override is a TRUSTED value the operator/deployment set themselves — it must +win outright (even over a stored value) and bypass the allowlist entirely. +The allowlist exists only to reject an untrusted NETWORK-provided value +(a poisoned portal_base_url written to auth.json by a compromised Portal +response), never a value the operator explicitly configured. +""" + +from __future__ import annotations + +import json +import logging + +from hermes_cli.auth import ( + DEFAULT_NOUS_PORTAL_URL, + _NOUS_PORTAL_ALLOWED_HOSTS, + _nous_portal_env_override, +) + + +class TestPortalEnvOverrideHelper: + def test_none_when_unset(self, monkeypatch): + monkeypatch.delenv("HERMES_PORTAL_BASE_URL", raising=False) + monkeypatch.delenv("NOUS_PORTAL_BASE_URL", raising=False) + assert _nous_portal_env_override() is None + + def test_hermes_portal_base_url_wins(self, monkeypatch): + monkeypatch.setenv( + "HERMES_PORTAL_BASE_URL", "https://portal.staging-nousresearch.com/" + ) + monkeypatch.delenv("NOUS_PORTAL_BASE_URL", raising=False) + assert ( + _nous_portal_env_override() == "https://portal.staging-nousresearch.com" + ) + + def test_nous_portal_base_url_used_as_fallback(self, monkeypatch): + monkeypatch.delenv("HERMES_PORTAL_BASE_URL", raising=False) + monkeypatch.setenv( + "NOUS_PORTAL_BASE_URL", "https://portal.staging-nousresearch.com" + ) + assert ( + _nous_portal_env_override() == "https://portal.staging-nousresearch.com" + ) + + def test_env_override_not_gated_by_allowlist(self, monkeypatch): + """The whole point: an env-set staging host is NOT in + _NOUS_PORTAL_ALLOWED_HOSTS, and the helper must return it anyway — + gating happens only for network-provenance values.""" + monkeypatch.setenv( + "HERMES_PORTAL_BASE_URL", "https://portal.staging-nousresearch.com" + ) + assert "portal.staging-nousresearch.com" not in _NOUS_PORTAL_ALLOWED_HOSTS + assert ( + _nous_portal_env_override() == "https://portal.staging-nousresearch.com" + ) + + +class TestResolveAccessTokenEnvOverrideWins: + """End-to-end: resolve_nous_access_token must use the env override for + the refresh call, bypassing the allowlist, even when state also has a + portal_base_url set (the exact incident shape).""" + + def _write_auth_file(self, tmp_path, *, stored_portal_url): + auth_file = tmp_path / "auth.json" + auth_file.write_text( + json.dumps( + { + "version": 1, + "active_provider": "nous", + "providers": { + "nous": { + "portal_base_url": stored_portal_url, + "access_token": "expired-access", + "refresh_token": "staging-refresh", + "client_id": "hermes-cli-vps", + "expires_at": "2000-01-01T00:00:00+00:00", + } + }, + } + ) + ) + return auth_file + + def _run_and_capture(self, monkeypatch, auth): + seen_portal_urls = [] + + def _fake_refresh(*, client, portal_base_url, client_id, refresh_token): + seen_portal_urls.append(portal_base_url) + return { + "access_token": "new-access", + "refresh_token": "new-refresh", + "expires_in": 3600, + } + + monkeypatch.setattr(auth, "_refresh_access_token", _fake_refresh) + + caplog_records = [] + logger = logging.getLogger("hermes_cli.auth") + handler = logging.Handler() + handler.emit = lambda record: caplog_records.append(record.getMessage()) + logger.addHandler(handler) + try: + auth.resolve_nous_access_token() + finally: + logger.removeHandler(handler) + return seen_portal_urls, caplog_records + + def test_env_override_wins_even_with_staging_state_stored( + self, monkeypatch, tmp_path + ): + """The real incident: state ALSO has the staging host stored (from + a prior HERMES_AUTH_JSON_BOOTSTRAP seed), and the env var is set to + the same staging host. Both must resolve to staging, and the + allowlist-rejection warning must never fire.""" + import hermes_cli.auth as auth + + staging_portal = "https://portal.staging-nousresearch.com" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setenv("HERMES_PORTAL_BASE_URL", staging_portal) + self._write_auth_file(tmp_path, stored_portal_url=staging_portal) + + seen_portal_urls, records = self._run_and_capture(monkeypatch, auth) + + assert seen_portal_urls == [staging_portal] + assert not any( + "ignoring invalid portal_base_url" in msg for msg in records + ), "env override must bypass the allowlist gate entirely" + + def test_env_override_wins_over_prod_state(self, monkeypatch, tmp_path): + """Even when the STORED state is the prod host (e.g. a stale/healed + value from before the env var was set), the env override must still + win for the actual refresh call.""" + import hermes_cli.auth as auth + + staging_portal = "https://portal.staging-nousresearch.com" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setenv("HERMES_PORTAL_BASE_URL", staging_portal) + self._write_auth_file(tmp_path, stored_portal_url=DEFAULT_NOUS_PORTAL_URL) + + seen_portal_urls, _records = self._run_and_capture(monkeypatch, auth) + + assert seen_portal_urls == [staging_portal] + + def test_no_env_override_stored_staging_host_heals_to_prod( + self, monkeypatch, tmp_path + ): + """Without the env override set, a stored staging host is untrusted + network provenance and correctly heals to prod (this is the + allowlist's actual job — preserved, not regressed, by this fix).""" + import hermes_cli.auth as auth + + staging_portal = "https://portal.staging-nousresearch.com" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.delenv("HERMES_PORTAL_BASE_URL", raising=False) + monkeypatch.delenv("NOUS_PORTAL_BASE_URL", raising=False) + self._write_auth_file(tmp_path, stored_portal_url=staging_portal) + + seen_portal_urls, records = self._run_and_capture(monkeypatch, auth) + + assert seen_portal_urls == [DEFAULT_NOUS_PORTAL_URL] + assert any("ignoring invalid portal_base_url" in msg for msg in records) + + def test_no_env_no_staging_state_prod_url_used_unmodified( + self, monkeypatch, tmp_path + ): + """Baseline: no override, no staging state — prod is used and the + allowlist never even logs a warning (nothing was rejected).""" + import hermes_cli.auth as auth + + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.delenv("HERMES_PORTAL_BASE_URL", raising=False) + monkeypatch.delenv("NOUS_PORTAL_BASE_URL", raising=False) + self._write_auth_file(tmp_path, stored_portal_url=DEFAULT_NOUS_PORTAL_URL) + + seen_portal_urls, records = self._run_and_capture(monkeypatch, auth) + + assert seen_portal_urls == [DEFAULT_NOUS_PORTAL_URL] + assert not any("ignoring invalid portal_base_url" in msg for msg in records)