From 01bf61c865c31d47be6cd4cfcc3ecef1a28aba0b Mon Sep 17 00:00:00 2001 From: Harish Kukreja Date: Mon, 29 Jun 2026 17:18:46 -0400 Subject: [PATCH] fix(runtime): honor NOUS_INFERENCE_BASE_URL across pool/explicit/aux paths Upstream #52270 added `_nous_inference_env_override()` but wired it into only `resolve_nous_runtime_credentials`. Three sibling resolution paths still ignored the override, so a self-hosted Nous inference endpoint set via `NOUS_INFERENCE_BASE_URL` was silently dropped whenever credentials arrived through any of them: - the credential-pool path (`_resolve_runtime_from_pool_entry`) - the explicit-provider path (`_resolve_explicit_runtime`) - the auxiliary side-LLM client (`_pool_runtime_base_url`) Route all three through the same auth-layer reader so every `NOUS_INFERENCE_BASE_URL` read shares one normalization path (trailing-slash stripping, blank -> empty) and the documented trusted-bypass intent stays in one place. The override is live-only: it takes precedence over the stored and pooled base URLs for the value returned this run (in the explicit-provider path an explicitly passed base URL still wins over the override) but is never persisted to auth.json, the credential pool, or the shared store, so an ephemeral dev/staging value cannot poison durable auth state. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- agent/auxiliary_client.py | 8 +++ hermes_cli/runtime_provider.py | 14 +++++ tests/agent/test_auxiliary_client.py | 13 +++++ tests/hermes_cli/test_auth_nous_provider.py | 57 +++++++++++++++++++ .../test_runtime_provider_resolution.py | 31 ++++++++++ 5 files changed, 123 insertions(+) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index c4913fb7c..39b88ea95 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -682,6 +682,14 @@ def _pool_runtime_api_key(entry: Any) -> str: def _pool_runtime_base_url(entry: Any, fallback: str = "") -> str: if entry is None: return str(fallback or "").strip().rstrip("/") + if getattr(entry, "provider", None) == "nous": + # Funnel through the canonical auth-layer reader so the env override + # shares one normalization path with the rest of the NOUS resolution. + from hermes_cli.auth import _nous_inference_env_override + + env_url = _nous_inference_env_override() + if env_url: + return env_url # runtime_base_url handles provider-specific logic (e.g. nous prefers inference_base_url). # Fall back through inference_base_url and base_url for non-PooledCredential entries. url = ( diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 7f4692b83..700244fea 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -20,6 +20,7 @@ from hermes_cli.auth import ( DEFAULT_XAI_OAUTH_BASE_URL, PROVIDER_REGISTRY, _agent_key_is_usable, + _nous_inference_env_override, format_auth_error, resolve_provider, resolve_nous_runtime_credentials, @@ -334,6 +335,17 @@ def _parse_api_mode(raw: Any) -> Optional[str]: return None +def _nous_inference_base_url_override() -> str: + """Return the trusted Nous runtime base URL override, if configured. + + Delegates to ``auth._nous_inference_env_override`` so every + ``NOUS_INFERENCE_BASE_URL`` read shares one normalization path + (trailing-slash stripping, blank → empty). 
The env source is trusted + and intentionally bypasses the network host allowlist there. + """ + return _nous_inference_env_override() or "" + + def _maybe_apply_codex_app_server_runtime( *, provider: str, @@ -412,6 +424,7 @@ def _resolve_runtime_from_pool_entry( api_mode = "codex_responses" elif provider == "nous": api_mode = "chat_completions" + base_url = _nous_inference_base_url_override() or base_url elif provider == "copilot": api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", "")) base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url @@ -1359,6 +1372,7 @@ def _resolve_explicit_runtime( state = auth_mod.get_provider_auth_state("nous") or {} base_url = ( explicit_base_url + or _nous_inference_base_url_override() or str(state.get("inference_base_url") or auth_mod.DEFAULT_NOUS_INFERENCE_URL).strip().rstrip("/") ) # Only use the agent_key compatibility field for inference when it diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 06bd800ab..e66618e4d 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -34,6 +34,7 @@ from agent.auxiliary_client import ( _resolve_task_provider_model, _resolve_xai_oauth_for_aux, _CodexCompletionsAdapter, + _pool_runtime_base_url, ) @@ -4376,6 +4377,18 @@ class TestOpenRouterExplicitApiKey: ) +def test_pool_runtime_base_url_uses_nous_env_override(monkeypatch): + entry = SimpleNamespace( + provider="nous", + runtime_base_url="https://inference-api.nousresearch.com/v1", + inference_base_url="https://inference-api.nousresearch.com/v1", + base_url="https://inference-api.nousresearch.com/v1", + ) + monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", "https://ai.wildebeest-newton.ts.net/v1") + + assert _pool_runtime_base_url(entry) == "https://ai.wildebeest-newton.ts.net/v1" + + class TestAnthropicExplicitApiKey: """Test that explicit_api_key is correctly propagated to _try_anthropic(). 
diff --git a/tests/hermes_cli/test_auth_nous_provider.py b/tests/hermes_cli/test_auth_nous_provider.py index 53812f4e7..d769d68eb 100644 --- a/tests/hermes_cli/test_auth_nous_provider.py +++ b/tests/hermes_cli/test_auth_nous_provider.py @@ -217,6 +217,63 @@ def test_resolve_nous_runtime_credentials_prefers_invoke_jwt_and_mirrors( assert pool_entries[0]["source"] == auth_mod.NOUS_DEVICE_CODE_SOURCE +def test_resolve_nous_runtime_credentials_env_override_wins_live_not_persisted( + tmp_path, + monkeypatch, + shared_store_env, +): + """NOUS_INFERENCE_BASE_URL is a LIVE override, not a persisted one. + + The env override wins for the base_url returned to the caller this run, + but durable auth state (auth.json, the credential pool, the shared + store) keeps the network-validated URL from the refresh response. This + keeps an ephemeral dev/staging override from poisoning auth.json after + the env var is later unset. + """ + import hermes_cli.auth as auth_mod + + hermes_home = tmp_path / "hermes" + override_url = "https://ai.wildebeest-newton.ts.net/v1" + network_url = "https://inference-api.nousresearch.com/v1" + refreshed_token = _invoke_jwt(seconds=3600) + _setup_nous_auth( + hermes_home, + access_token=_invoke_jwt(seconds=-60), + refresh_token="refresh-old", + expires_at=_future_iso(-60), + expires_in=0, + ) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", override_url) + + def _fake_refresh_access_token(*, client, portal_base_url, client_id, refresh_token): + return { + "access_token": refreshed_token, + "refresh_token": "refresh-new", + "expires_in": 3600, + "token_type": "Bearer", + "scope": "inference:invoke", + "inference_base_url": network_url, + } + + monkeypatch.setattr("hermes_cli.auth._refresh_access_token", _fake_refresh_access_token) + + creds = auth_mod.resolve_nous_runtime_credentials() + + # The env override wins for the LIVE returned base_url... 
+ assert creds["base_url"] == override_url + + # ...but it is deliberately NOT persisted: every durable store keeps the + # network-validated URL, so the ephemeral override can't poison auth.json. + payload = json.loads((hermes_home / "auth.json").read_text()) + assert payload["providers"]["nous"]["inference_base_url"] == network_url + assert payload["providers"]["nous"]["inference_base_url"] != override_url + assert payload["credential_pool"]["nous"][0]["inference_base_url"] == network_url + + shared_payload = json.loads((shared_store_env / "nous_auth.json").read_text()) + assert shared_payload["inference_base_url"] == network_url + + def test_resolve_nous_runtime_credentials_invoke_jwt_is_idempotent( tmp_path, monkeypatch, diff --git a/tests/hermes_cli/test_runtime_provider_resolution.py b/tests/hermes_cli/test_runtime_provider_resolution.py index 5ec30cfe7..c6743c23d 100644 --- a/tests/hermes_cli/test_runtime_provider_resolution.py +++ b/tests/hermes_cli/test_runtime_provider_resolution.py @@ -1,6 +1,7 @@ import base64 import json import time +from types import SimpleNamespace import pytest @@ -44,6 +45,36 @@ def test_resolve_runtime_provider_uses_credential_pool(monkeypatch): assert resolved["source"] == "manual" +def test_resolve_runtime_provider_nous_pool_uses_env_base_url_override(monkeypatch): + entry = SimpleNamespace( + provider="nous", + source="device_code", + runtime_api_key="pool-token", + agent_key="pool-token", + agent_key_expires_at="2099-01-01T00:00:00+00:00", + scope="inference:invoke", + runtime_base_url="https://inference-api.nousresearch.com/v1", + ) + + class _Pool: + def has_credentials(self): + return True + + def select(self): + return entry + + monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", "https://ai.wildebeest-newton.ts.net/v1") + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "nous") + monkeypatch.setattr(rp, "_agent_key_is_usable", lambda *a, **k: True) + monkeypatch.setattr(rp, "load_pool", lambda provider: _Pool()) + + 
resolved = rp.resolve_runtime_provider(requested="nous") + + assert resolved["provider"] == "nous" + assert resolved["api_key"] == "pool-token" + assert resolved["base_url"] == "https://ai.wildebeest-newton.ts.net/v1" + + def test_resolve_runtime_provider_anthropic_pool_respects_config_base_url(monkeypatch): class _Entry: access_token = "pool-token"