fix(runtime): honor NOUS_INFERENCE_BASE_URL across pool/explicit/aux paths

Upstream #52270 added `_nous_inference_env_override()` but wired it only into `resolve_nous_runtime_credentials`. Three sibling resolution paths still ignored the override, so a self-hosted Nous inference endpoint set via `NOUS_INFERENCE_BASE_URL` was silently dropped whenever credentials arrived through any of them:

- the credential-pool path (`_resolve_runtime_from_pool_entry`)
- the explicit-provider path (`_resolve_explicit_runtime`)
- the auxiliary side-LLM client (`_pool_runtime_base_url`)

Route all three through the same auth-layer reader so every `NOUS_INFERENCE_BASE_URL` read shares one normalization path (trailing-slash stripping, blank -> empty) and the documented trusted-bypass intent stays in one place.

The override is live-only: it wins for the base URL returned this run but is never persisted to auth.json or the credential pool, so an ephemeral dev/staging value cannot poison durable auth state.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-29 17:18:46 -04:00 · 2026-06-29 17:18:46 -04:00 · 01bf61c865
commit 01bf61c865
parent f70abae606
5 changed files with 123 additions and 0 deletions
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@ -682,6 +682,14 @@ def _pool_runtime_api_key(entry: Any) -> str:
 def _pool_runtime_base_url(entry: Any, fallback: str = "") -> str:
    if entry is None:
        return str(fallback or "").strip().rstrip("/")
+    if getattr(entry, "provider", None) == "nous":
+        # Funnel through the canonical auth-layer reader so the env override
+        # shares one normalization path with the rest of the NOUS resolution.
+        from hermes_cli.auth import _nous_inference_env_override
+
+        env_url = _nous_inference_env_override()
+        if env_url:
+            return env_url
    # runtime_base_url handles provider-specific logic (e.g. nous prefers inference_base_url).
    # Fall back through inference_base_url and base_url for non-PooledCredential entries.
    url = (
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@ -20,6 +20,7 @@ from hermes_cli.auth import (
    DEFAULT_XAI_OAUTH_BASE_URL,
    PROVIDER_REGISTRY,
    _agent_key_is_usable,
+    _nous_inference_env_override,
    format_auth_error,
    resolve_provider,
    resolve_nous_runtime_credentials,
@ -334,6 +335,17 @@ def _parse_api_mode(raw: Any) -> Optional[str]:
    return None


+def _nous_inference_base_url_override() -> str:
+    """Return the trusted Nous runtime base URL override, if configured.
+
+    Delegates to ``auth._nous_inference_env_override`` so every
+    ``NOUS_INFERENCE_BASE_URL`` read shares one normalization path
+    (trailing-slash stripping, blank → empty). The env source is trusted
+    and intentionally bypasses the network host allowlist there.
+
+    Returns:
+        The value produced by ``_nous_inference_env_override()``, or ``""``
+        when that value is falsy, so callers can chain the result with
+        ``or`` to fall through to their own base URL.
+    """
+    # ``or ""`` keeps the declared ``str`` return type for any falsy reader result.
+    return _nous_inference_env_override() or ""
+
+
 def _maybe_apply_codex_app_server_runtime(
    *,
    provider: str,
@ -412,6 +424,7 @@ def _resolve_runtime_from_pool_entry(
        api_mode = "codex_responses"
    elif provider == "nous":
        api_mode = "chat_completions"
+        base_url = _nous_inference_base_url_override() or base_url
    elif provider == "copilot":
        api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
        base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
@ -1359,6 +1372,7 @@ def _resolve_explicit_runtime(
        state = auth_mod.get_provider_auth_state("nous") or {}
        base_url = (
            explicit_base_url
+            or _nous_inference_base_url_override()
            or str(state.get("inference_base_url") or auth_mod.DEFAULT_NOUS_INFERENCE_URL).strip().rstrip("/")
        )
        # Only use the agent_key compatibility field for inference when it
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@ -34,6 +34,7 @@ from agent.auxiliary_client import (
    _resolve_task_provider_model,
    _resolve_xai_oauth_for_aux,
    _CodexCompletionsAdapter,
+    _pool_runtime_base_url,
 )


@ -4376,6 +4377,18 @@ class TestOpenRouterExplicitApiKey:
            )


+def test_pool_runtime_base_url_uses_nous_env_override(monkeypatch):
+    """``_pool_runtime_base_url`` returns ``NOUS_INFERENCE_BASE_URL`` for a nous entry.
+
+    Every URL attribute on the stub entry points at the same non-override
+    host, so the assertion can only pass if the environment value takes
+    precedence over all of them.
+    """
+    # Minimal stand-in for a pool entry: only the attributes the helper may read.
+    entry = SimpleNamespace(
+        provider="nous",
+        runtime_base_url="https://inference-api.nousresearch.com/v1",
+        inference_base_url="https://inference-api.nousresearch.com/v1",
+        base_url="https://inference-api.nousresearch.com/v1",
+    )
+    monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", "https://ai.wildebeest-newton.ts.net/v1")
+
+    assert _pool_runtime_base_url(entry) == "https://ai.wildebeest-newton.ts.net/v1"
+
+
 class TestAnthropicExplicitApiKey:
    """Test that explicit_api_key is correctly propagated to _try_anthropic().

--- a/tests/hermes_cli/test_auth_nous_provider.py
+++ b/tests/hermes_cli/test_auth_nous_provider.py
@ -217,6 +217,63 @@ def test_resolve_nous_runtime_credentials_prefers_invoke_jwt_and_mirrors(
    assert pool_entries[0]["source"] == auth_mod.NOUS_DEVICE_CODE_SOURCE


+def test_resolve_nous_runtime_credentials_env_override_wins_live_not_persisted(
+    tmp_path,
+    monkeypatch,
+    shared_store_env,
+):
+    """NOUS_INFERENCE_BASE_URL is a LIVE override, not a persisted one.
+
+    The env override wins for the base_url returned to the caller this run,
+    but durable auth state (auth.json, the credential pool, the shared
+    store) keeps the network-validated URL from the refresh response. This
+    keeps an ephemeral dev/staging override from poisoning auth.json after
+    the env var is later unset.
+    """
+    import hermes_cli.auth as auth_mod
+
+    hermes_home = tmp_path / "hermes"
+    override_url = "https://ai.wildebeest-newton.ts.net/v1"
+    network_url = "https://inference-api.nousresearch.com/v1"
+    refreshed_token = _invoke_jwt(seconds=3600)
+    # Seed auth state with an access token and expiry 60 seconds in the past.
+    # NOTE(review): presumably this forces the refresh path stubbed below —
+    # confirm against _setup_nous_auth / resolve_nous_runtime_credentials.
+    _setup_nous_auth(
+        hermes_home,
+        access_token=_invoke_jwt(seconds=-60),
+        refresh_token="refresh-old",
+        expires_at=_future_iso(-60),
+        expires_in=0,
+    )
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+    monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", override_url)
+
+    # Stand-in for the token refresh call: hands back a fresh token and
+    # reports network_url (not the override) as the inference_base_url.
+    def _fake_refresh_access_token(*, client, portal_base_url, client_id, refresh_token):
+        return {
+            "access_token": refreshed_token,
+            "refresh_token": "refresh-new",
+            "expires_in": 3600,
+            "token_type": "Bearer",
+            "scope": "inference:invoke",
+            "inference_base_url": network_url,
+        }
+
+    monkeypatch.setattr("hermes_cli.auth._refresh_access_token", _fake_refresh_access_token)
+
+    creds = auth_mod.resolve_nous_runtime_credentials()
+
+    # The env override wins for the LIVE returned base_url...
+    assert creds["base_url"] == override_url
+
+    # ...but it is deliberately NOT persisted: every durable store keeps the
+    # network-validated URL, so the ephemeral override can't poison auth.json.
+    payload = json.loads((hermes_home / "auth.json").read_text())
+    assert payload["providers"]["nous"]["inference_base_url"] == network_url
+    # Implied by the equality above (network_url != override_url); kept to
+    # state the non-persistence intent explicitly.
+    assert payload["providers"]["nous"]["inference_base_url"] != override_url
+    assert payload["credential_pool"]["nous"][0]["inference_base_url"] == network_url
+
+    # The shared_store_env fixture is used as a directory containing nous_auth.json.
+    shared_payload = json.loads((shared_store_env / "nous_auth.json").read_text())
+    assert shared_payload["inference_base_url"] == network_url
+
+
 def test_resolve_nous_runtime_credentials_invoke_jwt_is_idempotent(
    tmp_path,
    monkeypatch,
--- a/tests/hermes_cli/test_runtime_provider_resolution.py
+++ b/tests/hermes_cli/test_runtime_provider_resolution.py
@ -1,6 +1,7 @@
 import base64
 import json
 import time
+from types import SimpleNamespace

 import pytest

@ -44,6 +45,36 @@ def test_resolve_runtime_provider_uses_credential_pool(monkeypatch):
    assert resolved["source"] == "manual"


+def test_resolve_runtime_provider_nous_pool_uses_env_base_url_override(monkeypatch):
+    """A nous pool entry resolves to the ``NOUS_INFERENCE_BASE_URL`` base URL.
+
+    The stub entry's ``runtime_base_url`` differs from the env value, so the
+    final assertion only holds if the override replaces the entry's URL,
+    while the API key still comes from the pool entry.
+    """
+    # Stand-in pool entry carrying the attributes set below; its base URL is
+    # intentionally NOT the override URL.
+    entry = SimpleNamespace(
+        provider="nous",
+        source="device_code",
+        runtime_api_key="pool-token",
+        agent_key="pool-token",
+        agent_key_expires_at="2099-01-01T00:00:00+00:00",
+        scope="inference:invoke",
+        runtime_base_url="https://inference-api.nousresearch.com/v1",
+    )
+
+    # Minimal pool stub: always reports credentials and always selects `entry`.
+    class _Pool:
+        def has_credentials(self):
+            return True
+
+        def select(self):
+            return entry
+
+    monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", "https://ai.wildebeest-newton.ts.net/v1")
+    # Pin provider resolution, key usability, and pool loading to the stubs
+    # so the test does not depend on real auth state.
+    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "nous")
+    monkeypatch.setattr(rp, "_agent_key_is_usable", lambda *a, **k: True)
+    monkeypatch.setattr(rp, "load_pool", lambda provider: _Pool())
+
+    resolved = rp.resolve_runtime_provider(requested="nous")
+
+    assert resolved["provider"] == "nous"
+    assert resolved["api_key"] == "pool-token"
+    assert resolved["base_url"] == "https://ai.wildebeest-newton.ts.net/v1"
+
+
 def test_resolve_runtime_provider_anthropic_pool_respects_config_base_url(monkeypatch):
    class _Entry:
        access_token = "pool-token"