From 01bf61c865c31d47be6cd4cfcc3ecef1a28aba0b Mon Sep 17 00:00:00 2001
From: Harish Kukreja <harish.kukreja@gmail.com>
Date: Mon, 29 Jun 2026 17:18:46 -0400
Subject: [PATCH] fix(runtime): honor NOUS_INFERENCE_BASE_URL across
 pool/explicit/aux paths

Upstream #52270 added `_nous_inference_env_override()` but wired it into
only `resolve_nous_runtime_credentials`. Three sibling resolution paths
still ignored the override, so a self-hosted Nous inference endpoint set
via `NOUS_INFERENCE_BASE_URL` was silently dropped whenever credentials
arrived through any of them:

- the credential-pool path (`_resolve_runtime_from_pool_entry`)
- the explicit-provider path (`_resolve_explicit_runtime`)
- the auxiliary side-LLM client (`_pool_runtime_base_url`)

Route all three through the same auth-layer reader so every
`NOUS_INFERENCE_BASE_URL` read shares one normalization path
(trailing-slash stripping, blank -> empty) and the documented
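trusted-bypass intent stays in one place. The override is live-only: it
wins for the base URL returned this run but is never persisted to
auth.json or the credential pool, so an ephemeral dev/staging value
cannot poison durable auth state.

Precedence: in the pool and auxiliary paths the override beats the base
URL otherwise resolved for the pool entry. In the explicit-provider
path an explicitly passed base URL still wins; the override only
displaces the persisted `inference_base_url` and the built-in default.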

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 agent/auxiliary_client.py                     |  8 +++
 hermes_cli/runtime_provider.py                | 14 +++++
 tests/agent/test_auxiliary_client.py          | 13 +++++
 tests/hermes_cli/test_auth_nous_provider.py   | 57 +++++++++++++++++++
 .../test_runtime_provider_resolution.py       | 31 ++++++++++
 5 files changed, 123 insertions(+)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index c4913fb7c..39b88ea95 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -682,6 +682,14 @@ def _pool_runtime_api_key(entry: Any) -> str:
 def _pool_runtime_base_url(entry: Any, fallback: str = "") -> str:
     if entry is None:
         return str(fallback or "").strip().rstrip("/")
+    if getattr(entry, "provider", None) == "nous":
+        # Funnel through the canonical auth-layer reader so the env override
+        # shares one normalization path with the rest of the NOUS resolution.
+        from hermes_cli.auth import _nous_inference_env_override
+
+        env_url = _nous_inference_env_override()
+        if env_url:
+            return env_url
     # runtime_base_url handles provider-specific logic (e.g. nous prefers inference_base_url).
     # Fall back through inference_base_url and base_url for non-PooledCredential entries.
     url = (
diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py
index 7f4692b83..700244fea 100644
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@@ -20,6 +20,7 @@ from hermes_cli.auth import (
     DEFAULT_XAI_OAUTH_BASE_URL,
     PROVIDER_REGISTRY,
     _agent_key_is_usable,
+    _nous_inference_env_override,
     format_auth_error,
     resolve_provider,
     resolve_nous_runtime_credentials,
@@ -334,6 +335,17 @@ def _parse_api_mode(raw: Any) -> Optional[str]:
     return None
 
 
+def _nous_inference_base_url_override() -> str:
+    """Return the trusted Nous base URL override, or ``""`` when none is set.
+
+    Delegates to ``auth._nous_inference_env_override`` so every
+    ``NOUS_INFERENCE_BASE_URL`` read shares one normalization path
+    (trailing-slash stripping, blank → empty). The env source is trusted
+    and intentionally bypasses the network host allowlist there.
+    """
+    return _nous_inference_env_override() or ""
+
+
 def _maybe_apply_codex_app_server_runtime(
     *,
     provider: str,
@@ -412,6 +424,7 @@ def _resolve_runtime_from_pool_entry(
         api_mode = "codex_responses"
     elif provider == "nous":
         api_mode = "chat_completions"
+        base_url = _nous_inference_base_url_override() or base_url
     elif provider == "copilot":
         api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
         base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
@@ -1359,6 +1372,7 @@ def _resolve_explicit_runtime(
         state = auth_mod.get_provider_auth_state("nous") or {}
         base_url = (
             explicit_base_url
+            or _nous_inference_base_url_override()
             or str(state.get("inference_base_url") or auth_mod.DEFAULT_NOUS_INFERENCE_URL).strip().rstrip("/")
         )
         # Only use the agent_key compatibility field for inference when it
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
index 06bd800ab..e66618e4d 100644
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -34,6 +34,7 @@ from agent.auxiliary_client import (
     _resolve_task_provider_model,
     _resolve_xai_oauth_for_aux,
     _CodexCompletionsAdapter,
+    _pool_runtime_base_url,
 )
 
 
@@ -4376,6 +4377,18 @@ class TestOpenRouterExplicitApiKey:
             )
 
 
+def test_pool_runtime_base_url_uses_nous_env_override(monkeypatch):
+    """A nous pool entry resolves to NOUS_INFERENCE_BASE_URL when it is set."""
+    stock_url = "https://inference-api.nousresearch.com/v1"
+    override_url = "https://ai.wildebeest-newton.ts.net/v1"
+    url_fields = ("runtime_base_url", "inference_base_url", "base_url")
+    entry = SimpleNamespace(provider="nous", **dict.fromkeys(url_fields, stock_url))
+
+    monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", override_url)
+
+    assert _pool_runtime_base_url(entry) == override_url
+
+
 class TestAnthropicExplicitApiKey:
     """Test that explicit_api_key is correctly propagated to _try_anthropic().
 
diff --git a/tests/hermes_cli/test_auth_nous_provider.py b/tests/hermes_cli/test_auth_nous_provider.py
index 53812f4e7..d769d68eb 100644
--- a/tests/hermes_cli/test_auth_nous_provider.py
+++ b/tests/hermes_cli/test_auth_nous_provider.py
@@ -217,6 +217,63 @@ def test_resolve_nous_runtime_credentials_prefers_invoke_jwt_and_mirrors(
     assert pool_entries[0]["source"] == auth_mod.NOUS_DEVICE_CODE_SOURCE
 
 
+def test_resolve_nous_runtime_credentials_env_override_wins_live_not_persisted(
+    tmp_path,
+    monkeypatch,
+    shared_store_env,
+):
+    """NOUS_INFERENCE_BASE_URL is a LIVE override, not a persisted one.
+
+    The env override wins for the base_url returned to the caller this run,
+    but durable auth state (auth.json, the credential pool, the shared
+    store) keeps the network-validated URL from the refresh response. This
+    keeps an ephemeral dev/staging override from poisoning auth.json after
+    the env var is later unset.
+    """
+    import hermes_cli.auth as auth_mod
+
+    hermes_home = tmp_path / "hermes"
+    override_url = "https://ai.wildebeest-newton.ts.net/v1"
+    network_url = "https://inference-api.nousresearch.com/v1"
+    refreshed_token = _invoke_jwt(seconds=3600)
+    _setup_nous_auth(
+        hermes_home,
+        access_token=_invoke_jwt(seconds=-60),
+        refresh_token="refresh-old",
+        expires_at=_future_iso(-60),
+        expires_in=0,
+    )
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+    monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", override_url)
+
+    def _fake_refresh_access_token(*, client, portal_base_url, client_id, refresh_token):
+        return {
+            "access_token": refreshed_token,
+            "refresh_token": "refresh-new",
+            "expires_in": 3600,
+            "token_type": "Bearer",
+            "scope": "inference:invoke",
+            "inference_base_url": network_url,
+        }
+
+    monkeypatch.setattr("hermes_cli.auth._refresh_access_token", _fake_refresh_access_token)
+
+    creds = auth_mod.resolve_nous_runtime_credentials()
+
+    # The env override wins for the LIVE returned base_url...
+    assert creds["base_url"] == override_url
+
+    # ...but it is deliberately NOT persisted: every durable store keeps the
+    # network URL. NOTE(review): the `!=` check is implied by `==`; presumably emphasis.
+    payload = json.loads((hermes_home / "auth.json").read_text())
+    assert payload["providers"]["nous"]["inference_base_url"] == network_url
+    assert payload["providers"]["nous"]["inference_base_url"] != override_url
+    assert payload["credential_pool"]["nous"][0]["inference_base_url"] == network_url
+
+    shared_payload = json.loads((shared_store_env / "nous_auth.json").read_text())
+    assert shared_payload["inference_base_url"] == network_url
+
+
 def test_resolve_nous_runtime_credentials_invoke_jwt_is_idempotent(
     tmp_path,
     monkeypatch,
diff --git a/tests/hermes_cli/test_runtime_provider_resolution.py b/tests/hermes_cli/test_runtime_provider_resolution.py
index 5ec30cfe7..c6743c23d 100644
--- a/tests/hermes_cli/test_runtime_provider_resolution.py
+++ b/tests/hermes_cli/test_runtime_provider_resolution.py
@@ -1,6 +1,7 @@
 import base64
 import json
 import time
+from types import SimpleNamespace
 
 import pytest
 
@@ -44,6 +45,36 @@ def test_resolve_runtime_provider_uses_credential_pool(monkeypatch):
     assert resolved["source"] == "manual"
 
 
+def test_resolve_runtime_provider_nous_pool_uses_env_base_url_override(monkeypatch):
+    """A pooled nous credential resolves to the env base-URL override.
+
+    The entry carries the stock URL; ``NOUS_INFERENCE_BASE_URL`` must win.
+    """
+    override_url = "https://ai.wildebeest-newton.ts.net/v1"
+    pooled = SimpleNamespace(
+        provider="nous",
+        source="device_code",
+        runtime_api_key="pool-token",
+        agent_key="pool-token",
+        agent_key_expires_at="2099-01-01T00:00:00+00:00",
+        scope="inference:invoke",
+        runtime_base_url="https://inference-api.nousresearch.com/v1",
+    )
+    # Minimal stand-in exposing only the two pool methods this test defines.
+    pool = SimpleNamespace(has_credentials=lambda: True, select=lambda: pooled)
+
+    monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", override_url)
+    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "nous")
+    monkeypatch.setattr(rp, "_agent_key_is_usable", lambda *a, **k: True)
+    monkeypatch.setattr(rp, "load_pool", lambda provider: pool)
+
+    resolved = rp.resolve_runtime_provider(requested="nous")
+
+    assert resolved["provider"] == "nous"
+    assert resolved["api_key"] == "pool-token"
+    assert resolved["base_url"] == override_url
+
+
 def test_resolve_runtime_provider_anthropic_pool_respects_config_base_url(monkeypatch):
     class _Entry:
         access_token = "pool-token"