fix(runtime): honor NOUS_INFERENCE_BASE_URL across pool/explicit/aux paths

Upstream #52270 added `_nous_inference_env_override()` but wired it into
only `resolve_nous_runtime_credentials`. Three sibling resolution paths
still ignored the override, so a self-hosted Nous inference endpoint set
via `NOUS_INFERENCE_BASE_URL` was silently dropped whenever credentials
arrived through any of them:

- the credential-pool path (`_resolve_runtime_from_pool_entry`)
- the explicit-provider path (`_resolve_explicit_runtime`)
- the auxiliary side-LLM client (`_pool_runtime_base_url`)

Route all three through the same auth-layer reader so every
`NOUS_INFERENCE_BASE_URL` read shares one normalization path
(trailing-slash stripping, blank -> empty) and the documented
trusted-bypass intent stays in one place. The override is live-only: it
wins for the base URL returned this run but is never persisted to
auth.json or the credential pool, so an ephemeral dev/staging value
cannot poison durable auth state.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Harish Kukreja 2026-06-29 17:18:46 -04:00 committed by Teknium
parent f70abae606
commit 01bf61c865
5 changed files with 123 additions and 0 deletions

View file

@ -682,6 +682,14 @@ def _pool_runtime_api_key(entry: Any) -> str:
def _pool_runtime_base_url(entry: Any, fallback: str = "") -> str:
if entry is None:
return str(fallback or "").strip().rstrip("/")
if getattr(entry, "provider", None) == "nous":
# Funnel through the canonical auth-layer reader so the env override
# shares one normalization path with the rest of the NOUS resolution.
from hermes_cli.auth import _nous_inference_env_override
env_url = _nous_inference_env_override()
if env_url:
return env_url
# runtime_base_url handles provider-specific logic (e.g. nous prefers inference_base_url).
# Fall back through inference_base_url and base_url for non-PooledCredential entries.
url = (

View file

@ -20,6 +20,7 @@ from hermes_cli.auth import (
DEFAULT_XAI_OAUTH_BASE_URL,
PROVIDER_REGISTRY,
_agent_key_is_usable,
_nous_inference_env_override,
format_auth_error,
resolve_provider,
resolve_nous_runtime_credentials,
@ -334,6 +335,17 @@ def _parse_api_mode(raw: Any) -> Optional[str]:
return None
def _nous_inference_base_url_override() -> str:
    """Return the trusted Nous runtime base URL override, or ``""`` if unset.

    This is a thin wrapper over ``auth._nous_inference_env_override`` so that
    every ``NOUS_INFERENCE_BASE_URL`` read shares one normalization path
    (trailing-slash stripping, blank -> empty). The env source is trusted
    and intentionally bypasses the network host allowlist there.
    """
    override = _nous_inference_env_override()
    # Collapse any falsy result from the auth-layer reader to an empty string
    # so callers can chain this with ``or``.
    return override if override else ""
def _maybe_apply_codex_app_server_runtime(
*,
provider: str,
@ -412,6 +424,7 @@ def _resolve_runtime_from_pool_entry(
api_mode = "codex_responses"
elif provider == "nous":
api_mode = "chat_completions"
base_url = _nous_inference_base_url_override() or base_url
elif provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
@ -1359,6 +1372,7 @@ def _resolve_explicit_runtime(
state = auth_mod.get_provider_auth_state("nous") or {}
base_url = (
explicit_base_url
or _nous_inference_base_url_override()
or str(state.get("inference_base_url") or auth_mod.DEFAULT_NOUS_INFERENCE_URL).strip().rstrip("/")
)
# Only use the agent_key compatibility field for inference when it

View file

@ -34,6 +34,7 @@ from agent.auxiliary_client import (
_resolve_task_provider_model,
_resolve_xai_oauth_for_aux,
_CodexCompletionsAdapter,
_pool_runtime_base_url,
)
@ -4376,6 +4377,18 @@ class TestOpenRouterExplicitApiKey:
)
def test_pool_runtime_base_url_uses_nous_env_override(monkeypatch):
    """A Nous pool entry resolves to NOUS_INFERENCE_BASE_URL when it is set."""
    stored_url = "https://inference-api.nousresearch.com/v1"
    env_url = "https://ai.wildebeest-newton.ts.net/v1"

    # Every URL-bearing attribute on the entry points at the stored endpoint,
    # so the only way to get ``env_url`` back is through the env override.
    pool_entry = SimpleNamespace(
        provider="nous",
        runtime_base_url=stored_url,
        inference_base_url=stored_url,
        base_url=stored_url,
    )
    monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", env_url)

    resolved_url = _pool_runtime_base_url(pool_entry)

    assert resolved_url == env_url
class TestAnthropicExplicitApiKey:
"""Test that explicit_api_key is correctly propagated to _try_anthropic().

View file

@ -217,6 +217,63 @@ def test_resolve_nous_runtime_credentials_prefers_invoke_jwt_and_mirrors(
assert pool_entries[0]["source"] == auth_mod.NOUS_DEVICE_CODE_SOURCE
def test_resolve_nous_runtime_credentials_env_override_wins_live_not_persisted(
    tmp_path,
    monkeypatch,
    shared_store_env,
):
    """NOUS_INFERENCE_BASE_URL applies to the live result only.

    The base_url handed back to the caller carries the env override, while
    auth.json (provider state and credential pool) and the shared store keep
    the URL reported by the faked refresh response. An ephemeral dev/staging
    override therefore cannot linger in durable auth state once the env var
    is unset.
    """
    import hermes_cli.auth as auth_mod

    home_dir = tmp_path / "hermes"
    env_url = "https://ai.wildebeest-newton.ts.net/v1"
    refresh_url = "https://inference-api.nousresearch.com/v1"
    new_access_token = _invoke_jwt(seconds=3600)

    # Seed an already-expired login so resolution has to go through refresh.
    _setup_nous_auth(
        home_dir,
        access_token=_invoke_jwt(seconds=-60),
        refresh_token="refresh-old",
        expires_at=_future_iso(-60),
        expires_in=0,
    )
    monkeypatch.setenv("HERMES_HOME", str(home_dir))
    monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", env_url)

    refresh_response = {
        "access_token": new_access_token,
        "refresh_token": "refresh-new",
        "expires_in": 3600,
        "token_type": "Bearer",
        "scope": "inference:invoke",
        "inference_base_url": refresh_url,
    }

    def _fake_refresh_access_token(*, client, portal_base_url, client_id, refresh_token):
        # Hand out a fresh dict per call, as the inline literal did.
        return dict(refresh_response)

    monkeypatch.setattr("hermes_cli.auth._refresh_access_token", _fake_refresh_access_token)

    creds = auth_mod.resolve_nous_runtime_credentials()

    # Live result: the env override wins.
    assert creds["base_url"] == env_url

    # Durable state: every store still holds the refresh-response URL.
    auth_payload = json.loads((home_dir / "auth.json").read_text())
    nous_state = auth_payload["providers"]["nous"]
    assert nous_state["inference_base_url"] == refresh_url
    assert nous_state["inference_base_url"] != env_url
    assert auth_payload["credential_pool"]["nous"][0]["inference_base_url"] == refresh_url

    shared_state = json.loads((shared_store_env / "nous_auth.json").read_text())
    assert shared_state["inference_base_url"] == refresh_url
def test_resolve_nous_runtime_credentials_invoke_jwt_is_idempotent(
tmp_path,
monkeypatch,

View file

@ -1,6 +1,7 @@
import base64
import json
import time
from types import SimpleNamespace
import pytest
@ -44,6 +45,36 @@ def test_resolve_runtime_provider_uses_credential_pool(monkeypatch):
assert resolved["source"] == "manual"
def test_resolve_runtime_provider_nous_pool_uses_env_base_url_override(monkeypatch):
    """Pool-sourced Nous credentials pick up the NOUS_INFERENCE_BASE_URL value."""
    env_url = "https://ai.wildebeest-newton.ts.net/v1"

    pooled_entry = SimpleNamespace(
        provider="nous",
        source="device_code",
        runtime_api_key="pool-token",
        agent_key="pool-token",
        agent_key_expires_at="2099-01-01T00:00:00+00:00",
        scope="inference:invoke",
        runtime_base_url="https://inference-api.nousresearch.com/v1",
    )

    class _StubPool:
        """Minimal pool stand-in that always yields ``pooled_entry``."""

        def has_credentials(self):
            return True

        def select(self):
            return pooled_entry

    monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", env_url)
    # Pin provider resolution and key usability so only base URL handling
    # is under test.
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "nous")
    monkeypatch.setattr(rp, "_agent_key_is_usable", lambda *a, **k: True)
    monkeypatch.setattr(rp, "load_pool", lambda provider: _StubPool())

    resolved = rp.resolve_runtime_provider(requested="nous")

    assert resolved["provider"] == "nous"
    assert resolved["api_key"] == "pool-token"
    assert resolved["base_url"] == env_url
def test_resolve_runtime_provider_anthropic_pool_respects_config_base_url(monkeypatch):
class _Entry:
access_token = "pool-token"