fix(runtime): honor NOUS_INFERENCE_BASE_URL across pool/explicit/aux paths
Upstream #52270 added `_nous_inference_env_override()` but wired it into only `resolve_nous_runtime_credentials`. Three sibling resolution paths still ignored the override, so a self-hosted Nous inference endpoint set via `NOUS_INFERENCE_BASE_URL` was silently dropped whenever credentials arrived through any of them:

- the credential-pool path (`_resolve_runtime_from_pool_entry`)
- the explicit-provider path (`_resolve_explicit_runtime`)
- the auxiliary side-LLM client (`_pool_runtime_base_url`)

Route all three through the same auth-layer reader so every `NOUS_INFERENCE_BASE_URL` read shares one normalization path (trailing-slash stripping, blank -> empty) and the documented trusted-bypass intent stays in one place.

The override is live-only: it wins for the base URL returned this run but is never persisted to auth.json or the credential pool, so an ephemeral dev/staging value cannot poison durable auth state.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
f70abae606
commit
01bf61c865
5 changed files with 123 additions and 0 deletions
|
|
@ -682,6 +682,14 @@ def _pool_runtime_api_key(entry: Any) -> str:
|
|||
def _pool_runtime_base_url(entry: Any, fallback: str = "") -> str:
|
||||
if entry is None:
|
||||
return str(fallback or "").strip().rstrip("/")
|
||||
if getattr(entry, "provider", None) == "nous":
|
||||
# Funnel through the canonical auth-layer reader so the env override
|
||||
# shares one normalization path with the rest of the NOUS resolution.
|
||||
from hermes_cli.auth import _nous_inference_env_override
|
||||
|
||||
env_url = _nous_inference_env_override()
|
||||
if env_url:
|
||||
return env_url
|
||||
# runtime_base_url handles provider-specific logic (e.g. nous prefers inference_base_url).
|
||||
# Fall back through inference_base_url and base_url for non-PooledCredential entries.
|
||||
url = (
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ from hermes_cli.auth import (
|
|||
DEFAULT_XAI_OAUTH_BASE_URL,
|
||||
PROVIDER_REGISTRY,
|
||||
_agent_key_is_usable,
|
||||
_nous_inference_env_override,
|
||||
format_auth_error,
|
||||
resolve_provider,
|
||||
resolve_nous_runtime_credentials,
|
||||
|
|
@ -334,6 +335,17 @@ def _parse_api_mode(raw: Any) -> Optional[str]:
|
|||
return None
|
||||
|
||||
|
||||
def _nous_inference_base_url_override() -> str:
    """Resolve the trusted ``NOUS_INFERENCE_BASE_URL`` override for Nous runtimes.

    Thin wrapper over ``auth._nous_inference_env_override`` so that every
    read of the env var goes through the auth layer's single normalization
    path (trailing-slash stripping, blank → empty). The env source is
    treated as trusted and intentionally bypasses the network host
    allowlist in that layer.

    Returns:
        The override URL when the auth-layer reader yields a truthy value,
        otherwise an empty string.
    """
    override = _nous_inference_env_override()
    if override:
        return override
    # Collapse any falsy result from the reader to a plain empty string.
    return ""
|
||||
|
||||
|
||||
def _maybe_apply_codex_app_server_runtime(
|
||||
*,
|
||||
provider: str,
|
||||
|
|
@ -412,6 +424,7 @@ def _resolve_runtime_from_pool_entry(
|
|||
api_mode = "codex_responses"
|
||||
elif provider == "nous":
|
||||
api_mode = "chat_completions"
|
||||
base_url = _nous_inference_base_url_override() or base_url
|
||||
elif provider == "copilot":
|
||||
api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
|
||||
base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
|
||||
|
|
@ -1359,6 +1372,7 @@ def _resolve_explicit_runtime(
|
|||
state = auth_mod.get_provider_auth_state("nous") or {}
|
||||
base_url = (
|
||||
explicit_base_url
|
||||
or _nous_inference_base_url_override()
|
||||
or str(state.get("inference_base_url") or auth_mod.DEFAULT_NOUS_INFERENCE_URL).strip().rstrip("/")
|
||||
)
|
||||
# Only use the agent_key compatibility field for inference when it
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ from agent.auxiliary_client import (
|
|||
_resolve_task_provider_model,
|
||||
_resolve_xai_oauth_for_aux,
|
||||
_CodexCompletionsAdapter,
|
||||
_pool_runtime_base_url,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -4376,6 +4377,18 @@ class TestOpenRouterExplicitApiKey:
|
|||
)
|
||||
|
||||
|
||||
def test_pool_runtime_base_url_uses_nous_env_override(monkeypatch):
    """A Nous pool entry resolves to the ``NOUS_INFERENCE_BASE_URL`` override.

    Every URL attribute on the entry points at the stock endpoint, so the
    assertion only passes if the env override wins over all of them.
    """
    stock_url = "https://inference-api.nousresearch.com/v1"
    override_url = "https://ai.wildebeest-newton.ts.net/v1"

    nous_entry = SimpleNamespace(
        provider="nous",
        runtime_base_url=stock_url,
        inference_base_url=stock_url,
        base_url=stock_url,
    )
    monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", override_url)

    resolved_url = _pool_runtime_base_url(nous_entry)

    assert resolved_url == override_url
|
||||
|
||||
|
||||
class TestAnthropicExplicitApiKey:
|
||||
"""Test that explicit_api_key is correctly propagated to _try_anthropic().
|
||||
|
||||
|
|
|
|||
|
|
@ -217,6 +217,63 @@ def test_resolve_nous_runtime_credentials_prefers_invoke_jwt_and_mirrors(
|
|||
assert pool_entries[0]["source"] == auth_mod.NOUS_DEVICE_CODE_SOURCE
|
||||
|
||||
|
||||
def test_resolve_nous_runtime_credentials_env_override_wins_live_not_persisted(
    tmp_path,
    monkeypatch,
    shared_store_env,
):
    """The ``NOUS_INFERENCE_BASE_URL`` override applies live, never durably.

    Starting from an expired token, resolution is forced through a (faked)
    refresh whose response carries the network URL. The credentials handed
    back to the caller must use the env override, while auth.json, the
    credential pool and the shared store must all record the URL from the
    refresh response. That way an ephemeral dev/staging override cannot
    linger in auth.json once the env var is unset.
    """
    import hermes_cli.auth as auth_mod

    override_url = "https://ai.wildebeest-newton.ts.net/v1"
    network_url = "https://inference-api.nousresearch.com/v1"
    hermes_home = tmp_path / "hermes"

    refreshed_token = _invoke_jwt(seconds=3600)
    # Seed an already-expired login so the resolver has to refresh.
    _setup_nous_auth(
        hermes_home,
        access_token=_invoke_jwt(seconds=-60),
        refresh_token="refresh-old",
        expires_at=_future_iso(-60),
        expires_in=0,
    )
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", override_url)

    refresh_response = {
        "access_token": refreshed_token,
        "refresh_token": "refresh-new",
        "expires_in": 3600,
        "token_type": "Bearer",
        "scope": "inference:invoke",
        "inference_base_url": network_url,
    }

    def _fake_refresh_access_token(*, client, portal_base_url, client_id, refresh_token):
        # Hand back a fresh dict per call so callers can mutate it freely.
        return dict(refresh_response)

    monkeypatch.setattr("hermes_cli.auth._refresh_access_token", _fake_refresh_access_token)

    creds = auth_mod.resolve_nous_runtime_credentials()

    # Live result: the env override wins for the returned base_url.
    assert creds["base_url"] == override_url

    # Durable state: every store keeps the URL from the refresh response,
    # so the ephemeral override cannot poison auth.json.
    auth_file = hermes_home / "auth.json"
    payload = json.loads(auth_file.read_text())
    nous_state = payload["providers"]["nous"]
    assert nous_state["inference_base_url"] == network_url
    assert nous_state["inference_base_url"] != override_url
    first_pool_entry = payload["credential_pool"]["nous"][0]
    assert first_pool_entry["inference_base_url"] == network_url

    shared_file = shared_store_env / "nous_auth.json"
    shared_payload = json.loads(shared_file.read_text())
    assert shared_payload["inference_base_url"] == network_url
|
||||
|
||||
|
||||
def test_resolve_nous_runtime_credentials_invoke_jwt_is_idempotent(
|
||||
tmp_path,
|
||||
monkeypatch,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import base64
|
||||
import json
|
||||
import time
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
|
|
@ -44,6 +45,36 @@ def test_resolve_runtime_provider_uses_credential_pool(monkeypatch):
|
|||
assert resolved["source"] == "manual"
|
||||
|
||||
|
||||
def test_resolve_runtime_provider_nous_pool_uses_env_base_url_override(monkeypatch):
    """Pool-sourced Nous credentials resolve to the env base URL override.

    The pooled entry advertises the stock inference endpoint. With
    ``NOUS_INFERENCE_BASE_URL`` set, the resolved runtime must keep the
    pool's API key but report the override as its base URL.
    """
    override_url = "https://ai.wildebeest-newton.ts.net/v1"

    pooled_entry = SimpleNamespace(
        provider="nous",
        source="device_code",
        runtime_api_key="pool-token",
        agent_key="pool-token",
        agent_key_expires_at="2099-01-01T00:00:00+00:00",
        scope="inference:invoke",
        runtime_base_url="https://inference-api.nousresearch.com/v1",
    )

    class _SingleEntryPool:
        """Stub credential pool that always yields ``pooled_entry``."""

        def has_credentials(self):
            return True

        def select(self):
            return pooled_entry

    def _load_stub_pool(provider):
        return _SingleEntryPool()

    monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", override_url)
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "nous")
    monkeypatch.setattr(rp, "_agent_key_is_usable", lambda *a, **k: True)
    monkeypatch.setattr(rp, "load_pool", _load_stub_pool)

    resolved = rp.resolve_runtime_provider(requested="nous")

    assert resolved["provider"] == "nous"
    assert resolved["api_key"] == "pool-token"
    assert resolved["base_url"] == override_url
|
||||
|
||||
|
||||
def test_resolve_runtime_provider_anthropic_pool_respects_config_base_url(monkeypatch):
|
||||
class _Entry:
|
||||
access_token = "pool-token"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue