refactor(providers): dedupe extra_headers normalizer + key picker groups by headers
Follow-up to @helix4u's #57336 salvage. Two review findings: - W1: model-picker grouped custom-provider rows by (api_url, credential, api_mode) but NOT extra_headers. Entries sharing a URL+credential+api_mode yet declaring different headers (e.g. per-tenant routing behind one proxy) collapsed into one row and probed /models with whichever header set was seen first (order-dependent). Fold a canonical header identity into group_key so distinct header-authed endpoints stay separate; drops the now-dead first-non-empty merge branch. - W2: the extra_headers stringify+None-filter comprehension existed in 5 copies (config.py x2, runtime_provider.py, model_switch.py, models.py). Extract one shared hermes_cli.config.normalize_extra_headers primitive; all sites now call it. Tests: +normalize_extra_headers unit tests, +regression test proving two same-endpoint entries with different headers stay distinct and each probes with its own headers. 223 targeted tests pass; ruff clean.
This commit is contained in:
parent
ab40e952f3
commit
ed4123792c
6 changed files with 115 additions and 36 deletions
|
|
@ -4609,11 +4609,9 @@ def _normalize_custom_provider_entry(
|
|||
# Per-provider extra HTTP headers (proxies, gateways, custom auth).
|
||||
# Values may carry credentials (e.g. CF-Access-Client-Secret) — never
|
||||
# log them anywhere downstream.
|
||||
extra_headers = entry.get("extra_headers")
|
||||
if isinstance(extra_headers, dict) and extra_headers:
|
||||
normalized["extra_headers"] = {
|
||||
str(k): str(v) for k, v in extra_headers.items() if v is not None
|
||||
}
|
||||
normalized_headers = normalize_extra_headers(entry.get("extra_headers"))
|
||||
if normalized_headers:
|
||||
normalized["extra_headers"] = normalized_headers
|
||||
|
||||
ssl_ca_cert = entry.get("ssl_ca_cert")
|
||||
if isinstance(ssl_ca_cert, str) and ssl_ca_cert.strip():
|
||||
|
|
@ -4796,6 +4794,23 @@ def apply_custom_provider_tls_to_client_kwargs(
|
|||
client_kwargs["ssl_verify"] = tls["ssl_verify"]
|
||||
|
||||
|
||||
def normalize_extra_headers(extra_headers: Any) -> Dict[str, str]:
|
||||
"""Normalize a raw ``extra_headers`` value into a ``dict[str, str]``.
|
||||
|
||||
Stringifies keys and values and drops entries whose value is ``None``.
|
||||
Returns ``{}`` for non-dict or empty inputs. This is the single shared
|
||||
normalizer for per-provider ``extra_headers`` across config normalization,
|
||||
runtime resolution, client construction, and live ``/models`` discovery.
|
||||
|
||||
SECURITY: header values routinely carry credentials (Cloudflare Access
|
||||
service tokens, proxy auth, custom bearer schemes). Callers must never
|
||||
log the returned values.
|
||||
"""
|
||||
if not isinstance(extra_headers, dict) or not extra_headers:
|
||||
return {}
|
||||
return {str(k): str(v) for k, v in extra_headers.items() if v is not None}
|
||||
|
||||
|
||||
def get_custom_provider_extra_headers(
|
||||
base_url: str,
|
||||
custom_providers: Optional[List[Dict[str, Any]]] = None,
|
||||
|
|
@ -4827,12 +4842,7 @@ def get_custom_provider_extra_headers(
|
|||
entry_url = (entry.get("base_url") or "").rstrip("/").lower()
|
||||
if not entry_url or entry_url != target_url:
|
||||
continue
|
||||
extra_headers = entry.get("extra_headers")
|
||||
if isinstance(extra_headers, dict) and extra_headers:
|
||||
return {
|
||||
str(k): str(v) for k, v in extra_headers.items() if v is not None
|
||||
}
|
||||
return {}
|
||||
return normalize_extra_headers(entry.get("extra_headers"))
|
||||
return {}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1365,14 +1365,9 @@ _picker_prewarm_done = _threading.Event()
|
|||
def _extra_headers_from_config(entry: Any) -> dict[str, str]:
|
||||
if not isinstance(entry, dict):
|
||||
return {}
|
||||
headers = entry.get("extra_headers")
|
||||
if not isinstance(headers, dict) or not headers:
|
||||
return {}
|
||||
return {
|
||||
str(key): str(value)
|
||||
for key, value in headers.items()
|
||||
if value is not None
|
||||
}
|
||||
from hermes_cli.config import normalize_extra_headers
|
||||
|
||||
return normalize_extra_headers(entry.get("extra_headers"))
|
||||
|
||||
|
||||
def prewarm_picker_cache_async() -> Optional["_threading.Thread"]:
|
||||
|
|
@ -2126,7 +2121,16 @@ def list_authenticated_providers(
|
|||
if isinstance(discover, str):
|
||||
discover = discover.lower() not in {"false", "no", "0"}
|
||||
|
||||
group_key = (api_url, credential_identity, api_mode)
|
||||
# Per-provider extra_headers participate in the group identity:
|
||||
# two entries sharing (api_url, credential, api_mode) but declaring
|
||||
# different headers are distinct endpoints (e.g. different tenants
|
||||
# behind one proxy URL, routed by header) and must probe /models
|
||||
# with their own headers rather than collapsing into one row and
|
||||
# silently adopting whichever header set was seen first.
|
||||
entry_extra_headers = _extra_headers_from_config(entry)
|
||||
headers_identity = tuple(sorted(entry_extra_headers.items()))
|
||||
|
||||
group_key = (api_url, credential_identity, api_mode, headers_identity)
|
||||
if group_key not in groups:
|
||||
# Strip per-model suffix so "Ollama — GLM 5.1" becomes
|
||||
# "Ollama" for the grouped row. Em dash is the convention
|
||||
|
|
@ -2147,13 +2151,13 @@ def list_authenticated_providers(
|
|||
"api_key": api_key,
|
||||
"models": [],
|
||||
"discover_models": discover,
|
||||
"extra_headers": _extra_headers_from_config(entry),
|
||||
"extra_headers": entry_extra_headers,
|
||||
}
|
||||
else:
|
||||
if api_key and not groups[group_key].get("api_key"):
|
||||
groups[group_key]["api_key"] = api_key
|
||||
if not groups[group_key].get("extra_headers"):
|
||||
groups[group_key]["extra_headers"] = _extra_headers_from_config(entry)
|
||||
# extra_headers is part of group_key, so every entry in this
|
||||
# group already carries identical headers — nothing to merge.
|
||||
# If any entry in this group opts out of discovery,
|
||||
# honour that for the whole grouped row.
|
||||
if not discover:
|
||||
|
|
|
|||
|
|
@ -3501,13 +3501,9 @@ def probe_api_models(
|
|||
if isinstance(request_headers, dict):
|
||||
# Per-provider custom headers can contain auth/proxy secrets. Merge
|
||||
# last so endpoint-specific config wins, and never log the values.
|
||||
headers.update(
|
||||
{
|
||||
str(key): str(value)
|
||||
for key, value in request_headers.items()
|
||||
if value is not None
|
||||
}
|
||||
)
|
||||
from hermes_cli.config import normalize_extra_headers
|
||||
|
||||
headers.update(normalize_extra_headers(request_headers))
|
||||
|
||||
for candidate_base, is_fallback in candidates:
|
||||
url = candidate_base.rstrip("/") + "/models"
|
||||
|
|
|
|||
|
|
@ -31,7 +31,11 @@ from hermes_cli.auth import (
|
|||
resolve_external_process_provider_credentials,
|
||||
has_usable_secret,
|
||||
)
|
||||
from hermes_cli.config import get_compatible_custom_providers, load_config
|
||||
from hermes_cli.config import (
|
||||
get_compatible_custom_providers,
|
||||
load_config,
|
||||
normalize_extra_headers,
|
||||
)
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
from utils import base_url_host_matches, base_url_hostname, env_int
|
||||
|
||||
|
|
@ -597,11 +601,9 @@ def _lift_extra_headers(entry: Dict[str, Any], result: Dict[str, Any]) -> None:
|
|||
SECURITY: header values routinely carry credentials (Cloudflare Access
|
||||
service tokens, proxy auth, custom bearer schemes). Never log them.
|
||||
"""
|
||||
extra_headers = entry.get("extra_headers")
|
||||
if isinstance(extra_headers, dict) and extra_headers:
|
||||
result["extra_headers"] = {
|
||||
str(k): str(v) for k, v in extra_headers.items() if v is not None
|
||||
}
|
||||
extra_headers = normalize_extra_headers(entry.get("extra_headers"))
|
||||
if extra_headers:
|
||||
result["extra_headers"] = extra_headers
|
||||
|
||||
|
||||
def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, Any]]:
|
||||
|
|
|
|||
|
|
@ -10,10 +10,23 @@ from hermes_cli.config import (
|
|||
_normalize_custom_provider_entry,
|
||||
apply_custom_provider_extra_headers_to_client_kwargs,
|
||||
get_custom_provider_extra_headers,
|
||||
normalize_extra_headers,
|
||||
)
|
||||
from hermes_cli import models as models_mod
|
||||
|
||||
|
||||
def test_normalize_extra_headers_stringifies_and_drops_none():
|
||||
assert normalize_extra_headers({"X-Int": 7, "X-Str": "v", "X-None": None}) == {
|
||||
"X-Int": "7",
|
||||
"X-Str": "v",
|
||||
}
|
||||
|
||||
|
||||
def test_normalize_extra_headers_rejects_non_dict_and_empty():
|
||||
for bad in (None, "x", 42, ["a"], {}):
|
||||
assert normalize_extra_headers(bad) == {}
|
||||
|
||||
|
||||
def test_normalize_entry_keeps_extra_headers():
|
||||
normalized = _normalize_custom_provider_entry(
|
||||
{
|
||||
|
|
|
|||
|
|
@ -745,6 +745,60 @@ def test_custom_provider_live_model_probe_uses_extra_headers(monkeypatch):
|
|||
assert gateway_prov["models"] == ["gateway-model"]
|
||||
|
||||
|
||||
def test_same_endpoint_different_extra_headers_not_collapsed(monkeypatch):
|
||||
"""Entries sharing (api_url, credential, api_mode) but declaring different
|
||||
extra_headers must NOT collapse into one picker row — each is a distinct
|
||||
header-authenticated endpoint (e.g. per-tenant routing behind one proxy)
|
||||
and must probe /models with its own headers."""
|
||||
monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
|
||||
monkeypatch.setattr("hermes_cli.providers.HERMES_OVERLAYS", {})
|
||||
|
||||
calls = []
|
||||
|
||||
def fake_fetch_api_models(api_key, base_url, **kwargs):
|
||||
calls.append((api_key, base_url, kwargs.get("headers")))
|
||||
# Return a per-tenant model list keyed by the routing header so we can
|
||||
# assert each row got its OWN probe rather than a shared one.
|
||||
tenant = (kwargs.get("headers") or {}).get("X-Tenant", "none")
|
||||
return [f"model-{tenant}"]
|
||||
|
||||
monkeypatch.setattr("hermes_cli.models.fetch_api_models", fake_fetch_api_models)
|
||||
|
||||
providers = list_authenticated_providers(
|
||||
current_provider="openrouter",
|
||||
current_base_url="https://openrouter.ai/api/v1",
|
||||
custom_providers=[
|
||||
{
|
||||
"name": "Proxy Tenant A",
|
||||
"api_key": "shared-key",
|
||||
"base_url": "http://localhost:8081/v1",
|
||||
"extra_headers": {"X-Tenant": "a"},
|
||||
},
|
||||
{
|
||||
"name": "Proxy Tenant B",
|
||||
"api_key": "shared-key",
|
||||
"base_url": "http://localhost:8081/v1",
|
||||
"extra_headers": {"X-Tenant": "b"},
|
||||
},
|
||||
],
|
||||
max_models=50,
|
||||
)
|
||||
|
||||
rows = [
|
||||
p for p in providers if p.get("api_url") == "http://localhost:8081/v1"
|
||||
]
|
||||
# Two distinct rows, not one collapsed row.
|
||||
assert len(rows) == 2, f"expected 2 rows, got {len(rows)}: {rows}"
|
||||
|
||||
# Each tenant was probed with its OWN header set (order-independent).
|
||||
assert ("shared-key", "http://localhost:8081/v1", {"X-Tenant": "a"}) in calls
|
||||
assert ("shared-key", "http://localhost:8081/v1", {"X-Tenant": "b"}) in calls
|
||||
|
||||
# Each row surfaces the model list its own headers unlocked.
|
||||
models_by_row = {tuple(r["models"]) for r in rows}
|
||||
assert models_by_row == {("model-a",), ("model-b",)}
|
||||
|
||||
|
||||
def test_custom_providers_discover_models_false_keeps_explicit_subset(monkeypatch):
|
||||
"""Custom providers (section 4) with ``discover_models: false`` must keep
|
||||
their explicit ``models:`` subset instead of replacing it with live
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue