refactor(providers): dedupe extra_headers normalizer + key picker groups by headers

Follow-up to @helix4u's #57336 salvage. Two review findings:

- W1: model-picker grouped custom-provider rows by
  (api_url, credential, api_mode) but NOT extra_headers. Entries sharing a
  URL+credential+api_mode yet declaring different headers (e.g. per-tenant
  routing behind one proxy) collapsed into one row and probed /models with
  whichever header set was seen first (order-dependent). Fold a canonical
  header identity into group_key so distinct header-authed endpoints stay
  separate; drops the now-dead first-non-empty merge branch.
- W2: the extra_headers stringify+None-filter comprehension existed in 5
  copies (config.py x2, runtime_provider.py, model_switch.py, models.py).
  Extract one shared hermes_cli.config.normalize_extra_headers primitive;
  all sites now call it.

Tests: +normalize_extra_headers unit tests, +regression test proving two
same-endpoint entries with different headers stay distinct and each probes
with its own headers. 223 targeted tests pass; ruff clean.
This commit is contained in:
kshitijk4poor 2026-07-03 04:15:19 +05:30 committed by kshitij
parent ab40e952f3
commit ed4123792c
6 changed files with 115 additions and 36 deletions

View file

@ -4609,11 +4609,9 @@ def _normalize_custom_provider_entry(
# Per-provider extra HTTP headers (proxies, gateways, custom auth).
# Values may carry credentials (e.g. CF-Access-Client-Secret) — never
# log them anywhere downstream.
extra_headers = entry.get("extra_headers")
if isinstance(extra_headers, dict) and extra_headers:
normalized["extra_headers"] = {
str(k): str(v) for k, v in extra_headers.items() if v is not None
}
normalized_headers = normalize_extra_headers(entry.get("extra_headers"))
if normalized_headers:
normalized["extra_headers"] = normalized_headers
ssl_ca_cert = entry.get("ssl_ca_cert")
if isinstance(ssl_ca_cert, str) and ssl_ca_cert.strip():
@ -4796,6 +4794,23 @@ def apply_custom_provider_tls_to_client_kwargs(
client_kwargs["ssl_verify"] = tls["ssl_verify"]
def normalize_extra_headers(extra_headers: Any) -> Dict[str, str]:
"""Normalize a raw ``extra_headers`` value into a ``dict[str, str]``.
Stringifies keys and values and drops entries whose value is ``None``.
Returns ``{}`` for non-dict or empty inputs. This is the single shared
normalizer for per-provider ``extra_headers`` across config normalization,
runtime resolution, client construction, and live ``/models`` discovery.
SECURITY: header values routinely carry credentials (Cloudflare Access
service tokens, proxy auth, custom bearer schemes). Callers must never
log the returned values.
"""
if not isinstance(extra_headers, dict) or not extra_headers:
return {}
return {str(k): str(v) for k, v in extra_headers.items() if v is not None}
def get_custom_provider_extra_headers(
base_url: str,
custom_providers: Optional[List[Dict[str, Any]]] = None,
@ -4827,12 +4842,7 @@ def get_custom_provider_extra_headers(
entry_url = (entry.get("base_url") or "").rstrip("/").lower()
if not entry_url or entry_url != target_url:
continue
extra_headers = entry.get("extra_headers")
if isinstance(extra_headers, dict) and extra_headers:
return {
str(k): str(v) for k, v in extra_headers.items() if v is not None
}
return {}
return normalize_extra_headers(entry.get("extra_headers"))
return {}

View file

@ -1365,14 +1365,9 @@ _picker_prewarm_done = _threading.Event()
def _extra_headers_from_config(entry: Any) -> dict[str, str]:
if not isinstance(entry, dict):
return {}
headers = entry.get("extra_headers")
if not isinstance(headers, dict) or not headers:
return {}
return {
str(key): str(value)
for key, value in headers.items()
if value is not None
}
from hermes_cli.config import normalize_extra_headers
return normalize_extra_headers(entry.get("extra_headers"))
def prewarm_picker_cache_async() -> Optional["_threading.Thread"]:
@ -2126,7 +2121,16 @@ def list_authenticated_providers(
if isinstance(discover, str):
discover = discover.lower() not in {"false", "no", "0"}
group_key = (api_url, credential_identity, api_mode)
# Per-provider extra_headers participate in the group identity:
# two entries sharing (api_url, credential, api_mode) but declaring
# different headers are distinct endpoints (e.g. different tenants
# behind one proxy URL, routed by header) and must probe /models
# with their own headers rather than collapsing into one row and
# silently adopting whichever header set was seen first.
entry_extra_headers = _extra_headers_from_config(entry)
headers_identity = tuple(sorted(entry_extra_headers.items()))
group_key = (api_url, credential_identity, api_mode, headers_identity)
if group_key not in groups:
# Strip per-model suffix so "Ollama — GLM 5.1" becomes
# "Ollama" for the grouped row. Em dash is the convention
@ -2147,13 +2151,13 @@ def list_authenticated_providers(
"api_key": api_key,
"models": [],
"discover_models": discover,
"extra_headers": _extra_headers_from_config(entry),
"extra_headers": entry_extra_headers,
}
else:
if api_key and not groups[group_key].get("api_key"):
groups[group_key]["api_key"] = api_key
if not groups[group_key].get("extra_headers"):
groups[group_key]["extra_headers"] = _extra_headers_from_config(entry)
# extra_headers is part of group_key, so every entry in this
# group already carries identical headers — nothing to merge.
# If any entry in this group opts out of discovery,
# honour that for the whole grouped row.
if not discover:

View file

@ -3501,13 +3501,9 @@ def probe_api_models(
if isinstance(request_headers, dict):
# Per-provider custom headers can contain auth/proxy secrets. Merge
# last so endpoint-specific config wins, and never log the values.
headers.update(
{
str(key): str(value)
for key, value in request_headers.items()
if value is not None
}
)
from hermes_cli.config import normalize_extra_headers
headers.update(normalize_extra_headers(request_headers))
for candidate_base, is_fallback in candidates:
url = candidate_base.rstrip("/") + "/models"

View file

@ -31,7 +31,11 @@ from hermes_cli.auth import (
resolve_external_process_provider_credentials,
has_usable_secret,
)
from hermes_cli.config import get_compatible_custom_providers, load_config
from hermes_cli.config import (
get_compatible_custom_providers,
load_config,
normalize_extra_headers,
)
from hermes_constants import OPENROUTER_BASE_URL
from utils import base_url_host_matches, base_url_hostname, env_int
@ -597,11 +601,9 @@ def _lift_extra_headers(entry: Dict[str, Any], result: Dict[str, Any]) -> None:
SECURITY: header values routinely carry credentials (Cloudflare Access
service tokens, proxy auth, custom bearer schemes). Never log them.
"""
extra_headers = entry.get("extra_headers")
if isinstance(extra_headers, dict) and extra_headers:
result["extra_headers"] = {
str(k): str(v) for k, v in extra_headers.items() if v is not None
}
extra_headers = normalize_extra_headers(entry.get("extra_headers"))
if extra_headers:
result["extra_headers"] = extra_headers
def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, Any]]:

View file

@ -10,10 +10,23 @@ from hermes_cli.config import (
_normalize_custom_provider_entry,
apply_custom_provider_extra_headers_to_client_kwargs,
get_custom_provider_extra_headers,
normalize_extra_headers,
)
from hermes_cli import models as models_mod
def test_normalize_extra_headers_stringifies_and_drops_none():
assert normalize_extra_headers({"X-Int": 7, "X-Str": "v", "X-None": None}) == {
"X-Int": "7",
"X-Str": "v",
}
def test_normalize_extra_headers_rejects_non_dict_and_empty():
for bad in (None, "x", 42, ["a"], {}):
assert normalize_extra_headers(bad) == {}
def test_normalize_entry_keeps_extra_headers():
normalized = _normalize_custom_provider_entry(
{

View file

@ -745,6 +745,60 @@ def test_custom_provider_live_model_probe_uses_extra_headers(monkeypatch):
assert gateway_prov["models"] == ["gateway-model"]
def test_same_endpoint_different_extra_headers_not_collapsed(monkeypatch):
"""Entries sharing (api_url, credential, api_mode) but declaring different
extra_headers must NOT collapse into one picker row each is a distinct
header-authenticated endpoint (e.g. per-tenant routing behind one proxy)
and must probe /models with its own headers."""
monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
monkeypatch.setattr("hermes_cli.providers.HERMES_OVERLAYS", {})
calls = []
def fake_fetch_api_models(api_key, base_url, **kwargs):
calls.append((api_key, base_url, kwargs.get("headers")))
# Return a per-tenant model list keyed by the routing header so we can
# assert each row got its OWN probe rather than a shared one.
tenant = (kwargs.get("headers") or {}).get("X-Tenant", "none")
return [f"model-{tenant}"]
monkeypatch.setattr("hermes_cli.models.fetch_api_models", fake_fetch_api_models)
providers = list_authenticated_providers(
current_provider="openrouter",
current_base_url="https://openrouter.ai/api/v1",
custom_providers=[
{
"name": "Proxy Tenant A",
"api_key": "shared-key",
"base_url": "http://localhost:8081/v1",
"extra_headers": {"X-Tenant": "a"},
},
{
"name": "Proxy Tenant B",
"api_key": "shared-key",
"base_url": "http://localhost:8081/v1",
"extra_headers": {"X-Tenant": "b"},
},
],
max_models=50,
)
rows = [
p for p in providers if p.get("api_url") == "http://localhost:8081/v1"
]
# Two distinct rows, not one collapsed row.
assert len(rows) == 2, f"expected 2 rows, got {len(rows)}: {rows}"
# Each tenant was probed with its OWN header set (order-independent).
assert ("shared-key", "http://localhost:8081/v1", {"X-Tenant": "a"}) in calls
assert ("shared-key", "http://localhost:8081/v1", {"X-Tenant": "b"}) in calls
# Each row surfaces the model list its own headers unlocked.
models_by_row = {tuple(r["models"]) for r in rows}
assert models_by_row == {("model-a",), ("model-b",)}
def test_custom_providers_discover_models_false_keeps_explicit_subset(monkeypatch):
"""Custom providers (section 4) with ``discover_models: false`` must keep
their explicit ``models:`` subset instead of replacing it with live