From ed4123792c135558e7be2e486505bc569faa2a74 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Fri, 3 Jul 2026 04:15:19 +0530 Subject: [PATCH] refactor(providers): dedupe extra_headers normalizer + key picker groups by headers Follow-up to @helix4u's #57336 salvage. Two review findings: - W1: model-picker grouped custom-provider rows by (api_url, credential, api_mode) but NOT extra_headers. Entries sharing a URL+credential+api_mode yet declaring different headers (e.g. per-tenant routing behind one proxy) collapsed into one row and probed /models with whichever header set was seen first (order-dependent). Fold a canonical header identity into group_key so distinct header-authed endpoints stay separate; drops the now-dead first-non-empty merge branch. - W2: the extra_headers stringify+None-filter comprehension existed in 5 copies (config.py x2, runtime_provider.py, model_switch.py, models.py). Extract one shared hermes_cli.config.normalize_extra_headers primitive; all sites now call it. Tests: +normalize_extra_headers unit tests, +regression test proving two same-endpoint entries with different headers stay distinct and each probes with its own headers. 223 targeted tests pass; ruff clean. --- hermes_cli/config.py | 32 +++++++---- hermes_cli/model_switch.py | 28 +++++----- hermes_cli/models.py | 10 ++-- hermes_cli/runtime_provider.py | 14 ++--- .../test_custom_provider_extra_headers.py | 13 +++++ .../test_model_switch_custom_providers.py | 54 +++++++++++++++++++ 6 files changed, 115 insertions(+), 36 deletions(-) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 7bbec92cf..35ad38172 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -4609,11 +4609,9 @@ def _normalize_custom_provider_entry( # Per-provider extra HTTP headers (proxies, gateways, custom auth). # Values may carry credentials (e.g. CF-Access-Client-Secret) — never # log them anywhere downstream. - extra_headers = entry.get("extra_headers") - if isinstance(extra_headers, dict) and extra_headers: - normalized["extra_headers"] = { - str(k): str(v) for k, v in extra_headers.items() if v is not None - } + normalized_headers = normalize_extra_headers(entry.get("extra_headers")) + if normalized_headers: + normalized["extra_headers"] = normalized_headers ssl_ca_cert = entry.get("ssl_ca_cert") if isinstance(ssl_ca_cert, str) and ssl_ca_cert.strip(): @@ -4796,6 +4794,23 @@ def apply_custom_provider_tls_to_client_kwargs( client_kwargs["ssl_verify"] = tls["ssl_verify"] +def normalize_extra_headers(extra_headers: Any) -> Dict[str, str]: + """Normalize a raw ``extra_headers`` value into a ``dict[str, str]``. + + Stringifies keys and values and drops entries whose value is ``None``. + Returns ``{}`` for non-dict or empty inputs. This is the single shared + normalizer for per-provider ``extra_headers`` across config normalization, + runtime resolution, client construction, and live ``/models`` discovery. + + SECURITY: header values routinely carry credentials (Cloudflare Access + service tokens, proxy auth, custom bearer schemes). Callers must never + log the returned values. + """ + if not isinstance(extra_headers, dict) or not extra_headers: + return {} + return {str(k): str(v) for k, v in extra_headers.items() if v is not None} + + def get_custom_provider_extra_headers( base_url: str, custom_providers: Optional[List[Dict[str, Any]]] = None, @@ -4827,12 +4842,7 @@ def get_custom_provider_extra_headers( entry_url = (entry.get("base_url") or "").rstrip("/").lower() if not entry_url or entry_url != target_url: continue - extra_headers = entry.get("extra_headers") - if isinstance(extra_headers, dict) and extra_headers: - return { - str(k): str(v) for k, v in extra_headers.items() if v is not None - } - return {} + return normalize_extra_headers(entry.get("extra_headers")) return {} diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py index b7f617494..c914caa84 100644 --- a/hermes_cli/model_switch.py +++ b/hermes_cli/model_switch.py @@ -1365,14 +1365,9 @@ _picker_prewarm_done = _threading.Event() def _extra_headers_from_config(entry: Any) -> dict[str, str]: if not isinstance(entry, dict): return {} - headers = entry.get("extra_headers") - if not isinstance(headers, dict) or not headers: - return {} - return { - str(key): str(value) - for key, value in headers.items() - if value is not None - } + from hermes_cli.config import normalize_extra_headers + + return normalize_extra_headers(entry.get("extra_headers")) def prewarm_picker_cache_async() -> Optional["_threading.Thread"]: @@ -2126,7 +2121,16 @@ def list_authenticated_providers( if isinstance(discover, str): discover = discover.lower() not in {"false", "no", "0"} - group_key = (api_url, credential_identity, api_mode) + # Per-provider extra_headers participate in the group identity: + # two entries sharing (api_url, credential, api_mode) but declaring + # different headers are distinct endpoints (e.g. different tenants + # behind one proxy URL, routed by header) and must probe /models + # with their own headers rather than collapsing into one row and + # silently adopting whichever header set was seen first. + entry_extra_headers = _extra_headers_from_config(entry) + headers_identity = tuple(sorted(entry_extra_headers.items())) + + group_key = (api_url, credential_identity, api_mode, headers_identity) if group_key not in groups: # Strip per-model suffix so "Ollama — GLM 5.1" becomes # "Ollama" for the grouped row. Em dash is the convention @@ -2147,13 +2151,13 @@ def list_authenticated_providers( "api_key": api_key, "models": [], "discover_models": discover, - "extra_headers": _extra_headers_from_config(entry), + "extra_headers": entry_extra_headers, } else: if api_key and not groups[group_key].get("api_key"): groups[group_key]["api_key"] = api_key - if not groups[group_key].get("extra_headers"): - groups[group_key]["extra_headers"] = _extra_headers_from_config(entry) + # extra_headers is part of group_key, so every entry in this + # group already carries identical headers — nothing to merge. # If any entry in this group opts out of discovery, # honour that for the whole grouped row. if not discover: diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 7683f2f38..b7bac169b 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -3501,13 +3501,9 @@ def probe_api_models( if isinstance(request_headers, dict): # Per-provider custom headers can contain auth/proxy secrets. Merge # last so endpoint-specific config wins, and never log the values. - headers.update( - { - str(key): str(value) - for key, value in request_headers.items() - if value is not None - } - ) + from hermes_cli.config import normalize_extra_headers + + headers.update(normalize_extra_headers(request_headers)) for candidate_base, is_fallback in candidates: url = candidate_base.rstrip("/") + "/models" diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 6ddcddab3..e93fa5317 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -31,7 +31,11 @@ from hermes_cli.auth import ( resolve_external_process_provider_credentials, has_usable_secret, ) -from hermes_cli.config import get_compatible_custom_providers, load_config +from hermes_cli.config import ( + get_compatible_custom_providers, + load_config, + normalize_extra_headers, +) from hermes_constants import OPENROUTER_BASE_URL from utils import base_url_host_matches, base_url_hostname, env_int @@ -597,11 +601,9 @@ def _lift_extra_headers(entry: Dict[str, Any], result: Dict[str, Any]) -> None: SECURITY: header values routinely carry credentials (Cloudflare Access service tokens, proxy auth, custom bearer schemes). Never log them. """ - extra_headers = entry.get("extra_headers") - if isinstance(extra_headers, dict) and extra_headers: - result["extra_headers"] = { - str(k): str(v) for k, v in extra_headers.items() if v is not None - } + extra_headers = normalize_extra_headers(entry.get("extra_headers")) + if extra_headers: + result["extra_headers"] = extra_headers def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, Any]]: diff --git a/tests/hermes_cli/test_custom_provider_extra_headers.py b/tests/hermes_cli/test_custom_provider_extra_headers.py index 5f1e77ff0..33eaf4477 100644 --- a/tests/hermes_cli/test_custom_provider_extra_headers.py +++ b/tests/hermes_cli/test_custom_provider_extra_headers.py @@ -10,10 +10,23 @@ from hermes_cli.config import ( _normalize_custom_provider_entry, apply_custom_provider_extra_headers_to_client_kwargs, get_custom_provider_extra_headers, + normalize_extra_headers, ) from hermes_cli import models as models_mod +def test_normalize_extra_headers_stringifies_and_drops_none(): + assert normalize_extra_headers({"X-Int": 7, "X-Str": "v", "X-None": None}) == { + "X-Int": "7", + "X-Str": "v", + } + + +def test_normalize_extra_headers_rejects_non_dict_and_empty(): + for bad in (None, "x", 42, ["a"], {}): + assert normalize_extra_headers(bad) == {} + + def test_normalize_entry_keeps_extra_headers(): normalized = _normalize_custom_provider_entry( { diff --git a/tests/hermes_cli/test_model_switch_custom_providers.py b/tests/hermes_cli/test_model_switch_custom_providers.py index ed105fa57..bfffa0474 100644 --- a/tests/hermes_cli/test_model_switch_custom_providers.py +++ b/tests/hermes_cli/test_model_switch_custom_providers.py @@ -745,6 +745,60 @@ def test_custom_provider_live_model_probe_uses_extra_headers(monkeypatch): assert gateway_prov["models"] == ["gateway-model"] +def test_same_endpoint_different_extra_headers_not_collapsed(monkeypatch): + """Entries sharing (api_url, credential, api_mode) but declaring different + extra_headers must NOT collapse into one picker row — each is a distinct + header-authenticated endpoint (e.g. per-tenant routing behind one proxy) + and must probe /models with its own headers.""" + monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {}) + monkeypatch.setattr("hermes_cli.providers.HERMES_OVERLAYS", {}) + + calls = [] + + def fake_fetch_api_models(api_key, base_url, **kwargs): + calls.append((api_key, base_url, kwargs.get("headers"))) + # Return a per-tenant model list keyed by the routing header so we can + # assert each row got its OWN probe rather than a shared one. + tenant = (kwargs.get("headers") or {}).get("X-Tenant", "none") + return [f"model-{tenant}"] + + monkeypatch.setattr("hermes_cli.models.fetch_api_models", fake_fetch_api_models) + + providers = list_authenticated_providers( + current_provider="openrouter", + current_base_url="https://openrouter.ai/api/v1", + custom_providers=[ + { + "name": "Proxy Tenant A", + "api_key": "shared-key", + "base_url": "http://localhost:8081/v1", + "extra_headers": {"X-Tenant": "a"}, + }, + { + "name": "Proxy Tenant B", + "api_key": "shared-key", + "base_url": "http://localhost:8081/v1", + "extra_headers": {"X-Tenant": "b"}, + }, + ], + max_models=50, + ) + + rows = [ + p for p in providers if p.get("api_url") == "http://localhost:8081/v1" + ] + # Two distinct rows, not one collapsed row. + assert len(rows) == 2, f"expected 2 rows, got {len(rows)}: {rows}" + + # Each tenant was probed with its OWN header set (order-independent). + assert ("shared-key", "http://localhost:8081/v1", {"X-Tenant": "a"}) in calls + assert ("shared-key", "http://localhost:8081/v1", {"X-Tenant": "b"}) in calls + + # Each row surfaces the model list its own headers unlocked. + models_by_row = {tuple(r["models"]) for r in rows} + assert models_by_row == {("model-a",), ("model-b",)} + + def test_custom_providers_discover_models_false_keeps_explicit_subset(monkeypatch): """Custom providers (section 4) with ``discover_models: false`` must keep their explicit ``models:`` subset instead of replacing it with live