From ab40e952f31b4da064d879bb7c370ecb60d79e11 Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Thu, 2 Jul 2026 14:33:41 -0600 Subject: [PATCH] fix(providers): pass extra headers to model discovery --- hermes_cli/model_switch.py | 30 +++++++- hermes_cli/models.py | 20 ++++- .../test_custom_provider_extra_headers.py | 43 +++++++++++ .../test_model_switch_custom_providers.py | 73 ++++++++++++++++--- .../test_user_providers_model_switch.py | 63 ++++++++++++++-- 5 files changed, 211 insertions(+), 18 deletions(-) diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py index 48bf031b7..b7f617494 100644 --- a/hermes_cli/model_switch.py +++ b/hermes_cli/model_switch.py @@ -23,7 +23,7 @@ from __future__ import annotations import logging import re from dataclasses import dataclass -from typing import List, NamedTuple, Optional +from typing import Any, List, NamedTuple, Optional from hermes_cli.providers import ( ProviderDef, @@ -1362,6 +1362,19 @@ import threading as _threading # noqa: E402 _picker_prewarm_done = _threading.Event() +def _extra_headers_from_config(entry: Any) -> dict[str, str]: + if not isinstance(entry, dict): + return {} + headers = entry.get("extra_headers") + if not isinstance(headers, dict) or not headers: + return {} + return { + str(key): str(value) + for key, value in headers.items() + if value is not None + } + + def prewarm_picker_cache_async() -> Optional["_threading.Thread"]: """Warm the provider-models disk cache in a background daemon thread. @@ -1993,7 +2006,11 @@ def list_authenticated_providers( if should_probe: try: from hermes_cli.models import fetch_api_models - live_models = fetch_api_models(api_key, api_url) + live_models = fetch_api_models( + api_key, + api_url, + headers=_extra_headers_from_config(ep_cfg) or None, + ) if live_models: models_list = live_models except Exception: @@ -2130,10 +2147,13 @@ def list_authenticated_providers( "api_key": api_key, "models": [], "discover_models": discover, + "extra_headers": _extra_headers_from_config(entry), } else: if api_key and not groups[group_key].get("api_key"): groups[group_key]["api_key"] = api_key + if not groups[group_key].get("extra_headers"): + groups[group_key]["extra_headers"] = _extra_headers_from_config(entry) # If any entry in this group opts out of discovery, # honour that for the whole grouped row. if not discover: @@ -2240,7 +2260,11 @@ def list_authenticated_providers( try: from hermes_cli.models import fetch_api_models - live_models = fetch_api_models(api_key, api_url) + live_models = fetch_api_models( + api_key, + api_url, + headers=grp.get("extra_headers") or None, + ) if live_models: grp["models"] = live_models grp["total_models"] = len(live_models) diff --git a/hermes_cli/models.py b/hermes_cli/models.py index f93ad967a..7683f2f38 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -3451,6 +3451,7 @@ def probe_api_models( base_url: Optional[str], timeout: float = 5.0, api_mode: Optional[str] = None, + request_headers: Optional[dict[str, str]] = None, ) -> dict[str, Any]: """Probe a ``/models`` endpoint with light URL heuristics. @@ -3497,6 +3498,16 @@ def probe_api_models( headers["Authorization"] = f"Bearer {api_key}" if normalized.startswith(COPILOT_BASE_URL): headers.update(copilot_default_headers()) + if isinstance(request_headers, dict): + # Per-provider custom headers can contain auth/proxy secrets. Merge + # last so endpoint-specific config wins, and never log the values. + headers.update( + { + str(key): str(value) + for key, value in request_headers.items() + if value is not None + } + ) for candidate_base, is_fallback in candidates: url = candidate_base.rstrip("/") + "/models" @@ -3529,13 +3540,20 @@ def fetch_api_models( base_url: Optional[str], timeout: float = 5.0, api_mode: Optional[str] = None, + headers: Optional[dict[str, str]] = None, ) -> Optional[list[str]]: """Fetch the list of available model IDs from the provider's ``/models`` endpoint. Returns a list of model ID strings, or ``None`` if the endpoint could not be reached (network error, timeout, auth failure, etc.). """ - return probe_api_models(api_key, base_url, timeout=timeout, api_mode=api_mode).get("models") + return probe_api_models( + api_key, + base_url, + timeout=timeout, + api_mode=api_mode, + request_headers=headers, + ).get("models") # --------------------------------------------------------------------------- diff --git a/tests/hermes_cli/test_custom_provider_extra_headers.py b/tests/hermes_cli/test_custom_provider_extra_headers.py index 9f43e9406..5f1e77ff0 100644 --- a/tests/hermes_cli/test_custom_provider_extra_headers.py +++ b/tests/hermes_cli/test_custom_provider_extra_headers.py @@ -4,11 +4,14 @@ PR #3526 salvage — user-configurable extra HTTP headers on LLM API calls (reverse proxies, gateways, custom auth such as Cloudflare Access tokens). """ +import json + from hermes_cli.config import ( _normalize_custom_provider_entry, apply_custom_provider_extra_headers_to_client_kwargs, get_custom_provider_extra_headers, ) +from hermes_cli import models as models_mod def test_normalize_entry_keeps_extra_headers(): @@ -125,3 +128,43 @@ def test_apply_extra_headers_noop_without_match(): custom_providers=providers, ) assert "default_headers" not in client_kwargs + + +def test_fetch_api_models_sends_extra_headers_to_models_probe(monkeypatch): + captured = {} + + class FakeResponse: + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def read(self): + return json.dumps({"data": [{"id": "proxy-model"}]}).encode() + + def fake_urlopen(request, timeout=0): + captured["url"] = request.full_url + captured["timeout"] = timeout + captured["headers"] = { + key.lower(): value + for key, value in request.header_items() + } + return FakeResponse() + + monkeypatch.setattr(models_mod.urllib.request, "urlopen", fake_urlopen) + + models = models_mod.fetch_api_models( + "proxy-key", + "https://llm.internal.example.com/v1", + headers={ + "sleeve-harness": "hermes", + "sleeve-base-url": "http://localhost:8081/v1", + }, + ) + + assert models == ["proxy-model"] + assert captured["url"] == "https://llm.internal.example.com/v1/models" + assert captured["headers"]["authorization"] == "Bearer proxy-key" + assert captured["headers"]["sleeve-harness"] == "hermes" + assert captured["headers"]["sleeve-base-url"] == "http://localhost:8081/v1" diff --git a/tests/hermes_cli/test_model_switch_custom_providers.py b/tests/hermes_cli/test_model_switch_custom_providers.py index 11a7613ab..ed105fa57 100644 --- a/tests/hermes_cli/test_model_switch_custom_providers.py +++ b/tests/hermes_cli/test_model_switch_custom_providers.py @@ -643,8 +643,8 @@ def test_custom_providers_uses_live_models_for_multi_model_endpoint(monkeypatch) calls = [] - def fake_fetch_api_models(api_key, base_url): - calls.append((api_key, base_url)) + def fake_fetch_api_models(api_key, base_url, **kwargs): + calls.append((api_key, base_url, kwargs)) return ["gateway-model-a", "gateway-model-b", "gateway-model-c"] monkeypatch.setattr("hermes_cli.models.fetch_api_models", fake_fetch_api_models) @@ -679,9 +679,9 @@ def test_custom_providers_uses_live_models_for_multi_model_endpoint(monkeypatch) ) assert gateway_prov is not None, "Custom provider group not found in results" - assert calls == [("sk-gateway-key", "https://gateway.example.com/v1")], ( - "fetch_api_models must be called with the custom provider's credentials" - ) + assert calls == [ + ("sk-gateway-key", "https://gateway.example.com/v1", {"headers": None}) + ], "fetch_api_models must be called with the custom provider's credentials" assert gateway_prov["models"] == [ "gateway-model-a", "gateway-model-b", @@ -690,6 +690,61 @@ def test_custom_providers_uses_live_models_for_multi_model_endpoint(monkeypatch) assert gateway_prov["total_models"] == 3 +def test_custom_provider_live_model_probe_uses_extra_headers(monkeypatch): + """custom_providers[].extra_headers must apply to live /models probes.""" + monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {}) + monkeypatch.setattr("hermes_cli.providers.HERMES_OVERLAYS", {}) + + calls = [] + + def fake_fetch_api_models(api_key, base_url, **kwargs): + calls.append((api_key, base_url, kwargs)) + return ["gateway-model"] + + monkeypatch.setattr("hermes_cli.models.fetch_api_models", fake_fetch_api_models) + + providers = list_authenticated_providers( + current_provider="openrouter", + current_base_url="https://openrouter.ai/api/v1", + custom_providers=[ + { + "name": "LLM Proxy", + "api_key": "local-key", + "base_url": "http://localhost:8081/v1", + "extra_headers": { + "sleeve-harness": "hermes", + "sleeve-base-url": "http://localhost:8081/v1", + }, + } + ], + max_models=50, + ) + + gateway_prov = next( + ( + p + for p in providers + if p.get("api_url") == "http://localhost:8081/v1" + ), + None, + ) + + assert gateway_prov is not None + assert calls == [ + ( + "local-key", + "http://localhost:8081/v1", + { + "headers": { + "sleeve-harness": "hermes", + "sleeve-base-url": "http://localhost:8081/v1", + } + }, + ) + ] + assert gateway_prov["models"] == ["gateway-model"] + + def test_custom_providers_discover_models_false_keeps_explicit_subset(monkeypatch): """Custom providers (section 4) with ``discover_models: false`` must keep their explicit ``models:`` subset instead of replacing it with live @@ -704,8 +759,8 @@ def test_custom_providers_discover_models_false_keeps_explicit_subset(monkeypatc calls = [] - def fake_fetch_api_models(api_key, base_url): - calls.append((api_key, base_url)) + def fake_fetch_api_models(api_key, base_url, **kwargs): + calls.append((api_key, base_url, kwargs)) return ["gateway-model-a", "gateway-model-b", "gateway-model-c"] monkeypatch.setattr("hermes_cli.models.fetch_api_models", fake_fetch_api_models) @@ -760,8 +815,8 @@ def test_custom_providers_discover_models_false_string_is_normalised(monkeypatch calls = [] - def fake_fetch_api_models(api_key, base_url): - calls.append((api_key, base_url)) + def fake_fetch_api_models(api_key, base_url, **kwargs): + calls.append((api_key, base_url, kwargs)) return ["live-a", "live-b"] monkeypatch.setattr("hermes_cli.models.fetch_api_models", fake_fetch_api_models) diff --git a/tests/hermes_cli/test_user_providers_model_switch.py b/tests/hermes_cli/test_user_providers_model_switch.py index 7cff7e896..9014a7b1b 100644 --- a/tests/hermes_cli/test_user_providers_model_switch.py +++ b/tests/hermes_cli/test_user_providers_model_switch.py @@ -144,8 +144,8 @@ def test_list_authenticated_providers_uses_live_models_for_user_provider(monkeyp calls = [] - def fake_fetch_api_models(api_key, base_url): - calls.append((api_key, base_url)) + def fake_fetch_api_models(api_key, base_url, **kwargs): + calls.append((api_key, base_url, kwargs)) return ["old-configured-model", "new-live-model"] monkeypatch.setattr("hermes_cli.models.fetch_api_models", fake_fetch_api_models) @@ -175,11 +175,62 @@ def test_list_authenticated_providers_uses_live_models_for_user_provider(monkeyp ) assert user_prov is not None - assert calls == [("sk-test", "http://127.0.0.1:3000/api/v1")] + assert calls == [("sk-test", "http://127.0.0.1:3000/api/v1", {"headers": None})] assert user_prov["models"] == ["old-configured-model", "new-live-model"] assert user_prov["total_models"] == 2 +def test_user_provider_live_model_probe_uses_extra_headers(monkeypatch): + """providers..extra_headers must also apply to live /models probes.""" + monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {}) + monkeypatch.setattr("hermes_cli.providers.HERMES_OVERLAYS", {}) + + calls = [] + + def fake_fetch_api_models(api_key, base_url, **kwargs): + calls.append((api_key, base_url, kwargs)) + return ["live-model"] + + monkeypatch.setattr("hermes_cli.models.fetch_api_models", fake_fetch_api_models) + + providers = list_authenticated_providers( + current_provider="llm-proxy", + user_providers={ + "llm-proxy": { + "name": "LLM Proxy", + "base_url": "http://localhost:8081/v1", + "api_key": "local-key", + "extra_headers": { + "sleeve-harness": "hermes", + "sleeve-base-url": "http://localhost:8081/v1", + }, + } + }, + custom_providers=[], + max_models=50, + ) + + user_prov = next( + (p for p in providers if p.get("is_user_defined") and p["slug"] == "llm-proxy"), + None, + ) + + assert user_prov is not None + assert calls == [ + ( + "local-key", + "http://localhost:8081/v1", + { + "headers": { + "sleeve-harness": "hermes", + "sleeve-base-url": "http://localhost:8081/v1", + } + }, + ) + ] + assert user_prov["models"] == ["live-model"] + + def test_list_authenticated_providers_dict_models_without_default_model(monkeypatch): """Dict-format ``models:`` without a ``default_model`` must still expose every dict key, not collapse to an empty list.""" @@ -1063,10 +1114,11 @@ def test_section3_probes_no_key_endpoint_without_explicit_models(monkeypatch): probed = {} - def _fake_fetch(api_key, api_url): + def _fake_fetch(api_key, api_url, **kwargs): probed["called"] = True probed["api_key"] = api_key probed["api_url"] = api_url + probed["kwargs"] = kwargs return ["live-model-1", "live-model-2", "live-model-3"] monkeypatch.setattr("hermes_cli.models.fetch_api_models", _fake_fetch) @@ -1088,6 +1140,7 @@ def test_section3_probes_no_key_endpoint_without_explicit_models(monkeypatch): assert probed.get("called") is True, "no-key bare endpoint should be probed" assert probed["api_key"] == "" + assert probed["kwargs"] == {"headers": None} row = next(p for p in providers if p["slug"] == "local-llamacpp") assert row["models"] == ["live-model-1", "live-model-2", "live-model-3"] assert row["total_models"] == 3 @@ -1099,7 +1152,7 @@ def test_section3_skips_probe_when_no_key_but_explicit_models(monkeypatch): monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {}) monkeypatch.setattr("hermes_cli.providers.HERMES_OVERLAYS", {}) - def _fail_fetch(api_key, api_url): + def _fail_fetch(api_key, api_url, **kwargs): raise AssertionError("should not probe when explicit models are set") monkeypatch.setattr("hermes_cli.models.fetch_api_models", _fail_fetch)