From 4a09b692ecc385ad48f00694a1e315b8eed120cd Mon Sep 17 00:00:00 2001 From: Mibayy Date: Thu, 2 Jul 2026 04:47:12 -0700 Subject: [PATCH] feat(api-server): per-client model routing via model_routes (#3176 salvage) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a no-code routing layer to the OpenAI-compatible API server so one Hermes deployment can map different API clients to different model/provider backends. Clients pick a backend by sending a configured alias as the OpenAI 'model' field; unmatched values fall back to the global model. Configured aliases are listed by GET /v1/models. Precedence (highest first): session /model override > model_routes route > global config. Route provider credentials resolve through _resolve_runtime_agent_kwargs_for_provider (same seam as channel_overrides); per-route api_key/base_url are upstream provider credential overrides — never caller auth, never logged. Salvaged and rebased from PR #3176 by @Mibayy onto current main. --- cli-config.yaml.example | 35 ++++ gateway/platforms/api_server.py | 208 +++++++++++++++++++++-- scripts/release.py | 1 + tests/gateway/test_api_server.py | 275 +++++++++++++++++++++++++++++++ 4 files changed, 503 insertions(+), 16 deletions(-) diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 8a0e37fa7..8c60a5ea0 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -571,6 +571,41 @@ max_concurrent_sessions: null # explicitly want one shared "room brain" per group/channel. group_sessions_per_user: true +# ───────────────────────────────────────────────────────────────────────────── +# API Server — per-client model routing +# ───────────────────────────────────────────────────────────────────────────── +# Route different API clients to different models/providers on a single +# Hermes deployment. Clients choose a backend by sending a specific string +# as the OpenAI ``model`` field. Unmapped model values fall back to the +# global model configured in the ``model:`` section above, and an explicit +# session /model override always wins over a route. +# +# Configure via the ``platforms.api_server.extra.model_routes`` gateway +# config block: +# +# platforms: +# api_server: +# enabled: true +# extra: +# key: "your-api-server-secret" +# model_routes: +# # Xiaozhi clients send model="minimax-m2" → routed to MiniMax via OpenRouter +# minimax-m2: +# model: "minimax/minimax-m1" +# provider: "openrouter" # optional — overrides global provider +# # api_key: "sk-..." # optional — per-route UPSTREAM provider +# # key (NOT caller auth; never logged) +# # base_url: "https://..." # optional — per-route base URL +# # GPT clients keep their own alias +# gpt-5: +# model: "openai/gpt-5" +# provider: "openrouter" +# +# Configured aliases are automatically listed by GET /v1/models so clients +# can discover them without manual coordination. Caller authentication is +# unchanged: every request still authenticates with the global API server +# key (``extra.key`` / API_SERVER_KEY). + # ───────────────────────────────────────────────────────────────────────────── # Gateway Streaming # ───────────────────────────────────────────────────────────────────────────── diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index fa2f14267..3cf11f335 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -6,7 +6,7 @@ Exposes an HTTP server with endpoints: - POST /v1/responses — OpenAI Responses API format (stateful via previous_response_id; X-Hermes-Session-Key supported) - GET /v1/responses/{response_id} — Retrieve a stored response - DELETE /v1/responses/{response_id} — Delete a stored response -- GET /v1/models — lists hermes-agent as an available model +- GET /v1/models — lists hermes-agent and any configured model_routes aliases - GET /v1/capabilities — machine-readable API capabilities for external UIs - GET /api/sessions — list client-visible Hermes sessions - POST /api/sessions — create an empty Hermes session @@ -832,6 +832,22 @@ class APIServerAdapter(BasePlatformAdapter): self._model_name: str = self._resolve_model_name( extra.get("model_name", os.getenv("API_SERVER_MODEL_NAME", "")), ) + # model_routes: maps incoming ``model`` field values to specific + # provider/model configs so one API server instance can serve + # multiple clients on different backends. + # + # Config format (platforms.api_server.extra in the gateway config): + # model_routes: + # minimax-m2: # alias the client sends as the "model" field + # model: "minimax/minimax-m1" + # provider: "openrouter" # optional — resolved via the provider + # # credential chain when set + # api_key: "sk-…" # optional — per-route UPSTREAM provider + # # key override (NOT caller auth; never logged) + # base_url: "https://…" # optional — per-route base URL override + self._model_routes: Dict[str, Dict[str, Any]] = self._parse_model_routes( + extra.get("model_routes"), + ) self._app: Optional["web.Application"] = None self._runner: Optional["web.AppRunner"] = None self._site: Optional["web.TCPSite"] = None @@ -1118,6 +1134,78 @@ class APIServerAdapter(BasePlatformAdapter): # Agent creation helper # ------------------------------------------------------------------ + @staticmethod + def _parse_model_routes(raw: Any) -> Dict[str, Dict[str, Any]]: + """Validate and normalize the ``model_routes`` config block. + + Accepts a mapping of ``alias -> {model, provider?, api_key?, base_url?}``. + Invalid shapes are dropped (never raised) so a config typo can't take + the whole API server down. Route values are coerced to strings. + + Security: per-route ``api_key`` values are UPSTREAM provider + credentials (used to call the routed model's backend), not caller + authentication — callers still authenticate with the global + API_SERVER_KEY bearer token via ``_check_auth``. Route api_keys must + never be logged; only alias names and non-secret fields may appear in + logs. + """ + if not isinstance(raw, dict): + if raw: + logger.warning( + "api_server model_routes ignored: expected a mapping, got %s", + type(raw).__name__, + ) + return {} + + allowed_keys = ("model", "provider", "api_key", "base_url") + routes: Dict[str, Dict[str, Any]] = {} + for alias, cfg in raw.items(): + alias_str = str(alias).strip() + if not alias_str or not isinstance(cfg, dict): + logger.warning( + "api_server model_routes: dropping invalid route entry %r", alias_str or alias + ) + continue + route = { + key: str(cfg[key]).strip() + for key in allowed_keys + if cfg.get(key) is not None and str(cfg[key]).strip() + } + if not route.get("model"): + logger.warning( + "api_server model_routes: route %r has no 'model'; dropping", alias_str + ) + continue + routes[alias_str] = route + return routes + + def _resolve_route(self, model_alias: Any) -> Optional[Dict[str, Any]]: + """Return the model_routes entry for *model_alias*, or None.""" + if not self._model_routes or not isinstance(model_alias, str): + return None + return self._model_routes.get(model_alias) + + def _session_model_override_for(self, session_key: Optional[str]) -> Optional[Dict[str, Any]]: + """Return the gateway's session ``/model`` override for *session_key*, if any. + + The gateway tracks per-session ``/model`` switches in + ``GatewayRunner._session_model_overrides``. API-server requests that + share such a session key must keep honouring the explicit session + override even when the request's ``model`` field matches a configured + route — a user-issued ``/model`` always wins over static config. + """ + if not session_key: + return None + try: + from gateway.run import _gateway_runner_ref + runner = _gateway_runner_ref() + if runner is None: + return None + override = runner._session_model_overrides.get(session_key) + return dict(override) if isinstance(override, dict) else None + except Exception: + return None + def _create_agent( self, ephemeral_system_prompt: Optional[str] = None, @@ -1127,6 +1215,7 @@ class APIServerAdapter(BasePlatformAdapter): tool_start_callback=None, tool_complete_callback=None, gateway_session_key: Optional[str] = None, + route: Optional[Dict[str, Any]] = None, ) -> Any: """ Create an AIAgent instance using the gateway's runtime config. @@ -1142,6 +1231,11 @@ class APIServerAdapter(BasePlatformAdapter): key is meant to persist across transcripts so long-term memory providers (e.g. Honcho) can scope their per-chat state correctly — matching the semantics of the native gateway's ``session_key``. + + ``route`` is an optional ``model_routes`` entry (per-client model + routing). When set — and no session ``/model`` override exists for + this session — its model/provider/api_key/base_url override the + global defaults for this agent instance only. """ from run_agent import AIAgent from gateway.run import ( @@ -1169,6 +1263,51 @@ class APIServerAdapter(BasePlatformAdapter): if runtime_model: model = runtime_model + # Per-client model routing (model_routes config). The route was + # resolved from the request's ``model`` field by the HTTP handler. + # Precedence (highest first): session ``/model`` override → model_routes + # route → global config — an explicit user-issued ``/model`` on the + # session always beats static per-client route config. + session_override = self._session_model_override_for( + gateway_session_key or session_id + ) + if route and not session_override: + if route.get("provider"): + # Resolve real credentials for the routed provider (mirrors + # the channel_overrides path in gateway/run.py) so a route + # without an explicit api_key/base_url still gets the right + # provider auth instead of the default provider's key. + try: + from gateway.run import _resolve_runtime_agent_kwargs_for_provider + provider_kwargs = _resolve_runtime_agent_kwargs_for_provider( + route["provider"] + ) + provider_kwargs.pop("model", None) + runtime_kwargs.update(provider_kwargs) + except Exception: + # Fall back to just switching the provider name; explicit + # per-route api_key/base_url below can still complete auth. + runtime_kwargs["provider"] = route["provider"] + if route.get("model"): + model = route["model"] + # Per-route secrets are upstream provider credentials. Never log + # them (compare _check_auth: caller auth stays the global bearer + # key checked with hmac.compare_digest). + if route.get("api_key"): + runtime_kwargs["api_key"] = route["api_key"] + if route.get("base_url"): + runtime_kwargs["base_url"] = route["base_url"] + logger.debug( + "api_server model route applied: model=%s provider=%s", + model, + runtime_kwargs.get("provider"), + ) + elif route and session_override: + logger.debug( + "api_server model route skipped: session /model override wins for %s", + gateway_session_key or session_id, + ) + user_config = _load_gateway_config() enabled_toolsets = sorted(_get_platform_tools(user_config, "api_server")) @@ -1255,25 +1394,40 @@ class APIServerAdapter(BasePlatformAdapter): }) async def _handle_models(self, request: "web.Request") -> "web.Response": - """GET /v1/models — return hermes-agent as an available model.""" + """GET /v1/models — list hermes-agent and any configured model_routes aliases.""" auth_err = self._check_auth(request) if auth_err: return auth_err - return web.json_response({ - "object": "list", - "data": [ - { - "id": self._model_name, - "object": "model", - "created": int(time.time()), - "owned_by": "hermes", - "permission": [], - "root": self._model_name, - "parent": None, - } - ], - }) + now = int(time.time()) + models = [ + { + "id": self._model_name, + "object": "model", + "created": now, + "owned_by": "hermes", + "permission": [], + "root": self._model_name, + "parent": None, + } + ] + # Expose configured model route aliases so clients can discover them. + # Only the alias and resolved model name are exposed — never provider + # credentials. + for alias, route_cfg in self._model_routes.items(): + if alias == self._model_name: + continue # already listed above + models.append({ + "id": alias, + "object": "model", + "created": now, + "owned_by": "hermes", + "permission": [], + "root": route_cfg.get("model", alias), + "parent": self._model_name, + }) + + return web.json_response({"object": "list", "data": models}) async def _handle_capabilities(self, request: "web.Request") -> "web.Response": """GET /v1/capabilities — advertise the stable API surface. @@ -2012,6 +2166,11 @@ class APIServerAdapter(BasePlatformAdapter): model_name = body.get("model", self._model_name) created = int(time.time()) + # Per-client model routing: if the requested model matches a + # configured model_routes alias, this request's agent is created + # with that route's model/provider instead of the global default. + route = self._resolve_route(model_name) + if stream: import queue as _q _stream_q: _q.Queue = _q.Queue() @@ -2094,6 +2253,7 @@ class APIServerAdapter(BasePlatformAdapter): tool_complete_callback=_on_tool_complete, agent_ref=agent_ref, gateway_session_key=gateway_session_key, + route=route, )) # Ensure SSE drain loops can terminate without relying on polling # agent_task.done(), which can race with queue timeout checks. @@ -2113,6 +2273,7 @@ class APIServerAdapter(BasePlatformAdapter): ephemeral_system_prompt=system_prompt, session_id=session_id, gateway_session_key=gateway_session_key, + route=route, ) idempotency_key = request.headers.get("Idempotency-Key") @@ -3123,6 +3284,9 @@ class APIServerAdapter(BasePlatformAdapter): # groups the entire conversation under one session entry. session_id = stored_session_id or str(uuid.uuid4()) + # Per-client model routing for /v1/responses (see model_routes). + route = self._resolve_route(body.get("model")) + stream = _coerce_request_bool(body.get("stream"), default=False) if stream: # Streaming branch — emit OpenAI Responses SSE events as the @@ -3176,6 +3340,7 @@ class APIServerAdapter(BasePlatformAdapter): tool_complete_callback=_on_tool_complete, agent_ref=agent_ref, gateway_session_key=gateway_session_key, + route=route, )) # Ensure SSE drain loops can terminate without relying on polling # agent_task.done(), which can race with queue timeout checks. @@ -3209,6 +3374,7 @@ class APIServerAdapter(BasePlatformAdapter): ephemeral_system_prompt=instructions, session_id=session_id, gateway_session_key=gateway_session_key, + route=route, ) idempotency_key = request.headers.get("Idempotency-Key") @@ -3839,6 +4005,7 @@ class APIServerAdapter(BasePlatformAdapter): tool_complete_callback=None, agent_ref: Optional[list] = None, gateway_session_key: Optional[str] = None, + route: Optional[Dict[str, Any]] = None, ) -> tuple: """ Create an agent and run a conversation in a thread executor. @@ -3846,6 +4013,10 @@ class APIServerAdapter(BasePlatformAdapter): Returns ``(result_dict, usage_dict)`` where *usage_dict* contains ``input_tokens``, ``output_tokens`` and ``total_tokens``. + *route* is an optional ``model_routes`` entry (resolved from the + request's ``model`` field) that overrides the global model/provider + for this specific request. + If *agent_ref* is a one-element list, the AIAgent instance is stored at ``agent_ref[0]`` before ``run_conversation`` begins. This allows callers (e.g. the SSE writer) to call ``agent.interrupt()`` from @@ -3870,6 +4041,7 @@ class APIServerAdapter(BasePlatformAdapter): tool_start_callback=tool_start_callback, tool_complete_callback=tool_complete_callback, gateway_session_key=gateway_session_key, + route=route, ) if agent_ref is not None: agent_ref[0] = agent @@ -4085,6 +4257,9 @@ class APIServerAdapter(BasePlatformAdapter): model=body.get("model", self._model_name), ) + # Per-client model routing for /v1/runs (see model_routes). + route = self._resolve_route(body.get("model")) + async def _run_and_close(): try: self._set_run_status(run_id, "running") @@ -4094,6 +4269,7 @@ class APIServerAdapter(BasePlatformAdapter): stream_delta_callback=_text_cb, tool_progress_callback=event_cb, gateway_session_key=gateway_session_key, + route=route, ) self._active_run_agents[run_id] = agent diff --git a/scripts/release.py b/scripts/release.py index 21af630ce..a0b01aa69 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -46,6 +46,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json" # Auto-extracted from noreply emails + manual overrides AUTHOR_MAP = { "louis@letsfive.io": "Mibayy", # PR #3243 salvage (/compact alias + preview/aggressive flags for /compress) + "louis@letsfive.io": "Mibayy", # PR #3176 salvage (api-server: per-client model routing via model_routes) "ai-lab@foxmail.com": "CrazyBoyM", # PR #55828 salvage (image_gen openai-codex: wire image-to-image / reference-image editing via Codex Responses input_image parts; magic-byte + read-guard + 25MB-cap + clamp-to-16 hardening) "r0gersm1th@users.noreply.github.com": "r0gersm1th", # PR #3219 salvage (whatsapp bridge: resolve LID sender IDs to phone numbers in the message payload so phone-based allowlists match; commit authored by collaborator r0gersm1th, PR by @ajmeese7) "louis@letsfive.io": "Mibayy", # PR #3296 salvage (status: provider label honors config.yaml model.base_url, not just OPENAI_BASE_URL env) diff --git a/tests/gateway/test_api_server.py b/tests/gateway/test_api_server.py index a94b34f4d..1aed7455e 100644 --- a/tests/gateway/test_api_server.py +++ b/tests/gateway/test_api_server.py @@ -3798,3 +3798,278 @@ class TestSessionKeyHeader: assert resp.status == 200 data = await resp.json() assert data["features"]["session_key_header"] == "X-Hermes-Session-Key" + + +# --------------------------------------------------------------------------- +# Per-client model routing (model_routes) +# --------------------------------------------------------------------------- + + +def _make_routing_adapter(routes) -> APIServerAdapter: + """Create an adapter with model_routes configured.""" + config = PlatformConfig(enabled=True, extra={"model_routes": routes}) + return APIServerAdapter(config) + + +def _patch_create_agent_runtime(monkeypatch, captured: dict, fake_agent_cls): + """Stub out every external dependency of _create_agent.""" + monkeypatch.setattr("run_agent.AIAgent", fake_agent_cls) + monkeypatch.setattr( + "gateway.run._resolve_runtime_agent_kwargs", + lambda: { + "provider": "openrouter", + "api_key": "sk-global", + "base_url": "https://openrouter.ai/api/v1", + "api_mode": "chat_completions", + }, + ) + monkeypatch.setattr("gateway.run._resolve_gateway_model", lambda: "global/model") + monkeypatch.setattr("gateway.run._load_gateway_config", lambda: {}) + monkeypatch.setattr( + "gateway.run.GatewayRunner._load_reasoning_config", staticmethod(lambda: {}) + ) + monkeypatch.setattr( + "gateway.run.GatewayRunner._load_fallback_model", staticmethod(lambda: None) + ) + monkeypatch.setattr("gateway.run._current_max_iterations", lambda: 90) + monkeypatch.setattr("hermes_cli.tools_config._get_platform_tools", lambda *_: set()) + + +class TestModelRoutesParsing: + def test_valid_routes_are_parsed(self): + routes = {"minimax-m2": {"model": "minimax/minimax-m1", "provider": "openrouter"}} + adapter = _make_routing_adapter(routes) + assert adapter._model_routes == routes + + def test_non_dict_routes_config_is_ignored(self): + adapter = _make_routing_adapter("not-a-dict") + assert adapter._model_routes == {} + + def test_route_without_model_is_dropped(self): + adapter = _make_routing_adapter({"bad": {"provider": "openrouter"}}) + assert adapter._model_routes == {} + + def test_route_with_non_dict_value_is_dropped(self): + adapter = _make_routing_adapter({"bad": "gpt-5", "good": {"model": "openai/gpt-5"}}) + assert set(adapter._model_routes) == {"good"} + + def test_unknown_route_keys_are_stripped(self): + adapter = _make_routing_adapter( + {"a": {"model": "m", "provider": "p", "evil_extra": "x"}} + ) + assert adapter._model_routes["a"] == {"model": "m", "provider": "p"} + + def test_resolve_route_lookup(self): + adapter = _make_routing_adapter({"minimax-m2": {"model": "minimax/minimax-m1"}}) + assert adapter._resolve_route("minimax-m2") == {"model": "minimax/minimax-m1"} + assert adapter._resolve_route("unknown-model") is None + assert adapter._resolve_route(None) is None + assert adapter._resolve_route(123) is None + + def test_no_routes_configured(self): + adapter = _make_routing_adapter({}) + assert adapter._resolve_route("hermes-agent") is None + + +class TestModelRoutesModelsEndpoint: + @pytest.mark.asyncio + async def test_models_endpoint_lists_route_aliases(self): + routes = { + "minimax-m2": {"model": "minimax/minimax-m1", "provider": "openrouter"}, + "gpt-5": {"model": "openai/gpt-5"}, + } + adapter = _make_routing_adapter(routes) + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + resp = await cli.get("/v1/models") + assert resp.status == 200 + data = await resp.json() + ids = {m["id"] for m in data["data"]} + assert adapter._model_name in ids + assert "minimax-m2" in ids + assert "gpt-5" in ids + + @pytest.mark.asyncio + async def test_models_endpoint_route_alias_fields_and_no_secrets(self): + routes = {"my-alias": {"model": "openai/gpt-5", "api_key": "sk-route-secret"}} + adapter = _make_routing_adapter(routes) + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + resp = await cli.get("/v1/models") + data = await resp.json() + alias_entry = next(m for m in data["data"] if m["id"] == "my-alias") + assert alias_entry["root"] == "openai/gpt-5" + assert alias_entry["parent"] == adapter._model_name + # per-route api_key must never leak through the discovery endpoint + assert "sk-route-secret" not in json.dumps(data) + + +class TestModelRoutesHandlers: + @pytest.mark.asyncio + async def test_chat_completions_passes_route_to_run_agent(self): + routes = {"minimax-m2": {"model": "minimax/minimax-m1", "provider": "openrouter"}} + adapter = _make_routing_adapter(routes) + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: + mock_run.return_value = ( + {"final_response": "hi", "messages": [], "api_calls": 1}, + {"input_tokens": 5, "output_tokens": 5, "total_tokens": 10}, + ) + resp = await cli.post("/v1/chat/completions", json={ + "model": "minimax-m2", + "messages": [{"role": "user", "content": "hello"}], + }) + assert resp.status == 200 + kwargs = mock_run.call_args.kwargs + assert kwargs.get("route") == { + "model": "minimax/minimax-m1", "provider": "openrouter", + } + + @pytest.mark.asyncio + async def test_chat_completions_no_route_for_unknown_model(self): + adapter = _make_routing_adapter({"minimax-m2": {"model": "minimax/minimax-m1"}}) + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: + mock_run.return_value = ( + {"final_response": "hi", "messages": [], "api_calls": 1}, + {"input_tokens": 5, "output_tokens": 5, "total_tokens": 10}, + ) + resp = await cli.post("/v1/chat/completions", json={ + "model": "unknown-model", + "messages": [{"role": "user", "content": "hello"}], + }) + assert resp.status == 200 + assert mock_run.call_args.kwargs.get("route") is None + + @pytest.mark.asyncio + async def test_responses_api_passes_route_to_run_agent(self): + routes = {"xiaozhi": {"model": "minimax/minimax-m1", "provider": "openrouter"}} + adapter = _make_routing_adapter(routes) + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: + mock_run.return_value = ( + {"final_response": "hi", "messages": [], "api_calls": 1}, + {"input_tokens": 5, "output_tokens": 5, "total_tokens": 10}, + ) + resp = await cli.post("/v1/responses", json={ + "model": "xiaozhi", + "input": "hello", + }) + assert resp.status == 200 + assert mock_run.call_args.kwargs.get("route") == { + "model": "minimax/minimax-m1", "provider": "openrouter", + } + + +class TestModelRoutesAgentCreation: + def test_route_overrides_model_and_credentials(self, monkeypatch): + captured = {} + + class FakeAgent: + def __init__(self, **kwargs): + captured.update(kwargs) + + _patch_create_agent_runtime(monkeypatch, captured, FakeAgent) + adapter = _make_routing_adapter( + {"alias": { + "model": "minimax/minimax-m1", + "api_key": "sk-route", + "base_url": "https://route.example/v1", + }} + ) + monkeypatch.setattr(adapter, "_ensure_session_db", lambda: None) + monkeypatch.setattr(adapter, "_session_model_override_for", lambda *_: None) + + agent = adapter._create_agent( + session_id="s1", route=adapter._resolve_route("alias") + ) + + assert isinstance(agent, FakeAgent) + assert captured["model"] == "minimax/minimax-m1" + assert captured["api_key"] == "sk-route" + assert captured["base_url"] == "https://route.example/v1" + + def test_route_provider_resolves_provider_credentials(self, monkeypatch): + captured = {} + + class FakeAgent: + def __init__(self, **kwargs): + captured.update(kwargs) + + _patch_create_agent_runtime(monkeypatch, captured, FakeAgent) + monkeypatch.setattr( + "gateway.run._resolve_runtime_agent_kwargs_for_provider", + lambda provider: { + "provider": provider, + "api_key": f"sk-{provider}", + "base_url": f"https://{provider}.example/v1", + "api_mode": "chat_completions", + }, + ) + adapter = _make_routing_adapter( + {"alias": {"model": "other/model", "provider": "otherprov"}} + ) + monkeypatch.setattr(adapter, "_ensure_session_db", lambda: None) + monkeypatch.setattr(adapter, "_session_model_override_for", lambda *_: None) + + adapter._create_agent(session_id="s1", route=adapter._resolve_route("alias")) + + assert captured["model"] == "other/model" + assert captured["provider"] == "otherprov" + assert captured["api_key"] == "sk-otherprov" + + def test_no_route_keeps_global_model(self, monkeypatch): + captured = {} + + class FakeAgent: + def __init__(self, **kwargs): + captured.update(kwargs) + + _patch_create_agent_runtime(monkeypatch, captured, FakeAgent) + adapter = _make_routing_adapter({"alias": {"model": "other/model"}}) + monkeypatch.setattr(adapter, "_ensure_session_db", lambda: None) + monkeypatch.setattr(adapter, "_session_model_override_for", lambda *_: None) + + adapter._create_agent(session_id="s1", route=None) + + assert captured["model"] == "global/model" + assert captured["api_key"] == "sk-global" + + def test_session_model_override_beats_route(self, monkeypatch): + """A user-issued /model on the session must win over static route config.""" + captured = {} + + class FakeAgent: + def __init__(self, **kwargs): + captured.update(kwargs) + + _patch_create_agent_runtime(monkeypatch, captured, FakeAgent) + adapter = _make_routing_adapter({"alias": {"model": "route/model", "api_key": "sk-route"}}) + monkeypatch.setattr(adapter, "_ensure_session_db", lambda: None) + monkeypatch.setattr( + adapter, + "_session_model_override_for", + lambda key: {"model": "session/override-model"}, + ) + + adapter._create_agent(session_id="s1", route=adapter._resolve_route("alias")) + + # The route must NOT be applied — the session override path (global + # runtime here, since the gateway applies /model separately) wins. + assert captured["model"] == "global/model" + assert captured["api_key"] == "sk-global" + + def test_session_override_lookup_reads_gateway_runner(self, monkeypatch): + """_session_model_override_for consults GatewayRunner._session_model_overrides.""" + adapter = _make_routing_adapter({}) + + class FakeRunner: + _session_model_overrides = {"chan-1": {"model": "user/model"}} + + monkeypatch.setattr("gateway.run._gateway_runner_ref", lambda: FakeRunner()) + assert adapter._session_model_override_for("chan-1") == {"model": "user/model"} + assert adapter._session_model_override_for("chan-2") is None + assert adapter._session_model_override_for(None) is None