feat(api-server): per-client model routing via model_routes (#3176 salvage)

Adds a no-code routing layer to the OpenAI-compatible API server so one
Hermes deployment can map different API clients to different
model/provider backends. Clients pick a backend by sending a configured
alias as the OpenAI 'model' field; unmatched values fall back to the
global model. Configured aliases are listed by GET /v1/models.

Precedence (highest first): session /model override > model_routes
route > global config. Route provider credentials resolve through
_resolve_runtime_agent_kwargs_for_provider (same seam as
channel_overrides); per-route api_key/base_url are upstream provider
credential overrides — never caller auth, never logged.

Salvaged and rebased from PR #3176 by @Mibayy onto current main.
This commit is contained in:
Mibayy 2026-07-02 04:47:12 -07:00 committed by Teknium
parent ce9aa869fc
commit 4a09b692ec
4 changed files with 503 additions and 16 deletions

View file

@ -571,6 +571,41 @@ max_concurrent_sessions: null
# explicitly want one shared "room brain" per group/channel.
group_sessions_per_user: true
# ─────────────────────────────────────────────────────────────────────────────
# API Server — per-client model routing
# ─────────────────────────────────────────────────────────────────────────────
# Route different API clients to different models/providers on a single
# Hermes deployment. Clients choose a backend by sending a specific string
# as the OpenAI ``model`` field. Unmapped model values fall back to the
# global model configured in the ``model:`` section above, and an explicit
# session /model override always wins over a route.
#
# Configure via the ``platforms.api_server.extra.model_routes`` gateway
# config block:
#
#   platforms:
#     api_server:
#       enabled: true
#       extra:
#         key: "your-api-server-secret"
#         model_routes:
#           # Xiaozhi clients send model="minimax-m2" → routed to MiniMax via OpenRouter
#           minimax-m2:
#             model: "minimax/minimax-m1"
#             provider: "openrouter"      # optional — overrides global provider
#             # api_key: "sk-..."         # optional — per-route UPSTREAM provider
#             #                           # key (NOT caller auth; never logged)
#             # base_url: "https://..."   # optional — per-route base URL
#           # GPT clients keep their own alias
#           gpt-5:
#             model: "openai/gpt-5"
#             provider: "openrouter"
#
# Configured aliases are automatically listed by GET /v1/models so clients
# can discover them without manual coordination. Caller authentication is
# unchanged: every request still authenticates with the global API server
# key (``extra.key`` / API_SERVER_KEY).
# ─────────────────────────────────────────────────────────────────────────────
# Gateway Streaming
# ─────────────────────────────────────────────────────────────────────────────

View file

@ -6,7 +6,7 @@ Exposes an HTTP server with endpoints:
- POST /v1/responses OpenAI Responses API format (stateful via previous_response_id; X-Hermes-Session-Key supported)
- GET /v1/responses/{response_id} Retrieve a stored response
- DELETE /v1/responses/{response_id} Delete a stored response
- GET /v1/models lists hermes-agent as an available model
- GET /v1/models lists hermes-agent and any configured model_routes aliases
- GET /v1/capabilities machine-readable API capabilities for external UIs
- GET /api/sessions list client-visible Hermes sessions
- POST /api/sessions create an empty Hermes session
@ -832,6 +832,22 @@ class APIServerAdapter(BasePlatformAdapter):
self._model_name: str = self._resolve_model_name(
extra.get("model_name", os.getenv("API_SERVER_MODEL_NAME", "")),
)
# model_routes: maps incoming ``model`` field values to specific
# provider/model configs so one API server instance can serve
# multiple clients on different backends.
#
# Config format (platforms.api_server.extra in the gateway config):
#   model_routes:
#     minimax-m2:                  # alias the client sends as the "model" field
#       model: "minimax/minimax-m1"
#       provider: "openrouter"     # optional — resolved via the provider
#                                  # credential chain when set
#       api_key: "sk-…"            # optional — per-route UPSTREAM provider
#                                  # key override (NOT caller auth; never logged)
#       base_url: "https://…"      # optional — per-route base URL override
self._model_routes: Dict[str, Dict[str, Any]] = self._parse_model_routes(
extra.get("model_routes"),
)
self._app: Optional["web.Application"] = None
self._runner: Optional["web.AppRunner"] = None
self._site: Optional["web.TCPSite"] = None
@ -1118,6 +1134,78 @@ class APIServerAdapter(BasePlatformAdapter):
# Agent creation helper
# ------------------------------------------------------------------
@staticmethod
def _parse_model_routes(raw: Any) -> Dict[str, Dict[str, Any]]:
"""Validate and normalize the ``model_routes`` config block.
Accepts a mapping of ``alias -> {model, provider?, api_key?, base_url?}``.
Invalid shapes are dropped (never raised) so a config typo can't take
the whole API server down. Route values are coerced to strings.
Security: per-route ``api_key`` values are UPSTREAM provider
credentials (used to call the routed model's backend), not caller
authentication callers still authenticate with the global
API_SERVER_KEY bearer token via ``_check_auth``. Route api_keys must
never be logged; only alias names and non-secret fields may appear in
logs.
"""
if not isinstance(raw, dict):
if raw:
logger.warning(
"api_server model_routes ignored: expected a mapping, got %s",
type(raw).__name__,
)
return {}
allowed_keys = ("model", "provider", "api_key", "base_url")
routes: Dict[str, Dict[str, Any]] = {}
for alias, cfg in raw.items():
alias_str = str(alias).strip()
if not alias_str or not isinstance(cfg, dict):
logger.warning(
"api_server model_routes: dropping invalid route entry %r", alias_str or alias
)
continue
route = {
key: str(cfg[key]).strip()
for key in allowed_keys
if cfg.get(key) is not None and str(cfg[key]).strip()
}
if not route.get("model"):
logger.warning(
"api_server model_routes: route %r has no 'model'; dropping", alias_str
)
continue
routes[alias_str] = route
return routes
def _resolve_route(self, model_alias: Any) -> Optional[Dict[str, Any]]:
"""Return the model_routes entry for *model_alias*, or None."""
if not self._model_routes or not isinstance(model_alias, str):
return None
return self._model_routes.get(model_alias)
def _session_model_override_for(self, session_key: Optional[str]) -> Optional[Dict[str, Any]]:
"""Return the gateway's session ``/model`` override for *session_key*, if any.
The gateway tracks per-session ``/model`` switches in
``GatewayRunner._session_model_overrides``. API-server requests that
share such a session key must keep honouring the explicit session
override even when the request's ``model`` field matches a configured
route a user-issued ``/model`` always wins over static config.
"""
if not session_key:
return None
try:
from gateway.run import _gateway_runner_ref
runner = _gateway_runner_ref()
if runner is None:
return None
override = runner._session_model_overrides.get(session_key)
return dict(override) if isinstance(override, dict) else None
except Exception:
return None
def _create_agent(
self,
ephemeral_system_prompt: Optional[str] = None,
@ -1127,6 +1215,7 @@ class APIServerAdapter(BasePlatformAdapter):
tool_start_callback=None,
tool_complete_callback=None,
gateway_session_key: Optional[str] = None,
route: Optional[Dict[str, Any]] = None,
) -> Any:
"""
Create an AIAgent instance using the gateway's runtime config.
@ -1142,6 +1231,11 @@ class APIServerAdapter(BasePlatformAdapter):
key is meant to persist across transcripts so long-term memory
providers (e.g. Honcho) can scope their per-chat state correctly
matching the semantics of the native gateway's ``session_key``.
``route`` is an optional ``model_routes`` entry (per-client model
routing). When it is set, and no session ``/model`` override exists for
this session, its model/provider/api_key/base_url override the
global defaults for this agent instance only.
"""
from run_agent import AIAgent
from gateway.run import (
@ -1169,6 +1263,51 @@ class APIServerAdapter(BasePlatformAdapter):
if runtime_model:
model = runtime_model
# Per-client model routing (model_routes config). The route was
# resolved from the request's ``model`` field by the HTTP handler.
# Precedence (highest first): session ``/model`` override → model_routes
# route → global config — an explicit user-issued ``/model`` on the
# session always beats static per-client route config.
session_override = self._session_model_override_for(
gateway_session_key or session_id
)
if route and not session_override:
if route.get("provider"):
# Resolve real credentials for the routed provider (mirrors
# the channel_overrides path in gateway/run.py) so a route
# without an explicit api_key/base_url still gets the right
# provider auth instead of the default provider's key.
try:
from gateway.run import _resolve_runtime_agent_kwargs_for_provider
provider_kwargs = _resolve_runtime_agent_kwargs_for_provider(
route["provider"]
)
provider_kwargs.pop("model", None)
runtime_kwargs.update(provider_kwargs)
except Exception:
# Fall back to just switching the provider name; explicit
# per-route api_key/base_url below can still complete auth.
runtime_kwargs["provider"] = route["provider"]
if route.get("model"):
model = route["model"]
# Per-route secrets are upstream provider credentials. Never log
# them (compare _check_auth: caller auth stays the global bearer
# key checked with hmac.compare_digest).
if route.get("api_key"):
runtime_kwargs["api_key"] = route["api_key"]
if route.get("base_url"):
runtime_kwargs["base_url"] = route["base_url"]
logger.debug(
"api_server model route applied: model=%s provider=%s",
model,
runtime_kwargs.get("provider"),
)
elif route and session_override:
logger.debug(
"api_server model route skipped: session /model override wins for %s",
gateway_session_key or session_id,
)
user_config = _load_gateway_config()
enabled_toolsets = sorted(_get_platform_tools(user_config, "api_server"))
@ -1255,25 +1394,40 @@ class APIServerAdapter(BasePlatformAdapter):
})
async def _handle_models(self, request: "web.Request") -> "web.Response":
    """GET /v1/models — list hermes-agent and any configured model_routes aliases.

    The first entry is always the server's own model name. Each configured
    route alias follows as a separate entry whose ``root`` is the routed
    model name and whose ``parent`` is the server's model name, so clients
    can discover aliases without manual coordination. An alias equal to the
    server's model name is not listed twice.

    Only the alias and the resolved model name are exposed; provider
    credentials (``api_key`` / ``base_url``) are never included.

    Returns:
        The auth error response when ``_check_auth`` rejects the request,
        otherwise an OpenAI-style ``{"object": "list", "data": [...]}``
        JSON response.
    """
    auth_err = self._check_auth(request)
    if auth_err:
        return auth_err

    # One timestamp for the whole listing so all entries agree.
    now = int(time.time())
    models = [
        {
            "id": self._model_name,
            "object": "model",
            "created": now,
            "owned_by": "hermes",
            "permission": [],
            "root": self._model_name,
            "parent": None,
        }
    ]
    # Expose configured model route aliases so clients can discover them.
    # Only the alias and resolved model name are exposed — never provider
    # credentials.
    for alias, route_cfg in self._model_routes.items():
        if alias == self._model_name:
            continue  # already listed above
        models.append({
            "id": alias,
            "object": "model",
            "created": now,
            "owned_by": "hermes",
            "permission": [],
            "root": route_cfg.get("model", alias),
            "parent": self._model_name,
        })
    return web.json_response({"object": "list", "data": models})
async def _handle_capabilities(self, request: "web.Request") -> "web.Response":
"""GET /v1/capabilities — advertise the stable API surface.
@ -2012,6 +2166,11 @@ class APIServerAdapter(BasePlatformAdapter):
model_name = body.get("model", self._model_name)
created = int(time.time())
# Per-client model routing: if the requested model matches a
# configured model_routes alias, this request's agent is created
# with that route's model/provider instead of the global default.
route = self._resolve_route(model_name)
if stream:
import queue as _q
_stream_q: _q.Queue = _q.Queue()
@ -2094,6 +2253,7 @@ class APIServerAdapter(BasePlatformAdapter):
tool_complete_callback=_on_tool_complete,
agent_ref=agent_ref,
gateway_session_key=gateway_session_key,
route=route,
))
# Ensure SSE drain loops can terminate without relying on polling
# agent_task.done(), which can race with queue timeout checks.
@ -2113,6 +2273,7 @@ class APIServerAdapter(BasePlatformAdapter):
ephemeral_system_prompt=system_prompt,
session_id=session_id,
gateway_session_key=gateway_session_key,
route=route,
)
idempotency_key = request.headers.get("Idempotency-Key")
@ -3123,6 +3284,9 @@ class APIServerAdapter(BasePlatformAdapter):
# groups the entire conversation under one session entry.
session_id = stored_session_id or str(uuid.uuid4())
# Per-client model routing for /v1/responses (see model_routes).
route = self._resolve_route(body.get("model"))
stream = _coerce_request_bool(body.get("stream"), default=False)
if stream:
# Streaming branch — emit OpenAI Responses SSE events as the
@ -3176,6 +3340,7 @@ class APIServerAdapter(BasePlatformAdapter):
tool_complete_callback=_on_tool_complete,
agent_ref=agent_ref,
gateway_session_key=gateway_session_key,
route=route,
))
# Ensure SSE drain loops can terminate without relying on polling
# agent_task.done(), which can race with queue timeout checks.
@ -3209,6 +3374,7 @@ class APIServerAdapter(BasePlatformAdapter):
ephemeral_system_prompt=instructions,
session_id=session_id,
gateway_session_key=gateway_session_key,
route=route,
)
idempotency_key = request.headers.get("Idempotency-Key")
@ -3839,6 +4005,7 @@ class APIServerAdapter(BasePlatformAdapter):
tool_complete_callback=None,
agent_ref: Optional[list] = None,
gateway_session_key: Optional[str] = None,
route: Optional[Dict[str, Any]] = None,
) -> tuple:
"""
Create an agent and run a conversation in a thread executor.
@ -3846,6 +4013,10 @@ class APIServerAdapter(BasePlatformAdapter):
Returns ``(result_dict, usage_dict)`` where *usage_dict* contains
``input_tokens``, ``output_tokens`` and ``total_tokens``.
*route* is an optional ``model_routes`` entry (resolved from the
request's ``model`` field) that overrides the global model/provider
for this specific request.
If *agent_ref* is a one-element list, the AIAgent instance is stored
at ``agent_ref[0]`` before ``run_conversation`` begins. This allows
callers (e.g. the SSE writer) to call ``agent.interrupt()`` from
@ -3870,6 +4041,7 @@ class APIServerAdapter(BasePlatformAdapter):
tool_start_callback=tool_start_callback,
tool_complete_callback=tool_complete_callback,
gateway_session_key=gateway_session_key,
route=route,
)
if agent_ref is not None:
agent_ref[0] = agent
@ -4085,6 +4257,9 @@ class APIServerAdapter(BasePlatformAdapter):
model=body.get("model", self._model_name),
)
# Per-client model routing for /v1/runs (see model_routes).
route = self._resolve_route(body.get("model"))
async def _run_and_close():
try:
self._set_run_status(run_id, "running")
@ -4094,6 +4269,7 @@ class APIServerAdapter(BasePlatformAdapter):
stream_delta_callback=_text_cb,
tool_progress_callback=event_cb,
gateway_session_key=gateway_session_key,
route=route,
)
self._active_run_agents[run_id] = agent

View file

@ -46,6 +46,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
# Auto-extracted from noreply emails + manual overrides
AUTHOR_MAP = {
"louis@letsfive.io": "Mibayy", # PR #3243 salvage (/compact alias + preview/aggressive flags for /compress)
"louis@letsfive.io": "Mibayy", # PR #3176 salvage (api-server: per-client model routing via model_routes)
"ai-lab@foxmail.com": "CrazyBoyM", # PR #55828 salvage (image_gen openai-codex: wire image-to-image / reference-image editing via Codex Responses input_image parts; magic-byte + read-guard + 25MB-cap + clamp-to-16 hardening)
"r0gersm1th@users.noreply.github.com": "r0gersm1th", # PR #3219 salvage (whatsapp bridge: resolve LID sender IDs to phone numbers in the message payload so phone-based allowlists match; commit authored by collaborator r0gersm1th, PR by @ajmeese7)
"louis@letsfive.io": "Mibayy", # PR #3296 salvage (status: provider label honors config.yaml model.base_url, not just OPENAI_BASE_URL env)

View file

@ -3798,3 +3798,278 @@ class TestSessionKeyHeader:
assert resp.status == 200
data = await resp.json()
assert data["features"]["session_key_header"] == "X-Hermes-Session-Key"
# ---------------------------------------------------------------------------
# Per-client model routing (model_routes)
# ---------------------------------------------------------------------------
def _make_routing_adapter(routes) -> APIServerAdapter:
    """Build an ``APIServerAdapter`` whose ``extra`` config carries *routes* as ``model_routes``."""
    extra = {"model_routes": routes}
    return APIServerAdapter(PlatformConfig(enabled=True, extra=extra))
def _patch_create_agent_runtime(monkeypatch, captured: dict, fake_agent_cls):
"""Stub out every external dependency of _create_agent."""
monkeypatch.setattr("run_agent.AIAgent", fake_agent_cls)
monkeypatch.setattr(
"gateway.run._resolve_runtime_agent_kwargs",
lambda: {
"provider": "openrouter",
"api_key": "sk-global",
"base_url": "https://openrouter.ai/api/v1",
"api_mode": "chat_completions",
},
)
monkeypatch.setattr("gateway.run._resolve_gateway_model", lambda: "global/model")
monkeypatch.setattr("gateway.run._load_gateway_config", lambda: {})
monkeypatch.setattr(
"gateway.run.GatewayRunner._load_reasoning_config", staticmethod(lambda: {})
)
monkeypatch.setattr(
"gateway.run.GatewayRunner._load_fallback_model", staticmethod(lambda: None)
)
monkeypatch.setattr("gateway.run._current_max_iterations", lambda: 90)
monkeypatch.setattr("hermes_cli.tools_config._get_platform_tools", lambda *_: set())
class TestModelRoutesParsing:
    """Normalization of ``model_routes`` at adapter construction, plus alias lookup."""

    def test_valid_routes_are_parsed(self):
        # A well-formed mapping must come through parsing unchanged.
        configured = {"minimax-m2": {"model": "minimax/minimax-m1", "provider": "openrouter"}}
        parsed = _make_routing_adapter(configured)._model_routes
        assert parsed == configured

    def test_non_dict_routes_config_is_ignored(self):
        parsed = _make_routing_adapter("not-a-dict")._model_routes
        assert parsed == {}

    def test_route_without_model_is_dropped(self):
        modelless = {"bad": {"provider": "openrouter"}}
        assert _make_routing_adapter(modelless)._model_routes == {}

    def test_route_with_non_dict_value_is_dropped(self):
        mixed = {"bad": "gpt-5", "good": {"model": "openai/gpt-5"}}
        surviving = set(_make_routing_adapter(mixed)._model_routes)
        assert surviving == {"good"}

    def test_unknown_route_keys_are_stripped(self):
        noisy = {"a": {"model": "m", "provider": "p", "evil_extra": "x"}}
        parsed = _make_routing_adapter(noisy)._model_routes
        assert parsed["a"] == {"model": "m", "provider": "p"}

    def test_resolve_route_lookup(self):
        adapter = _make_routing_adapter({"minimax-m2": {"model": "minimax/minimax-m1"}})
        lookup = adapter._resolve_route
        assert lookup("minimax-m2") == {"model": "minimax/minimax-m1"}
        # Unknown aliases and non-string values all miss.
        for miss in ("unknown-model", None, 123):
            assert lookup(miss) is None

    def test_no_routes_configured(self):
        routeless = _make_routing_adapter({})
        assert routeless._resolve_route("hermes-agent") is None
class TestModelRoutesModelsEndpoint:
    """GET /v1/models must advertise route aliases without leaking credentials."""

    @pytest.mark.asyncio
    async def test_models_endpoint_lists_route_aliases(self):
        adapter = _make_routing_adapter(
            {
                "minimax-m2": {"model": "minimax/minimax-m1", "provider": "openrouter"},
                "gpt-5": {"model": "openai/gpt-5"},
            }
        )
        async with TestClient(TestServer(_create_app(adapter))) as cli:
            resp = await cli.get("/v1/models")
            assert resp.status == 200
            payload = await resp.json()
            listed = {entry["id"] for entry in payload["data"]}
            # The server's own model plus every configured alias.
            for expected in (adapter._model_name, "minimax-m2", "gpt-5"):
                assert expected in listed

    @pytest.mark.asyncio
    async def test_models_endpoint_route_alias_fields_and_no_secrets(self):
        adapter = _make_routing_adapter(
            {"my-alias": {"model": "openai/gpt-5", "api_key": "sk-route-secret"}}
        )
        async with TestClient(TestServer(_create_app(adapter))) as cli:
            resp = await cli.get("/v1/models")
            payload = await resp.json()
            alias_entry = next(
                entry for entry in payload["data"] if entry["id"] == "my-alias"
            )
            assert alias_entry["root"] == "openai/gpt-5"
            assert alias_entry["parent"] == adapter._model_name
            # per-route api_key must never leak through the discovery endpoint
            serialized = json.dumps(payload)
            assert "sk-route-secret" not in serialized
class TestModelRoutesHandlers:
    """HTTP handlers must resolve the request's ``model`` field and pass the route on."""

    @staticmethod
    def _canned_run_result():
        """Return a fresh ``(result, usage)`` pair for a stubbed ``_run_agent``."""
        result = {"final_response": "hi", "messages": [], "api_calls": 1}
        usage = {"input_tokens": 5, "output_tokens": 5, "total_tokens": 10}
        return (result, usage)

    @pytest.mark.asyncio
    async def test_chat_completions_passes_route_to_run_agent(self):
        adapter = _make_routing_adapter(
            {"minimax-m2": {"model": "minimax/minimax-m1", "provider": "openrouter"}}
        )
        request_body = {
            "model": "minimax-m2",
            "messages": [{"role": "user", "content": "hello"}],
        }
        async with TestClient(TestServer(_create_app(adapter))) as cli:
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
                mock_run.return_value = self._canned_run_result()
                resp = await cli.post("/v1/chat/completions", json=request_body)
                assert resp.status == 200
                passed = mock_run.call_args.kwargs
                assert passed.get("route") == {
                    "model": "minimax/minimax-m1", "provider": "openrouter",
                }

    @pytest.mark.asyncio
    async def test_chat_completions_no_route_for_unknown_model(self):
        adapter = _make_routing_adapter({"minimax-m2": {"model": "minimax/minimax-m1"}})
        request_body = {
            "model": "unknown-model",
            "messages": [{"role": "user", "content": "hello"}],
        }
        async with TestClient(TestServer(_create_app(adapter))) as cli:
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
                mock_run.return_value = self._canned_run_result()
                resp = await cli.post("/v1/chat/completions", json=request_body)
                assert resp.status == 200
                # An unmapped model value must not produce a route.
                assert mock_run.call_args.kwargs.get("route") is None

    @pytest.mark.asyncio
    async def test_responses_api_passes_route_to_run_agent(self):
        adapter = _make_routing_adapter(
            {"xiaozhi": {"model": "minimax/minimax-m1", "provider": "openrouter"}}
        )
        request_body = {
            "model": "xiaozhi",
            "input": "hello",
        }
        async with TestClient(TestServer(_create_app(adapter))) as cli:
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
                mock_run.return_value = self._canned_run_result()
                resp = await cli.post("/v1/responses", json=request_body)
                assert resp.status == 200
                assert mock_run.call_args.kwargs.get("route") == {
                    "model": "minimax/minimax-m1", "provider": "openrouter",
                }
class TestModelRoutesAgentCreation:
    """``_create_agent`` precedence: session /model override, then route, then global."""

    @staticmethod
    def _recording_agent_cls(sink: dict):
        """Return a stand-in agent class that copies its constructor kwargs into *sink*."""

        class FakeAgent:
            def __init__(self, **kwargs):
                sink.update(kwargs)

        return FakeAgent

    @staticmethod
    def _adapter_with_no_session_override(monkeypatch, routes):
        """Build a routing adapter with the session DB and session override stubbed out."""
        adapter = _make_routing_adapter(routes)
        monkeypatch.setattr(adapter, "_ensure_session_db", lambda: None)
        monkeypatch.setattr(adapter, "_session_model_override_for", lambda *_: None)
        return adapter

    def test_route_overrides_model_and_credentials(self, monkeypatch):
        captured = {}
        FakeAgent = self._recording_agent_cls(captured)
        _patch_create_agent_runtime(monkeypatch, captured, FakeAgent)
        adapter = self._adapter_with_no_session_override(
            monkeypatch,
            {"alias": {
                "model": "minimax/minimax-m1",
                "api_key": "sk-route",
                "base_url": "https://route.example/v1",
            }},
        )

        agent = adapter._create_agent(
            session_id="s1", route=adapter._resolve_route("alias")
        )

        assert isinstance(agent, FakeAgent)
        expected = {
            "model": "minimax/minimax-m1",
            "api_key": "sk-route",
            "base_url": "https://route.example/v1",
        }
        for field, value in expected.items():
            assert captured[field] == value

    def test_route_provider_resolves_provider_credentials(self, monkeypatch):
        captured = {}
        _patch_create_agent_runtime(
            monkeypatch, captured, self._recording_agent_cls(captured)
        )

        def provider_runtime(provider):
            # Derive credentials from the provider name so the test can
            # tell them apart from the global runtime's.
            return {
                "provider": provider,
                "api_key": f"sk-{provider}",
                "base_url": f"https://{provider}.example/v1",
                "api_mode": "chat_completions",
            }

        monkeypatch.setattr(
            "gateway.run._resolve_runtime_agent_kwargs_for_provider", provider_runtime
        )
        adapter = self._adapter_with_no_session_override(
            monkeypatch, {"alias": {"model": "other/model", "provider": "otherprov"}}
        )

        adapter._create_agent(session_id="s1", route=adapter._resolve_route("alias"))

        assert captured["model"] == "other/model"
        assert captured["provider"] == "otherprov"
        assert captured["api_key"] == "sk-otherprov"

    def test_no_route_keeps_global_model(self, monkeypatch):
        captured = {}
        _patch_create_agent_runtime(
            monkeypatch, captured, self._recording_agent_cls(captured)
        )
        adapter = self._adapter_with_no_session_override(
            monkeypatch, {"alias": {"model": "other/model"}}
        )

        adapter._create_agent(session_id="s1", route=None)

        assert captured["model"] == "global/model"
        assert captured["api_key"] == "sk-global"

    def test_session_model_override_beats_route(self, monkeypatch):
        """A user-issued /model on the session must win over static route config."""
        captured = {}
        _patch_create_agent_runtime(
            monkeypatch, captured, self._recording_agent_cls(captured)
        )
        adapter = _make_routing_adapter({"alias": {"model": "route/model", "api_key": "sk-route"}})
        monkeypatch.setattr(adapter, "_ensure_session_db", lambda: None)
        monkeypatch.setattr(
            adapter,
            "_session_model_override_for",
            lambda key: {"model": "session/override-model"},
        )

        adapter._create_agent(session_id="s1", route=adapter._resolve_route("alias"))

        # The route must NOT be applied — the session override path (global
        # runtime here, since the gateway applies /model separately) wins.
        assert captured["model"] == "global/model"
        assert captured["api_key"] == "sk-global"

    def test_session_override_lookup_reads_gateway_runner(self, monkeypatch):
        """_session_model_override_for consults GatewayRunner._session_model_overrides."""
        adapter = _make_routing_adapter({})

        class FakeRunner:
            _session_model_overrides = {"chan-1": {"model": "user/model"}}

        monkeypatch.setattr("gateway.run._gateway_runner_ref", lambda: FakeRunner())

        lookup = adapter._session_model_override_for
        assert lookup("chan-1") == {"model": "user/model"}
        for absent in ("chan-2", None):
            assert lookup(absent) is None