feat(api-server): per-client model routing via model_routes (#3176 salvage)
Adds a no-code routing layer to the OpenAI-compatible API server so one Hermes deployment can map different API clients to different model/provider backends. Clients pick a backend by sending a configured alias as the OpenAI 'model' field; unmatched values fall back to the global model. Configured aliases are listed by GET /v1/models. Precedence (highest first): session /model override > model_routes route > global config. Route provider credentials resolve through _resolve_runtime_agent_kwargs_for_provider (same seam as channel_overrides); per-route api_key/base_url are upstream provider credential overrides — never caller auth, never logged. Salvaged and rebased from PR #3176 by @Mibayy onto current main.
This commit is contained in:
parent
ce9aa869fc
commit
4a09b692ec
4 changed files with 503 additions and 16 deletions
|
|
@ -571,6 +571,41 @@ max_concurrent_sessions: null
|
|||
# explicitly want one shared "room brain" per group/channel.
|
||||
group_sessions_per_user: true
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# API Server — per-client model routing
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Route different API clients to different models/providers on a single
|
||||
# Hermes deployment. Clients choose a backend by sending a specific string
|
||||
# as the OpenAI ``model`` field. Unmapped model values fall back to the
|
||||
# global model configured in the ``model:`` section above, and an explicit
|
||||
# session /model override always wins over a route.
|
||||
#
|
||||
# Configure via the ``platforms.api_server.extra.model_routes`` gateway
|
||||
# config block:
|
||||
#
|
||||
# platforms:
|
||||
# api_server:
|
||||
# enabled: true
|
||||
# extra:
|
||||
# key: "your-api-server-secret"
|
||||
# model_routes:
|
||||
# # Xiaozhi clients send model="minimax-m2" → routed to MiniMax via OpenRouter
|
||||
# minimax-m2:
|
||||
# model: "minimax/minimax-m1"
|
||||
# provider: "openrouter" # optional — overrides global provider
|
||||
# # api_key: "sk-..." # optional — per-route UPSTREAM provider
|
||||
# # key (NOT caller auth; never logged)
|
||||
# # base_url: "https://..." # optional — per-route base URL
|
||||
# # GPT clients keep their own alias
|
||||
# gpt-5:
|
||||
# model: "openai/gpt-5"
|
||||
# provider: "openrouter"
|
||||
#
|
||||
# Configured aliases are automatically listed by GET /v1/models so clients
|
||||
# can discover them without manual coordination. Caller authentication is
|
||||
# unchanged: every request still authenticates with the global API server
|
||||
# key (``extra.key`` / API_SERVER_KEY).
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Gateway Streaming
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ Exposes an HTTP server with endpoints:
|
|||
- POST /v1/responses — OpenAI Responses API format (stateful via previous_response_id; X-Hermes-Session-Key supported)
|
||||
- GET /v1/responses/{response_id} — Retrieve a stored response
|
||||
- DELETE /v1/responses/{response_id} — Delete a stored response
|
||||
- GET /v1/models — lists hermes-agent as an available model
|
||||
- GET /v1/models — lists hermes-agent and any configured model_routes aliases
|
||||
- GET /v1/capabilities — machine-readable API capabilities for external UIs
|
||||
- GET /api/sessions — list client-visible Hermes sessions
|
||||
- POST /api/sessions — create an empty Hermes session
|
||||
|
|
@ -832,6 +832,22 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
self._model_name: str = self._resolve_model_name(
|
||||
extra.get("model_name", os.getenv("API_SERVER_MODEL_NAME", "")),
|
||||
)
|
||||
# model_routes: maps incoming ``model`` field values to specific
|
||||
# provider/model configs so one API server instance can serve
|
||||
# multiple clients on different backends.
|
||||
#
|
||||
# Config format (platforms.api_server.extra in the gateway config):
|
||||
# model_routes:
|
||||
# minimax-m2: # alias the client sends as the "model" field
|
||||
# model: "minimax/minimax-m1"
|
||||
# provider: "openrouter" # optional — resolved via the provider
|
||||
# # credential chain when set
|
||||
# api_key: "sk-…" # optional — per-route UPSTREAM provider
|
||||
# # key override (NOT caller auth; never logged)
|
||||
# base_url: "https://…" # optional — per-route base URL override
|
||||
self._model_routes: Dict[str, Dict[str, Any]] = self._parse_model_routes(
|
||||
extra.get("model_routes"),
|
||||
)
|
||||
self._app: Optional["web.Application"] = None
|
||||
self._runner: Optional["web.AppRunner"] = None
|
||||
self._site: Optional["web.TCPSite"] = None
|
||||
|
|
@ -1118,6 +1134,78 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
# Agent creation helper
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _parse_model_routes(raw: Any) -> Dict[str, Dict[str, Any]]:
|
||||
"""Validate and normalize the ``model_routes`` config block.
|
||||
|
||||
Accepts a mapping of ``alias -> {model, provider?, api_key?, base_url?}``.
|
||||
Invalid shapes are dropped (never raised) so a config typo can't take
|
||||
the whole API server down. Route values are coerced to strings.
|
||||
|
||||
Security: per-route ``api_key`` values are UPSTREAM provider
|
||||
credentials (used to call the routed model's backend), not caller
|
||||
authentication — callers still authenticate with the global
|
||||
API_SERVER_KEY bearer token via ``_check_auth``. Route api_keys must
|
||||
never be logged; only alias names and non-secret fields may appear in
|
||||
logs.
|
||||
"""
|
||||
if not isinstance(raw, dict):
|
||||
if raw:
|
||||
logger.warning(
|
||||
"api_server model_routes ignored: expected a mapping, got %s",
|
||||
type(raw).__name__,
|
||||
)
|
||||
return {}
|
||||
|
||||
allowed_keys = ("model", "provider", "api_key", "base_url")
|
||||
routes: Dict[str, Dict[str, Any]] = {}
|
||||
for alias, cfg in raw.items():
|
||||
alias_str = str(alias).strip()
|
||||
if not alias_str or not isinstance(cfg, dict):
|
||||
logger.warning(
|
||||
"api_server model_routes: dropping invalid route entry %r", alias_str or alias
|
||||
)
|
||||
continue
|
||||
route = {
|
||||
key: str(cfg[key]).strip()
|
||||
for key in allowed_keys
|
||||
if cfg.get(key) is not None and str(cfg[key]).strip()
|
||||
}
|
||||
if not route.get("model"):
|
||||
logger.warning(
|
||||
"api_server model_routes: route %r has no 'model'; dropping", alias_str
|
||||
)
|
||||
continue
|
||||
routes[alias_str] = route
|
||||
return routes
|
||||
|
||||
def _resolve_route(self, model_alias: Any) -> Optional[Dict[str, Any]]:
|
||||
"""Return the model_routes entry for *model_alias*, or None."""
|
||||
if not self._model_routes or not isinstance(model_alias, str):
|
||||
return None
|
||||
return self._model_routes.get(model_alias)
|
||||
|
||||
def _session_model_override_for(self, session_key: Optional[str]) -> Optional[Dict[str, Any]]:
|
||||
"""Return the gateway's session ``/model`` override for *session_key*, if any.
|
||||
|
||||
The gateway tracks per-session ``/model`` switches in
|
||||
``GatewayRunner._session_model_overrides``. API-server requests that
|
||||
share such a session key must keep honouring the explicit session
|
||||
override even when the request's ``model`` field matches a configured
|
||||
route — a user-issued ``/model`` always wins over static config.
|
||||
"""
|
||||
if not session_key:
|
||||
return None
|
||||
try:
|
||||
from gateway.run import _gateway_runner_ref
|
||||
runner = _gateway_runner_ref()
|
||||
if runner is None:
|
||||
return None
|
||||
override = runner._session_model_overrides.get(session_key)
|
||||
return dict(override) if isinstance(override, dict) else None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def _create_agent(
|
||||
self,
|
||||
ephemeral_system_prompt: Optional[str] = None,
|
||||
|
|
@ -1127,6 +1215,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
tool_start_callback=None,
|
||||
tool_complete_callback=None,
|
||||
gateway_session_key: Optional[str] = None,
|
||||
route: Optional[Dict[str, Any]] = None,
|
||||
) -> Any:
|
||||
"""
|
||||
Create an AIAgent instance using the gateway's runtime config.
|
||||
|
|
@ -1142,6 +1231,11 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
key is meant to persist across transcripts so long-term memory
|
||||
providers (e.g. Honcho) can scope their per-chat state correctly
|
||||
— matching the semantics of the native gateway's ``session_key``.
|
||||
|
||||
``route`` is an optional ``model_routes`` entry (per-client model
|
||||
routing). When set — and no session ``/model`` override exists for
|
||||
this session — its model/provider/api_key/base_url override the
|
||||
global defaults for this agent instance only.
|
||||
"""
|
||||
from run_agent import AIAgent
|
||||
from gateway.run import (
|
||||
|
|
@ -1169,6 +1263,51 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
if runtime_model:
|
||||
model = runtime_model
|
||||
|
||||
# Per-client model routing (model_routes config). The route was
|
||||
# resolved from the request's ``model`` field by the HTTP handler.
|
||||
# Precedence (highest first): session ``/model`` override → model_routes
|
||||
# route → global config — an explicit user-issued ``/model`` on the
|
||||
# session always beats static per-client route config.
|
||||
session_override = self._session_model_override_for(
|
||||
gateway_session_key or session_id
|
||||
)
|
||||
if route and not session_override:
|
||||
if route.get("provider"):
|
||||
# Resolve real credentials for the routed provider (mirrors
|
||||
# the channel_overrides path in gateway/run.py) so a route
|
||||
# without an explicit api_key/base_url still gets the right
|
||||
# provider auth instead of the default provider's key.
|
||||
try:
|
||||
from gateway.run import _resolve_runtime_agent_kwargs_for_provider
|
||||
provider_kwargs = _resolve_runtime_agent_kwargs_for_provider(
|
||||
route["provider"]
|
||||
)
|
||||
provider_kwargs.pop("model", None)
|
||||
runtime_kwargs.update(provider_kwargs)
|
||||
except Exception:
|
||||
# Fall back to just switching the provider name; explicit
|
||||
# per-route api_key/base_url below can still complete auth.
|
||||
runtime_kwargs["provider"] = route["provider"]
|
||||
if route.get("model"):
|
||||
model = route["model"]
|
||||
# Per-route secrets are upstream provider credentials. Never log
|
||||
# them (compare _check_auth: caller auth stays the global bearer
|
||||
# key checked with hmac.compare_digest).
|
||||
if route.get("api_key"):
|
||||
runtime_kwargs["api_key"] = route["api_key"]
|
||||
if route.get("base_url"):
|
||||
runtime_kwargs["base_url"] = route["base_url"]
|
||||
logger.debug(
|
||||
"api_server model route applied: model=%s provider=%s",
|
||||
model,
|
||||
runtime_kwargs.get("provider"),
|
||||
)
|
||||
elif route and session_override:
|
||||
logger.debug(
|
||||
"api_server model route skipped: session /model override wins for %s",
|
||||
gateway_session_key or session_id,
|
||||
)
|
||||
|
||||
user_config = _load_gateway_config()
|
||||
enabled_toolsets = sorted(_get_platform_tools(user_config, "api_server"))
|
||||
|
||||
|
|
@ -1255,25 +1394,40 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
})
|
||||
|
||||
async def _handle_models(self, request: "web.Request") -> "web.Response":
    """GET /v1/models — list hermes-agent and any configured model_routes aliases.

    Returns the auth error response when ``_check_auth`` rejects the
    request. Otherwise responds with an OpenAI-style ``list`` object whose
    first entry is the adapter's own model name, followed by one entry per
    configured route alias (``root`` is the routed model, ``parent`` is the
    adapter's model name). An alias equal to the adapter's model name is
    not listed twice.
    """
    auth_err = self._check_auth(request)
    if auth_err:
        return auth_err

    # One timestamp for the whole listing so every entry agrees.
    now = int(time.time())
    models = [
        {
            "id": self._model_name,
            "object": "model",
            "created": now,
            "owned_by": "hermes",
            "permission": [],
            "root": self._model_name,
            "parent": None,
        }
    ]
    # Expose configured model route aliases so clients can discover them.
    # Only the alias and resolved model name are exposed — never provider
    # credentials.
    for alias, route_cfg in self._model_routes.items():
        if alias == self._model_name:
            continue  # already listed above
        models.append({
            "id": alias,
            "object": "model",
            "created": now,
            "owned_by": "hermes",
            "permission": [],
            "root": route_cfg.get("model", alias),
            "parent": self._model_name,
        })

    return web.json_response({"object": "list", "data": models})
|
||||
|
||||
async def _handle_capabilities(self, request: "web.Request") -> "web.Response":
|
||||
"""GET /v1/capabilities — advertise the stable API surface.
|
||||
|
|
@ -2012,6 +2166,11 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
model_name = body.get("model", self._model_name)
|
||||
created = int(time.time())
|
||||
|
||||
# Per-client model routing: if the requested model matches a
|
||||
# configured model_routes alias, this request's agent is created
|
||||
# with that route's model/provider instead of the global default.
|
||||
route = self._resolve_route(model_name)
|
||||
|
||||
if stream:
|
||||
import queue as _q
|
||||
_stream_q: _q.Queue = _q.Queue()
|
||||
|
|
@ -2094,6 +2253,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
tool_complete_callback=_on_tool_complete,
|
||||
agent_ref=agent_ref,
|
||||
gateway_session_key=gateway_session_key,
|
||||
route=route,
|
||||
))
|
||||
# Ensure SSE drain loops can terminate without relying on polling
|
||||
# agent_task.done(), which can race with queue timeout checks.
|
||||
|
|
@ -2113,6 +2273,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
ephemeral_system_prompt=system_prompt,
|
||||
session_id=session_id,
|
||||
gateway_session_key=gateway_session_key,
|
||||
route=route,
|
||||
)
|
||||
|
||||
idempotency_key = request.headers.get("Idempotency-Key")
|
||||
|
|
@ -3123,6 +3284,9 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
# groups the entire conversation under one session entry.
|
||||
session_id = stored_session_id or str(uuid.uuid4())
|
||||
|
||||
# Per-client model routing for /v1/responses (see model_routes).
|
||||
route = self._resolve_route(body.get("model"))
|
||||
|
||||
stream = _coerce_request_bool(body.get("stream"), default=False)
|
||||
if stream:
|
||||
# Streaming branch — emit OpenAI Responses SSE events as the
|
||||
|
|
@ -3176,6 +3340,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
tool_complete_callback=_on_tool_complete,
|
||||
agent_ref=agent_ref,
|
||||
gateway_session_key=gateway_session_key,
|
||||
route=route,
|
||||
))
|
||||
# Ensure SSE drain loops can terminate without relying on polling
|
||||
# agent_task.done(), which can race with queue timeout checks.
|
||||
|
|
@ -3209,6 +3374,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
ephemeral_system_prompt=instructions,
|
||||
session_id=session_id,
|
||||
gateway_session_key=gateway_session_key,
|
||||
route=route,
|
||||
)
|
||||
|
||||
idempotency_key = request.headers.get("Idempotency-Key")
|
||||
|
|
@ -3839,6 +4005,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
tool_complete_callback=None,
|
||||
agent_ref: Optional[list] = None,
|
||||
gateway_session_key: Optional[str] = None,
|
||||
route: Optional[Dict[str, Any]] = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Create an agent and run a conversation in a thread executor.
|
||||
|
|
@ -3846,6 +4013,10 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
Returns ``(result_dict, usage_dict)`` where *usage_dict* contains
|
||||
``input_tokens``, ``output_tokens`` and ``total_tokens``.
|
||||
|
||||
*route* is an optional ``model_routes`` entry (resolved from the
|
||||
request's ``model`` field) that overrides the global model/provider
|
||||
for this specific request.
|
||||
|
||||
If *agent_ref* is a one-element list, the AIAgent instance is stored
|
||||
at ``agent_ref[0]`` before ``run_conversation`` begins. This allows
|
||||
callers (e.g. the SSE writer) to call ``agent.interrupt()`` from
|
||||
|
|
@ -3870,6 +4041,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
tool_start_callback=tool_start_callback,
|
||||
tool_complete_callback=tool_complete_callback,
|
||||
gateway_session_key=gateway_session_key,
|
||||
route=route,
|
||||
)
|
||||
if agent_ref is not None:
|
||||
agent_ref[0] = agent
|
||||
|
|
@ -4085,6 +4257,9 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
model=body.get("model", self._model_name),
|
||||
)
|
||||
|
||||
# Per-client model routing for /v1/runs (see model_routes).
|
||||
route = self._resolve_route(body.get("model"))
|
||||
|
||||
async def _run_and_close():
|
||||
try:
|
||||
self._set_run_status(run_id, "running")
|
||||
|
|
@ -4094,6 +4269,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
stream_delta_callback=_text_cb,
|
||||
tool_progress_callback=event_cb,
|
||||
gateway_session_key=gateway_session_key,
|
||||
route=route,
|
||||
)
|
||||
self._active_run_agents[run_id] = agent
|
||||
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
|
|||
# Auto-extracted from noreply emails + manual overrides
|
||||
AUTHOR_MAP = {
|
||||
"louis@letsfive.io": "Mibayy", # PR #3243 salvage (/compact alias + preview/aggressive flags for /compress)
|
||||
"louis@letsfive.io": "Mibayy", # PR #3176 salvage (api-server: per-client model routing via model_routes)
|
||||
"ai-lab@foxmail.com": "CrazyBoyM", # PR #55828 salvage (image_gen openai-codex: wire image-to-image / reference-image editing via Codex Responses input_image parts; magic-byte + read-guard + 25MB-cap + clamp-to-16 hardening)
|
||||
"r0gersm1th@users.noreply.github.com": "r0gersm1th", # PR #3219 salvage (whatsapp bridge: resolve LID sender IDs to phone numbers in the message payload so phone-based allowlists match; commit authored by collaborator r0gersm1th, PR by @ajmeese7)
|
||||
"louis@letsfive.io": "Mibayy", # PR #3296 salvage (status: provider label honors config.yaml model.base_url, not just OPENAI_BASE_URL env)
|
||||
|
|
|
|||
|
|
@ -3798,3 +3798,278 @@ class TestSessionKeyHeader:
|
|||
assert resp.status == 200
|
||||
data = await resp.json()
|
||||
assert data["features"]["session_key_header"] == "X-Hermes-Session-Key"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-client model routing (model_routes)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_routing_adapter(routes) -> APIServerAdapter:
    """Build an enabled APIServerAdapter whose ``extra`` carries *routes* as ``model_routes``."""
    extra = {"model_routes": routes}
    platform_config = PlatformConfig(enabled=True, extra=extra)
    return APIServerAdapter(platform_config)
|
||||
|
||||
|
||||
def _patch_create_agent_runtime(monkeypatch, captured: dict, fake_agent_cls):
    """Replace the external collaborators patched below with canned stand-ins.

    ``fake_agent_cls`` is installed as ``run_agent.AIAgent``; the gateway
    runtime resolves to the ``openrouter`` provider with key ``sk-global``
    and model ``global/model``. ``captured`` is accepted for call-site
    symmetry but is not used by this helper.
    """

    def _global_runtime_kwargs():
        # Fresh dict on every call so callers may mutate it freely.
        return {
            "provider": "openrouter",
            "api_key": "sk-global",
            "base_url": "https://openrouter.ai/api/v1",
            "api_mode": "chat_completions",
        }

    replacements = (
        ("run_agent.AIAgent", fake_agent_cls),
        ("gateway.run._resolve_runtime_agent_kwargs", _global_runtime_kwargs),
        ("gateway.run._resolve_gateway_model", lambda: "global/model"),
        ("gateway.run._load_gateway_config", lambda: {}),
        ("gateway.run.GatewayRunner._load_reasoning_config", staticmethod(lambda: {})),
        ("gateway.run.GatewayRunner._load_fallback_model", staticmethod(lambda: None)),
        ("gateway.run._current_max_iterations", lambda: 90),
        ("hermes_cli.tools_config._get_platform_tools", lambda *_: set()),
    )
    for target, stand_in in replacements:
        monkeypatch.setattr(target, stand_in)
|
||||
|
||||
|
||||
class TestModelRoutesParsing:
    """Parsing of the ``model_routes`` config block and alias lookup."""

    def test_valid_routes_are_parsed(self):
        """A well-formed routes mapping is stored as-is."""
        configured = {
            "minimax-m2": {"model": "minimax/minimax-m1", "provider": "openrouter"},
        }
        assert _make_routing_adapter(configured)._model_routes == configured

    def test_non_dict_routes_config_is_ignored(self):
        """A non-mapping ``model_routes`` value yields no routes."""
        assert _make_routing_adapter("not-a-dict")._model_routes == {}

    def test_route_without_model_is_dropped(self):
        """A route lacking ``model`` is discarded."""
        provider_only = {"bad": {"provider": "openrouter"}}
        assert _make_routing_adapter(provider_only)._model_routes == {}

    def test_route_with_non_dict_value_is_dropped(self):
        """Only entries whose config is a mapping survive."""
        mixed = {"bad": "gpt-5", "good": {"model": "openai/gpt-5"}}
        surviving = set(_make_routing_adapter(mixed)._model_routes)
        assert surviving == {"good"}

    def test_unknown_route_keys_are_stripped(self):
        """Keys outside the allowed set never reach the parsed route."""
        noisy = {"a": {"model": "m", "provider": "p", "evil_extra": "x"}}
        parsed = _make_routing_adapter(noisy)._model_routes
        assert parsed["a"] == {"model": "m", "provider": "p"}

    def test_resolve_route_lookup(self):
        """Known aliases resolve; unknown or non-string aliases give None."""
        adapter = _make_routing_adapter({"minimax-m2": {"model": "minimax/minimax-m1"}})
        assert adapter._resolve_route("minimax-m2") == {"model": "minimax/minimax-m1"}
        for unmatched in ("unknown-model", None, 123):
            assert adapter._resolve_route(unmatched) is None

    def test_no_routes_configured(self):
        """With an empty route table every lookup returns None."""
        assert _make_routing_adapter({})._resolve_route("hermes-agent") is None
|
||||
|
||||
|
||||
class TestModelRoutesModelsEndpoint:
    """``GET /v1/models`` lists route aliases without leaking credentials."""

    @pytest.mark.asyncio
    async def test_models_endpoint_lists_route_aliases(self):
        """The adapter's own model and every configured alias are listed."""
        adapter = _make_routing_adapter({
            "minimax-m2": {"model": "minimax/minimax-m1", "provider": "openrouter"},
            "gpt-5": {"model": "openai/gpt-5"},
        })
        async with TestClient(TestServer(_create_app(adapter))) as cli:
            resp = await cli.get("/v1/models")
            assert resp.status == 200
            payload = await resp.json()
            listed_ids = {entry["id"] for entry in payload["data"]}
            assert adapter._model_name in listed_ids
            assert "minimax-m2" in listed_ids
            assert "gpt-5" in listed_ids

    @pytest.mark.asyncio
    async def test_models_endpoint_route_alias_fields_and_no_secrets(self):
        """Alias entries expose root/parent only; the route api_key stays private."""
        adapter = _make_routing_adapter(
            {"my-alias": {"model": "openai/gpt-5", "api_key": "sk-route-secret"}}
        )
        async with TestClient(TestServer(_create_app(adapter))) as cli:
            resp = await cli.get("/v1/models")
            payload = await resp.json()
            alias_entry = next(
                entry for entry in payload["data"] if entry["id"] == "my-alias"
            )
            assert alias_entry["root"] == "openai/gpt-5"
            assert alias_entry["parent"] == adapter._model_name
            # per-route api_key must never leak through the discovery endpoint
            serialized = json.dumps(payload)
            assert "sk-route-secret" not in serialized
|
||||
|
||||
|
||||
class TestModelRoutesHandlers:
    """HTTP handlers pass the resolved route (or None) on to ``_run_agent``."""

    @staticmethod
    async def _post_with_stubbed_agent(adapter, path, payload):
        """POST *payload* to *path* with ``_run_agent`` stubbed out.

        Returns ``(status, mock_run)`` so the caller can check the status
        before inspecting the recorded call.
        """
        async with TestClient(TestServer(_create_app(adapter))) as cli:
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
                mock_run.return_value = (
                    {"final_response": "hi", "messages": [], "api_calls": 1},
                    {"input_tokens": 5, "output_tokens": 5, "total_tokens": 10},
                )
                resp = await cli.post(path, json=payload)
                return resp.status, mock_run

    @pytest.mark.asyncio
    async def test_chat_completions_passes_route_to_run_agent(self):
        """A chat-completions request naming an alias forwards that route."""
        adapter = _make_routing_adapter(
            {"minimax-m2": {"model": "minimax/minimax-m1", "provider": "openrouter"}}
        )
        status, mock_run = await self._post_with_stubbed_agent(
            adapter,
            "/v1/chat/completions",
            {
                "model": "minimax-m2",
                "messages": [{"role": "user", "content": "hello"}],
            },
        )
        assert status == 200
        forwarded = mock_run.call_args.kwargs
        assert forwarded.get("route") == {
            "model": "minimax/minimax-m1", "provider": "openrouter",
        }

    @pytest.mark.asyncio
    async def test_chat_completions_no_route_for_unknown_model(self):
        """An unmapped model value forwards ``route=None``."""
        adapter = _make_routing_adapter({"minimax-m2": {"model": "minimax/minimax-m1"}})
        status, mock_run = await self._post_with_stubbed_agent(
            adapter,
            "/v1/chat/completions",
            {
                "model": "unknown-model",
                "messages": [{"role": "user", "content": "hello"}],
            },
        )
        assert status == 200
        assert mock_run.call_args.kwargs.get("route") is None

    @pytest.mark.asyncio
    async def test_responses_api_passes_route_to_run_agent(self):
        """A /v1/responses request naming an alias forwards that route."""
        adapter = _make_routing_adapter(
            {"xiaozhi": {"model": "minimax/minimax-m1", "provider": "openrouter"}}
        )
        status, mock_run = await self._post_with_stubbed_agent(
            adapter,
            "/v1/responses",
            {
                "model": "xiaozhi",
                "input": "hello",
            },
        )
        assert status == 200
        assert mock_run.call_args.kwargs.get("route") == {
            "model": "minimax/minimax-m1", "provider": "openrouter",
        }
|
||||
|
||||
|
||||
class TestModelRoutesAgentCreation:
    """``_create_agent`` applies routes, global defaults and session overrides."""

    @staticmethod
    def _capturing_agent():
        """Return ``(captured, FakeAgent)``; FakeAgent records its init kwargs."""
        captured = {}

        class FakeAgent:
            def __init__(self, **kwargs):
                captured.update(kwargs)

        return captured, FakeAgent

    @staticmethod
    def _adapter_with_stubs(monkeypatch, routes, override_lookup):
        """Build a routing adapter with the session DB and override lookup stubbed."""
        adapter = _make_routing_adapter(routes)
        monkeypatch.setattr(adapter, "_ensure_session_db", lambda: None)
        monkeypatch.setattr(adapter, "_session_model_override_for", override_lookup)
        return adapter

    def test_route_overrides_model_and_credentials(self, monkeypatch):
        """Route model, api_key and base_url replace the global defaults."""
        captured, FakeAgent = self._capturing_agent()
        _patch_create_agent_runtime(monkeypatch, captured, FakeAgent)
        adapter = self._adapter_with_stubs(
            monkeypatch,
            {"alias": {
                "model": "minimax/minimax-m1",
                "api_key": "sk-route",
                "base_url": "https://route.example/v1",
            }},
            lambda *_: None,
        )

        agent = adapter._create_agent(
            session_id="s1", route=adapter._resolve_route("alias")
        )

        assert isinstance(agent, FakeAgent)
        assert captured["model"] == "minimax/minimax-m1"
        assert captured["api_key"] == "sk-route"
        assert captured["base_url"] == "https://route.example/v1"

    def test_route_provider_resolves_provider_credentials(self, monkeypatch):
        """A route provider pulls its credentials from the provider resolver."""
        captured, FakeAgent = self._capturing_agent()
        _patch_create_agent_runtime(monkeypatch, captured, FakeAgent)

        def _kwargs_for(provider):
            return {
                "provider": provider,
                "api_key": f"sk-{provider}",
                "base_url": f"https://{provider}.example/v1",
                "api_mode": "chat_completions",
            }

        monkeypatch.setattr(
            "gateway.run._resolve_runtime_agent_kwargs_for_provider", _kwargs_for
        )
        adapter = self._adapter_with_stubs(
            monkeypatch,
            {"alias": {"model": "other/model", "provider": "otherprov"}},
            lambda *_: None,
        )

        adapter._create_agent(session_id="s1", route=adapter._resolve_route("alias"))

        assert captured["model"] == "other/model"
        assert captured["provider"] == "otherprov"
        assert captured["api_key"] == "sk-otherprov"

    def test_no_route_keeps_global_model(self, monkeypatch):
        """Without a route the global model and key are used."""
        captured, FakeAgent = self._capturing_agent()
        _patch_create_agent_runtime(monkeypatch, captured, FakeAgent)
        adapter = self._adapter_with_stubs(
            monkeypatch, {"alias": {"model": "other/model"}}, lambda *_: None
        )

        adapter._create_agent(session_id="s1", route=None)

        assert captured["model"] == "global/model"
        assert captured["api_key"] == "sk-global"

    def test_session_model_override_beats_route(self, monkeypatch):
        """A user-issued /model on the session must win over static route config."""
        captured, FakeAgent = self._capturing_agent()
        _patch_create_agent_runtime(monkeypatch, captured, FakeAgent)
        adapter = self._adapter_with_stubs(
            monkeypatch,
            {"alias": {"model": "route/model", "api_key": "sk-route"}},
            lambda key: {"model": "session/override-model"},
        )

        adapter._create_agent(session_id="s1", route=adapter._resolve_route("alias"))

        # The route must NOT be applied — the session override path (global
        # runtime here, since the gateway applies /model separately) wins.
        assert captured["model"] == "global/model"
        assert captured["api_key"] == "sk-global"

    def test_session_override_lookup_reads_gateway_runner(self, monkeypatch):
        """_session_model_override_for consults GatewayRunner._session_model_overrides."""
        adapter = _make_routing_adapter({})

        class FakeRunner:
            _session_model_overrides = {"chan-1": {"model": "user/model"}}

        monkeypatch.setattr("gateway.run._gateway_runner_ref", lambda: FakeRunner())

        lookup = adapter._session_model_override_for
        assert lookup("chan-1") == {"model": "user/model"}
        assert lookup("chan-2") is None
        assert lookup(None) is None
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue