fix(api_server): pop fallback model kwarg to prevent AIAgent collision

When the primary provider's auth fails (expired token / 429 quota cap),
_resolve_runtime_agent_kwargs() falls through to the fallback provider
chain, whose runtime dict carries its own 'model' key. api_server's
_create_agent then did AIAgent(model=model, **runtime_kwargs), colliding
on 'model' and 500ing every /v1/chat/completions request while a fallback
was active. Pop the runtime model and let it override the config model,
mirroring the native gateway path (_resolve_session_agent_runtime).

Salvaged from #35716 by @ryo-solo (earliest submitter); the PR's second
half (Mistral reasoning_content strip) is already handled on main, so it
is dropped from this commit.

Co-authored-by: Hermes Agent <noreply@nousresearch.com>
This commit is contained in:
ryo-solo 2026-07-01 01:10:41 -07:00 committed by Teknium
parent ce9d180a94
commit d578b6165d
3 changed files with 91 additions and 0 deletions

View file

@ -1108,6 +1108,18 @@ class APIServerAdapter(BasePlatformAdapter):
reasoning_config = GatewayRunner._load_reasoning_config()
model = _resolve_gateway_model()
# When the primary provider's auth fails (expired token / 429 quota
# cap), _resolve_runtime_agent_kwargs() falls through to the fallback
# provider chain, whose runtime dict carries its own ``model`` key.
# Pop it and let it override the config model, mirroring the native
# gateway path (_resolve_session_agent_runtime in run.py). Otherwise
# the explicit ``model=model`` below collides with the ``**runtime_kwargs``
# spread → "got multiple values for keyword argument 'model'", 500ing
# every /v1/chat/completions request while a fallback is active.
runtime_model = runtime_kwargs.pop("model", None)
if runtime_model:
model = runtime_model
user_config = _load_gateway_config()
enabled_toolsets = sorted(_get_platform_tools(user_config, "api_server"))

View file

@ -318,6 +318,7 @@ AUTHOR_MAP = {
"alelpoan@proton.me": "alelpoan",
"aman@abacus.ai": "Aman113114-IITD",
"octavio.turra@gmail.com": "octavioturra",
"275877312+ryo-solo@users.noreply.github.com": "ryo-solo",
"524706+Twanislas@users.noreply.github.com": "Twanislas",
"9592417+adam91holt@users.noreply.github.com": "adam91holt",
"kchuang1015@users.noreply.github.com": "kchuang1015",

View file

@ -400,6 +400,84 @@ class TestAdapterInit:
assert isinstance(agent, FakeAgent)
assert captured["max_iterations"] == 200
def test_create_agent_handles_fallback_model_kwarg_collision(self, monkeypatch):
    """A ``model`` key in the fallback runtime dict must not collide.

    With the primary provider failing auth, _resolve_runtime_agent_kwargs()
    hands back a runtime dict that includes its own ``model`` entry.
    _create_agent has to remove that entry and use it in place of the
    config model; otherwise the explicit ``model=`` clashes with
    ``**runtime_kwargs`` and every request 500s with
    "got multiple values for keyword argument 'model'".
    """
    seen_kwargs = {}

    class FakeAgent:
        def __init__(self, **kwargs):
            seen_kwargs.update(kwargs)

    fallback_runtime = {
        "provider": "openrouter",
        "base_url": "https://openrouter.ai/api/v1",
        "api_mode": "chat_completions",
        "model": "anthropic/claude-haiku",  # from the fallback entry
    }

    # Applied in order; the resolver returns a fresh dict on every call so
    # any in-place mutation by the code under test stays local to that call.
    patches = (
        ("run_agent.AIAgent", FakeAgent),
        ("gateway.run._resolve_runtime_agent_kwargs", lambda: dict(fallback_runtime)),
        ("gateway.run._resolve_gateway_model", lambda: "primary/model"),
        ("gateway.run._load_gateway_config", lambda: {}),
        ("gateway.run.GatewayRunner._load_reasoning_config", staticmethod(lambda: {})),
        ("gateway.run.GatewayRunner._load_fallback_model", staticmethod(lambda: None)),
        ("gateway.run._current_max_iterations", lambda: 90),
        ("hermes_cli.tools_config._get_platform_tools", lambda *_: set()),
    )
    for target, replacement in patches:
        monkeypatch.setattr(target, replacement)

    adapter = APIServerAdapter(PlatformConfig(enabled=True))
    monkeypatch.setattr(adapter, "_ensure_session_db", lambda: None)

    # A duplicate 'model' kwarg would surface here as a TypeError.
    agent = adapter._create_agent(session_id="api-session")

    assert isinstance(agent, FakeAgent)
    # The fallback's model wins over the configured one, as on the native path.
    assert seen_kwargs["model"] == "anthropic/claude-haiku"
def test_create_agent_keeps_config_model_when_runtime_omits_it(self, monkeypatch):
    """The config model is kept when the runtime dict has no ``model``.

    Happy path with no fallback active: the resolved gateway model must
    reach the agent unchanged. Guards the pop against regressions.
    """
    seen_kwargs = {}

    class FakeAgent:
        def __init__(self, **kwargs):
            seen_kwargs.update(kwargs)

    primary_runtime = {
        "provider": "openrouter",
        "base_url": "https://openrouter.ai/api/v1",
        "api_mode": "chat_completions",
    }

    # Applied in order; the resolver returns a fresh dict on every call so
    # any in-place mutation by the code under test stays local to that call.
    patches = (
        ("run_agent.AIAgent", FakeAgent),
        ("gateway.run._resolve_runtime_agent_kwargs", lambda: dict(primary_runtime)),
        ("gateway.run._resolve_gateway_model", lambda: "primary/model"),
        ("gateway.run._load_gateway_config", lambda: {}),
        ("gateway.run.GatewayRunner._load_reasoning_config", staticmethod(lambda: {})),
        ("gateway.run.GatewayRunner._load_fallback_model", staticmethod(lambda: None)),
        ("gateway.run._current_max_iterations", lambda: 90),
        ("hermes_cli.tools_config._get_platform_tools", lambda *_: set()),
    )
    for target, replacement in patches:
        monkeypatch.setattr(target, replacement)

    adapter = APIServerAdapter(PlatformConfig(enabled=True))
    monkeypatch.setattr(adapter, "_ensure_session_db", lambda: None)

    agent = adapter._create_agent(session_id="api-session")

    assert isinstance(agent, FakeAgent)
    assert seen_kwargs["model"] == "primary/model"
# ---------------------------------------------------------------------------
# Auth checking