fix(api_server): pop fallback model kwarg to prevent AIAgent collision
When the primary provider's auth fails (expired token / 429 quota cap), _resolve_runtime_agent_kwargs() falls through to the fallback provider chain, whose runtime dict carries its own 'model' key. api_server's _create_agent then called AIAgent(model=model, **runtime_kwargs), which collided on 'model' (TypeError: got multiple values for keyword argument 'model') and made every /v1/chat/completions request fail with HTTP 500 while a fallback was active. Pop the runtime model and let it override the config model, mirroring the native gateway path (_resolve_session_agent_runtime). Salvaged from #35716 by @ryo-solo (earliest submitter); the PR's second half (Mistral reasoning_content strip) is already handled on main and was dropped. Co-authored-by: Hermes Agent <noreply@nousresearch.com>
This commit is contained in:
parent
ce9d180a94
commit
d578b6165d
3 changed files with 91 additions and 0 deletions
|
|
@ -1108,6 +1108,18 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
reasoning_config = GatewayRunner._load_reasoning_config()
|
||||
model = _resolve_gateway_model()
|
||||
|
||||
# When the primary provider's auth fails (expired token / 429 quota
|
||||
# cap), _resolve_runtime_agent_kwargs() falls through to the fallback
|
||||
# provider chain, whose runtime dict carries its own ``model`` key.
|
||||
# Pop it and let it override the config model, mirroring the native
|
||||
# gateway path (_resolve_session_agent_runtime in run.py). Otherwise
|
||||
# the explicit ``model=model`` below collides with the ``**runtime_kwargs``
|
||||
# spread → "got multiple values for keyword argument 'model'", 500ing
|
||||
# every /v1/chat/completions request while a fallback is active.
|
||||
runtime_model = runtime_kwargs.pop("model", None)
|
||||
if runtime_model:
|
||||
model = runtime_model
|
||||
|
||||
user_config = _load_gateway_config()
|
||||
enabled_toolsets = sorted(_get_platform_tools(user_config, "api_server"))
|
||||
|
||||
|
|
|
|||
|
|
@ -318,6 +318,7 @@ AUTHOR_MAP = {
|
|||
"alelpoan@proton.me": "alelpoan",
|
||||
"aman@abacus.ai": "Aman113114-IITD",
|
||||
"octavio.turra@gmail.com": "octavioturra",
|
||||
"275877312+ryo-solo@users.noreply.github.com": "ryo-solo",
|
||||
"524706+Twanislas@users.noreply.github.com": "Twanislas",
|
||||
"9592417+adam91holt@users.noreply.github.com": "adam91holt",
|
||||
"kchuang1015@users.noreply.github.com": "kchuang1015",
|
||||
|
|
|
|||
|
|
@ -400,6 +400,84 @@ class TestAdapterInit:
|
|||
assert isinstance(agent, FakeAgent)
|
||||
assert captured["max_iterations"] == 200
|
||||
|
||||
def test_create_agent_handles_fallback_model_kwarg_collision(self, monkeypatch):
    """A ``model`` key inside the runtime kwargs must not break agent creation.

    The stubbed ``_resolve_runtime_agent_kwargs()`` returns a runtime dict
    that carries its own ``model`` entry, as happens when the fallback
    provider chain is used. ``_create_agent`` has to pop that entry and let
    it override the config model; otherwise the explicit ``model=`` argument
    collides with ``**runtime_kwargs`` and the call raises
    "got multiple values for keyword argument 'model'".
    """
    seen_kwargs = {}

    class FakeAgent:
        # Stand-in for AIAgent that only records the kwargs it was built with.
        def __init__(self, **kwargs):
            seen_kwargs.update(kwargs)

    fallback_runtime = {
        "provider": "openrouter",
        "base_url": "https://openrouter.ai/api/v1",
        "api_mode": "chat_completions",
        # Supplied by the fallback entry; this is the key that collides.
        "model": "anthropic/claude-haiku",
    }

    replacements = (
        ("run_agent.AIAgent", FakeAgent),
        # Hand out a fresh copy per call so popping "model" never leaks.
        ("gateway.run._resolve_runtime_agent_kwargs", lambda: dict(fallback_runtime)),
        ("gateway.run._resolve_gateway_model", lambda: "primary/model"),
        ("gateway.run._load_gateway_config", lambda: {}),
        ("gateway.run.GatewayRunner._load_reasoning_config", staticmethod(lambda: {})),
        ("gateway.run.GatewayRunner._load_fallback_model", staticmethod(lambda: None)),
        ("gateway.run._current_max_iterations", lambda: 90),
        ("hermes_cli.tools_config._get_platform_tools", lambda *_: set()),
    )
    for dotted_path, stub in replacements:
        monkeypatch.setattr(dotted_path, stub)

    adapter = APIServerAdapter(PlatformConfig(enabled=True))
    monkeypatch.setattr(adapter, "_ensure_session_db", lambda: None)

    # A duplicate 'model' kwarg would surface here as a TypeError.
    built = adapter._create_agent(session_id="api-session")

    assert isinstance(built, FakeAgent)
    # The fallback model wins over the config model, like the native path.
    assert seen_kwargs["model"] == "anthropic/claude-haiku"
|
||||
|
||||
def test_create_agent_keeps_config_model_when_runtime_omits_it(self, monkeypatch):
    """Without a runtime ``model`` key the resolved gateway model is kept.

    This is the no-fallback case: the stubbed runtime kwargs contain no
    ``model`` entry, so the value returned by ``_resolve_gateway_model()``
    must reach the agent unchanged. Guards the pop against regressions.
    """
    seen_kwargs = {}

    class FakeAgent:
        # Stand-in for AIAgent that only records the kwargs it was built with.
        def __init__(self, **kwargs):
            seen_kwargs.update(kwargs)

    primary_runtime = {
        "provider": "openrouter",
        "base_url": "https://openrouter.ai/api/v1",
        "api_mode": "chat_completions",
    }

    replacements = (
        ("run_agent.AIAgent", FakeAgent),
        # Hand out a fresh copy per call so the stub stays side-effect free.
        ("gateway.run._resolve_runtime_agent_kwargs", lambda: dict(primary_runtime)),
        ("gateway.run._resolve_gateway_model", lambda: "primary/model"),
        ("gateway.run._load_gateway_config", lambda: {}),
        ("gateway.run.GatewayRunner._load_reasoning_config", staticmethod(lambda: {})),
        ("gateway.run.GatewayRunner._load_fallback_model", staticmethod(lambda: None)),
        ("gateway.run._current_max_iterations", lambda: 90),
        ("hermes_cli.tools_config._get_platform_tools", lambda *_: set()),
    )
    for dotted_path, stub in replacements:
        monkeypatch.setattr(dotted_path, stub)

    adapter = APIServerAdapter(PlatformConfig(enabled=True))
    monkeypatch.setattr(adapter, "_ensure_session_db", lambda: None)

    built = adapter._create_agent(session_id="api-session")

    assert isinstance(built, FakeAgent)
    # No runtime override present, so the config model passes through.
    assert seen_kwargs["model"] == "primary/model"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auth checking
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue