fix(agent): retry 413 after stripping vision payloads (#47339)
When text compression can't reduce a 413 request further, evict base64 image parts from tool messages and retry once instead of dead-ending with 'Payload too large and cannot compress further.' A 413 is a request-body byte-size limit, not a token limit. browser_vision screenshots (2-5MB base64 each) keep the HTTP body oversized even after aggressive summarization. The strip pass passes remember_model=False so a 413 does not poison _no_list_tool_content_models — that set is for providers that reject list-type tool content, a distinct failure mode. Cherry-picked from #47397 by Tranquil-Flow; placed onto main's current token-aware 413 recovery else branch.
This commit is contained in:
parent
2b8adb8683
commit
122e5bc037
3 changed files with 105 additions and 14 deletions
|
|
@ -3244,6 +3244,16 @@ def run_conversation(
|
|||
_retry.restart_with_compressed_messages = True
|
||||
break
|
||||
else:
|
||||
if agent._try_strip_image_parts_from_tool_messages(
|
||||
api_messages,
|
||||
remember_model=False,
|
||||
):
|
||||
agent._buffer_status(
|
||||
"📐 Compression could not reduce the request further — "
|
||||
"removed retained vision payloads and retrying..."
|
||||
)
|
||||
continue
|
||||
|
||||
# Terminal — surface buffered context so the user
|
||||
# sees what compression attempts were made.
|
||||
agent._flush_status_buffer()
|
||||
|
|
|
|||
39
run_agent.py
39
run_agent.py
|
|
@ -4932,17 +4932,27 @@ class AIAgent:
|
|||
max_dimension=max_dimension,
|
||||
)
|
||||
|
||||
def _try_strip_image_parts_from_tool_messages(self, api_messages: list) -> bool:
|
||||
def _try_strip_image_parts_from_tool_messages(
|
||||
self,
|
||||
api_messages: list,
|
||||
*,
|
||||
remember_model: bool = True,
|
||||
) -> bool:
|
||||
"""Downgrade list-type tool messages to text summaries in-place.
|
||||
|
||||
Recovery path for providers that reject list-type tool message content
|
||||
(e.g. Xiaomi MiMo's 400 "text is not set"; see issue #27344). Walks
|
||||
``api_messages`` for any ``role: "tool"`` message whose ``content`` is
|
||||
a list containing image parts, replaces the content with the existing
|
||||
text part(s) (or a minimal placeholder if none survive), and records
|
||||
the active (provider, model) in ``self._no_list_tool_content_models``
|
||||
so subsequent ``_tool_result_content_for_active_model`` calls in this
|
||||
session preemptively downgrade screenshots without a round-trip.
|
||||
text part(s) (or a minimal placeholder if none survive), and by default
|
||||
records the active (provider, model) in
|
||||
``self._no_list_tool_content_models`` so subsequent
|
||||
``_tool_result_content_for_active_model`` calls in this session
|
||||
preemptively downgrade screenshots without a round-trip.
|
||||
|
||||
413 payload-size recovery passes ``remember_model=False`` because that
|
||||
error means this request body was too large, not that the provider/model
|
||||
rejects list-type tool content in general.
|
||||
|
||||
Returns True when at least one tool message was downgraded — the
|
||||
caller (the 400 recovery branch in ``agent.conversation_loop``) uses
|
||||
|
|
@ -4952,15 +4962,16 @@ class AIAgent:
|
|||
if not isinstance(api_messages, list):
|
||||
return False
|
||||
|
||||
# Record (provider, model) so we don't relearn this lesson.
|
||||
key = (
|
||||
(getattr(self, "provider", "") or "").strip().lower(),
|
||||
(getattr(self, "model", "") or "").strip(),
|
||||
)
|
||||
if not hasattr(self, "_no_list_tool_content_models"):
|
||||
self._no_list_tool_content_models = set()
|
||||
if key[1]: # only record when we actually have a model id
|
||||
self._no_list_tool_content_models.add(key)
|
||||
if remember_model:
|
||||
# Record (provider, model) so we don't relearn this lesson.
|
||||
key = (
|
||||
(getattr(self, "provider", "") or "").strip().lower(),
|
||||
(getattr(self, "model", "") or "").strip(),
|
||||
)
|
||||
if not hasattr(self, "_no_list_tool_content_models"):
|
||||
self._no_list_tool_content_models = set()
|
||||
if key[1]: # only record when we actually have a model id
|
||||
self._no_list_tool_content_models.add(key)
|
||||
|
||||
changed = False
|
||||
for msg in api_messages:
|
||||
|
|
|
|||
|
|
@ -227,6 +227,76 @@ class TestHTTP413Compression:
|
|||
mock_compress.assert_called_once()
|
||||
assert result["completed"] is True
|
||||
|
||||
def test_413_strips_vision_payloads_when_compression_cannot_reduce_messages(self, agent):
|
||||
"""If compression leaves image payloads behind, strip them and retry.
|
||||
|
||||
Browser vision tool results can contain base64 image parts. A 413 can
|
||||
persist even after summarisation when the remaining recent tool result
|
||||
still carries binary data; Hermes should evict the image payload and
|
||||
keep the text/placeholder context instead of failing immediately.
|
||||
"""
|
||||
err_413 = _make_413_error()
|
||||
ok_resp = _mock_response(content="Recovered after image eviction", finish_reason="stop")
|
||||
request_payloads = []
|
||||
|
||||
def _side_effect(**kwargs):
|
||||
request_payloads.append(kwargs)
|
||||
if len(request_payloads) == 1:
|
||||
raise err_413
|
||||
return ok_resp
|
||||
|
||||
agent.client.chat.completions.create.side_effect = _side_effect
|
||||
|
||||
image_part = {
|
||||
"type": "image_url",
|
||||
"image_url": {"url": "data:image/png;base64," + ("a" * 2000)},
|
||||
}
|
||||
prefill = [
|
||||
{"role": "user", "content": "please inspect this page"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": None,
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call_vision",
|
||||
"type": "function",
|
||||
"function": {"name": "browser_vision", "arguments": "{}"},
|
||||
}
|
||||
],
|
||||
},
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": "call_vision",
|
||||
"name": "browser_vision",
|
||||
"content": [
|
||||
{"type": "text", "text": "Screenshot of the dashboard"},
|
||||
image_part,
|
||||
],
|
||||
},
|
||||
]
|
||||
|
||||
with (
|
||||
patch.object(agent, "_compress_context") as mock_compress,
|
||||
patch.object(agent, "_persist_session"),
|
||||
patch.object(agent, "_save_trajectory"),
|
||||
patch.object(agent, "_cleanup_task_resources"),
|
||||
):
|
||||
# Simulate the bad production case: compression ran, but the
|
||||
# recent vision tool message survived so message count did not drop.
|
||||
mock_compress.side_effect = lambda msgs, *_a, **_k: (msgs, "compressed prompt")
|
||||
result = agent.run_conversation("continue", conversation_history=prefill)
|
||||
|
||||
mock_compress.assert_called_once()
|
||||
assert result["completed"] is True
|
||||
assert result["final_response"] == "Recovered after image eviction"
|
||||
assert len(request_payloads) == 2
|
||||
first_tool = next(m for m in request_payloads[0]["messages"] if m.get("role") == "tool")
|
||||
retried_tool = next(m for m in request_payloads[1]["messages"] if m.get("role") == "tool")
|
||||
assert "Screenshot of the dashboard" in str(first_tool["content"])
|
||||
assert "data:image" not in str(retried_tool["content"])
|
||||
assert "Screenshot of the dashboard" in str(retried_tool["content"])
|
||||
assert not getattr(agent, "_no_list_tool_content_models", set())
|
||||
|
||||
def test_413_clears_conversation_history_on_persist(self, agent):
|
||||
"""After 413-triggered compression, _persist_session must receive None history.
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue