fix(agent): retry 413 after stripping vision payloads (#47339)

When text compression can't reduce a 413 request further, evict base64
image parts from tool messages and retry once instead of dead-ending
with 'Payload too large and cannot compress further.'

A 413 is a request-body byte-size limit, not a token limit. browser_vision
screenshots (2-5MB base64 each) keep the HTTP body oversized even after
aggressive summarization. The strip pass passes remember_model=False so a
413 does not poison _no_list_tool_content_models — that set is for providers
that reject list-type tool content, a distinct failure mode.

Cherry-picked from #47397 by Tranquil-Flow; placed onto main's current
token-aware 413 recovery else branch.
This commit is contained in:
Tranquil-Flow 2026-07-01 03:06:00 -07:00 committed by Teknium
parent 2b8adb8683
commit 122e5bc037
3 changed files with 105 additions and 14 deletions

View file

@ -3244,6 +3244,16 @@ def run_conversation(
_retry.restart_with_compressed_messages = True
break
else:
if agent._try_strip_image_parts_from_tool_messages(
api_messages,
remember_model=False,
):
agent._buffer_status(
"📐 Compression could not reduce the request further — "
"removed retained vision payloads and retrying..."
)
continue
# Terminal — surface buffered context so the user
# sees what compression attempts were made.
agent._flush_status_buffer()

View file

@ -4932,17 +4932,27 @@ class AIAgent:
max_dimension=max_dimension,
)
def _try_strip_image_parts_from_tool_messages(self, api_messages: list) -> bool:
def _try_strip_image_parts_from_tool_messages(
self,
api_messages: list,
*,
remember_model: bool = True,
) -> bool:
"""Downgrade list-type tool messages to text summaries in-place.
Recovery path for providers that reject list-type tool message content
(e.g. Xiaomi MiMo's 400 "text is not set"; see issue #27344). Walks
``api_messages`` for any ``role: "tool"`` message whose ``content`` is
a list containing image parts, replaces the content with the existing
text part(s) (or a minimal placeholder if none survive), and records
the active (provider, model) in ``self._no_list_tool_content_models``
so subsequent ``_tool_result_content_for_active_model`` calls in this
session preemptively downgrade screenshots without a round-trip.
text part(s) (or a minimal placeholder if none survive), and by default
records the active (provider, model) in
``self._no_list_tool_content_models`` so subsequent
``_tool_result_content_for_active_model`` calls in this session
preemptively downgrade screenshots without a round-trip.
413 payload-size recovery passes ``remember_model=False`` because that
error means this request body was too large, not that the provider/model
rejects list-type tool content in general.
Returns True when at least one tool message was downgraded the
caller (the 400 recovery branch in ``agent.conversation_loop``) uses
@ -4952,15 +4962,16 @@ class AIAgent:
if not isinstance(api_messages, list):
return False
# Record (provider, model) so we don't relearn this lesson.
key = (
(getattr(self, "provider", "") or "").strip().lower(),
(getattr(self, "model", "") or "").strip(),
)
if not hasattr(self, "_no_list_tool_content_models"):
self._no_list_tool_content_models = set()
if key[1]: # only record when we actually have a model id
self._no_list_tool_content_models.add(key)
if remember_model:
# Record (provider, model) so we don't relearn this lesson.
key = (
(getattr(self, "provider", "") or "").strip().lower(),
(getattr(self, "model", "") or "").strip(),
)
if not hasattr(self, "_no_list_tool_content_models"):
self._no_list_tool_content_models = set()
if key[1]: # only record when we actually have a model id
self._no_list_tool_content_models.add(key)
changed = False
for msg in api_messages:

View file

@ -227,6 +227,76 @@ class TestHTTP413Compression:
mock_compress.assert_called_once()
assert result["completed"] is True
def test_413_strips_vision_payloads_when_compression_cannot_reduce_messages(self, agent):
"""If compression leaves image payloads behind, strip them and retry.
Browser vision tool results can contain base64 image parts. A 413 can
persist even after summarisation when the remaining recent tool result
still carries binary data; Hermes should evict the image payload and
keep the text/placeholder context instead of failing immediately.
"""
err_413 = _make_413_error()
ok_resp = _mock_response(content="Recovered after image eviction", finish_reason="stop")
request_payloads = []
def _side_effect(**kwargs):
request_payloads.append(kwargs)
if len(request_payloads) == 1:
raise err_413
return ok_resp
agent.client.chat.completions.create.side_effect = _side_effect
image_part = {
"type": "image_url",
"image_url": {"url": "data:image/png;base64," + ("a" * 2000)},
}
prefill = [
{"role": "user", "content": "please inspect this page"},
{
"role": "assistant",
"content": None,
"tool_calls": [
{
"id": "call_vision",
"type": "function",
"function": {"name": "browser_vision", "arguments": "{}"},
}
],
},
{
"role": "tool",
"tool_call_id": "call_vision",
"name": "browser_vision",
"content": [
{"type": "text", "text": "Screenshot of the dashboard"},
image_part,
],
},
]
with (
patch.object(agent, "_compress_context") as mock_compress,
patch.object(agent, "_persist_session"),
patch.object(agent, "_save_trajectory"),
patch.object(agent, "_cleanup_task_resources"),
):
# Simulate the bad production case: compression ran, but the
# recent vision tool message survived so message count did not drop.
mock_compress.side_effect = lambda msgs, *_a, **_k: (msgs, "compressed prompt")
result = agent.run_conversation("continue", conversation_history=prefill)
mock_compress.assert_called_once()
assert result["completed"] is True
assert result["final_response"] == "Recovered after image eviction"
assert len(request_payloads) == 2
first_tool = next(m for m in request_payloads[0]["messages"] if m.get("role") == "tool")
retried_tool = next(m for m in request_payloads[1]["messages"] if m.get("role") == "tool")
assert "Screenshot of the dashboard" in str(first_tool["content"])
assert "data:image" not in str(retried_tool["content"])
assert "Screenshot of the dashboard" in str(retried_tool["content"])
assert not getattr(agent, "_no_list_tool_content_models", set())
def test_413_clears_conversation_history_on_persist(self, agent):
"""After 413-triggered compression, _persist_session must receive None history.