fix(compaction): place END MARKER last in merge-into-tail summaries
When the compression summary is merged into the first tail message (the alternation corner case where a standalone summary role would collide with both head and tail), the old format was SUMMARY + END_MARKER + OLD_TAIL_CONTENT — so the preserved tail content appeared AFTER the end marker and the model could read it as a fresh message to respond to. Reorder so the END MARKER is always last: old tail content is wrapped in [PRIOR CONTEXT ...][END OF PRIOR CONTEXT — COMPACTION SUMMARY BELOW] delimiters, then the summary, then the END MARKER. _append_text_to_content handles both string and multimodal-list content. Salvaged from #56372 by @Gromykoss. Only the END-MARKER reorder half is carried over. The PR's second change (a post-compaction pass that strips user-role messages before the first summary marker on compression_count>=2) was dropped: on 2nd+ compactions the protected head decays to system-only (_effective_protect_first_n -> 0, #11996) so the targeted 'ghost head user' does not occur, and where the strip does fire it deletes legitimate recent tail user turns (data loss) and can leave consecutive assistant messages (role-alternation violation).
This commit is contained in:
parent
d00762623b
commit
a1a8a967e1
2 changed files with 83 additions and 5 deletions
|
|
@ -2886,10 +2886,25 @@ This compaction should PRIORITISE preserving all information related to the focu
|
|||
for i in range(compress_end, n_messages):
|
||||
msg = messages[i].copy()
|
||||
if _merge_summary_into_tail and i == compress_end:
|
||||
merged_prefix = summary + "\n\n" + _SUMMARY_END_MARKER + "\n\n"
|
||||
# Merge the summary into the first tail message, but place
|
||||
# the END MARKER at the very end so the model sees an
|
||||
# unambiguous boundary. Old tail content is preserved as
|
||||
# reference material BEFORE the summary, clearly delimited
|
||||
# so it is not mistaken for a new message to respond to.
|
||||
# Uses _append_text_to_content to safely handle both
|
||||
# string and multimodal-list content types.
|
||||
# Fixes ghost-message leakage across compaction boundaries
|
||||
# where old head messages survived verbatim and appeared
|
||||
# before the summary.
|
||||
old_content = msg.get("content", "")
|
||||
suffix = (
|
||||
"\n\n[END OF PRIOR CONTEXT — COMPACTION SUMMARY BELOW]\n\n"
|
||||
+ summary + "\n\n"
|
||||
+ _SUMMARY_END_MARKER
|
||||
)
|
||||
msg["content"] = _append_text_to_content(
|
||||
msg.get("content"),
|
||||
merged_prefix,
|
||||
_append_text_to_content(old_content, suffix, prepend=False),
|
||||
"[PRIOR CONTEXT — for reference only; not a new message]\n",
|
||||
prepend=True,
|
||||
)
|
||||
# Mark the merged message so frontends can identify it as
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ from agent.context_compressor import (
|
|||
ContextCompressor,
|
||||
HISTORICAL_TASK_HEADING,
|
||||
SUMMARY_PREFIX,
|
||||
COMPRESSED_SUMMARY_METADATA_KEY,
|
||||
)
|
||||
from hermes_state import SessionDB
|
||||
|
||||
|
|
@ -1826,7 +1827,7 @@ class TestCompressWithClient:
|
|||
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||
result = c.compress(msgs)
|
||||
summary_msg = [
|
||||
m for m in result if (m.get("content") or "").startswith(SUMMARY_PREFIX)
|
||||
m for m in result if m.get(COMPRESSED_SUMMARY_METADATA_KEY)
|
||||
]
|
||||
assert len(summary_msg) == 1
|
||||
assert summary_msg[0]["role"] == "assistant"
|
||||
|
|
@ -1940,12 +1941,74 @@ class TestCompressWithClient:
|
|||
if m.get("role") == "user" and isinstance(m.get("content"), list)
|
||||
)
|
||||
assert isinstance(merged_tail["content"], list)
|
||||
assert "summary text" in merged_tail["content"][0]["text"]
|
||||
# With the fixed merge format, summary text is in the last text block
|
||||
# (after PRIOR CONTEXT and END OF PRIOR CONTEXT delimiters),
|
||||
# not necessarily in block [0].
|
||||
assert any(
|
||||
"summary text" in (block.get("text") or "")
|
||||
for block in merged_tail["content"]
|
||||
if isinstance(block, dict)
|
||||
)
|
||||
assert any(
|
||||
isinstance(block, dict) and block.get("text") == "msg 6"
|
||||
for block in merged_tail["content"]
|
||||
)
|
||||
|
||||
def test_merge_into_tail_end_marker_is_last(self):
|
||||
"""Regression for #56372: in a merge-into-tail summary, the END MARKER
|
||||
must come AFTER the preserved prior tail content, not before it.
|
||||
|
||||
The old format was SUMMARY + END_MARKER + OLD_CONTENT, so the preserved
|
||||
tail content landed after the marker and the model could read it as a
|
||||
fresh message. The fix wraps old content in [PRIOR CONTEXT] delimiters
|
||||
and always places the END MARKER last.
|
||||
|
||||
Mirrors test_double_collision_merges_summary_into_list_tail_content so
|
||||
the merged tail message genuinely carries preserved content ("msg 6").
|
||||
"""
|
||||
from agent.context_compressor import _SUMMARY_END_MARKER
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.choices = [MagicMock()]
|
||||
mock_response.choices[0].message.content = "SUMMARY_BODY"
|
||||
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=3)
|
||||
|
||||
msgs = [
|
||||
{"role": "system", "content": "system prompt"},
|
||||
{"role": "user", "content": "msg 1"},
|
||||
{"role": "assistant", "content": "msg 2"},
|
||||
{"role": "user", "content": "msg 3"},
|
||||
{"role": "assistant", "content": "msg 4"},
|
||||
{"role": "user", "content": "msg 5"},
|
||||
{"role": "user", "content": [{"type": "text", "text": "PRESERVED_TAIL_CONTENT"}]},
|
||||
{"role": "assistant", "content": "msg 7"},
|
||||
{"role": "user", "content": "msg 8"},
|
||||
]
|
||||
|
||||
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||
result = c.compress(msgs)
|
||||
|
||||
merged = next(m for m in result if m.get(COMPRESSED_SUMMARY_METADATA_KEY))
|
||||
content = merged["content"]
|
||||
text = (
|
||||
content if isinstance(content, str)
|
||||
else " ".join(
|
||||
b.get("text", "") for b in content if isinstance(b, dict)
|
||||
)
|
||||
)
|
||||
end = _SUMMARY_END_MARKER.strip()
|
||||
# All three fragments present.
|
||||
assert "PRESERVED_TAIL_CONTENT" in text
|
||||
assert "SUMMARY_BODY" in text
|
||||
assert end in text
|
||||
# Ordering invariant: prior content BEFORE summary BEFORE end marker,
|
||||
# and the end marker is the very last fragment.
|
||||
assert text.index("PRESERVED_TAIL_CONTENT") < text.index("SUMMARY_BODY")
|
||||
assert text.index("SUMMARY_BODY") < text.index(end)
|
||||
assert text.rstrip().endswith(end)
|
||||
|
||||
def test_double_collision_user_head_assistant_tail(self):
|
||||
"""Reverse double collision: head ends with 'user', tail starts with 'assistant'.
|
||||
summary='assistant' collides with tail, 'user' collides with head → merge."""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue