fix(compaction): place END MARKER last in merge-into-tail summaries

When the compression summary is merged into the first tail message (the alternation corner case where a standalone summary role would collide with both head and tail), the old format was SUMMARY + END_MARKER + OLD_TAIL_CONTENT — so the preserved tail content appeared AFTER the end marker and the model could read it as a fresh message to respond to. Reorder so the END MARKER is always last: old tail content is wrapped in [PRIOR CONTEXT ...][END OF PRIOR CONTEXT — COMPACTION SUMMARY BELOW] delimiters, then the summary, then the END MARKER. _append_text_to_content handles both string and multimodal-list content. Salvaged from #56372 by @Gromykoss. Only the END-MARKER reorder half is carried over. The PR's second change (a post-compaction pass that strips user-role messages before the first summary marker on compression_count>=2) was dropped: on 2nd+ compactions the protected head decays to system-only (_effective_protect_first_n -> 0, #11996) so the targeted 'ghost head user' does not occur, and where the strip does fire it deletes legitimate recent tail user turns (data loss) and can leave consecutive assistant messages (role-alternation violation).
2026-07-01 17:54:36 +05:30 · 2026-07-01 17:54:36 +05:30 · a1a8a967e1
commit a1a8a967e1
parent d00762623b
2 changed files with 83 additions and 5 deletions
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@ -2886,10 +2886,25 @@ This compaction should PRIORITISE preserving all information related to the focu
        for i in range(compress_end, n_messages):
            msg = messages[i].copy()
            if _merge_summary_into_tail and i == compress_end:
-                merged_prefix = summary + "\n\n" + _SUMMARY_END_MARKER + "\n\n"
+                # Merge the summary into the first tail message, but place
+                # the END MARKER at the very end so the model sees an
+                # unambiguous boundary. Old tail content is preserved as
+                # reference material BEFORE the summary, clearly delimited
+                # so it is not mistaken for a new message to respond to.
+                # Uses _append_text_to_content to safely handle both
+                # string and multimodal-list content types.
+                # Fixes ghost-message leakage across compaction boundaries
+                # where old head messages survived verbatim and appeared
+                # before the summary.
+                old_content = msg.get("content", "")
+                suffix = (
+                    "\n\n[END OF PRIOR CONTEXT — COMPACTION SUMMARY BELOW]\n\n"
+                    + summary + "\n\n"
+                    + _SUMMARY_END_MARKER
+                )
                msg["content"] = _append_text_to_content(
-                    msg.get("content"),
-                    merged_prefix,
+                    _append_text_to_content(old_content, suffix, prepend=False),
+                    "[PRIOR CONTEXT — for reference only; not a new message]\n",
                    prepend=True,
                )
                # Mark the merged message so frontends can identify it as
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@ -8,6 +8,7 @@ from agent.context_compressor import (
    ContextCompressor,
    HISTORICAL_TASK_HEADING,
    SUMMARY_PREFIX,
+    COMPRESSED_SUMMARY_METADATA_KEY,
 )
 from hermes_state import SessionDB

@ -1826,7 +1827,7 @@ class TestCompressWithClient:
        with patch("agent.context_compressor.call_llm", return_value=mock_response):
            result = c.compress(msgs)
        summary_msg = [
-            m for m in result if (m.get("content") or "").startswith(SUMMARY_PREFIX)
+            m for m in result if m.get(COMPRESSED_SUMMARY_METADATA_KEY)
        ]
        assert len(summary_msg) == 1
        assert summary_msg[0]["role"] == "assistant"
@ -1940,12 +1941,74 @@ class TestCompressWithClient:
            if m.get("role") == "user" and isinstance(m.get("content"), list)
        )
        assert isinstance(merged_tail["content"], list)
-        assert "summary text" in merged_tail["content"][0]["text"]
+        # With the fixed merge format, summary text is in the last text block
+        # (after PRIOR CONTEXT and END OF PRIOR CONTEXT delimiters),
+        # not necessarily in block [0].
+        assert any(
+            "summary text" in (block.get("text") or "")
+            for block in merged_tail["content"]
+            if isinstance(block, dict)
+        )
        assert any(
            isinstance(block, dict) and block.get("text") == "msg 6"
            for block in merged_tail["content"]
        )

+    def test_merge_into_tail_end_marker_is_last(self):
+        """Regression for #56372: in a merge-into-tail summary, the END MARKER
+        must come AFTER the preserved prior tail content, not before it.
+
+        The old format was SUMMARY + END_MARKER + OLD_CONTENT, so the preserved
+        tail content landed after the marker and the model could read it as a
+        fresh message. The fix wraps old content in [PRIOR CONTEXT] delimiters
+        and always places the END MARKER last.
+
+        Mirrors test_double_collision_merges_summary_into_list_tail_content so
+        the merged tail message genuinely carries preserved content ("msg 6").
+        """
+        from agent.context_compressor import _SUMMARY_END_MARKER
+
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = "SUMMARY_BODY"
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=3)
+
+        msgs = [
+            {"role": "system", "content": "system prompt"},
+            {"role": "user", "content": "msg 1"},
+            {"role": "assistant", "content": "msg 2"},
+            {"role": "user", "content": "msg 3"},
+            {"role": "assistant", "content": "msg 4"},
+            {"role": "user", "content": "msg 5"},
+            {"role": "user", "content": [{"type": "text", "text": "PRESERVED_TAIL_CONTENT"}]},
+            {"role": "assistant", "content": "msg 7"},
+            {"role": "user", "content": "msg 8"},
+        ]
+
+        with patch("agent.context_compressor.call_llm", return_value=mock_response):
+            result = c.compress(msgs)
+
+        merged = next(m for m in result if m.get(COMPRESSED_SUMMARY_METADATA_KEY))
+        content = merged["content"]
+        text = (
+            content if isinstance(content, str)
+            else " ".join(
+                b.get("text", "") for b in content if isinstance(b, dict)
+            )
+        )
+        end = _SUMMARY_END_MARKER.strip()
+        # All three fragments present.
+        assert "PRESERVED_TAIL_CONTENT" in text
+        assert "SUMMARY_BODY" in text
+        assert end in text
+        # Ordering invariant: prior content BEFORE summary BEFORE end marker,
+        # and the end marker is the very last fragment.
+        assert text.index("PRESERVED_TAIL_CONTENT") < text.index("SUMMARY_BODY")
+        assert text.index("SUMMARY_BODY") < text.index(end)
+        assert text.rstrip().endswith(end)
+
    def test_double_collision_user_head_assistant_tail(self):
        """Reverse double collision: head ends with 'user', tail starts with 'assistant'.
        summary='assistant' collides with tail, 'user' collides with head → merge."""