Merge pull request #50762 from NousResearch/salvage/defer-preflight-after-compaction

fix(agent): defer preflight compaction until real usage after a compaction (#23767, #36718)
2026-06-22 17:10:03 +05:30 · 2026-06-22 17:10:03 +05:30 · 065946d84f
commit 065946d84f
parent 1f28b1a9b9 b2c84a1626
2 changed files with 34 additions and 0 deletions
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -878,6 +878,18 @@ class ContextCompressor(ContextEngine):
        """
        if rough_tokens < self.threshold_tokens:
            return False
+        # Immediately after a compaction the post-compression path sets
+        # ``awaiting_real_usage_after_compression`` and parks
+        # ``last_prompt_tokens = -1``, but ``last_real_prompt_tokens`` still
+        # holds the STALE pre-compression value (above threshold — that's why
+        # compaction fired).  Without this guard that stale value defeats the
+        # ``last_real_prompt_tokens >= threshold_tokens`` check below, so
+        # preflight fires a SECOND compaction before the provider has reported
+        # real token usage for the now-shorter conversation.  Defer while the
+        # flag is set (normally one turn); update_from_response() clears it
+        # when real usage arrives.  (#36718)
+        if self.awaiting_real_usage_after_compression:
+            return True
        if self.last_real_prompt_tokens <= 0:
            return False
        if self.last_real_prompt_tokens >= self.threshold_tokens:
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -86,6 +86,28 @@ class TestPreflightDeferral:

        assert compressor.should_defer_preflight_to_real_usage(93_000) is False

+    def test_defers_immediately_after_compaction_with_stale_real_prompt(self, compressor):
+        """#36718: right after a compaction, last_real_prompt_tokens still holds
+        the stale pre-compression value (above threshold). The awaiting flag
+        must force deferral so preflight doesn't fire a SECOND compaction before
+        real post-compaction usage arrives."""
+        compressor.threshold_tokens = 85_000
+        # Stale pre-compression value — would hit the `>= threshold => False`
+        # short-circuit and defeat deferral without the flag guard.
+        compressor.last_real_prompt_tokens = 120_000
+        compressor.awaiting_real_usage_after_compression = True
+        assert compressor.should_defer_preflight_to_real_usage(95_000) is True
+
+    def test_resumes_normal_deferral_after_flag_cleared(self, compressor):
+        """Once update_from_response() clears the flag, the normal baseline/
+        growth deferral logic governs again (no permanent deferral)."""
+        compressor.threshold_tokens = 85_000
+        compressor.last_real_prompt_tokens = 120_000
+        compressor.awaiting_real_usage_after_compression = False
+        # Stale-high real prompt with the flag cleared => the >= threshold
+        # short-circuit applies => no deferral.
+        assert compressor.should_defer_preflight_to_real_usage(95_000) is False
+


 class TestCompress: