fix(compressor): skip compression during summary LLM cooldown to prevent CLI freeze

When the summary LLM hits a 429/transient failure, _generate_summary() sets a cooldown and returns None; compress() inserts a static fallback marker and returns. Tokens stay above threshold, so should_compress() kept returning True and every subsequent agent turn re-fired _compress_context() — the CLI appeared frozen until the cooldown expired. Add a cooldown guard to should_compress(): return False while _summary_failure_cooldown_until is in the future. Reuses the existing float; no new state. Manual /compress (force=True) still clears the cooldown first. Fixes #11529
2026-06-30 15:37:11 -07:00 · 2026-06-30 15:37:11 -07:00 · 812236bff8
commit 812236bff8
parent 0e4c879a3b
2 changed files with 52 additions and 0 deletions
--- a/tests/run_agent/test_infinite_compaction_loop.py
+++ b/tests/run_agent/test_infinite_compaction_loop.py
@ -16,6 +16,8 @@ The fix adds two safeguards:

 from unittest.mock import patch, MagicMock

+import time
+
 from agent.context_compressor import ContextCompressor, _CHARS_PER_TOKEN


@ -248,3 +250,36 @@ class TestAntiThrashing:
        comp = _make_compressor(config_context_length=96000)
        comp.last_prompt_tokens = 10_000
        assert not comp.should_compress(10_000)
+
+
+# ---------------------------------------------------------------------------
+# Test: summary-LLM cooldown guard in should_compress (#11529)
+# ---------------------------------------------------------------------------
+
+class TestCooldownGuard:
+    """should_compress() must skip compression while the summary LLM is in
+    cooldown, otherwise a 429/transient failure re-fires _compress_context()
+    every turn (inserting a fallback marker repeatedly) and freezes the CLI.
+    """
+
+    def test_active_cooldown_blocks(self):
+        """A future cooldown deadline -> should_compress returns False even
+        when tokens are over threshold."""
+        comp = _make_compressor(config_context_length=96000)
+        comp.last_prompt_tokens = 65_000
+        comp._summary_failure_cooldown_until = time.monotonic() + 60
+        assert not comp.should_compress(65_000)
+
+    def test_expired_cooldown_allows(self):
+        """A past cooldown deadline -> compression resumes normally."""
+        comp = _make_compressor(config_context_length=96000)
+        comp.last_prompt_tokens = 65_000
+        comp._summary_failure_cooldown_until = time.monotonic() - 1
+        assert comp.should_compress(65_000)
+
+    def test_no_cooldown_allows(self):
+        """The default (no cooldown set) does not block compression."""
+        comp = _make_compressor(config_context_length=96000)
+        comp.last_prompt_tokens = 65_000
+        assert comp._summary_failure_cooldown_until == 0.0
+        assert comp.should_compress(65_000)