diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 4bccda138..3b37af7b8 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -1073,6 +1073,23 @@ class ContextCompressor(ContextEngine): tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens if tokens < self.threshold_tokens: return False + # Do not trigger compression while the summary LLM is in cooldown. + # On a 429/transient failure _generate_summary() sets a cooldown and + # returns None; compress() then inserts a static fallback marker and + # returns. Tokens stay above threshold, so without this guard every + # subsequent turn re-fires _compress_context() — re-inserting the + # marker and re-entering the loop, making the CLI appear frozen until + # the cooldown expires (issue #11529). Manual /compress passes + # force=True, which clears this cooldown in compress() before running, + # so it still retries immediately. + _cooldown_remaining = self._summary_failure_cooldown_until - time.monotonic() + if _cooldown_remaining > 0: + if not self.quiet_mode: + logger.debug( + "Compression deferred — summary LLM in cooldown for %.0fs more", + _cooldown_remaining, + ) + return False # Anti-thrashing: back off if recent compressions were ineffective if self._ineffective_compression_count >= 2: if not self.quiet_mode: diff --git a/tests/run_agent/test_infinite_compaction_loop.py b/tests/run_agent/test_infinite_compaction_loop.py index 930df3381..fc26a2f41 100644 --- a/tests/run_agent/test_infinite_compaction_loop.py +++ b/tests/run_agent/test_infinite_compaction_loop.py @@ -16,6 +16,8 @@ The fix adds two safeguards: from unittest.mock import patch, MagicMock +import time + from agent.context_compressor import ContextCompressor, _CHARS_PER_TOKEN @@ -248,3 +250,36 @@ class TestAntiThrashing: comp = _make_compressor(config_context_length=96000) comp.last_prompt_tokens = 10_000 assert not comp.should_compress(10_000) + + +# 
# ---------------------------------------------------------------------------
# Test: summary-LLM cooldown guard in should_compress (#11529)
# ---------------------------------------------------------------------------

class TestCooldownGuard:
    """Cooldown gating of should_compress() (issue #11529).

    While the summary LLM is cooling down after a 429/transient failure,
    should_compress() has to report False.  Otherwise _compress_context()
    is re-fired on every turn, the fallback marker is inserted over and
    over, and the CLI looks frozen.
    """

    @staticmethod
    def _over_threshold_compressor():
        """Return a compressor whose last prompt size is set to 65_000 tokens."""
        compressor = _make_compressor(config_context_length=96000)
        compressor.last_prompt_tokens = 65_000
        return compressor

    def test_active_cooldown_blocks(self):
        """Deadline still in the future: compression is refused although the
        token count is over threshold."""
        compressor = self._over_threshold_compressor()
        deadline = time.monotonic() + 60
        compressor._summary_failure_cooldown_until = deadline
        assert not compressor.should_compress(65_000)

    def test_expired_cooldown_allows(self):
        """Deadline already in the past: compression is allowed again."""
        compressor = self._over_threshold_compressor()
        deadline = time.monotonic() - 1
        compressor._summary_failure_cooldown_until = deadline
        assert compressor.should_compress(65_000)

    def test_no_cooldown_allows(self):
        """Untouched default deadline (0.0): compression is not blocked."""
        compressor = self._over_threshold_compressor()
        assert compressor._summary_failure_cooldown_until == 0.0
        assert compressor.should_compress(65_000)