fix(compressor): skip compression during summary LLM cooldown to prevent CLI freeze
When the summary LLM hits a 429 or other transient failure, _generate_summary() sets a cooldown and returns None; compress() then inserts a static fallback marker and returns. Tokens stay above the threshold, so should_compress() kept returning True and every subsequent agent turn re-fired _compress_context(), making the CLI appear frozen until the cooldown expired. Add a cooldown guard to should_compress(): return False while _summary_failure_cooldown_until is in the future. This reuses the existing float, so no new state is needed. Manual /compress (force=True) still clears the cooldown first. Fixes #11529
This commit is contained in:
parent
0e4c879a3b
commit
812236bff8
2 changed files with 52 additions and 0 deletions
|
|
@ -1073,6 +1073,23 @@ class ContextCompressor(ContextEngine):
|
|||
tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens
|
||||
if tokens < self.threshold_tokens:
|
||||
return False
|
||||
# Do not trigger compression while the summary LLM is in cooldown.
|
||||
# On a 429/transient failure _generate_summary() sets a cooldown and
|
||||
# returns None; compress() then inserts a static fallback marker and
|
||||
# returns. Tokens stay above threshold, so without this guard every
|
||||
# subsequent turn re-fires _compress_context() — re-inserting the
|
||||
# marker and re-entering the loop, making the CLI appear frozen until
|
||||
# the cooldown expires (issue #11529). Manual /compress passes
|
||||
# force=True, which clears this cooldown in compress() before running,
|
||||
# so it still retries immediately.
|
||||
_cooldown_remaining = self._summary_failure_cooldown_until - time.monotonic()
|
||||
if _cooldown_remaining > 0:
|
||||
if not self.quiet_mode:
|
||||
logger.debug(
|
||||
"Compression deferred — summary LLM in cooldown for %.0fs more",
|
||||
_cooldown_remaining,
|
||||
)
|
||||
return False
|
||||
# Anti-thrashing: back off if recent compressions were ineffective
|
||||
if self._ineffective_compression_count >= 2:
|
||||
if not self.quiet_mode:
|
||||
|
|
|
|||
|
|
@ -16,6 +16,8 @@ The fix adds two safeguards:
|
|||
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import time
|
||||
|
||||
from agent.context_compressor import ContextCompressor, _CHARS_PER_TOKEN
|
||||
|
||||
|
||||
|
|
@ -248,3 +250,36 @@ class TestAntiThrashing:
|
|||
comp = _make_compressor(config_context_length=96000)
|
||||
comp.last_prompt_tokens = 10_000
|
||||
assert not comp.should_compress(10_000)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test: summary-LLM cooldown guard in should_compress (#11529)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestCooldownGuard:
    """Regression tests for the summary-LLM cooldown guard (#11529).

    While the summary LLM is in cooldown, should_compress() has to report
    False. Otherwise a 429/transient failure re-fires _compress_context()
    on every turn (inserting a fallback marker repeatedly) and freezes
    the CLI.
    """

    def test_active_cooldown_blocks(self):
        """Cooldown deadline in the future -> no compression, even though
        the token count is over threshold."""
        compressor = _make_compressor(config_context_length=96000)
        compressor.last_prompt_tokens = 65_000
        # Deadline one minute ahead on the monotonic clock: still cooling down.
        future_deadline = time.monotonic() + 60
        compressor._summary_failure_cooldown_until = future_deadline
        should_fire = compressor.should_compress(65_000)
        assert not should_fire

    def test_expired_cooldown_allows(self):
        """Cooldown deadline already in the past -> compression resumes
        normally."""
        compressor = _make_compressor(config_context_length=96000)
        compressor.last_prompt_tokens = 65_000
        # Deadline one second behind the monotonic clock: cooldown is over.
        past_deadline = time.monotonic() - 1
        compressor._summary_failure_cooldown_until = past_deadline
        should_fire = compressor.should_compress(65_000)
        assert should_fire

    def test_no_cooldown_allows(self):
        """Freshly built compressor (cooldown never set) -> compression is
        not blocked."""
        compressor = _make_compressor(config_context_length=96000)
        compressor.last_prompt_tokens = 65_000
        # The untouched default must be exactly 0.0, i.e. "no cooldown".
        initial_deadline = compressor._summary_failure_cooldown_until
        assert initial_deadline == 0.0
        should_fire = compressor.should_compress(65_000)
        assert should_fire
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue