diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 4bccda138..3b37af7b8 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -1073,6 +1073,23 @@ class ContextCompressor(ContextEngine):
         tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens
         if tokens < self.threshold_tokens:
             return False
+        # Do not trigger compression while the summary LLM is in cooldown.
+        # On a 429/transient failure _generate_summary() sets a cooldown and
+        # returns None; compress() then inserts a static fallback marker and
+        # returns. Tokens stay above threshold, so without this guard every
+        # subsequent turn re-fires _compress_context() — re-inserting the
+        # marker and re-entering the loop, making the CLI appear frozen until
+        # the cooldown expires (issue #11529). Manual /compress passes
+        # force=True, which clears this cooldown in compress() before running,
+        # so it still retries immediately.
+        _cooldown_remaining = self._summary_failure_cooldown_until - time.monotonic()
+        if _cooldown_remaining > 0:
+            if not self.quiet_mode:
+                logger.debug(
+                    "Compression deferred — summary LLM in cooldown for %.0fs more",
+                    _cooldown_remaining,
+                )
+            return False
         # Anti-thrashing: back off if recent compressions were ineffective
         if self._ineffective_compression_count >= 2:
             if not self.quiet_mode:
diff --git a/tests/run_agent/test_infinite_compaction_loop.py b/tests/run_agent/test_infinite_compaction_loop.py
index 930df3381..fc26a2f41 100644
--- a/tests/run_agent/test_infinite_compaction_loop.py
+++ b/tests/run_agent/test_infinite_compaction_loop.py
@@ -16,6 +16,8 @@ The fix adds two safeguards:
 
 from unittest.mock import patch, MagicMock
 
+import time
+
 from agent.context_compressor import ContextCompressor, _CHARS_PER_TOKEN
 
 
@@ -248,3 +250,36 @@ class TestAntiThrashing:
         comp = _make_compressor(config_context_length=96000)
         comp.last_prompt_tokens = 10_000
         assert not comp.should_compress(10_000)
+
+
+# ---------------------------------------------------------------------------
+# Test: summary-LLM cooldown guard in should_compress (#11529)
+# ---------------------------------------------------------------------------
+
+class TestCooldownGuard:
+    """Verify should_compress() honours _summary_failure_cooldown_until:
+    it must return False while that time.monotonic() deadline is still ahead
+    (otherwise a 429/transient summary failure re-fires compression every
+    turn and freezes the CLI, #11529) and behave normally once it has passed.
+    """
+
+    def test_active_cooldown_blocks(self):
+        """Deadline 60s in the future -> should_compress() returns False
+        even though the token count is over the threshold."""
+        comp = _make_compressor(config_context_length=96000)
+        comp.last_prompt_tokens = 65_000
+        comp._summary_failure_cooldown_until = time.monotonic() + 60
+        assert not comp.should_compress(65_000)
+
+    def test_expired_cooldown_allows(self):
+        """Deadline 1s in the past -> the guard is inert; compression fires."""
+        comp = _make_compressor(config_context_length=96000)
+        comp.last_prompt_tokens = 65_000
+        comp._summary_failure_cooldown_until = time.monotonic() - 1
+        assert comp.should_compress(65_000)
+
+    def test_no_cooldown_allows(self):
+        """Default deadline (0.0, never set) -> compression is not blocked."""
+        comp = _make_compressor(config_context_length=96000)
+        comp.last_prompt_tokens = 65_000
+        assert comp._summary_failure_cooldown_until == 0.0
+        assert comp.should_compress(65_000)