Merge pull request #50762 from NousResearch/salvage/defer-preflight-after-compaction

fix(agent): defer preflight compaction until real usage after a compaction (#23767, #36718)
This commit is contained in:
kshitij 2026-06-22 17:10:03 +05:30 committed by GitHub
commit 065946d84f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 34 additions and 0 deletions

View file

@ -878,6 +878,18 @@ class ContextCompressor(ContextEngine):
"""
if rough_tokens < self.threshold_tokens:
return False
# Immediately after a compaction the post-compression path sets
# ``awaiting_real_usage_after_compression`` and parks
# ``last_prompt_tokens = -1``, but ``last_real_prompt_tokens`` still
# holds the STALE pre-compression value (above threshold — that's why
# compaction fired). Without this guard that stale value defeats the
# ``last_real_prompt_tokens >= threshold_tokens`` check below, so
# preflight fires a SECOND compaction before the provider has reported
# real token usage for the now-shorter conversation. Defer for exactly
# one turn; update_from_response() clears the flag when real usage
# arrives. (#36718)
if self.awaiting_real_usage_after_compression:
return True
if self.last_real_prompt_tokens <= 0:
return False
if self.last_real_prompt_tokens >= self.threshold_tokens:

View file

@ -86,6 +86,28 @@ class TestPreflightDeferral:
assert compressor.should_defer_preflight_to_real_usage(93_000) is False
def test_defers_immediately_after_compaction_with_stale_real_prompt(self, compressor):
    """Regression test for #36718: the awaiting flag must force deferral.

    Right after a compaction, ``last_real_prompt_tokens`` still carries the
    stale pre-compression reading, which sits above the threshold. With
    ``awaiting_real_usage_after_compression`` set, preflight has to be
    deferred so a SECOND compaction is not fired before real
    post-compaction usage arrives.
    """
    threshold = 85_000
    # Stale pre-compression reading: on its own it would take the
    # `>= threshold => False` short-circuit and defeat deferral, which is
    # exactly what the flag guard has to prevent.
    stale_real_prompt = 120_000
    rough_estimate = 95_000

    compressor.threshold_tokens = threshold
    compressor.last_real_prompt_tokens = stale_real_prompt
    compressor.awaiting_real_usage_after_compression = True

    deferred = compressor.should_defer_preflight_to_real_usage(rough_estimate)
    assert deferred is True
def test_resumes_normal_deferral_after_flag_cleared(self, compressor):
    """The deferral forced by the awaiting flag is not permanent.

    After update_from_response() has cleared the flag, the regular
    baseline/growth deferral logic is back in charge.
    """
    threshold = 85_000
    stale_real_prompt = 120_000
    rough_estimate = 95_000

    compressor.threshold_tokens = threshold
    compressor.last_real_prompt_tokens = stale_real_prompt
    compressor.awaiting_real_usage_after_compression = False

    # Flag cleared + stale-high real prompt: the `>= threshold`
    # short-circuit applies again, so preflight is not deferred.
    deferred = compressor.should_defer_preflight_to_real_usage(rough_estimate)
    assert deferred is False
class TestCompress: