From 8db6ed7bd9a6db418aa3a4cfe8e718b8bc70b5d3 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Wed, 1 Jul 2026 13:20:55 +0530 Subject: [PATCH] fix(context): clamp -1 post-compression sentinel in sibling status paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Whole-bug-class follow-up to the tui_gateway fix: the same -1 last_prompt_tokens sentinel (parked by conversation_compression after a compression) leaked into other status readers, producing a raw -1 or a NEGATIVE usage_percent on the transitional turn: - agent/context_engine.py get_status() (the ABC default every external context engine inherits) — highest blast radius - gateway/slash_commands.py /usage context line - cli.py session usage printout All clamped to >=0, mirroring cli.py _get_status_bar_snapshot and the tui_gateway fix. Adds an ABC get_status sentinel-clamp regression test. --- agent/context_engine.py | 9 +++++++-- cli.py | 2 +- gateway/slash_commands.py | 7 ++++--- tests/agent/test_context_engine.py | 10 ++++++++++ tests/run_agent/test_percentage_clamp.py | 6 ++++-- 5 files changed, 26 insertions(+), 8 deletions(-) diff --git a/agent/context_engine.py b/agent/context_engine.py index 79c31fb48..ba2da561f 100644 --- a/agent/context_engine.py +++ b/agent/context_engine.py @@ -194,12 +194,17 @@ class ContextEngine(ABC): Default returns the standard fields run_agent.py expects. """ + # Clamp the -1 "compression just ran, awaiting real usage" sentinel + # (set by conversation_compression) to 0 so status readers don't see a + # raw -1 or a negative usage_percent on the transitional turn. Mirrors + # the CLI/gateway status-bar paths (cli.py, tui_gateway/server.py). 
+ last_prompt = self.last_prompt_tokens if self.last_prompt_tokens > 0 else 0 return { - "last_prompt_tokens": self.last_prompt_tokens, + "last_prompt_tokens": last_prompt, "threshold_tokens": self.threshold_tokens, "context_length": self.context_length, "usage_percent": ( - min(100, self.last_prompt_tokens / self.context_length * 100) + min(100, last_prompt / self.context_length * 100) if self.context_length else 0 ), "compression_count": self.compression_count, diff --git a/cli.py b/cli.py index 2b761166a..1d7702352 100644 --- a/cli.py +++ b/cli.py @@ -9261,7 +9261,7 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin): total = agent.session_total_tokens compressor = agent.context_compressor - last_prompt = compressor.last_prompt_tokens + last_prompt = compressor.last_prompt_tokens if compressor.last_prompt_tokens > 0 else 0 ctx_len = compressor.context_length pct = min(100, (last_prompt / ctx_len * 100)) if ctx_len else 0 compressions = compressor.compression_count diff --git a/gateway/slash_commands.py b/gateway/slash_commands.py index ccb09811e..f678a6fc5 100644 --- a/gateway/slash_commands.py +++ b/gateway/slash_commands.py @@ -3589,9 +3589,10 @@ class GatewaySlashCommandsMixin: # Context window and compressions ctx = agent.context_compressor - if ctx.last_prompt_tokens: - pct = min(100, ctx.last_prompt_tokens / ctx.context_length * 100) if ctx.context_length else 0 - lines.append(t("gateway.usage.label_context", used=f"{ctx.last_prompt_tokens:,}", total=f"{ctx.context_length:,}", pct=f"{pct:.0f}")) + _lpt = ctx.last_prompt_tokens if ctx.last_prompt_tokens > 0 else 0 + if _lpt: + pct = min(100, _lpt / ctx.context_length * 100) if ctx.context_length else 0 + lines.append(t("gateway.usage.label_context", used=f"{_lpt:,}", total=f"{ctx.context_length:,}", pct=f"{pct:.0f}")) if ctx.compression_count: lines.append(t("gateway.usage.label_compressions", count=ctx.compression_count)) diff --git a/tests/agent/test_context_engine.py 
b/tests/agent/test_context_engine.py index d0a757301..70eb8c71c 100644 --- a/tests/agent/test_context_engine.py +++ b/tests/agent/test_context_engine.py @@ -120,6 +120,16 @@ class TestDefaults: assert status["threshold_tokens"] == 100000 assert 0 < status["usage_percent"] <= 100 + def test_default_get_status_clamps_post_compression_sentinel(self): + """After a compression, last_prompt_tokens is the -1 sentinel. get_status + must clamp it to 0 rather than export a raw -1 or a negative + usage_percent on the transitional turn.""" + engine = StubEngine() + engine.last_prompt_tokens = -1 + status = engine.get_status() + assert status["last_prompt_tokens"] == 0 + assert status["usage_percent"] >= 0 + def test_on_session_reset(self): engine = StubEngine() engine.last_prompt_tokens = 999 diff --git a/tests/run_agent/test_percentage_clamp.py b/tests/run_agent/test_percentage_clamp.py index ca407ef8d..6c78eb562 100644 --- a/tests/run_agent/test_percentage_clamp.py +++ b/tests/run_agent/test_percentage_clamp.py @@ -84,8 +84,10 @@ class TestSourceLinesAreClamped: # The /usage stats handler was extracted from gateway/run.py into # gateway/slash_commands.py (god-file decomposition Phase 3b). src = self._read_file("gateway/slash_commands.py") - # Check that the stats handler has min(100, ...) - assert "min(100, ctx.last_prompt_tokens" in src, ( + # Check that the stats handler clamps the context pct with min(100, ...). + # The needle names the clamped `_lpt` local that now holds the occupancy + # value (#50421), so renaming that local requires updating this assert. + assert "min(100, _lpt / ctx.context_length" in src, ( "gateway/slash_commands.py stats pct is not clamped with min(100, ...)" )