From 8db6ed7bd9a6db418aa3a4cfe8e718b8bc70b5d3 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Wed, 1 Jul 2026 13:20:55 +0530
Subject: [PATCH] fix(context): clamp -1 post-compression sentinel in sibling
 status paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Whole-bug-class follow-up to the tui_gateway fix: the same -1
last_prompt_tokens sentinel (parked by conversation_compression after a
compression) leaked into other status readers, producing a raw -1 or a
NEGATIVE usage_percent on the transitional turn:

- agent/context_engine.py get_status() (the ABC default every external
  context engine inherits) — highest blast radius
- gateway/slash_commands.py /usage context line
- cli.py session usage printout

All clamped to >=0, mirroring cli.py _get_status_bar_snapshot and the
tui_gateway fix. Adds an ABC get_status sentinel-clamp regression test.
---
 agent/context_engine.py                  |  9 +++++++--
 cli.py                                   |  2 +-
 gateway/slash_commands.py                |  7 ++++---
 tests/agent/test_context_engine.py       | 10 ++++++++++
 tests/run_agent/test_percentage_clamp.py |  6 ++++--
 5 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/agent/context_engine.py b/agent/context_engine.py
index 79c31fb48..ba2da561f 100644
--- a/agent/context_engine.py
+++ b/agent/context_engine.py
@@ -194,12 +194,17 @@ class ContextEngine(ABC):
 
         Default returns the standard fields run_agent.py expects.
         """
+        # Clamp the -1 "compression just ran, awaiting real usage" sentinel
+        # (set by conversation_compression) to 0 so status readers don't see a
+        # raw -1 or a negative usage_percent on the transitional turn. Mirrors
+        # the CLI/gateway status-bar paths (cli.py, tui_gateway/server.py).
+        last_prompt = self.last_prompt_tokens if self.last_prompt_tokens > 0 else 0
         return {
-            "last_prompt_tokens": self.last_prompt_tokens,
+            "last_prompt_tokens": last_prompt,
             "threshold_tokens": self.threshold_tokens,
             "context_length": self.context_length,
             "usage_percent": (
-                min(100, self.last_prompt_tokens / self.context_length * 100)
+                min(100, last_prompt / self.context_length * 100)
                 if self.context_length else 0
             ),
             "compression_count": self.compression_count,
diff --git a/cli.py b/cli.py
index 2b761166a..1d7702352 100644
--- a/cli.py
+++ b/cli.py
@@ -9261,7 +9261,7 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
         total = agent.session_total_tokens
 
         compressor = agent.context_compressor
-        last_prompt = compressor.last_prompt_tokens
+        last_prompt = compressor.last_prompt_tokens if compressor.last_prompt_tokens > 0 else 0
         ctx_len = compressor.context_length
         pct = min(100, (last_prompt / ctx_len * 100)) if ctx_len else 0
         compressions = compressor.compression_count
diff --git a/gateway/slash_commands.py b/gateway/slash_commands.py
index ccb09811e..f678a6fc5 100644
--- a/gateway/slash_commands.py
+++ b/gateway/slash_commands.py
@@ -3589,9 +3589,10 @@ class GatewaySlashCommandsMixin:
 
             # Context window and compressions
             ctx = agent.context_compressor
-            if ctx.last_prompt_tokens:
-                pct = min(100, ctx.last_prompt_tokens / ctx.context_length * 100) if ctx.context_length else 0
-                lines.append(t("gateway.usage.label_context", used=f"{ctx.last_prompt_tokens:,}", total=f"{ctx.context_length:,}", pct=f"{pct:.0f}"))
+            _lpt = ctx.last_prompt_tokens if ctx.last_prompt_tokens > 0 else 0
+            if _lpt:
+                pct = min(100, _lpt / ctx.context_length * 100) if ctx.context_length else 0
+                lines.append(t("gateway.usage.label_context", used=f"{_lpt:,}", total=f"{ctx.context_length:,}", pct=f"{pct:.0f}"))
             if ctx.compression_count:
                 lines.append(t("gateway.usage.label_compressions", count=ctx.compression_count))
 
diff --git a/tests/agent/test_context_engine.py b/tests/agent/test_context_engine.py
index d0a757301..70eb8c71c 100644
--- a/tests/agent/test_context_engine.py
+++ b/tests/agent/test_context_engine.py
@@ -120,6 +120,16 @@ class TestDefaults:
         assert status["threshold_tokens"] == 100000
         assert 0 < status["usage_percent"] <= 100
 
+    def test_default_get_status_clamps_post_compression_sentinel(self):
+        """After a compression last_prompt_tokens holds the -1 sentinel; get_status
+        must export 0 for it, never a raw -1 or a negative usage_percent."""
+        engine = StubEngine()
+        engine.last_prompt_tokens = -1
+        status = engine.get_status()
+        assert status["last_prompt_tokens"] == 0
+        # A clamped prompt count of 0 must yield exactly 0% usage, not merely >= 0.
+        assert status["usage_percent"] == 0
+
     def test_on_session_reset(self):
         engine = StubEngine()
         engine.last_prompt_tokens = 999
diff --git a/tests/run_agent/test_percentage_clamp.py b/tests/run_agent/test_percentage_clamp.py
index ca407ef8d..6c78eb562 100644
--- a/tests/run_agent/test_percentage_clamp.py
+++ b/tests/run_agent/test_percentage_clamp.py
@@ -84,8 +84,10 @@ class TestSourceLinesAreClamped:
         # The /usage stats handler was extracted from gateway/run.py into
         # gateway/slash_commands.py (god-file decomposition Phase 3b).
         src = self._read_file("gateway/slash_commands.py")
-        # Check that the stats handler has min(100, ...)
-        assert "min(100, ctx.last_prompt_tokens" in src, (
+        # Check that the stats handler clamps the context pct with min(100, ...).
+        # The occupancy value is first read into a clamped `_lpt` local (#50421),
+        # so the expression to look for is spelled with `_lpt`.
+        assert "min(100, _lpt / ctx.context_length" in src, (
             "gateway/slash_commands.py stats pct is not clamped with min(100, ...)"
         )