diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py
index 7b4cb62ef..78e5639b4 100644
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@@ -8513,3 +8513,22 @@ def test_get_usage_reports_real_current_occupancy():
     assert usage["context_used"] == 60_000
     assert usage["context_max"] == 120_000
     assert usage["context_percent"] == 50
+
+
+def test_get_usage_clamps_post_compression_sentinel():
+    """Right after a compression, last_prompt_tokens is the -1 sentinel
+    (conversation_compression sets it until the next real usage report). It is
+    truthy, so `or 0` doesn't neutralize it — the guard must clamp <0 to 0 so
+    the transitional turn emits no gauge instead of leaking context_used=-1."""
+    agent = types.SimpleNamespace(
+        model="test-model",
+        session_total_tokens=4_000_000,
+        context_compressor=types.SimpleNamespace(
+            last_prompt_tokens=-1,
+            context_length=1_048_576,
+            compression_count=6,
+        ),
+    )
+    usage = server._get_usage(agent)
+    assert "context_used" not in usage
+    assert "context_percent" not in usage
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index bfdded9df..9dd54c9b6 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -2977,7 +2977,13 @@ def _get_usage(agent) -> dict:
         # fabricated 0% or the old cumulative reading. The built-in compressor
         # always reports a real last_prompt_tokens once a turn runs, so it is
         # unaffected.
+        # Clamp the -1 "compression just ran, awaiting real usage" sentinel
+        # (conversation_compression.py) to 0 so the transitional turn reads as
+        # unknown (no gauge) instead of leaking context_used=-1. Matches the
+        # CLI status-bar path (cli.py _get_status_bar_snapshot).
         last_prompt = getattr(comp, "last_prompt_tokens", 0) or 0
+        if last_prompt < 0:
+            last_prompt = 0
         ctx_max = getattr(comp, "context_length", 0) or 0
         if ctx_max and last_prompt:
             usage["context_used"] = last_prompt