From e23f723389ed4c8da1f72e827d26642a077c6fbb Mon Sep 17 00:00:00 2001 From: YLChen-007 <30854794+YLChen-007@users.noreply.github.com> Date: Wed, 1 Jul 2026 03:11:53 -0700 Subject: [PATCH] fix: make streaming reasoning-tag filter case-insensitive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The streaming think-tag suppressors in cli.py (_stream_delta) and gateway/stream_consumer.py (_filter_and_accumulate) matched tag names with case-sensitive str.find(), so only the exact-case literals in the tag tuples were caught. Mixed-case variants a model may emit — <THINK>, <Think>, <THOUGHT>, <REASONING> — slipped through and leaked raw reasoning into the user-visible stream. Match against a lowercased view of the buffer with lowercased tag names at all three sites (open-tag boundary search, partial-tag hold-back, close-tag search) in both paths. Only KNOWN tag names are matched — no substring matching — and the block-boundary gating that protects prose mentions of <think> is preserved. - 6 parametrized case-insensitive regression tests in each of tests/gateway/test_stream_consumer.py and tests/cli/test_stream_delta_think_tag.py. Salvaged from PR #27289 by @YLChen-007. --- cli.py | 14 ++++++++++---- gateway/stream_consumer.py | 13 ++++++++++--- scripts/release.py | 1 + tests/cli/test_stream_delta_think_tag.py | 15 +++++++++++++++ tests/gateway/test_stream_consumer.py | 10 ++++++++++ 5 files changed, 46 insertions(+), 7 deletions(-) diff --git a/cli.py b/cli.py index a2ab0cc78..2ceac1059 100644 --- a/cli.py +++ b/cli.py @@ -5443,10 +5443,14 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin): self._stream_last_was_newline = True # start of stream = boundary if not getattr(self, "_in_reasoning_block", False): + # Case-insensitive matching against a lowercased view so + # mixed-case tag variants (<THINK>, <Think>, …) are caught. 
+ prefilt_lower = self._stream_prefilt.lower() for tag in _OPEN_TAGS: + tag_lower = tag.lower() search_start = 0 while True: - idx = self._stream_prefilt.find(tag, search_start) + idx = prefilt_lower.find(tag_lower, search_start) if idx == -1: break # Check if this is a block boundary position @@ -5486,11 +5490,12 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin): # Could also be a partial open tag at the end — hold it back if not getattr(self, "_in_reasoning_block", False): - # Check for partial tag match at the end + # Check for partial tag match at the end (case-insensitive) safe = self._stream_prefilt for tag in _OPEN_TAGS: + tag_lower = tag.lower() for i in range(1, len(tag)): - if self._stream_prefilt.endswith(tag[:i]): + if prefilt_lower.endswith(tag_lower[:i]): safe = self._stream_prefilt[:-i] break if safe: @@ -5503,8 +5508,9 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin): # Keep accumulating _stream_prefilt because close tags can arrive # split across multiple tokens (e.g. "..."). if getattr(self, "_in_reasoning_block", False): + prefilt_lower = self._stream_prefilt.lower() for tag in _CLOSE_TAGS: - idx = self._stream_prefilt.find(tag) + idx = prefilt_lower.find(tag.lower()) if idx != -1: self._in_reasoning_block = False # When show_reasoning is on, route inner content to diff --git a/gateway/stream_consumer.py b/gateway/stream_consumer.py index b6537f916..a08e169f2 100644 --- a/gateway/stream_consumer.py +++ b/gateway/stream_consumer.py @@ -394,12 +394,17 @@ class GatewayStreamConsumer: self._think_buffer = "" while buf: + # Case-insensitive matching: models emit mixed-case tag + # variants (<THINK>, <Think>, …). Match against a + # lowercased view of the buffer with lowercased tag names so + # every case variant is caught with a single canonical form. 
+ lower_buf = buf.lower() if self._in_think_block: # Look for the earliest closing tag best_idx = -1 best_len = 0 for tag in self._CLOSE_THINK_TAGS: - idx = buf.find(tag) + idx = lower_buf.find(tag.lower()) if idx != -1 and (best_idx == -1 or idx < best_idx): best_idx = idx best_len = len(tag) @@ -422,9 +427,10 @@ class GatewayStreamConsumer: best_idx = -1 best_len = 0 for tag in self._OPEN_THINK_TAGS: + tag_lower = tag.lower() search_start = 0 while True: - idx = buf.find(tag, search_start) + idx = lower_buf.find(tag_lower, search_start) if idx == -1: break # Block-boundary check (mirrors cli.py logic) @@ -460,8 +466,9 @@ class GatewayStreamConsumer: # No opening tag — check for a partial tag at the tail held_back = 0 for tag in self._OPEN_THINK_TAGS: + tag_lower = tag.lower() for i in range(1, len(tag)): - if buf.endswith(tag[:i]) and i > held_back: + if lower_buf.endswith(tag_lower[:i]) and i > held_back: held_back = i if held_back: self._accumulated += buf[:-held_back] diff --git a/scripts/release.py b/scripts/release.py index 66b8ad2b9..bf459051f 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -45,6 +45,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json" # Auto-extracted from noreply emails + manual overrides AUTHOR_MAP = { + "30854794+YLChen-007@users.noreply.github.com": "YLChen-007", # PR #27289 salvage (case-insensitive streaming reasoning-tag filter in cli.py _stream_delta + gateway stream_consumer so mixed-case variants like <THINK> / <Think> are suppressed, not just the hardcoded case literals) "259353979+testingbuddies24@users.noreply.github.com": "testingbuddies24", # PR #43192 salvage (strip orphan think-tag close tags in progressive gateway stream so a bare </think> whose open was dropped upstream can't leak to the user) "shx_929@163.com": "Lazymonter", # PR #42914 salvage (retry launchd bootstrap after bootout on EIO for install/start instead of degrading to detached) "5848605+itenev@users.noreply.github.com": "itenev", # PR #22753 salvage 
(asyncify model-context resolution in gateway message path so blocking requests.get can't starve Discord heartbeats) diff --git a/tests/cli/test_stream_delta_think_tag.py b/tests/cli/test_stream_delta_think_tag.py index 93c738b73..331988bfa 100644 --- a/tests/cli/test_stream_delta_think_tag.py +++ b/tests/cli/test_stream_delta_think_tag.py @@ -1,6 +1,9 @@ """Tests for _stream_delta's handling of <think> tags in prose vs real reasoning blocks.""" import sys import os + +import pytest + sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) @@ -110,6 +113,18 @@ class TestRealReasoningBlock: cli._stream_delta(" ") assert cli._in_reasoning_block + @pytest.mark.parametrize( + "tag", + ["THINK", "Think", "ThInK", "THOUGHT", "REASONING", "Thinking"], + ) + def test_reasoning_tags_are_case_insensitive(self, tag): + cli = _make_cli_stub() + cli._stream_delta(f"<{tag}>hidden reasoning</{tag}>Visible answer") + assert not cli._in_reasoning_block + full = "".join(cli._emitted) + assert full == "Visible answer" + assert "hidden reasoning" not in full + class TestFlushRecovery: """_flush_stream should recover content from false-positive reasoning blocks.""" diff --git a/tests/gateway/test_stream_consumer.py b/tests/gateway/test_stream_consumer.py index 009621e7c..cd49d3d74 100644 --- a/tests/gateway/test_stream_consumer.py +++ b/tests/gateway/test_stream_consumer.py @@ -1464,6 +1464,16 @@ class TestFilterAndAccumulate: c._filter_and_accumulate("capsanswer") assert c._accumulated == "answer" + @pytest.mark.parametrize( + "tag", + ["THINK", "Think", "ThInK", "THOUGHT", "REASONING", "Thinking"], + ) + def test_reasoning_tags_are_case_insensitive(self, tag): + c = _make_consumer() + c._filter_and_accumulate(f"<{tag}>hidden reasoning</{tag}>Visible answer") + assert c._accumulated == "Visible answer" + assert "hidden reasoning" not in c._accumulated + def test_prose_mention_not_stripped(self): """<think> mentioned mid-line in prose should NOT trigger filtering.""" c = _make_consumer()