fix: make streaming reasoning-tag filter case-insensitive

The streaming think-tag suppressors in cli.py (_stream_delta) and
gateway/stream_consumer.py (_filter_and_accumulate) matched tag names
with case-sensitive str.find(), so only the exact-case literals in the
tag tuples were caught. Mixed-case variants a model may emit — <Think>,
<ThInK>, <REASONING>, <Thought> — slipped through and leaked raw
reasoning into the user-visible stream.

Match against a lowercased view of the buffer with lowercased tag names
at all three sites (open-tag boundary search, partial-tag hold-back,
close-tag search) in both paths. Only KNOWN tag names are matched — no
substring matching — and the block-boundary gating that protects prose
mentions of <think> is preserved.

- Adds one regression test, parametrized over 6 mixed-case tag variants,
  to each of tests/gateway/test_stream_consumer.py and
  tests/cli/test_stream_delta_think_tag.py.

Salvaged from PR #27289 by @YLChen-007.
This commit is contained in:
YLChen-007 2026-07-01 03:11:53 -07:00 committed by Teknium
parent f049227f31
commit e23f723389
5 changed files with 46 additions and 7 deletions

14
cli.py
View file

@ -5443,10 +5443,14 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
self._stream_last_was_newline = True # start of stream = boundary
if not getattr(self, "_in_reasoning_block", False):
# Case-insensitive matching against a lowercased view so
# mixed-case tag variants (<Think>, <THINKING>, …) are caught.
prefilt_lower = self._stream_prefilt.lower()
for tag in _OPEN_TAGS:
tag_lower = tag.lower()
search_start = 0
while True:
idx = self._stream_prefilt.find(tag, search_start)
idx = prefilt_lower.find(tag_lower, search_start)
if idx == -1:
break
# Check if this is a block boundary position
@ -5486,11 +5490,12 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
# Could also be a partial open tag at the end — hold it back
if not getattr(self, "_in_reasoning_block", False):
# Check for partial tag match at the end
# Check for partial tag match at the end (case-insensitive)
safe = self._stream_prefilt
for tag in _OPEN_TAGS:
tag_lower = tag.lower()
for i in range(1, len(tag)):
if self._stream_prefilt.endswith(tag[:i]):
if prefilt_lower.endswith(tag_lower[:i]):
safe = self._stream_prefilt[:-i]
break
if safe:
@ -5503,8 +5508,9 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
# Keep accumulating _stream_prefilt because close tags can arrive
# split across multiple tokens (e.g. "</REASONING_SCRATCH" + "PAD>...").
if getattr(self, "_in_reasoning_block", False):
prefilt_lower = self._stream_prefilt.lower()
for tag in _CLOSE_TAGS:
idx = self._stream_prefilt.find(tag)
idx = prefilt_lower.find(tag.lower())
if idx != -1:
self._in_reasoning_block = False
# When show_reasoning is on, route inner content to

View file

@ -394,12 +394,17 @@ class GatewayStreamConsumer:
self._think_buffer = ""
while buf:
# Case-insensitive matching: models emit mixed-case tag
# variants (<Think>, <THINKING>, …). Match against a
# lowercased view of the buffer with lowercased tag names so
# every case variant is caught with a single canonical form.
lower_buf = buf.lower()
if self._in_think_block:
# Look for the earliest closing tag
best_idx = -1
best_len = 0
for tag in self._CLOSE_THINK_TAGS:
idx = buf.find(tag)
idx = lower_buf.find(tag.lower())
if idx != -1 and (best_idx == -1 or idx < best_idx):
best_idx = idx
best_len = len(tag)
@ -422,9 +427,10 @@ class GatewayStreamConsumer:
best_idx = -1
best_len = 0
for tag in self._OPEN_THINK_TAGS:
tag_lower = tag.lower()
search_start = 0
while True:
idx = buf.find(tag, search_start)
idx = lower_buf.find(tag_lower, search_start)
if idx == -1:
break
# Block-boundary check (mirrors cli.py logic)
@ -460,8 +466,9 @@ class GatewayStreamConsumer:
# No opening tag — check for a partial tag at the tail
held_back = 0
for tag in self._OPEN_THINK_TAGS:
tag_lower = tag.lower()
for i in range(1, len(tag)):
if buf.endswith(tag[:i]) and i > held_back:
if lower_buf.endswith(tag_lower[:i]) and i > held_back:
held_back = i
if held_back:
self._accumulated += buf[:-held_back]

View file

@ -45,6 +45,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
# Auto-extracted from noreply emails + manual overrides
AUTHOR_MAP = {
"30854794+YLChen-007@users.noreply.github.com": "YLChen-007", # PR #27289 salvage (case-insensitive streaming reasoning-tag filter in cli.py _stream_delta + gateway stream_consumer so mixed-case variants like <Think>/<ThInK> are suppressed, not just the hardcoded case literals)
"259353979+testingbuddies24@users.noreply.github.com": "testingbuddies24", # PR #43192 salvage (strip orphan think-tag close tags in progressive gateway stream so a bare </think> whose open was dropped upstream can't leak to the user)
"shx_929@163.com": "Lazymonter", # PR #42914 salvage (retry launchd bootstrap after bootout on EIO for install/start instead of degrading to detached)
"5848605+itenev@users.noreply.github.com": "itenev", # PR #22753 salvage (asyncify model-context resolution in gateway message path so blocking requests.get can't starve Discord heartbeats)

View file

@ -1,6 +1,9 @@
"""Tests for _stream_delta's handling of <think> tags in prose vs real reasoning blocks."""
import sys
import os
import pytest
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
@ -110,6 +113,18 @@ class TestRealReasoningBlock:
cli._stream_delta(" <think>")
assert cli._in_reasoning_block
@pytest.mark.parametrize(
    "tag",
    ["THINK", "Think", "ThInK", "THOUGHT", "REASONING", "Thinking"],
)
def test_reasoning_tags_are_case_insensitive(self, tag):
    """A reasoning block whose tags use non-canonical casing is still hidden.

    Feeds one delta holding an open tag, reasoning text, the matching
    close tag and a visible answer, then checks that only the answer
    was emitted and that the reasoning state was cleared.
    """
    stub = _make_cli_stub()
    delta = f"<{tag}>hidden reasoning</{tag}>Visible answer"
    stub._stream_delta(delta)

    # The close tag arrived in the same delta, so the block must be closed.
    assert not stub._in_reasoning_block

    emitted = "".join(stub._emitted)
    assert emitted == "Visible answer"
    assert "hidden reasoning" not in emitted
class TestFlushRecovery:
"""_flush_stream should recover content from false-positive reasoning blocks."""

View file

@ -1464,6 +1464,16 @@ class TestFilterAndAccumulate:
c._filter_and_accumulate("<THINKING>caps</THINKING>answer")
assert c._accumulated == "answer"
@pytest.mark.parametrize(
    "tag",
    ["THINK", "Think", "ThInK", "THOUGHT", "REASONING", "Thinking"],
)
def test_reasoning_tags_are_case_insensitive(self, tag):
    """A reasoning block whose tags use non-canonical casing is filtered out.

    Passes one chunk holding an open tag, reasoning text, the matching
    close tag and a visible answer, then checks that only the answer
    was accumulated.
    """
    consumer = _make_consumer()
    chunk = f"<{tag}>hidden reasoning</{tag}>Visible answer"
    consumer._filter_and_accumulate(chunk)

    accumulated = consumer._accumulated
    assert accumulated == "Visible answer"
    assert "hidden reasoning" not in accumulated
def test_prose_mention_not_stripped(self):
"""<think> mentioned mid-line in prose should NOT trigger filtering."""
c = _make_consumer()