fix(gateway): strip orphan think-tag close tags in progressive stream

When a model emits an inline <think>...</think> block but the opening
tag is dropped upstream (thinking-mode toggle, truncated stream, or
incomplete upstream filtering), the bare </think> close tag leaked
through to the user in the live progressive edit. The agent-side final
scrubber (agent/think_scrubber.py) already had _strip_orphan_close_tags;
this ports the same logic into GatewayStreamConsumer so the streaming
display stays clean too.

- _filter_and_accumulate: strip orphan close tags before appending the
  'no-opening-tag' branch text to _accumulated.
- _flush_think_buffer: same on stream end for held-back partials.
- 14 regression tests (TestStripOrphanCloseTags): all 6 close-tag
  variants, multi-tag, partial-tag-untouched, trailing whitespace,
  and end-to-end through _filter_and_accumulate / _flush_think_buffer.

Only strips KNOWN close-tag names (case-insensitive) — never arbitrary
tag-shaped substrings — so comparison operators and unrelated prose are
preserved.

Salvaged from PR #43192 by @testingbuddies24.
This commit is contained in:
testingbuddies24 2026-07-01 02:49:03 -07:00 committed by Teknium
parent 6a6fd42111
commit e07768a53f
3 changed files with 151 additions and 2 deletions

View file

@ -467,9 +467,48 @@ class GatewayStreamConsumer:
self._accumulated += buf[:-held_back]
self._think_buffer = buf[-held_back:]
else:
self._accumulated += buf
# No (partial) open tag — but the model may have
# emitted an orphan close tag like </think> on its
# own (e.g. when a thinking-mode toggle drops the
# matched open, or when upstream stripping is
# incomplete). Strip those before accumulating so
# they never reach the user.
self._accumulated += self._strip_orphan_close_tags(buf)
return
@classmethod
def _strip_orphan_close_tags(cls, text: str) -> str:
"""Remove any close tags from *text* that have no matching open.
Mirrors ``agent/think_scrubber.py::StreamingThinkScrubber.
_strip_orphan_close_tags`` so the progressive-display filter
behaves the same as the post-stream final-response scrubber.
An orphan close tag is always noise stripped along with any
trailing whitespace so surrounding prose flows naturally.
"""
if "</" not in text:
return text
text_lower = text.lower()
out: list[str] = []
i = 0
while i < len(text):
matched = False
if text_lower[i:i + 2] == "</":
for tag in cls._CLOSE_THINK_TAGS:
tag_lower = tag.lower()
tag_len = len(tag_lower)
if text_lower[i:i + tag_len] == tag_lower:
j = i + tag_len
while j < len(text) and text[j] in " \t\n\r":
j += 1
i = j
matched = True
break
if not matched:
out.append(text[i])
i += 1
return "".join(out)
def _flush_think_buffer(self) -> None:
"""Flush any held-back partial-tag buffer into accumulated text.
@ -477,7 +516,9 @@ class GatewayStreamConsumer:
was held back waiting for a possible opening tag is not lost.
"""
if self._think_buffer and not self._in_think_block:
self._accumulated += self._think_buffer
# Strip any orphan close tags that may have been held back —
# see _filter_and_accumulate for context.
self._accumulated += self._strip_orphan_close_tags(self._think_buffer)
self._think_buffer = ""
async def run(self) -> None:

View file

@ -45,6 +45,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
# Auto-extracted from noreply emails + manual overrides
AUTHOR_MAP = {
"259353979+testingbuddies24@users.noreply.github.com": "testingbuddies24", # PR #43192 salvage (strip orphan think-tag close tags in progressive gateway stream so a bare </think> whose open was dropped upstream can't leak to the user)
"5848605+itenev@users.noreply.github.com": "itenev", # PR #22753 salvage (asyncify model-context resolution in gateway message path so blocking requests.get can't starve Discord heartbeats)
"arthur.zhang@ingenico.com": "arthurzhang", # PR #34718 salvage (redact Slack App-Level xapp- tokens in agent/redact.py + gateway/run.py)
"290873280+rrevenanttt@users.noreply.github.com": "rrevenanttt", # PR #40773 salvage (close hardline rm bypass via quoted paths and ${HOME} brace form)

View file

@ -2246,3 +2246,110 @@ class TestRunStillCurrentGuard:
adapter.send.assert_not_called()
adapter.edit_message.assert_not_called()
assert consumer._final_response_sent is False
# ── _strip_orphan_close_tags regression tests ──────────────────────────
# Regression guard for the /think tag leak: when the stream consumer is
# NOT inside a think block, stray close tags like </think> must be
# stripped before text is accumulated — otherwise they leak to Telegram.
# (Reported by Tony on 2026-06-09.)
class TestStripOrphanCloseTags:
"""Verify orphan close tags are stripped from text the stream consumer
would accumulate while NOT inside a think block."""
@pytest.mark.parametrize(
"tag",
[
"</think>",
"</thinking>",
"</thought>",
"</reasoning>",
"</REASONING_SCRATCHPAD>",
"</THINKING>",
],
)
def test_all_close_tag_variants_stripped(self, tag):
text = f"before{tag}after"
result = GatewayStreamConsumer._strip_orphan_close_tags(text)
assert tag not in result
assert "before" in result and "after" in result
def test_no_close_tag_passthrough(self):
text = "Just normal text with no tags."
assert GatewayStreamConsumer._strip_orphan_close_tags(text) == text
def test_empty_string(self):
assert GatewayStreamConsumer._strip_orphan_close_tags("") == ""
def test_close_tag_with_trailing_whitespace(self):
"""The trailing whitespace after the tag should also be eaten so
surrounding prose flows naturally (matches StreamingThinkScrubber)."""
text = "Looking at this now.\n\n</think>\n\nThe answer is 42."
result = GatewayStreamConsumer._strip_orphan_close_tags(text)
assert "</think>" not in result
assert "Looking at this now" in result
assert "The answer is 42" in result
def test_multiple_orphan_close_tags(self):
text = "foo </think> bar </thinking> baz"
result = GatewayStreamConsumer._strip_orphan_close_tags(text)
assert "</think>" not in result
assert "</thinking>" not in result
assert "foo" in result and "bar" in result and "baz" in result
def test_orphan_close_does_not_eat_following_prose(self):
text = "answer </think> then this should remain"
result = GatewayStreamConsumer._strip_orphan_close_tags(text)
assert result == "answer then this should remain"
def test_partial_close_tag_not_stripped(self):
"""A partial tag like '</thi' should not be eaten — it's not yet
a recognized close tag, and eating it would corrupt following text."""
text = "before </thin after"
result = GatewayStreamConsumer._strip_orphan_close_tags(text)
assert result == text # unchanged — partial tag, no stripping
def test_filter_and_accumulate_strips_orphan_close(self):
"""End-to-end: feed an orphan close tag through _filter_and_accumulate
and verify the accumulated text does not contain the raw tag."""
adapter = MagicMock()
adapter.MAX_MESSAGE_LENGTH = 4096
config = StreamConsumerConfig(cursor="")
consumer = GatewayStreamConsumer(adapter, "chat_123", config)
# Simulate a stream delta that contains an orphan close tag with
# surrounding prose (the actual leak pattern reported 2026-06-09).
consumer._filter_and_accumulate(
"Here is the result you asked for.\n\n</think>\n\n"
"The answer is 42 and the cat is black."
)
# No raw close tag should remain in the accumulated text.
for tag in GatewayStreamConsumer._CLOSE_THINK_TAGS:
assert tag not in consumer._accumulated, (
f"Orphan close tag {tag!r} leaked into accumulated text: "
f"{consumer._accumulated!r}"
)
# Surrounding prose must survive intact.
assert "Here is the result" in consumer._accumulated
assert "The answer is 42" in consumer._accumulated
def test_flush_think_buffer_strips_orphan_close(self):
"""The end-of-stream flush should also strip orphan close tags from
any held-back buffer text."""
adapter = MagicMock()
adapter.MAX_MESSAGE_LENGTH = 4096
config = StreamConsumerConfig(cursor="")
consumer = GatewayStreamConsumer(adapter, "chat_123", config)
# Plant a held-back buffer with an orphan close tag (simulates the
# buffer being held while waiting for a possible opening tag, then
# flushed when the stream ends).
consumer._think_buffer = "trailing prose </think> more"
consumer._in_think_block = False
consumer._flush_think_buffer()
for tag in GatewayStreamConsumer._CLOSE_THINK_TAGS:
assert tag not in consumer._accumulated
assert "trailing prose" in consumer._accumulated
assert "more" in consumer._accumulated