test(gateway): pin in-band follow-up re-baseline boundary + placement

This commit is contained in:
Evo 2026-06-15 20:46:59 +08:00 committed by Teknium
parent b4cacba6ae
commit 6bc0a7ce80

View file

@ -1715,6 +1715,77 @@ class TestAgentCacheMessageCountRebaseline:
with runner._agent_cache_lock:
assert runner._agent_cache["telegram:s1"][2] == 5
def test_in_band_followup_reuses_cached_agent(self, tmp_path):
"""Behavioral regression for the in-band queued (/queue) follow-up.
#46237 re-baselines the snapshot only on the EXTERNAL-turn boundary
(in ``_handle_message_with_agent``, after the whole ``_run_agent``
chain unwinds). The recursive in-band follow-up re-enters the cache
guard MID-CHAIN while the cache still holds the build-time snapshot
and the first turn has already flushed its own rows so without a
re-baseline at the follow-up boundary the guard sees the grown count
and rebuilds the agent on THIS process's own writes, re-introducing
the every-turn rebuild #46237 set out to fix, on the follow-up path.
Pins both halves at that boundary: WITHOUT the re-baseline the in-band
follow-up would rebuild; WITH it the follow-up REUSES the warm agent.
The guard's reuse decision (``_guard_would_reuse``) mirrors the real
cache-hit guard, which reads ``get_session(session_id)`` with the same
``session_id`` the recursive ``_run_agent`` call is given.
"""
from hermes_state import SessionDB
db = SessionDB(db_path=tmp_path / "sessions.db")
db.create_session("s1", source="telegram")
runner = self._runner_with_db(db)
agent = object()
# First turn: cache miss -> build. Snapshot is the pre-turn count.
_row = db.get_session("s1")
build_count = _row.get("message_count", 0) if _row else 0
with runner._agent_cache_lock:
runner._agent_cache["telegram:s1"] = (agent, "sig", build_count)
# First turn flushes its own user + assistant rows.
db.append_message("s1", role="user", content="u")
db.append_message("s1", role="assistant", content="a")
# Bug reproduction: re-entering the guard at the in-band follow-up
# boundary WITHOUT the re-baseline sees the grown count and rebuilds.
assert self._guard_would_reuse(runner, "telegram:s1", "s1") is False
# The fix: re-baseline at the follow-up boundary.
runner._refresh_agent_cache_message_count("telegram:s1", "s1")
# The in-band follow-up now REUSES the cached, warm-prefix agent.
assert self._guard_would_reuse(runner, "telegram:s1", "s1") is True
with runner._agent_cache_lock:
assert runner._agent_cache["telegram:s1"][0] is agent
def test_in_band_followup_rebaseline_precedes_recursion(self):
"""Pin the FIX PLACEMENT in the production source.
The behavioral test above proves the re-baseline makes the in-band
follow-up reuse the cached agent, but it calls the helper directly
it would still pass if the production call were deleted. This guards
the actual call site: inside ``_run_agent`` the queued (/queue)
follow-up recurses via ``followup_result = await self._run_agent(...)``
and the re-baseline MUST run BEFORE that recursion (running it only
after, like the external-turn site at 8888, is too late for the
in-band path the follow-up would already have rebuilt).
"""
import inspect
from gateway.run import GatewayRunner
src = inspect.getsource(GatewayRunner._run_agent)
marker = "followup_result = await self._run_agent("
assert marker in src, "in-band queued follow-up recursion not found in _run_agent"
before_recursion = src[: src.index(marker)]
assert "_refresh_agent_cache_message_count" in before_recursion, (
"the in-band queued follow-up recursion must be preceded by a "
"_refresh_agent_cache_message_count re-baseline, else the follow-up "
"rebuilds the agent on this process's own first-turn writes"
)
class TestCrossProcessInvalidationDefersCleanup:
"""#52197: cross-process cache invalidation must NOT run agent cleanup