diff --git a/tests/gateway/test_agent_cache.py b/tests/gateway/test_agent_cache.py index bba92d37a..e7974bd07 100644 --- a/tests/gateway/test_agent_cache.py +++ b/tests/gateway/test_agent_cache.py @@ -1715,6 +1715,77 @@ class TestAgentCacheMessageCountRebaseline: with runner._agent_cache_lock: assert runner._agent_cache["telegram:s1"][2] == 5 + def test_in_band_followup_reuses_cached_agent(self, tmp_path): + """Behavioral regression for the in-band queued (/queue) follow-up. + + #46237 re-baselines the snapshot only on the EXTERNAL-turn boundary + (in ``_handle_message_with_agent``, after the whole ``_run_agent`` + chain unwinds). The recursive in-band follow-up re-enters the cache + guard MID-CHAIN — while the cache still holds the build-time snapshot + and the first turn has already flushed its own rows — so without a + re-baseline at the follow-up boundary the guard sees the grown count + and rebuilds the agent on THIS process's own writes, re-introducing + the every-turn rebuild #46237 set out to fix, on the follow-up path. + + Pins both halves at that boundary: WITHOUT the re-baseline the in-band + follow-up would rebuild; WITH it the follow-up REUSES the warm agent. + The guard's reuse decision (``_guard_would_reuse``) mirrors the real + cache-hit guard, which reads ``get_session(session_id)`` with the same + ``session_id`` the recursive ``_run_agent`` call is given. + """ + from hermes_state import SessionDB + + db = SessionDB(db_path=tmp_path / "sessions.db") + db.create_session("s1", source="telegram") + runner = self._runner_with_db(db) + agent = object() + + # First turn: cache miss -> build. Snapshot is the pre-turn count. + _row = db.get_session("s1") + build_count = _row.get("message_count", 0) if _row else 0 + with runner._agent_cache_lock: + runner._agent_cache["telegram:s1"] = (agent, "sig", build_count) + + # First turn flushes its own user + assistant rows. + db.append_message("s1", role="user", content="u") + db.append_message("s1", role="assistant", content="a") + + # Bug reproduction: re-entering the guard at the in-band follow-up + # boundary WITHOUT the re-baseline sees the grown count and rebuilds. + assert self._guard_would_reuse(runner, "telegram:s1", "s1") is False + + # The fix: re-baseline at the follow-up boundary. + runner._refresh_agent_cache_message_count("telegram:s1", "s1") + + # The in-band follow-up now REUSES the cached, warm-prefix agent. + assert self._guard_would_reuse(runner, "telegram:s1", "s1") is True + with runner._agent_cache_lock: + assert runner._agent_cache["telegram:s1"][0] is agent + + def test_in_band_followup_rebaseline_precedes_recursion(self): + """Pin the FIX PLACEMENT in the production source. + + The behavioral test above proves the re-baseline makes the in-band + follow-up reuse the cached agent, but it calls the helper directly — + it would still pass if the production call were deleted. This guards + the actual call site: inside ``_run_agent`` the queued (/queue) + follow-up recurses via ``followup_result = await self._run_agent(...)`` + and the re-baseline MUST run BEFORE that recursion (running it only + after, like the external-turn site at 8888, is too late for the + in-band path — the follow-up would already have rebuilt). + """ + import inspect + from gateway.run import GatewayRunner + + src = inspect.getsource(GatewayRunner._run_agent) + marker = "followup_result = await self._run_agent(" + assert marker in src, "in-band queued follow-up recursion not found in _run_agent" + before_recursion = src[: src.index(marker)] + assert "_refresh_agent_cache_message_count" in before_recursion, ( + "the in-band queued follow-up recursion must be preceded by a " + "_refresh_agent_cache_message_count re-baseline, else the follow-up " + "rebuilds the agent on this process's own first-turn writes" + ) class TestCrossProcessInvalidationDefersCleanup: """#52197: cross-process cache invalidation must NOT run agent cleanup