test(gateway): pin in-band follow-up re-baseline boundary + placement

2026-06-15 20:46:59 +08:00 · 2026-06-15 20:46:59 +08:00 · 6bc0a7ce80
commit 6bc0a7ce80
parent b4cacba6ae
1 changed files with 71 additions and 0 deletions
--- a/tests/gateway/test_agent_cache.py
+++ b/tests/gateway/test_agent_cache.py
@ -1715,6 +1715,77 @@ class TestAgentCacheMessageCountRebaseline:
        with runner._agent_cache_lock:
            assert runner._agent_cache["telegram:s1"][2] == 5

+    def test_in_band_followup_reuses_cached_agent(self, tmp_path):
+        """Behavioral regression for the in-band queued (/queue) follow-up.
+
+        #46237 re-baselines the snapshot only on the EXTERNAL-turn boundary
+        (in ``_handle_message_with_agent``, after the whole ``_run_agent``
+        chain unwinds).  The recursive in-band follow-up re-enters the cache
+        guard MID-CHAIN — while the cache still holds the build-time snapshot
+        and the first turn has already flushed its own rows — so without a
+        re-baseline at the follow-up boundary the guard sees the grown count
+        and rebuilds the agent on THIS process's own writes, re-introducing
+        the every-turn rebuild #46237 set out to fix, on the follow-up path.
+
+        Pins both halves at that boundary: WITHOUT the re-baseline the in-band
+        follow-up would rebuild; WITH it the follow-up REUSES the warm agent.
+        The guard's reuse decision (``_guard_would_reuse``) mirrors the real
+        cache-hit guard, which reads ``get_session(session_id)`` with the same
+        ``session_id`` the recursive ``_run_agent`` call is given.
+        """
+        from hermes_state import SessionDB
+
+        db = SessionDB(db_path=tmp_path / "sessions.db")
+        db.create_session("s1", source="telegram")
+        runner = self._runner_with_db(db)
+        agent = object()
+
+        # First turn: cache miss -> build. Snapshot is the pre-turn count.
+        _row = db.get_session("s1")
+        build_count = _row.get("message_count", 0) if _row else 0
+        with runner._agent_cache_lock:
+            runner._agent_cache["telegram:s1"] = (agent, "sig", build_count)
+
+        # First turn flushes its own user + assistant rows.
+        db.append_message("s1", role="user", content="u")
+        db.append_message("s1", role="assistant", content="a")
+
+        # Bug reproduction: re-entering the guard at the in-band follow-up
+        # boundary WITHOUT the re-baseline sees the grown count and rebuilds.
+        assert self._guard_would_reuse(runner, "telegram:s1", "s1") is False
+
+        # The fix: re-baseline at the follow-up boundary.
+        runner._refresh_agent_cache_message_count("telegram:s1", "s1")
+
+        # The in-band follow-up now REUSES the cached, warm-prefix agent.
+        assert self._guard_would_reuse(runner, "telegram:s1", "s1") is True
+        with runner._agent_cache_lock:
+            assert runner._agent_cache["telegram:s1"][0] is agent
+
+    def test_in_band_followup_rebaseline_precedes_recursion(self):
+        """Pin the FIX PLACEMENT in the production source.
+
+        The behavioral test above proves the re-baseline makes the in-band
+        follow-up reuse the cached agent, but it calls the helper directly —
+        it would still pass if the production call were deleted.  This guards
+        the actual call site: inside ``_run_agent`` the queued (/queue)
+        follow-up recurses via ``followup_result = await self._run_agent(...)``
+        and the re-baseline MUST run BEFORE that recursion (running it only
+        after, like the external-turn site at 8888, is too late for the
+        in-band path — the follow-up would already have rebuilt).
+        """
+        import inspect
+        from gateway.run import GatewayRunner
+
+        src = inspect.getsource(GatewayRunner._run_agent)
+        marker = "followup_result = await self._run_agent("
+        assert marker in src, "in-band queued follow-up recursion not found in _run_agent"
+        before_recursion = src[: src.index(marker)]
+        assert "_refresh_agent_cache_message_count" in before_recursion, (
+            "the in-band queued follow-up recursion must be preceded by a "
+            "_refresh_agent_cache_message_count re-baseline, else the follow-up "
+            "rebuilds the agent on this process's own first-turn writes"
+        )

 class TestCrossProcessInvalidationDefersCleanup:
    """#52197: cross-process cache invalidation must NOT run agent cleanup