From 23518a5e02475ad8cbf18015147dff2329f4a5c7 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Wed, 1 Jul 2026 16:15:15 +0530 Subject: [PATCH] test(review): add integration guards for the two isolation wirings (review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2c mutation-check found the salvaged tests covered only the pure helpers (_is_background_review_harness_message / _strip_background_review_harness) — the two integration WIRINGS had zero coverage: removing the _persist_disabled guard in _flush_messages_to_session_db, or the _strip call in get_messages_as_conversation, left all 13 tests green. Add: - TestPersistDisabledHardStop: a _persist_disabled agent's flush writes nothing to a live SessionDB (guards the run_agent hard-stop). - TestGetMessagesAsConversationStripsHarness: a session with stray harness rows resumes clean end-to-end through get_messages_as_conversation (guards the hermes_state load-time wiring). Mutation-checked: each new test fails when its wiring is reverted. --- ...est_background_review_session_isolation.py | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/tests/test_background_review_session_isolation.py b/tests/test_background_review_session_isolation.py index 66bb507d7..a4878293c 100644 --- a/tests/test_background_review_session_isolation.py +++ b/tests/test_background_review_session_isolation.py @@ -102,3 +102,83 @@ class TestStripBackgroundReviewHarness: ] out = _strip_background_review_harness(messages) assert [m["content"] for m in out] == ["real question", "real answer"] + + +class TestGetMessagesAsConversationStripsHarness: + """The load-on-read wiring: get_messages_as_conversation must actually call + _strip_background_review_harness, so a session polluted with stray harness + rows resumes clean end-to-end (not just the pure helper in isolation).""" + + def test_polluted_session_resumes_without_harness(self): + import tempfile + from pathlib import Path + from hermes_state import SessionDB + + with tempfile.TemporaryDirectory() as tmp: + db = SessionDB(db_path=Path(tmp) / "t.db") + try: + db.create_session(session_id="s1", source="cli") + db.append_message("s1", role="user", content="What's the weather?") + db.append_message("s1", role="assistant", content="It's sunny.") + # Stray background-review pollution written by an older build. + db.append_message( + "s1", role="user", + content="Review the conversation above and update the skill library with anything useful.", + ) + db.append_message("s1", role="assistant", content="I'll act as the curator now.") + db.append_message("s1", role="user", content="Thanks, now book a flight.") + + conv = db.get_messages_as_conversation("s1") + contents = [m["content"] for m in conv] + + # Harness user turn AND its curator-mode assistant reply are gone. + assert not any( + isinstance(c, str) and c.lstrip().startswith("Review the conversation above") + for c in contents + ) + assert "I'll act as the curator now." not in contents + # Genuine turns survive in order. + assert contents == ["What's the weather?", "It's sunny.", "Thanks, now book a flight."] + finally: + db.close() + + +class TestPersistDisabledHardStop: + """The isolation wiring: a _persist_disabled agent must never write to the + session store via _flush_messages_to_session_db, even with a live db set.""" + + def test_flush_is_a_noop_when_persist_disabled(self): + import os + import tempfile + from pathlib import Path + from unittest.mock import patch + from hermes_state import SessionDB + + with tempfile.TemporaryDirectory() as tmp: + db = SessionDB(db_path=Path(tmp) / "t.db") + try: + with patch.dict(os.environ, {"OPENROUTER_API_KEY": "test-key"}): + from run_agent import AIAgent + agent = AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", + model="test/model", + quiet_mode=True, + session_db=db, + session_id="s-review", + skip_context_files=True, + skip_memory=True, + ) + agent._ensure_db_session() + agent._persist_disabled = True + + agent._flush_messages_to_session_db( + [{"role": "user", "content": "Review the conversation above and update the skill library."}, + {"role": "assistant", "content": "curator reply"}], + [], + ) + + # Nothing written: the hard-stop fired before any append. + assert db.get_messages("s-review") == [] + finally: + db.close()