test(review): add integration guards for the two isolation wirings (review)

Phase 2c mutation-check found the salvaged tests covered only the pure helpers
(_is_background_review_harness_message / _strip_background_review_harness) — the
two integration WIRINGS had zero coverage: removing the _persist_disabled guard
in _flush_messages_to_session_db, or the _strip call in
get_messages_as_conversation, left all 13 tests green.

Add:
- TestPersistDisabledHardStop: a _persist_disabled agent's flush writes nothing
  to a live SessionDB (guards the run_agent hard-stop).
- TestGetMessagesAsConversationStripsHarness: a session with stray harness rows
  resumes clean end-to-end through get_messages_as_conversation (guards the
  hermes_state load-time wiring).
Mutation-checked: each new test fails when its wiring is reverted.
This commit is contained in:
kshitijk4poor 2026-07-01 16:15:15 +05:30 committed by kshitij
parent e2fa509bf3
commit 23518a5e02

View file

@ -102,3 +102,83 @@ class TestStripBackgroundReviewHarness:
]
out = _strip_background_review_harness(messages)
assert [m["content"] for m in out] == ["real question", "real answer"]
class TestGetMessagesAsConversationStripsHarness:
"""The load-on-read wiring: get_messages_as_conversation must actually call
_strip_background_review_harness, so a session polluted with stray harness
rows resumes clean end-to-end (not just the pure helper in isolation)."""
def test_polluted_session_resumes_without_harness(self):
import tempfile
from pathlib import Path
from hermes_state import SessionDB
with tempfile.TemporaryDirectory() as tmp:
db = SessionDB(db_path=Path(tmp) / "t.db")
try:
db.create_session(session_id="s1", source="cli")
db.append_message("s1", role="user", content="What's the weather?")
db.append_message("s1", role="assistant", content="It's sunny.")
# Stray background-review pollution written by an older build.
db.append_message(
"s1", role="user",
content="Review the conversation above and update the skill library with anything useful.",
)
db.append_message("s1", role="assistant", content="I'll act as the curator now.")
db.append_message("s1", role="user", content="Thanks, now book a flight.")
conv = db.get_messages_as_conversation("s1")
contents = [m["content"] for m in conv]
# Harness user turn AND its curator-mode assistant reply are gone.
assert not any(
isinstance(c, str) and c.lstrip().startswith("Review the conversation above")
for c in contents
)
assert "I'll act as the curator now." not in contents
# Genuine turns survive in order.
assert contents == ["What's the weather?", "It's sunny.", "Thanks, now book a flight."]
finally:
db.close()
class TestPersistDisabledHardStop:
"""The isolation wiring: a _persist_disabled agent must never write to the
session store via _flush_messages_to_session_db, even with a live db set."""
def test_flush_is_a_noop_when_persist_disabled(self):
import os
import tempfile
from pathlib import Path
from unittest.mock import patch
from hermes_state import SessionDB
with tempfile.TemporaryDirectory() as tmp:
db = SessionDB(db_path=Path(tmp) / "t.db")
try:
with patch.dict(os.environ, {"OPENROUTER_API_KEY": "test-key"}):
from run_agent import AIAgent
agent = AIAgent(
api_key="test-key",
base_url="https://openrouter.ai/api/v1",
model="test/model",
quiet_mode=True,
session_db=db,
session_id="s-review",
skip_context_files=True,
skip_memory=True,
)
agent._ensure_db_session()
agent._persist_disabled = True
agent._flush_messages_to_session_db(
[{"role": "user", "content": "Review the conversation above and update the skill library."},
{"role": "assistant", "content": "curator reply"}],
[],
)
# Nothing written: the hard-stop fired before any append.
assert db.get_messages("s-review") == []
finally:
db.close()