fix(kanban): route notifier wake via profile chokepoint; harden review findings

Follow-up review fixes on the salvage of #54872 (原作者 张满良/@zmlgit):

1. [HIGH] Adapter selection now goes through the shared
   _authorization_adapter chokepoint (gateway/authz_mixin.py) instead of a
   local inline lookup that fell back to the DEFAULT profile's same-platform
   adapter when the owning profile had a registry entry but no adapter for
   that platform. That fallback re-introduced the exact cross-profile
   mis-delivery ([230002] Bot can NOT be out of the chat) this change exists
   to fix. Adds a mutation-verified guard test
   (test_notifier_owning_profile_adapter_no_default_fallback).

2. [HIGH→documented] The creator-wake SessionSource cannot faithfully
   reconstruct a DM/thread creator's session key because chat_type is neither
   persisted on the subscription nor carried on the session-context bridge.
   Documented the limitation inline; behavior degrades to a fresh group
   session (never an exception). The end-to-end fix (stamp + persist
   chat_type) is a scoped follow-up, not bundled into this salvage.

3. [MED] Documented that archived/unblocked are intentionally claimed (cursor
   hygiene) but silent, and excluded from wake kinds.

4. [MED] Wake-injection failure now logs at WARNING with exc_info=True (the
   cursor has already advanced, so a broken wake must not be a silent no-op).
This commit is contained in:
kshitijk4poor 2026-07-01 23:59:56 +05:30 committed by kshitij
parent 3545d74915
commit b225b30d08
2 changed files with 116 additions and 9 deletions

View file

@ -311,13 +311,16 @@ class GatewayKanbanWatchersMixin:
)
continue
sub_profile = sub.get("notifier_profile") or ""
adapter = None
if sub_profile:
_profile_map = getattr(self, "_profile_adapters", {}).get(sub_profile)
if _profile_map:
adapter = _profile_map.get(plat)
if adapter is None:
adapter = self.adapters.get(plat)
# Route via the SAME chokepoint the authorization path uses
# (gateway/authz_mixin.py::_authorization_adapter): a stamped
# profile with its own adapter-registry entry must be served
# by THAT profile's same-platform adapter and must NOT silently
# fall back to the default profile's adapter — otherwise a
# secondary profile's task notification is delivered by the
# wrong bot (the cross-profile mis-delivery this whole change
# exists to fix). The helper returns None only when the profile
# (or default) genuinely has no adapter for the platform.
adapter = self._authorization_adapter(plat, sub_profile or None)
if adapter is None:
logger.debug(
"kanban notifier: adapter %s disconnected before delivery for %s; rewinding claim",
@ -394,6 +397,13 @@ class GatewayKanbanWatchersMixin:
new_status = str(ev.payload["status"])
msg = f"🔄 {board_tag}{tag}Kanban {sub['task_id']}{new_status}"
else:
# archived / unblocked are claimed by TERMINAL_KINDS
# (so the cursor advances past them and they can't
# wedge a later completed/blocked event behind an
# unclaimed row) but are intentionally SILENT: an
# archive needs no user ping, and unblocked is an
# internal transition. They are also excluded from
# _WAKE_KINDS below, so they never wake the creator.
continue
metadata: dict[str, Any] = {}
if sub.get("thread_id"):
@ -504,6 +514,23 @@ class GatewayKanbanWatchersMixin:
)
from gateway.session import SessionSource
from gateway.platforms.base import MessageEvent, MessageType
# KNOWN LIMITATION (tracked follow-up): the
# subscription row does not persist the
# creator's chat_type, and it is not carried
# on the session-context bridge, so we cannot
# faithfully reconstruct the creator's real
# session key here. build_session_key() keys
# DMs (":dm:<chat_id>") on a wholly different
# shape from group/thread, so any hardcoded
# value mis-routes some creators. "group" is
# the least-surprising default for the
# dashboard/group flows this wake primarily
# serves; DM-originated creators are handled
# by the follow-up that stamps + persists
# chat_type end-to-end. handle_message()
# get_or_create_session's the target, so a
# mismatch degrades to "wake lands in a fresh
# group session" — never an exception.
_source = SessionSource(
platform=plat,
chat_id=sub["chat_id"],
@ -524,9 +551,15 @@ class GatewayKanbanWatchersMixin:
sub["task_id"], platform_str, sub["chat_id"], sub_profile or "default", _wake_kinds,
)
except Exception as _wk_err:
logger.debug(
# Best-effort: the notification itself already
# delivered and the cursor has advanced, so a
# broken wake path must not wedge the tick — but
# log at WARNING with a traceback rather than
# DEBUG so a persistently-failing wake is visible
# in normal logs instead of silently no-op'ing.
logger.warning(
"kanban notifier: wakeup injection failed for %s: %s",
sub["task_id"], _wk_err,
sub["task_id"], _wk_err, exc_info=True,
)
if task_terminal:
await asyncio.to_thread(

View file

@ -233,3 +233,77 @@ def test_notifier_redelivers_same_kind_on_dispatch_cycle(tmp_path, monkeypatch):
f"deliveries (texts: {[d['text'] for d in adapter.sent]})"
)
assert "crashed" in adapter.sent[1]["text"].lower()
def test_notifier_owning_profile_adapter_no_default_fallback(tmp_path, monkeypatch):
"""A subscription owned by a secondary profile whose profile-adapter
registry entry EXISTS but lacks this platform must NOT fall back to the
default profile's same-platform adapter — the notifier must route through
the shared ``_authorization_adapter`` chokepoint, which forbids that
fallback (gateway/authz_mixin.py). Delivering via the default profile's bot
is the exact cross-profile mis-delivery this whole change exists to fix
(`[230002] Bot can NOT be out of the chat`).
Mutation check: reverting kanban_watchers.py's adapter selection to the old
inline ``if adapter is None: adapter = self.adapters.get(plat)`` fallback
makes this test FAIL (the default adapter receives the delivery).
"""
db_path = tmp_path / "profile-no-fallback.db"
monkeypatch.setenv("HERMES_KANBAN_DB", str(db_path))
kb.init_db()
conn = kb.connect()
try:
tid = kb.create_task(conn, title="owned by beta", assignee="worker")
# Subscription is owned by profile "beta".
kb.add_notify_sub(
conn, task_id=tid, platform="telegram", chat_id="chat-beta",
notifier_profile="beta",
)
kb.complete_task(conn, tid, summary="done")
finally:
conn.close()
default_adapter = RecordingAdapter()
other_adapter = RecordingAdapter()
runner = GatewayRunner.__new__(GatewayRunner)
runner._running = True
# Default profile has a telegram adapter …
runner.adapters = {Platform.TELEGRAM: default_adapter}
# … and profile "beta" HAS a non-empty registry entry (so it passes the
# notifier's upstream skip-filter, which only skips owning profiles with NO
# adapter at all), but that entry does NOT contain a telegram adapter — beta
# connected a different platform (discord). The telegram sub owned by beta
# must therefore resolve to NO adapter, not silently borrow the default
# profile's telegram bot.
runner._profile_adapters = {"beta": {Platform.DISCORD: other_adapter}}
runner._kanban_sub_fail_counts = {}
asyncio.run(_run_one_notifier_tick(monkeypatch, runner))
# The default profile's adapter must never receive beta's notification.
assert default_adapter.sent == [], (
"Owning-profile subscription must not fall back to the default "
f"profile's adapter; got {default_adapter.sent!r}"
)
assert other_adapter.sent == [], (
f"beta's discord adapter must not receive a telegram sub; got {other_adapter.sent!r}"
)
# The claim is rewound (adapter resolved to None → treated as disconnected),
# so the event is still unseen and will deliver once beta's adapter connects.
assert [ev.kind for ev in _unseen_terminal_events_for(tid, "chat-beta")] == ["completed"]
def _unseen_terminal_events_for(tid, chat_id):
conn = kb.connect()
try:
_, events = kb.unseen_events_for_sub(
conn,
task_id=tid,
platform="telegram",
chat_id=chat_id,
kinds=["completed", "blocked", "gave_up", "crashed", "timed_out"],
)
return events
finally:
conn.close()