Merge remote-tracking branch 'origin/main' into pr48275-rebase

# Conflicts: # cron/scheduler.py
2026-06-19 07:40:29 -07:00 · 2026-06-19 07:40:29 -07:00 · a58287afcb
commit a58287afcb
parent 637aff46e7 35e7ca03d5
162 changed files with 8521 additions and 634 deletions
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@ -2535,3 +2535,56 @@ def sanitize_anthropic_kwargs(api_kwargs: Any, *, log_prefix: str = "") -> Any:
            sorted(leaked),
        )
    return api_kwargs
+
+
+def _is_stream_unavailable_error(exc: Exception) -> bool:
+    """Return True when an Anthropic stream call should fall back to create()."""
+    err_lower = str(exc).lower()
+    if "stream" in err_lower and "not supported" in err_lower:
+        return True
+    if "invokemodelwithresponsestream" in err_lower:
+        from agent.bedrock_adapter import is_streaming_access_denied_error
+
+        return is_streaming_access_denied_error(exc)
+    return False
+
+
+def create_anthropic_message(
+    client: Any,
+    api_kwargs: dict,
+    *,
+    log_prefix: str = "",
+    prefer_stream: bool = True,
+) -> Any:
+    """Create an Anthropic message, aggregating via stream when available.
+
+    Some Anthropic-compatible gateways are SSE-only: they ignore non-streaming
+    requests and return ``text/event-stream`` even for ``messages.create()``.
+    The SDK can surface that as raw text, so callers that expect a Message then
+    crash on ``.content``.  Prefer ``messages.stream().get_final_message()`` to
+    match the main turn path, falling back to ``create()`` only for providers
+    that explicitly do not support streaming, such as restricted Bedrock roles.
+    """
+    sanitize_anthropic_kwargs(api_kwargs, log_prefix=log_prefix)
+
+    messages_api = getattr(client, "messages", None)
+    stream_fn = getattr(messages_api, "stream", None)
+    if prefer_stream and callable(stream_fn):
+        stream_kwargs = dict(api_kwargs)
+        stream_kwargs.pop("stream", None)
+        try:
+            with stream_fn(**stream_kwargs) as stream:
+                return stream.get_final_message()
+        except Exception as exc:
+            if not _is_stream_unavailable_error(exc):
+                raise
+            logger.debug(
+                "%sAnthropic Messages stream unavailable; falling back to "
+                "messages.create(): %s",
+                log_prefix,
+                exc,
+            )
+
+    create_kwargs = dict(api_kwargs)
+    create_kwargs.pop("stream", None)
+    return messages_api.create(**create_kwargs)
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@ -997,7 +997,7 @@ class _AnthropicCompletionsAdapter:
        self._is_oauth = is_oauth

    def create(self, **kwargs) -> Any:
-        from agent.anthropic_adapter import build_anthropic_kwargs
+        from agent.anthropic_adapter import build_anthropic_kwargs, create_anthropic_message
        from agent.transports import get_transport

        messages = kwargs.get("messages", [])
@ -1041,7 +1041,7 @@ class _AnthropicCompletionsAdapter:
            if not _forbids_sampling_params(model):
                anthropic_kwargs["temperature"] = temperature

-        response = self._client.messages.create(**anthropic_kwargs)
+        response = create_anthropic_message(self._client, anthropic_kwargs)
        _transport = get_transport("anthropic_messages")
        _nr = _transport.normalize_response(
            response, strip_tool_prefix=self._is_oauth
--- a/agent/codex_runtime.py
+++ b/agent/codex_runtime.py
@ -290,6 +290,7 @@ def run_codex_app_server_turn(
                original_user_message=original_user_message,
                final_response=turn.final_text,
                interrupted=False,
+                messages=messages,
            )
        except Exception:
            logger.debug("external memory sync raised", exc_info=True)
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@ -3197,15 +3197,22 @@ def run_conversation(
                    # Terminal — flush buffered context so the user sees
                    # what was tried before the abort.
                    agent._flush_status_buffer()
+                    # Summarize once: Cloudflare/proxy HTML challenge pages and
+                    # other raw provider bodies must be collapsed to a short
+                    # one-liner here, otherwise the full page leaks into the
+                    # returned ``error`` field and downstream consumers deliver
+                    # it verbatim (e.g. a cron failure notification dumped a
+                    # ~60KB Cloudflare challenge page as 31 Discord messages).
+                    _nonretryable_summary = agent._summarize_api_error(api_error)
                    if classified.reason == FailoverReason.content_policy_blocked:
                        agent._emit_status(
                            f"❌ Provider safety filter blocked this request: "
-                            f"{agent._summarize_api_error(api_error)}"
+                            f"{_nonretryable_summary}"
                        )
                    else:
                        agent._emit_status(
                            f"❌ Non-retryable error (HTTP {status_code}): "
-                            f"{agent._summarize_api_error(api_error)}"
+                            f"{_nonretryable_summary}"
                        )
                    agent._vprint(f"{agent.log_prefix}❌ Non-retryable client error (HTTP {status_code}). Aborting.", force=True)
                    agent._vprint(f"{agent.log_prefix}   🔌 Provider: {_provider}  Model: {_model}", force=True)
@ -3290,18 +3297,17 @@ def run_conversation(
                    else:
                        agent._persist_session(messages, conversation_history)
                    if classified.reason == FailoverReason.content_policy_blocked:
-                        _summary = agent._summarize_api_error(api_error)
                        _policy_response = (
                            "⚠️  The model provider's safety filter blocked this request "
                            "(not a Hermes/gateway failure).\n\n"
-                            f"Provider message: {_summary}\n\n"
+                            f"Provider message: {_nonretryable_summary}\n\n"
                            f"{_CONTENT_POLICY_RECOVERY_HINT}"
                        )
                        return _content_policy_blocked_result(
                            messages,
                            api_call_count,
                            final_response=_policy_response,
-                            error_detail=_summary,
+                            error_detail=_nonretryable_summary,
                        )
                    return {
                        "final_response": None,
@ -3309,7 +3315,7 @@ def run_conversation(
                        "api_calls": api_call_count,
                        "completed": False,
                        "failed": True,
-                        "error": str(api_error),
+                        "error": _nonretryable_summary,
                    }

                if retry_count >= max_retries:
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@ -15,6 +15,7 @@ from typing import Any, Dict, List, Optional, Set, Tuple

 from hermes_constants import OPENROUTER_BASE_URL
 from hermes_cli.config import load_env
+from agent.secret_scope import get_secret as _get_secret
 from agent.credential_persistence import (
    is_borrowed_credential_source,
    sanitize_borrowed_credential_payload,
@ -1666,7 +1667,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
        _env_file = load_env()

        def _env_val(key: str) -> str:
-            return (_env_file.get(key) or os.environ.get(key) or "").strip()
+            return (_env_file.get(key) or _get_secret(key, "") or "").strip()

        anthropic_api_key = _env_val("ANTHROPIC_API_KEY")
        anthropic_oauth_env = (
@ -1952,7 +1953,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
    # changes to the .env file.
    def _get_env_prefer_dotenv(key: str) -> str:
        env_file = load_env()
-        val = env_file.get(key) or os.environ.get(key) or ""
+        val = env_file.get(key) or _get_secret(key, "") or ""
        return val.strip()

    # Honour user suppression — `hermes auth remove <provider> <N>` for an
--- a/agent/message_content.py
+++ b/agent/message_content.py
@ -0,0 +1,50 @@
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any
+
+
+_NON_TEXT_PART_TYPES = {"image", "image_url", "input_image", "audio", "input_audio"}
+_TEXT_KEYS = ("text", "content", "input_text", "output_text", "summary_text")
+
+
+def _field(value: Any, key: str) -> Any:
+    if isinstance(value, Mapping):
+        return value.get(key)
+    return getattr(value, key, None)
+
+
+def _text_from_part(part: Any) -> str:
+    if part is None:
+        return ""
+    if isinstance(part, str):
+        return part
+
+    part_type = str(_field(part, "type") or "").strip().lower()
+    if part_type in _NON_TEXT_PART_TYPES:
+        return ""
+
+    for key in _TEXT_KEYS:
+        text = _field(part, key)
+        if isinstance(text, str):
+            return text
+    return ""
+
+
+def flatten_message_text(content: Any, *, sep: str = "\n") -> str:
+    """Return the visible text from common chat/Responses message content shapes."""
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        chunks = [_text_from_part(part) for part in content]
+        return sep.join(chunk for chunk in chunks if chunk)
+
+    text = _text_from_part(content)
+    if text:
+        return text
+    try:
+        return str(content)
+    except Exception:
+        return ""
--- a/agent/secret_scope.py
+++ b/agent/secret_scope.py
@ -0,0 +1,205 @@
+"""Profile-scoped credential resolution for multi-profile gateway multiplexing.
+
+The multiplexing gateway serves many profiles from one process. Each profile
+has its own ``.env`` with its own provider keys and platform tokens, so we
+**cannot** union them into the process-global ``os.environ`` (that would leak
+profile A's keys to profile B's turns, and to every subprocess spawned with
+``env=dict(os.environ)``).
+
+This module provides a fail-closed, context-local secret scope:
+
+- ``set_secret_scope(mapping)`` installs the active profile's secrets for the
+  current task (a contextvar, so it propagates into the agent's worker thread
+  via ``copy_context()`` exactly like the HERMES_HOME override).
+- ``get_secret(name)`` reads from that scope. When multiplexing is **active**
+  and no scope is set, it RAISES rather than silently falling back to
+  ``os.environ`` — an un-migrated or newly-added call site fails loud at that
+  exact line instead of leaking another profile's value. When multiplexing is
+  **off** (the default), it transparently reads ``os.environ`` so the
+  single-profile gateway and every non-gateway caller behave exactly as before.
+
+Design rationale lives in ``docs/design/multiplexing-gateway.md`` (Workstream A).
+"""
+from __future__ import annotations
+
+import os
+from contextvars import ContextVar, Token
+from pathlib import Path
+from typing import Dict, Mapping, Optional
+
+
+# ── multiplex-active flag ────────────────────────────────────────────────
+# Process-global: set once at gateway startup when gateway.multiplex_profiles
+# is true. Governs whether get_secret() fails closed on an unscoped read.
+# A plain module global (not a contextvar): it describes the deployment mode,
+# not a per-task value.
+_MULTIPLEX_ACTIVE: bool = False
+
+
+def set_multiplex_active(active: bool) -> None:
+    """Mark whether the process is running as a profile multiplexer.
+
+    Called once at gateway startup. When True, ``get_secret`` fails closed on
+    an unscoped read instead of falling back to ``os.environ``.
+    """
+    global _MULTIPLEX_ACTIVE
+    _MULTIPLEX_ACTIVE = bool(active)
+
+
+def is_multiplex_active() -> bool:
+    """Return whether the process is running as a profile multiplexer."""
+    return _MULTIPLEX_ACTIVE
+
+
+# ── the secret scope contextvar ──────────────────────────────────────────
+_SECRET_SCOPE: ContextVar[Optional[Mapping[str, str]]] = ContextVar(
+    "_SECRET_SCOPE", default=None
+)
+
+
+class UnscopedSecretError(RuntimeError):
+    """Raised when a secret is read in multiplex mode with no scope installed.
+
+    This is the fail-closed signal: it means a credential read reached
+    ``get_secret`` without a profile scope active, which in a multiplexer would
+    otherwise leak whichever profile's value happened to be in ``os.environ``.
+    The fix is to wrap the call path in ``set_secret_scope(...)`` (the per-turn
+    / per-adapter profile scope), not to widen the allowlist.
+    """
+
+
+def set_secret_scope(secrets: Optional[Mapping[str, str]]) -> Token:
+    """Install the active profile's secret mapping for the current context.
+
+    Returns a token for ``reset_secret_scope``. Pass ``None`` to clear.
+    """
+    return _SECRET_SCOPE.set(secrets)
+
+
+def reset_secret_scope(token: Token) -> None:
+    """Restore the previous secret scope."""
+    _SECRET_SCOPE.reset(token)
+
+
+def current_secret_scope() -> Optional[Mapping[str, str]]:
+    """Return the active secret mapping, or None when no scope is installed."""
+    return _SECRET_SCOPE.get()
+
+
+# ── genuinely-global env vars (NOT per-profile secrets) ──────────────────
+# These are process/deployment-level settings, not profile credentials. They
+# legitimately live in os.environ and must keep reading from it even in
+# multiplex mode — routing them through the fail-closed path would wrongly
+# crash. Anything matching is read from os.environ regardless of scope.
+#
+# Membership test is by exact name OR prefix (see _is_global_env). Keep this
+# list tight: when in doubt a value is a profile secret, not a global.
+_GLOBAL_ENV_EXACT = frozenset({
+    # Hermes runtime / deployment
+    "HERMES_HOME", "HERMES_PROFILE", "HERMES_GATEWAY_LOCK_DIR",
+    "HERMES_MAX_ITERATIONS", "HERMES_MAX_TOKENS", "HERMES_API_TIMEOUT",
+    "HERMES_REDACT_SECRETS", "HERMES_NOUS_TIMEOUT_SECONDS",
+    "_HERMES_GATEWAY",
+    # OS / interpreter
+    "PATH", "HOME", "USER", "LANG", "LC_ALL", "TZ", "PWD", "SHELL", "TMPDIR",
+    "VIRTUAL_ENV", "PYTHONPATH", "SSL_CERT_FILE",
+    # Kanban paths (per-board, not per-profile-secret)
+    "HERMES_KANBAN_DB", "HERMES_KANBAN_WORKSPACES_ROOT", "HERMES_KANBAN_BOARD",
+})
+_GLOBAL_ENV_PREFIXES = (
+    "HERMES_KANBAN_",
+    "HERMES_TELEGRAM_",   # tuning knobs (batch delays, fallback toggles) — NOT the token
+    "TERMINAL_",          # terminal/sandbox backend settings
+)
+
+
+def _is_global_env(name: str) -> bool:
+    """Return True for genuinely process-global (non-profile-secret) env vars."""
+    if name in _GLOBAL_ENV_EXACT:
+        return True
+    return any(name.startswith(p) for p in _GLOBAL_ENV_PREFIXES)
+
+
+def get_secret(name: str, default: Optional[str] = None) -> Optional[str]:
+    """Resolve a credential by env-var name, honoring the active profile scope.
+
+    Resolution order:
+
+    1. Genuinely-global vars (``_is_global_env``) always read ``os.environ`` —
+       they are deployment settings, not profile secrets.
+    2. When a secret scope is installed (multiplexed turn), read from it; an
+       absent key returns ``default``. The scope is authoritative — we do NOT
+       fall through to ``os.environ``, because in a multiplexer ``os.environ``
+       may hold another profile's value.
+    3. No scope installed:
+       - multiplex INACTIVE (default deployment): read ``os.environ`` —
+         identical to the legacy ``os.getenv`` behavior every caller had before.
+       - multiplex ACTIVE: FAIL CLOSED. Raise ``UnscopedSecretError`` so the
+         missing scope is caught loudly instead of leaking a cross-profile value.
+    """
+    if _is_global_env(name):
+        val = os.environ.get(name)
+        return val if val is not None else default
+
+    scope = _SECRET_SCOPE.get()
+    if scope is not None:
+        val = scope.get(name)
+        return val if val is not None else default
+
+    if _MULTIPLEX_ACTIVE:
+        raise UnscopedSecretError(
+            f"get_secret({name!r}) called with no profile secret scope active "
+            f"while multiplexing is on. This credential read must run inside a "
+            f"set_secret_scope(...) block (the per-turn / per-adapter profile "
+            f"scope). Reading os.environ here would risk leaking another "
+            f"profile's value. See docs/design/multiplexing-gateway.md "
+            f"(Workstream A)."
+        )
+
+    val = os.environ.get(name)
+    return val if val is not None else default
+
+
+def load_env_file(env_path: Path) -> Dict[str, str]:
+    """Parse a ``.env`` file into a plain dict WITHOUT touching ``os.environ``.
+
+    Used to load a profile's secrets into an isolated mapping for
+    ``set_secret_scope``. Mirrors python-dotenv's basic parsing (KEY=VALUE,
+    ``export`` prefix, ``#`` comments, optional matching quotes) but never
+    mutates the process environment — that isolation is the whole point.
+    """
+    secrets: Dict[str, str] = {}
+    try:
+        text = env_path.read_text(encoding="utf-8")
+    except (FileNotFoundError, OSError, UnicodeDecodeError):
+        return secrets
+
+    for raw in text.splitlines():
+        line = raw.strip()
+        if not line or line.startswith("#"):
+            continue
+        if line.startswith("export "):
+            line = line[len("export "):].lstrip()
+        if "=" not in line:
+            continue
+        key, _, value = line.partition("=")
+        key = key.strip()
+        if not key:
+            continue
+        value = value.strip()
+        if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
+            value = value[1:-1]
+        secrets[key] = value
+
+    return secrets
+
+
+def build_profile_secret_scope(hermes_home: Path) -> Dict[str, str]:
+    """Build a profile's secret mapping from its ``<home>/.env``.
+
+    Returns a fresh dict (safe to install via ``set_secret_scope``). Genuinely
+    global vars are intentionally NOT copied in — ``get_secret`` reads those
+    from ``os.environ`` directly, so the scope holds only profile secrets.
+    """
+    return load_env_file(Path(hermes_home) / ".env")
+
--- a/apps/desktop/src/app/desktop-controller.tsx
+++ b/apps/desktop/src/app/desktop-controller.tsx
@ -14,6 +14,7 @@ import { useSkinCommand } from '@/themes/use-skin-command'
 import { formatRefValue } from '../components/assistant-ui/directive-text'
 import { getCronJobs, getSessionMessages, listAllProfileSessions, type SessionInfo, triggerCronJob } from '../hermes'
 import { type ChatMessage, chatMessageText, preserveLocalAssistantErrors, toChatMessages } from '../lib/chat-messages'
+import { storedSessionIdForNotification } from '../lib/session-ids'
 import {
  isMessagingSource,
  LOCAL_SESSION_SOURCE_IDS,
@ -276,16 +277,20 @@ export function DesktopController() {
    }
  }, [])

-  // Notification click: the main process already focused the window; jump to its session.
+  // Notification click: the main process already focused the window; jump to its
+  // session. Notifications are tagged with the gateway *runtime* session id, but
+  // the chat route is keyed by the *stored* id — navigating with the runtime id
+  // resumes a non-existent stored session ("session not found") and strands the
+  // user. Translate runtime -> stored before navigating.
  useEffect(() => {
    const unsubscribe = window.hermesDesktop?.onFocusSession?.(sessionId => {
      if (sessionId) {
-        navigate(sessionRoute(sessionId))
+        navigate(sessionRoute(storedSessionIdForNotification(sessionId, runtimeIdByStoredSessionIdRef.current)))
      }
    })

    return () => unsubscribe?.()
-  }, [navigate])
+  }, [navigate, runtimeIdByStoredSessionIdRef])

  // Notification action button (Approve/Reject) — resolve in place, no navigation.
  useEffect(() => {
--- a/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
+++ b/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
@ -32,6 +32,7 @@ import {
  clearComposerAttachments,
  type ComposerAttachment,
  setComposerAttachmentUploadState,
+  setComposerDraft,
  terminalContextBlocksFromDraft,
  updateComposerAttachment
 } from '@/store/composer'
@ -951,8 +952,26 @@ export function usePromptActions({
            return
          }

+          // send / prefill carry an optional `notice` (e.g. "⊙ Goal set …")
+          // that the backend wants shown as a system line before the message
+          // is acted on. Mirrors the TUI's createSlashHandler — without it a
+          // `/goal <text>` looked like it did nothing.
+          if ((dispatch.type === 'send' || dispatch.type === 'prefill') && dispatch.notice?.trim()) {
+            renderSlashOutput(dispatch.notice.trim())
+          }
+
          const message = ('message' in dispatch ? dispatch.message : '')?.trim() ?? ''

+          // /undo returns a prefill directive: drop the backed-up message into
+          // the composer for editing instead of submitting it immediately.
+          if (dispatch.type === 'prefill') {
+            if (message) {
+              setComposerDraft(message)
+            }
+
+            return
+          }
+
          if (!message) {
            renderSlashOutput(
              `/${name}: ${dispatch.type === 'skill' ? 'skill payload missing message' : 'empty message'}`
--- a/apps/desktop/src/app/settings/providers-settings.test.tsx
+++ b/apps/desktop/src/app/settings/providers-settings.test.tsx
@ -2,7 +2,7 @@ import { cleanup, fireEvent, render, screen, waitFor } from '@testing-library/re
 import { atom } from 'nanostores'
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'

-import type { OAuthProvider } from '@/types/hermes'
+import type { EnvVarInfo, OAuthProvider } from '@/types/hermes'

 const listOAuthProviders = vi.fn()
 const disconnectOAuthProvider = vi.fn()
@ -36,6 +36,25 @@ function provider(id: string, loggedIn: boolean, patch: Partial<OAuthProvider> =
  }
 }

+// One `/api/env` row (an EnvVarInfo) for the API-keys view. Mirrors the
+// `provider()` factory above: a valid base + per-test overrides, typed against
+// the real response shape so it can't drift from EnvVarInfo.
+function keyVar(patch: Partial<EnvVarInfo> = {}): EnvVarInfo {
+  return {
+    advanced: false,
+    category: 'provider',
+    description: '',
+    is_password: true,
+    is_set: false,
+    provider: '',
+    provider_label: '',
+    redacted_value: null,
+    tools: [],
+    url: '',
+    ...patch
+  }
+}
+
 beforeEach(() => {
  onboarding.set({ manual: false })
  getEnvVars.mockResolvedValue({})
@ -97,4 +116,56 @@ describe('ProvidersSettings', () => {
    expect(screen.queryByRole('button', { name: 'Remove Qwen Code' })).toBeNull()
    expect(screen.getByText(/managed by its own CLI/)).toBeTruthy()
  })
+
+  it('renders a Keys card for a backend-tagged provider with no PROVIDER_GROUPS prefix', async () => {
+    // A provider the backend catalog tags (provider/provider_label) but that has
+    // no desktop PROVIDER_GROUPS prefix row must still render its own card —
+    // this is the GUI/CLI drift fix: membership comes from the backend, not
+    // from the hand-maintained prefix list.
+    getEnvVars.mockResolvedValue({
+      WIDGETAI_API_KEY: keyVar({
+        provider: 'widgetai',
+        provider_label: 'WidgetAI',
+        url: 'https://widgetai.example/keys'
+      })
+    })
+    listOAuthProviders.mockResolvedValue({ providers: [] })
+
+    const { ProvidersSettings } = await import('./providers-settings')
+    render(<ProvidersSettings onClose={vi.fn()} onViewChange={vi.fn()} view="keys" />)
+
+    expect(await screen.findByText('WidgetAI')).toBeTruthy()
+  })
+
+  it('orders API-key providers by priority then name, and filters them via search', async () => {
+    // These three providers have no curated PROVIDER_GROUPS priority, so they
+    // share the default priority and fall back to alphabetical among themselves
+    // (Acme, Middle, Zebra) — exercising the name tiebreak of the priority sort.
+    getEnvVars.mockResolvedValue({
+      ZEBRA_API_KEY: keyVar({ provider: 'zebra', provider_label: 'Zebra' }),
+      ACME_API_KEY: keyVar({ provider: 'acme', provider_label: 'Acme' }),
+      MIDDLE_API_KEY: keyVar({ provider: 'middle', provider_label: 'Middle' })
+    })
+    listOAuthProviders.mockResolvedValue({ providers: [] })
+
+    const { ProvidersSettings } = await import('./providers-settings')
+    render(<ProvidersSettings onClose={vi.fn()} onViewChange={vi.fn()} view="keys" />)
+
+    // Equal priority → alphabetical tiebreak: Acme, Middle, Zebra.
+    await screen.findByText('Acme')
+    const labels = screen.getAllByText(/Acme|Middle|Zebra/).map(el => el.textContent)
+    expect(labels).toEqual(['Acme', 'Middle', 'Zebra'])
+
+    // Typing narrows the list to matching providers only.
+    const search = screen.getByPlaceholderText('Search providers…')
+    fireEvent.change(search, { target: { value: 'mid' } })
+
+    await waitFor(() => expect(screen.queryByText('Acme')).toBeNull())
+    expect(screen.getByText('Middle')).toBeTruthy()
+    expect(screen.queryByText('Zebra')).toBeNull()
+
+    // A non-matching query shows the empty-state copy.
+    fireEvent.change(search, { target: { value: 'nonesuch-xyz' } })
+    expect(await screen.findByText('No providers match your search.')).toBeTruthy()
+  })
 })
--- a/apps/desktop/src/app/settings/providers-settings.tsx
+++ b/apps/desktop/src/app/settings/providers-settings.tsx
@ -12,6 +12,7 @@ import {
  sortProviders
 } from '@/components/desktop-onboarding-overlay'
 import { Button } from '@/components/ui/button'
+import { SearchField } from '@/components/ui/search-field'
 import { disconnectOAuthProvider, listOAuthProviders } from '@/hermes'
 import { useI18n } from '@/i18n'
 import { Check, ChevronDown, ChevronRight, KeyRound, Loader2, Terminal, Trash2 } from '@/lib/icons'
@ -45,8 +46,17 @@ export const PROVIDER_VIEWS = ['accounts', 'keys'] as const
 export type ProviderView = (typeof PROVIDER_VIEWS)[number]

 // Group the env catalog by provider — one ListRow per vendor plus optional
-// advanced overrides (base URL, region, etc.). Groups without a key field and
-// the "Other" bucket are skipped.
+// advanced overrides (base URL, region, etc.). Groups without a key field are
+// skipped.
+//
+// Grouping key precedence:
+//   1. Backend `provider_label` / `provider` (from the unified provider catalog
+//      in hermes_cli/provider_catalog.py) — the SAME provider identity
+//      `hermes model` uses. This is authoritative: a provider tagged by the
+//      backend always renders a card, even with no PROVIDER_GROUPS row.
+//   2. Desktop prefix match (`providerGroup`) — legacy fallback for provider
+//      env vars that predate the backend tagging.
+// Only entries that resolve to neither (the "Other" bucket) are skipped.
 function buildProviderKeyGroups(vars: Record<string, EnvVarInfo>): ProviderKeyGroup[] {
  const buckets = new Map<string, [string, EnvVarInfo][]>()

@ -55,7 +65,9 @@ function buildProviderKeyGroups(vars: Record<string, EnvVarInfo>): ProviderKeyGr
      continue
    }

-    const name = providerGroup(key)
+    // Prefer the backend-supplied provider label/id so the Keys tab groups by
+    // the same identity the CLI picker uses; fall back to the prefix guess.
+    const name = info.provider_label?.trim() || info.provider?.trim() || providerGroup(key)

    if (name === 'Other') {
      continue
@ -73,6 +85,9 @@ function buildProviderKeyGroups(vars: Record<string, EnvVarInfo>): ProviderKeyGr
      continue
    }

+    // Presentation overlay (priority, blurb, docs) is keyed by the prefix-based
+    // group name; when the backend introduced this provider it may have no
+    // overlay entry, so fall back to the backend/env metadata for display.
    const meta = providerMeta(name)

    groups.push({
@ -131,6 +146,7 @@ function OAuthPicker({
  const rest = featured ? ordered.filter(p => p.id !== FEATURED_ID) : ordered
  // Keep connected accounts grouped and always visible; only the unconnected
  // providers hide behind the disclosure, so the page leads with what's set up.
+  // Both lists preserve `sortProviders` order (curated priority, then name).
  const connected = rest.filter(p => p.status?.logged_in)
  const others = rest.filter(p => !p.status?.logged_in)
  const collapsible = others.length > 0
@ -284,6 +300,8 @@ export function ProvidersSettings({ onClose, onViewChange, view }: ProvidersSett
  const [oauthProviders, setOauthProviders] = useState<OAuthProvider[]>([])
  const [openProvider, setOpenProvider] = useState<null | string>(null)
  const [disconnecting, setDisconnecting] = useState<null | string>(null)
+  // Free-text filter for the API-keys view (provider name / env-var key / desc).
+  const [keyQuery, setKeyQuery] = useState('')
  // The onboarding overlay owns the OAuth flow. Watch its `manual` flag so we
  // re-read connection state when the user finishes (or dismisses) a sign-in
  // they launched from this page — otherwise the cards keep their stale status.
@ -372,20 +390,49 @@ export function ProvidersSettings({ onClose, onViewChange, view }: ProvidersSett
  const keyGroups = buildProviderKeyGroups(vars)

  if (showApiKeys) {
+    const q = keyQuery.trim().toLowerCase()
+    const visibleGroups = q
+      ? keyGroups.filter(group => {
+          const haystack = [
+            group.name,
+            group.description ?? '',
+            group.primary[0],
+            ...group.advanced.map(([k]) => k)
+          ]
+
+          return haystack.some(s => s.toLowerCase().includes(q))
+        })
+      : keyGroups
+
    return (
      <SettingsContent>
        {keyGroups.length > 0 ? (
-          <div className="grid gap-2">
-            {keyGroups.map(group => (
-              <ProviderKeyRows
-                expanded={openProvider === group.name}
-                group={group}
-                key={group.name}
-                onExpand={() => setOpenProvider(group.name)}
-                onToggle={() => setOpenProvider(prev => (prev === group.name ? null : group.name))}
-                rowProps={rowProps}
-              />
-            ))}
+          <div className="grid gap-3">
+            <SearchField
+              aria-label={t.settings.providers.searchKeys}
+              containerClassName="w-full"
+              onChange={setKeyQuery}
+              placeholder={t.settings.providers.searchKeys}
+              value={keyQuery}
+            />
+            {visibleGroups.length > 0 ? (
+              <div className="grid gap-2">
+                {visibleGroups.map(group => (
+                  <ProviderKeyRows
+                    expanded={openProvider === group.name}
+                    group={group}
+                    key={group.name}
+                    onExpand={() => setOpenProvider(group.name)}
+                    onToggle={() => setOpenProvider(prev => (prev === group.name ? null : group.name))}
+                    rowProps={rowProps}
+                  />
+                ))}
+              </div>
+            ) : (
+              <div className="grid min-h-24 place-items-center px-4 py-6 text-center text-[length:var(--conversation-caption-font-size)] text-muted-foreground">
+                {t.settings.providers.noKeysMatch}
+              </div>
+            )}
          </div>
        ) : (
          <NoProviderKeys />
--- a/apps/desktop/src/app/types.ts
+++ b/apps/desktop/src/app/types.ts
@ -106,6 +106,13 @@ export interface SkillCommandDispatchResponse {
 export interface SendCommandDispatchResponse {
  type: 'send'
  message: string
+  notice?: string
+}
+
+export interface PrefillCommandDispatchResponse {
+  type: 'prefill'
+  message: string
+  notice?: string
 }

 export type CommandDispatchResponse =
@ -113,6 +120,7 @@ export type CommandDispatchResponse =
  | AliasCommandDispatchResponse
  | SkillCommandDispatchResponse
  | SendCommandDispatchResponse
+  | PrefillCommandDispatchResponse

 export type SidebarNavId = 'artifacts' | 'command-center' | 'messaging' | 'new-session' | 'settings' | 'skills'

--- a/apps/desktop/src/components/assistant-ui/thread.tsx
+++ b/apps/desktop/src/components/assistant-ui/thread.tsx
@ -859,7 +859,10 @@ const ProcessNotificationNote: FC<{ text: string }> = ({ text }) => {
          <summary className="cursor-pointer select-none text-muted-foreground/45 hover:text-muted-foreground/70">
            output
          </summary>
-          <pre className="mt-0.5 max-h-48 overflow-auto whitespace-pre-wrap font-mono text-[0.625rem] leading-4 text-muted-foreground/55">
+          <pre
+            className="mt-0.5 max-h-48 overflow-auto whitespace-pre-wrap font-mono text-[0.625rem] leading-4 text-muted-foreground/55"
+            data-selectable-text="true"
+          >
            {detail}
          </pre>
        </details>
--- a/apps/desktop/src/components/chat/terminal-output.tsx
+++ b/apps/desktop/src/components/chat/terminal-output.tsx
@ -41,7 +41,11 @@ export function TerminalOutput({ className, text }: TerminalOutputProps) {
  }, [text])

  return (
-    <div className={cn('max-h-16 overflow-auto overscroll-contain', className)} ref={ref}>
+    <div
+      className={cn('max-h-16 overflow-auto overscroll-contain', className)}
+      data-selectable-text="true"
+      ref={ref}
+    >
      <pre className="w-max min-w-full font-mono text-[0.5625rem] leading-[0.85rem] whitespace-pre text-muted-foreground/70">
        {text}
      </pre>
--- a/apps/desktop/src/i18n/en.ts
+++ b/apps/desktop/src/i18n/en.ts
@ -581,6 +581,8 @@ export const en: Translations = {
      removedMessage: provider => `${provider} was removed.`,
      failedRemove: provider => `Could not remove ${provider}`,
      noProviderKeys: 'No provider API keys available.',
+      searchKeys: 'Search providers…',
+      noKeysMatch: 'No providers match your search.',
      loading: 'Loading providers...'
    },
    sessions: {
--- a/apps/desktop/src/i18n/ja.ts
+++ b/apps/desktop/src/i18n/ja.ts
@ -700,6 +700,8 @@ export const ja = defineLocale({
      removedMessage: provider => `${provider} を削除しました。`,
      failedRemove: provider => `${provider} を削除できませんでした`,
      noProviderKeys: '利用可能なプロバイダー API キーがありません。',
+      searchKeys: 'プロバイダーを検索…',
+      noKeysMatch: '一致するプロバイダーがありません。',
      loading: 'プロバイダーを読み込み中...'
    },
    sessions: {
--- a/apps/desktop/src/i18n/types.ts
+++ b/apps/desktop/src/i18n/types.ts
@ -462,6 +462,8 @@ export interface Translations {
      removedMessage: (provider: string) => string
      failedRemove: (provider: string) => string
      noProviderKeys: string
+      searchKeys: string
+      noKeysMatch: string
      loading: string
    }
    sessions: {
--- a/apps/desktop/src/i18n/zh-hant.ts
+++ b/apps/desktop/src/i18n/zh-hant.ts
@ -677,6 +677,8 @@ export const zhHant = defineLocale({
      removedMessage: provider => `${provider} 已移除。`,
      failedRemove: provider => `無法移除 ${provider}`,
      noProviderKeys: '沒有可用的提供方 API 金鑰。',
+      searchKeys: '搜尋提供方…',
+      noKeysMatch: '沒有符合的提供方。',
      loading: '正在載入提供方...'
    },
    sessions: {
--- a/apps/desktop/src/i18n/zh.ts
+++ b/apps/desktop/src/i18n/zh.ts
@ -774,6 +774,8 @@ export const zh: Translations = {
      removedMessage: provider => `${provider} 已移除。`,
      failedRemove: provider => `无法移除 ${provider}`,
      noProviderKeys: '没有可用的提供方 API 密钥。',
+      searchKeys: '搜索提供方…',
+      noKeysMatch: '没有匹配的提供方。',
      loading: '正在加载提供方...'
    },
    sessions: {
--- a/apps/desktop/src/lib/chat-runtime.test.ts
+++ b/apps/desktop/src/lib/chat-runtime.test.ts
@ -2,7 +2,7 @@ import { describe, expect, it } from 'vitest'

 import type { ComposerAttachment } from '@/store/composer'

-import { coerceThinkingText, optimisticAttachmentRef } from './chat-runtime'
+import { coerceThinkingText, optimisticAttachmentRef, parseCommandDispatch } from './chat-runtime'

 const DATA_URL = 'data:image/png;base64,iVBORw0KGgoAAAANS'

@ -52,3 +52,31 @@ describe('coerceThinkingText', () => {
    ).toBe('')
  })
 })
+
+describe('parseCommandDispatch', () => {
+  it('keeps the notice on a send directive (e.g. /goal set)', () => {
+    // The backend's /goal set returns {type:send, notice:"⊙ Goal set …", message}.
+    // Dropping the notice made /goal look like it did nothing in the desktop app.
+    const parsed = parseCommandDispatch({ type: 'send', notice: '⊙ Goal set', message: 'do the thing' })
+
+    expect(parsed).toEqual({ type: 'send', message: 'do the thing', notice: '⊙ Goal set' })
+  })
+
+  it('keeps message-only send directives working (no notice)', () => {
+    expect(parseCommandDispatch({ type: 'send', message: 'hi' })).toEqual({
+      type: 'send',
+      message: 'hi',
+      notice: undefined
+    })
+  })
+
+  it('parses a prefill directive with its notice (e.g. /undo)', () => {
+    const parsed = parseCommandDispatch({ type: 'prefill', notice: 'backed up 1 turn', message: 'edit me' })
+
+    expect(parsed).toEqual({ type: 'prefill', message: 'edit me', notice: 'backed up 1 turn' })
+  })
+
+  it('rejects a prefill directive missing its message', () => {
+    expect(parseCommandDispatch({ type: 'prefill', notice: 'x' })).toBeNull()
+  })
+})
--- a/apps/desktop/src/lib/chat-runtime.ts
+++ b/apps/desktop/src/lib/chat-runtime.ts
@ -238,7 +238,12 @@ export function parseCommandDispatch(raw: unknown): CommandDispatchResponse | nu
      return typeof row.name === 'string' ? { type: 'skill', name: row.name, message: str(row.message) } : null

    case 'send':
-      return typeof row.message === 'string' ? { type: 'send', message: row.message } : null
+      return typeof row.message === 'string' ? { type: 'send', message: row.message, notice: str(row.notice) } : null
+
+    case 'prefill':
+      return typeof row.message === 'string'
+        ? { type: 'prefill', message: row.message, notice: str(row.notice) }
+        : null

    default:
      return null
--- a/apps/desktop/src/lib/session-ids.test.ts
+++ b/apps/desktop/src/lib/session-ids.test.ts
@ -0,0 +1,44 @@
+import { describe, expect, it } from 'vitest'
+
+import { storedSessionIdForNotification } from './session-ids'
+
+describe('storedSessionIdForNotification', () => {
+  it('translates a runtime id back to its stored id', () => {
+    // The route is keyed by the stored id, but notifications carry the runtime
+    // id. Resolving runtime -> stored keeps notification-click navigation from
+    // resuming a non-existent stored session ("session not found").
+    const map = new Map([['stored-abc', 'runtime-123']])
+
+    expect(storedSessionIdForNotification('runtime-123', map)).toBe('stored-abc')
+  })
+
+  it('returns the id unchanged when no mapping is known', () => {
+    // A notification for a session this window never opened may already carry a
+    // stored id; let the resume/REST lookup handle it as-is.
+    const map = new Map([['stored-abc', 'runtime-123']])
+
+    expect(storedSessionIdForNotification('stored-xyz', map)).toBe('stored-xyz')
+  })
+
+  it('returns the id unchanged for an empty map', () => {
+    expect(storedSessionIdForNotification('runtime-123', new Map())).toBe('runtime-123')
+  })
+
+  it('resolves the correct stored id among several sessions', () => {
+    const map = new Map([
+      ['stored-1', 'runtime-1'],
+      ['stored-2', 'runtime-2'],
+      ['stored-3', 'runtime-3']
+    ])
+
+    expect(storedSessionIdForNotification('runtime-2', map)).toBe('stored-2')
+  })
+
+  it('does not treat a stored id as a runtime id (keys are not matched)', () => {
+    // The map is stored -> runtime. A value that only appears as a *key* must
+    // not be rewritten, otherwise an already-stored id could be mangled.
+    const map = new Map([['stored-1', 'runtime-1']])
+
+    expect(storedSessionIdForNotification('stored-1', map)).toBe('stored-1')
+  })
+})
--- a/apps/desktop/src/lib/session-ids.ts
+++ b/apps/desktop/src/lib/session-ids.ts
@ -0,0 +1,26 @@
+// The gateway tags every event — and therefore every native notification —
+// with the *runtime* session id (the key under which the session lives in the
+// gateway's in-memory `_sessions` map). The chat route, however, is keyed by
+// the *stored* session id (`stored_session_id`), which is a different value:
+// a brand-new chat gets a runtime id immediately but its stored id is assigned
+// when the first turn persists. Navigating to a runtime id therefore tries to
+// resume a stored session that does not exist ("session not found") and
+// strands the user, who experiences it as the running session being destroyed.
+//
+// `runtimeIdByStoredSessionId` maps stored -> runtime; this resolves the
+// reverse so notification-click navigation lands on the real route. The id is
+// returned unchanged when no mapping is known — it may already be a stored id
+// (e.g. a notification for a session this window never opened), in which case
+// the normal resume/REST lookup handles it.
+export function storedSessionIdForNotification(
+  id: string,
+  runtimeIdByStoredSessionId: ReadonlyMap<string, string>
+): string {
+  for (const [storedId, runtimeId] of runtimeIdByStoredSessionId) {
+    if (runtimeId === id) {
+      return storedId
+    }
+  }
+
+  return id
+}
--- a/apps/desktop/src/styles.css
+++ b/apps/desktop/src/styles.css
@ -680,6 +680,7 @@ textarea,
 [contenteditable]:not([contenteditable='false']),
 [data-slot='aui_user-message-root'],
 [data-slot='aui_assistant-message-content'],
+[data-slot='aui_system-message-root'],
 [data-selectable-text='true'],
 [data-selectable-text='true'] * {
  -webkit-user-select: text;
--- a/apps/desktop/src/types/hermes.ts
+++ b/apps/desktop/src/types/hermes.ts
@ -108,6 +108,12 @@ export interface EnvVarInfo {
  description: string
  is_password: boolean
  is_set: boolean
+  // Backend-derived provider grouping hints (from the unified provider catalog
+  // in hermes_cli/provider_catalog.py). When present, the Keys tab groups by
+  // this provider identity — the SAME one `hermes model` uses — instead of
+  // desktop-only env-var prefix guesses. Empty for non-provider env vars.
+  provider?: string
+  provider_label?: string
  redacted_value: null | string
  tools: string[]
  url: null | string
--- a/cli.py
+++ b/cli.py
@ -6959,24 +6959,43 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
            self._close_model_picker()

    def _handle_model_switch(self, cmd_original: str):
-        """Handle /model command — switch model for this session.
+        """Handle /model command — switch model.

        Supports:
          /model                              — show current model + usage hints
-          /model <name>                       — switch for this session only
-          /model <name> --global              — switch and persist to config.yaml
+          /model <name>                       — switch model (persists by default)
+          /model <name> --session             — switch for this session only
+          /model <name> --global              — switch and persist (explicit)
          /model <name> --provider <provider> — switch provider + model
          /model --provider <provider>        — switch to provider, auto-detect model
+
+        Persistence defaults to on (``model.persist_switch_by_default`` in
+        config.yaml, default True). Use ``--session`` for a one-off switch.
        """
-        from hermes_cli.model_switch import switch_model, parse_model_flags
+        from hermes_cli.model_switch import (
+            switch_model,
+            parse_model_flags,
+            resolve_persist_behavior,
+        )
        from hermes_cli.providers import get_label

        # Parse args from the original command
        parts = cmd_original.split(None, 1)  # split off '/model'
        raw_args = parts[1].strip() if len(parts) > 1 else ""

-        # Parse --provider, --global, and --refresh flags
-        model_input, explicit_provider, persist_global, force_refresh = parse_model_flags(raw_args)
+        # Parse --provider, --global, --session, and --refresh flags
+        (
+            model_input,
+            explicit_provider,
+            is_global_flag,
+            force_refresh,
+            is_session,
+        ) = parse_model_flags(raw_args)
+        # Resolve the effective persistence once: --session overrides the
+        # config-gated default, --global forces persist, otherwise defer to
+        # model.persist_switch_by_default (defaults to True so /model survives
+        # across sessions).
+        persist_global = resolve_persist_behavior(is_global_flag, is_session)

        # --refresh: wipe the on-disk picker cache before building the
        # provider list. Forces a live re-fetch of every authed provider's
@ -7024,7 +7043,8 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
            if not providers:
                _cprint("  No authenticated providers found.")
                _cprint("")
-                _cprint("  /model <name>                        switch model")
+                _cprint("  /model <name>                        switch model (persists)")
+                _cprint("  /model <name> --session              switch for this session only")
                _cprint("  /model --provider <slug>             switch provider")
                _cprint("  /model --refresh                     re-fetch live model lists")
                return
@ -7144,7 +7164,7 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
            save_config_value("model.default", result.new_model)
            if result.provider_changed:
                save_config_value("model.provider", result.target_provider)
-            _cprint("    Saved to config.yaml (--global)")
+            _cprint("    Saved to config.yaml")
        else:
            _cprint("    (session only — add --global to persist)")

@ -11917,7 +11937,13 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
            # --- /model picker modal ---
            if self._model_picker_state:
                try:
-                    self._handle_model_picker_selection()
+                    # Picker selections persist by default (same default as
+                    # /model <name>); honour model.persist_switch_by_default.
+                    from hermes_cli.model_switch import resolve_persist_behavior
+
+                    self._handle_model_picker_selection(
+                        persist_global=resolve_persist_behavior(False, False)
+                    )
                except Exception as _exc:
                    _cprint(f"  ✗ Model selection failed: {_exc}")
                    self._close_model_picker()
@ -13527,13 +13553,13 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
            style=style,
            full_screen=False,
            mouse_support=False,
-            # The status bar contains wall-clock read-outs (live prompt elapsed
-            # and idle-since-last-turn). Once a turn finishes there may be no
-            # further events to invalidate the app, so prompt_toolkit would keep
-            # rendering the first post-turn value (usually ``✓ 0s``) forever.
-            # A low-rate refresh keeps the clock honest without reintroducing a
-            # custom repaint thread or touching conversation state.
-            refresh_interval=1.0,
+            # Read from display.cli_refresh_interval (default 0 = disabled).
+            # When non-zero, prompt_toolkit redraws the UI on this cadence
+            # during idle, keeping wall-clock status-bar read-outs ticking.
+            # Set to 0 to suppress background redraws entirely — avoids
+            # fighting terminal auto-scroll in non-fullscreen mode (Xshell,
+            # iTerm2, Windows Terminal). See #48309.
+            refresh_interval=float(CLI_CONFIG.get("display", {}).get("cli_refresh_interval", 0)),
            # Erase the live bottom chrome (status bar, input box, separator
            # rules) on exit instead of freezing a final copy into scrollback.
            # Without this, prompt_toolkit's render_as_done teardown repaints
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@ -15,6 +15,7 @@ import contextvars
 import json
 import logging
 import os
+import re
 import shutil
 import subprocess
 import sys
@ -45,6 +46,59 @@ from hermes_time import now as _hermes_now
 logger = logging.getLogger(__name__)


+def _summarize_cron_failure_for_delivery(job: dict, error: str | None) -> str:
+    """Return a compact one-line failure message for chat delivery.
+
+    Full details stay in the cron output directory and the logs. Chat should
+    show the operator what broke without dumping provider JSON, retry noise, or
+    stack traces into the delivery channel.
+    """
+    job_name = job.get("name") or job.get("id") or "cron job"
+    text = (error or "unknown error").strip()
+    lower = text.lower()
+
+    # Provider/API failures are the common noisy path. Keep these short.
+    if "429" in text or "rate limit" in lower or "usage limit" in lower:
+        reason = "rate limit"
+        if "weekly usage limit" in lower:
+            reason = "weekly usage limit"
+        elif "quota" in lower:
+            reason = "quota limit"
+        return (
+            f"⚠️ Cron '{job_name}' failed: provider {reason}. "
+            "Fallback chain was exhausted or unavailable. "
+            "Full details saved in cron output."
+        )
+
+    if "readtimeout" in lower or "timed out" in lower or "timeout" in lower:
+        return (
+            f"⚠️ Cron '{job_name}' failed: provider timeout. "
+            "Fallback chain was exhausted or unavailable. "
+            "Full details saved in cron output."
+        )
+
+    # Match authentication/authorization wording at a word boundary and the
+    # 401/403 status codes as whole tokens, so "oauth", "4015" and similar do
+    # not trip a misleading auth message.
+    if re.search(r"authenticat|authoriz", lower) or re.search(r"\b(401|403)\b", text):
+        return (
+            f"⚠️ Cron '{job_name}' failed: provider authentication error. "
+            "Full details saved in cron output."
+        )
+
+    # Strip common exception wrappers and collapse provider payloads. Bound
+    # the input first so a multi-KB provider blob cannot slow the
+    # substitutions.
+    cleaned = re.sub(
+        r"^(RuntimeError|Exception|ValueError|HTTPStatusError):\s*",
+        "", text[:2000],
+    )
+    cleaned = re.sub(r"\s+", " ", cleaned).strip()
+    if len(cleaned) > 180:
+        cleaned = cleaned[:177].rstrip() + "..."
+    return f"⚠️ Cron '{job_name}' failed: {cleaned}"
+
+
 class CronPromptInjectionBlocked(Exception):
    """Raised by _build_job_prompt when the fully-assembled prompt trips the
    injection scanner. Caught in run_job so the operator sees a clean
@ -1992,7 +2046,7 @@ def run_one_job(job: dict, *, adapters=None, loop=None, verbose: bool = False) -
        # Deliver the final response to the origin/target chat.
        # If the agent responded with [SILENT], skip delivery (but
        # output is already saved above).  Failed jobs always deliver.
-        deliver_content = final_response if success else f"⚠️ Cron job '{job.get('name', job['id'])}' failed:\n{error}"
+        deliver_content = final_response if success else _summarize_cron_failure_for_delivery(job, error)
        # Treat whitespace-only final responses the same as empty
        # responses: do not deliver a blank message, and let the
        # empty-response guard below mark the run as a soft failure.
--- a/gateway/config.py
+++ b/gateway/config.py
@ -545,6 +545,13 @@ class GatewayConfig:
    thread_sessions_per_user: bool = False  # When False (default), threads are shared across all participants
    max_concurrent_sessions: Optional[int] = None  # Positive int caps simultaneous active chat sessions

+    # Multi-profile multiplexing (opt-in; default off preserves one-gateway-per-profile).
+    # When True, the default profile's gateway serves inbound messages for every
+    # profile on the host: profiles are stamped into session keys and (in later
+    # phases) per-profile adapters/credentials are resolved. When False, the
+    # gateway behaves exactly as before — single HERMES_HOME, no profile stamping.
+    multiplex_profiles: bool = False
+
    # Unauthorized DM policy
    unauthorized_dm_behavior: str = "pair"  # "pair" or "ignore"

@ -650,6 +657,7 @@ class GatewayConfig:
            "group_sessions_per_user": self.group_sessions_per_user,
            "thread_sessions_per_user": self.thread_sessions_per_user,
            "max_concurrent_sessions": self.max_concurrent_sessions,
+            "multiplex_profiles": self.multiplex_profiles,
            "unauthorized_dm_behavior": self.unauthorized_dm_behavior,
            "streaming": self.streaming.to_dict(),
            "session_store_max_age_days": self.session_store_max_age_days,
@ -695,7 +703,12 @@ class GatewayConfig:

        group_sessions_per_user = data.get("group_sessions_per_user")
        thread_sessions_per_user = data.get("thread_sessions_per_user")
+        multiplex_profiles = data.get("multiplex_profiles")
        nested_gateway = data.get("gateway") if isinstance(data.get("gateway"), dict) else {}
+        if multiplex_profiles is None and isinstance(nested_gateway, dict):
+            # Also honor gateway.multiplex_profiles written by
+            # ``hermes config set gateway.multiplex_profiles true``.
+            multiplex_profiles = nested_gateway.get("multiplex_profiles")
        if "max_concurrent_sessions" in data:
            max_concurrent_raw = data.get("max_concurrent_sessions")
            max_concurrent_key = "max_concurrent_sessions"
@ -732,6 +745,7 @@ class GatewayConfig:
            stt_enabled=_coerce_bool(stt_enabled, True),
            group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
            thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
+            multiplex_profiles=_coerce_bool(multiplex_profiles, False),
            max_concurrent_sessions=max_concurrent_sessions,
            unauthorized_dm_behavior=unauthorized_dm_behavior,
            streaming=StreamingConfig.from_dict(data.get("streaming", {})),
@ -823,6 +837,13 @@ def load_gateway_config() -> GatewayConfig:
            if "thread_sessions_per_user" in yaml_cfg:
                gw_data["thread_sessions_per_user"] = yaml_cfg["thread_sessions_per_user"]

+            # Multiplexing flag: accept both the top-level key and the nested
+            # gateway.multiplex_profiles form (from_dict resolves the nested
+            # fallback, but surface the top-level key here for parity with the
+            # other session-scope flags above).
+            if "multiplex_profiles" in yaml_cfg:
+                gw_data["multiplex_profiles"] = yaml_cfg["multiplex_profiles"]
+
            gateway_section = yaml_cfg.get("gateway")
            if isinstance(gateway_section, dict) and "max_concurrent_sessions" in gateway_section:
                gw_data["max_concurrent_sessions"] = gateway_section["max_concurrent_sessions"]
@ -2143,5 +2164,24 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
    except Exception as e:
        logger.debug("Plugin platform enable pass failed: %s", e)

+    # Relay (generic connector-fronted platform, EXPERIMENTAL). Enabled when a
+    # connector relay URL is configured via GATEWAY_RELAY_URL (env) or
+    # gateway.relay_url (config.yaml). The adapter is registered into the
+    # platform_registry at gateway startup (gateway.relay.register_relay_adapter)
+    # and dials OUT to the connector — so, like Telegram/Matrix, it has no public
+    # inbound port and just needs Platform.RELAY present+enabled in
+    # config.platforms for start_gateway()'s connect loop to bring it up. The
+    # connected-checker (Platform.RELAY in _PLATFORM_CONNECTED_CHECKERS) keys on
+    # extra["relay_url"], so mirror the URL into extra here.
+    relay_url_env = os.getenv("GATEWAY_RELAY_URL", "").strip()
+    relay_url_yaml = ""
+    existing_relay = config.platforms.get(Platform.RELAY)
+    if existing_relay is not None:
+        relay_url_yaml = str(existing_relay.extra.get("relay_url") or "").strip()
+    relay_url_val = relay_url_env or relay_url_yaml
+    if relay_url_val:
+        relay_config = _enable_from_env(Platform.RELAY)
+        relay_config.extra["relay_url"] = relay_url_val.rstrip("/")
+
    for platform_config in config.platforms.values():
        platform_config.extra.pop("_enabled_explicit", None)
--- a/gateway/kanban_watchers.py
+++ b/gateway/kanban_watchers.py
@ -23,6 +23,58 @@ from typing import Any, Optional
 logger = logging.getLogger("gateway.run")


+def _acquire_singleton_lock(lock_path) -> "tuple[Optional[object], str]":
+    """Take an exclusive, non-blocking advisory lock for the sole dispatcher.
+
+    Only one gateway process machine-wide may run the embedded kanban
+    dispatcher: concurrent dispatchers double the reclaim frequency (each
+    runs its own ``release_stale_claims`` → promote → dispatch loop), double
+    claim-attempt events in the event log, and — with ``wal_autocheckpoint=0`` —
+    concurrent manual WAL checkpoints can corrupt index pages. The
+    ``dispatch_in_gateway`` config flag is the primary control; this lock is the
+    backstop that survives config drift and same-profile restart races.
+
+    Delegates to :func:`gateway.status._try_acquire_file_lock` (``fcntl`` on
+    POSIX, ``msvcrt`` on Windows) so the guard is cross-platform.
+
+    Returns ``(handle, "held")`` on success — the caller keeps the file handle
+    for the process lifetime and **must** release it via
+    :func:`_release_singleton_lock` when done. ``(None, "contended")`` when
+    another process holds the lock (caller must NOT dispatch). ``(None,
+    "unavailable")`` when locking cannot be performed (non-POSIX filesystem
+    without flock, or the status.py helpers are unimportable) — caller falls
+    back to config-only control.
+    """
+    try:
+        from gateway.status import _try_acquire_file_lock  # deferred; same package
+    except ImportError:
+        return None, "unavailable"
+    try:
+        Path(lock_path).parent.mkdir(parents=True, exist_ok=True)
+        handle = open(str(lock_path), "a+", encoding="utf-8")
+    except OSError:
+        return None, "unavailable"
+    if not _try_acquire_file_lock(handle):
+        handle.close()
+        return None, "contended"
+    return handle, "held"
+
+
+def _release_singleton_lock(handle) -> None:
+    """Release a dispatcher singleton lock acquired via :func:`_acquire_singleton_lock`."""
+    if handle is None:
+        return
+    try:
+        from gateway.status import _release_file_lock
+        _release_file_lock(handle)
+    except Exception:
+        pass
+    try:
+        handle.close()
+    except Exception:
+        pass
+
+
 class GatewayKanbanWatchersMixin:
    """Kanban watcher / notifier / dispatcher loops for GatewayRunner."""

@ -606,6 +658,31 @@ class GatewayKanbanWatchersMixin:
            logger.warning("kanban dispatcher: kanban_db not importable; dispatcher disabled")
            return

+        # Single-dispatcher backstop. dispatch_in_gateway defaults to true, so a
+        # new profile gateway (or a same-profile restart race) can silently
+        # start a second dispatcher; concurrent dispatchers double reclaim
+        # frequency, double claim-attempt events, and — with
+        # wal_autocheckpoint=0 — concurrent manual WAL checkpoints can corrupt
+        # index pages. The lock lives at the machine-global kanban root
+        # (shared across profiles by design), so it serialises ALL gateways.
+        self._kanban_dispatcher_lock_handle = None
+        _lock_path = _kb.kanban_home() / "kanban" / ".dispatcher.lock"
+        _lock_handle, _lock_state = _acquire_singleton_lock(_lock_path)
+        if _lock_state == "contended":
+            logger.info(
+                "kanban dispatcher: another gateway already holds the dispatcher "
+                "lock (%s); this gateway will NOT dispatch.", _lock_path,
+            )
+            return
+        if _lock_state == "held":
+            self._kanban_dispatcher_lock_handle = _lock_handle  # hold for process lifetime
+            logger.info("kanban dispatcher: holding singleton dispatcher lock (%s)", _lock_path)
+        else:
+            logger.warning(
+                "kanban dispatcher: advisory lock unavailable at %s; proceeding "
+                "on config control alone.", _lock_path,
+            )
+
        try:
            interval = float(kanban_cfg.get("dispatch_interval_seconds", 60) or 60)
        except (ValueError, TypeError):
@ -1052,6 +1129,8 @@ class GatewayKanbanWatchersMixin:
                        last_warn_at = now
            except asyncio.CancelledError:
                logger.debug("kanban dispatcher: cancelled")
+                _release_singleton_lock(self._kanban_dispatcher_lock_handle)
+                self._kanban_dispatcher_lock_handle = None
                raise
            except Exception:
                logger.exception("kanban dispatcher: unexpected watcher error")
@ -1062,3 +1141,6 @@ class GatewayKanbanWatchersMixin:
            while slept < interval and self._running:
                await asyncio.sleep(min(1.0, interval - slept))
                slept += 1.0
+
+        _release_singleton_lock(self._kanban_dispatcher_lock_handle)
+        self._kanban_dispatcher_lock_handle = None
--- a/gateway/platforms/api_server.py
+++ b/gateway/platforms/api_server.py
@ -1043,7 +1043,13 @@ class APIServerAdapter(BasePlatformAdapter):
        — matching the semantics of the native gateway's ``session_key``.
        """
        from run_agent import AIAgent
-        from gateway.run import _resolve_runtime_agent_kwargs, _resolve_gateway_model, _load_gateway_config, GatewayRunner
+        from gateway.run import (
+            _current_max_iterations,
+            _resolve_runtime_agent_kwargs,
+            _resolve_gateway_model,
+            _load_gateway_config,
+            GatewayRunner,
+        )
        from hermes_cli.tools_config import _get_platform_tools

        runtime_kwargs = _resolve_runtime_agent_kwargs()
@ -1053,7 +1059,7 @@ class APIServerAdapter(BasePlatformAdapter):
        user_config = _load_gateway_config()
        enabled_toolsets = sorted(_get_platform_tools(user_config, "api_server"))

-        max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
+        max_iterations = _current_max_iterations()

        # Load fallback provider chain so the API server platform has the
        # same fallback behaviour as Telegram/Discord/Slack (fixes #4954).
--- a/gateway/platforms/webhook.py
+++ b/gateway/platforms/webhook.py
@ -57,6 +57,11 @@ from gateway.platforms.base import (

 logger = logging.getLogger(__name__)

+# Sentinel returned by _resolve_request_profile when a /p/<profile>/ prefix
+# names a profile this gateway does not serve (→ 404). Distinct from None
+# (no prefix / multiplexing off → handle as the default profile).
+_PROFILE_REJECTED = object()
+
 _BUILTIN_DELIVER_PLATFORMS = {
    "telegram", "discord", "slack", "signal", "sms", "whatsapp",
    "matrix", "mattermost", "homeassistant", "email", "dingtalk",
@ -189,6 +194,14 @@ class WebhookAdapter(BasePlatformAdapter):
        app = web.Application()
        app.router.add_get("/health", self._handle_health)
        app.router.add_post("/webhooks/{route_name}", self._handle_webhook)
+        # Multi-profile multiplexing: a /p/<profile>/webhooks/<route> prefix
+        # routes the inbound event to that profile. Same handler; the profile is
+        # captured from the path and stamped onto the SessionSource so the agent
+        # turn resolves that profile's config/skills/credentials. Only honored
+        # when gateway.multiplex_profiles is on (the handler validates).
+        app.router.add_post(
+            "/p/{profile}/webhooks/{route_name}", self._handle_webhook
+        )

        # Port conflict detection — fail fast if port is already in use
        import socket as _socket
@ -397,6 +410,35 @@ class WebhookAdapter(BasePlatformAdapter):
        except Exception as e:
            logger.error("[webhook] Failed to reload dynamic routes: %s", e)

+    def _resolve_request_profile(self, request: "web.Request"):
+        """Resolve + validate the /p/<profile>/ URL prefix on a webhook request.
+
+        Returns:
+          - ``None`` when no profile prefix is present, or multiplexing is off
+            (the prefix is ignored, request handled as the default profile).
+          - the profile name (str) when present, multiplexing is on, and the
+            profile is one this gateway serves.
+          - ``_PROFILE_REJECTED`` when a prefix is present but the profile is
+            unknown/unconfigured (handler returns 404).
+        """
+        profile = (request.match_info.get("profile") or "").strip()
+        if not profile:
+            return None
+        runner = self.gateway_runner
+        cfg = getattr(runner, "config", None)
+        if not getattr(cfg, "multiplex_profiles", False):
+            # Prefix supplied but multiplexing is off — ignore it, behave as
+            # the single-profile gateway (don't 404 a would-be valid route).
+            return None
+        try:
+            from hermes_cli.profiles import profiles_to_serve
+            served = {name for name, _ in profiles_to_serve(multiplex=True)}
+        except Exception:
+            return _PROFILE_REJECTED
+        if profile not in served:
+            return _PROFILE_REJECTED
+        return profile
+
    async def _handle_webhook(self, request: "web.Request") -> "web.Response":
        """POST /webhooks/{route_name} — receive and process a webhook event."""
        # Hot-reload dynamic subscriptions on each request (mtime-gated, cheap)
@ -405,6 +447,13 @@ class WebhookAdapter(BasePlatformAdapter):
        route_name = request.match_info.get("route_name", "")
        route_config = self._routes.get(route_name)

+        # Multi-profile: resolve + validate the /p/<profile>/ prefix if present.
+        profile = self._resolve_request_profile(request)
+        if profile is _PROFILE_REJECTED:
+            return web.json_response(
+                {"error": "Unknown or unconfigured profile"}, status=404
+            )
+
        if not route_config:
            return web.json_response(
                {"error": f"Unknown route: {route_name}"}, status=404
@ -641,6 +690,8 @@ class WebhookAdapter(BasePlatformAdapter):
            user_id=f"webhook:{route_name}",
            user_name=route_name,
        )
+        if profile and isinstance(profile, str):
+            source.profile = profile
        event = MessageEvent(
            text=prompt,
            message_type=MessageType.TEXT,
--- a/gateway/relay/adapter.py
+++ b/gateway/relay/adapter.py
@ -57,6 +57,13 @@ class RelayAdapter(BasePlatformAdapter):
        self._transport = transport
        # Capability surface read by stream_consumer (getattr(..., 4096)).
        self.MAX_MESSAGE_LENGTH = descriptor.max_message_length
+        # chat_id -> guild_id (Discord) / workspace scope, learned from inbound
+        # events. The connector's egress guard resolves the owning tenant from
+        # the OUTBOUND action's metadata.guild_id; the gateway's generic delivery
+        # path (run.py _thread_metadata_for_source) only carries thread_id, so we
+        # re-attach the scope here from what we saw inbound. Keyed by chat_id
+        # (channel) since that's what send() receives. See routedEgressGuard.ts.
+        self._scope_by_chat: Dict[str, str] = {}
        self.supports_code_blocks = descriptor.markdown_dialect not in ("", "plain")

    # ── capability surface (from descriptor) ─────────────────────────────
@ -108,8 +115,35 @@ class RelayAdapter(BasePlatformAdapter):

    async def _on_inbound(self, event) -> None:
        """Bridge a connector-delivered MessageEvent into the normal adapter path."""
+        self._capture_scope(event)
        await self.handle_message(event)

+    def _capture_scope(self, event) -> None:
+        """Remember chat_id -> guild scope from an inbound event so our outbound
+        (the agent's reply) can re-assert it for the connector's egress tenant
+        resolution. Never raises — scope tracking must not break inbound."""
+        try:
+            src = getattr(event, "source", None)
+            scope = getattr(src, "guild_id", None) if src else None
+            chat = getattr(src, "chat_id", None) if src else None
+            if scope and chat:
+                self._scope_by_chat[str(chat)] = str(scope)
+        except Exception:  # noqa: BLE001 - scope tracking must never break inbound
+            pass
+
+    def _with_scope(self, chat_id: str, metadata: Optional[Dict[str, Any]]) -> Dict[str, Any]:
+        """Ensure the outbound metadata carries guild_id for the connector's
+        egress tenant resolution. The connector resolves the owning tenant from
+        metadata.guild_id (Discord); without it egress is declined as
+        'target not routed to an onboarded tenant'. No-op when we have no scope
+        for this chat (e.g. DMs) or it's already present."""
+        meta: Dict[str, Any] = dict(metadata or {})
+        if not meta.get("guild_id"):
+            scope = self._scope_by_chat.get(str(chat_id))
+            if scope:
+                meta["guild_id"] = scope
+        return meta
+
    async def on_interrupt(self, session_key: str, chat_id: str) -> None:
        """Bridge a connector-delivered /stop into the adapter's interrupt path.

@ -140,7 +174,7 @@ class RelayAdapter(BasePlatformAdapter):
                "chat_id": chat_id,
                "content": content,
                "reply_to": reply_to,
-                "metadata": metadata or {},
+                "metadata": self._with_scope(chat_id, metadata),
            }
        )
        return SendResult(
--- a/gateway/relay/ws_transport.py
+++ b/gateway/relay/ws_transport.py
@ -54,6 +54,35 @@ _HANDSHAKE_TIMEOUT_S = 30.0
 _OUTBOUND_TIMEOUT_S = 30.0


+def _ws_dial_url(url: str) -> str:
+    """Normalize a connector URL to the ``ws(s)://…/relay`` dial target.
+
+    The relay URL is configured once (``GATEWAY_RELAY_URL`` / ``gateway.relay_url``)
+    as the connector's BASE URL (e.g. ``https://connector.example``) and shared by
+    both the provision POST (which needs ``http(s)://…/relay/provision`` — see
+    ``_provision_url``) and the WS dial (which needs ``ws(s)://…/relay``, the path
+    the connector mounts its ``WebSocketServer`` on). Two normalizations, both
+    load-bearing:
+
+      - scheme: ``https -> wss``, ``http -> ws`` (``websockets.connect`` raises
+        "scheme isn't ws or wss" on an http(s) URL).
+      - path: ensure it ends in ``/relay`` (the connector returns HTTP 400 on an
+        upgrade to any other path, since the WS server is mounted at ``/relay``).
+
+    Idempotent: an already-``ws(s)://…/relay`` URL is returned unchanged, so a URL
+    configured WITH the scheme and/or ``/relay`` still works.
+    """
+    raw = (url or "").strip()
+    if raw.startswith("https://"):
+        raw = "wss://" + raw[len("https://"):]
+    elif raw.startswith("http://"):
+        raw = "ws://" + raw[len("http://"):]
+    raw = raw.rstrip("/")
+    if not raw.endswith("/relay"):
+        raw = f"{raw}/relay"
+    return raw
+
+
 def _event_from_wire(raw: Dict[str, Any]) -> MessageEvent:
    """Rebuild a MessageEvent from the connector's normalized inbound payload.

@ -118,7 +147,7 @@ class WebSocketRelayTransport:
                "WebSocketRelayTransport requires the 'websockets' package "
                "(install the messaging extra)."
            )
-        self._url = url
+        self._url = _ws_dial_url(url)
        self._platform = platform
        self._bot_id = bot_id
        self._connect_timeout_s = connect_timeout_s
--- a/gateway/run.py
+++ b/gateway/run.py
@ -195,6 +195,19 @@ def _gateway_platform_value(platform: Any) -> str:
    return str(getattr(platform, "value", platform) or "").strip().lower()


+def _non_conversational_metadata(
+    metadata: Optional[Dict[str, Any]] = None,
+    *,
+    platform: Any = None,
+) -> Optional[Dict[str, Any]]:
+    """Mark Discord lifecycle/status sends without changing other platforms."""
+    if _gateway_platform_value(platform) != "discord":
+        return metadata
+    merged = dict(metadata or {})
+    merged["non_conversational"] = True
+    return merged
+
+
 def _is_transient_network_error(exc: BaseException) -> bool:
    """Return True for transient network errors safe to log + swallow.

@ -1173,13 +1186,31 @@ def _reload_runtime_env_preserving_config_authority() -> None:
    pick up rotated API keys. config.yaml remains authoritative for agent budget
    settings such as agent.max_turns; otherwise a stale HERMES_MAX_ITERATIONS in
    .env can replace the startup bridge on later turns.
+
+    In multiplex mode this is a NO-OP for the credential reload: secrets come
+    from the per-turn ``set_secret_scope`` (installed by ``_profile_runtime_scope``)
+    which loads the routed profile's ``.env`` into an isolated mapping. Mutating
+    the process-global ``os.environ`` here would defeat that isolation and leak
+    the default profile's keys to every profile's turns and subprocesses.
    """
+    from agent.secret_scope import is_multiplex_active
+    if is_multiplex_active():
+        # Credentials are resolved from the active profile's secret scope, not
+        # os.environ. Still honor config.yaml's agent.max_turns bridge below
+        # using the scoped home, but never reload .env into global env.
+        _bridge_max_turns_from_config(_hermes_home)
+        return
+
    load_hermes_dotenv(
        hermes_home=_hermes_home,
        project_env=Path(__file__).resolve().parents[1] / '.env',
    )
+    _bridge_max_turns_from_config(_hermes_home)

-    config_path = _hermes_home / 'config.yaml'
+
+def _bridge_max_turns_from_config(home: "Path") -> None:
+    """Bridge config.yaml agent.max_turns into HERMES_MAX_ITERATIONS (a global)."""
+    config_path = home / 'config.yaml'
    if not config_path.exists():
        return
    try:
@ -1196,6 +1227,80 @@ def _reload_runtime_env_preserving_config_authority() -> None:
        os.environ["HERMES_MAX_ITERATIONS"] = str(agent_cfg["max_turns"])


+def _current_max_iterations() -> int:
+    """Return the current per-turn iteration budget after runtime env refresh."""
+    _reload_runtime_env_preserving_config_authority()
+    try:
+        return int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
+    except (TypeError, ValueError):
+        return 90
+
+
+from contextlib import contextmanager as _contextmanager
+
+
+# Platforms that bind a host TCP port (HTTP/webhook listeners). In a profile
+# multiplexer the default profile owns the single shared listener and serves
+# every profile through the /p/<profile>/ URL prefix, so a SECONDARY profile
+# enabling one of these is always a misconfiguration: it would try to bind a
+# port already held by the default's listener. We hard-error on it rather than
+# silently dropping the adapter (see _start_one_profile_adapters).
+# Stored as platform .value strings since the Platform enum is imported below.
+_PORT_BINDING_PLATFORM_VALUES = frozenset({
+    "webhook",
+    "api_server",
+    "msgraph_webhook",
+    "feishu",
+    "wecom_callback",
+    "bluebubbles",
+    "sms",
+})
+
+
+class MultiplexConfigError(RuntimeError):
+    """A profile multiplexer config is invalid (fail-fast at startup).
+
+    Distinct from a transient adapter-connect failure: a transient error is
+    logged and the gateway stays alive to retry, but a config error means the
+    operator must fix config.yaml, so it aborts startup cleanly.
+    """
+
+
+@_contextmanager
+def _profile_runtime_scope(profile_home: "Path"):
+    """Scope config/skills/memory AND credentials to a profile for one turn.
+
+    Combines the two seams the multiplexer needs:
+      1. ``set_hermes_home_override`` — redirects ``get_hermes_home()`` (config,
+         skills, memory, SOUL, sessions) to the profile's home. Contextvar, so
+         it propagates into the agent worker thread via ``copy_context()``.
+      2. ``set_secret_scope`` — installs the profile's ``.env`` secrets as the
+         authoritative credential source, so ``get_secret`` reads this profile's
+         keys and never the process-global ``os.environ`` (which in a
+         multiplexer may hold another profile's values).
+
+    Only used on the multiplexed inbound path. Single-profile gateways never
+    enter this scope, so their behavior is unchanged. Loading the profile's
+    ``.env`` here does NOT mutate ``os.environ`` — ``build_profile_secret_scope``
+    returns an isolated dict — which is what keeps subprocesses (MCP, kanban)
+    from inheriting cross-profile secrets.
+    """
+    from hermes_constants import set_hermes_home_override, reset_hermes_home_override
+    from agent.secret_scope import (
+        build_profile_secret_scope,
+        set_secret_scope,
+        reset_secret_scope,
+    )
+
+    home_token = set_hermes_home_override(str(profile_home))
+    secret_token = set_secret_scope(build_profile_secret_scope(Path(profile_home)))
+    try:
+        yield
+    finally:
+        reset_secret_scope(secret_token)
+        reset_hermes_home_override(home_token)
+
+
 _DOCKER_VOLUME_SPEC_RE = re.compile(r"^(?P<host>.+):(?P<container>/[^:]+?)(?::(?P<options>[^:]+))?$")
 _DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS = {"/output", "/outputs"}

@ -2240,7 +2345,22 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
    def __init__(self, config: Optional[GatewayConfig] = None):
        global _gateway_runner_ref
        self.config = config or load_gateway_config()
+        # Mark the process as a profile multiplexer when configured. This flips
+        # agent.secret_scope.get_secret() to fail-closed on any unscoped
+        # credential read, so a missed migration crashes loudly instead of
+        # leaking a cross-profile value (Workstream A). Inert when off.
+        try:
+            from agent.secret_scope import set_multiplex_active
+            set_multiplex_active(bool(getattr(self.config, "multiplex_profiles", False)))
+        except Exception:
+            logger.debug("could not set multiplex-active flag", exc_info=True)
        self.adapters: Dict[Platform, BasePlatformAdapter] = {}
+        # Multi-profile multiplexing: adapters for NON-default profiles live
+        # here, keyed by profile name then Platform. self.adapters stays the
+        # default/active profile's map so the ~93 existing self.adapters[...]
+        # sites are untouched when multiplexing is off (this dict is empty).
+        # Populated by _start_secondary_profile_adapters().
+        self._profile_adapters: Dict[str, Dict[Platform, BasePlatformAdapter]] = {}
        self._warn_if_docker_media_delivery_is_risky()
        _gateway_runner_ref = _weakref.ref(self)

@ -2792,10 +2912,24 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
            except Exception:
                pass
        config = getattr(self, "config", None)
+        # Mirror SessionStore._resolve_profile_for_key so this fallback path
+        # produces the same namespace as the primary path: None (legacy
+        # agent:main) unless multiplexing is on, then the active profile.
+        _profile = None
+        if getattr(config, "multiplex_profiles", False):
+            if source.profile:
+                _profile = source.profile
+            else:
+                try:
+                    from hermes_cli.profiles import get_active_profile_name
+                    _profile = get_active_profile_name() or "default"
+                except Exception:
+                    _profile = None
        return build_session_key(
            source,
            group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
            thread_sessions_per_user=getattr(config, "thread_sessions_per_user", False),
+            profile=_profile,
        )

    def _telegram_topic_mode_enabled(self, source: SessionSource) -> bool:
@ -5335,7 +5469,30 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                    "attempts": 1,
                    "next_retry": time.monotonic() + 30,
                }
-        
+
+        # Multi-profile multiplexing: bring up adapters for every OTHER profile
+        # this gateway serves. Each profile's adapters connect under that
+        # profile's home + credential scope and stamp their inbound events with
+        # the profile so the agent turn resolves correctly. No-op when off.
+        try:
+            _secondary_connected = await self._start_secondary_profile_adapters()
+            connected_count += _secondary_connected
+        except MultiplexConfigError as e:
+            # Invalid multiplexer config — abort startup cleanly so the operator
+            # fixes config.yaml rather than running a half-wired gateway.
+            reason = str(e)
+            logger.error("Gateway multiplexer config error: %s", reason)
+            try:
+                from gateway.status import write_runtime_status
+                write_runtime_status(gateway_state="startup_failed", exit_reason=reason)
+            except Exception:
+                pass
+            self._request_clean_exit(reason)
+            self._startup_restore_in_progress = False
+            return True
+        except Exception as e:
+            logger.error("Secondary-profile adapter startup failed: %s", e, exc_info=True)
+
        if connected_count == 0:
            if startup_nonretryable_errors:
                reason = "; ".join(startup_nonretryable_errors)
@ -6342,6 +6499,22 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                        time.monotonic() - _adapter_started_at,
                        e,
                    )
+
+            # Disconnect secondary-profile adapters (multiplex mode).
+            for _prof, _amap in list(getattr(self, "_profile_adapters", {}).items()):
+                for platform, adapter in list(_amap.items()):
+                    try:
+                        await adapter.cancel_background_tasks()
+                    except Exception as e:
+                        logger.debug("✗ %s bg-cancel error (profile %s): %s", platform.value, _prof, e)
+                    try:
+                        await adapter.disconnect()
+                        logger.info("✓ %s disconnected (profile: %s)", platform.value, _prof)
+                    except Exception as e:
+                        logger.error("✗ %s disconnect error (profile %s): %s", platform.value, _prof, e)
+                _amap.clear()
+            if hasattr(self, "_profile_adapters"):
+                self._profile_adapters.clear()
            logger.info(
                "Shutdown phase: all adapters disconnected at +%.2fs",
                _phase_elapsed(),
@ -6511,6 +6684,175 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
        """Wait for shutdown signal."""
        await self._shutdown_event.wait()

+    async def _start_secondary_profile_adapters(self) -> int:
+        """Bring up adapters for every non-active profile this gateway serves.
+
+        Returns the number of secondary adapters that connected. No-op (returns
+        0) unless ``gateway.multiplex_profiles`` is on.
+
+        Each profile's adapters are created and connected under that profile's
+        HERMES_HOME + secret scope (``_profile_runtime_scope``), stored in
+        ``self._profile_adapters[profile]``, and given a message handler that
+        stamps ``source.profile`` before delegating to the shared
+        ``_handle_message`` — so the agent turn resolves that profile's config,
+        skills, and credentials. Same-platform credential collisions (two
+        profiles polling the same bot token) are detected and refused here, the
+        only point that sees every profile's resolved credentials together.
+        """
+        if not getattr(self.config, "multiplex_profiles", False):
+            return 0
+
+        try:
+            from hermes_cli.profiles import profiles_to_serve, get_active_profile_name
+        except Exception:
+            return 0
+
+        active = get_active_profile_name() or "default"
+        connected = 0
+        # (platform, token-fingerprint) -> profile that claimed it. Detects two
+        # profiles trying to poll the same bot credential (impossible to do
+        # concurrently). Seed with the active profile's adapters.
+        claimed: Dict[tuple, str] = {}
+        for _plat, _ad in self.adapters.items():
+            fp = self._adapter_credential_fingerprint(_ad)
+            if fp is not None:
+                claimed[(_plat, fp)] = active
+
+        for profile_name, profile_home in profiles_to_serve(multiplex=True):
+            if profile_name == active:
+                continue  # handled by the primary startup loop
+            try:
+                connected += await self._start_one_profile_adapters(
+                    profile_name, profile_home, claimed
+                )
+            except MultiplexConfigError:
+                # Config error (e.g. a secondary profile binding a port) is not
+                # transient — propagate so startup aborts cleanly instead of
+                # limping along with a half-configured multiplexer.
+                raise
+            except Exception as e:
+                logger.error(
+                    "Failed to start adapters for profile '%s': %s",
+                    profile_name, e, exc_info=True,
+                )
+
+        # Record served profiles in runtime status for `hermes status`.
+        try:
+            from gateway.status import write_runtime_status
+            served = [active] + sorted(self._profile_adapters.keys())
+            write_runtime_status(served_profiles=served)
+        except Exception:
+            logger.debug("could not record served_profiles", exc_info=True)
+
+        return connected
+
+    async def _start_one_profile_adapters(
+        self, profile_name: str, profile_home: "Path", claimed: Dict[tuple, str]
+    ) -> int:
+        """Create+connect one profile's adapters under its runtime scope."""
+        from gateway.config import load_gateway_config
+
+        with _profile_runtime_scope(profile_home):
+            profile_cfg = load_gateway_config()
+
+        profile_map = self._profile_adapters.setdefault(profile_name, {})
+        connected = 0
+        for platform, platform_config in profile_cfg.platforms.items():
+            if not platform_config.enabled:
+                continue
+            # A secondary profile must NOT enable a port-binding platform: the
+            # default profile's listener already serves every profile via the
+            # /p/<profile>/ prefix, so a second bind can only collide. This is a
+            # config error, not a transient failure — fail fast and loud.
+            if platform.value in _PORT_BINDING_PLATFORM_VALUES:
+                raise MultiplexConfigError(
+                    f"Profile '{profile_name}' enables the port-binding platform "
+                    f"'{platform.value}', but gateway.multiplex_profiles is on. The "
+                    f"default profile owns the single shared HTTP listener and "
+                    f"serves every profile through the /p/{profile_name}/ URL "
+                    f"prefix — a secondary profile cannot bind its own port. "
+                    f"Remove platforms.{platform.value} from profile "
+                    f"'{profile_name}'s config.yaml (configure it only on the "
+                    f"default profile)."
+                )
+            with _profile_runtime_scope(profile_home):
+                adapter = self._create_adapter(platform, platform_config)
+            if not adapter:
+                continue
+
+            # Same-token conflict detection — refuse a duplicate poll.
+            fp = self._adapter_credential_fingerprint(adapter)
+            if fp is not None:
+                owner = claimed.get((platform, fp))
+                if owner is not None:
+                    logger.error(
+                        "Profile '%s' and '%s' both configure %s with the same "
+                        "credential — refusing to start the duplicate (a single "
+                        "bot token cannot be polled twice). Give each profile its "
+                        "own %s credential.",
+                        owner, profile_name, platform.value, platform.value,
+                    )
+                    await self._safe_adapter_disconnect(adapter, platform)
+                    continue
+                claimed[(platform, fp)] = profile_name
+
+            # Stamp every inbound event from this adapter with its profile so
+            # the agent turn (and session key) resolve to the right home.
+            adapter.set_message_handler(
+                self._make_profile_message_handler(profile_name)
+            )
+            adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
+            adapter.set_session_store(self.session_store)
+            adapter.set_busy_session_handler(self._handle_active_session_busy_message)
+            adapter.set_topic_recovery_fn(self._recover_telegram_topic_thread_id)
+            adapter._busy_text_mode = self._busy_text_mode
+
+            try:
+                with _profile_runtime_scope(profile_home):
+                    success = await self._connect_adapter_with_timeout(adapter, platform)
+                if success:
+                    profile_map[platform] = adapter
+                    connected += 1
+                    logger.info("✓ %s connected (profile: %s)", platform.value, profile_name)
+                else:
+                    logger.warning("✗ %s failed to connect (profile: %s)", platform.value, profile_name)
+                    await self._safe_adapter_disconnect(adapter, platform)
+            except Exception as e:
+                logger.error("✗ %s error (profile: %s): %s", platform.value, profile_name, e)
+                await self._safe_adapter_disconnect(adapter, platform)
+        return connected
+
+    def _make_profile_message_handler(self, profile_name: str):
+        """Return a message handler that stamps source.profile then delegates."""
+        async def _handler(event):
+            try:
+                if getattr(event, "source", None) is not None and not event.source.profile:
+                    event.source.profile = profile_name
+            except Exception:
+                pass
+            return await self._handle_message(event)
+        return _handler
+
+    @staticmethod
+    def _adapter_credential_fingerprint(adapter: Any) -> Optional[str]:
+        """Return a stable, log-safe fingerprint of an adapter's credential.
+
+        Used only to detect two profiles claiming the same bot token. Returns a
+        salted hash (never the token itself) of the adapter's primary
+        credential, or None when no credential is discoverable (in which case
+        we don't attempt conflict detection for it).
+        """
+        token = None
+        for attr in ("token", "bot_token", "_token", "api_token", "_bot_token"):
+            val = getattr(adapter, attr, None)
+            if isinstance(val, str) and val.strip():
+                token = val.strip()
+                break
+        if not token:
+            return None
+        import hashlib
+        return hashlib.sha256(("hermes-mux:" + token).encode("utf-8")).hexdigest()[:16]
+
    def _create_adapter(
        self, 
        platform: Platform, 
@ -10633,7 +10975,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
            disabled_toolsets = agent_cfg.get("disabled_toolsets") or None

            pr = self._provider_routing
-            max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
+            max_iterations = _current_max_iterations()
            reasoning_config = self._resolve_session_reasoning_config(source=source)
            self._reasoning_config = reasoning_config
            self._service_tier = self._load_service_tier()
@ -11737,7 +12079,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
            chunks = [clean[i:i + max_chunk] for i in range(0, len(clean), max_chunk)]
            for chunk in chunks:
                try:
-                    await adapter.send(chat_id, f"```\n{chunk}\n```", metadata=metadata)
+                    await adapter.send(
+                        chat_id,
+                        f"```\n{chunk}\n```",
+                        metadata=_non_conversational_metadata(metadata, platform=platform),
+                    )
                except Exception as e:
                    logger.debug("Update stream send failed: %s", e)

@ -11760,12 +12106,16 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                    exit_code_raw = exit_code_path.read_text().strip() or "1"
                    exit_code = int(exit_code_raw)
                    if exit_code == 0:
-                        await adapter.send(chat_id, "✅ Hermes update finished.", metadata=metadata)
+                        await adapter.send(
+                            chat_id,
+                            "✅ Hermes update finished.",
+                            metadata=_non_conversational_metadata(metadata, platform=platform),
+                        )
                    else:
                        await adapter.send(
                            chat_id,
                            "❌ Hermes update failed (exit code {}).".format(exit_code),
-                            metadata=metadata,
+                            metadata=_non_conversational_metadata(metadata, platform=platform),
                        )
                    logger.info("Update finished (exit=%s), notified %s", exit_code, session_key)
                except Exception as e:
@ -11816,7 +12166,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                                    prompt=prompt_text,
                                    default=default,
                                    session_key=session_key,
-                                    metadata=metadata,
+                                    metadata=_non_conversational_metadata(metadata, platform=platform),
                                )
                                sent_buttons = True
                            except Exception as btn_err:
@ -11830,7 +12180,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                                f"{prompt_text}{default_hint}\n\n"
                                f"Reply `{_p}approve` (yes) or `{_p}deny` (no), "
                                f"or type your answer directly.",
-                                metadata=metadata,
+                                metadata=_non_conversational_metadata(metadata, platform=platform),
                            )
                        # Keep the prompt marker on disk until the user
                        # answers. If the gateway restarts mid-prompt, the
@ -11854,7 +12204,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                await adapter.send(
                    chat_id,
                    "❌ Hermes update timed out after 30 minutes.",
-                    metadata=metadata,
+                    metadata=_non_conversational_metadata(metadata, platform=platform),
                )
            except Exception:
                pass
@ -11960,7 +12310,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                    msg = "✅ Hermes update finished successfully."
                else:
                    msg = "❌ Hermes update failed. Check the gateway logs or run `hermes update` manually for details."
-                await adapter.send(chat_id, msg, metadata=metadata)
+                await adapter.send(
+                    chat_id,
+                    msg,
+                    metadata=_non_conversational_metadata(metadata, platform=platform),
+                )
                logger.info(
                    "Sent post-update notification to %s:%s (exit=%s)",
                    platform_str,
@ -12023,7 +12377,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
            result = await adapter.send(
                str(chat_id),
                "♻ Gateway restarted successfully. Your session continues.",
-                metadata=metadata,
+                metadata=_non_conversational_metadata(metadata, platform=platform),
            )
            # adapter.send() catches provider errors (e.g. "Chat not found")
            # and returns SendResult(success=False) rather than raising, so
@ -12090,9 +12444,21 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                    adapter=adapter,
                )
                if metadata:
-                    result = await adapter.send(str(home.chat_id), message, metadata=metadata)
+                    result = await adapter.send(
+                        str(home.chat_id),
+                        message,
+                        metadata=_non_conversational_metadata(metadata, platform=platform),
+                    )
                else:
-                    result = await adapter.send(str(home.chat_id), message)
+                    _startup_meta = _non_conversational_metadata(platform=platform)
+                    if _startup_meta:
+                        result = await adapter.send(
+                            str(home.chat_id),
+                            message,
+                            metadata=_startup_meta,
+                        )
+                    else:
+                        result = await adapter.send(str(home.chat_id), message)
                if result is not None and getattr(result, "success", True) is False:
                    logger.warning(
                        "Home-channel startup notification failed for %s:%s: %s",
@ -12733,7 +13099,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                    if adapter and chat_id:
                        try:
                            send_meta = {"thread_id": thread_id} if thread_id else None
-                            await adapter.send(chat_id, message_text, metadata=send_meta)
+                            await adapter.send(
+                                chat_id,
+                                message_text,
+                                metadata=_non_conversational_metadata(send_meta, platform=platform_name),
+                            )
                        except Exception as e:
                            logger.error("Watcher delivery error: %s", e)
                break
@ -12754,7 +13124,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                if adapter and chat_id:
                    try:
                        send_meta = {"thread_id": thread_id} if thread_id else None
-                        await adapter.send(chat_id, message_text, metadata=send_meta)
+                        await adapter.send(
+                            chat_id,
+                            message_text,
+                            metadata=_non_conversational_metadata(send_meta, platform=platform_name),
+                        )
                    except Exception as e:
                        logger.error("Watcher delivery error: %s", e)

@ -13740,6 +14114,64 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
        channel_prompt: Optional[str] = None,
        persist_user_message: Optional[str] = None,
        persist_user_timestamp: Optional[float] = None,
+    ) -> Dict[str, Any]:
+        """Profile-scoping wrapper around the agent run.
+
+        When multiplexing is active, resolve the inbound source's profile and
+        run the whole turn inside ``_profile_runtime_scope`` so config/skills/
+        memory resolve to that profile's home AND credentials resolve from that
+        profile's secret scope (never the process-global ``os.environ``). When
+        multiplexing is off this is a transparent pass-through — zero behavior
+        change for single-profile gateways.
+        """
+        if not getattr(getattr(self, "config", None), "multiplex_profiles", False):
+            return await self._run_agent_inner(
+                message, context_prompt, history, source, session_id,
+                session_key=session_key, run_generation=run_generation,
+                _interrupt_depth=_interrupt_depth, event_message_id=event_message_id,
+                channel_prompt=channel_prompt, persist_user_message=persist_user_message,
+                persist_user_timestamp=persist_user_timestamp,
+            )
+
+        profile_home = self._resolve_profile_home_for_source(source)
+        with _profile_runtime_scope(profile_home):
+            return await self._run_agent_inner(
+                message, context_prompt, history, source, session_id,
+                session_key=session_key, run_generation=run_generation,
+                _interrupt_depth=_interrupt_depth, event_message_id=event_message_id,
+                channel_prompt=channel_prompt, persist_user_message=persist_user_message,
+                persist_user_timestamp=persist_user_timestamp,
+            )
+
+    def _resolve_profile_home_for_source(self, source: SessionSource) -> "Path":
+        """Resolve which profile's HERMES_HOME should serve this inbound source.
+
+        Prefers the profile the source was routed to (``source.profile`` — set
+        by the /p/<profile>/ URL prefix or a per-credential adapter), falling
+        back to the active profile (the multiplexer's own home).
+        """
+        from hermes_cli.profiles import get_active_profile_name, get_profile_dir
+        try:
+            name = (source.profile or "").strip() or get_active_profile_name() or "default"
+            return get_profile_dir(name)
+        except Exception:
+            from hermes_constants import get_hermes_home
+            return get_hermes_home()
+
+    async def _run_agent_inner(
+        self,
+        message: str,
+        context_prompt: str,
+        history: List[Dict[str, Any]],
+        source: SessionSource,
+        session_id: str,
+        session_key: str = None,
+        run_generation: Optional[int] = None,
+        _interrupt_depth: int = 0,
+        event_message_id: Optional[str] = None,
+        channel_prompt: Optional[str] = None,
+        persist_user_message: Optional[str] = None,
+        persist_user_timestamp: Optional[float] = None,
    ) -> Dict[str, Any]:
        """
        Run the agent with the given message and context.
@ -14135,6 +14567,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
            if _progress_thread_id == source.thread_id
            else {"thread_id": _progress_thread_id}
        ) if _progress_thread_id else None
+        _progress_metadata = _non_conversational_metadata(_progress_metadata, platform=source.platform)
        _progress_reply_to = (
            event_message_id
            if source.platform in (Platform.FEISHU, Platform.MATTERMOST) and source.thread_id and event_message_id
@ -14581,9 +15014,6 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
            # session_key is now set via contextvars in _set_session_env()
            # (concurrency-safe). Keep os.environ as fallback for CLI/cron.
            os.environ["HERMES_SESSION_KEY"] = session_key or ""
-
-            # Read from env var or use default (same as CLI)
-            max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
            
            # Map platform enum to the platform hint key the agent understands.
            # Platform.LOCAL ("local") maps to "cli"; others pass through as-is.
@ -14598,10 +15028,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
            if self._ephemeral_system_prompt:
                combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip()

-            # Re-read .env and config for fresh credentials (gateway is long-lived,
-            # keys may change without restart). Keep config.yaml authoritative for
-            # runtime budget settings bridged into env vars.
-            _reload_runtime_env_preserving_config_authority()
+            max_iterations = _current_max_iterations()

            try:
                model, runtime_kwargs = self._resolve_session_agent_runtime(
@ -14799,6 +15226,9 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                                except KeyError:
                                    pass
                            self._init_cached_agent_for_turn(agent, _interrupt_depth)
+                            # Refresh agent max_iterations from current config
+                            # (cached agent may have been created with old config)
+                            agent.max_iterations = max_iterations
                            logger.debug("Reusing cached agent for session %s", session_key)

            if agent is None:
@ -14900,7 +15330,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                    _status_adapter.send(
                        _status_chat_id,
                        message,
-                        metadata=_status_thread_metadata,
+                        metadata=_non_conversational_metadata(_status_thread_metadata, platform=source.platform),
                    ),
                    _loop_for_step,
                    logger=logger,
@ -15742,7 +16172,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                        _notify_res = await _notify_adapter.send(
                            source.chat_id,
                            _heartbeat_text,
-                            metadata=_status_thread_metadata,
+                            metadata=_non_conversational_metadata(_status_thread_metadata, platform=source.platform),
                        )
                        if getattr(_notify_res, "success", False) and getattr(
                            _notify_res, "message_id", None
--- a/gateway/session.py
+++ b/gateway/session.py
@ -92,6 +92,11 @@ class SessionSource:
    parent_chat_id: Optional[str] = None  # Parent channel when chat_id refers to a thread
    message_id: Optional[str] = None  # ID of the triggering message (for pin/reply/react)
    role_authorized: bool = False  # True when adapter granted access via role (not user ID)
+    # Profile this inbound message is routed to in a multiplexing gateway
+    # (from the /p/<profile>/ URL prefix or per-credential adapter ownership).
+    # None => the gateway's active/default profile. Drives both session-key
+    # namespacing and the per-turn config/credential scope.
+    profile: Optional[str] = None
    
    @property
    def description(self) -> str:
@ -135,6 +140,8 @@ class SessionSource:
            d["parent_chat_id"] = self.parent_chat_id
        if self.message_id:
            d["message_id"] = self.message_id
+        if self.profile:
+            d["profile"] = self.profile
        return d

    @classmethod
@ -153,6 +160,7 @@ class SessionSource:
            guild_id=data.get("guild_id"),
            parent_chat_id=data.get("parent_chat_id"),
            message_id=data.get("message_id"),
+            profile=data.get("profile"),
        )
    

@ -615,15 +623,41 @@ def is_shared_multi_user_session(
    return not group_sessions_per_user


+def _session_key_namespace(profile: Optional[str]) -> str:
+    """Return the ``agent:<ns>`` namespace prefix for a session key.
+
+    The historical key format is ``agent:main:<platform>:<chat_type>:...`` where
+    ``main`` is a static namespace literal (NOT a branch name — branching keys
+    off ``session_id``, not this slot). Multi-profile multiplexing reuses this
+    slot to carry the profile:
+
+    - default profile (or ``None``/``""``/``"default"``) → ``agent:main`` —
+      BYTE-IDENTICAL to every key ever generated, so existing sessions and all
+      positional parsers (``parts[2]`` == platform, etc.) are unaffected.
+    - named profile ``coder`` → ``agent:coder`` — keeps the same positional
+      layout, just a different namespace, so two profiles serving the same
+      platform/chat never collide.
+    """
+    if not profile or profile == "default":
+        return "agent:main"
+    return f"agent:{profile}"
+
+
 def build_session_key(
    source: SessionSource,
    group_sessions_per_user: bool = True,
    thread_sessions_per_user: bool = False,
+    profile: Optional[str] = None,
 ) -> str:
    """Build a deterministic session key from a message source.

    This is the single source of truth for session key construction.

+    ``profile`` selects the key namespace (see :func:`_session_key_namespace`).
+    It defaults to ``None`` ⇒ the legacy ``agent:main`` namespace, so callers
+    that don't multiplex produce byte-identical keys to before. Only the
+    multiplexing gateway passes a non-default profile.
+
    DM rules:
      - DMs include chat_id when present, so each private conversation is isolated.
      - thread_id further differentiates threaded DMs within the same DM chat.
@ -643,6 +677,7 @@ def build_session_key(
        shared session per chat.
      - Without identifiers, messages fall back to one session per platform/chat_type.
    """
+    ns = _session_key_namespace(profile)
    platform = source.platform.value
    if source.chat_type == "dm":
        dm_chat_id = source.chat_id
@ -651,12 +686,12 @@ def build_session_key(

        if dm_chat_id:
            if source.thread_id:
-                return f"agent:main:{platform}:dm:{dm_chat_id}:{source.thread_id}"
-            return f"agent:main:{platform}:dm:{dm_chat_id}"
+                return f"{ns}:{platform}:dm:{dm_chat_id}:{source.thread_id}"
+            return f"{ns}:{platform}:dm:{dm_chat_id}"
        # No chat_id — fall back to the sender's own identifier before the
        # bare per-platform sink.  Without this, every DM from every user that
        # arrives without a chat_id (non-standard adapters / synthetic sources)
-        # collapses into one shared "agent:main:<platform>:dm" session, and a
+        # collapses into one shared "<ns>:<platform>:dm" session, and a
        # single cached agent ends up serving multiple people's conversations —
        # cross-user history bleed.  participant_id keeps DMs isolated per user.
        dm_participant_id = source.user_id_alt or source.user_id
@ -667,11 +702,11 @@ def build_session_key(
            )
        if dm_participant_id:
            if source.thread_id:
-                return f"agent:main:{platform}:dm:{dm_participant_id}:{source.thread_id}"
-            return f"agent:main:{platform}:dm:{dm_participant_id}"
+                return f"{ns}:{platform}:dm:{dm_participant_id}:{source.thread_id}"
+            return f"{ns}:{platform}:dm:{dm_participant_id}"
        if source.thread_id:
-            return f"agent:main:{platform}:dm:{source.thread_id}"
-        return f"agent:main:{platform}:dm"
+            return f"{ns}:{platform}:dm:{source.thread_id}"
+        return f"{ns}:{platform}:dm"

    participant_id = source.user_id_alt or source.user_id
    if participant_id and source.platform == Platform.WHATSAPP:
@ -679,7 +714,7 @@ def build_session_key(
        # single group member gets two isolated per-user sessions when the
        # bridge reshuffles alias forms.
        participant_id = canonical_whatsapp_identifier(str(participant_id)) or participant_id
-    key_parts = ["agent:main", platform, source.chat_type]
+    key_parts = [ns, platform, source.chat_type]

    if source.chat_id:
        key_parts.append(source.chat_id)
@ -775,12 +810,32 @@ class SessionStore:
                logger.debug("Could not remove temp file %s: %s", tmp_path, e)
            raise
    
+    def _resolve_profile_for_key(self, source: Optional[SessionSource] = None) -> Optional[str]:
+        """Return the profile namespace for session keys, or None when off.
+
+        When ``multiplex_profiles`` is disabled (default), returns ``None`` so
+        keys stay in the legacy ``agent:main`` namespace — byte-identical to
+        before. When enabled, prefers the profile the inbound source was routed
+        to (``source.profile`` — set by the /p/<profile>/ URL prefix or
+        per-credential adapter), falling back to the active profile name.
+        """
+        if not getattr(self.config, "multiplex_profiles", False):
+            return None
+        if source is not None and source.profile:
+            return source.profile
+        try:
+            from hermes_cli.profiles import get_active_profile_name
+            return get_active_profile_name() or "default"
+        except Exception:
+            return None
+
    def _generate_session_key(self, source: SessionSource) -> str:
        """Generate a session key from a source."""
        return build_session_key(
            source,
            group_sessions_per_user=getattr(self.config, "group_sessions_per_user", True),
            thread_sessions_per_user=getattr(self.config, "thread_sessions_per_user", False),
+            profile=self._resolve_profile_for_key(source),
        )
    
    def _is_session_expired(self, entry: SessionEntry) -> bool:
--- a/gateway/slash_commands.py
+++ b/gateway/slash_commands.py
@ -1030,12 +1030,13 @@ class GatewaySlashCommandsMixin:
        )

    async def _handle_model_command(self, event: MessageEvent) -> Optional[str]:
-        """Handle /model command — switch model for this session.
+        """Handle /model command — switch model.

        Supports:
          /model                              — interactive picker (Telegram/Discord) or text list
-          /model <name>                       — switch for this session only
-          /model <name> --global              — switch and persist to config.yaml
+          /model <name>                       — switch model (persists by default)
+          /model <name> --session             — switch for this session only
+          /model <name> --global              — switch and persist (explicit)
          /model <name> --provider <provider> — switch provider + model
          /model --provider <provider>        — switch to provider, auto-detect model
        """
@ -1043,6 +1044,7 @@ class GatewaySlashCommandsMixin:
        import yaml
        from hermes_cli.model_switch import (
            switch_model as _switch_model, parse_model_flags,
+            resolve_persist_behavior,
            list_authenticated_providers,
            list_picker_providers,
        )
@ -1050,8 +1052,15 @@ class GatewaySlashCommandsMixin:

        raw_args = event.get_command_args().strip()

-        # Parse --provider, --global, and --refresh flags
-        model_input, explicit_provider, persist_global, force_refresh = parse_model_flags(raw_args)
+        # Parse --provider, --global, --session, and --refresh flags
+        (
+            model_input,
+            explicit_provider,
+            is_global_flag,
+            force_refresh,
+            is_session,
+        ) = parse_model_flags(raw_args)
+        persist_global = resolve_persist_behavior(is_global_flag, is_session)

        # --refresh: bust the disk cache so the picker shows live data.
        if force_refresh:
@ -1362,7 +1371,7 @@ class GatewaySlashCommandsMixin:
            # override rather than relying on cache signature mismatch detection.
            self._evict_cached_agent(session_key)

-            # Persist to config if --global
+            # Persist to config (default) unless --session opted out
            if persist_global:
                try:
                    if config_path.exists():
--- a/gateway/status.py
+++ b/gateway/status.py
@ -14,6 +14,7 @@ concurrently under distinct configurations).
 import hashlib
 import json
 import os
+import shlex
 import signal
 import subprocess
 import sys
@ -164,20 +165,86 @@ def _read_process_cmdline(pid: int) -> Optional[str]:
    return None


+def looks_like_gateway_command_line(command: str | None) -> bool:
+    """Return True only for a real ``gateway run`` process command line.
+
+    Lifecycle decisions (is the gateway up? did restart relaunch it?) must not
+    fire on loose substring matches.  The previous ``"... gateway" in cmdline``
+    test also matched ``hermes_cli.main gateway status`` and even unrelated
+    processes like ``python -m tui_gateway`` -- which made ``restart()`` race
+    against a still-draining old process and ``status``/``start`` report false
+    positives.  This requires the actual ``gateway`` subcommand followed by
+    ``run`` (or one of the gateway-dedicated entrypoints), excluding the other
+    ``gateway`` management subcommands and any process that merely contains the
+    word "gateway".
+
+    Tokenizes quote-aware (``shlex``) so quoted Windows paths with spaces
+    (``"C:\\Program Files\\...\\hermes-gateway.exe"``) survive, and strips
+    ``--profile``/``-p`` selectors from anywhere in argv -- Hermes's
+    ``_apply_profile_override`` removes them before argparse, so the profile
+    flag (and a profile literally named ``gateway``) can legally appear on
+    either side of the ``gateway`` subcommand.
+    """
+    if not command:
+        return False
+
+    try:
+        raw_tokens = shlex.split(command, posix=False)
+    except ValueError:
+        raw_tokens = command.split()
+    # Strip surrounding quotes, normalize slashes + case per token.
+    tokens = [t.strip("\"'").replace("\\", "/").lower() for t in raw_tokens]
+    if not tokens:
+        return False
+
+    # Gateway-dedicated entrypoints carry no subcommand to inspect.
+    for token in tokens:
+        if token == "gateway/run.py" or token.endswith("/gateway/run.py"):
+            return True
+        basename = token.rsplit("/", 1)[-1]
+        if basename in ("hermes-gateway", "hermes-gateway.exe"):
+            return True
+
+    joined = " ".join(tokens)
+    has_gateway_entry = (
+        "hermes_cli.main" in joined
+        or "hermes_cli/main.py" in joined
+        or any(t.rsplit("/", 1)[-1] in ("hermes", "hermes.exe") for t in tokens)
+    )
+    if not has_gateway_entry:
+        return False
+
+    # Drop profile selectors anywhere: --profile X / -p X / --profile=X / -p=X.
+    # This consumes a profile VALUE of "gateway" too, so the real subcommand
+    # token is the one we land on below.
+    filtered: list[str] = []
+    skip_next = False
+    for token in tokens:
+        if skip_next:
+            skip_next = False
+            continue
+        if token in ("--profile", "-p"):
+            skip_next = True
+            continue
+        if token.startswith("--profile=") or token.startswith("-p="):
+            continue
+        filtered.append(token)
+
+    for i, token in enumerate(filtered):
+        if token != "gateway":
+            continue
+        if i + 1 >= len(filtered):
+            return True  # bare `hermes gateway` defaults to `run`
+        return filtered[i + 1] == "run"
+    return False
+
+
 def _looks_like_gateway_process(pid: int) -> bool:
    """Return True when the live PID still looks like the Hermes gateway."""
    cmdline = _read_process_cmdline(pid)
    if not cmdline:
        return False
-
-    patterns = (
-        "hermes_cli.main gateway",
-        "hermes_cli/main.py gateway",
-        "hermes gateway",
-        "hermes-gateway",
-        "gateway/run.py",
-    )
-    return any(pattern in cmdline for pattern in patterns)
+    return looks_like_gateway_command_line(cmdline)


 def _record_looks_like_gateway(record: dict[str, Any]) -> bool:
@ -189,15 +256,8 @@ def _record_looks_like_gateway(record: dict[str, Any]) -> bool:
    if not isinstance(argv, list) or not argv:
        return False

-    # Normalize Windows backslashes so patterns match cross-platform.
-    cmdline = " ".join(str(part) for part in argv).replace("\\", "/")
-    patterns = (
-        "hermes_cli.main gateway",
-        "hermes_cli/main.py gateway",
-        "hermes gateway",
-        "gateway/run.py",
-    )
-    return any(pattern in cmdline for pattern in patterns)
+    cmdline = " ".join(str(part) for part in argv)
+    return looks_like_gateway_command_line(cmdline)


 def _build_pid_record() -> dict:
@ -515,6 +575,7 @@ def write_runtime_status(
    platform_state: Any = _UNSET,
    error_code: Any = _UNSET,
    error_message: Any = _UNSET,
+    served_profiles: Any = _UNSET,
 ) -> None:
    """Persist gateway runtime health information for diagnostics/status."""
    path = _get_runtime_status_path()
@ -535,6 +596,11 @@ def write_runtime_status(
        payload["restart_requested"] = bool(restart_requested)
    if active_agents is not _UNSET:
        payload["active_agents"] = max(0, int(active_agents))
+    if served_profiles is not _UNSET:
+        # Profiles this gateway multiplexes (multi-profile mode). Absent/empty
+        # for a single-profile gateway. Lets `hermes status` show per-profile
+        # coverage without a second probe.
+        payload["served_profiles"] = list(served_profiles or [])

    if platform is not _UNSET:
        platform_payload = payload["platforms"].get(platform, {})
--- a/hermes_cli/backup.py
+++ b/hermes_cli/backup.py
@ -34,14 +34,38 @@ logger = logging.getLogger(__name__)
 # ``hermes-agent`` is special-cased to root level only in ``_should_exclude``
 # so that skill directories like ``skills/autonomous-ai-agents/hermes-agent/``
 # are not accidentally excluded.
+#
+# The dependency/cache entries below matter for more than tidiness: without
+# them a single plugin venv, MCP-server install, or pip/uv cache living under
+# HERMES_HOME gets walked file-by-file, ballooning a backup to hundreds of
+# thousands of entries that crawl for hours — the exact "backup stuck for
+# days / 426543 files" symptom users hit. The dependency/test-env names mostly
+# mirror ``agent.skill_utils.EXCLUDED_SKILL_DIRS`` (the project's canonical
+# "regeneratable dir" set); ``.cache`` is an additional backup-only entry, as
+# it names a broad regeneratable cache convention (pip/uv/etc.) that the skill
+# scanner doesn't need to prune but a backup walk does. We deliberately do NOT
+# exclude ``.archive`` here because the curator's ``skills/.archive/`` holds
+# restorable user skills that must survive a backup.
 _EXCLUDED_DIRS = {
    "hermes-agent",     # the codebase repo — re-clone instead
    "__pycache__",      # bytecode caches — regenerated on import
    ".git",             # nested git dirs (profiles shouldn't have these, but safety)
-    "node_modules",     # js deps if website/ somehow leaks in
+    "node_modules",     # js deps — reinstalled on demand
    "backups",          # prior auto-backups — don't nest backups exponentially
    "checkpoints",      # session-local trajectory caches — regenerated per-session,
                        # session-hash-keyed so they don't port to another machine anyway
+    # Python dependency trees (plugin / MCP-server venvs under HERMES_HOME) —
+    # regenerated by reinstalling; never irreplaceable state.
+    ".venv",
+    "venv",
+    "site-packages",
+    # Tool / build caches — all regeneratable.
+    ".cache",
+    ".tox",
+    ".nox",
+    ".pytest_cache",
+    ".mypy_cache",
+    ".ruff_cache",
 }

 # File-name suffixes to skip
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@ -123,8 +123,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
    # Configuration
    CommandDef("config", "Show current configuration", "Configuration",
               cli_only=True),
-    CommandDef("model", "Switch model for this session", "Configuration",
-               args_hint="[model] [--provider name] [--global] [--refresh]"),
+    CommandDef("model", "Switch model (persists by default)", "Configuration",
+               args_hint="[model] [--provider name] [--global|--session] [--refresh]"),
    CommandDef("codex-runtime", "Toggle codex app-server runtime for OpenAI/Codex models",
               "Configuration", aliases=("codex_runtime",),
               args_hint="[auto|codex_app_server]"),
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -1581,6 +1581,14 @@ DEFAULT_CONFIG = {
        # TUI busy indicator style: kaomoji (default), emoji, unicode (braille
        # spinner), or ascii.  Live-swappable via `/indicator <style>`.
        "tui_status_indicator": "kaomoji",
+        # Seconds between prompt_toolkit redraws in the classic CLI when idle.
+        # Default 1.0 keeps the wall-clock status-bar read-outs (idle-since-
+        # last-turn) ticking and keeps the bottom chrome alive during idle —
+        # without it prompt_toolkit stops repainting the status bar after a
+        # turn and it can go stale/disappear (#45592).
+        # Set 0 to disable the background refresh if it fights terminal
+        # auto-scroll in non-fullscreen mode on some emulators (#48309).
+        "cli_refresh_interval": 1.0,
        "user_message_preview": {  # CLI: how many submitted user-message lines to echo back in scrollback
            "first_lines": 2,
            "last_lines": 2,
@ -3453,6 +3461,7 @@ OPTIONAL_ENV_VARS = {
                       "Required scopes: chat:write, app_mentions:read, channels:history, groups:history, "
                       "im:history, im:read, im:write, users:read, files:read, files:write",
        "prompt": "Slack Bot Token (xoxb-...)",
+        "help": "In your Slack app, add the required bot scopes, install the app to the workspace, then copy OAuth & Permissions > Bot User OAuth Token.",
        "url": "https://api.slack.com/apps",
        "password": True,
        "category": "messaging",
@ -3462,10 +3471,19 @@ OPTIONAL_ENV_VARS = {
                       "App-Level Tokens. Also ensure Event Subscriptions include: message.im, "
                       "message.channels, message.groups, app_mention",
        "prompt": "Slack App Token (xapp-...)",
+        "help": "In your Slack app, enable Socket Mode, then create Basic Information > App-Level Tokens with the connections:write scope.",
        "url": "https://api.slack.com/apps",
        "password": True,
        "category": "messaging",
    },
+    "SLACK_ALLOWED_USERS": {
+        "description": "Comma-separated Slack member IDs allowed to use Hermes, e.g. U01ABC2DEF3. Without this, Slack may connect but deny messages by default.",
+        "prompt": "Allowed Slack member IDs",
+        "help": "In Slack, open your profile, choose More or the three-dot menu, then Copy member ID. Add multiple IDs comma-separated.",
+        "url": "https://api.slack.com/apps",
+        "password": False,
+        "category": "messaging",
+    },
    "MATTERMOST_URL": {
        "description": "Mattermost server URL (e.g. https://mm.example.com)",
        "prompt": "Mattermost server URL",
--- a/hermes_cli/cron.py
+++ b/hermes_cli/cron.py
@ -25,7 +25,7 @@ _GATEWAY_LIFECYCLE_PATTERNS = re.compile(
    r"(?i)"
    r"(hermes\s+gateway\s+(restart|stop|start))"
    r"|(launchctl\s+(kickstart|unload|load|stop|restart)\s+.*hermes)"
-    r"|(systemctl\s+(restart|stop|start)\s+.*hermes)"
+    r"|(systemctl\s+(-\S+\s+)*(restart|stop|start)\s+.*hermes)"
    r"|(p?kill\s+.*hermes.*gateway)"
 )

--- a/hermes_cli/debug.py
+++ b/hermes_cli/debug.py
@ -191,10 +191,10 @@ _PRIVACY_NOTICE = """\
 ⚠️  This will upload the following to a public paste service:
  • System info (OS, Python version, Hermes version, provider, which API keys
    are configured — NOT the actual keys)
-  • Recent log lines (agent.log, errors.log, gateway.log, desktop.log — may
-    contain conversation fragments and file paths)
-  • Full agent.log, gateway.log, and desktop.log (up to 512 KB each — likely
-    contains conversation content, tool outputs, and file paths)
+  • Recent log lines (agent.log, errors.log, gateway.log, gui.log, desktop.log
+    — may contain conversation fragments and file paths)
+  • Full agent.log, gateway.log, gui.log, and desktop.log (up to 512 KB each —
+    likely contains conversation content, tool outputs, and file paths)

 Pastes auto-delete after 6 hours.
 """
@ -503,6 +503,9 @@ def _capture_default_log_snapshots(
        "gateway": _capture_log_snapshot(
            "gateway", tail_lines=errors_lines, redact=redact
        ),
+        "gui": _capture_log_snapshot(
+            "gui", tail_lines=errors_lines, redact=redact
+        ),
        "desktop": _capture_log_snapshot(
            "desktop", tail_lines=errors_lines, redact=redact
        ),
@ -574,6 +577,10 @@ def collect_debug_report(
    buf.write(log_snapshots["gateway"].tail_text)
    buf.write("\n\n")

+    buf.write(f"--- gui.log (last {errors_lines} lines) ---\n")
+    buf.write(log_snapshots["gui"].tail_text)
+    buf.write("\n\n")
+
    buf.write(f"--- desktop.log (last {errors_lines} lines) ---\n")
    buf.write(log_snapshots["desktop"].tail_text)
    buf.write("\n")
@ -639,6 +646,7 @@ def build_debug_share(
    )
    agent_log = log_snapshots["agent"].full_text
    gateway_log = log_snapshots["gateway"].full_text
+    gui_log = log_snapshots["gui"].full_text
    desktop_log = log_snapshots["desktop"].full_text

    # Prepend dump header to each full log so every paste is self-contained.
@ -646,6 +654,8 @@ def build_debug_share(
        agent_log = dump_text + "\n\n--- full agent.log ---\n" + agent_log
    if gateway_log:
        gateway_log = dump_text + "\n\n--- full gateway.log ---\n" + gateway_log
+    if gui_log:
+        gui_log = dump_text + "\n\n--- full gui.log ---\n" + gui_log
    if desktop_log:
        desktop_log = dump_text + "\n\n--- full desktop.log ---\n" + desktop_log

@ -657,6 +667,8 @@ def build_debug_share(
            agent_log = _REDACTION_BANNER + agent_log
        if gateway_log:
            gateway_log = _REDACTION_BANNER + gateway_log
+        if gui_log:
+            gui_log = _REDACTION_BANNER + gui_log
        if desktop_log:
            desktop_log = _REDACTION_BANNER + desktop_log

@ -670,6 +682,7 @@ def build_debug_share(
    for label, content in (
        ("agent.log", agent_log),
        ("gateway.log", gateway_log),
+        ("gui.log", gui_log),
        ("desktop.log", desktop_log),
    ):
        if not content:
@ -712,11 +725,14 @@ def run_debug_share(args):
        )
        agent_log = log_snapshots["agent"].full_text
        gateway_log = log_snapshots["gateway"].full_text
+        gui_log = log_snapshots["gui"].full_text
        desktop_log = log_snapshots["desktop"].full_text
        if agent_log:
            agent_log = dump_text + "\n\n--- full agent.log ---\n" + agent_log
        if gateway_log:
            gateway_log = dump_text + "\n\n--- full gateway.log ---\n" + gateway_log
+        if gui_log:
+            gui_log = dump_text + "\n\n--- full gui.log ---\n" + gui_log
        if desktop_log:
            desktop_log = dump_text + "\n\n--- full desktop.log ---\n" + desktop_log
        if redact:
@ -725,12 +741,15 @@ def run_debug_share(args):
                agent_log = _REDACTION_BANNER + agent_log
            if gateway_log:
                gateway_log = _REDACTION_BANNER + gateway_log
+            if gui_log:
+                gui_log = _REDACTION_BANNER + gui_log
            if desktop_log:
                desktop_log = _REDACTION_BANNER + desktop_log
        print(report)
        for title, body in (
            ("FULL agent.log", agent_log),
            ("FULL gateway.log", gateway_log),
+            ("FULL gui.log", gui_log),
            ("FULL desktop.log", desktop_log),
        ):
            if body:
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@ -319,23 +319,12 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li
    # gateway.  See #13242.
    exclude_pids = exclude_pids | _get_ancestor_pids()
    pids: list[int] = []
-    patterns = [
-        "hermes_cli.main gateway",
-        "hermes_cli.main --profile",
-        "hermes_cli.main -p",
-        "hermes_cli/main.py gateway",
-        "hermes_cli/main.py --profile",
-        "hermes_cli/main.py -p",
-        "hermes gateway",
-        # Windows: only match invocations that actually carry the ``gateway``
-        # subcommand or the gateway-dedicated console-script shim. Bare
-        # ``hermes.exe --profile`` / ``hermes.exe -p`` would also match
-        # ``hermes.exe --profile foo dashboard`` and other CLI subcommands,
-        # producing false-positive gateway PIDs (Copilot review).
-        "hermes.exe gateway",
-        "hermes-gateway.exe",
-        "gateway/run.py",
-    ]
+    # Strict command-line matcher shared with gateway.status: requires the
+    # actual ``gateway run`` subcommand (or the dedicated entrypoints), so this
+    # scan no longer false-matches ``gateway status``/``dashboard`` siblings or
+    # unrelated processes like ``python -m tui_gateway``. Lazy import mirrors the
+    # circular-import avoidance used elsewhere in this module.
+    from gateway.status import looks_like_gateway_command_line
    current_home = str(get_hermes_home().resolve())
    current_home_lc = current_home.lower()
    current_profile_arg = _profile_arg(current_home)
@ -430,8 +419,7 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li
                    current_cmd = line[len("CommandLine=") :]
                elif line.startswith("ProcessId="):
                    pid_str = line[len("ProcessId=") :]
-                    current_cmd_lc = current_cmd.lower()
-                    if any(p in current_cmd_lc for p in patterns) and (
+                    if looks_like_gateway_command_line(current_cmd) and (
                        all_profiles or _matches_current_profile(current_cmd)
                    ):
                        try:
@ -456,8 +444,7 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li
                            with open(f"/proc/{pid}/cmdline", "rb") as _f:
                                cmdline = _f.read().decode("utf-8", errors="replace")
                            cmdline = cmdline.replace("\x00", " ")
-                            cmdline_lc = cmdline.lower()
-                            if any(p in cmdline_lc for p in patterns) and (
+                            if looks_like_gateway_command_line(cmdline) and (
                                all_profiles or _matches_current_profile(cmdline)
                            ):
                                _append_unique_pid(pids, pid, exclude_pids)
@ -500,8 +487,7 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li

                    if pid is None:
                        continue
-                    command_lc = command.lower()
-                    if any(pattern in command_lc for pattern in patterns) and (
+                    if looks_like_gateway_command_line(command) and (
                        all_profiles or _matches_current_profile(command)
                    ):
                        _append_unique_pid(pids, pid, exclude_pids)
@ -3865,6 +3851,86 @@ def _running_under_gateway_supervisor() -> bool:
    return False


+def _guard_named_profile_under_multiplexer(force: bool = False) -> None:
+    """Refuse a named-profile gateway when a multiplexer is already serving it.
+
+    When the default profile's gateway runs with gateway.multiplex_profiles=on,
+    it is the sole inbound process for EVERY profile on the host. Starting a
+    separate gateway for a named profile would double-bind that profile's
+    platforms (two pollers on one bot token, port fights). In that mode a
+    named-profile ``hermes gateway run`` is always a misconfiguration, so we
+    hard-error with a pointer to the multiplexer. ``--force`` overrides.
+
+    Inert unless ALL of: (a) this invocation is a named profile, (b) a default-
+    profile gateway is running, (c) that gateway's config has multiplexing on.
+    """
+    if force:
+        return
+    # (a) Are we a named profile? Default/custom-hash homes return "".
+    try:
+        suffix = _profile_suffix()
+    except Exception:
+        return
+    if not suffix:
+        return  # default profile (or unrecognized) — this guard doesn't apply
+
+    try:
+        from hermes_constants import get_default_hermes_root
+        default_root = get_default_hermes_root()
+        # (b) Is the default-profile gateway running?
+        from gateway.status import get_running_pid as _default_running_pid  # noqa
+    except Exception:
+        return
+
+    try:
+        import yaml as _yaml
+        from gateway.status import _read_pid_record  # type: ignore
+
+        # (b) default gateway PID file present + alive
+        default_pid_path = default_root / "gateway.pid"
+        rec = _read_pid_record(default_pid_path)
+        if not rec:
+            return
+        from gateway.status import _pid_exists, _pid_from_record
+        pid = _pid_from_record(rec)
+        if not pid or not _pid_exists(pid):
+            return
+
+        # (c) default config has multiplexing on
+        cfg_path = default_root / "config.yaml"
+        if not cfg_path.exists():
+            return
+        with open(cfg_path, encoding="utf-8") as f:
+            cfg = _yaml.safe_load(f) or {}
+        multiplex = bool(
+            cfg.get("multiplex_profiles")
+            or (cfg.get("gateway", {}) or {}).get("multiplex_profiles")
+        )
+        if not multiplex:
+            return
+    except Exception:
+        logger.debug("Multiplexer-conflict probe failed", exc_info=True)
+        return
+
+    print_error(
+        f"The default gateway is running as a profile multiplexer and already "
+        f"serves profile '{suffix}'."
+    )
+    print(
+        "  When gateway.multiplex_profiles is on, the default gateway is the\n"
+        "  single inbound process for every profile. Starting a separate\n"
+        "  gateway for this profile would double-bind its platforms (two\n"
+        "  pollers on one bot token, port conflicts).\n"
+    )
+    print("  Manage the multiplexer instead (from the default profile):")
+    print()
+    print("    hermes gateway restart")
+    print()
+    print("  Pass --force to start a separate profile gateway anyway (not")
+    print("  recommended while the multiplexer is running).")
+    sys.exit(1)
+
+
 def _guard_supervised_gateway_conflict(force: bool = False) -> None:
    """Refuse a foreground gateway when a service manager already supervises one.

@ -3977,6 +4043,7 @@ def run_gateway(verbose: int = 0, quiet: bool = False, replace: bool = False, fo
               systemd/launchd service is already supervising this profile.
    """
    _guard_official_docker_root_gateway()
+    _guard_named_profile_under_multiplexer(force=force)
    _guard_supervised_gateway_conflict(force=force)
    _guard_existing_gateway_process_conflict(replace=replace)
    sys.path.insert(0, str(PROJECT_ROOT))
--- a/hermes_cli/gateway_windows.py
+++ b/hermes_cli/gateway_windows.py
@ -1302,10 +1302,54 @@ def stop() -> None:
        print("✗ No gateway was running")


+def _wait_for_gateway_absent(timeout_s: float = 30.0, interval_s: float = 0.5) -> bool:
+    """Block until no gateway process is detectable, or the timeout elapses.
+
+    ``stop()`` can return while the previous gateway is still draining
+    in-flight agents (the drain runs up to the restart-drain timeout). Uses the
+    authoritative ``get_running_pid()`` (lock + liveness + start-time +
+    gateway-shape) plus the now-strict ``_gateway_pids()`` scan so a relaunch
+    never races a still-alive old process.
+    """
+    from gateway.status import get_running_pid
+
+    deadline = time.monotonic() + max(timeout_s, interval_s)
+    while time.monotonic() < deadline:
+        if get_running_pid() is None and not _gateway_pids():
+            return True
+        time.sleep(interval_s)
+    return get_running_pid() is None and not _gateway_pids()
+
+
 def restart() -> None:
-    """Stop the gateway then start it again."""
+    """Stop the gateway then start it again.
+
+    Waits for the old gateway to be authoritatively gone before relaunching --
+    otherwise ``start()``'s "already running" guard sees the still-draining old
+    process and no-ops, and when that process later exits nothing replaces it (a
+    silent outage). Fails loudly if the process can't be cleared or the relaunch
+    doesn't produce a running gateway.
+    """
    _assert_windows()
+    from hermes_cli.gateway import kill_gateway_processes
+
    stop()
+
+    if not _wait_for_gateway_absent(timeout_s=30.0):
+        print("⚠ Gateway still present after stop; forcing termination before restart...")
+        kill_gateway_processes(all_profiles=False, force=True)
+        if not _wait_for_gateway_absent(timeout_s=10.0):
+            raise RuntimeError(
+                "Gateway process still detected after force kill; refusing to "
+                "start a duplicate. Investigate stray PIDs before retrying."
+            )
+
    # Give Windows a moment to release the listening port.
    time.sleep(1.0)
    start()
+
+    if not _wait_for_gateway_ready(timeout_s=15.0):
+        raise RuntimeError(
+            "Gateway restart did not produce a running gateway process. "
+            "Check logs/gateway.log and run `hermes gateway status`."
+        )
--- a/hermes_cli/kanban_db.py
+++ b/hermes_cli/kanban_db.py
@ -121,6 +121,16 @@ DEFAULT_CLAIM_TTL_SECONDS = 15 * 60
 # effect of normal API traffic.
 DEFAULT_CLAIM_HEARTBEAT_MAX_STALE_SECONDS = 60 * 60

+# Grace added to a claim when a reclaim is deferred because the previous
+# host-local worker is still alive after a termination attempt. Releasing the
+# claim in that state would spawn a duplicate alongside the surviving worker —
+# the runaway seen when a cgroup memory.high throttle parks a worker in
+# uninterruptible (D) state, where a pending SIGKILL cannot be delivered until
+# the throttle lifts. Holding the claim a short grace and retrying next tick
+# stops the duplication; once no duplicate is spawned the pressure eases, the
+# signal lands, and the following tick reclaims cleanly.
+RECLAIM_DEFER_GRACE_SECONDS = 120
+

 def _resolve_claim_ttl_seconds(ttl_seconds: Optional[int] = None) -> int:
    """Return the effective claim TTL, honoring the kanban env override.
@ -3286,6 +3296,14 @@ def release_stale_claims(
        termination = _terminate_reclaimed_worker(
            row["worker_pid"], row["claim_lock"], signal_fn=signal_fn,
        )
+        # Never release a claim while our own worker is still alive: that would
+        # spawn a duplicate beside it. Hold the claim and retry next tick.
+        if _worker_survived_termination(termination):
+            _defer_reclaim_for_live_worker(
+                conn, row["id"], row["claim_lock"], now, termination,
+                reason="ttl_expired_worker_alive",
+            )
+            continue
        with write_txn(conn):
            cur = conn.execute(
                "UPDATE tasks SET status = 'ready', claim_lock = NULL, "
@ -5113,7 +5131,13 @@ def _terminate_reclaimed_worker(
    info["termination_attempted"] = True
    try:
        kill(int(pid), signal.SIGTERM)
-    except (ProcessLookupError, OSError):
+    except ProcessLookupError:
+        # Process is already gone — that's a successful termination, not a
+        # survival. Leaving terminated=False here would make the reclaim guard
+        # misread a dead worker as still-alive and defer forever.
+        info["terminated"] = True
+        return info
+    except OSError:
        return info

    for _ in range(10):
@ -5136,6 +5160,63 @@ def _terminate_reclaimed_worker(
    return info


+def _worker_survived_termination(termination: dict) -> bool:
+    """True when we tried to kill our own host-local worker and it is still alive.
+
+    Reclaiming in this state would release the claim and let the dispatcher
+    spawn a second worker while the first is still running — the duplication
+    loop. Only host-local workers we actually signalled count: a non-local
+    claim lock or a no-op attempt (no ``os.kill`` available) must fall through
+    to the normal release path, since we cannot manage that worker anyway.
+    """
+    return bool(
+        termination.get("termination_attempted")
+        and termination.get("host_local")
+        and not termination.get("terminated")
+    )
+
+
+def _defer_reclaim_for_live_worker(
+    conn: sqlite3.Connection,
+    task_id: str,
+    claim_lock: Optional[str],
+    now: int,
+    termination: dict,
+    *,
+    reason: str,
+) -> None:
+    """Hold a claim whose worker survived termination instead of releasing it.
+
+    Extends ``claim_expires`` by ``RECLAIM_DEFER_GRACE_SECONDS`` so the task
+    stays ``running`` (no duplicate spawn) and records a ``reclaim_deferred``
+    event so the hold is visible in ``hermes kanban tail``. The next dispatch
+    tick retries the kill; this is self-correcting because not spawning a
+    duplicate is what lets the throttled worker finally die.
+    """
+    grace = now + RECLAIM_DEFER_GRACE_SECONDS
+    with write_txn(conn):
+        cur = conn.execute(
+            "UPDATE tasks SET claim_expires = ? "
+            "WHERE id = ? AND status = 'running' AND claim_lock IS ?",
+            (grace, task_id, claim_lock),
+        )
+        if cur.rowcount != 1:
+            return
+        run_id = _current_run_id(conn, task_id)
+        if run_id is not None:
+            conn.execute(
+                "UPDATE task_runs SET claim_expires = ? WHERE id = ?",
+                (grace, run_id),
+            )
+        payload = {
+            "reason": reason,
+            "claim_lock": claim_lock,
+            "claim_expires_now": grace,
+        }
+        payload.update(termination)
+        _append_event(conn, task_id, "reclaim_deferred", payload, run_id=run_id)
+
+
 def heartbeat_worker(
    conn: sqlite3.Connection,
    task_id: str,
@ -5374,6 +5455,15 @@ def detect_stale_running(
            pid, lock, signal_fn=signal_fn,
        )

+        # Never release a claim while our own worker is still alive: that would
+        # spawn a duplicate beside it. Hold the claim and retry next tick.
+        if _worker_survived_termination(termination):
+            _defer_reclaim_for_live_worker(
+                conn, tid, lock, now, termination,
+                reason="heartbeat_stale_worker_alive",
+            )
+            continue
+
        with write_txn(conn):
            cur = conn.execute(
                "UPDATE tasks SET status = 'ready', claim_lock = NULL, "
--- a/hermes_cli/model_switch.py
+++ b/hermes_cli/model_switch.py
@ -299,34 +299,46 @@ class ModelSwitchResult:
 # Flag parsing
 # ---------------------------------------------------------------------------

-def parse_model_flags(raw_args: str) -> tuple[str, str, bool, bool]:
-    """Parse --provider, --global, and --refresh flags from /model command args.
+def parse_model_flags(raw_args: str) -> tuple[str, str, bool, bool, bool]:
+    """Parse --provider, --global, --session, and --refresh flags from /model command args.

-    Returns (model_input, explicit_provider, is_global, force_refresh).
+    Returns ``(model_input, explicit_provider, is_global, force_refresh, is_session)``.
+
+    ``is_global`` and ``is_session`` are independent flag presences; the
+    *effective* persistence decision is resolved by
+    :func:`resolve_persist_behavior` so the config-gated default
+    (``model.persist_switch_by_default``) is applied in one place.

    Examples::

-        "sonnet"                         -> ("sonnet", "", False, False)
-        "sonnet --global"                -> ("sonnet", "", True, False)
-        "sonnet --provider anthropic"    -> ("sonnet", "anthropic", False, False)
-        "--provider my-ollama"           -> ("", "my-ollama", False, False)
-        "--refresh"                      -> ("", "", False, True)
-        "sonnet --provider anthropic --global" -> ("sonnet", "anthropic", True, False)
+        "sonnet"                         -> ("sonnet", "", False, False, False)
+        "sonnet --global"                -> ("sonnet", "", True, False, False)
+        "sonnet --session"               -> ("sonnet", "", False, False, True)
+        "sonnet --provider anthropic"    -> ("sonnet", "anthropic", False, False, False)
+        "--provider my-ollama"           -> ("", "my-ollama", False, False, False)
+        "--refresh"                      -> ("", "", False, True, False)
+        "sonnet --provider anthropic --global" -> ("sonnet", "anthropic", True, False, False)
    """
    is_global = False
    explicit_provider = ""
    force_refresh = False
+    is_session = False

    # Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash)
    # A single Unicode dash before a flag keyword becomes "--"
    import re as _re
-    raw_args = _re.sub(r'[\u2012\u2013\u2014\u2015](provider|global|refresh)', r'--\1', raw_args)
+    raw_args = _re.sub(r'[\u2012\u2013\u2014\u2015](provider|global|session|refresh)', r'--\1', raw_args)

    # Extract --global
    if "--global" in raw_args:
        is_global = True
        raw_args = raw_args.replace("--global", "").strip()

+    # Extract --session (explicit session-only; overrides the persist default)
+    if "--session" in raw_args:
+        is_session = True
+        raw_args = raw_args.replace("--session", "").strip()
+
    # Extract --refresh (bust the model picker disk cache before listing)
    if "--refresh" in raw_args:
        force_refresh = True
@ -345,7 +357,37 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool, bool]:
            i += 1

    model_input = " ".join(filtered).strip()
-    return (model_input, explicit_provider, is_global, force_refresh)
+    return (model_input, explicit_provider, is_global, force_refresh, is_session)
+
+
+def resolve_persist_behavior(is_global: bool, is_session: bool) -> bool:
+    """Decide whether a ``/model`` switch should persist to ``config.yaml``.
+
+    Resolution order:
+
+    1. ``--session`` explicitly opts out → ``False`` (this session only).
+    2. ``--global`` explicitly opts in → ``True``.
+    3. Otherwise defer to ``model.persist_switch_by_default`` in
+       ``config.yaml`` (defaults to ``True``, so a plain ``/model <name>``
+       survives across sessions — the behavior users expect).
+
+    The config read is defensive: on a fresh install ``model`` may be a
+    flat string rather than a dict, in which case the built-in default
+    (``True``) applies.
+    """
+    if is_session:
+        return False
+    if is_global:
+        return True
+    try:
+        from hermes_cli.config import load_config
+
+        model_cfg = load_config().get("model")
+        if isinstance(model_cfg, dict):
+            return bool(model_cfg.get("persist_switch_by_default", True))
+    except Exception:
+        pass
+    return True


 # ---------------------------------------------------------------------------
--- a/hermes_cli/profiles.py
+++ b/hermes_cli/profiles.py
@ -29,7 +29,7 @@ import subprocess
 import sys
 from dataclasses import dataclass
 from pathlib import Path, PurePosixPath, PureWindowsPath
-from typing import List, Optional
+from typing import List, Optional, Tuple

 from agent.skill_utils import is_excluded_skill_path

@ -781,6 +781,47 @@ def list_profiles() -> List[ProfileInfo]:
    return profiles


+def profiles_to_serve(multiplex: bool) -> List[Tuple[str, Path]]:
+    """Return the ``(profile_name, hermes_home)`` pairs a gateway should serve.
+
+    This is the single chokepoint for "which profiles does the inbound gateway
+    handle" so later multiplexing phases never re-derive the set.
+
+    - ``multiplex=False`` (default): returns exactly one entry for the *active*
+      profile — byte-for-byte the single-profile behavior the gateway has
+      always had. The name is ``"default"`` for the default profile or the
+      active named profile's id.
+    - ``multiplex=True``: returns the default profile plus every valid named
+      profile under ``profiles/``, each paired with its own HERMES_HOME.
+
+    Intentionally lightweight (a directory scan + name validation only): no
+    per-profile config reads, gateway-running probes, or skill counts like
+    :func:`list_profiles`. It runs on gateway startup and must stay cheap.
+
+    The returned ``hermes_home`` is the path to pass to
+    ``set_hermes_home_override`` when scoping a turn to that profile.
+    """
+    active = get_active_profile_name() or "default"
+    if not multiplex:
+        return [(active, get_profile_dir(active))]
+
+    serve: List[Tuple[str, Path]] = [("default", _get_default_hermes_home())]
+
+    profiles_root = _get_profiles_root()
+    if profiles_root.is_dir():
+        for entry in sorted(profiles_root.iterdir()):
+            if not entry.is_dir():
+                continue
+            name = entry.name
+            if name == "default":
+                continue  # default is the built-in entry already added above
+            if not _PROFILE_ID_RE.match(name):
+                continue
+            serve.append((name, entry))
+
+    return serve
+
+
 def create_profile(
    name: str,
    clone_from: Optional[str] = None,
--- a/hermes_cli/provider_catalog.py
+++ b/hermes_cli/provider_catalog.py
@ -0,0 +1,170 @@
+"""Unified provider catalog — one source of truth for the provider universe.
+
+The provider list shown by ``hermes model`` (CLI/TUI) and the desktop Settings
+→ Providers tabs (Accounts + API keys) **must be the same set**.  Historically
+they were not: the CLI picker read :data:`hermes_cli.models.CANONICAL_PROVIDERS`
+(which auto-extends from ``plugins/model-providers/<name>/``), while the desktop
+tabs read separate hand-maintained lists (``_OAUTH_PROVIDER_CATALOG``,
+``OPTIONAL_ENV_VARS`` + ``PROVIDER_GROUPS``) that nobody kept in sync.  Every
+provider added after those lists were written silently went missing from the
+GUI — e.g. GitHub Copilot showing up only under "tools", or ``openai-api`` being
+configurable from the CLI but not the desktop app.
+
+This module fixes that at the root: it derives ONE descriptor per provider from
+the same universe ``hermes model`` renders (``CANONICAL_PROVIDERS``), joining:
+
+* ``auth_type`` / ``api_key_env_vars`` / ``base_url_env_var`` from
+  :data:`hermes_cli.auth.PROVIDER_REGISTRY` (credential truth), and
+* ``display_name`` / ``description`` / ``signup_url`` from the provider's
+  :class:`providers.base.ProviderProfile` when one exists, falling back to the
+  ``CANONICAL_PROVIDERS`` entry's ``label`` / ``tui_desc`` and the
+  ``OPTIONAL_ENV_VARS`` signup URL otherwise (many profiles leave these blank,
+  and four canonical providers have no profile at all — lmstudio, openai-api,
+  tencent-tokenhub, xai-oauth — so the fallbacks are load-bearing).
+
+Each descriptor is tagged with the ``tab`` it belongs on (``keys`` vs
+``accounts``) based purely on how the provider authenticates.  The desktop
+``/api/env`` and ``/api/providers/oauth`` endpoints derive their MEMBERSHIP from
+this catalog; the old hand lists are demoted to presentation/override overlays
+(bespoke OAuth flow + status resolvers, richer copy, icons, ordering) and no
+longer decide which providers exist.
+
+Parity contract (locked by tests): the union of the two tabs equals the
+``CANONICAL_PROVIDERS`` universe, i.e. exactly what ``hermes model`` shows.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+# Auth types that authenticate via an account / sign-in flow rather than a
+# pasted API key.  These route to the desktop "Accounts" tab; everything else
+# (api_key, and aws_sdk which is configured via AWS_REGION/AWS_PROFILE) routes
+# to the "API keys" tab.  Mirrors the auth_type strings used in
+# hermes_cli.auth.PROVIDER_REGISTRY and providers.base.ProviderProfile.
+_ACCOUNTS_AUTH_TYPES: frozenset[str] = frozenset(
+    {
+        "oauth_device_code",
+        "oauth_external",
+        "oauth_minimax",
+        "external_process",  # copilot-acp: spawns `copilot --acp --stdio`
+        "copilot",           # GitHub Copilot token / gh auth
+    }
+)
+
+
+@dataclass(frozen=True)
+class ProviderDescriptor:
+    """One provider, as seen by every surface (CLI picker + both GUI tabs)."""
+
+    slug: str                      # canonical id, e.g. "google-gemini-cli"
+    label: str                     # human display name
+    description: str               # one-line description
+    auth_type: str                 # api_key | oauth_* | external_process | copilot | aws_sdk
+    tab: str                       # "keys" | "accounts"
+    api_key_env_vars: tuple[str, ...]  # credential env vars (may be empty)
+    base_url_env_var: str          # base-URL override env var (may be "")
+    signup_url: str                # signup / console URL (may be "")
+    order: int                     # CANONICAL_PROVIDERS index — mirrors `hermes model`
+
+
+def tab_for_auth_type(auth_type: str) -> str:
+    """Return the desktop tab ("keys"|"accounts") a provider's auth maps to."""
+    return "accounts" if auth_type in _ACCOUNTS_AUTH_TYPES else "keys"
+
+
+def _split_env_vars(env_vars: tuple[str, ...]) -> tuple[tuple[str, ...], str]:
+    """Split a profile's ``env_vars`` into (api_key_vars, base_url_var)."""
+    keys = tuple(v for v in env_vars if not (v.endswith("_BASE_URL") or v.endswith("_URL")))
+    base = next((v for v in env_vars if v.endswith("_BASE_URL") or v.endswith("_URL")), "")
+    return keys, base
+
+
+def provider_catalog() -> list[ProviderDescriptor]:
+    """Return one descriptor per provider in the ``hermes model`` universe.
+
+    Membership is :data:`CANONICAL_PROVIDERS` (the list the CLI/TUI picker
+    renders, which auto-extends from provider plugins).  Auth + env come from
+    ``PROVIDER_REGISTRY``; display metadata from ``ProviderProfile`` with
+    canonical/env fallbacks so providers without a profile (or with blank
+    profile metadata) still resolve sensibly.
+    """
+    from hermes_cli.models import CANONICAL_PROVIDERS
+
+    # PROVIDER_REGISTRY / list_providers are imported lazily and defensively:
+    # this module is on the import path of the web server and the CLI, and we
+    # never want a provider-plugin import error to blank the whole catalog.
+    try:
+        from hermes_cli.auth import PROVIDER_REGISTRY
+    except Exception:
+        PROVIDER_REGISTRY = {}
+
+    try:
+        from providers import list_providers
+
+        profiles = {p.name: p for p in list_providers()}
+    except Exception:
+        profiles = {}
+
+    try:
+        from hermes_cli.config import OPTIONAL_ENV_VARS
+    except Exception:
+        OPTIONAL_ENV_VARS = {}
+
+    out: list[ProviderDescriptor] = []
+    for order, entry in enumerate(CANONICAL_PROVIDERS):
+        slug = entry.slug
+        cfg = PROVIDER_REGISTRY.get(slug)
+        prof = profiles.get(slug)
+
+        # auth_type: registry is authoritative; fall back to profile, then api_key.
+        auth_type = (
+            (getattr(cfg, "auth_type", "") if cfg else "")
+            or (getattr(prof, "auth_type", "") if prof else "")
+            or "api_key"
+        )
+
+        # Credential env vars: registry first (it already normalizes these),
+        # else derive from the profile's env_vars tuple.
+        if cfg and getattr(cfg, "api_key_env_vars", ()):
+            api_key_vars = tuple(cfg.api_key_env_vars)
+            base_url_var = getattr(cfg, "base_url_env_var", "") or ""
+        elif prof and getattr(prof, "env_vars", ()):
+            api_key_vars, base_url_var = _split_env_vars(tuple(prof.env_vars))
+        else:
+            api_key_vars, base_url_var = (), ""
+
+        label = (
+            (getattr(prof, "display_name", "") if prof else "")
+            or entry.label
+            or slug
+        )
+        description = (
+            (getattr(prof, "description", "") if prof else "")
+            or entry.tui_desc
+            or label
+        )
+        signup_url = (getattr(prof, "signup_url", "") if prof else "") or ""
+        if not signup_url and api_key_vars:
+            info = OPTIONAL_ENV_VARS.get(api_key_vars[0]) or {}
+            signup_url = info.get("url") or ""
+
+        out.append(
+            ProviderDescriptor(
+                slug=slug,
+                label=label,
+                description=description,
+                auth_type=auth_type,
+                tab=tab_for_auth_type(auth_type),
+                api_key_env_vars=api_key_vars,
+                base_url_env_var=base_url_var,
+                signup_url=signup_url,
+                order=order,
+            )
+        )
+    return out
+
+
+def provider_catalog_by_slug() -> dict[str, ProviderDescriptor]:
+    """Convenience: the catalog keyed by slug."""
+    return {d.slug: d for d in provider_catalog()}
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@ -12,6 +12,7 @@ logger = logging.getLogger(__name__)

 from hermes_cli import auth as auth_mod
 from agent.credential_pool import CredentialPool, PooledCredential, get_custom_provider_pool_key, load_pool
+from agent.secret_scope import get_secret as _get_secret
 from hermes_cli.auth import (
    AuthError,
    DEFAULT_CODEX_BASE_URL,
@ -35,6 +36,19 @@ from hermes_constants import OPENROUTER_BASE_URL
 from utils import base_url_host_matches, base_url_hostname, env_int


+def _getenv(name: str, default: str = "") -> str:
+    """Profile-scoped replacement for ``os.getenv`` on credential/provider reads.
+
+    Routes through the secret scope (Workstream A): identical to ``os.getenv``
+    when multiplexing is off, scope-aware (and fail-closed on an unscoped read)
+    when on. Genuinely-global vars are handled inside ``get_secret`` and still
+    read ``os.environ``. Keeps the ``(name, default) -> str`` contract every
+    call site here already relies on.
+    """
+    val = _get_secret(name, default)
+    return val if val is not None else default
+
+
 def _normalize_custom_provider_name(value: str) -> str:
    return value.strip().lower().replace(" ", "-")

@ -156,7 +170,7 @@ def _host_derived_api_key(base_url: str) -> str:
    if sanitized in ("OPENAI", "OPENROUTER", "OLLAMA"):
        return ""
    env_name = f"{sanitized}_API_KEY"
-    return (os.getenv(env_name, "") or "").strip()
+    return (_getenv(env_name, "") or "").strip()


 def _auto_detect_local_model(base_url: str) -> str:
@ -437,7 +451,7 @@ def resolve_requested_provider(requested: Optional[str] = None) -> str:

    # Prefer the persisted config selection over any stale shell/.env
    # provider override so chat uses the endpoint the user last saved.
-    env_provider = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
+    env_provider = _getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
    if env_provider:
        return env_provider

@ -542,7 +556,7 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
            name_norm = _normalize_custom_provider_name(ep_name)
            # Resolve the API key from the env var name stored in key_env
            key_env = str(entry.get("key_env", "") or "").strip()
-            resolved_api_key = os.getenv(key_env, "").strip() if key_env else ""
+            resolved_api_key = _getenv(key_env, "").strip() if key_env else ""
            # Fall back to inline api_key when key_env is absent or unresolvable
            if not resolved_api_key:
                resolved_api_key = str(entry.get("api_key", "") or "").strip()
@ -824,8 +838,8 @@ def _resolve_named_custom_runtime(
        api_key_candidates = [
            (explicit_api_key or "").strip(),
            # Gate env key fallbacks on authoritative hosts (#28660)
-            (os.getenv("OPENAI_API_KEY", "").strip()     if _da_is_openai_url else ""),
-            (os.getenv("OPENROUTER_API_KEY", "").strip() if _da_is_openrouter  else ""),
+            (_getenv("OPENAI_API_KEY", "").strip()     if _da_is_openai_url else ""),
+            (_getenv("OPENROUTER_API_KEY", "").strip() if _da_is_openrouter  else ""),
            # Bonus (#28660): derive `<VENDOR>_API_KEY` from the host so users
            # who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the
            # intuitive match without configuring `custom_providers` first.
@ -878,11 +892,11 @@ def _resolve_named_custom_runtime(
    api_key_candidates = [
        (explicit_api_key or "").strip(),
        str(custom_provider.get("api_key", "") or "").strip(),
-        os.getenv(str(custom_provider.get("key_env", "") or "").strip(), "").strip(),
+        _getenv(str(custom_provider.get("key_env", "") or "").strip(), "").strip(),
        # Gate provider env keys on their authoritative hosts — sending
        # OPENAI_API_KEY to a local-llm endpoint leaks credentials (#28660).
-        (os.getenv("OPENAI_API_KEY", "").strip()     if _cp_is_openai_url  else ""),
-        (os.getenv("OPENROUTER_API_KEY", "").strip() if _cp_is_openrouter  else ""),
+        (_getenv("OPENAI_API_KEY", "").strip()     if _cp_is_openai_url  else ""),
+        (_getenv("OPENROUTER_API_KEY", "").strip() if _cp_is_openrouter  else ""),
        # Bonus (#28660): derive `<VENDOR>_API_KEY` from the host as a final
        # fallback when key_env wasn't set explicitly.
        _host_derived_api_key(base_url),
@ -941,8 +955,8 @@ def _resolve_openrouter_runtime(
        except Exception:
            pass

-    env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()
-    env_custom_base_url = os.getenv("CUSTOM_BASE_URL", "").strip()
+    env_openrouter_base_url = _getenv("OPENROUTER_BASE_URL", "").strip()
+    env_custom_base_url = _getenv("CUSTOM_BASE_URL", "").strip()

    # Use config base_url when available and the provider context matches.
    # OPENAI_BASE_URL env var is no longer consulted — config.yaml is
@ -982,8 +996,8 @@ def _resolve_openrouter_runtime(
    if _is_openrouter_context:
        api_key_candidates = [
            explicit_api_key,
-            os.getenv("OPENROUTER_API_KEY"),
-            os.getenv("OPENAI_API_KEY"),
+            _getenv("OPENROUTER_API_KEY"),
+            _getenv("OPENAI_API_KEY"),
        ]
    else:
        # Custom endpoint: use api_key from config when using config base_url (#1760).
@ -1003,9 +1017,9 @@ def _resolve_openrouter_runtime(
        api_key_candidates = [
            explicit_api_key,
            (cfg_api_key if use_config_base_url else ""),
-            (os.getenv("OLLAMA_API_KEY")     if _is_ollama_url                       else ""),
-            (os.getenv("OPENAI_API_KEY")     if (_is_openai_url or _is_openai_azure) else ""),
-            (os.getenv("OPENROUTER_API_KEY") if _is_openrouter_url                   else ""),
+            (_getenv("OLLAMA_API_KEY")     if _is_ollama_url                       else ""),
+            (_getenv("OPENAI_API_KEY")     if (_is_openai_url or _is_openai_azure) else ""),
+            (_getenv("OPENROUTER_API_KEY") if _is_openrouter_url                   else ""),
            # Bonus (#28660): derive `<VENDOR>_API_KEY` from the host so users
            # who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the
            # intuitive match. Helper returns "" for IPs/loopback and for env
@ -1108,7 +1122,7 @@ def _resolve_azure_foundry_runtime(
        if inferred:
            cfg_api_mode = inferred

-    env_base_url = os.getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/")
+    env_base_url = _getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/")
    base_url = explicit_base_url_clean or cfg_base_url or env_base_url
    if not base_url:
        raise AuthError(
@ -1197,7 +1211,7 @@ def _resolve_azure_foundry_runtime(
        except Exception:
            api_key = ""
    if not api_key:
-        api_key = os.getenv("AZURE_FOUNDRY_API_KEY", "").strip()
+        api_key = _getenv("AZURE_FOUNDRY_API_KEY", "").strip()
    if not api_key:
        raise AuthError(
            "Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in "
@ -1297,7 +1311,7 @@ def _resolve_explicit_runtime(
        expires_at = state.get("agent_key_expires_at") or state.get("expires_at")
        if not api_key:
            creds = resolve_nous_runtime_credentials(
-                timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
+                timeout_seconds=float(_getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
            )
            api_key = creds.get("api_key", "")
            expires_at = creds.get("expires_at")
@ -1326,7 +1340,7 @@ def _resolve_explicit_runtime(
    if pconfig and pconfig.auth_type == "api_key":
        env_url = ""
        if pconfig.base_url_env_var:
-            env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")
+            env_url = _getenv(pconfig.base_url_env_var, "").strip().rstrip("/")

        base_url = explicit_base_url
        if not base_url:
@ -1398,8 +1412,8 @@ def resolve_runtime_provider(
    if requested_provider == "anthropic" and "azure.com" in _eff_base:
        _azure_key = (
            (explicit_api_key or "").strip()
-            or os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
-            or os.getenv("ANTHROPIC_API_KEY", "").strip()
+            or _getenv("AZURE_ANTHROPIC_KEY", "").strip()
+            or _getenv("ANTHROPIC_API_KEY", "").strip()
        )
        return {
            "provider": "anthropic",
@ -1454,8 +1468,8 @@ def resolve_runtime_provider(
    if provider == "openrouter":
        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
        cfg_base_url = str(model_cfg.get("base_url") or "").strip()
-        env_openai_base_url = os.getenv("OPENAI_BASE_URL", "").strip()
-        env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()
+        env_openai_base_url = _getenv("OPENAI_BASE_URL", "").strip()
+        env_openrouter_base_url = _getenv("OPENROUTER_BASE_URL", "").strip()
        has_custom_endpoint = bool(
            explicit_base_url
            or env_openai_base_url
@ -1511,7 +1525,7 @@ def resolve_runtime_provider(
    if provider == "nous":
        try:
            creds = resolve_nous_runtime_credentials(
-                timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
+                timeout_seconds=float(_getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
            )
            return {
                "provider": "nous",
@ -1664,7 +1678,7 @@ def resolve_runtime_provider(
            for hint_key in ("key_env", "api_key_env"):
                env_var = str(model_cfg.get(hint_key) or "").strip()
                if env_var:
-                    token = os.getenv(env_var, "").strip()
+                    token = _getenv(env_var, "").strip()
                    if token:
                        break
            # Next: an inline api_key on the model config (useful in multi-profile
@ -1674,8 +1688,8 @@ def resolve_runtime_provider(
            # Finally fall back to the historical fixed names.
            if not token:
                token = (
-                    os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
-                    or os.getenv("ANTHROPIC_API_KEY", "").strip()
+                    _getenv("AZURE_ANTHROPIC_KEY", "").strip()
+                    or _getenv("ANTHROPIC_API_KEY", "").strip()
                )
            if not token:
                raise AuthError(
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@ -1554,6 +1554,7 @@ async def upload_managed_file_stream(
    )
    tmp_path = Path(tmp_name)
    total = 0
+    renamed = False
    try:
        with os.fdopen(tmp_fd, "wb") as out:
            while True:
@ -1565,16 +1566,21 @@ async def upload_managed_file_stream(
                    raise HTTPException(status_code=413, detail="File is too large")
                out.write(chunk)
        os.replace(tmp_path, target)
+        renamed = True
    except HTTPException:
-        tmp_path.unlink(missing_ok=True)
        raise
    except PermissionError:
-        tmp_path.unlink(missing_ok=True)
        raise HTTPException(status_code=403, detail="File is not writable")
    except OSError as exc:
-        tmp_path.unlink(missing_ok=True)
        raise HTTPException(status_code=500, detail=f"Could not write file: {exc}")
    finally:
+        # Clean up the temp file on every non-success exit, including
+        # BaseException paths the `except` clauses above don't catch — most
+        # importantly asyncio.CancelledError when a browser aborts a large
+        # upload mid-stream (the exact NS-501 scenario). os.replace clears
+        # tmp_path on success, so only unlink when the rename didn't happen.
+        if not renamed:
+            tmp_path.unlink(missing_ok=True)
        await file.close()

    return {
@ -2316,6 +2322,43 @@ def _gateway_display_command(profile: Optional[str], verb: str) -> str:
    return " ".join(["hermes", *_gateway_subcommand(profile, verb)])


+# Slack member IDs (users U..., Enterprise Grid W...). Kept in sync with the
+# frontend SLACK_MEMBER_ID_RE in web/src/pages/ChannelsPage.tsx.
+_SLACK_MEMBER_ID_RE = re.compile(r"[UW][A-Z0-9]{2,}")
+
+
+def _validate_messaging_env_value(platform_id: str, key: str, value: str) -> None:
+    """Reject platform credentials that are clearly in the wrong field."""
+    if platform_id != "slack" or not value:
+        return
+
+    if key == "SLACK_BOT_TOKEN" and not value.startswith("xoxb-"):
+        raise HTTPException(
+            status_code=400,
+            detail="Slack Bot Token must start with xoxb-. Paste the bot token from OAuth & Permissions.",
+        )
+    if key == "SLACK_APP_TOKEN" and not value.startswith("xapp-"):
+        raise HTTPException(
+            status_code=400,
+            detail="Slack App Token must start with xapp-. Paste the app-level token from Basic Information > App-Level Tokens.",
+        )
+    if key == "SLACK_ALLOWED_USERS":
+        # Mirror the gateway's parse (gateway/platforms/slack.py): split on comma,
+        # strip, and drop empty entries so a trailing/interior comma isn't rejected
+        # here when the runtime would accept it. "*" is the allow-all wildcard.
+        user_ids = [part.strip() for part in value.split(",") if part.strip()]
+        invalid = [
+            user_id
+            for user_id in user_ids
+            if user_id != "*" and not _SLACK_MEMBER_ID_RE.fullmatch(user_id)
+        ]
+        if invalid:
+            raise HTTPException(
+                status_code=400,
+                detail="Slack allowed user IDs must be comma-separated member IDs like U01ABC2DEF3.",
+            )
+
+
 def _spawn_gateway_restart(profile: Optional[str] = None) -> Tuple[subprocess.Popen, bool]:
    """Spawn ``hermes gateway restart``, reusing an in-flight restart.

@ -3925,28 +3968,135 @@ async def update_config(body: ConfigUpdate, profile: Optional[str] = None):
        raise HTTPException(status_code=500, detail="Internal server error")


+def _catalog_provider_env_metadata() -> dict:
+    """Map provider env vars → desktop card metadata, derived from the catalog.
+
+    Returns ``{env_var: {provider, provider_label, description, url, is_password,
+    advanced}}`` for every API-key provider in the unified ``provider_catalog()``
+    (i.e. the ``hermes model`` universe). This is what lets the desktop Keys tab
+    render a card for a provider even when its env var was never hand-added to
+    ``OPTIONAL_ENV_VARS`` — closing the drift where CLI-configurable providers
+    (openai-api, kilocode, novita, tencent-tokenhub, copilot, …) were missing
+    from the GUI.
+
+    Hand ``OPTIONAL_ENV_VARS`` prose is layered ON TOP of this in the endpoint;
+    this only supplies membership + grouping + sensible fallbacks.
+    """
+    try:
+        from hermes_cli.provider_catalog import provider_catalog
+    except Exception:
+        return {}
+
+    # Env vars already declared with a NON-provider category (e.g. the shared
+    # GITHUB_TOKEN, which is a Skills-Hub "tool" credential) must not be
+    # promoted into a provider card. Copilot lists GITHUB_TOKEN among its auth
+    # aliases, but its provider card uses the provider-owned COPILOT_GITHUB_TOKEN.
+    try:
+        from hermes_cli.config import OPTIONAL_ENV_VARS as _OPT
+    except Exception:
+        _OPT = {}
+    _non_provider_keys = {
+        k for k, v in _OPT.items()
+        if (v or {}).get("category") and (v or {}).get("category") != "provider"
+    }
+
+    meta: dict = {}
+    for d in provider_catalog():
+        if d.tab != "keys":
+            continue
+        # API-key vars: the first is the primary (password) field; any aliases
+        # are kept as additional password fields so users can clear them too.
+        for env_var in d.api_key_env_vars:
+            if env_var in _non_provider_keys:
+                continue  # don't hijack a shared tool/messaging credential
+            meta.setdefault(
+                env_var,
+                {
+                    "provider": d.slug,
+                    "provider_label": d.label,
+                    "description": d.description,
+                    "url": d.signup_url or None,
+                    "is_password": True,
+                    "advanced": False,
+                    "category": "provider",
+                },
+            )
+        # Base-URL override is an advanced, non-secret field for the same card.
+        if d.base_url_env_var:
+            meta.setdefault(
+                d.base_url_env_var,
+                {
+                    "provider": d.slug,
+                    "provider_label": d.label,
+                    "description": f"{d.label} base URL override",
+                    "url": None,
+                    "is_password": False,
+                    "advanced": True,
+                    "category": "provider",
+                },
+            )
+
+        # AWS-SDK providers (Bedrock) authenticate via the AWS credential chain
+        # rather than a pasted API key, so they have no api_key_env_vars. Tag
+        # their AWS_* settings to the provider card so they still appear on the
+        # Keys tab (otherwise Bedrock — a `hermes model` provider — would be
+        # invisible in the desktop app).
+        if d.auth_type == "aws_sdk":
+            for aws_var in ("AWS_REGION", "AWS_PROFILE"):
+                existing = meta.get(aws_var, {})
+                meta[aws_var] = {
+                    "provider": d.slug,
+                    "provider_label": d.label,
+                    "description": existing.get("description") or f"{d.label} ({aws_var})",
+                    "url": existing.get("url"),
+                    "is_password": False,
+                    "advanced": existing.get("advanced", True),
+                    "category": "provider",
+                }
+    return meta
+
+
@app.get("/api/env")
 async def get_env_vars(profile: Optional[str] = None):
    with _profile_scope(profile):
        env_on_disk = load_env()
    channel_keys = _channel_managed_env_keys()
-    result = {}
-    for var_name, info in OPTIONAL_ENV_VARS.items():
+    catalog_meta = _catalog_provider_env_metadata()
+
+    def _row(var_name: str, info: dict) -> dict:
        value = env_on_disk.get(var_name)
-        result[var_name] = {
+        cat_meta = catalog_meta.get(var_name) or {}
+        # Hand OPTIONAL_ENV_VARS prose wins where present; the catalog fills any
+        # gaps (description/url) and always supplies provider grouping hints.
+        return {
            "is_set": bool(value),
            "redacted_value": redact_key(value) if value else None,
-            "description": info.get("description", ""),
-            "url": info.get("url"),
-            "category": info.get("category", ""),
-            "is_password": info.get("password", False),
+            "description": info.get("description") or cat_meta.get("description", ""),
+            "url": info.get("url") if info.get("url") is not None else cat_meta.get("url"),
+            "category": info.get("category") or cat_meta.get("category", ""),
+            "is_password": info.get("password", cat_meta.get("is_password", False)),
            "tools": info.get("tools", []),
-            "advanced": info.get("advanced", False),
+            "advanced": info.get("advanced", cat_meta.get("advanced", False)),
            # True when this var is a messaging-platform credential owned by a
            # Channels page card. The Keys/Env page uses this to hide it and
            # avoid duplicating the (richer) Channels configuration UI.
            "channel_managed": var_name in channel_keys,
+            # Provider grouping hints derived from the unified provider catalog
+            # so the desktop Keys tab groups by the SAME provider identity the
+            # CLI `hermes model` picker uses (not desktop-only prefix guesses).
+            "provider": cat_meta.get("provider", ""),
+            "provider_label": cat_meta.get("provider_label", ""),
        }
+
+    result = {}
+    for var_name, info in OPTIONAL_ENV_VARS.items():
+        result[var_name] = _row(var_name, info)
+    # Synthesize rows for catalog provider env vars that have no hand entry in
+    # OPTIONAL_ENV_VARS — these are the providers that were CLI-configurable but
+    # invisible in the desktop app until now.
+    for var_name in catalog_meta:
+        if var_name not in result:
+            result[var_name] = _row(var_name, {})
    return result


@ -4146,9 +4296,9 @@ _PLATFORM_OVERRIDES: dict[str, dict[str, Any]] = {
    },
    "slack": {
        "name": "Slack",
-        "description": "Use Hermes from Slack via Socket Mode.",
+        "description": "Use Hermes from Slack via Socket Mode. Add allowed Slack member IDs so connected bots can respond.",
        "docs_url": "https://api.slack.com/apps",
-        "env_vars": ("SLACK_BOT_TOKEN", "SLACK_APP_TOKEN"),
+        "env_vars": ("SLACK_BOT_TOKEN", "SLACK_APP_TOKEN", "SLACK_ALLOWED_USERS"),
        "required_env": ("SLACK_BOT_TOKEN", "SLACK_APP_TOKEN"),
    },
    "mattermost": {
@ -4633,6 +4783,7 @@ def _messaging_env_info(key: str) -> dict[str, Any]:
    return {
        "description": info.get("description", ""),
        "prompt": info.get("prompt", key),
+        "help": info.get("help", ""),
        "url": info.get("url"),
        "is_password": info.get("password", False),
        "advanced": info.get("advanced", False),
@ -5212,6 +5363,7 @@ async def update_messaging_platform(
                    )
                trimmed = value.strip()
                if trimmed:
+                    _validate_messaging_env_value(platform_id, key, trimmed)
                    save_env_value(key, trimmed)

            if body.enabled is not None:
@ -5413,13 +5565,53 @@ def _claude_code_only_status() -> Dict[str, Any]:
    return {"logged_in": False, "source": None}


-# Provider catalog. The order matters — it's how we render the UI list.
-# ``cli_command`` is what the dashboard surfaces as the copy-to-clipboard
-# fallback while Phase 2 (in-browser flows) isn't built yet.
-# ``flow`` describes the OAuth shape so the future modal can pick the
-# right UI: ``pkce`` = open URL + paste callback code, ``device_code`` =
-# show code + verification URL + poll, ``external`` = read-only (delegated
-# to a third-party CLI like Claude Code or Qwen).
+def _gemini_cli_status() -> Dict[str, Any]:
+    """Status for the google-gemini-cli OAuth provider (Code Assist login)."""
+    try:
+        from hermes_cli import auth as hauth
+        raw = hauth.get_gemini_oauth_auth_status()
+    except Exception as e:
+        return {"logged_in": False, "error": str(e)}
+    return {
+        "logged_in": bool(raw.get("logged_in")),
+        "source": raw.get("source") or "google_oauth",
+        "source_label": raw.get("email") or raw.get("auth_file") or "Google Code Assist",
+        "token_preview": _truncate_token(raw.get("api_key")),
+        "expires_at": None,
+        "has_refresh_token": True,
+    }
+
+
+def _copilot_acp_status() -> Dict[str, Any]:
+    """Status for copilot-acp — credentials are owned by the Copilot CLI.
+
+    There is no cheap programmatic credential probe for the ACP subprocess, so
+    this is a read-only "managed by the Copilot CLI" card (like claude-code):
+    Hermes never claims a login state it can't verify.
+    """
+    return {
+        "logged_in": False,
+        "source": "copilot_cli",
+        "source_label": "Managed by the GitHub Copilot CLI",
+        "token_preview": None,
+        "expires_at": None,
+        "has_refresh_token": False,
+    }
+
+
+# Explicit, hand-tuned OAuth/account provider cards. These carry the bits that
+# can't be derived from the unified provider catalog: the OAuth ``flow`` shape,
+# the per-provider ``status_fn``, the ``cli_command`` fallback, and curated
+# display order. They are the OVERRIDE BASE for ``_build_oauth_catalog()``,
+# which unions them with every accounts-tab provider in ``provider_catalog()``
+# so newly-added OAuth/external providers appear automatically (no hand edit).
+# This tuple also still includes two entries that are NOT catalog providers but
+# must show on the Accounts tab: the api-key Anthropic PKCE card and the
+# synthetic ``claude-code`` subscription row.
+# ``flow`` describes the OAuth shape so the modal can pick the right UI:
+# ``pkce`` = open URL + paste callback code, ``device_code`` = show code +
+# verification URL + poll, ``external`` = read-only (delegated to a third-party
+# CLI like Claude Code or Qwen), ``loopback`` = 127.0.0.1 callback listener.
 _OAUTH_PROVIDER_CATALOG: tuple[Dict[str, Any], ...] = (
    {
        "id": "nous",
@ -5469,6 +5661,22 @@ _OAUTH_PROVIDER_CATALOG: tuple[Dict[str, Any], ...] = (
        "docs_url": "https://hermes-agent.nousresearch.com/docs/guides/xai-grok-oauth",
        "status_fn": None,  # dispatched via auth.get_xai_oauth_auth_status
    },
+    {
+        "id": "google-gemini-cli",
+        "name": "Google Gemini (OAuth + Code Assist)",
+        "flow": "external",
+        "cli_command": "hermes auth add google-gemini-cli",
+        "docs_url": "https://ai.google.dev/gemini-api/docs",
+        "status_fn": _gemini_cli_status,
+    },
+    {
+        "id": "copilot-acp",
+        "name": "GitHub Copilot (ACP)",
+        "flow": "external",
+        "cli_command": "copilot /login",
+        "docs_url": "https://docs.github.com/en/copilot",
+        "status_fn": _copilot_acp_status,
+    },
    # ── Anthropic / Claude entries sit at the bottom: the API-key path
    # first, then the subscription OAuth path (which only works with extra
    # usage credits on top of a Claude Max plan — see disclaimer in name).
@ -5555,6 +5763,31 @@ def _resolve_provider_status(provider_id: str, status_fn) -> Dict[str, Any]:
                "has_refresh_token": True,
                "last_refresh": raw.get("last_refresh"),
            }
+        # No hand-written branch for this provider id: fall through to the
+        # canonical slug-driven dispatcher so accounts-tab providers derived
+        # from the unified catalog (which carry status_fn=None) still reflect
+        # real login state instead of rendering permanently logged-out. This
+        # closes the membership-auto-extends-but-status-doesn't gap: add an
+        # OAuth/account provider plugin and its card shows the right state.
+        raw = hauth.get_auth_status(provider_id)
+        if isinstance(raw, dict) and "logged_in" in raw:
+            return {
+                "logged_in": bool(raw.get("logged_in")),
+                "source": raw.get("source") or raw.get("provider") or provider_id,
+                "source_label": (
+                    raw.get("source_label")
+                    or raw.get("auth_store")
+                    or raw.get("auth_store_path")
+                    or raw.get("base_url")
+                    or raw.get("name")
+                    or ""
+                ),
+                "token_preview": _truncate_token(
+                    raw.get("access_token") or raw.get("api_key")
+                ),
+                "expires_at": raw.get("expires_at") or raw.get("access_expires_at"),
+                "has_refresh_token": bool(raw.get("has_refresh_token")),
+            }
    except Exception as e:
        return {"logged_in": False, "error": str(e)}
    return {"logged_in": False}
@ -5598,6 +5831,56 @@ def _oauth_provider_disconnect_hint(provider: Dict[str, Any], status: Dict[str,
    return None


+def _build_oauth_catalog() -> list[Dict[str, Any]]:
+    """Build the Accounts-tab provider list.
+
+    MEMBERSHIP is the union of:
+      1. ``_OAUTH_PROVIDER_CATALOG`` — the explicit, hand-tuned cards that carry
+         bespoke flow / status_fn / cli_command (including the api-key Anthropic
+         PKCE card and the synthetic claude-code subscription row, which are not
+         catalog providers), and
+      2. every accounts-tab provider in the unified ``provider_catalog()`` (the
+         ``hermes model`` universe) — so any OAuth/external provider added as a
+         plugin appears automatically, with sensible defaults, even if no
+         explicit card was written for it.
+
+    The explicit catalog wins on metadata; the unified catalog guarantees we
+    never silently drop a provider the CLI picker offers. Order: explicit cards
+    first (their curated order), then any catalog-only providers appended in
+    ``hermes model`` order.
+    """
+    rows: list[Dict[str, Any]] = []
+    seen: set[str] = set()
+
+    # 1. Explicit hand-tuned cards (authoritative metadata + curated order).
+    for entry in _OAUTH_PROVIDER_CATALOG:
+        if entry["id"] in seen:
+            continue
+        seen.add(entry["id"])
+        rows.append(dict(entry))
+
+    # 2. Catalog accounts-providers not already covered — keeps the Accounts tab
+    #    in lockstep with the `hermes model` universe (zero-edit for new plugins).
+    try:
+        from hermes_cli.provider_catalog import provider_catalog
+        for d in provider_catalog():
+            if d.tab != "accounts" or d.slug in seen:
+                continue
+            seen.add(d.slug)
+            rows.append({
+                "id": d.slug,
+                "name": d.label,
+                "flow": "external",
+                "cli_command": f"hermes auth add {d.slug}",
+                "docs_url": d.signup_url or "",
+                "status_fn": None,
+            })
+    except Exception:
+        pass
+
+    return rows
+
+
@app.get("/api/providers/oauth")
 async def list_oauth_providers(profile: Optional[str] = None):
    """Enumerate every OAuth-capable LLM provider with current status.
@ -5617,10 +5900,14 @@ async def list_oauth_providers(profile: Optional[str] = None):
          token_preview    last N chars of the token, never the full token
          expires_at       ISO timestamp string or null
          has_refresh_token bool
+
+    Membership is derived from the unified provider_catalog() so this stays in
+    sync with the `hermes model` picker; _OAUTH_OVERRIDES supplies per-provider
+    flow/status/cli metadata.
    """
    with _profile_scope(profile):
        providers = []
-        for p in _OAUTH_PROVIDER_CATALOG:
+        for p in _build_oauth_catalog():
            status = _resolve_provider_status(p["id"], p.get("status_fn"))
            disconnect_hint = _oauth_provider_disconnect_hint(p, status)
            providers.append({
@ -5647,7 +5934,7 @@ async def disconnect_oauth_provider(
    _require_token(request)

    with _profile_scope(profile):
-        catalog_by_id = {p["id"]: p for p in _OAUTH_PROVIDER_CATALOG}
+        catalog_by_id = {p["id"]: p for p in _build_oauth_catalog()}
        provider = catalog_by_id.get(provider_id)
        if provider is None:
            raise HTTPException(
@ -10914,6 +11201,7 @@ def _resolve_chat_argv(
    # the dashboard PTY path.
    env.setdefault("HERMES_TUI_DISABLE_MOUSE", "1")
    env.setdefault("HERMES_TUI_INLINE", "1")
+    env["HERMES_TUI_DASHBOARD"] = "1"

    if profile_dir is not None:
        env["HERMES_HOME"] = str(profile_dir)
--- a/hermes_state.py
+++ b/hermes_state.py
@ -1836,6 +1836,43 @@ class SessionDB:

        return cleaned

+    def _is_compression_ancestor(
+        self, conn, *, ancestor_id: str, descendant_id: str
+    ) -> bool:
+        """Return True if *ancestor_id* is a compression predecessor of
+        *descendant_id* (walking parent links up the continuation chain).
+
+        The continuation edge is the canonical one shared with
+        :func:`_ephemeral_child_sql` / :meth:`set_session_archived`
+        (``_COMPRESSION_CHILD_SQL``): a parent → child edge counts only when the
+        parent ended with ``end_reason = 'compression'`` and the child started
+        at or after the parent's ``ended_at``, which distinguishes continuations
+        from delegate subagents / branch children that also carry a
+        ``parent_session_id``. Expressed as a single recursive CTE rather than a
+        per-hop Python walk so the edge definition lives in exactly one place.
+        """
+        if not ancestor_id or not descendant_id or ancestor_id == descendant_id:
+            return False
+        # Walk parent links up from the descendant, following only compression
+        # continuation edges, and check whether ancestor_id is reached.
+        edge = _COMPRESSION_CHILD_SQL.format(a="child")
+        row = conn.execute(
+            f"""
+            WITH RECURSIVE ancestors(id) AS (
+                SELECT ?
+                UNION
+                SELECT parent.id
+                FROM ancestors a
+                JOIN sessions child ON child.id = a.id
+                JOIN sessions parent ON parent.id = child.parent_session_id
+                WHERE {edge}
+            )
+            SELECT 1 FROM ancestors WHERE id = ? AND id != ? LIMIT 1
+            """,
+            (descendant_id, ancestor_id, descendant_id),
+        ).fetchone()
+        return row is not None
+
    def set_session_title(self, session_id: str, title: str) -> bool:
        """Set or update a session's title.

@ -1854,9 +1891,29 @@ class SessionDB:
                )
                conflict = cursor.fetchone()
                if conflict:
-                    raise ValueError(
-                        f"Title '{title}' is already in use by session {conflict['id']}"
-                    )
+                    conflict_id = conflict["id"]
+                    # A compression continuation is the live, projected-forward
+                    # head of its conversation; its compressed predecessors are
+                    # ended and hidden from the session list (list_sessions_rich
+                    # projects roots → tip). When the title that "conflicts" is
+                    # held by such a hidden ancestor, the user has no way to free
+                    # it — renaming the visible tip back to the base name would
+                    # dead-end with "already in use by <session they can't see>".
+                    # Treat this as a transfer: move the title off the ancestor
+                    # onto the continuation. Uniqueness is preserved (still only
+                    # one session carries the exact title) and the parent-link
+                    # lineage is untouched.
+                    if self._is_compression_ancestor(
+                        conn, ancestor_id=conflict_id, descendant_id=session_id
+                    ):
+                        conn.execute(
+                            "UPDATE sessions SET title = NULL WHERE id = ?",
+                            (conflict_id,),
+                        )
+                    else:
+                        raise ValueError(
+                            f"Title '{title}' is already in use by session {conflict_id}"
+                        )
            cursor = conn.execute(
                "UPDATE sessions SET title = ? WHERE id = ?",
                (title, session_id),
--- a/nix/devShell.nix
+++ b/nix/devShell.nix
@ -12,7 +12,6 @@
    let
      packages = builtins.attrValues self'.packages;
      hermesNpmLib = self'.packages.default.passthru.hermesNpmLib;
-      fixLockfilesExe = pkgs.lib.getExe self'.packages.fix-lockfiles;

      # Collect all packageJsonPath values from npm workspace packages.
      npmPackageJsonPaths = builtins.filter (p: p != null) (
@ -33,7 +32,7 @@
        shellHook = ''
          echo "Hermes Agent dev shell"
          ${combinedNonNpm}
-          ${hermesNpmLib.mkNpmDevShellHook npmPackageJsonPaths fixLockfilesExe}
+          ${hermesNpmLib.mkNpmDevShellHook npmPackageJsonPaths}
          echo "Ready. Run 'hermes' to start."
        '';
      };
--- a/nix/lib.nix
+++ b/nix/lib.nix
@ -2,8 +2,7 @@
 #
 # All npm packages in this repo are workspace members sharing a single
 # root package-lock.json.  mkNpmPassthru provides the shared src, npmDeps,
-# npmRoot, and npmDepsFetcherVersion so individual .nix files don't
-# duplicate them.  One hash to rule them all.
+# npmRoot, and npmConfigHook so individual .nix files don't duplicate them.
 #
 # mkNpmPassthru returns packageJsonPath (e.g. "ui-tui/package.json")
 # instead of a per-package devShellHook.  The root devshell hook
@ -19,28 +18,19 @@ let
  # The workspace root — where the single package-lock.json lives.
  src = ../.;

-  # Single npm deps fetch from the workspace root lockfile.
-  # All workspace packages share this derivation.
-  npmDepsHash = "sha256-kbjJksq7limRIYqP3DwI+GNgCXkG96tXcsQqmuEedxo=";
-
-  npmDeps = pkgs.fetchNpmDeps {
-    inherit src;
-    fetcherVersion = 2;
-    hash = npmDepsHash;
-  };
+  # npm dependencies for the workspace, shared by all members. importNpmLock
+  # resolves each package from the lockfile's own `integrity` hashes, so the
+  # lockfile is the single source of truth — no separate dependency hash to
+  # keep in sync with it.
+  npmDeps = pkgs.importNpmLock.importNpmLock { npmRoot = src; };
 in
 {
  # Returns a buildNpmPackage-compatible attrs set that provides:
-  #   src, npmDeps, npmRoot, npmDepsFetcherVersion
-  #   patchPhase             — ensures root lockfile has exactly one trailing newline
-  #   nativeBuildInputs      — [ updateLockfileScript ] (list, prepend with ++ for more)
-  #   passthru.packageJsonPath — relative path to this workspace's package.json
-  #   nodejs                 — fixed nodejs version for all packages we use in the repo
-  #
-  # NOTE: npmConfigHook runs `diff` between the source lockfile and the
-  # npm-deps cache lockfile. fetchNpmDeps preserves whatever trailing
-  # newlines the lockfile has. The patchPhase normalizes to exactly one
-  # trailing newline so both sides always match.
+  #   src, npmDeps, npmRoot      — workspace source + importNpmLock dep set
+  #   npmConfigHook              — importNpmLock's offline `npm install` hook
+  #   nativeBuildInputs          — [ updateLockfileScript ] (list, prepend with ++ for more)
+  #   passthru.packageJsonPath   — relative path to this workspace's package.json
+  #   nodejs                     — fixed nodejs version for all packages we use in the repo
  #
  # Usage:
  #   npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; };
@ -62,35 +52,15 @@ in
    in
    {
      inherit src npmDeps nodejs;
+      # importNpmLock's hook installs the rewritten lockfile (every `resolved`
+      # rewritten to a /nix/store file: path) into the unpacked workspace and
+      # runs `npm install` offline, so every workspace member's dependencies
+      # resolve without network access.
+      npmConfigHook = pkgs.importNpmLock.npmConfigHook;
      npmRoot = ".";
-      npmDepsFetcherVersion = 2;

      ELECTRON_SKIP_BINARY_DOWNLOAD = 1;

-      patchPhase = ''
-        runHook prePatch
-        # Normalize trailing newlines on the root lockfile so source and
-        # npm-deps always match, regardless of what fetchNpmDeps preserves.
-        sed -i -z 's/\\n*$/\\n/' package-lock.json
-
-        # Make npmConfigHook's byte-for-byte diff newline-agnostic by
-        # replacing its hardcoded /nix/store/.../diff with a wrapper that
-        # normalizes trailing newlines on both sides before comparing.
-        mkdir -p "$TMPDIR/bin"
-        cat > "$TMPDIR/bin/diff" << DIFFWRAP
-        #!/bin/sh
-        f1=\\$(mktemp) && sed -z 's/\\n*$/\\n/' "\\$1" > "\\$f1"
-        f2=\\$(mktemp) && sed -z 's/\\n*$/\\n/' "\\$2" > "\\$f2"
-        ${pkgs.diffutils}/bin/diff "\\$f1" "\\$f2" && rc=0 || rc=\\$?
-        rm -f "\\$f1" "\\$f2"
-        exit \\$rc
-        DIFFWRAP
-        chmod +x "$TMPDIR/bin/diff"
-        export PATH="$TMPDIR/bin:$PATH"
-
-        runHook postPatch
-      '';
-
      nativeBuildInputs = [
        (pkgs.writeShellScriptBin "update_${attr}_lockfile" ''
          set -euox pipefail
@ -104,7 +74,6 @@ in
          CI=true ${pkgs.lib.getExe' nodejs "npm"} install --workspaces
          ${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json

-          # Hash lives in lib.nix — just rebuild to verify.
          nix build .#${attr}
          echo "Lockfile updated and build verified for .#${attr}"
        '')
@ -120,12 +89,9 @@ in
  # Takes a list of package.json relative paths (from mkNpmPassthru .passthru.packageJsonPath),
  # stamps all of them, and if any changed:
  #   1. Runs `npm i --package-lock-only` from root to update the lockfile
-  #   2. If the lockfile changed, runs `npm ci` + fix-lockfiles
-  #
-  # fixLockfilesExe: absolute path to the fix-lockfiles binary
-  # (from pkgs.lib.getExe self'.packages.fix-lockfiles in devShell.nix).
+  #   2. If the lockfile changed, runs `npm ci`
  mkNpmDevShellHook =
-    packageJsonPaths: fixLockfilesExe:
+    packageJsonPaths:
    pkgs.writeShellScript "npm-dev-hook" ''
      REPO_ROOT=$(git rev-parse --show-toplevel)

@ -158,172 +124,4 @@ in
        echo "$LOCK_STAMP_VALUE" > "$LOCK_STAMP"
      fi
    '';
-
-  # Build `fix-lockfiles` bin that checks/updates the single npmDepsHash
-  #   fix-lockfiles --check   # exit 1 if any hash is stale
-  #   fix-lockfiles --apply   # rewrite stale hashes in place
-  #   fix-lockfiles           # alias of --apply
-  # Writes machine-readable fields (stale, changed, report) to $GITHUB_OUTPUT
-  # when set, so CI workflows can post a sticky PR comment directly.
-  mkFixLockfiles =
-    {
-      attr, # flake package attr for fallback verification build, e.g. "tui"
-    }:
-    pkgs.writeShellScriptBin "fix-lockfiles" ''
-      set -uox pipefail
-      MODE="''${1:---apply}"
-      case "$MODE" in
-        --check|--apply) ;;
-        -h|--help)
-          echo "usage: fix-lockfiles [--check|--apply]"
-          exit 0 ;;
-        *)
-          echo "usage: fix-lockfiles [--check|--apply]" >&2
-          exit 2 ;;
-      esac
-
-      REPO_ROOT="$(git rev-parse --show-toplevel)"
-      cd "$REPO_ROOT"
-
-      # When running in GH Actions, emit Markdown links in the report pointing
-      # at the offending line of the nix file (and the lockfile) at the exact
-      # commit that was checked. LINK_SHA should be set by the workflow to the
-      # PR head SHA; falls back to GITHUB_SHA (which on pull_request is the
-      # test-merge commit, still browseable).
-      LINK_SERVER="''${GITHUB_SERVER_URL:-https://github.com}"
-      LINK_REPO="''${GITHUB_REPOSITORY:-}"
-      LINK_SHA="''${LINK_SHA:-''${GITHUB_SHA:-}}"
-
-      STALE=0
-      FIXED=0
-      REPORT=""
-
-      # All workspace packages share the root package-lock.json, so
-      # we only need to check the hash once.
-      LOCK_FILE="package-lock.json"
-      LIB_FILE="nix/lib.nix"
-      NEW_HASH=$(${pkgs.lib.getExe pkgs.prefetch-npm-deps} "$LOCK_FILE" 2>/dev/null)
-      if [ -z "$NEW_HASH" ]; then
-        echo "prefetch-npm-deps failed, falling back to nix build" >&2
-        OUTPUT=$(nix build ".#${attr}.npmDeps" --no-link --print-build-logs 2>&1)
-        STATUS=$?
-        if [ "$STATUS" -eq 0 ]; then
-          echo "ok (via nix build)"
-          exit 0
-        fi
-        NEW_HASH=$(echo "$OUTPUT" | awk '/got:/ {print $2; exit}')
-        if [ -z "$NEW_HASH" ]; then
-          if echo "$OUTPUT" | grep -qE "throttled|HTTP error 418|substituter .* is disabled|some outputs of .* are not valid"; then
-            echo "skipped (transient cache failure — see primary nix build for real status)" >&2
-            echo "$OUTPUT" | tail -8 >&2
-            exit 0
-          fi
-          echo "build failed with no hash mismatch:" >&2
-          echo "$OUTPUT" | tail -40 >&2
-          exit 1
-        fi
-      fi
-
-      OLD_HASH=$(grep -oE 'npmDepsHash = "sha256-[^"]+"' "$LIB_FILE" | head -1 \
-        | sed -E 's/npmDepsHash = "(.*)"/\1/')
-
-      # prefetch-npm-deps says the hash already matches — but it only hashes the
-      # lockfile *contents* and can disagree with fetchNpmDeps + npmConfigHook,
-      # which validate the full source lockfile against the realized deps cache.
-      # Trusting prefetch alone produced false "ok" results while the actual
-      # build was broken (e.g. lockfile engines/os/cpu fields the pinned nixpkgs
-      # strips from the deps cache, tripping npmConfigHook). So when prefetch
-      # claims the hash is current, confirm with a real consumer build before
-      # believing it.
-      if [ "$NEW_HASH" = "$OLD_HASH" ]; then
-        if VERIFY_OUT=$(nix build ".#${attr}" --no-link --print-build-logs 2>&1); then
-          echo "ok"
-          if [ -n "''${GITHUB_OUTPUT:-}" ]; then
-            { echo "stale=false"; echo "changed=false"; } >> "$GITHUB_OUTPUT"
-          fi
-          exit 0
-        fi
-        # Build failed despite a matching hash. A fixed-output 'got:' means
-        # prefetch genuinely disagreed with fetchNpmDeps — adopt the real hash
-        # and fall through to the stale-handling path below.
-        CORRECT_HASH=$(echo "$VERIFY_OUT" | awk '/got:/ {print $2; exit}')
-        if [ -n "$CORRECT_HASH" ]; then
-          echo "prefetch-npm-deps reported current ($OLD_HASH) but fetchNpmDeps wants $CORRECT_HASH" >&2
-          NEW_HASH="$CORRECT_HASH"
-        elif echo "$VERIFY_OUT" | grep -qE "throttled|HTTP error 418|substituter .* is disabled|some outputs of .* are not valid"; then
-          echo "skipped (transient cache failure — see primary nix build for real status)" >&2
-          echo "$VERIFY_OUT" | tail -8 >&2
-          exit 0
-        else
-          # Not a stale-hash problem — surface it honestly instead of "ok".
-          echo "::error::nix build .#${attr} failed and it is NOT a stale npmDepsHash (no 'got:' hash in output)." >&2
-          echo "The committed lockfile may be incompatible with the pinned nixpkgs" >&2
-          echo "(e.g. engines/os/cpu fields that prefetch-npm-deps strips from the" >&2
-          echo "deps cache, tripping npmConfigHook). fix-lockfiles cannot repair this." >&2
-          echo "$VERIFY_OUT" | tail -40 >&2
-          if [ -n "''${GITHUB_OUTPUT:-}" ]; then
-            { echo "stale=false"; echo "changed=false"; } >> "$GITHUB_OUTPUT"
-          fi
-          exit 1
-        fi
-      fi
-
-      HASH_LINE=$(grep -n 'npmDepsHash = "sha256-' "$LIB_FILE" | head -1 | cut -d: -f1)
-      echo "stale: $LIB_FILE:$HASH_LINE $OLD_HASH -> $NEW_HASH"
-      STALE=1
-
-      if [ -n "$LINK_REPO" ] && [ -n "$LINK_SHA" ]; then
-        LIB_URL="$LINK_SERVER/$LINK_REPO/blob/$LINK_SHA/$LIB_FILE#L$HASH_LINE"
-        LOCK_URL="$LINK_SERVER/$LINK_REPO/blob/$LINK_SHA/$LOCK_FILE"
-        REPORT="- [\`$LIB_FILE:$HASH_LINE\`]($LIB_URL): \`$OLD_HASH\` → \`$NEW_HASH\` — lockfile: [\`$LOCK_FILE\`]($LOCK_URL)"$'\\n'
-      else
-        REPORT="- \`$LIB_FILE:$HASH_LINE\`: \`$OLD_HASH\` → \`$NEW_HASH\`"$'\\n'
-      fi
-
-      if [ "$MODE" = "--apply" ]; then
-        sed -i -E "s|npmDepsHash = \"sha256-[^\"]+\";|npmDepsHash = \"$NEW_HASH\";|" "$LIB_FILE"
-        if ! nix build ".#${attr}.npmDeps" --no-link --print-build-logs 2>/dev/null; then
-          # prefetch-npm-deps may disagree with fetchNpmDeps (it hashes
-          # the lockfile contents, not the full source tree).  Extract the
-          # correct hash from the nix build error and retry.
-          RETRY_OUTPUT=$(nix build ".#${attr}.npmDeps" --no-link --print-build-logs 2>&1)
-          CORRECT_HASH=$(echo "$RETRY_OUTPUT" | awk '/got:/ {print $2; exit}')
-          if [ -n "$CORRECT_HASH" ]; then
-            echo "prefetch-npm-deps gave $NEW_HASH but nix wants $CORRECT_HASH — retrying" >&2
-            sed -i -E "s|npmDepsHash = \"sha256-[^\"]+\";|npmDepsHash = \"$CORRECT_HASH\";|" "$LIB_FILE"
-            if ! nix build ".#${attr}.npmDeps" --no-link --print-build-logs; then
-              echo "verification build failed after hash retry" >&2
-              exit 1
-            fi
-            NEW_HASH="$CORRECT_HASH"
-          else
-            echo "verification build failed after hash update" >&2
-            exit 1
-          fi
-        fi
-        FIXED=1
-        echo "fixed"
-      fi
-
-      if [ -n "''${GITHUB_OUTPUT:-}" ]; then
-        {
-          [ "$STALE" -eq 1 ] && echo "stale=true" || echo "stale=false"
-          [ "$FIXED" -eq 1 ] && echo "changed=true" || echo "changed=false"
-          if [ -n "$REPORT" ]; then
-            echo "report<<REPORT_EOF"
-            printf "%s" "$REPORT"
-            echo "REPORT_EOF"
-          fi
-        } >> "$GITHUB_OUTPUT"
-      fi
-
-      if [ "$STALE" -eq 1 ] && [ "$MODE" = "--check" ]; then
-        echo
-        echo "Stale lockfile hash detected. Run:"
-        echo "  nix run .#fix-lockfiles"
-        exit 1
-      fi
-
-      exit 0
-    '';
 }
--- a/nix/packages.nix
+++ b/nix/packages.nix
@ -50,8 +50,6 @@
        tui = hermesAgent.hermesTui;
        web = hermesAgent.hermesWeb;
        desktop = hermesAgent.hermesDesktop;
-
-        fix-lockfiles = hermesAgent.hermesNpmLib.mkFixLockfiles { attr = "tui"; };
      };
    };
 }
--- a/plugins/memory/hindsight/README.md
+++ b/plugins/memory/hindsight/README.md
@ -144,4 +144,4 @@ Available in `hybrid` and `tools` memory modes:

 ## Client Version

-Requires `hindsight-client >= 0.4.22`. The plugin auto-upgrades on session start if an older version is detected.
+Requires `hindsight-client >= 0.6.1`. The plugin auto-upgrades on session start if an older version is detected.
--- a/plugins/memory/hindsight/init.py
+++ b/plugins/memory/hindsight/init.py
@ -50,7 +50,8 @@ logger = logging.getLogger(__name__)

 _DEFAULT_API_URL = "https://api.hindsight.vectorize.io"
 _DEFAULT_LOCAL_URL = "http://localhost:8888"
-_MIN_CLIENT_VERSION = "0.4.22"
+# Keep in sync with tools/lazy_deps.py ("memory.hindsight") and plugin.yaml.
+_MIN_CLIENT_VERSION = "0.6.1"
 _DEFAULT_TIMEOUT = 120  # seconds — cloud API can take 30-40s per request
 _DEFAULT_IDLE_TIMEOUT = 300  # seconds — Hindsight embedded daemon default
 # Mirrors hindsight-integrations/openclaw — Hindsight 0.5.0 added
@ -100,6 +101,17 @@ def _check_local_runtime() -> tuple[bool, str | None]:
        return False, str(exc)


+def _ensure_cloud_client_dependency() -> None:
+    """Install the Hindsight cloud client lazily before importing it."""
+    try:
+        from tools.lazy_deps import ensure as _lazy_ensure
+        _lazy_ensure("memory.hindsight", prompt=False)
+    except ImportError:
+        pass
+    except Exception as exc:
+        raise ImportError(str(exc)) from exc
+
+
 # ---------------------------------------------------------------------------
 # Hindsight API capability probe — mirrors hindsight-integrations/openclaw.
 # ---------------------------------------------------------------------------
@ -730,7 +742,6 @@ class HindsightMemoryProvider(MemoryProvider):
        env_writes: dict = {}

        # Step 2: Install/upgrade deps for selected mode
-        _MIN_CLIENT_VERSION = "0.4.22"
        cloud_dep = f"hindsight-client>={_MIN_CLIENT_VERSION}"
        local_dep = "hindsight-all"
        if mode == "local_embedded":
@ -990,6 +1001,7 @@ class HindsightMemoryProvider(MemoryProvider):
                kwargs["idle_timeout"] = idle_timeout
                self._client = HindsightEmbedded(**kwargs)
            else:
+                _ensure_cloud_client_dependency()
                from hindsight_client import Hindsight
                timeout = self._timeout or _DEFAULT_TIMEOUT
                kwargs = {"base_url": self._api_url, "timeout": float(timeout)}
--- a/plugins/memory/hindsight/plugin.yaml
+++ b/plugins/memory/hindsight/plugin.yaml
@ -2,7 +2,7 @@ name: hindsight
 version: 1.0.0
 description: "Hindsight — long-term memory with knowledge graph, entity resolution, and multi-strategy retrieval."
 pip_dependencies:
-  - "hindsight-client>=0.4.22"
+  - "hindsight-client>=0.6.1"
 requires_env: []
 hooks:
  - on_session_end
--- a/plugins/memory/openviking/init.py
+++ b/plugins/memory/openviking/init.py
@ -45,10 +45,11 @@ from typing import Any, Callable, Dict, List, Optional, Set
 from urllib.parse import urlparse
 from urllib.request import url2pathname

+from agent.message_content import flatten_message_text
 from agent.memory_provider import MemoryProvider
 from agent.skill_commands import extract_user_instruction_from_skill_message
 from tools.registry import tool_error
-from utils import atomic_json_write
+from utils import atomic_json_write, env_var_enabled

 logger = logging.getLogger(__name__)

@ -70,6 +71,7 @@ _TIMEOUT = 30.0
 _SESSION_DRAIN_TIMEOUT = 10.0
 _DEFERRED_COMMIT_TIMEOUT = (_TIMEOUT * 2) + 5.0
 _REMOTE_RESOURCE_PREFIXES = ("http://", "https://", "git@", "ssh://", "git://")
+_SYNC_TRACE_ENV = "HERMES_OPENVIKING_SYNC_TRACE"

 # Maps the viking_remember `category` enum to a viking:// subdirectory.
 # Keep in sync with REMEMBER_SCHEMA.parameters.properties.category.enum.
@ -156,6 +158,18 @@ def _derive_openviking_user_text(content: Any) -> str:
    return extract_user_instruction_from_skill_message(content) or ""


+def _sync_trace_enabled() -> bool:
+    return env_var_enabled(_SYNC_TRACE_ENV)
+
+
+def _preview(value: Any, limit: int = 160) -> str:
+    text = "" if value is None else str(value)
+    text = text.replace("\n", "\\n")
+    if len(text) > limit:
+        return text[:limit] + "..."
+    return text
+
+
 # ---------------------------------------------------------------------------
 # Process-level atexit safety net — ensures pending sessions are committed
 # even if shutdown_memory_provider is never called (e.g. gateway crash,
@ -488,6 +502,25 @@ ADD_RESOURCE_SCHEMA = {
 }


+# Recall tools (read-only) whose results we never re-ingest into OpenViking —
+# echoing recalled memory back into the session transcript would re-store it.
+# Write tools (viking_remember / viking_add_resource) are intentionally NOT
+# here. Derived from the canonical schema names so renames can't desync.
+_OPENVIKING_RECALL_TOOL_NAMES = {
+    SEARCH_SCHEMA["name"],
+    READ_SCHEMA["name"],
+    BROWSE_SCHEMA["name"],
+}
+
+# Canonical tool_status values emitted in OpenViking batch tool parts.
+_TOOL_STATUS_COMPLETED = "completed"
+_TOOL_STATUS_ERROR = "error"
+_TOOL_STATUS_PENDING = "pending"
+# Inbound status aliases (from varied tool-result shapes) -> canonical above.
+_TOOL_STATUS_ERROR_ALIASES = {"error", "failed", "failure"}
+_TOOL_STATUS_COMPLETED_ALIASES = {"completed", "complete", "success", "succeeded"}
+
+
 def _zip_directory(dir_path: Path) -> Path:
    """Create a temporary zip file containing a directory tree."""
    root = dir_path.resolve()
@ -2221,7 +2254,10 @@ class OpenVikingMemoryProvider(MemoryProvider):

    def _commit_session(self, sid: str, turn_count: int, *, context: str) -> bool:
        try:
-            self._client.post(f"/api/v1/sessions/{sid}/commit")
+            self._client.post(
+                f"/api/v1/sessions/{sid}/commit",
+                {"keep_recent_count": 0},
+            )
            self._mark_session_committed(sid)
            logger.info("OpenViking session %s committed %s (%d turns)", sid, context, turn_count)
            return True
@ -2293,7 +2329,265 @@ class OpenVikingMemoryProvider(MemoryProvider):
        with self._prefetch_lock:
            self._prefetch_result = ""

-    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
+    @staticmethod
+    def _message_text(content: Any) -> str:
+        """Extract text from OpenAI-style string/list content."""
+        return flatten_message_text(content)
+
+    @classmethod
+    def _message_matches_text(cls, message: Dict[str, Any], expected: Any) -> bool:
+        expected_text = cls._message_text(expected).strip()
+        if not expected_text:
+            return False
+        actual_text = cls._message_text(message.get("content")).strip()
+        return actual_text == expected_text
+
+    @classmethod
+    def _extract_current_turn_messages(
+        cls,
+        messages: Optional[List[Dict[str, Any]]],
+        user_content: str,
+        assistant_content: str,
+    ) -> List[Dict[str, Any]]:
+        """Slice the completed turn out of Hermes' full canonical transcript."""
+        if not messages:
+            return []
+
+        end_idx: Optional[int] = None
+        if cls._message_text(assistant_content).strip():
+            for idx in range(len(messages) - 1, -1, -1):
+                message = messages[idx]
+                if (
+                    isinstance(message, dict)
+                    and message.get("role") == "assistant"
+                    and cls._message_matches_text(message, assistant_content)
+                ):
+                    end_idx = idx
+                    break
+        if end_idx is None:
+            for idx in range(len(messages) - 1, -1, -1):
+                message = messages[idx]
+                if isinstance(message, dict) and message.get("role") == "assistant":
+                    end_idx = idx
+                    break
+        if end_idx is None:
+            end_idx = len(messages) - 1
+
+        start_idx: Optional[int] = None
+        if cls._message_text(user_content).strip():
+            for idx in range(end_idx, -1, -1):
+                message = messages[idx]
+                if (
+                    isinstance(message, dict)
+                    and message.get("role") == "user"
+                    and cls._message_matches_text(message, user_content)
+                ):
+                    start_idx = idx
+                    break
+        if start_idx is None:
+            for idx in range(end_idx, -1, -1):
+                message = messages[idx]
+                if isinstance(message, dict) and message.get("role") == "user":
+                    start_idx = idx
+                    break
+        if start_idx is None:
+            return []
+
+        return [message for message in messages[start_idx : end_idx + 1] if isinstance(message, dict)]
+
+    @staticmethod
+    def _tool_call_id(tool_call: Dict[str, Any]) -> str:
+        return str(tool_call.get("id") or tool_call.get("tool_call_id") or "")
+
+    @staticmethod
+    def _tool_call_name(tool_call: Dict[str, Any]) -> str:
+        function = tool_call.get("function")
+        if isinstance(function, dict):
+            return str(function.get("name") or "")
+        return str(tool_call.get("name") or "")
+
+    @staticmethod
+    def _is_openviking_recall_tool_name(tool_name: Any) -> bool:
+        return str(tool_name or "").strip().lower() in _OPENVIKING_RECALL_TOOL_NAMES
+
+    @staticmethod
+    def _tool_call_input(tool_call: Dict[str, Any]) -> Dict[str, Any]:
+        function = tool_call.get("function")
+        raw_args: Any = None
+        if isinstance(function, dict):
+            raw_args = function.get("arguments")
+        if raw_args is None:
+            raw_args = tool_call.get("args")
+        if raw_args is None:
+            return {}
+        if isinstance(raw_args, dict):
+            return raw_args
+        if isinstance(raw_args, str):
+            if not raw_args.strip():
+                return {}
+            try:
+                parsed = json.loads(raw_args)
+            except Exception:
+                return {"value": raw_args}
+            if isinstance(parsed, dict):
+                return parsed
+            return {"value": parsed}
+        return {"value": raw_args}
+
+    @classmethod
+    def _tool_result_status(cls, message: Dict[str, Any]) -> str:
+        raw_status = str(message.get("status") or message.get("tool_status") or "").lower()
+        if raw_status in _TOOL_STATUS_ERROR_ALIASES:
+            return _TOOL_STATUS_ERROR
+        if raw_status in _TOOL_STATUS_COMPLETED_ALIASES:
+            return _TOOL_STATUS_COMPLETED
+
+        text = cls._message_text(message.get("content")).strip()
+        if text:
+            try:
+                parsed = json.loads(text)
+            except Exception:
+                parsed = None
+            if isinstance(parsed, dict):
+                status = str(parsed.get("status") or "").lower()
+                exit_code = parsed.get("exit_code")
+                if (
+                    status in _TOOL_STATUS_ERROR_ALIASES
+                    or parsed.get("success") is False
+                    or bool(parsed.get("error"))
+                    or (isinstance(exit_code, int) and exit_code != 0)
+                ):
+                    return _TOOL_STATUS_ERROR
+
+        return _TOOL_STATUS_COMPLETED
+
+    @classmethod
+    def _messages_to_openviking_batch(
+        cls,
+        messages: List[Dict[str, Any]],
+        *,
+        assistant_peer_id: str = "",
+    ) -> List[Dict[str, Any]]:
+        """Convert Hermes canonical messages into OpenViking batch payloads."""
+        assistant_peer_id = str(assistant_peer_id or "").strip()
+        tool_calls_by_id: Dict[str, Dict[str, Any]] = {}
+        completed_tool_ids: set[str] = set()
+        skipped_tool_ids: set[str] = set()
+        for message in messages:
+            if not isinstance(message, dict):
+                continue
+            if message.get("role") == "tool":
+                tool_id = str(message.get("tool_call_id") or message.get("id") or "")
+                if tool_id:
+                    completed_tool_ids.add(tool_id)
+                    if cls._is_openviking_recall_tool_name(message.get("name")):
+                        skipped_tool_ids.add(tool_id)
+                continue
+            if message.get("role") != "assistant":
+                continue
+            for tool_call in message.get("tool_calls") or []:
+                if not isinstance(tool_call, dict):
+                    continue
+                tool_id = cls._tool_call_id(tool_call)
+                tool_name = cls._tool_call_name(tool_call)
+                if tool_id:
+                    tool_calls_by_id[tool_id] = {
+                        "tool_name": tool_name,
+                        "tool_input": cls._tool_call_input(tool_call),
+                    }
+                    if cls._is_openviking_recall_tool_name(tool_name):
+                        skipped_tool_ids.add(tool_id)
+
+        payload_messages: List[Dict[str, Any]] = []
+        pending_tool_parts: List[Dict[str, Any]] = []
+
+        def payload_message(role: str, parts: List[Dict[str, Any]]) -> Dict[str, Any]:
+            payload: Dict[str, Any] = {"role": role, "parts": parts}
+            if role == "assistant" and assistant_peer_id:
+                payload["peer_id"] = assistant_peer_id
+            return payload
+
+        def flush_tool_parts() -> None:
+            nonlocal pending_tool_parts
+            if pending_tool_parts:
+                payload_messages.append(payload_message("assistant", pending_tool_parts))
+                pending_tool_parts = []
+
+        for message in messages:
+            if not isinstance(message, dict):
+                continue
+
+            role = str(message.get("role") or "")
+            if role in {"system", "developer"}:
+                continue
+
+            if role == "tool":
+                tool_id = str(message.get("tool_call_id") or message.get("id") or "")
+                prior_call = tool_calls_by_id.get(tool_id, {})
+                tool_name = str(message.get("name") or prior_call.get("tool_name") or "")
+                if tool_id in skipped_tool_ids or cls._is_openviking_recall_tool_name(tool_name):
+                    continue
+                tool_part = {
+                    "type": "tool",
+                    "tool_id": tool_id,
+                    "tool_name": tool_name,
+                    "tool_input": prior_call.get("tool_input", {}),
+                    "tool_output": cls._message_text(message.get("content")),
+                    "tool_status": cls._tool_result_status(message),
+                }
+                pending_tool_parts.append(tool_part)
+                continue
+
+            if role not in {"user", "assistant"}:
+                continue
+
+            flush_tool_parts()
+            parts: List[Dict[str, Any]] = []
+            text = cls._message_text(message.get("content"))
+            if text:
+                parts.append({"type": "text", "text": text})
+
+            if role == "assistant":
+                for tool_call in message.get("tool_calls") or []:
+                    if not isinstance(tool_call, dict):
+                        continue
+                    tool_id = cls._tool_call_id(tool_call)
+                    tool_name = cls._tool_call_name(tool_call)
+                    if tool_id in skipped_tool_ids or cls._is_openviking_recall_tool_name(tool_name):
+                        continue
+                    if tool_id in completed_tool_ids:
+                        continue
+                    # Reuse the tool_input parsed in the pre-scan when available
+                    # (non-empty ids are cached); fall back to parsing for the
+                    # uncached empty-id case so we never drop arguments.
+                    prior_call = tool_calls_by_id.get(tool_id) if tool_id else None
+                    tool_input = (
+                        prior_call["tool_input"]
+                        if prior_call is not None
+                        else cls._tool_call_input(tool_call)
+                    )
+                    parts.append({
+                        "type": "tool",
+                        "tool_id": tool_id,
+                        "tool_name": tool_name,
+                        "tool_input": tool_input,
+                        "tool_status": _TOOL_STATUS_PENDING,
+                    })
+
+            if parts:
+                payload_messages.append(payload_message(role, parts))
+
+        flush_tool_parts()
+        return payload_messages
+
+    def sync_turn(
+        self,
+        user_content: str,
+        assistant_content: str,
+        *,
+        session_id: str = "",
+        messages: Optional[List[Dict[str, Any]]] = None,
+    ) -> None:
        """Record the conversation turn in OpenViking's session (non-blocking)."""
        if not self._client:
            return
@ -2302,6 +2596,40 @@ class OpenVikingMemoryProvider(MemoryProvider):
        if not user_content:
            return

+        turn_messages = (
+            self._extract_current_turn_messages(messages, user_content, assistant_content)
+            if messages is not None
+            else []
+        )
+        if turn_messages:
+            turn_messages = [dict(message) for message in turn_messages]
+            for message in turn_messages:
+                if message.get("role") == "user":
+                    message["content"] = user_content
+                    break
+        batch_messages = self._messages_to_openviking_batch(
+            turn_messages,
+            assistant_peer_id=getattr(self, "_agent", _DEFAULT_AGENT),
+        )
+
+        if _sync_trace_enabled():
+            logger.info(
+                "OpenViking sync_turn trace: session_arg=%r cached_session=%r "
+                "messages_param_supported=true messages_present=%s message_count=%s "
+                "turn_message_count=%d batch_message_count=%d user_len=%d assistant_len=%d "
+                "user_preview=%r assistant_preview=%r",
+                session_id,
+                self._session_id,
+                messages is not None,
+                len(messages) if messages is not None else None,
+                len(turn_messages),
+                len(batch_messages),
+                len(str(user_content or "")),
+                len(str(assistant_content or "")),
+                _preview(user_content),
+                _preview(assistant_content),
+            )
+
        # Snapshot the sid and bump the turn counter atomically so a
        # concurrent on_session_switch/on_session_end can't interleave its
        # snapshot+reset between the read and the increment (lost turn) and so
@ -2313,24 +2641,39 @@ class OpenVikingMemoryProvider(MemoryProvider):
            self._turn_count += 1

        def _sync():
-            try:
-                client = self._new_client()
+            def _post_turn(client: _VikingClient) -> None:
+                if batch_messages:
+                    payload = {"messages": batch_messages}
+                    if _sync_trace_enabled():
+                        logger.info(
+                            "OpenViking sync_turn trace: POST /api/v1/sessions/%s/messages/batch payload=%s",
+                            sid,
+                            json.dumps(payload, ensure_ascii=False),
+                        )
+                    try:
+                        client.post(f"/api/v1/sessions/{sid}/messages/batch", payload)
+                        return
+                    except Exception as batch_error:
+                        logger.warning(
+                            "OpenViking structured sync failed; falling back to text sync: %s",
+                            batch_error,
+                        )
+
                self._post_session_turn(
                    client,
                    sid,
                    user_content[:4000],
-                    assistant_content[:4000],
+                    self._message_text(assistant_content)[:4000],
                )
+
+            try:
+                client = self._new_client()
+                _post_turn(client)
            except Exception as e:
                logger.debug("OpenViking sync_turn failed, reconnecting: %s", e)
                try:
                    client = self._new_client()
-                    self._post_session_turn(
-                        client,
-                        sid,
-                        user_content[:4000],
-                        assistant_content[:4000],
-                    )
+                    _post_turn(client)
                except Exception as retry_error:
                    logger.warning("OpenViking sync_turn failed: %s", retry_error)

--- a/plugins/platforms/discord/adapter.py
+++ b/plugins/platforms/discord/adapter.py
@ -14,6 +14,7 @@ import hashlib
 import json
 import logging
 import os
+import re
 import struct
 import subprocess
 import tempfile
@ -29,6 +30,7 @@ VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080}
 _DISCORD_COMMAND_SYNC_POLICIES = {"safe", "bulk", "off"}
 _DISCORD_COMMAND_SYNC_STATE_SUBDIR = "gateway"
 _DISCORD_COMMAND_SYNC_STATE_FILENAME = "discord_command_sync_state.json"
+_DISCORD_NONCONVERSATIONAL_STATE_FILENAME = "discord_nonconversational_messages.json"
 _DISCORD_COMMAND_SYNC_MUTATION_INTERVAL_SECONDS = 4.5
 _DISCORD_COMMAND_SYNC_MAX_RATE_LIMIT_SLEEP_SECONDS = 30.0
 # Discord enforces a hard cap of 100 global application (slash) commands per
@ -37,6 +39,37 @@ _DISCORD_COMMAND_SYNC_MAX_RATE_LIMIT_SLEEP_SECONDS = 30.0
 # every slash command — not just the overflow ones. We keep the desired set
 # at or below this limit at registration time.
 _DISCORD_MAX_APP_COMMANDS = 100
+_DISCORD_NONCONVERSATIONAL_METADATA_KEYS = frozenset({
+    "non_conversational",
+    "non_conversational_history",
+})
+# Upgrade-bridge fallback only. The primary mechanism is the persisted
+# non-conversational message-ID set populated from explicitly marked sends
+# (metadata["non_conversational"]). These regexes exist solely to recognize
+# status bumps emitted by an older gateway version that pre-dates the marking,
+# so they don't partition history after an upgrade. New emitters should set the
+# metadata flag, not rely on a regex here.
+_DISCORD_NONCONVERSATIONAL_HISTORY_MESSAGE_PATTERNS = (
+    re.compile(r"^\s*💾\s*Self-improvement review:\s+\S[\s\S]*$", re.IGNORECASE),
+    # Legacy/background-review test doubles used this shorter form before the
+    # self-improvement prefix became the stable emitter contract.
+    re.compile(
+        r"^\s*💾\s+Skill\s+['\"].+?['\"]\s+(?:created|updated|improved|patched)\.?\s*$",
+        re.IGNORECASE,
+    ),
+    re.compile(r"^\s*⏳\s+Working\s+—\s+\d+\s+min(?:\s|$)", re.IGNORECASE),
+    re.compile(
+        r"^\s*\[Background process\s+\S+\s+"
+        r"(?:finished with exit code|is still running~)[\s\S]*\]\s*$",
+        re.IGNORECASE,
+    ),
+    re.compile(
+        r"^\s*(?:✅|❌)\s+Hermes update\s+"
+        r"(?:finished|failed|timed out)[\s\S]*$",
+        re.IGNORECASE,
+    ),
+    re.compile(r"^\s*♻️?\s+Gateway\s+(?:restarted successfully|online\b)[\s\S]*$", re.IGNORECASE),
+)

 try:
    import discord
@ -55,7 +88,6 @@ from pathlib import Path as _Path
 sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))

 from gateway.config import Platform, PlatformConfig
-import re

 from gateway.platforms.helpers import MessageDeduplicator, ThreadParticipationTracker
 from utils import atomic_json_write
@ -132,6 +164,73 @@ def _find_discord_windows_bundled_opus(discord_module: Any = None) -> Optional[s
    return None


+class _DiscordNonConversationalMessageTracker:
+    """Persistent bounded set of Discord message IDs that are status noise."""
+
+    _MAX_TRACKED = 2000
+
+    def __init__(self, max_tracked: int = _MAX_TRACKED):
+        self._max_tracked = max_tracked
+        self._ids: dict[str, None] = dict.fromkeys(self._load())
+
+    def _state_path(self) -> _Path:
+        from hermes_constants import get_hermes_home
+
+        return (
+            get_hermes_home()
+            / _DISCORD_COMMAND_SYNC_STATE_SUBDIR
+            / _DISCORD_NONCONVERSATIONAL_STATE_FILENAME
+        )
+
+    def _load(self) -> list[str]:
+        path = self._state_path()
+        if not path.exists():
+            return []
+        try:
+            data = json.loads(path.read_text(encoding="utf-8"))
+            if isinstance(data, list):
+                return [str(message_id) for message_id in data if str(message_id).strip()]
+        except Exception:
+            logger.debug("[%s] Failed to load non-conversational Discord IDs", "Discord")
+        return []
+
+    def _save(self) -> None:
+        ids = list(self._ids)
+        if len(ids) > self._max_tracked:
+            ids = ids[-self._max_tracked:]
+            self._ids = dict.fromkeys(ids)
+        try:
+            atomic_json_write(self._state_path(), ids, indent=None)
+        except Exception:
+            logger.debug("[%s] Failed to save non-conversational Discord IDs", "Discord", exc_info=True)
+
+    def mark_many(self, message_ids: List[str]) -> None:
+        changed = False
+        for message_id in message_ids:
+            key = str(message_id or "").strip()
+            if key and key not in self._ids:
+                self._ids[key] = None
+                changed = True
+        if changed:
+            self._save()
+
+    def __contains__(self, message_id: str) -> bool:
+        return str(message_id or "") in self._ids
+
+
+def _metadata_marks_nonconversational(metadata: Optional[Dict[str, Any]]) -> bool:
+    """Return True when an outbound send was explicitly marked as status-only."""
+    if not isinstance(metadata, dict):
+        return False
+    return any(bool(metadata.get(key)) for key in _DISCORD_NONCONVERSATIONAL_METADATA_KEYS)
+
+
+def _looks_like_nonconversational_history_message(content: str) -> bool:
+    """Fallback recognizer for legacy status bumps missing persisted IDs."""
+    text = content or ""
+    return any(pattern.match(text) for pattern in _DISCORD_NONCONVERSATIONAL_HISTORY_MESSAGE_PATTERNS)
+
+
 def _clean_discord_id(entry: str) -> str:
    """Strip common prefixes from a Discord user ID or username entry.

@ -681,6 +780,9 @@ class DiscordAdapter(BasePlatformAdapter):
        # history backfill to skip the full scan on hot paths.  Falls back to
        # scanning channel.history() on cache miss (cold start / restart).
        self._last_self_message_id: Dict[str, str] = {}
+        # Persistent set of bot-authored lifecycle/status message IDs that
+        # should not act as conversational history boundaries after restart.
+        self._nonconversational_messages = _DiscordNonConversationalMessageTracker()

    def _handle_bot_task_done(self, task: asyncio.Task) -> None:
        """Surface post-startup discord.py task exits to the gateway supervisor.
@ -1577,6 +1679,7 @@ class DiscordAdapter(BasePlatformAdapter):
            thread_id = None
            if metadata and metadata.get("thread_id"):
                thread_id = metadata["thread_id"]
+            nonconversational = _metadata_marks_nonconversational(metadata)

            if thread_id:
                # Fetch the thread directly — threads are addressed by their own ID.
@ -1654,7 +1757,10 @@ class DiscordAdapter(BasePlatformAdapter):
            # backfill — avoids a full channel.history() scan on hot paths.
            if message_ids:
                _target_id = thread_id or chat_id
-                self._last_self_message_id[_target_id] = message_ids[-1]
+                if nonconversational:
+                    self._nonconversational_messages.mark_many(message_ids)
+                elif not _looks_like_nonconversational_history_message(content):
+                    self._last_self_message_id[_target_id] = message_ids[-1]

            return SendResult(
                success=True,
@ -4203,23 +4309,29 @@ class DiscordAdapter(BasePlatformAdapter):
                after=_after_obj,
                oldest_first=False,
            ):
+                # Skip system messages (pins, joins, thread renames, etc.)
+                if msg.type not in {discord.MessageType.default, discord.MessageType.reply}:
+                    continue
+
+                content = getattr(msg, "clean_content", msg.content) or ""
+                if (
+                    str(getattr(msg, "id", "")) in self._nonconversational_messages
+                    or _looks_like_nonconversational_history_message(content)
+                ):
+                    continue
+
                # Stop at our own message — this is the partition point.
                # Everything before this is already in the session transcript.
                # (Redundant when _after_obj is set, but needed for cold start.)
                if msg.author == self._client.user:
                    break

-                # Skip system messages (pins, joins, thread renames, etc.)
-                if msg.type not in {discord.MessageType.default, discord.MessageType.reply}:
-                    continue
-
                # Respect DISCORD_ALLOW_BOTS for other bots.
                # For history context, "mentions" is treated as "all" — we are
                # deciding what context to show, not whether to respond.
                if getattr(msg.author, "bot", False) and not include_other_bots:
                    continue

-                content = getattr(msg, "clean_content", msg.content) or ""
                if not content and msg.attachments:
                    content = "(attachment)"
                if not content:
@ -4566,6 +4678,13 @@ class DiscordAdapter(BasePlatformAdapter):
        Open-ended mode (``choices`` empty/None): renders the question as
        plain embed text — no buttons. The gateway's text-intercept captures
        the next message in this session and resolves the clarify.
+
+        Choice normalisation: ``choices`` may contain bare strings OR dicts
+        (LLMs sometimes emit ``[{"description": "..."}]`` instead of bare
+        strings, which would otherwise render as raw Python repr on the
+        button label). Dict choices are unwrapped against the canonical
+        LLM tool-call keys ``label``, ``description``, ``text``, ``title``
+        in that order. Dicts with none of those keys are dropped.
        """
        if not self._client or not DISCORD_AVAILABLE:
            return SendResult(success=False, error="Not connected")
@ -4591,8 +4710,37 @@ class DiscordAdapter(BasePlatformAdapter):
                color=discord.Color.orange(),
            )

+            # Normalise choices: LLMs sometimes emit `[{"description": "..."}]`
+            # instead of bare strings, which would render as raw Python repr on
+            # the button label. Unwrap the common shapes, then stringify.
+            def _flatten_choice(c):
+                if c is None:
+                    return ""
+                if isinstance(c, str):
+                    return c.strip()
+                if isinstance(c, dict):
+                    # Prefer the canonical LLM tool-call user-facing keys
+                    # in the order the LLM is most likely to emit them.
+                    # 'name' and 'value' are deliberately NOT here: they're
+                    # Discord-component-shaped fields that could appear in
+                    # dicts that aren't meant to be choices (e.g., a
+                    # developer-error wiring that passes a Button-shaped
+                    # object). Picking them would leak raw enum values
+                    # or 4-char model identifiers onto user-facing buttons.
+                    # If a dict has none of the canonical keys, drop it
+                    # rather than picking some random field — a garbage
+                    # button label is worse than no button at all.
+                    for key in ("label", "description", "text", "title"):
+                        v = c.get(key)
+                        if isinstance(v, str) and v.strip():
+                            return v.strip()
+                    return ""
+                if isinstance(c, (list, tuple)):
+                    return " ".join(_flatten_choice(x) for x in c).strip()
+                return str(c).strip()
+
            clean_choices = [
-                str(c).strip() for c in (choices or []) if c is not None and str(c).strip()
+                s for s in (_flatten_choice(c) for c in (choices or [])) if s
            ]
            # Discord allows up to 5 buttons per row, 5 rows per view = 25.
            # We reserve one slot for the "Other" button, so cap at 24 choices.
@ -4657,6 +4805,8 @@ class DiscordAdapter(BasePlatformAdapter):
            )
            msg = await channel.send(embed=embed, view=view)
            view._message = msg  # store for on_timeout expiration editing
+            if _metadata_marks_nonconversational(metadata):
+                self._nonconversational_messages.mark_many([str(msg.id)])
            return SendResult(success=True, message_id=str(msg.id))
        except Exception as e:
            return SendResult(success=False, error=str(e))
@ -6129,10 +6279,47 @@ def _define_discord_view_classes() -> None:
            self.resolved = False

            for index, choice in enumerate(self.choices):
-                # Discord button labels are capped at 80 chars.
-                label_body = choice if len(choice) <= 75 else choice[:72] + "..."
+                # Discord button labels are capped at 80 chars. On mobile the
+                # visible width is much narrower (often <40 chars before it
+                # wraps to 2 lines and the second line gets cut off), so we
+                # cap aggressively and cut at a word boundary when possible
+                # to keep the trailing text readable.
+                #
+                # Cut strategy (most-preferred to least-preferred):
+                #   1. Last space in the trailing half of the budget
+                #      (cleanest word boundary)
+                #   2. Last soft boundary in the trailing half of the
+                #      budget (hyphen, comma, period, paren)
+                #   3. Hard cut at the budget limit (last resort)
+                prefix = f"{index + 1}. "
+                budget = 80 - len(prefix)
+                if len(choice) <= budget:
+                    label_body = choice
+                else:
+                    truncated = choice[: budget - 1].rstrip()
+                    cut_at = -1
+                    # 1. Last space in the trailing half of the budget.
+                    space = truncated.rfind(" ")
+                    if space >= budget // 2:
+                        cut_at = space
+                    # 2. Soft boundary — only if no word boundary found.
+                    # Find the latest soft boundary in the trailing half
+                    # of the budget; that maximizes preserved text length.
+                    # Cut AT the soft boundary (inclusive) so the label
+                    # ends on the soft char (e.g. "-" or ",") rather than
+                    # on the alpha char that followed it.
+                    if cut_at < 0:
+                        latest_soft = max(
+                            (truncated.rfind(s) for s in ("-", ",", ".", ")")),
+                            default=-1,
+                        )
+                        if latest_soft >= budget // 2:
+                            cut_at = latest_soft + 1
+                    if cut_at > 0:
+                        truncated = truncated[:cut_at]
+                    label_body = truncated.rstrip() + "…"
                button = discord.ui.Button(
-                    label=f"{index + 1}. {label_body}",
+                    label=f"{prefix}{label_body}",
                    style=discord.ButtonStyle.primary,
                    custom_id=f"clarify:{clarify_id}:{index}",
                )
--- a/pyproject.toml
+++ b/pyproject.toml
@ -258,7 +258,7 @@ youtube = [
 # `hermes dashboard` (localhost SPA + API).  Not in core to keep the default install lean.
 # starlette==1.0.1 pinned for CVE-2026-48710 (BadHost) — fastapi pulls Starlette
 # transitively and pre-1.0.1 is the vulnerable range. See the mcp extra above.
-web = ["fastapi==0.133.1", "uvicorn[standard]==0.41.0", "starlette==1.0.1", "python-multipart==0.0.20"]
+web = ["fastapi==0.133.1", "uvicorn[standard]==0.41.0", "starlette==1.0.1", "python-multipart==0.0.27"]
 all = [
  # Policy (2026-05-12): `[all]` includes only extras that genuinely
  # CAN'T be lazy-installed via `tools/lazy_deps.py` — i.e. things every
--- a/run_agent.py
+++ b/run_agent.py
@ -4076,11 +4076,13 @@ class AIAgent:
        # Defensive: strip Responses-only kwargs that can leak in under an
        # api_mode-flip race (the Anthropic SDK raises a non-retryable
        # TypeError on them). See #31673.
-        from agent.anthropic_adapter import sanitize_anthropic_kwargs
-        sanitize_anthropic_kwargs(
-            api_kwargs, log_prefix=getattr(self, "log_prefix", "")
+        from agent.anthropic_adapter import create_anthropic_message
+        return create_anthropic_message(
+            self._anthropic_client,
+            api_kwargs,
+            log_prefix=getattr(self, "log_prefix", ""),
+            prefer_stream=not bool(getattr(self, "_disable_streaming", False)),
        )
-        return self._anthropic_client.messages.create(**api_kwargs)

    def _rebuild_anthropic_client(self) -> None:
        """Rebuild the Anthropic client after an interrupt or stale call.
--- a/scripts/release.py
+++ b/scripts/release.py
@ -45,6 +45,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"

 # Auto-extracted from noreply emails + manual overrides
 AUTHOR_MAP = {
+    "charles@salesondemand.io": "salesondemandio",
    "victor@rocketfueldev.com": "victor-kyriazakos",
    "87440198+JoaoMarcos44@users.noreply.github.com": "JoaoMarcos44",
    "286497132+srojk34@users.noreply.github.com": "srojk34",
@ -56,6 +57,8 @@ AUTHOR_MAP = {
    "despitemeguru@gmail.com": "definitelynotguru",
    "chaslui@outlook.com": "ChasLui",
    "rio.jeong@thebytesize.ai": "rio-jeong",
+    "cdddo@users.noreply.github.com": "Cdddo",
+    "carlos.dddo@gmail.com": "Cdddo",
    "yehaotian@xuanshudeMac-mini.local": "ArcanePivot",
    "dbeyer7@gmail.com": "benegessarit",
    "264773240+MrDiamondBallz@users.noreply.github.com": "MrDiamondBallz",
@ -103,6 +106,7 @@ AUTHOR_MAP = {
    "290859878+synapsesx@users.noreply.github.com": "synapsesx",
    "157689911+itsflownium@users.noreply.github.com": "itsflownium",
    "dirtyren@users.noreply.github.com": "dirtyren",
+    "johnjacobkenny@users.noreply.github.com": "johnjacobkenny",
    "chanyoung.kim@nota.ai": "channkim",
    "stevenn.damatoo@gmail.com": "x1erra",
    "evansrory@gmail.com": "zimigit2020",
@ -415,6 +419,7 @@ AUTHOR_MAP = {
    "androidhtml@yandex.com": "hllqkb",
    "25840394+Bongulielmi@users.noreply.github.com": "Bongulielmi",
    "jonathan.troyer@overmatch.com": "JTroyerOvermatch",
+    "53142663+tt-a1i@users.noreply.github.com": "tt-a1i",  # PR #48933 (SSE-only Anthropic stream aggregation, #48923)
    "harryykyle1@gmail.com": "hharry11",
    "wysie@users.noreply.github.com": "wysie",
    "ronhi@buildabear1.localdomain": "RonHillDev",  # PR #29523 salvage (machine-local commit email)
@ -1528,6 +1533,7 @@ AUTHOR_MAP = {
    "erik.engervall@gmail.com": "erikengervall",  # PR #28774 (firecrawl integration tag)
    "egilewski@egilewski.com": "egilewski",  # PR #30432 (MEDIA path traversal fix, GHSA-jmf9-9729-7pp8)
    "edison@mcclean.codes": "McClean-Edison",  # PR #29817 (register_auxiliary_task plugin API)
+    "OYLFLMH@users.noreply.github.com": "OYLFLMH",  # PR #48312 salvage (cli_refresh_interval config, #48309)
    "zhangsamuel12@gmail.com": "SamuelZ12",  # PR #7480 (show recap after in-session resume)
    "490408354@qq.com": "daizhonggeng",  # PR #9020 (numbered /resume selection)
    "claw@openclaw.ai": "wanwan2qq",  # PR #10215 (strip brackets/quotes from /resume; gateway session-ID lookup)
@ -1577,6 +1583,8 @@ AUTHOR_MAP = {
    "sunsky.lau@gmail.com": "liuhao1024",  # PR #45494 salvage (claim session slot before auto-resume task; #45456)
    "andrewdmwalker@gmail.com": "capt-marbles",  # PR #38440 salvage (resolve xAI OAuth credentials across profiles; #43589)
    "infinitycrew39@gmail.com": "infinitycrew39",  # PR #47945 salvage (scope langfuse trace state by turn/request ids; #48292)
+    "eurekaxun@163.com": "huangxun375-stack",  # PR #37251 / #48894 structured OpenViking sync
+    "218421507+Sahil-SS9@users.noreply.github.com": "Sahil-SS9",  # PR #48466/#44919/#44909/#42209 salvage (cron/checkpoint/kanban/skill)
 }


--- a/skills/software-development/simplify-code/SKILL.md
+++ b/skills/software-development/simplify-code/SKILL.md
@ -87,8 +87,20 @@ toolsets (so they can `git`, `read_file`, and `search_files`/grep).

 Tell each reviewer to:
 - Search the existing codebase for evidence (don't reason from the diff alone).
- Report findings as a concrete list: `file:line → problem → suggested fix`.
- Rank each finding `high` / `medium` / `low` confidence.
+- **Apply Chesterton's Fence:** before flagging anything for removal, run
+  `git blame` on the line to understand why it exists. If you can't determine
+  the original purpose, mark it `confidence: low` — don't guess.
+- Report findings as structured output with confidence and risk:
+  ```
+  file:line → problem → suggested fix | confidence: high/medium/low | risk: SAFE/CAREFUL/RISKY
+  ```
+  - **SAFE** = proven not to affect behavior (unused imports, commented-out
+    code, pass-through wrappers). Auto-apply these.
+  - **CAREFUL** = improves without changing semantics (rename local variable,
+    flatten nested ternary, extract helper). Apply with test verification.
+  - **RISKY** = may change behavior or breaks public contracts (N+1
+    restructuring, public API rename, memory lifecycle change). Flag for
+    human review — do NOT auto-apply.
 - Skip nits and style-only churn. Only flag things that materially improve
  the code.

@ -112,7 +124,11 @@ Pass these three goals (drop any the user's focus excludes):
 > blocks that should share an abstraction); leaky abstractions (exposing
 > internals, breaking an existing encapsulation boundary); stringly-typed
 > code (raw strings where a constant/enum/registry already exists — check the
-> canonical registries before flagging). For each, give the concrete refactor.
+> canonical registries before flagging); AI-generated slop patterns (extra
+> comments restating obvious code like `// increment counter` above `count++`;
+> unnecessary defensive null-checks on already-validated inputs; `as any`
+> casts that bypass the type system; patterns inconsistent with the rest of
+> the file). For each, give the concrete refactor.

 **Reviewer 3 — Efficiency**
 > Review this diff for efficiency problems. Look for: unnecessary work
@ -122,8 +138,10 @@ Pass these three goals (drop any the user's focus excludes):
 > TOCTOU anti-patterns (existence pre-checks before an op instead of doing
 > the op and handling the error); memory issues (unbounded growth, missing
 > cleanup, listener/handle leaks); overly broad reads (loading whole files
-> when a slice would do). For each, give the concrete fix and why it's faster
-> or lighter.
+> when a slice would do); silent failures (empty catch blocks, ignored error
+> returns, `except: pass`, `.catch(() => {})` with no handling, error
+> propagation gaps — these hide bugs and should at minimum log before
+> swallowing). For each, give the concrete fix and why it's faster or safer.

 ### Phase 3 — Aggregate and apply

@ -138,13 +156,22 @@ Wait for all three to return (batch mode returns them together).
   Don't apply a perf "fix" that hurts clarity unless the path is genuinely
   hot. When two suggestions are mutually exclusive and both defensible, pick
   the one that touches less code and note the alternative.
-4. **Apply** the surviving fixes directly with `patch` / `write_file` — unless
-   the user asked for a dry run, in which case present the list and ask first.
+4. **Apply in risk-tier order:**
+   - **SAFE first** (auto-apply): unused imports, commented-out code,
+     pass-through wrappers, redundant type assertions. Run tests after.
+   - **CAREFUL next** (apply with verification, one file at a time): rename
+     locals, flatten ternaries, extract helpers, consolidate dupes. Run tests
+     after each file. Revert any that break.
+   - **RISKY last** (flag for review — do NOT auto-apply): N+1 restructuring,
+     public API changes, concurrency fixes, error-handling changes. Present
+     each with risk description and test coverage status.
+   If the user opted for a dry run, present all three tiers and apply nothing.
 5. **Verify** you didn't break anything: run the project's targeted tests for
   the touched files (not the full suite), and re-run any linter/type check the
   repo uses. If a fix breaks a test, revert that one fix and report it.
 6. **Summarize** what you changed: a short list of applied fixes grouped by
-   reviewer category, plus any findings you deliberately skipped and why.
+   reviewer category and risk tier, plus any findings you deliberately skipped
+   and why.

 ## Pitfalls

@ -166,6 +193,16 @@ Wait for all three to return (batch mode returns them together).
 - **Large diffs blow context.** If the diff is huge, scope it down before
  delegating — three subagents each carrying a 5000-line diff is expensive and
  may truncate.
+- **Over-trusting dead code tools.** `knip`, `ts-prune`, and `depcheck` flag
+  exports that ARE used dynamically (string-based imports, reflection). Always
+  grep for the symbol name before removing — a clean tool report is not proof.
+- **Renaming without checking public contracts.** Export names, API route
+  paths, DB column names, and config keys are contracts — even if the name is
+  bad, renaming breaks consumers. Tag public-contract changes as RISKY; never
+  auto-rename them.
+- **Removing "unnecessary" error handling.** An empty catch block or ignored
+  error might be intentional — the error is expected and benign in that
+  context. Flag it, don't remove it; let the human decide.

 ## Related

--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@ -38,6 +38,20 @@ def _jwt_with_claims(claims: dict) -> str:
    return f"{header}.{payload}.sig"


+class _FakeAnthropicStream:
+    def __init__(self, final_message):
+        self._final_message = final_message
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc, tb):
+        return False
+
+    def get_final_message(self):
+        return self._final_message
+
+
@pytest.fixture(autouse=True)
 def _clean_env(monkeypatch):
    """Strip provider env vars so each test starts clean."""
@ -990,6 +1004,37 @@ class TestVisionClientFallback:
        assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
        assert model == "claude-haiku-4-5-20251001"

+    def test_anthropic_auxiliary_client_aggregates_stream_response(self):
+        from agent.auxiliary_client import AnthropicAuxiliaryClient
+
+        final_message = SimpleNamespace(
+            content=[SimpleNamespace(type="text", text="streamed aux response")],
+            stop_reason="end_turn",
+            usage=SimpleNamespace(input_tokens=3, output_tokens=4),
+        )
+        messages_api = SimpleNamespace(
+            stream=MagicMock(return_value=_FakeAnthropicStream(final_message)),
+            create=MagicMock(return_value="raw event-stream text"),
+        )
+        real_client = SimpleNamespace(messages=messages_api)
+        client = AnthropicAuxiliaryClient(
+            real_client,
+            "claude-sonnet-4-20250514",
+            "sk-test",
+            "https://sse-only.example/v1",
+        )
+
+        response = client.chat.completions.create(
+            messages=[{"role": "user", "content": "summarize"}],
+            max_tokens=16,
+        )
+
+        messages_api.stream.assert_called_once()
+        messages_api.create.assert_not_called()
+        assert response.choices[0].message.content == "streamed aux response"
+        assert response.usage.prompt_tokens == 3
+        assert response.usage.completion_tokens == 4
+

 class TestAuxiliaryPoolAwareness:
    def test_try_nous_uses_pool_entry(self):
--- a/tests/agent/test_message_content.py
+++ b/tests/agent/test_message_content.py
@ -0,0 +1,25 @@
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+from agent.message_content import flatten_message_text
+
+
+def test_flatten_message_text_accepts_chat_and_responses_text_parts():
+    content = [
+        {"type": "text", "text": "chat text"},
+        {"type": "input_text", "text": "user text"},
+        {"type": "output_text", "text": "assistant text"},
+        {"type": "summary_text", "text": "summary text"},
+    ]
+
+    assert flatten_message_text(content) == "chat text\nuser text\nassistant text\nsummary text"
+
+
+def test_flatten_message_text_accepts_object_parts():
+    content = [
+        SimpleNamespace(type="output_text", text="object text"),
+        {"content": "legacy content"},
+    ]
+
+    assert flatten_message_text(content) == "object text\nlegacy content"
--- a/tests/agent/test_secret_scope.py
+++ b/tests/agent/test_secret_scope.py
@ -0,0 +1,130 @@
+"""Tests for the profile-scoped credential primitive (Workstream A / Phase 2)."""
+import pytest
+
+from agent import secret_scope as ss
+
+
+@pytest.fixture(autouse=True)
+def _reset_multiplex():
+    """Ensure each test starts and ends with multiplexing off (it's a global)."""
+    ss.set_multiplex_active(False)
+    yield
+    ss.set_multiplex_active(False)
+
+
+class TestMultiplexInactiveBackwardCompat:
+    """Default deployment: get_secret transparently reads os.environ."""
+
+    def test_reads_environ(self, monkeypatch):
+        monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-test")
+        assert ss.get_secret("ANTHROPIC_API_KEY") == "sk-test"
+
+    def test_missing_returns_default(self, monkeypatch):
+        monkeypatch.delenv("NOPE_KEY", raising=False)
+        assert ss.get_secret("NOPE_KEY") is None
+        assert ss.get_secret("NOPE_KEY", "fallback") == "fallback"
+
+    def test_no_raise_without_scope(self, monkeypatch):
+        monkeypatch.delenv("SOME_KEY", raising=False)
+        # multiplex off => unscoped read is fine, returns default
+        assert ss.get_secret("SOME_KEY") is None
+
+
+class TestMultiplexActiveFailClosed:
+    """Multiplex on: an unscoped secret read raises instead of leaking."""
+
+    def test_unscoped_read_raises(self, monkeypatch):
+        monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-leaky")
+        ss.set_multiplex_active(True)
+        with pytest.raises(ss.UnscopedSecretError):
+            ss.get_secret("ANTHROPIC_API_KEY")
+
+    def test_scoped_read_uses_scope_not_environ(self, monkeypatch):
+        monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-from-environ")
+        ss.set_multiplex_active(True)
+        token = ss.set_secret_scope({"ANTHROPIC_API_KEY": "sk-from-scope"})
+        try:
+            assert ss.get_secret("ANTHROPIC_API_KEY") == "sk-from-scope"
+        finally:
+            ss.reset_secret_scope(token)
+
+    def test_scoped_missing_key_returns_default_not_environ(self, monkeypatch):
+        # Even though the value exists in os.environ, a scope is authoritative:
+        # an absent scope key must NOT fall through to the (cross-profile) env.
+        monkeypatch.setenv("OPENAI_API_KEY", "sk-other-profile")
+        ss.set_multiplex_active(True)
+        token = ss.set_secret_scope({"ANTHROPIC_API_KEY": "sk-mine"})
+        try:
+            assert ss.get_secret("OPENAI_API_KEY") is None
+            assert ss.get_secret("OPENAI_API_KEY", "d") == "d"
+        finally:
+            ss.reset_secret_scope(token)
+
+    def test_global_env_still_reads_environ_under_multiplex(self, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", "/opt/data")
+        ss.set_multiplex_active(True)
+        # No scope, multiplex on — but HERMES_HOME is global, so no raise.
+        assert ss.get_secret("HERMES_HOME") == "/opt/data"
+
+    def test_kanban_prefix_is_global(self, monkeypatch):
+        monkeypatch.setenv("HERMES_KANBAN_DB", "/x/kanban.db")
+        ss.set_multiplex_active(True)
+        assert ss.get_secret("HERMES_KANBAN_DB") == "/x/kanban.db"
+
+
+class TestScopeIsolation:
+    """Two scopes never see each other's secrets."""
+
+    def test_nested_scopes_restore(self):
+        ss.set_multiplex_active(True)
+        t1 = ss.set_secret_scope({"K": "a"})
+        try:
+            assert ss.get_secret("K") == "a"
+            t2 = ss.set_secret_scope({"K": "b"})
+            try:
+                assert ss.get_secret("K") == "b"
+            finally:
+                ss.reset_secret_scope(t2)
+            assert ss.get_secret("K") == "a"
+        finally:
+            ss.reset_secret_scope(t1)
+
+
+class TestEnvFileParsing:
+    """load_env_file parses without mutating os.environ."""
+
+    def test_parses_basic(self, tmp_path):
+        env = tmp_path / ".env"
+        env.write_text(
+            "# comment\n"
+            "ANTHROPIC_API_KEY=sk-abc\n"
+            "export OPENAI_API_KEY=sk-def\n"
+            'QUOTED="quoted-value"\n'
+            "SINGLE='single'\n"
+            "\n"
+            "BAD_LINE_NO_EQUALS\n"
+        )
+        out = ss.load_env_file(env)
+        assert out == {
+            "ANTHROPIC_API_KEY": "sk-abc",
+            "OPENAI_API_KEY": "sk-def",
+            "QUOTED": "quoted-value",
+            "SINGLE": "single",
+        }
+
+    def test_does_not_mutate_environ(self, tmp_path, monkeypatch):
+        monkeypatch.delenv("ZZZ_KEY", raising=False)
+        env = tmp_path / ".env"
+        env.write_text("ZZZ_KEY=secret\n")
+        ss.load_env_file(env)
+        import os
+        assert "ZZZ_KEY" not in os.environ
+
+    def test_missing_file_returns_empty(self, tmp_path):
+        assert ss.load_env_file(tmp_path / "nope.env") == {}
+
+    def test_build_profile_secret_scope(self, tmp_path):
+        (tmp_path / ".env").write_text("ANTHROPIC_API_KEY=sk-profile\n")
+        assert ss.build_profile_secret_scope(tmp_path) == {
+            "ANTHROPIC_API_KEY": "sk-profile"
+        }
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -534,6 +534,14 @@ def pytest_configure(config):  # noqa: D401 — pytest hook
        "behaviour — e.g. PTY tests that signal their own child).",
    )

+    # The pyproject addopts pin ``--timeout-method=signal`` relies on
+    # ``signal.SIGALRM``, which does not exist on Windows — pytest-timeout
+    # raises AttributeError at timer setup and the whole run aborts before any
+    # test executes. Fall back to the thread-based timer on Windows so the
+    # suite runs natively there (POSIX keeps the more reliable signal method).
+    if sys.platform == "win32" and getattr(config.option, "timeout_method", None) == "signal":
+        config.option.timeout_method = "thread"
+

@pytest.fixture(autouse=True)
 def _live_system_guard(request, monkeypatch):
--- a/tests/gateway/relay/test_relay_adapter.py
+++ b/tests/gateway/relay/test_relay_adapter.py
@ -75,3 +75,68 @@ async def test_send_without_transport_returns_failure():
    result = await a.send("chat1", "hello")
    assert result.success is False
    assert result.error == "no transport"
+
+
+class _CaptureTransport:
+    """Minimal RelayTransport stand-in that records the outbound action."""
+
+    def __init__(self):
+        self.sent = None
+
+    def set_inbound_handler(self, h):  # noqa: D401
+        self._h = h
+
+    async def send_outbound(self, action):
+        self.sent = action
+        return {"success": True, "message_id": "m1"}
+
+
+def _make_event(chat_id="chan-1", guild_id="guild-9"):
+    from gateway.platforms.base import MessageEvent, MessageType
+    from gateway.session import SessionSource
+
+    src = SessionSource(
+        platform=Platform.RELAY,
+        chat_id=chat_id,
+        chat_type="channel",
+        guild_id=guild_id,
+    )
+    return MessageEvent(text="hi", source=src, message_type=MessageType.TEXT)
+
+
+@pytest.mark.asyncio
+async def test_send_reattaches_guild_id_from_inbound_scope():
+    """The connector's egress guard resolves the owning tenant from
+    metadata.guild_id; the gateway's generic delivery path drops it, so the
+    relay adapter must re-attach the guild scope learned from the inbound event.
+    Regression for live 'discord egress declined: target not routed to an
+    onboarded tenant'."""
+    t = _CaptureTransport()
+    a = RelayAdapter(PlatformConfig(), make_desc(platform="discord"), transport=t)
+    # Simulate the connector delivering an inbound message in guild-9 / chan-1,
+    # but don't run the full handle_message pipeline — just the scope capture.
+    a._capture_scope(_make_event(chat_id="chan-1", guild_id="guild-9"))
+
+    await a.send("chan-1", "the reply")
+
+    assert t.sent["metadata"].get("guild_id") == "guild-9"
+
+
+@pytest.mark.asyncio
+async def test_send_without_known_scope_omits_guild_id():
+    """A chat we never saw inbound (e.g. a DM) gets no guild_id — no-op, never
+    invents a scope."""
+    t = _CaptureTransport()
+    a = RelayAdapter(PlatformConfig(), make_desc(platform="discord"), transport=t)
+    await a.send("unknown-chat", "hi")
+    assert "guild_id" not in t.sent["metadata"]
+
+
+@pytest.mark.asyncio
+async def test_send_preserves_explicit_guild_id():
+    """An explicitly-provided metadata.guild_id is never overwritten."""
+    t = _CaptureTransport()
+    a = RelayAdapter(PlatformConfig(), make_desc(platform="discord"), transport=t)
+    a._capture_scope(_make_event(chat_id="chan-1", guild_id="guild-9"))
+    await a.send("chan-1", "hi", metadata={"guild_id": "explicit-1"})
+    assert t.sent["metadata"]["guild_id"] == "explicit-1"
--- a/tests/gateway/relay/test_ws_transport.py
+++ b/tests/gateway/relay/test_ws_transport.py
@ -177,3 +177,25 @@ async def test_disconnect_fails_pending_waiters_cleanly(server):
    # After disconnect, an outbound returns a structured failure rather than hanging.
    result = await t.send_outbound({"op": "send", "chat_id": "c", "content": "x"})
    assert result["success"] is False
+
+
+def test_https_url_normalized_to_wss():
+    """The relay URL is configured once as the http(s):// BASE (for the provision
+    POST), but websockets.connect needs ws(s):// and the connector mounts its WS
+    server at /relay. The transport must convert scheme AND ensure the /relay
+    path. Regression for the live staging failures 'scheme isn't ws or wss' then
+    'server rejected WebSocket connection: HTTP 400' (wrong path)."""
+    t = WebSocketRelayTransport("https://connector.example", "discord", "b")
+    assert t._url == "wss://connector.example/relay"
+    t2 = WebSocketRelayTransport("http://connector.local:8080", "discord", "b")
+    assert t2._url == "ws://connector.local:8080/relay"
+
+
+def test_ws_dial_url_idempotent_with_scheme_and_path():
+    # Already ws(s):// and/or already ending in /relay -> unchanged (no double append).
+    t = WebSocketRelayTransport("wss://connector.example/relay", "discord", "b")
+    assert t._url == "wss://connector.example/relay"
+    t2 = WebSocketRelayTransport("https://connector.example/relay/", "discord", "b")
+    assert t2._url == "wss://connector.example/relay"
+    t3 = WebSocketRelayTransport("ws://127.0.0.1:9", "discord", "b")
+    assert t3._url == "ws://127.0.0.1:9/relay"
--- a/tests/gateway/test_api_server.py
+++ b/tests/gateway/test_api_server.py
@ -337,6 +337,40 @@ class TestAdapterInit:
        assert isinstance(agent, FakeAgent)
        assert captured["reasoning_config"] == {"enabled": True, "effort": "xhigh"}

+    def test_create_agent_refreshes_max_iterations_from_runtime_config(self, monkeypatch):
+        captured = {}
+
+        class FakeAgent:
+            def __init__(self, **kwargs):
+                captured.update(kwargs)
+
+        monkeypatch.setattr("run_agent.AIAgent", FakeAgent)
+        monkeypatch.setattr(
+            "gateway.run._resolve_runtime_agent_kwargs",
+            lambda: {
+                "provider": "openai",
+                "base_url": "https://example.test/v1",
+                "api_mode": "chat_completions",
+            },
+        )
+        monkeypatch.setattr("gateway.run._resolve_gateway_model", lambda: "gpt-5")
+        monkeypatch.setattr("gateway.run._load_gateway_config", lambda: {"agent": {"max_turns": 200}})
+        monkeypatch.setattr(
+            "gateway.run.GatewayRunner._load_reasoning_config",
+            staticmethod(lambda: {}),
+        )
+        monkeypatch.setattr("gateway.run.GatewayRunner._load_fallback_model", staticmethod(lambda: None))
+        monkeypatch.setattr("gateway.run._current_max_iterations", lambda: 200)
+        monkeypatch.setattr("hermes_cli.tools_config._get_platform_tools", lambda *_: set())
+
+        adapter = APIServerAdapter(PlatformConfig(enabled=True))
+        monkeypatch.setattr(adapter, "_ensure_session_db", lambda: None)
+
+        agent = adapter._create_agent(session_id="api-session")
+
+        assert isinstance(agent, FakeAgent)
+        assert captured["max_iterations"] == 200
+

 # ---------------------------------------------------------------------------
 # Auth checking
--- a/tests/gateway/test_cached_agent_max_iterations.py
+++ b/tests/gateway/test_cached_agent_max_iterations.py
@ -0,0 +1,92 @@
+"""Regression tests for PR #48127: cached agent max_iterations refresh.
+
+When a long-lived gateway reuses an agent from its cache, the agent must run
+the *current* configured iteration budget — not the budget it was constructed
+with on the first turn of that session. Two pieces make that true:
+
+1. ``GatewayRunner._init_cached_agent_for_turn`` must NOT reset
+   ``max_iterations`` itself (the gateway refreshes it explicitly right after,
+   from current config). If this helper ever started clobbering it, the
+   gateway's refresh would be silently undone.
+2. The per-turn budget object is rebuilt from ``agent.max_iterations`` at the
+   start of every turn (``agent/turn_context.py`` -> ``IterationBudget``), so
+   refreshing ``max_iterations`` on the cached agent is sufficient to change
+   the operative cap the agent loop checks.
+
+These tests exercise the real code paths rather than asserting a plain
+assignment, so they fail if either contract regresses.
+"""
+
+import time
+from types import SimpleNamespace
+
+from agent.iteration_budget import IterationBudget
+
+
+def _make_cached_agent(max_iterations: int) -> SimpleNamespace:
+    """A minimal stand-in cached agent with the attributes the helpers touch."""
+    # The turn loop checks both api_call_count >= max_iterations AND
+    # iteration_budget.remaining <= 0 (turn_finalizer.py), so the budget must
+    # also reflect the new cap. Seed it with the stale value to prove the
+    # refresh propagates.
+    return SimpleNamespace(
+        _last_activity_ts=time.time() - 1000,
+        _last_activity_desc="previous turn",
+        _api_call_count=42,
+        _last_flushed_db_idx=5,
+        max_iterations=max_iterations,
+        iteration_budget=IterationBudget(max_iterations),
+    )
+
+
+def test_init_cached_agent_for_turn_does_not_touch_max_iterations():
+    """The per-turn reset helper must leave max_iterations untouched.
+
+    The gateway refreshes max_iterations explicitly right after calling this
+    helper; if the helper ever reset it, that refresh would be undone.
+    """
+    from gateway.run import GatewayRunner
+
+    agent = _make_cached_agent(90)
+    GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=0)
+
+    # Per-turn state was reset...
+    assert agent._api_call_count == 0
+    assert agent._last_activity_desc == "starting new turn (cached)"
+    assert agent._last_flushed_db_idx == 0
+    # ...but the iteration budget was NOT changed by the helper itself.
+    assert agent.max_iterations == 90
+
+
+def test_init_cached_agent_preserves_max_iterations_on_interrupt_depth():
+    """Interrupt-recursive turns must also leave max_iterations alone."""
+    from gateway.run import GatewayRunner
+
+    agent = _make_cached_agent(200)
+    GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=1)
+
+    # Activity timestamps preserved for the inactivity watchdog (#15654)...
+    assert agent._last_activity_desc == "previous turn"
+    # ...and max_iterations untouched.
+    assert agent.max_iterations == 200
+
+
+def test_refreshed_max_iterations_propagates_to_turn_budget():
+    """Refreshing max_iterations on a cached agent changes the operative cap.
+
+    The gateway sets ``agent.max_iterations = max_iterations`` on cache reuse;
+    the new turn's setup then rebuilds ``iteration_budget`` from it. This proves
+    the refresh actually moves the budget the agent loop enforces — the cached
+    agent started at 90 and ends a new turn capped at 200.
+    """
+    agent = _make_cached_agent(90)
+    assert agent.iteration_budget.max_total == 90
+
+    # Gateway refresh on cache reuse:
+    agent.max_iterations = 200
+
+    # Start-of-turn budget rebuild (agent/turn_context.py:166):
+    agent.iteration_budget = IterationBudget(agent.max_iterations)
+
+    assert agent.iteration_budget.max_total == 200
+    assert agent.iteration_budget.remaining == 200
--- a/tests/gateway/test_config.py
+++ b/tests/gateway/test_config.py
@ -311,6 +311,55 @@ class TestLoadGatewayConfig:

        assert config.quick_commands == {"limits": {"type": "exec", "command": "echo ok"}}

+    def test_relay_platform_enabled_from_env_url(self, tmp_path, monkeypatch):
+        """GATEWAY_RELAY_URL must enable Platform.RELAY in config.platforms so
+        start_gateway()'s connect loop actually dials the connector. Registering
+        the adapter in the platform_registry is NOT enough — the connect loop
+        iterates config.platforms, so an un-enabled RELAY never connects (the
+        'relay registered but no inbound' bug)."""
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        monkeypatch.setenv("GATEWAY_RELAY_URL", "https://connector.example/relay/")
+
+        config = load_gateway_config()
+
+        assert Platform.RELAY in config.platforms
+        relay = config.platforms[Platform.RELAY]
+        assert relay.enabled is True
+        # Trailing slash stripped; mirrored into extra for the connected-checker.
+        assert relay.extra.get("relay_url") == "https://connector.example/relay"
+        assert Platform.RELAY in config.get_connected_platforms()
+
+    def test_relay_platform_absent_when_url_unset(self, tmp_path, monkeypatch):
+        """No relay URL -> no RELAY platform, so direct/single-tenant gateways
+        are unaffected."""
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        monkeypatch.delenv("GATEWAY_RELAY_URL", raising=False)
+
+        config = load_gateway_config()
+
+        assert Platform.RELAY not in config.platforms
+
+    def test_relay_platform_enabled_from_config_yaml(self, tmp_path, monkeypatch):
+        """gateway.relay_url in config.yaml also enables RELAY (env-less path)."""
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        config_path = hermes_home / "config.yaml"
+        config_path.write_text(
+            "gateway:\n  platforms:\n    relay:\n      extra:\n        relay_url: https://connector.example/relay\n",
+            encoding="utf-8",
+        )
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        monkeypatch.delenv("GATEWAY_RELAY_URL", raising=False)
+
+        config = load_gateway_config()
+
+        assert Platform.RELAY in config.platforms
+        assert config.platforms[Platform.RELAY].enabled is True
+
    def test_bridges_group_sessions_per_user_from_config_yaml(self, tmp_path, monkeypatch):
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
--- a/tests/gateway/test_discord_clarify_buttons.py
+++ b/tests/gateway/test_discord_clarify_buttons.py
@ -122,13 +122,56 @@ class TestClarifyChoiceViewConstruction:
            clarify_id="cidZ",
            allowed_user_ids=set(),
        )
-        # 75 chars + 3 ellipsis chars in the body, plus "1. " prefix
+        # 78 chars + single-char ellipsis in the body, plus "1. " prefix.
+        # Uses U+2026 (…) instead of "..." to fit the 80-char Discord cap.
        first_label = view.children[0].label
        assert first_label.startswith("1. ")
-        assert first_label.endswith("...")
+        assert first_label.endswith("\u2026")
        # Final label total <= 80 (Discord cap on button labels)
        assert len(first_label) <= 80

+    def test_truncates_long_choice_label_breaks_on_word_boundary(self):
+        # Long choice with spaces — should cut at the last whole word so the
+        # trailing text stays readable on Discord mobile.
+        long_choice = (
+            "Tight, well-illustrated, covers all 3 audiences "
+            "(patients, families, curious general readers)"
+        )
+        view = ClarifyChoiceView(
+            choices=[long_choice],
+            clarify_id="cidW",
+            allowed_user_ids=set(),
+        )
+        first_label = view.children[0].label
+        assert first_label.startswith("1. ")
+        assert first_label.endswith("\u2026")
+        # No mid-word fragment before the ellipsis.
+        assert not first_label.rstrip("\u2026").endswith("(")
+
+    def test_truncates_long_no_space_choice_on_soft_boundary(self):
+        # A long choice with soft boundaries (commas, hyphens) but no spaces
+        # should still cut on a soft boundary, not mid-word. We use an input
+        # where position 76 is NOT a soft boundary — the test only passes
+        # if the renderer actively searches backward for a soft char
+        # rather than blindly cutting at the budget limit.
+        long_choice = "a" * 30 + "-" + "b" * 30 + "-" + "c" * 30 + "-" + "d" * 30
+        # 30a-30b-30c-30d = 30 + 1 + 30 + 1 + 30 + 1 + 30 = 123 chars
+        # Position 76 is 'b' (a mid-word alpha). The renderer must look back
+        # for a '-' to cut on.
+        view = ClarifyChoiceView(
+            choices=[long_choice],
+            clarify_id="cidSB",
+            allowed_user_ids=set(),
+        )
+        first_label = view.children[0].label
+        assert first_label.endswith("\u2026")
+        assert len(first_label) <= 80
+        body = first_label[len("1. "):].rstrip("\u2026")
+        last_char = body[-1]
+        assert last_char in {"-", ",", ".", ")", " "}, (
+            f"Label cuts mid-word at {last_char!r}: {first_label!r}"
+        )
+

 # ===========================================================================
 # Choice callback → resolve_gateway_clarify
@ -404,3 +447,134 @@ class TestDiscordSendClarify:
        # Only 1 real choice + 1 Other = 2 children
        assert len(view.children) == 2
        assert "real-choice" in view.children[0].label
+
+    @pytest.mark.asyncio
+    async def test_unwraps_dict_choices_to_description(self):
+        # LLMs sometimes emit [{"description": "..."}] instead of bare strings
+        # — the renderer must unwrap common dict shapes, not str() the whole
+        # dict into a Python repr on the button label.
+        adapter = _make_adapter()
+        channel = MagicMock()
+        sent_msg = MagicMock()
+        sent_msg.id = 555
+        channel.send = AsyncMock(return_value=sent_msg)
+        adapter._client.get_channel = MagicMock(return_value=channel)
+
+        malformed = [
+            {"description": "Tight, well-illustrated"},
+            {"label": "Use label key"},
+            {"text": "Use text key"},
+            "normal-string",  # strings still pass through
+        ]
+        await adapter.send_clarify(
+            chat_id="9001",
+            question="?",
+            choices=malformed,
+            clarify_id="cidU",
+            session_key="sk-U",
+        )
+        kwargs = channel.send.call_args.kwargs
+        view = kwargs["view"]
+        labels = [b.label for b in view.children[:-1]]  # exclude Other
+        # No raw Python repr should leak onto any label.
+        for label in labels:
+            assert "{'" not in label
+            assert "':" not in label
+        # Each dict unwrapped to its inner string.
+        assert any("Tight, well-illustrated" in lbl for lbl in labels)
+        assert any("Use label key" in lbl for lbl in labels)
+        assert any("Use text key" in lbl for lbl in labels)
+        assert any("normal-string" in lbl for lbl in labels)
+
+    @pytest.mark.asyncio
+    async def test_unwrap_prefers_description_over_name_in_multi_key_dict(self):
+        # When the LLM emits both 'name' (often a short identifier in
+        # OpenAI-style tool calls) and 'description' (the user-facing text),
+        # the renderer must surface 'description'. The user should never see
+        # a 4-char model identifier on a button label.
+        adapter = _make_adapter()
+        channel = MagicMock()
+        sent_msg = MagicMock()
+        sent_msg.id = 666
+        channel.send = AsyncMock(return_value=sent_msg)
+        adapter._client.get_channel = MagicMock(return_value=channel)
+
+        await adapter.send_clarify(
+            chat_id="9001",
+            question="?",
+            choices=[{"name": "tight", "description": "Tight, well-illustrated"}],
+            clarify_id="cidN",
+            session_key="sk-N",
+        )
+        kwargs = channel.send.call_args.kwargs
+        view = kwargs["view"]
+        choice_label = view.children[0].label
+        assert "Tight, well-illustrated" in choice_label
+        # The 'name' value (a short identifier) must NOT have leaked.
+        body = choice_label.split("1. ", 1)[1].rstrip("\u2026")
+        assert "tight" not in body, f"'name' leaked onto button: {choice_label!r}"
+
+    @pytest.mark.asyncio
+    async def test_unwrap_prefers_label_over_description(self):
+        # When both 'label' and 'description' are present, 'label' wins.
+        # 'label' is the canonical short user-facing text in most LLM tool
+        # conventions; 'description' is the longer explanation.
+        adapter = _make_adapter()
+        channel = MagicMock()
+        sent_msg = MagicMock()
+        sent_msg.id = 777
+        channel.send = AsyncMock(return_value=sent_msg)
+        adapter._client.get_channel = MagicMock(return_value=channel)
+
+        await adapter.send_clarify(
+            chat_id="9001",
+            question="?",
+            choices=[{"label": "Short", "description": "Long verbose explanation"}],
+            clarify_id="cidL",
+            session_key="sk-L",
+        )
+        kwargs = channel.send.call_args.kwargs
+        view = kwargs["view"]
+        choice_label = view.children[0].label
+        assert "Short" in choice_label
+        # The longer description must NOT have leaked.
+        assert "Long verbose" not in choice_label, (
+            f"'description' leaked over 'label': {choice_label!r}"
+        )
+
+    @pytest.mark.asyncio
+    async def test_unwrap_does_not_pick_value_or_name_alone(self):
+        # 'name' and 'value' are Discord-component-shaped fields that could
+        # accidentally appear in dicts not intended as choices (e.g., a
+        # developer-error in the gateway wiring). The renderer should not
+        # surface them as button labels — only the well-known LLM tool-call
+        # keys (label, description, text, title) should win.
+        adapter = _make_adapter()
+        channel = MagicMock()
+        sent_msg = MagicMock()
+        sent_msg.id = 888
+        channel.send = AsyncMock(return_value=sent_msg)
+        adapter._client.get_channel = MagicMock(return_value=channel)
+
+        await adapter.send_clarify(
+            chat_id="9001",
+            question="?",
+            choices=[
+                {"name": "only_name_here"},   # should be filtered out
+                {"value": "only_value_here"},  # should be filtered out
+                {"description": "real choice"},
+            ],
+            clarify_id="cidNV",
+            session_key="sk-NV",
+        )
+        kwargs = channel.send.call_args.kwargs
+        view = kwargs["view"]
+        choice_labels = [b.label for b in view.children[:-1]]  # exclude Other
+        # Only the well-formed dict survives.
+        assert len(choice_labels) == 1, (
+            f"Expected 1 choice, got {len(choice_labels)}: {choice_labels!r}"
+        )
+        assert "real choice" in choice_labels[0]
+        for label in choice_labels:
+            assert "only_name_here" not in label, f"name leaked: {label!r}"
+            assert "only_value_here" not in label, f"value leaked: {label!r}"
--- a/tests/gateway/test_discord_free_response.py
+++ b/tests/gateway/test_discord_free_response.py
@ -666,6 +666,70 @@ async def test_fetch_channel_context_stops_at_self_message_and_reverses_to_chron
    )


+@pytest.mark.asyncio
+async def test_fetch_channel_context_skips_self_improvement_boundary_message(adapter, monkeypatch):
+    """Delayed harness status bumps must not hide messages after the real reply."""
+    monkeypatch.setenv("DISCORD_ALLOW_BOTS", "all")
+    adapter.config.extra["history_backfill_limit"] = 10
+
+    codex = SimpleNamespace(id=55, display_name="Codex", name="Codex", bot=True)
+    human = SimpleNamespace(id=56, display_name="Alice", name="Alice", bot=False)
+
+    channel = FakeHistoryChannel(
+        [
+            make_history_message(
+                author=adapter._client.user,
+                content="arbitrary lifecycle text from a metadata-marked send",
+                msg_id=9,
+            ),
+            make_history_message(
+                author=adapter._client.user,
+                content="[Background process bg-123 finished with exit code 0~ Here's the final output:\nok]",
+                msg_id=8,
+            ),
+            make_history_message(
+                author=codex,
+                content="♻ Gateway restarted successfully. Your session continues.",
+                msg_id=7,
+            ),
+            make_history_message(
+                author=codex,
+                content="💾 Self-improvement review: Memory updated",
+                msg_id=6,
+            ),
+            make_history_message(author=human, content="question after reply", msg_id=5),
+            make_history_message(
+                author=adapter._client.user,
+                content="💾 Self-improvement review: Skill 'hermes-gateway-display-config' patched",
+                msg_id=4,
+            ),
+            make_history_message(author=codex, content="Codex final answer", msg_id=3),
+            make_history_message(author=human, content="prompt before reply", msg_id=2),
+            make_history_message(author=adapter._client.user, content="our prior response", msg_id=1),
+        ],
+        channel_id=123,
+    )
+    adapter._nonconversational_messages.mark_many(["9"])
+
+    result = await adapter._fetch_channel_context(channel, before=make_message(channel=channel, content="trigger"))
+
+    assert result == (
+        "[Recent channel messages]\n"
+        "[Alice] prompt before reply\n"
+        "[Codex [bot]] Codex final answer\n"
+        "[Alice] question after reply"
+    )
+
+
+def test_nonconversational_fallback_requires_self_improvement_emoji():
+    assert discord_platform._looks_like_nonconversational_history_message(
+        "💾 Self-improvement review: Memory updated"
+    )
+    assert not discord_platform._looks_like_nonconversational_history_message(
+        "Self-improvement review: this is a normal assistant heading"
+    )
+
+
@pytest.mark.asyncio
 async def test_fetch_channel_context_skips_other_bots_when_allow_bots_none(adapter, monkeypatch):
    monkeypatch.setenv("DISCORD_ALLOW_BOTS", "none")
@ -801,6 +865,33 @@ async def test_fetch_channel_context_ignores_stale_cache(adapter, monkeypatch):
    assert recorded_after["value"] is None


+@pytest.mark.asyncio
+async def test_discord_send_does_not_cache_nonconversational_status_as_history_boundary(adapter):
+    """Automated status notifications should not move the backfill boundary."""
+
+    class SendingChannel(FakeTextChannel):
+        async def send(self, content, reference=None):
+            return SimpleNamespace(id=222)
+
+    channel = SendingChannel(channel_id=777)
+    adapter._client = SimpleNamespace(
+        user=adapter._client.user,
+        get_channel=lambda channel_id: channel if channel_id == 777 else None,
+        fetch_channel=AsyncMock(return_value=channel),
+    )
+    adapter._last_self_message_id["777"] = "111"
+
+    result = await adapter.send(
+        "777",
+        "arbitrary lifecycle text from gateway",
+        metadata={"non_conversational": True},
+    )
+
+    assert result.success is True
+    assert adapter._last_self_message_id["777"] == "111"
+    assert "222" in adapter._nonconversational_messages
+
+
@pytest.mark.asyncio
 async def test_discord_shared_channel_backfill_prepends_context(adapter, monkeypatch):
    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true")
@ -925,5 +1016,3 @@ async def test_discord_auto_thread_skips_backfill(adapter, monkeypatch):

    adapter._auto_create_thread.assert_awaited_once()
    adapter._fetch_channel_context.assert_not_awaited()
-
-
--- a/tests/gateway/test_gateway_command_line_matcher.py
+++ b/tests/gateway/test_gateway_command_line_matcher.py
@ -0,0 +1,60 @@
+"""Tests for the strict gateway command-line matcher.
+
+Regression guard for the Windows ``hermes gateway restart`` silent-outage bug:
+the previous loose substring match (``"... gateway" in cmdline``) false-matched
+``gateway status``/``dashboard`` siblings and unrelated processes such as
+``python -m tui_gateway``, which let ``restart()`` race a still-draining old
+process and ``status``/``start`` report false positives.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from gateway.status import looks_like_gateway_command_line as matches
+
+
+ACCEPT = [
+    "pythonw.exe -m hermes_cli.main gateway run",
+    r"C:\Users\me\hermes\venv\Scripts\pythonw.exe -m hermes_cli.main gateway run",
+    "python -m hermes_cli.main --profile work gateway run",
+    "python -m hermes_cli.main gateway run --replace",
+    "python -m hermes_cli/main.py gateway run",
+    "python gateway/run.py",
+    "hermes-gateway.exe",
+    "hermes gateway",          # bare `hermes gateway` defaults to run
+    "hermes gateway run",
+    # profile selector AFTER the `gateway` token (argv is profile-position
+    # agnostic — _apply_profile_override strips --profile/-p anywhere)
+    "hermes gateway --profile work run",
+    "python -m hermes_cli.main gateway -p work run",
+    "hermes gateway --profile=work run",
+    # a profile literally NAMED "gateway"
+    "hermes -p gateway gateway run",
+    "python -m hermes_cli.main --profile gateway gateway run",
+    # quoted Windows paths with spaces (shlex-aware tokenization)
+    r'"C:\Program Files\Hermes\hermes-gateway.exe"',
+    r'"C:\Program Files\Hermes\gateway\run.py" run',
+    r'"C:\Program Files\Py\pythonw.exe" -m hermes_cli.main gateway run',
+]
+
+REJECT = [
+    "python -m tui_gateway",                              # unrelated module
+    "python -m hermes_cli.main gateway status",           # other subcommand
+    "python -m hermes_cli.main gateway restart",
+    "python -m hermes_cli.main gateway stop",
+    "python -m hermes_cli.main --profile x dashboard",    # non-gateway subcommand
+    "some random python -m mygateway thing",
+    "",
+    None,
+]
+
+
+@pytest.mark.parametrize("cmd", ACCEPT)
+def test_accepts_real_gateway_run(cmd):
+    assert matches(cmd) is True
+
+
+@pytest.mark.parametrize("cmd", REJECT)
+def test_rejects_non_gateway_run(cmd):
+    assert matches(cmd) is False
--- a/tests/gateway/test_kanban_watchers_mixin.py
+++ b/tests/gateway/test_kanban_watchers_mixin.py
@ -43,3 +43,27 @@ def test_watcher_loops_are_coroutines():
    # The two long-running watchers are async loops.
    assert inspect.iscoroutinefunction(GatewayKanbanWatchersMixin._kanban_notifier_watcher)
    assert inspect.iscoroutinefunction(GatewayKanbanWatchersMixin._kanban_dispatcher_watcher)
+
+
+def test_singleton_dispatcher_lock_is_exclusive(tmp_path):
+    """Only one holder of the dispatcher lock at a time — the backstop that
+    stops concurrent dispatchers double reclaiming and corrupting shared
+    kanban SQLite index pages under wal_autocheckpoint=0."""
+    import os
+
+    from gateway.kanban_watchers import _acquire_singleton_lock, _release_singleton_lock
+
+    lock = tmp_path / "kanban" / ".dispatcher.lock"
+
+    h1, st1 = _acquire_singleton_lock(lock)
+    assert st1 == "held" and h1 is not None
+
+    # A second acquire while the first is held must be refused, not granted.
+    h2, st2 = _acquire_singleton_lock(lock)
+    assert st2 == "contended" and h2 is None
+
+    # Releasing the first lets a fresh acquire succeed (lock is reusable).
+    _release_singleton_lock(h1)
+    h3, st3 = _acquire_singleton_lock(lock)
+    assert st3 == "held" and h3 is not None
+    _release_singleton_lock(h3)
--- a/tests/gateway/test_model_command_flat_string_config.py
+++ b/tests/gateway/test_model_command_flat_string_config.py
@ -156,3 +156,46 @@ async def test_model_global_persists_when_config_has_proper_dict_model(tmp_path,
    written = yaml.safe_load(cfg_path.read_text(encoding="utf-8"))
    assert written["model"]["default"] == "gpt-5.5"
    assert written["model"]["provider"] == "openrouter"
+
+
+@pytest.mark.asyncio
+async def test_model_no_flag_persists_by_default(tmp_path, monkeypatch):
+    """A plain ``/model X`` (no --global) now persists to config.yaml.
+
+    This is the user-facing fix: switching models in one session survives
+    into the next without re-typing the switch every time.
+    """
+    cfg_path = _setup_isolated_home(
+        tmp_path,
+        monkeypatch,
+        {"default": "old-model", "provider": "openai-codex"},
+    )
+
+    result = await _make_runner()._handle_model_command(
+        _make_event("/model gpt-5.5")
+    )
+
+    assert result is not None
+    assert "gpt-5.5" in result
+    written = yaml.safe_load(cfg_path.read_text(encoding="utf-8"))
+    assert written["model"]["default"] == "gpt-5.5"
+
+
+@pytest.mark.asyncio
+async def test_model_session_flag_does_not_persist(tmp_path, monkeypatch):
+    """``/model X --session`` opts out of persistence even under the new default."""
+    cfg_path = _setup_isolated_home(
+        tmp_path,
+        monkeypatch,
+        {"default": "old-model", "provider": "openai-codex"},
+    )
+
+    result = await _make_runner()._handle_model_command(
+        _make_event("/model gpt-5.5 --session")
+    )
+
+    assert result is not None
+    assert "gpt-5.5" in result
+    written = yaml.safe_load(cfg_path.read_text(encoding="utf-8"))
+    # Config untouched — the session override is in-memory only.
+    assert written["model"]["default"] == "old-model"
--- a/tests/gateway/test_multiplex_adapter_registry.py
+++ b/tests/gateway/test_multiplex_adapter_registry.py
@ -0,0 +1,136 @@
+"""Phase 3: secondary-profile adapter registry + same-token conflict detection."""
+import pytest
+
+from gateway.run import GatewayRunner
+
+
+class _FakeAdapter:
+    def __init__(self, token=None):
+        self.token = token
+
+
+class TestCredentialFingerprint:
+    def test_none_without_token(self):
+        assert GatewayRunner._adapter_credential_fingerprint(_FakeAdapter()) is None
+
+    def test_stable_and_log_safe(self):
+        a = _FakeAdapter(token="secret-bot-token")
+        fp1 = GatewayRunner._adapter_credential_fingerprint(a)
+        fp2 = GatewayRunner._adapter_credential_fingerprint(_FakeAdapter(token="secret-bot-token"))
+        assert fp1 == fp2  # stable
+        assert "secret-bot-token" not in (fp1 or "")  # never the raw token
+        assert len(fp1) == 16
+
+    def test_distinct_tokens_distinct_fp(self):
+        a = GatewayRunner._adapter_credential_fingerprint(_FakeAdapter(token="tok-A"))
+        b = GatewayRunner._adapter_credential_fingerprint(_FakeAdapter(token="tok-B"))
+        assert a != b
+
+    def test_reads_alt_attrs(self):
+        class _AltAdapter:
+            def __init__(self):
+                self.bot_token = "alt-token"
+        assert GatewayRunner._adapter_credential_fingerprint(_AltAdapter()) is not None
+
+
+class TestProfileMessageHandler:
+    @pytest.mark.asyncio
+    async def test_stamps_profile_on_unstamped_source(self):
+        runner = GatewayRunner.__new__(GatewayRunner)
+        seen = {}
+
+        async def _fake_handle(event):
+            seen["profile"] = event.source.profile
+            return "ok"
+
+        runner._handle_message = _fake_handle
+        handler = runner._make_profile_message_handler("coder")
+
+        class _Src:
+            profile = None
+
+        class _Evt:
+            source = _Src()
+
+        result = await handler(_Evt())
+        assert result == "ok"
+        assert seen["profile"] == "coder"
+
+    @pytest.mark.asyncio
+    async def test_does_not_override_existing_profile(self):
+        runner = GatewayRunner.__new__(GatewayRunner)
+        seen = {}
+
+        async def _fake_handle(event):
+            seen["profile"] = event.source.profile
+            return "ok"
+
+        runner._handle_message = _fake_handle
+        handler = runner._make_profile_message_handler("coder")
+
+        class _Src:
+            profile = "writer"  # already stamped (e.g. by URL prefix)
+
+        class _Evt:
+            source = _Src()
+
+        await handler(_Evt())
+        assert seen["profile"] == "writer"
+
+
+class TestPortBindingHardError:
+    """A secondary profile enabling a port-binding platform aborts startup."""
+
+    @pytest.mark.asyncio
+    async def test_secondary_webhook_raises(self, monkeypatch):
+        from gateway.run import MultiplexConfigError
+        from gateway.config import GatewayConfig, Platform, PlatformConfig
+
+        runner = GatewayRunner.__new__(GatewayRunner)
+        runner.config = GatewayConfig(multiplex_profiles=True)
+        runner._profile_adapters = {}
+
+        # reviewer profile config enables webhook (a port-binding platform)
+        reviewer_cfg = GatewayConfig(multiplex_profiles=True)
+        reviewer_cfg.platforms = {
+            Platform.WEBHOOK: PlatformConfig(enabled=True, extra={"port": 8644}),
+        }
+        monkeypatch.setattr(
+            "gateway.config.load_gateway_config", lambda: reviewer_cfg
+        )
+
+        with pytest.raises(MultiplexConfigError) as ei:
+            await runner._start_one_profile_adapters("reviewer", "/tmp/x", {})
+        assert "webhook" in str(ei.value)
+        assert "reviewer" in str(ei.value)
+
+    @pytest.mark.asyncio
+    async def test_secondary_non_binding_platform_ok(self, monkeypatch):
+        """A non-port-binding platform (e.g. telegram) is NOT rejected."""
+        from gateway.config import GatewayConfig, Platform, PlatformConfig
+
+        runner = GatewayRunner.__new__(GatewayRunner)
+        runner.config = GatewayConfig(multiplex_profiles=True)
+        runner._profile_adapters = {}
+
+        reviewer_cfg = GatewayConfig(multiplex_profiles=True)
+        reviewer_cfg.platforms = {
+            Platform.TELEGRAM: PlatformConfig(enabled=True, token="t"),
+        }
+        monkeypatch.setattr(
+            "gateway.config.load_gateway_config", lambda: reviewer_cfg
+        )
+        # _create_adapter returns None here (no real telegram token wiring), so
+        # the loop simply connects nothing — the key assertion is NO raise.
+        monkeypatch.setattr(runner, "_create_adapter", lambda p, c: None)
+
+        connected = await runner._start_one_profile_adapters("reviewer", "/tmp/x", {})
+        assert connected == 0  # nothing connected, but no MultiplexConfigError
+
+    def test_port_binding_set_covers_known_listeners(self):
+        from gateway.run import _PORT_BINDING_PLATFORM_VALUES
+        # Every adapter that binds a TCP port must be in the guard set.
+        for p in ("webhook", "api_server", "msgraph_webhook", "feishu",
+                  "wecom_callback", "bluebubbles", "sms"):
+            assert p in _PORT_BINDING_PLATFORM_VALUES
+
--- a/tests/gateway/test_multiplex_credential_isolation.py
+++ b/tests/gateway/test_multiplex_credential_isolation.py
@ -0,0 +1,88 @@
+"""End-to-end credential isolation proof for multiplex mode (Workstream A).
+
+These exercise the REAL resolution path (runtime_provider, secret scope, MCP
+interpolation) rather than mocking it, proving the property that matters: two
+profiles with different keys never see each other's, and an unscoped read in
+multiplex mode fails closed instead of leaking.
+"""
+import pytest
+
+from agent import secret_scope as ss
+
+
+@pytest.fixture(autouse=True)
+def _reset(monkeypatch):
+    ss.set_multiplex_active(False)
+    yield
+    ss.set_multiplex_active(False)
+
+
+class TestRuntimeProviderUsesScope:
+    """hermes_cli.runtime_provider._getenv resolves through the secret scope."""
+
+    def test_getenv_reads_scope_under_multiplex(self, monkeypatch):
+        from hermes_cli.runtime_provider import _getenv
+        monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-global-leak")
+        ss.set_multiplex_active(True)
+        tok = ss.set_secret_scope({"ANTHROPIC_API_KEY": "sk-profileA"})
+        try:
+            assert _getenv("ANTHROPIC_API_KEY") == "sk-profileA"
+        finally:
+            ss.reset_secret_scope(tok)
+
+    def test_getenv_two_profiles_isolated(self, monkeypatch):
+        from hermes_cli.runtime_provider import _getenv
+        ss.set_multiplex_active(True)
+
+        tok_a = ss.set_secret_scope({"OPENAI_API_KEY": "sk-A"})
+        try:
+            assert _getenv("OPENAI_API_KEY") == "sk-A"
+        finally:
+            ss.reset_secret_scope(tok_a)
+
+        tok_b = ss.set_secret_scope({"OPENAI_API_KEY": "sk-B"})
+        try:
+            assert _getenv("OPENAI_API_KEY") == "sk-B"
+        finally:
+            ss.reset_secret_scope(tok_b)
+
+    def test_getenv_fails_closed_unscoped(self, monkeypatch):
+        from hermes_cli.runtime_provider import _getenv
+        monkeypatch.setenv("OPENROUTER_API_KEY", "sk-leak")
+        ss.set_multiplex_active(True)
+        with pytest.raises(ss.UnscopedSecretError):
+            _getenv("OPENROUTER_API_KEY")
+
+    def test_getenv_global_var_still_reads_environ(self, monkeypatch):
+        from hermes_cli.runtime_provider import _getenv
+        monkeypatch.setenv("HERMES_MAX_ITERATIONS", "42")
+        ss.set_multiplex_active(True)
+        # global var: no scope needed, no raise
+        assert _getenv("HERMES_MAX_ITERATIONS") == "42"
+
+
+class TestMcpInterpolationUsesScope:
+    """MCP config ${VAR} interpolation resolves through the secret scope."""
+
+    def test_interpolation_reads_scope(self, monkeypatch):
+        from tools.mcp_tool import _interpolate_env_vars
+        monkeypatch.setenv("MY_MCP_TOKEN", "global-token")
+        ss.set_multiplex_active(True)
+        tok = ss.set_secret_scope({"MY_MCP_TOKEN": "profile-token"})
+        try:
+            cfg = {"env": {"TOKEN": "${MY_MCP_TOKEN}"}}
+            assert _interpolate_env_vars(cfg) == {"env": {"TOKEN": "profile-token"}}
+        finally:
+            ss.reset_secret_scope(tok)
+
+    def test_interpolation_unset_keeps_placeholder(self, monkeypatch):
+        from tools.mcp_tool import _interpolate_env_vars
+        monkeypatch.delenv("UNSET_MCP_VAR", raising=False)
+        # multiplex off: unset var keeps literal placeholder (legacy behavior)
+        assert _interpolate_env_vars("${UNSET_MCP_VAR}") == "${UNSET_MCP_VAR}"
+
+    def test_interpolation_off_reads_environ(self, monkeypatch):
+        from tools.mcp_tool import _interpolate_env_vars
+        monkeypatch.setenv("MY_MCP_TOKEN", "env-token")
+        # multiplex off: legacy os.environ resolution
+        assert _interpolate_env_vars("${MY_MCP_TOKEN}") == "env-token"
--- a/tests/gateway/test_multiplex_http_routing.py
+++ b/tests/gateway/test_multiplex_http_routing.py
@ -0,0 +1,73 @@
+"""Phase 1: HTTP-inbound /p/<profile>/ routing for the webhook adapter."""
+import pytest
+
+from gateway.config import GatewayConfig, Platform
+from gateway.session import SessionSource, build_session_key
+
+
+class TestSessionSourceProfileField:
+    def test_profile_roundtrips(self):
+        s = SessionSource(
+            platform=Platform.WEBHOOK if hasattr(Platform, "WEBHOOK") else Platform.TELEGRAM,
+            chat_id="c1",
+            chat_type="webhook",
+            profile="coder",
+        )
+        restored = SessionSource.from_dict(s.to_dict())
+        assert restored.profile == "coder"
+
+    def test_profile_absent_not_serialized(self):
+        s = SessionSource(platform=Platform.TELEGRAM, chat_id="c1", chat_type="dm")
+        assert "profile" not in s.to_dict()
+
+    def test_source_profile_drives_session_key_namespace(self):
+        s = SessionSource(platform=Platform.TELEGRAM, chat_id="99", chat_type="dm")
+        # build_session_key takes profile explicitly; the adapter passes
+        # source.profile through. Verify the namespace follows it.
+        assert build_session_key(s, profile="coder") == "agent:coder:telegram:dm:99"
+
+
+class TestWebhookProfileResolution:
+    """_resolve_request_profile validates the /p/<profile>/ prefix."""
+
+    def _adapter(self, multiplex: bool, served=("default", "coder")):
+        from gateway.platforms.webhook import WebhookAdapter, _PROFILE_REJECTED
+
+        class _FakeReq:
+            def __init__(self, profile):
+                self.match_info = {"profile": profile} if profile is not None else {}
+
+        cfg = GatewayConfig(multiplex_profiles=multiplex)
+
+        class _Runner:
+            config = cfg
+
+        # Construct minimally; we only call _resolve_request_profile.
+        adapter = WebhookAdapter.__new__(WebhookAdapter)
+        adapter.gateway_runner = _Runner()
+        return adapter, _FakeReq, _PROFILE_REJECTED, served
+
+    def test_no_prefix_returns_none(self):
+        adapter, Req, _REJ, _ = self._adapter(multiplex=True)
+        assert adapter._resolve_request_profile(Req(None)) is None
+
+    def test_prefix_ignored_when_multiplex_off(self):
+        adapter, Req, _REJ, _ = self._adapter(multiplex=False)
+        # Even a bogus profile is ignored (not 404'd) when multiplexing is off.
+        assert adapter._resolve_request_profile(Req("anything")) is None
+
+    def test_known_profile_accepted(self, monkeypatch):
+        adapter, Req, _REJ, served = self._adapter(multiplex=True)
+        monkeypatch.setattr(
+            "hermes_cli.profiles.profiles_to_serve",
+            lambda multiplex: [(n, None) for n in served],
+        )
+        assert adapter._resolve_request_profile(Req("coder")) == "coder"
+
+    def test_unknown_profile_rejected(self, monkeypatch):
+        adapter, Req, REJ, served = self._adapter(multiplex=True)
+        monkeypatch.setattr(
+            "hermes_cli.profiles.profiles_to_serve",
+            lambda multiplex: [(n, None) for n in served],
+        )
+        assert adapter._resolve_request_profile(Req("ghost")) is REJ
--- a/tests/gateway/test_multiplex_lifecycle.py
+++ b/tests/gateway/test_multiplex_lifecycle.py
@ -0,0 +1,55 @@
+"""Phase 4: lifecycle guard + per-profile observability."""
+import pytest
+
+
+class TestServedProfilesStatus:
+    def test_write_and_read_served_profiles(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        import importlib
+        import gateway.status as status
+        importlib.reload(status)
+        try:
+            status.write_runtime_status(
+                gateway_state="running", served_profiles=["default", "coder"]
+            )
+            rec = status.read_runtime_status()
+            assert rec.get("served_profiles") == ["default", "coder"]
+        finally:
+            importlib.reload(status)
+
+    def test_served_profiles_absent_by_default(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        import importlib
+        import gateway.status as status
+        importlib.reload(status)
+        try:
+            status.write_runtime_status(gateway_state="running")
+            rec = status.read_runtime_status()
+            assert "served_profiles" not in rec
+        finally:
+            importlib.reload(status)
+
+
+class TestNamedProfileMultiplexerGuard:
+    """_guard_named_profile_under_multiplexer is inert unless all conditions hold."""
+
+    def test_inert_for_default_profile(self, monkeypatch):
+        from hermes_cli import gateway as gw
+        monkeypatch.setattr(gw, "_profile_suffix", lambda: "")
+        # Should return without raising (default profile => guard N/A).
+        gw._guard_named_profile_under_multiplexer(force=False)
+
+    def test_force_bypasses(self, monkeypatch):
+        from hermes_cli import gateway as gw
+        # Even if it looks like a named profile, force returns immediately.
+        monkeypatch.setattr(gw, "_profile_suffix", lambda: "coder")
+        gw._guard_named_profile_under_multiplexer(force=True)
+
+    def test_inert_when_no_default_gateway_running(self, monkeypatch, tmp_path):
+        from hermes_cli import gateway as gw
+        monkeypatch.setattr(gw, "_profile_suffix", lambda: "coder")
+        monkeypatch.setattr(
+            "hermes_constants.get_default_hermes_root", lambda: tmp_path
+        )
+        # No gateway.pid in tmp_path => no running default gateway => no raise.
+        gw._guard_named_profile_under_multiplexer(force=False)
--- a/tests/gateway/test_multiplex_phase0.py
+++ b/tests/gateway/test_multiplex_phase0.py
@ -0,0 +1,165 @@
+"""Phase 0 foundations for multi-profile gateway multiplexing.
+
+Covers the three Phase 0 deliverables:
+  1. ``gateway.multiplex_profiles`` config flag (default False, round-trips).
+  2. ``hermes_cli.profiles.profiles_to_serve`` enumeration.
+  3. Profile-stamped ``build_session_key`` that is BYTE-IDENTICAL when the
+     flag is off (the orphan-every-session guard) and namespace-segmented when
+     on, without disturbing the positional key layout downstream parsers rely
+     on.
+"""
+import pytest
+from unittest.mock import patch
+
+from gateway.config import GatewayConfig, Platform
+from gateway.session import SessionSource, SessionStore, build_session_key
+
+
+def _src(**kw) -> SessionSource:
+    kw.setdefault("platform", Platform.TELEGRAM)
+    kw.setdefault("chat_id", "99")
+    kw.setdefault("chat_type", "dm")
+    return SessionSource(**kw)
+
+
+class TestSessionKeyByteIdenticalWhenOff:
+    """The non-negotiable guard: with no profile (or 'default'), every key is
+    byte-for-byte what it was before Phase 0. A diff here orphans every
+    existing session on upgrade."""
+
+    @pytest.mark.parametrize("profile", [None, "default"])
+    def test_dm_with_chat_id(self, profile):
+        s = _src(chat_id="99", chat_type="dm")
+        assert build_session_key(s, profile=profile) == "agent:main:telegram:dm:99"
+
+    @pytest.mark.parametrize("profile", [None, "default"])
+    def test_dm_with_thread(self, profile):
+        s = _src(chat_id="99", chat_type="dm", thread_id="t1")
+        assert build_session_key(s, profile=profile) == "agent:main:telegram:dm:99:t1"
+
+    @pytest.mark.parametrize("profile", [None, "default"])
+    def test_dm_without_chat_id_falls_back_to_user(self, profile):
+        s = _src(chat_id="", chat_type="dm", user_id="jordan")
+        assert build_session_key(s, profile=profile) == "agent:main:telegram:dm:jordan"
+
+    @pytest.mark.parametrize("profile", [None, "default"])
+    def test_group_per_user(self, profile):
+        s = _src(platform=Platform.DISCORD, chat_id="g1", chat_type="group", user_id="alice")
+        assert (
+            build_session_key(s, profile=profile)
+            == "agent:main:discord:group:g1:alice"
+        )
+
+    @pytest.mark.parametrize("profile", [None, "default"])
+    def test_group_shared_when_disabled(self, profile):
+        s = _src(platform=Platform.DISCORD, chat_id="g1", chat_type="group", user_id="alice")
+        assert (
+            build_session_key(s, group_sessions_per_user=False, profile=profile)
+            == "agent:main:discord:group:g1"
+        )
+
+
+class TestSessionKeyNamespacedWhenOn:
+    """A named profile occupies the namespace slot, isolating its sessions."""
+
+    def test_named_profile_dm(self):
+        s = _src(chat_id="99", chat_type="dm")
+        assert build_session_key(s, profile="coder") == "agent:coder:telegram:dm:99"
+
+    def test_named_profile_group_per_user(self):
+        s = _src(platform=Platform.DISCORD, chat_id="g1", chat_type="group", user_id="alice")
+        assert (
+            build_session_key(s, profile="coder")
+            == "agent:coder:discord:group:g1:alice"
+        )
+
+    def test_two_profiles_same_chat_do_not_collide(self):
+        s = _src(chat_id="99", chat_type="dm")
+        a = build_session_key(s, profile="default")
+        b = build_session_key(s, profile="coder")
+        c = build_session_key(s, profile="writer")
+        assert a != b != c and a != c
+
+    def test_positional_layout_preserved_for_parsers(self):
+        """Downstream parsers split on ':' and read parts[2]=platform,
+        parts[3]=chat_type, parts[4]=chat_id (see qqbot adapter
+        _parse_gateway_session_key). The profile must occupy parts[1] only."""
+        s = _src(platform=Platform.DISCORD, chat_id="g1", chat_type="group", user_id="alice")
+        parts = build_session_key(s, profile="coder").split(":")
+        assert parts[0] == "agent"
+        assert parts[1] == "coder"  # namespace slot (was always 'main')
+        assert parts[2] == "discord"  # platform — unchanged offset
+        assert parts[3] == "group"  # chat_type — unchanged offset
+        assert parts[4] == "g1"  # chat_id — unchanged offset
+
+    def test_default_namespace_layout_matches_named(self):
+        """Default and named keys differ ONLY in parts[1]."""
+        s = _src(platform=Platform.SLACK, chat_id="c1", chat_type="channel", user_id="u1")
+        d = build_session_key(s, profile="default").split(":")
+        n = build_session_key(s, profile="coder").split(":")
+        assert d[0] == n[0] == "agent"
+        assert d[1] == "main" and n[1] == "coder"
+        assert d[2:] == n[2:]  # everything after the namespace is identical
+
+
+class TestMultiplexConfigFlag:
+    """gateway.multiplex_profiles defaults off and round-trips."""
+
+    def test_default_is_false(self):
+        assert GatewayConfig().multiplex_profiles is False
+
+    def test_to_dict_includes_flag(self):
+        assert GatewayConfig().to_dict()["multiplex_profiles"] is False
+
+    def test_from_dict_top_level(self):
+        cfg = GatewayConfig.from_dict({"multiplex_profiles": True})
+        assert cfg.multiplex_profiles is True
+
+    def test_from_dict_nested_gateway(self):
+        cfg = GatewayConfig.from_dict({"gateway": {"multiplex_profiles": True}})
+        assert cfg.multiplex_profiles is True
+
+    def test_from_dict_coerces_truthy_string(self):
+        cfg = GatewayConfig.from_dict({"multiplex_profiles": "true"})
+        assert cfg.multiplex_profiles is True
+
+    def test_roundtrip(self):
+        cfg = GatewayConfig.from_dict(GatewayConfig(multiplex_profiles=True).to_dict())
+        assert cfg.multiplex_profiles is True
+
+
+class TestSessionStoreProfileResolution:
+    """SessionStore._generate_session_key honors the flag: legacy namespace
+    when off, active-profile namespace when on."""
+
+    def _store(self, tmp_path, **cfg_kw):
+        config = GatewayConfig(**cfg_kw)
+        with patch("gateway.session.SessionStore._ensure_loaded"):
+            s = SessionStore(sessions_dir=tmp_path, config=config)
+        s._db = None
+        s._loaded = True
+        return s
+
+    def test_flag_off_uses_legacy_namespace(self, tmp_path):
+        store = self._store(tmp_path)  # multiplex_profiles defaults False
+        s = _src(chat_id="99", chat_type="dm")
+        assert store._generate_session_key(s) == "agent:main:telegram:dm:99"
+        assert store._generate_session_key(s) == build_session_key(s)
+
+    def test_flag_off_resolve_profile_is_none(self, tmp_path):
+        store = self._store(tmp_path)
+        assert store._resolve_profile_for_key() is None
+
+    def test_flag_on_uses_active_profile_namespace(self, tmp_path):
+        store = self._store(tmp_path, multiplex_profiles=True)
+        s = _src(chat_id="99", chat_type="dm")
+        with patch("hermes_cli.profiles.get_active_profile_name", return_value="coder"):
+            assert store._generate_session_key(s) == "agent:coder:telegram:dm:99"
+
+    def test_flag_on_default_profile_stays_legacy(self, tmp_path):
+        store = self._store(tmp_path, multiplex_profiles=True)
+        s = _src(chat_id="99", chat_type="dm")
+        with patch("hermes_cli.profiles.get_active_profile_name", return_value="default"):
+            assert store._generate_session_key(s) == "agent:main:telegram:dm:99"
+
+
--- a/tests/gateway/test_runtime_env_reload_config_authority.py
+++ b/tests/gateway/test_runtime_env_reload_config_authority.py
@ -51,3 +51,18 @@ def test_reload_runtime_env_keeps_env_max_iterations_when_config_omits_key(
    gateway_run._reload_runtime_env_preserving_config_authority()

    assert os.environ["HERMES_MAX_ITERATIONS"] == "123"
+
+
+def test_current_max_iterations_reloads_before_reading(monkeypatch) -> None:
+    monkeypatch.setenv("HERMES_MAX_ITERATIONS", "90")
+
+    def _fake_reload() -> None:
+        os.environ["HERMES_MAX_ITERATIONS"] = "200"
+
+    monkeypatch.setattr(
+        gateway_run,
+        "_reload_runtime_env_preserving_config_authority",
+        _fake_reload,
+    )
+
+    assert gateway_run._current_max_iterations() == 200
--- a/tests/hermes_cli/test_backup.py
+++ b/tests/hermes_cli/test_backup.py
@ -153,6 +153,39 @@ class TestShouldExclude:
        assert not _should_exclude(Path("skills/autonomous-ai-agents/hermes-agent/SKILL.md"))
        assert not _should_exclude(Path("skills/autonomous-ai-agents/hermes-agent/sub/item.txt"))

+    @pytest.mark.parametrize(
+        "rel",
+        [
+            "plugins/my-plugin/.venv/lib/python3.12/site-packages/x/__init__.py",
+            "plugins/my-plugin/venv/bin/python",
+            "mcp/server/site-packages/pkg/mod.py",
+            ".cache/uv/wheels/abc.whl",
+            "plugins/p/.cache/pip/http/deadbeef",
+            ".tox/py312/log.txt",
+            ".nox/tests/bin/pytest",
+            "plugins/p/.pytest_cache/v/cache/lastfailed",
+            ".mypy_cache/3.12/agent.meta.json",
+            ".ruff_cache/0.4.0/abc",
+        ],
+    )
+    def test_excludes_regeneratable_dependency_and_cache_dirs(self, rel):
+        """Python dep trees and tool caches under HERMES_HOME must be skipped —
+        these are what balloon a backup to hundreds of thousands of files."""
+        from hermes_cli.backup import _should_exclude
+        assert _should_exclude(Path(rel))
+
+    def test_does_not_exclude_curator_archive(self):
+        """skills/.archive/ holds restorable archived skills and MUST survive
+        a backup — it is intentionally NOT in the exclusion set."""
+        from hermes_cli.backup import _should_exclude
+        assert not _should_exclude(Path("skills/.archive/old-skill/SKILL.md"))
+
+    def test_does_not_exclude_legit_files_resembling_cache_names(self):
+        """Only directory-component matches are excluded; a normal file is kept."""
+        from hermes_cli.backup import _should_exclude
+        assert not _should_exclude(Path("skills/my-skill/venv-notes.md"))
+        assert not _should_exclude(Path("memories/cache.json"))
+
 # ---------------------------------------------------------------------------
 # Backup tests
 # ---------------------------------------------------------------------------
@ -272,6 +305,37 @@ class TestBackup:
            agent_files = [n for n in names if "hermes-agent" in n]
            assert agent_files == [], f"hermes-agent files leaked into backup: {agent_files}"

+    def test_excludes_dependency_and_cache_trees(self, tmp_path, monkeypatch):
+        """A plugin venv / site-packages / pip cache under HERMES_HOME must be
+        pruned by the walk, while real data (skills, config) is preserved.
+        This is the regression guard for the ballooning-backup bug."""
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        _make_hermes_tree(hermes_home)
+
+        # Simulate the heavy regeneratable trees that ballooned the backup.
+        venv_pkg = hermes_home / "plugins" / "heavy" / ".venv" / "lib" / "site-packages" / "dep"
+        venv_pkg.mkdir(parents=True)
+        (venv_pkg / "__init__.py").write_text("# dep\n")
+        pip_cache = hermes_home / ".cache" / "uv" / "wheels"
+        pip_cache.mkdir(parents=True)
+        (pip_cache / "abc.whl").write_bytes(b"\x00")
+
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out_zip = tmp_path / "backup.zip"
+        from hermes_cli.backup import run_backup
+        run_backup(Namespace(output=str(out_zip)))
+
+        with zipfile.ZipFile(out_zip, "r") as zf:
+            names = zf.namelist()
+        leaked = [n for n in names if ".venv" in n or "site-packages" in n or ".cache" in n]
+        assert leaked == [], f"regeneratable trees leaked into backup: {leaked}"
+        # Real data still present.
+        assert "skills/my-skill/SKILL.md" in names
+        assert "config.yaml" in names
+
    def test_includes_nested_hermes_agent_in_skills(self, tmp_path, monkeypatch):
        """Backup includes skills/.../hermes-agent/ but NOT root hermes-agent/."""
        hermes_home = tmp_path / ".hermes"
--- a/tests/hermes_cli/test_config.py
+++ b/tests/hermes_cli/test_config.py
@ -955,6 +955,17 @@ class TestInterimAssistantMessageConfig:
        assert raw["display"]["interim_assistant_messages"] is True


+class TestCliRefreshIntervalConfig:
+    """Test the CLI refresh_interval config default (#45592 / #48309)."""
+
+    def test_default_config_enables_cli_refresh_interval(self):
+        """cli_refresh_interval defaults to 1.0 so the idle status-bar
+        clock keeps ticking and the bottom chrome stays alive during
+        idle (#45592). Users on emulators where the periodic redraw
+        fights auto-scroll can set it to 0 (#48309)."""
+        assert DEFAULT_CONFIG["display"]["cli_refresh_interval"] == 1.0
+
+
 class TestDiscordChannelPromptsConfig:
    def test_default_config_includes_discord_channel_prompts(self):
        assert DEFAULT_CONFIG["discord"]["channel_prompts"] == {}
--- a/tests/hermes_cli/test_debug.py
+++ b/tests/hermes_cli/test_debug.py
@ -31,6 +31,9 @@ def hermes_home(tmp_path, monkeypatch):
    (logs_dir / "gateway.log").write_text(
        "2026-04-12 17:00:10 INFO gateway.run: started\n"
    )
+    (logs_dir / "gui.log").write_text(
+        "2026-04-12 17:00:12 INFO hermes_cli.web_server: dashboard request\n"
+    )
    (logs_dir / "desktop.log").write_text(
        "2026-04-12 17:00:15 INFO desktop: backend spawned\n"
    )
@ -454,6 +457,15 @@ class TestCollectDebugReport:

        assert "--- gateway.log" in report

+    def test_report_includes_gui_log(self, hermes_home):
+        from hermes_cli.debug import collect_debug_report
+
+        with patch("hermes_cli.dump.run_dump"):
+            report = collect_debug_report(log_lines=50)
+
+        assert "--- gui.log" in report
+        assert "dashboard request" in report
+
    def test_report_includes_desktop_log(self, hermes_home):
        from hermes_cli.debug import collect_debug_report

@ -538,8 +550,8 @@ class TestRunDebugShare:
        assert "FULL agent.log" in out
        assert "FULL gateway.log" in out

-    def test_share_uploads_four_pastes(self, hermes_home, capsys):
-        """Successful share uploads report + agent.log + gateway.log + desktop.log."""
+    def test_share_uploads_five_pastes(self, hermes_home, capsys):
+        """Successful share uploads report + agent.log + gateway.log + gui.log + desktop.log."""
        from hermes_cli.debug import run_debug_share

        args = MagicMock()
@ -561,15 +573,17 @@ class TestRunDebugShare:
            run_debug_share(args)

        out = capsys.readouterr().out
-        # Should have 4 uploads: report, agent.log, gateway.log, desktop.log
-        assert call_count[0] == 4
+        # Should have 5 uploads: report, agent.log, gateway.log, gui.log, desktop.log
+        assert call_count[0] == 5
        assert "paste.rs/paste1" in out  # Report
        assert "paste.rs/paste2" in out  # agent.log
        assert "paste.rs/paste3" in out  # gateway.log
-        assert "paste.rs/paste4" in out  # desktop.log
+        assert "paste.rs/paste4" in out  # gui.log
+        assert "paste.rs/paste5" in out  # desktop.log
        assert "Report" in out
        assert "agent.log" in out
        assert "gateway.log" in out
+        assert "gui.log" in out
        assert "desktop.log" in out

        # Each log paste should start with the dump header
@ -579,7 +593,10 @@ class TestRunDebugShare:
        gateway_paste = uploaded_content[2]
        assert "--- hermes dump ---" in gateway_paste
        assert "--- full gateway.log ---" in gateway_paste
-        desktop_paste = uploaded_content[3]
+        gui_paste = uploaded_content[3]
+        assert "--- hermes dump ---" in gui_paste
+        assert "--- full gui.log ---" in gui_paste
+        desktop_paste = uploaded_content[4]
        assert "--- hermes dump ---" in desktop_paste
        assert "--- full desktop.log ---" in desktop_paste

--- a/tests/hermes_cli/test_gateway_restart_loop.py
+++ b/tests/hermes_cli/test_gateway_restart_loop.py
@ -6,6 +6,7 @@ Covers:
 - _contains_gateway_lifecycle_command pattern matching
 """

+import json
 import os
 from argparse import Namespace

@ -250,3 +251,109 @@ class TestGatewaySelfTargetingGuard:
        args = Namespace(gateway_command="restart", all=False, system=False)
        with pytest.raises(_Reached):
            gw.gateway_command(args)
+
+
+# ---------------------------------------------------------------------------
+# Defense 3: terminal_tool hard-blocks gateway lifecycle commands inside gateway
+# ---------------------------------------------------------------------------
+
+class TestTerminalToolGatewayLifecycleGuard:
+    """terminal_tool must refuse gateway lifecycle commands when _HERMES_GATEWAY=1.
+
+    Issue #37453: systemctl --user restart hermes-gateway runs as a child of the
+    gateway process.  When systemd delivers SIGTERM the gateway kills its own
+    restart command mid-execution — the service may never restart.  The guard
+    must fire before execution, unconditionally (force=True cannot bypass it).
+    """
+
+    def _make_fake_env(self):
+        class _FakeEnv:
+            env = {}
+            def execute(self, command, **kwargs):  # pragma: no cover
+                raise AssertionError("execute must not be reached")
+        return _FakeEnv()
+
+    def _minimal_config(self):
+        return {"env_type": "local", "cwd": "/tmp", "timeout": 60, "lifetime_seconds": 3600}
+
+    def _patch_env(self, monkeypatch, fake_env, *, inside_gateway: bool):
+        import tools.terminal_tool as tt
+        eid = "default"
+        monkeypatch.setattr(tt, "_active_environments", {eid: fake_env})
+        monkeypatch.setattr(tt, "_last_activity", {eid: 0.0})
+        monkeypatch.setattr(tt, "_task_env_overrides", {})
+        monkeypatch.setattr(tt, "_get_env_config", self._minimal_config)
+        if inside_gateway:
+            monkeypatch.setenv("_HERMES_GATEWAY", "1")
+        else:
+            monkeypatch.delenv("_HERMES_GATEWAY", raising=False)
+
+    @pytest.mark.parametrize("cmd", [
+        "systemctl restart hermes-gateway",
+        "systemctl --user restart hermes-gateway",
+        "systemctl stop hermes-gateway.service",
+        "hermes gateway restart",
+        "launchctl kickstart gui/501/ai.hermes.gateway",
+        "pkill -f hermes.*gateway",
+    ])
+    def test_blocks_lifecycle_commands_inside_gateway(self, monkeypatch, cmd):
+        import tools.terminal_tool as tt
+        self._patch_env(monkeypatch, self._make_fake_env(), inside_gateway=True)
+
+        result = json.loads(tt.terminal_tool(command=cmd))
+
+        assert result["exit_code"] == 1
+        assert "Blocked" in result["error"]
+
+    def test_force_true_cannot_bypass_block(self, monkeypatch):
+        import tools.terminal_tool as tt
+        self._patch_env(monkeypatch, self._make_fake_env(), inside_gateway=True)
+
+        result = json.loads(tt.terminal_tool(
+            command="systemctl restart hermes-gateway", force=True
+        ))
+
+        assert result["exit_code"] == 1
+        assert "Blocked" in result["error"]
+
+    def test_safe_systemctl_commands_pass_through(self, monkeypatch):
+        """Non-hermes systemctl commands must not be blocked by this guard."""
+        import tools.terminal_tool as tt
+
+        calls = []
+
+        class _FakeEnv:
+            env = {}
+            def execute(self, command, **kwargs):
+                calls.append(command)
+                return {"output": "Active: running", "returncode": 0}
+
+        self._patch_env(monkeypatch, _FakeEnv(), inside_gateway=True)
+        monkeypatch.setattr(tt, "_check_all_guards", lambda cmd, env: {"approved": True})
+
+        result = json.loads(tt.terminal_tool(command="systemctl status nginx"))
+
+        assert result["exit_code"] == 0
+        assert calls == ["systemctl status nginx"]
+
+    def test_guard_inactive_outside_gateway(self, monkeypatch):
+        """Without _HERMES_GATEWAY=1 the lifecycle guard must not fire."""
+        import tools.terminal_tool as tt
+
+        calls = []
+
+        class _FakeEnv:
+            env = {}
+            def execute(self, command, **kwargs):
+                calls.append(command)
+                return {"output": "restarting...", "returncode": 0}
+
+        self._patch_env(monkeypatch, _FakeEnv(), inside_gateway=False)
+        monkeypatch.setattr(tt, "_check_all_guards", lambda cmd, env: {"approved": True})
+
+        result = json.loads(tt.terminal_tool(command="systemctl restart hermes-gateway"))
+
+        # Outside the gateway the lifecycle guard doesn't block — the normal
+        # approval flow handles it (here mocked as approved).
+        assert result["exit_code"] == 0
+        assert calls == ["systemctl restart hermes-gateway"]
--- a/tests/hermes_cli/test_kanban_db.py
+++ b/tests/hermes_cli/test_kanban_db.py
@ -505,6 +505,171 @@ def test_stale_claim_with_live_pid_uses_env_ttl_override(
        assert task.claim_expires > int(time.time()) + 3000


+def test_stale_claim_deferred_when_live_worker_survives_termination(
+    kanban_home, monkeypatch,
+):
+    """A TTL-expired claim whose worker survives the kill must NOT be released.
+
+    Releasing would let the dispatcher spawn a duplicate beside the still-alive
+    worker — the runaway seen when a cgroup memory.high throttle parks a worker
+    in uninterruptible (D) state, where a pending SIGKILL cannot land. The claim
+    is held (extended) and retried next tick instead.
+    """
+    import hermes_cli.kanban_db as _kb
+
+    with kb.connect() as conn:
+        t = kb.create_task(conn, title="x", assignee="a")
+        host = _kb._claimer_id().split(":", 1)[0]
+        kb.claim_task(conn, t, claimer=f"{host}:worker")
+        kb._set_worker_pid(conn, t, 12345)
+
+        old_expires = int(time.time()) - 60
+        # Heartbeat stale by > 1h so the live-pid EXTEND branch is skipped and
+        # the terminate path (the wedged-worker case) runs.
+        conn.execute(
+            "UPDATE tasks SET claim_expires = ?, last_heartbeat_at = ? "
+            "WHERE id = ?",
+            (old_expires, int(time.time()) - 7200, t),
+        )
+        monkeypatch.setattr(_kb, "_pid_alive", lambda _pid: True)
+        monkeypatch.setattr(
+            _kb, "_terminate_reclaimed_worker",
+            lambda *a, **k: {
+                "termination_attempted": True,
+                "host_local": True,
+                "terminated": False,
+            },
+        )
+        reclaimed = kb.release_stale_claims(conn, signal_fn=lambda _p, _s: None)
+        assert reclaimed == 0
+
+        assert kb.get_task(conn, t).status == "running"
+        worker_pid = conn.execute(
+            "SELECT worker_pid FROM tasks WHERE id = ?", (t,),
+        ).fetchone()[0]
+        assert worker_pid == 12345  # worker not orphaned
+        claim_expires = conn.execute(
+            "SELECT claim_expires FROM tasks WHERE id = ?", (t,),
+        ).fetchone()[0]
+        assert claim_expires > old_expires  # claim held, not released
+
+        kinds = [
+            r["kind"] for r in conn.execute(
+                "SELECT kind FROM task_events WHERE task_id = ?", (t,),
+            ).fetchall()
+        ]
+        assert "reclaim_deferred" in kinds
+        assert "reclaimed" not in kinds
+
+
+def test_stale_claim_reclaimed_when_termination_succeeds(
+    kanban_home, monkeypatch,
+):
+    """When the worker is actually killed, the claim is released as before."""
+    import hermes_cli.kanban_db as _kb
+
+    with kb.connect() as conn:
+        t = kb.create_task(conn, title="x", assignee="a")
+        host = _kb._claimer_id().split(":", 1)[0]
+        kb.claim_task(conn, t, claimer=f"{host}:worker")
+        kb._set_worker_pid(conn, t, 12345)
+        conn.execute(
+            "UPDATE tasks SET claim_expires = ?, last_heartbeat_at = ? "
+            "WHERE id = ?",
+            (int(time.time()) - 60, int(time.time()) - 7200, t),
+        )
+        monkeypatch.setattr(_kb, "_pid_alive", lambda _pid: False)
+        monkeypatch.setattr(
+            _kb, "_terminate_reclaimed_worker",
+            lambda *a, **k: {
+                "termination_attempted": True,
+                "host_local": True,
+                "terminated": True,
+            },
+        )
+        reclaimed = kb.release_stale_claims(conn, signal_fn=lambda _p, _s: None)
+        assert reclaimed == 1
+        assert kb.get_task(conn, t).status == "ready"
+
+
+def test_stale_claim_released_when_worker_not_host_local(
+    kanban_home, monkeypatch,
+):
+    """The defer guard only holds OUR own surviving workers.
+
+    A claim we cannot manage (different host, or no kill attempted) must still
+    be released, otherwise a foreign-host claim could strand a task forever.
+    """
+    import hermes_cli.kanban_db as _kb
+
+    with kb.connect() as conn:
+        t = kb.create_task(conn, title="x", assignee="a")
+        host = _kb._claimer_id().split(":", 1)[0]
+        kb.claim_task(conn, t, claimer=f"{host}:worker")
+        kb._set_worker_pid(conn, t, 12345)
+        conn.execute(
+            "UPDATE tasks SET claim_expires = ?, last_heartbeat_at = ? "
+            "WHERE id = ?",
+            (int(time.time()) - 60, int(time.time()) - 7200, t),
+        )
+        monkeypatch.setattr(_kb, "_pid_alive", lambda _pid: True)
+        monkeypatch.setattr(
+            _kb, "_terminate_reclaimed_worker",
+            lambda *a, **k: {
+                "termination_attempted": False,
+                "host_local": False,
+                "terminated": False,
+            },
+        )
+        reclaimed = kb.release_stale_claims(conn, signal_fn=lambda _p, _s: None)
+        assert reclaimed == 1
+        assert kb.get_task(conn, t).status == "ready"
+
+
+def test_detect_stale_defers_when_live_worker_survives(kanban_home, monkeypatch):
+    """detect_stale_running must also hold the claim when the worker survives."""
+    import hermes_cli.kanban_db as _kb
+
+    with kb.connect() as conn:
+        t = kb.create_task(conn, title="wedged", assignee="worker")
+        kb.claim_task(conn, t)
+        kb._set_worker_pid(conn, t, os.getpid())
+
+        five_hours_ago = int(time.time()) - (5 * 3600)
+        with kb.write_txn(conn):
+            conn.execute(
+                "UPDATE tasks SET started_at = ?, last_heartbeat_at = NULL "
+                "WHERE id = ?",
+                (five_hours_ago, t),
+            )
+            conn.execute(
+                "UPDATE task_runs SET started_at = ? "
+                "WHERE id = (SELECT current_run_id FROM tasks WHERE id = ?)",
+                (five_hours_ago, t),
+            )
+
+        monkeypatch.setattr(_kb, "_pid_alive", lambda _pid: True)
+        monkeypatch.setattr(
+            _kb, "_terminate_reclaimed_worker",
+            lambda *a, **k: {
+                "termination_attempted": True,
+                "host_local": True,
+                "terminated": False,
+            },
+        )
+        stale = kb.detect_stale_running(
+            conn, stale_timeout_seconds=14400, signal_fn=lambda p, s: None,
+        )
+        assert stale == []
+        assert kb.get_task(conn, t).status == "running"
+        kinds = [
+            r["kind"] for r in conn.execute(
+                "SELECT kind FROM task_events WHERE task_id = ?", (t,),
+            ).fetchall()
+        ]
+        assert "reclaim_deferred" in kinds
+
+
 def test_stale_claim_reclaim_event_records_diagnostic_payload(
    kanban_home, monkeypatch,
 ):
--- a/tests/hermes_cli/test_model_picker_expensive_confirm.py
+++ b/tests/hermes_cli/test_model_picker_expensive_confirm.py
@ -55,10 +55,12 @@ def test_prompt_toolkit_model_picker_defers_confirmation_off_key_handler(monkeyp
        lambda *_args: captured.setdefault("ran_inline", True)
    )

-    _bound(cli_mod.HermesCLI._handle_model_picker_selection, self_)()
+    # The key handler now resolves persistence via resolve_persist_behavior,
+    # which defaults to True (persist-by-default). Simulate that call.
+    _bound(cli_mod.HermesCLI._handle_model_picker_selection, self_)(persist_global=True)

    assert self_._model_picker_state is None
    assert captured["started"] is True
    assert captured["daemon"] is True
-    assert captured["args"] == (result, False)
+    assert captured["args"] == (result, True)
    assert "ran_inline" not in captured
--- a/tests/hermes_cli/test_model_switch_persist_default.py
+++ b/tests/hermes_cli/test_model_switch_persist_default.py
@ -0,0 +1,122 @@
+"""Tests for persist-by-default model switching.
+
+Covers:
+- ``parse_model_flags`` recognises ``--session`` (and keeps ``--global``).
+- ``resolve_persist_behavior`` applies the config-gated default and the
+  ``--session`` / ``--global`` overrides.
+- The default (no flags) persists, which is the user-facing fix: a plain
+  ``/model <name>`` survives across sessions.
+"""
+
+from unittest.mock import patch
+
+from hermes_cli.model_switch import parse_model_flags, resolve_persist_behavior
+
+
+# ---------------------------------------------------------------------------
+# parse_model_flags
+# ---------------------------------------------------------------------------
+
+
+class TestParseModelFlagsSession:
+    def test_no_flags(self):
+        assert parse_model_flags("sonnet") == ("sonnet", "", False, False, False)
+
+    def test_global_flag(self):
+        assert parse_model_flags("sonnet --global") == ("sonnet", "", True, False, False)
+
+    def test_session_flag(self):
+        assert parse_model_flags("sonnet --session") == (
+            "sonnet",
+            "",
+            False,
+            False,
+            True,
+        )
+
+    def test_session_with_provider(self):
+        assert parse_model_flags("sonnet --provider anthropic --session") == (
+            "sonnet",
+            "anthropic",
+            False,
+            False,
+            True,
+        )
+
+    def test_refresh_flag_still_parsed(self):
+        assert parse_model_flags("--refresh") == ("", "", False, True, False)
+
+    def test_unicode_dash_session_normalized(self):
+        # Telegram/iOS auto-converts -- to en/em dashes.
+        assert parse_model_flags("sonnet \u2013session") == (
+            "sonnet",
+            "",
+            False,
+            False,
+            True,
+        )
+
+
+# ---------------------------------------------------------------------------
+# resolve_persist_behavior
+# ---------------------------------------------------------------------------
+
+
+class TestResolvePersistBehavior:
+    def test_session_flag_always_session_only(self):
+        # --session opts out even if the config default is True.
+        with _config({"model": {"persist_switch_by_default": True}}):
+            assert resolve_persist_behavior(False, True) is False
+
+    def test_global_flag_always_persists(self):
+        # --global forces persist even if the config default is False.
+        with _config({"model": {"persist_switch_by_default": False}}):
+            assert resolve_persist_behavior(True, False) is True
+
+    def test_default_persists_when_config_missing(self):
+        # No model section at all → built-in default (True).
+        with _config({}):
+            assert resolve_persist_behavior(False, False) is True
+
+    def test_default_persists_when_key_true(self):
+        with _config({"model": {"persist_switch_by_default": True}}):
+            assert resolve_persist_behavior(False, False) is True
+
+    def test_default_session_only_when_key_false(self):
+        with _config({"model": {"persist_switch_by_default": False}}):
+            assert resolve_persist_behavior(False, False) is False
+
+    def test_default_when_model_is_flat_string(self):
+        # Fresh install: ``model: ""`` (not a dict) → built-in default True.
+        with _config({"model": ""}):
+            assert resolve_persist_behavior(False, False) is True
+
+    def test_session_overrides_global_when_both_set(self):
+        # --session is the explicit opt-out and wins over --global.
+        with _config({"model": {"persist_switch_by_default": True}}):
+            assert resolve_persist_behavior(True, True) is False
+
+
+# ---------------------------------------------------------------------------
+# helper
+# ---------------------------------------------------------------------------
+
+
+class _config:
+    """Context manager that patches ``load_config`` to return a fixed dict."""
+
+    def __init__(self, cfg: dict):
+        self.cfg = cfg
+
+    def __enter__(self):
+        self._patch = patch(
+            "hermes_cli.config.load_config",
+            return_value=self.cfg,
+        )
+        # resolve_persist_behavior imports load_config lazily inside the
+        # function, so patching the source module is sufficient.
+        self._patch.start()
+        return self
+
+    def __exit__(self, *exc):
+        self._patch.stop()
--- a/tests/hermes_cli/test_profiles.py
+++ b/tests/hermes_cli/test_profiles.py
@ -35,6 +35,7 @@ from hermes_cli.profiles import (
    has_bundled_skills_opt_out,
    NO_BUNDLED_SKILLS_MARKER,
    backfill_profile_envs,
+    profiles_to_serve,
 )
 from hermes_cli.config import DEFAULT_CONFIG

@ -1487,3 +1488,48 @@ class TestEdgeCases:
            delete_profile("coder", yes=True)

        assert get_active_profile() == "default"
+
+
+class TestProfilesToServe:
+    """profiles_to_serve(multiplex) — the gateway's profile-enumeration chokepoint."""
+
+    def test_off_returns_only_active_default(self, profile_env):
+        serve = profiles_to_serve(multiplex=False)
+        assert len(serve) == 1
+        name, home = serve[0]
+        assert name == "default"
+        assert home == _get_default_hermes_home()
+
+    def test_off_returns_only_active_named(self, profile_env, monkeypatch):
+        # A named profile's gateway runs with HERMES_HOME pointing at the
+        # profile dir; get_active_profile_name() infers the name from there.
+        create_profile("coder", no_alias=True)
+        monkeypatch.setenv("HERMES_HOME", str(get_profile_dir("coder")))
+        serve = profiles_to_serve(multiplex=False)
+        assert len(serve) == 1
+        assert serve[0][0] == "coder"
+        assert serve[0][1] == get_profile_dir("coder")
+
+    def test_on_returns_default_plus_all_named(self, profile_env):
+        create_profile("coder", no_alias=True)
+        create_profile("writer", no_alias=True)
+        serve = dict(profiles_to_serve(multiplex=True))
+        assert set(serve) == {"default", "coder", "writer"}
+        assert serve["default"] == _get_default_hermes_home()
+        assert serve["coder"] == get_profile_dir("coder")
+
+    def test_on_default_always_first(self, profile_env):
+        create_profile("coder", no_alias=True)
+        serve = profiles_to_serve(multiplex=True)
+        assert serve[0][0] == "default"
+
+    def test_on_active_profile_does_not_change_set(self, profile_env):
+        """Enumeration is independent of which profile is active."""
+        create_profile("coder", no_alias=True)
+        set_active_profile("coder")
+        serve = dict(profiles_to_serve(multiplex=True))
+        assert set(serve) == {"default", "coder"}
+
+    def test_on_no_named_profiles_returns_just_default(self, profile_env):
+        serve = profiles_to_serve(multiplex=True)
+        assert [n for n, _ in serve] == ["default"]
--- a/tests/hermes_cli/test_provider_catalog.py
+++ b/tests/hermes_cli/test_provider_catalog.py
@ -0,0 +1,127 @@
+"""Tests for the unified provider catalog (hermes_cli.provider_catalog).
+
+These are invariant tests, not snapshots: they assert the parity *contract*
+between what ``hermes model`` shows (``CANONICAL_PROVIDERS``) and what the
+catalog exposes, plus how each provider's ``auth_type`` maps to a desktop tab —
+never a specific provider count or a frozen vendor list (both change over time).
+"""
+
+from hermes_cli.models import CANONICAL_PROVIDERS
+from hermes_cli.provider_catalog import (
+    ProviderDescriptor,
+    provider_catalog,
+    provider_catalog_by_slug,
+    tab_for_auth_type,
+)
+
+
+def test_catalog_covers_every_hermes_model_provider():
+    """PARITY CONTRACT: the catalog == the `hermes model` universe."""
+    slugs = {d.slug for d in provider_catalog()}
+    for entry in CANONICAL_PROVIDERS:
+        assert entry.slug in slugs, (
+            f"{entry.slug} is shown in `hermes model` but missing from provider_catalog()"
+        )
+
+
+def test_catalog_has_no_providers_outside_hermes_model():
+    """The catalog must not invent providers `hermes model` doesn't show."""
+    canonical = {e.slug for e in CANONICAL_PROVIDERS}
+    for d in provider_catalog():
+        assert d.slug in canonical, f"{d.slug} in catalog but not in CANONICAL_PROVIDERS"
+
+
+def test_every_descriptor_lands_on_exactly_one_known_tab():
+    for d in provider_catalog():
+        assert d.tab in {"keys", "accounts"}, f"{d.slug} has bad tab {d.tab!r}"
+
+
+def test_descriptor_count_matches_canonical():
+    """One descriptor per canonical entry (no dupes, no drops)."""
+    cat = provider_catalog()
+    assert len(cat) == len(CANONICAL_PROVIDERS)
+    assert len({d.slug for d in cat}) == len(cat)
+
+
+def test_profileless_providers_still_present():
+    """Providers without a ProviderProfile must still resolve via fallbacks.
+
+    lmstudio / openai-api / tencent-tokenhub / xai-oauth have no profile on
+    main; they exist only as registry + canonical entries. The catalog must
+    not require a profile to include a provider.
+    """
+    by = provider_catalog_by_slug()
+    for slug in ("lmstudio", "openai-api", "tencent-tokenhub", "xai-oauth"):
+        assert slug in by, f"{slug} dropped from catalog (profile-less provider)"
+        assert by[slug].label, f"{slug} has empty label despite canonical fallback"
+        assert by[slug].description, f"{slug} has empty description despite fallback"
+
+
+def test_api_key_providers_route_to_keys_oauth_to_accounts():
+    by = provider_catalog_by_slug()
+    # api_key → keys
+    assert by["kilocode"].tab == "keys"
+    assert by["openai-api"].tab == "keys"
+    # account / sign-in flows → accounts
+    assert by["google-gemini-cli"].tab == "accounts"
+    assert by["copilot-acp"].tab == "accounts"
+
+
+def test_copilot_surfaces_as_a_provider_with_its_own_token_var():
+    """Regression for the reported bug: a GitHub Copilot login showed up under
+    tools, never as a provider, because the shared GITHUB_TOKEN is tool-category.
+
+    Copilot authenticates via the `copilot`/api_key path, so it belongs on the
+    keys tab — but its PRIMARY credential var must be the provider-owned
+    COPILOT_GITHUB_TOKEN, not the shared tool-category GITHUB_TOKEN. That is what
+    lets the desktop render Copilot as its own provider card.
+    """
+    by = provider_catalog_by_slug()
+    assert "copilot" in by
+    d = by["copilot"]
+    assert d.tab == "keys"
+    assert d.api_key_env_vars, "Copilot must expose a credential env var"
+    assert d.api_key_env_vars[0] == "COPILOT_GITHUB_TOKEN", (
+        "Copilot's primary var must be the provider-owned token, not shared GITHUB_TOKEN"
+    )
+
+
+def test_bedrock_routes_to_keys():
+    """Bedrock is aws_sdk (AWS_REGION/AWS_PROFILE), configured on the keys tab."""
+    by = provider_catalog_by_slug()
+    assert by["bedrock"].tab == "keys"
+
+
+def test_api_key_providers_expose_a_credential_env_var():
+    """Every keys-tab provider that authenticates via a pasted API key must
+    surface at least one env var to write the key into (otherwise the GUI can't
+    configure it).
+
+    Exemptions: ``aws_sdk`` (bedrock — uses AWS_REGION/AWS_PROFILE) and the
+    ``custom`` bring-your-own-endpoint pseudo-provider, which is configured
+    inline via the local-endpoint flow rather than a fixed env var.
+    """
+    exempt = {"custom"}
+    for d in provider_catalog():
+        if d.auth_type == "api_key" and d.slug not in exempt:
+            assert d.api_key_env_vars, f"{d.slug} is api_key but exposes no env var"
+
+
+def test_order_mirrors_canonical_declaration():
+    cat = provider_catalog()
+    assert [d.order for d in cat] == list(range(len(cat)))
+    assert [d.slug for d in cat] == [e.slug for e in CANONICAL_PROVIDERS]
+
+
+def test_descriptors_are_provider_descriptor_instances():
+    for d in provider_catalog():
+        assert isinstance(d, ProviderDescriptor)
+
+
+def test_tab_for_auth_type_helper():
+    assert tab_for_auth_type("api_key") == "keys"
+    assert tab_for_auth_type("aws_sdk") == "keys"
+    assert tab_for_auth_type("oauth_external") == "accounts"
+    assert tab_for_auth_type("oauth_device_code") == "accounts"
+    assert tab_for_auth_type("copilot") == "accounts"
+    assert tab_for_auth_type("external_process") == "accounts"
--- a/tests/hermes_cli/test_provider_parity.py
+++ b/tests/hermes_cli/test_provider_parity.py
@ -0,0 +1,90 @@
+"""End-to-end provider parity contract: the desktop Providers tabs must show
+the SAME provider universe as ``hermes model`` (the CLI/TUI picker).
+
+This is the single load-bearing invariant of the unified provider catalog:
+
+    keys(/api/env provider rows) ∪ ids(/api/providers/oauth) ⊇ CANONICAL_PROVIDERS
+
+i.e. every provider the CLI picker offers is configurable from the desktop app,
+on one of the two Providers sub-tabs (API keys or Accounts). It is asserted as
+an invariant against the real FastAPI endpoints (not a snapshot / count), so it
+can never silently drift again when a provider plugin is added.
+"""
+
+from fastapi.testclient import TestClient
+
+from hermes_cli.models import CANONICAL_PROVIDERS
+from hermes_cli.provider_catalog import provider_catalog
+from hermes_cli.web_server import _SESSION_TOKEN, app
+
+client = TestClient(app)
+HEADERS = {"X-Hermes-Session-Token": _SESSION_TOKEN}
+
+# `custom` is the bring-your-own-endpoint pseudo-provider configured inline via
+# the model picker's local-endpoint flow, not a fixed credential card. It is in
+# the CLI picker's universe but intentionally has no dedicated Providers-tab
+# card. Exempt it from the union check.
+_EXEMPT = {"custom"}
+
+# Providers that legitimately offer BOTH auth methods and so intentionally
+# appear on both desktop tabs (an API-key card AND an account sign-in card).
+# Anthropic supports a direct API key (Keys tab) and a subscription OAuth /
+# Claude Code login (Accounts tab); surfacing both is correct, not a bug.
+_DUAL_TAB = {"anthropic"}
+
+
+def _keys_tab_providers() -> set[str]:
+    """Provider slugs that have at least one card on the desktop API-keys tab."""
+    data = client.get("/api/env", headers=HEADERS).json()
+    return {
+        info.get("provider")
+        for info in data.values()
+        if info.get("category") == "provider" and info.get("provider")
+    }
+
+
+def _accounts_tab_providers() -> set[str]:
+    """Provider slugs offered on the desktop Accounts tab."""
+    data = client.get("/api/providers/oauth", headers=HEADERS).json()
+    return {p["id"] for p in data["providers"]}
+
+
+def test_every_hermes_model_provider_is_configurable_in_desktop():
+    """PARITY CONTRACT: GUI (keys ∪ accounts) ⊇ `hermes model` universe."""
+    gui = _keys_tab_providers() | _accounts_tab_providers()
+    missing = [
+        e.slug
+        for e in CANONICAL_PROVIDERS
+        if e.slug not in _EXEMPT and e.slug not in gui
+    ]
+    assert not missing, (
+        "providers shown in `hermes model` but not configurable in the desktop "
+        f"Providers tabs: {missing}"
+    )
+
+
+def test_each_provider_lands_on_the_tab_its_auth_type_dictates():
+    """A keys-tab provider must surface under /api/env; an accounts-tab provider
+    under /api/providers/oauth. Cross-checks the catalog's tab routing against
+    where each provider actually renders.
+    """
+    keys = _keys_tab_providers()
+    accounts = _accounts_tab_providers()
+    for d in provider_catalog():
+        if d.slug in _EXEMPT:
+            continue
+        if d.tab == "keys" and d.api_key_env_vars:
+            assert d.slug in keys, f"{d.slug} (keys tab) missing from /api/env"
+        elif d.tab == "accounts":
+            assert d.slug in accounts, f"{d.slug} (accounts tab) missing from /api/providers/oauth"
+
+
+def test_no_provider_appears_on_both_tabs():
+    """A provider should be configured exactly one way — not duplicated across
+    both tabs (which would confuse users about where to put credentials).
+
+    Exception: genuinely dual-auth providers (see ``_DUAL_TAB``) intentionally
+    appear on both tabs.
+    """
+    overlap = (_keys_tab_providers() & _accounts_tab_providers()) - _EXEMPT - _DUAL_TAB
+    assert not overlap, f"providers appearing on BOTH desktop tabs: {sorted(overlap)}"
--- a/tests/hermes_cli/test_web_oauth_dispatch.py
+++ b/tests/hermes_cli/test_web_oauth_dispatch.py
@ -470,6 +470,39 @@ def test_xai_oauth_listed_as_loopback_flow():
    assert "grok" in providers["xai-oauth"]["name"].lower()


+def test_accounts_offers_every_oauth_provider_from_catalog():
+    """PARITY CONTRACT: every accounts-tab provider in the unified catalog (the
+    `hermes model` universe) must be offered by /api/providers/oauth. This keeps
+    the desktop Accounts tab in lockstep with the CLI picker — no provider the
+    CLI can sign into may be missing from the GUI.
+    """
+    from hermes_cli.provider_catalog import provider_catalog
+
+    resp = client.get("/api/providers/oauth", headers=HEADERS)
+    assert resp.status_code == 200, resp.text
+    offered = {p["id"] for p in resp.json()["providers"]}
+    for d in provider_catalog():
+        if d.tab == "accounts":
+            assert d.slug in offered, (
+                f"{d.slug} is an accounts-tab provider in `hermes model` but is "
+                f"missing from the desktop Accounts tab (/api/providers/oauth)"
+            )
+
+
+def test_gemini_cli_and_copilot_acp_now_in_accounts():
+    """Regression: google-gemini-cli and copilot-acp were canonical providers the
+    CLI could configure, but had no Accounts card (the reported GUI/CLI drift).
+    """
+    resp = client.get("/api/providers/oauth", headers=HEADERS)
+    assert resp.status_code == 200, resp.text
+    providers = {p["id"]: p for p in resp.json()["providers"]}
+    assert "google-gemini-cli" in providers
+    assert "copilot-acp" in providers
+    # copilot-acp is managed by an external CLI: read-only card, not auto-removable.
+    assert providers["copilot-acp"]["flow"] == "external"
+    assert providers["copilot-acp"]["disconnectable"] is False
+
+
 def test_oauth_catalog_marks_external_providers_not_disconnectable():
    """External CLI credentials are visible in Accounts but cannot be removed by Hermes."""
    resp = client.get("/api/providers/oauth", headers=HEADERS)
@ -804,3 +837,56 @@ def test_unknown_pkce_provider_rejected_cleanly():
    # 4xx — what we MUST NOT see is a 200 with claude.ai in the body.
    assert resp.status_code >= 400, resp.text
    assert "claude.ai" not in resp.text.lower()
+
+
+def test_status_falls_through_to_generic_dispatcher_for_catalog_only_provider():
+    """Accounts-tab providers with no hardcoded branch reflect REAL status.
+
+    Providers appended to the Accounts tab from the unified provider_catalog()
+    carry status_fn=None and may have no explicit branch in
+    _resolve_provider_status. Before the fallthrough they rendered permanently
+    logged-out; now they dispatch to hermes_cli.auth.get_auth_status (the
+    canonical slug dispatcher) so membership AND status both auto-extend.
+    """
+    import hermes_cli.web_server as ws
+
+    fake_status = {
+        "logged_in": True,
+        "provider": "some-future-oauth",
+        "name": "Future OAuth Provider",
+        "access_token": "sk-future-secret-token-xyz",
+        "expires_at": "2026-12-01T00:00:00Z",
+        "has_refresh_token": True,
+    }
+    with patch("hermes_cli.auth.get_auth_status", return_value=fake_status):
+        out = ws._resolve_provider_status("some-future-oauth", None)
+
+    assert out["logged_in"] is True
+    assert out["source"] == "some-future-oauth"
+    assert out["source_label"] == "Future OAuth Provider"
+    # Token is previewed, never returned whole.
+    assert out["token_preview"] and "sk-future-secret-token-xyz" not in out["token_preview"]
+    assert out["expires_at"] == "2026-12-01T00:00:00Z"
+    assert out["has_refresh_token"] is True
+
+
+def test_status_hardcoded_branch_wins_over_generic_fallback():
+    """An existing hardcoded branch (nous) is unaffected by the fallthrough."""
+    import hermes_cli.web_server as ws
+
+    with patch(
+        "hermes_cli.auth.get_nous_auth_status",
+        return_value={"logged_in": True, "portal_base_url": "https://portal.test"},
+    ):
+        out = ws._resolve_provider_status("nous", None)
+    assert out["source"] == "nous_portal"
+    assert out["source_label"] == "https://portal.test"
+
+
+def test_status_unknown_provider_degrades_to_logged_out():
+    """A provider the generic dispatcher can't resolve stays logged-out cleanly."""
+    import hermes_cli.web_server as ws
+
+    with patch("hermes_cli.auth.get_auth_status", return_value={"logged_in": False}):
+        out = ws._resolve_provider_status("totally-unknown", None)
+    assert out["logged_in"] is False
--- a/tests/hermes_cli/test_web_server.py
+++ b/tests/hermes_cli/test_web_server.py
@ -1299,6 +1299,57 @@ class TestWebServerEndpoints:
        for key, info in data.items():
            assert info["channel_managed"] is (key in channel_keys)

+    def test_get_env_vars_surfaces_catalog_providers(self):
+        """Every keys-tab provider in the unified catalog must appear in /api/env
+        as a provider card, even when it has no hand entry in OPTIONAL_ENV_VARS.
+
+        Regression for the GUI⇄CLI drift: openai-api, kilocode, novita,
+        tencent-tokenhub, copilot were configurable via `hermes model` but
+        invisible in the desktop Providers → API keys tab.
+        """
+        from hermes_cli.provider_catalog import provider_catalog
+
+        data = self.client.get("/api/env").json()
+        for d in provider_catalog():
+            if d.tab != "keys" or not d.api_key_env_vars:
+                continue
+            # The PRIMARY credential var must surface as this provider's card.
+            # (Shared aliases like GITHUB_TOKEN are intentionally left on their
+            # existing tool category and not hijacked — see the copilot test.)
+            primary = d.api_key_env_vars[0]
+            assert primary in data, f"{primary} ({d.slug}) missing from /api/env"
+            info = data[primary]
+            assert info["category"] == "provider"
+            assert info["provider"] == d.slug
+            assert info["provider_label"] == d.label
+
+    def test_get_env_vars_provider_rows_carry_grouping_hints(self):
+        """Provider env rows expose the backend `provider`/`provider_label` the
+        desktop Keys tab groups by (so it no longer relies on prefix guesses)."""
+        data = self.client.get("/api/env").json()
+        # OPENAI_API_KEY is a hand-listed protected var AND a catalog provider;
+        # it must come back tagged to the openai-api provider.
+        assert data["OPENAI_API_KEY"]["provider"] == "openai-api"
+        assert data["OPENAI_API_KEY"]["category"] == "provider"
+
+    def test_get_env_vars_copilot_uses_provider_token_not_shared_github_token(self):
+        """Copilot surfaces as its own provider card via COPILOT_GITHUB_TOKEN;
+        the shared GITHUB_TOKEN keeps its existing (tool) category."""
+        data = self.client.get("/api/env").json()
+        assert data["COPILOT_GITHUB_TOKEN"]["provider"] == "copilot"
+        assert data["COPILOT_GITHUB_TOKEN"]["category"] == "provider"
+        # Shared GITHUB_TOKEN must NOT be hijacked into the copilot provider card.
+        assert data.get("GITHUB_TOKEN", {}).get("provider", "") != "copilot"
+
+    def test_get_env_vars_bedrock_aws_vars_tagged_to_provider(self):
+        """Bedrock (aws_sdk, no api-key) must still appear on the Keys tab: its
+        AWS_REGION/AWS_PROFILE settings are tagged to the bedrock provider card.
+        """
+        data = self.client.get("/api/env").json()
+        assert data["AWS_REGION"]["provider"] == "bedrock"
+        assert data["AWS_REGION"]["category"] == "provider"
+        assert data["AWS_PROFILE"]["provider"] == "bedrock"
+
    def test_platform_scoped_messaging_env_vars_are_channel_managed(self):
        from hermes_cli.web_server import (
            _MESSAGING_KEYS_PAGE_KEYS,
@ -1552,6 +1603,27 @@ class TestWebServerEndpoints:
        assert telegram["enabled"] is False
        assert any(field["key"] == "TELEGRAM_BOT_TOKEN" and field["required"] for field in telegram["env_vars"])

+    def test_slack_messaging_platform_exposes_user_allowlist(self):
+        resp = self.client.get("/api/messaging/platforms")
+
+        assert resp.status_code == 200
+        platforms = resp.json()["platforms"]
+        slack = next(platform for platform in platforms if platform["id"] == "slack")
+        fields = {field["key"]: field for field in slack["env_vars"]}
+
+        assert "allowed Slack member IDs" in slack["description"]
+        assert set(fields) >= {
+            "SLACK_BOT_TOKEN",
+            "SLACK_APP_TOKEN",
+            "SLACK_ALLOWED_USERS",
+        }
+        assert fields["SLACK_ALLOWED_USERS"]["prompt"] == "Allowed Slack member IDs"
+        assert fields["SLACK_ALLOWED_USERS"]["is_password"] is False
+        assert "member IDs" in fields["SLACK_ALLOWED_USERS"]["description"]
+        assert "Bot User OAuth Token" in fields["SLACK_BOT_TOKEN"]["help"]
+        assert "App-Level Tokens" in fields["SLACK_APP_TOKEN"]["help"]
+        assert "Copy member ID" in fields["SLACK_ALLOWED_USERS"]["help"]
+
    def test_weixin_messaging_metadata_describes_personal_ilink_setup(self):
        resp = self.client.get("/api/messaging/platforms")

@ -1628,6 +1700,70 @@ class TestWebServerEndpoints:
        telegram = next(platform for platform in status if platform["id"] == "telegram")
        assert telegram["enabled"] is False

+    def test_update_messaging_platform_saves_slack_allowed_users(self):
+        from hermes_cli.config import load_env
+
+        resp = self.client.put(
+            "/api/messaging/platforms/slack",
+            json={"env": {"SLACK_ALLOWED_USERS": "U01ABC2DEF3,U04XYZ5LMN6"}},
+        )
+
+        assert resp.status_code == 200
+        assert load_env()["SLACK_ALLOWED_USERS"] == "U01ABC2DEF3,U04XYZ5LMN6"
+
+    def test_update_messaging_platform_rejects_swapped_slack_bot_token(self):
+        resp = self.client.put(
+            "/api/messaging/platforms/slack",
+            json={"env": {"SLACK_BOT_TOKEN": "xapp-wrong-token-type"}},
+        )
+
+        assert resp.status_code == 400
+        assert "xoxb-" in resp.json()["detail"]
+
+    def test_update_messaging_platform_rejects_swapped_slack_app_token(self):
+        resp = self.client.put(
+            "/api/messaging/platforms/slack",
+            json={"env": {"SLACK_APP_TOKEN": "xoxb-wrong-token-type"}},
+        )
+
+        assert resp.status_code == 400
+        assert "xapp-" in resp.json()["detail"]
+
+    def test_update_messaging_platform_rejects_invalid_slack_allowed_users(self):
+        resp = self.client.put(
+            "/api/messaging/platforms/slack",
+            json={"env": {"SLACK_ALLOWED_USERS": "U01ABC2DEF3,not-a-user"}},
+        )
+
+        assert resp.status_code == 400
+        assert "member IDs" in resp.json()["detail"]
+
+    def test_update_messaging_platform_accepts_slack_allowed_users_wildcard(self):
+        # "*" is the gateway's allow-all wildcard (gateway/platforms/slack.py),
+        # so the dashboard must accept it rather than rejecting it as malformed.
+        from hermes_cli.config import load_env
+
+        resp = self.client.put(
+            "/api/messaging/platforms/slack",
+            json={"env": {"SLACK_ALLOWED_USERS": "*"}},
+        )
+
+        assert resp.status_code == 200
+        assert load_env()["SLACK_ALLOWED_USERS"] == "*"
+
+    def test_update_messaging_platform_accepts_slack_allowed_users_trailing_comma(self):
+        # The gateway drops empty entries (gateway/platforms/slack.py), so a
+        # trailing/interior comma must not be rejected by the dashboard.
+        from hermes_cli.config import load_env
+
+        resp = self.client.put(
+            "/api/messaging/platforms/slack",
+            json={"env": {"SLACK_ALLOWED_USERS": "U01ABC2DEF3,,W04XYZ5LMN6,"}},
+        )
+
+        assert resp.status_code == 200
+        assert load_env()["SLACK_ALLOWED_USERS"] == "U01ABC2DEF3,,W04XYZ5LMN6,"
+
    def test_messaging_platform_test_reports_missing_required_setup(self):
        resp = self.client.put("/api/messaging/platforms/discord", json={"enabled": True})
        assert resp.status_code == 200
@ -5062,6 +5198,7 @@ class TestPtyWebSocket:

        _argv, _cwd, env = self.ws_module._resolve_chat_argv()

+        assert env["HERMES_TUI_DASHBOARD"] == "1"
        assert env["HERMES_TUI_INLINE"] == "1"
        assert env["HERMES_TUI_DISABLE_MOUSE"] == "1"

--- a/tests/hermes_cli/test_web_server_files.py
+++ b/tests/hermes_cli/test_web_server_files.py
@ -436,3 +436,55 @@ def test_stream_upload_large_file_under_cap_succeeds(forced_files_client, monkey
    assert created.status_code == 200
    assert file_path.stat().st_size == len(payload)
    assert file_path.read_bytes() == payload
+
+
+def test_stream_upload_cleans_temp_on_cancellation(forced_files_client):
+    """A client disconnect mid-stream (asyncio.CancelledError) must not leak a temp file.
+
+    CancelledError is a BaseException, not an Exception, so it bypasses the
+    endpoint's ``except`` clauses entirely. The cleanup therefore lives in a
+    ``finally`` keyed on a success flag — without it, every aborted large
+    upload (the exact NS-501 scenario) would orphan a partial ``.upload`` temp
+    file in the target directory. We invoke the endpoint coroutine directly so
+    the BaseException propagates instead of being swallowed by the test client.
+    """
+    import asyncio
+
+    _client, root = forced_files_client
+    target = root / "out" / "aborted.bin"
+    target.parent.mkdir(parents=True, exist_ok=True)
+
+    class _AbortingUpload:
+        """UploadFile stand-in that yields one chunk then aborts like a dropped client."""
+
+        filename = "aborted.bin"
+
+        def __init__(self):
+            self._calls = 0
+
+        async def read(self, _size):
+            self._calls += 1
+            if self._calls == 1:
+                return b"partial chunk before the client vanished"
+            raise asyncio.CancelledError()
+
+        async def close(self):
+            return None
+
+    request = SimpleNamespace()
+
+    with pytest.raises(asyncio.CancelledError):
+        asyncio.run(
+            web_server.upload_managed_file_stream(
+                request=request,
+                file=_AbortingUpload(),
+                path=str(target),
+                overwrite=True,
+            )
+        )
+
+    # No partial data was promoted into place ...
+    assert not target.exists()
+    # ... and no .upload temp file was left behind.
+    leftovers = [p.name for p in target.parent.iterdir() if ".upload" in p.name]
+    assert leftovers == [], f"temp upload files leaked on cancellation: {leftovers}"
--- a/tests/openviking_plugin/test_openviking.py
+++ b/tests/openviking_plugin/test_openviking.py
@ -265,6 +265,355 @@ class TestOpenVikingSkillQuerySafety:
        assert RecordingVikingClient.calls == []


+class TestOpenVikingTurnConversion:
+    def test_extract_current_turn_anchors_on_latest_matching_user_and_assistant(self):
+        messages = [
+            {"role": "user", "content": "Please inspect the repository for assemble hooks."},
+            {"role": "assistant", "content": "Earlier answer."},
+            {"role": "user", "content": "Please inspect the repository for assemble hooks."},
+            {
+                "role": "assistant",
+                "content": "I will search the codebase.",
+                "tool_calls": [
+                    {
+                        "id": "call_rg_1",
+                        "type": "function",
+                        "function": {
+                            "name": "shell_command",
+                            "arguments": json.dumps({"command": "rg assemble"}),
+                        },
+                    }
+                ],
+            },
+            {
+                "role": "tool",
+                "tool_call_id": "call_rg_1",
+                "name": "shell_command",
+                "content": "agent/context_engine.py: no preassemble hook",
+            },
+            {"role": "assistant", "content": "The current main does not expose assemble."},
+        ]
+
+        turn = OpenVikingMemoryProvider._extract_current_turn_messages(
+            messages,
+            "Please inspect the repository for assemble hooks.",
+            "The current main does not expose assemble.",
+        )
+
+        assert turn == messages[2:]
+
+    def test_messages_to_openviking_batch_coalesces_tool_results(self):
+        turn = [
+            {"role": "user", "content": "Please inspect the repository for assemble hooks."},
+            {
+                "role": "assistant",
+                "content": "I will search the codebase.",
+                "tool_calls": [
+                    {
+                        "id": "call_rg_1",
+                        "type": "function",
+                        "function": {
+                            "name": "shell_command",
+                            "arguments": json.dumps({"command": "rg assemble"}),
+                        },
+                    }
+                ],
+            },
+            {
+                "role": "tool",
+                "tool_call_id": "call_rg_1",
+                "name": "shell_command",
+                "content": "agent/context_engine.py: no preassemble hook",
+            },
+            {"role": "assistant", "content": "The current main does not expose assemble."},
+        ]
+
+        batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
+
+        assert [message["role"] for message in batch] == ["user", "assistant", "assistant", "assistant"]
+        assert batch[0]["parts"] == [
+            {"type": "text", "text": "Please inspect the repository for assemble hooks."}
+        ]
+        assert batch[1]["parts"] == [
+            {"type": "text", "text": "I will search the codebase."}
+        ]
+        assert batch[2]["parts"] == [
+            {
+                "type": "tool",
+                "tool_id": "call_rg_1",
+                "tool_name": "shell_command",
+                "tool_input": {"command": "rg assemble"},
+                "tool_output": "agent/context_engine.py: no preassemble hook",
+                "tool_status": "completed",
+            }
+        ]
+        assert batch[3]["parts"] == [
+            {"type": "text", "text": "The current main does not expose assemble."}
+        ]
+
+    def test_messages_to_openviking_batch_marks_json_tool_error_results(self):
+        turn = [
+            {"role": "user", "content": "Check the file."},
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "id": "call_read_1",
+                        "type": "function",
+                        "function": {
+                            "name": "read_file",
+                            "arguments": json.dumps({"path": "missing.md"}),
+                        },
+                    }
+                ],
+            },
+            {
+                "role": "tool",
+                "tool_call_id": "call_read_1",
+                "name": "read_file",
+                "content": json.dumps({"error": "File not found", "exit_code": 1}),
+            },
+        ]
+
+        batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
+
+        assert batch[1]["role"] == "assistant"
+        assert batch[1]["parts"] == [
+            {
+                "type": "tool",
+                "tool_id": "call_read_1",
+                "tool_name": "read_file",
+                "tool_input": {"path": "missing.md"},
+                "tool_output": json.dumps({"error": "File not found", "exit_code": 1}),
+                "tool_status": "error",
+            }
+        ]
+
+    def test_messages_to_openviking_batch_keeps_pending_tool_call_without_result(self):
+        turn = [
+            {"role": "user", "content": "Start a long running check."},
+            {
+                "role": "assistant",
+                "content": "Starting it now.",
+                "tool_calls": [
+                    {
+                        "id": "call_long_1",
+                        "type": "function",
+                        "function": {
+                            "name": "long_check",
+                            "arguments": json.dumps({"target": "repo"}),
+                        },
+                    }
+                ],
+            },
+        ]
+
+        batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
+
+        assert batch[1]["parts"] == [
+            {"type": "text", "text": "Starting it now."},
+            {
+                "type": "tool",
+                "tool_id": "call_long_1",
+                "tool_name": "long_check",
+                "tool_input": {"target": "repo"},
+                "tool_status": "pending",
+            },
+        ]
+
+    def test_messages_to_openviking_batch_coalesces_adjacent_tool_results(self):
+        turn = [
+            {"role": "user", "content": "Run both tools."},
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "id": "call_a",
+                        "type": "function",
+                        "function": {
+                            "name": "first_tool",
+                            "arguments": json.dumps({"x": 1}),
+                        },
+                    },
+                    {
+                        "id": "call_b",
+                        "type": "function",
+                        "function": {
+                            "name": "second_tool",
+                            "arguments": json.dumps({"y": 2}),
+                        },
+                    },
+                ],
+            },
+            {"role": "tool", "tool_call_id": "call_a", "name": "first_tool", "content": "a"},
+            {"role": "tool", "tool_call_id": "call_b", "name": "second_tool", "content": "b"},
+            {"role": "assistant", "content": "Done."},
+        ]
+
+        batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
+
+        assert [message["role"] for message in batch] == ["user", "assistant", "assistant"]
+        assert batch[1]["parts"] == [
+            {
+                "type": "tool",
+                "tool_id": "call_a",
+                "tool_name": "first_tool",
+                "tool_input": {"x": 1},
+                "tool_output": "a",
+                "tool_status": "completed",
+            },
+            {
+                "type": "tool",
+                "tool_id": "call_b",
+                "tool_name": "second_tool",
+                "tool_input": {"y": 2},
+                "tool_output": "b",
+                "tool_status": "completed",
+            },
+        ]
+
+    def test_messages_to_openviking_batch_skips_openviking_recall_tool_results(self):
+        for recall_tool_name in ("viking_search", "viking_read", "viking_browse"):
+            turn = [
+                {"role": "user", "content": "What did we decide about context assembly?"},
+                {
+                    "role": "assistant",
+                    "content": "",
+                    "tool_calls": [
+                        {
+                            "id": "call_recall_1",
+                            "type": "function",
+                            "function": {
+                                "name": recall_tool_name,
+                                "arguments": json.dumps({"query": "context assembly decision"}),
+                            },
+                        },
+                        {
+                            "id": "call_shell_1",
+                            "type": "function",
+                            "function": {
+                                "name": "shell_command",
+                                "arguments": json.dumps({"command": "rg preassemble"}),
+                            },
+                        },
+                    ],
+                },
+                {
+                    "role": "tool",
+                    "tool_call_id": "call_recall_1",
+                    "name": recall_tool_name,
+                    "content": json.dumps({
+                        "results": [
+                            {
+                                "uri": "viking://user/hermes/memories/context",
+                                "abstract": "Old OpenViking memory content",
+                            }
+                        ]
+                    }),
+                },
+                {
+                    "role": "tool",
+                    "tool_call_id": "call_shell_1",
+                    "name": "shell_command",
+                    "content": "plugins/memory/openviking/__init__.py",
+                },
+                {"role": "assistant", "content": "We decided to keep sync_turn scoped to ingestion."},
+            ]
+
+            batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
+
+            assert [message["role"] for message in batch] == ["user", "assistant", "assistant"]
+            assert batch[1]["parts"] == [
+                {
+                    "type": "tool",
+                    "tool_id": "call_shell_1",
+                    "tool_name": "shell_command",
+                    "tool_input": {"command": "rg preassemble"},
+                    "tool_output": "plugins/memory/openviking/__init__.py",
+                    "tool_status": "completed",
+                }
+            ]
+            batch_text = json.dumps(batch)
+            assert recall_tool_name not in batch_text
+            assert "Old OpenViking memory content" not in batch_text
+
+    def test_messages_to_openviking_batch_empty_tool_id_does_not_drop_other_results(self):
+        # A recall tool result that arrives with an empty tool_call_id must not
+        # poison the skip set with "" and silently drop unrelated tool results
+        # that also lack an id. Empty tool_call_id is reachable in the canonical
+        # transcript (agent_runtime_helpers defaults it to "").
+        turn = [
+            {"role": "user", "content": "What did we decide?"},
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "id": "",
+                        "type": "function",
+                        "function": {
+                            "name": "viking_search",
+                            "arguments": json.dumps({"query": "decision"}),
+                        },
+                    }
+                ],
+            },
+            {
+                "role": "tool",
+                "tool_call_id": "",
+                "name": "viking_search",
+                "content": json.dumps({"results": ["recall stuff"]}),
+            },
+            {
+                "role": "tool",
+                "tool_call_id": "",
+                "name": "shell_command",
+                "content": "important shell output",
+            },
+            {"role": "assistant", "content": "done"},
+        ]
+
+        batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
+
+        batch_text = json.dumps(batch)
+        # The unrelated (empty-id) shell result must survive.
+        assert "important shell output" in batch_text
+        # The recall tool result must still be excluded.
+        assert "recall stuff" not in batch_text
+        assert "viking_search" not in batch_text
+
+    def test_messages_to_openviking_batch_preserves_responses_text_parts(self):
+        turn = [
+            {"role": "user", "content": [{"type": "input_text", "text": "hello"}]},
+            {"role": "assistant", "content": [{"type": "output_text", "text": "answer"}]},
+        ]
+
+        batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
+
+        assert batch == [
+            {"role": "user", "parts": [{"type": "text", "text": "hello"}]},
+            {"role": "assistant", "parts": [{"type": "text", "text": "answer"}]},
+        ]
+
+    def test_messages_to_openviking_batch_adds_assistant_peer_id_when_requested(self):
+        turn = [
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "answer"},
+        ]
+
+        batch = OpenVikingMemoryProvider._messages_to_openviking_batch(
+            turn,
+            assistant_peer_id="hermes",
+        )
+
+        assert batch == [
+            {"role": "user", "parts": [{"type": "text", "text": "hello"}]},
+            {"role": "assistant", "parts": [{"type": "text", "text": "answer"}], "peer_id": "hermes"},
+        ]
+
+
 class TestOpenVikingRead:
    def test_overview_read_normalizes_uri_and_unwraps_result(self):
        provider = OpenVikingMemoryProvider()
--- a/tests/plugins/memory/test_hindsight_provider.py
+++ b/tests/plugins/memory/test_hindsight_provider.py
@ -83,6 +83,66 @@ def _make_mock_client():
    return client


+def _provider_for_mode(tmp_path, monkeypatch, mode: str):
+    """Create an initialized provider without pre-seeding its client."""
+    config = {
+        "mode": mode,
+        "apiKey": "test-key",
+        "api_url": "http://localhost:9999",
+        "bank_id": "test-bank",
+        "budget": "mid",
+        "memory_mode": "hybrid",
+    }
+    config_path = tmp_path / "hindsight" / "config.json"
+    config_path.parent.mkdir(parents=True, exist_ok=True)
+    config_path.write_text(json.dumps(config))
+
+    monkeypatch.setattr(
+        "plugins.memory.hindsight.get_hermes_home", lambda: tmp_path
+    )
+
+    provider = HindsightMemoryProvider()
+    provider.initialize(session_id="test-session", hermes_home=str(tmp_path), platform="cli")
+    return provider
+
+
+def _assert_cloud_client_lazy_installed_before_import(tmp_path, monkeypatch, mode: str):
+    """Cloud/local-external clients must ensure lazy deps before importing."""
+    import builtins
+
+    provider = _provider_for_mode(tmp_path, monkeypatch, mode)
+    ensure_calls = []
+
+    def fake_ensure(feature, prompt=True):
+        ensure_calls.append((feature, prompt))
+
+    class FakeHindsight:
+        def __init__(self, **kwargs):
+            self.kwargs = kwargs
+
+    real_import = builtins.__import__
+
+    def guarded_import(name, globals=None, locals=None, fromlist=(), level=0):
+        if name == "hindsight_client":
+            if ensure_calls != [("memory.hindsight", False)]:
+                raise ModuleNotFoundError("No module named 'hindsight_client'")
+            return SimpleNamespace(Hindsight=FakeHindsight)
+        return real_import(name, globals, locals, fromlist, level)
+
+    monkeypatch.setattr("tools.lazy_deps.ensure", fake_ensure)
+    monkeypatch.setattr(builtins, "__import__", guarded_import)
+
+    client = provider._get_client()
+
+    assert ensure_calls == [("memory.hindsight", False)]
+    assert isinstance(client, FakeHindsight)
+    assert client.kwargs == {
+        "base_url": "http://localhost:9999",
+        "timeout": 120.0,
+        "api_key": "test-key",
+    }
+
+
 class _FakeSessionDB:
    def __init__(self, messages=None):
        self._messages = list(messages or [])
@ -232,6 +292,14 @@ class TestSchemas:


 class TestConfig:
+    def test_cloud_client_lazy_installs_dependency_before_import(self, tmp_path, monkeypatch):
+        _assert_cloud_client_lazy_installed_before_import(tmp_path, monkeypatch, "cloud")
+
+    def test_local_external_client_lazy_installs_dependency_before_import(self, tmp_path, monkeypatch):
+        _assert_cloud_client_lazy_installed_before_import(
+            tmp_path, monkeypatch, "local_external"
+        )
+
    def test_default_values(self, provider):
        assert provider._auto_retain is True
        assert provider._auto_recall is True
--- a/tests/plugins/memory/test_openviking_provider.py
+++ b/tests/plugins/memory/test_openviking_provider.py
@ -1975,7 +1975,10 @@ def test_on_session_switch_commits_old_session_and_rotates_id():

    provider.on_session_switch("new-sid", parent_session_id="old-sid")

-    provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+    provider._client.post.assert_called_once_with(
+        "/api/v1/sessions/old-sid/commit",
+        {"keep_recent_count": 0},
+    )
    assert provider._session_id == "new-sid"
    assert provider._turn_count == 0

@ -1998,7 +2001,10 @@ def test_on_session_switch_commits_pending_tokens_without_turn_count():
    provider.on_session_switch("new-sid")

    provider._client.get.assert_called_once_with("/api/v1/sessions/old-sid")
-    provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+    provider._client.post.assert_called_once_with(
+        "/api/v1/sessions/old-sid/commit",
+        {"keep_recent_count": 0},
+    )
    assert provider._session_id == "new-sid"
    assert provider._turn_count == 0

@ -2051,7 +2057,10 @@ def test_on_session_switch_waits_for_inflight_sync_thread():
    provider.on_session_switch("new-sid")

    assert join_calls, "expected on_session_switch to join the in-flight sync thread"
-    provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+    provider._client.post.assert_called_once_with(
+        "/api/v1/sessions/old-sid/commit",
+        {"keep_recent_count": 0},
+    )


 def test_on_session_switch_noop_on_empty_new_id():
@ -2186,6 +2195,78 @@ def test_sync_turn_retries_batch_write_with_fresh_client():
    )]


+def test_sync_turn_structured_messages_include_assistant_peer_id():
+    provider = OpenVikingMemoryProvider()
+    provider._client = MagicMock()
+    provider._endpoint = "http://test"
+    provider._api_key = ""
+    provider._account = "acct"
+    provider._user = "usr"
+    provider._agent = "hermes"
+    provider._session_id = "sid-structured"
+
+    captured = []
+
+    class StubClient:
+        def __init__(self, *a, **kw):
+            pass
+
+        def post(self, path, payload=None, **kwargs):
+            captured.append((path, payload))
+            return {}
+
+    import plugins.memory.openviking as _mod
+
+    real_client_cls = _mod._VikingClient
+    _mod._VikingClient = StubClient
+    messages = [
+        {"role": "user", "content": [{"type": "input_text", "text": "u"}]},
+        {
+            "role": "assistant",
+            "content": "Looking.",
+            "tool_calls": [
+                {
+                    "id": "call-1",
+                    "type": "function",
+                    "function": {"name": "shell_command", "arguments": json.dumps({"cmd": "pwd"})},
+                }
+            ],
+        },
+        {"role": "tool", "tool_call_id": "call-1", "name": "shell_command", "content": "ok"},
+        {"role": "assistant", "content": [{"type": "output_text", "text": "a"}]},
+    ]
+    try:
+        provider.sync_turn("u", "a", messages=messages)
+        assert provider._drain_writers("sid-structured", timeout=2.0)
+    finally:
+        _mod._VikingClient = real_client_cls
+
+    assert captured == [(
+        "/api/v1/sessions/sid-structured/messages/batch",
+        {
+            "messages": [
+                {"role": "user", "parts": [{"type": "text", "text": "u"}]},
+                {"role": "assistant", "parts": [{"type": "text", "text": "Looking."}], "peer_id": "hermes"},
+                {
+                    "role": "assistant",
+                    "parts": [
+                        {
+                            "type": "tool",
+                            "tool_id": "call-1",
+                            "tool_name": "shell_command",
+                            "tool_input": {"cmd": "pwd"},
+                            "tool_output": "ok",
+                            "tool_status": "completed",
+                        }
+                    ],
+                    "peer_id": "hermes",
+                },
+                {"role": "assistant", "parts": [{"type": "text", "text": "a"}], "peer_id": "hermes"},
+            ]
+        },
+    )]
+
+
 def test_sync_turn_noop_when_session_id_blank():
    provider = OpenVikingMemoryProvider()
    provider._client = MagicMock()
@ -2206,7 +2287,10 @@ def test_on_session_end_marks_session_clean_after_successful_commit():

    provider.on_session_end([])

-    provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+    provider._client.post.assert_called_once_with(
+        "/api/v1/sessions/old-sid/commit",
+        {"keep_recent_count": 0},
+    )
    assert provider._turn_count == 0


@ -2228,7 +2312,10 @@ def test_on_session_end_commits_pending_tokens_without_turn_count():
    provider.on_session_end([])

    provider._client.get.assert_called_once_with("/api/v1/sessions/old-sid")
-    provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+    provider._client.post.assert_called_once_with(
+        "/api/v1/sessions/old-sid/commit",
+        {"keep_recent_count": 0},
+    )


 def test_end_then_switch_does_not_double_commit():
@ -2241,7 +2328,10 @@ def test_end_then_switch_does_not_double_commit():
    provider.on_session_switch("new-sid", parent_session_id="old-sid")

    # Exactly one commit call, on the OLD session, fired by on_session_end.
-    provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+    provider._client.post.assert_called_once_with(
+        "/api/v1/sessions/old-sid/commit",
+        {"keep_recent_count": 0},
+    )
    assert provider._session_id == "new-sid"
    assert provider._turn_count == 0

@ -2253,7 +2343,10 @@ def test_end_then_switch_with_pending_tokens_does_not_double_commit():
    provider.on_session_end([])
    provider.on_session_switch("new-sid", parent_session_id="old-sid")

-    provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+    provider._client.post.assert_called_once_with(
+        "/api/v1/sessions/old-sid/commit",
+        {"keep_recent_count": 0},
+    )
    assert provider._session_id == "new-sid"
    assert provider._turn_count == 0

@ -2400,7 +2493,10 @@ def test_on_session_switch_does_not_block_caller_on_slow_drain():
    # Let the finalizer finish so it doesn't leak past the test.
    release_drain.set()
    assert provider._drain_finalizers(timeout=5.0)
-    provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+    provider._client.post.assert_called_once_with(
+        "/api/v1/sessions/old-sid/commit",
+        {"keep_recent_count": 0},
+    )


 def test_on_session_switch_defers_old_commit_to_finalizer_thread():
@ -2415,7 +2511,7 @@ def test_on_session_switch_defers_old_commit_to_finalizer_thread():
    committed = threading.Event()
    drain_timeouts = []

-    def fake_post(path):
+    def fake_post(path, payload=None):
        committed.set()
        return {}

@ -2433,7 +2529,10 @@ def test_on_session_switch_defers_old_commit_to_finalizer_thread():
    assert provider._turn_count == 0
    # The old-session commit lands on the finalizer thread, not inline.
    assert committed.wait(timeout=5.0), "old session was not finalized off-thread"
-    provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+    provider._client.post.assert_called_once_with(
+        "/api/v1/sessions/old-sid/commit",
+        {"keep_recent_count": 0},
+    )
    # The finalizer drains with the deferred (longer) budget, not inline 10s.
    assert drain_timeouts == [_DEFERRED_COMMIT_TIMEOUT]

--- a/tests/run_agent/test_codex_app_server_integration.py
+++ b/tests/run_agent/test_codex_app_server_integration.py
@ -12,7 +12,7 @@ Verifies that:

 from __future__ import annotations

-from unittest.mock import patch
+from unittest.mock import MagicMock, patch

 import pytest

@ -148,6 +148,17 @@ class TestRunConversationCodexPath:
                 and m.get("content") == "echo: hello"]
        assert final, f"expected final assistant message in {msgs}"

+    def test_projected_messages_are_synced_to_external_memory(self, fake_session):
+        agent = _make_codex_agent()
+        agent._memory_manager = MagicMock()
+        agent._memory_manager.build_system_prompt.return_value = ""
+
+        with patch.object(agent, "_spawn_background_review", return_value=None):
+            result = agent.run_conversation("hello")
+
+        agent._memory_manager.sync_all.assert_called_once()
+        assert agent._memory_manager.sync_all.call_args.kwargs["messages"] == result["messages"]
+
    def test_nudge_counters_tick(self, fake_session):
        """The skill nudge counter must accumulate tool_iterations across
        turns. The memory nudge counter is gated on memory being configured
--- a/Show more
+++ b/Show more