Merge remote-tracking branch 'origin/main' into pr48275-rebase

# Conflicts:
#	cron/scheduler.py
This commit is contained in:
teknium1 2026-06-19 07:40:29 -07:00
commit a58287afcb
No known key found for this signature in database
162 changed files with 8521 additions and 634 deletions

View file

@ -2535,3 +2535,56 @@ def sanitize_anthropic_kwargs(api_kwargs: Any, *, log_prefix: str = "") -> Any:
sorted(leaked),
)
return api_kwargs
def _is_stream_unavailable_error(exc: Exception) -> bool:
"""Return True when an Anthropic stream call should fall back to create()."""
err_lower = str(exc).lower()
if "stream" in err_lower and "not supported" in err_lower:
return True
if "invokemodelwithresponsestream" in err_lower:
from agent.bedrock_adapter import is_streaming_access_denied_error
return is_streaming_access_denied_error(exc)
return False
def create_anthropic_message(
    client: Any,
    api_kwargs: dict,
    *,
    log_prefix: str = "",
    prefer_stream: bool = True,
) -> Any:
    """Request an Anthropic message, preferring the streaming endpoint.

    Some Anthropic-compatible gateways are SSE-only: they ignore non-streaming
    requests and answer ``messages.create()`` with ``text/event-stream``. The
    SDK can surface that as raw text, and callers expecting a Message then
    crash on ``.content``. To match the main turn path this helper first goes
    through ``messages.stream().get_final_message()`` and only drops down to
    ``create()`` for providers that explicitly do not support streaming, such
    as restricted Bedrock roles.

    Args:
        client: Object exposing a ``messages`` API with ``create`` and,
            optionally, ``stream``.
        api_kwargs: Request kwargs; passed through ``sanitize_anthropic_kwargs``
            first. Any ``stream`` key is dropped before the request is sent.
        log_prefix: Prefix for log lines emitted here.
        prefer_stream: When False, skip the streaming attempt entirely.

    Returns:
        Whatever the underlying SDK call returns for the final message.

    Raises:
        Exception: Any streaming failure that is not classified by
            ``_is_stream_unavailable_error`` is re-raised unchanged.
    """
    sanitize_anthropic_kwargs(api_kwargs, log_prefix=log_prefix)
    messages_api = getattr(client, "messages", None)
    open_stream = getattr(messages_api, "stream", None)

    # Both endpoints take the same payload minus the explicit ``stream`` flag.
    request_kwargs = {key: value for key, value in api_kwargs.items() if key != "stream"}

    if prefer_stream and callable(open_stream):
        try:
            with open_stream(**request_kwargs) as stream:
                return stream.get_final_message()
        except Exception as exc:
            if not _is_stream_unavailable_error(exc):
                raise
            logger.debug(
                "%sAnthropic Messages stream unavailable; falling back to "
                "messages.create(): %s",
                log_prefix,
                exc,
            )

    return messages_api.create(**request_kwargs)

View file

@ -997,7 +997,7 @@ class _AnthropicCompletionsAdapter:
self._is_oauth = is_oauth
def create(self, **kwargs) -> Any:
from agent.anthropic_adapter import build_anthropic_kwargs
from agent.anthropic_adapter import build_anthropic_kwargs, create_anthropic_message
from agent.transports import get_transport
messages = kwargs.get("messages", [])
@ -1041,7 +1041,7 @@ class _AnthropicCompletionsAdapter:
if not _forbids_sampling_params(model):
anthropic_kwargs["temperature"] = temperature
response = self._client.messages.create(**anthropic_kwargs)
response = create_anthropic_message(self._client, anthropic_kwargs)
_transport = get_transport("anthropic_messages")
_nr = _transport.normalize_response(
response, strip_tool_prefix=self._is_oauth

View file

@ -290,6 +290,7 @@ def run_codex_app_server_turn(
original_user_message=original_user_message,
final_response=turn.final_text,
interrupted=False,
messages=messages,
)
except Exception:
logger.debug("external memory sync raised", exc_info=True)

View file

@ -3197,15 +3197,22 @@ def run_conversation(
# Terminal — flush buffered context so the user sees
# what was tried before the abort.
agent._flush_status_buffer()
# Summarize once: Cloudflare/proxy HTML challenge pages and
# other raw provider bodies must be collapsed to a short
# one-liner here, otherwise the full page leaks into the
# returned ``error`` field and downstream consumers deliver
# it verbatim (e.g. a cron failure notification dumped a
# ~60KB Cloudflare challenge page as 31 Discord messages).
_nonretryable_summary = agent._summarize_api_error(api_error)
if classified.reason == FailoverReason.content_policy_blocked:
agent._emit_status(
f"❌ Provider safety filter blocked this request: "
f"{agent._summarize_api_error(api_error)}"
f"{_nonretryable_summary}"
)
else:
agent._emit_status(
f"❌ Non-retryable error (HTTP {status_code}): "
f"{agent._summarize_api_error(api_error)}"
f"{_nonretryable_summary}"
)
agent._vprint(f"{agent.log_prefix}❌ Non-retryable client error (HTTP {status_code}). Aborting.", force=True)
agent._vprint(f"{agent.log_prefix} 🔌 Provider: {_provider} Model: {_model}", force=True)
@ -3290,18 +3297,17 @@ def run_conversation(
else:
agent._persist_session(messages, conversation_history)
if classified.reason == FailoverReason.content_policy_blocked:
_summary = agent._summarize_api_error(api_error)
_policy_response = (
"⚠️ The model provider's safety filter blocked this request "
"(not a Hermes/gateway failure).\n\n"
f"Provider message: {_summary}\n\n"
f"Provider message: {_nonretryable_summary}\n\n"
f"{_CONTENT_POLICY_RECOVERY_HINT}"
)
return _content_policy_blocked_result(
messages,
api_call_count,
final_response=_policy_response,
error_detail=_summary,
error_detail=_nonretryable_summary,
)
return {
"final_response": None,
@ -3309,7 +3315,7 @@ def run_conversation(
"api_calls": api_call_count,
"completed": False,
"failed": True,
"error": str(api_error),
"error": _nonretryable_summary,
}
if retry_count >= max_retries:

View file

@ -15,6 +15,7 @@ from typing import Any, Dict, List, Optional, Set, Tuple
from hermes_constants import OPENROUTER_BASE_URL
from hermes_cli.config import load_env
from agent.secret_scope import get_secret as _get_secret
from agent.credential_persistence import (
is_borrowed_credential_source,
sanitize_borrowed_credential_payload,
@ -1666,7 +1667,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
_env_file = load_env()
def _env_val(key: str) -> str:
return (_env_file.get(key) or os.environ.get(key) or "").strip()
return (_env_file.get(key) or _get_secret(key, "") or "").strip()
anthropic_api_key = _env_val("ANTHROPIC_API_KEY")
anthropic_oauth_env = (
@ -1952,7 +1953,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
# changes to the .env file.
def _get_env_prefer_dotenv(key: str) -> str:
env_file = load_env()
val = env_file.get(key) or os.environ.get(key) or ""
val = env_file.get(key) or _get_secret(key, "") or ""
return val.strip()
# Honour user suppression — `hermes auth remove <provider> <N>` for an

50
agent/message_content.py Normal file
View file

@ -0,0 +1,50 @@
from __future__ import annotations
from collections.abc import Mapping
from typing import Any
# Part ``type`` values for which ``_text_from_part`` yields no text.
_NON_TEXT_PART_TYPES = {"image", "image_url", "input_image", "audio", "input_audio"}
# Keys/attributes probed, in this order, when looking for a part's string text.
_TEXT_KEYS = ("text", "content", "input_text", "output_text", "summary_text")
def _field(value: Any, key: str) -> Any:
if isinstance(value, Mapping):
return value.get(key)
return getattr(value, key, None)
def _text_from_part(part: Any) -> str:
    """Extract the text carried by a single content part.

    Strings are returned as-is and ``None`` yields ``""``. For any other part,
    a ``type`` listed in ``_NON_TEXT_PART_TYPES`` yields ``""``; otherwise the
    first ``_TEXT_KEYS`` field holding a ``str`` is returned, or ``""`` when
    none does.
    """
    if isinstance(part, str):
        return part
    if part is None:
        return ""

    kind = str(_field(part, "type") or "").strip().lower()
    if kind in _NON_TEXT_PART_TYPES:
        return ""

    # Lazy scan: stops at the first key whose value is a string.
    candidates = (_field(part, key) for key in _TEXT_KEYS)
    return next((value for value in candidates if isinstance(value, str)), "")
def flatten_message_text(content: Any, *, sep: str = "\n") -> str:
    """Collapse common chat/Responses message content shapes into visible text.

    Args:
        content: ``None``, a string, a list of parts, or a single part.
        sep: Separator placed between the non-empty texts of a list's parts.

    Returns:
        ``""`` for ``None``; the string itself for ``str``; the joined
        non-empty part texts for a list. For anything else, the part's text
        when it has some, else ``str(content)`` (``""`` if that raises).
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # ``filter(None, ...)`` drops the empty strings parts may produce.
        return sep.join(filter(None, map(_text_from_part, content)))

    extracted = _text_from_part(content)
    if extracted:
        return extracted
    try:
        return str(content)
    except Exception:
        return ""

205
agent/secret_scope.py Normal file
View file

@ -0,0 +1,205 @@
"""Profile-scoped credential resolution for multi-profile gateway multiplexing.
The multiplexing gateway serves many profiles from one process. Each profile
has its own ``.env`` with its own provider keys and platform tokens, so we
**cannot** union them into the process-global ``os.environ`` (that would leak
profile A's keys to profile B's turns, and to every subprocess spawned with
``env=dict(os.environ)``).
This module provides a fail-closed, context-local secret scope:
- ``set_secret_scope(mapping)`` installs the active profile's secrets for the
current task (a contextvar, so it propagates into the agent's worker thread
via ``copy_context()`` exactly like the HERMES_HOME override).
- ``get_secret(name)`` reads from that scope. When multiplexing is **active**
and no scope is set, it RAISES rather than silently falling back to
``os.environ`` — an un-migrated or newly-added call site fails loudly at that
exact line instead of leaking another profile's value. When multiplexing is
**off** (the default), it transparently reads ``os.environ``, so the
single-profile gateway and every non-gateway caller behave exactly as before.
Design rationale lives in ``docs/design/multiplexing-gateway.md`` (Workstream A).
"""
from __future__ import annotations
import os
from contextvars import ContextVar, Token
from pathlib import Path
from typing import Dict, Mapping, Optional
# ── multiplex-active flag ────────────────────────────────────────────────
# Process-global: set once at gateway startup when gateway.multiplex_profiles
# is true. Governs whether get_secret() fails closed on an unscoped read.
# A plain module global (not a contextvar): it describes the deployment mode,
# not a per-task value.
# Written by set_multiplex_active(); read by is_multiplex_active() and get_secret().
_MULTIPLEX_ACTIVE: bool = False
def set_multiplex_active(active: bool) -> None:
    """Record whether this process runs as a profile multiplexer.

    Meant to be called once, at gateway startup. With a truthy ``active``,
    an unscoped ``get_secret`` read fails closed rather than falling back to
    ``os.environ``.
    """
    global _MULTIPLEX_ACTIVE
    # Normalise any truthy/falsy input to a real bool.
    _MULTIPLEX_ACTIVE = True if active else False
def is_multiplex_active() -> bool:
    """Report the multiplexer flag last stored by ``set_multiplex_active``."""
    return _MULTIPLEX_ACTIVE
# ── the secret scope contextvar ──────────────────────────────────────────
# Holds the mapping installed by set_secret_scope(); the default ``None``
# means no scope is installed in the current context.
_SECRET_SCOPE: ContextVar[Optional[Mapping[str, str]]] = ContextVar(
    "_SECRET_SCOPE", default=None
)
class UnscopedSecretError(RuntimeError):
    """Fail-closed signal: a secret was read in multiplex mode without a scope.

    Raised when a credential read reaches ``get_secret`` while no profile
    scope is active. In a multiplexer, serving that read from ``os.environ``
    would leak whichever profile's value happened to be there.

    The remedy is to run the call path inside ``set_secret_scope(...)`` (the
    per-turn / per-adapter profile scope) -- not to widen the allowlist of
    global variables.
    """
def set_secret_scope(secrets: Optional[Mapping[str, str]]) -> Token:
    """Bind ``secrets`` as the secret scope of the current context.

    Args:
        secrets: The active profile's secret mapping, or ``None`` to clear.

    Returns:
        The token to hand to ``reset_secret_scope`` to undo this call.
    """
    token = _SECRET_SCOPE.set(secrets)
    return token
def reset_secret_scope(token: Token) -> None:
    """Undo a ``set_secret_scope`` call, reinstating the scope that preceded it.

    Args:
        token: The value returned by the matching ``set_secret_scope`` call.
    """
    _SECRET_SCOPE.reset(token)
def current_secret_scope() -> Optional[Mapping[str, str]]:
    """Expose the secret mapping active in this context (``None`` if unset)."""
    active_scope = _SECRET_SCOPE.get()
    return active_scope
# ── genuinely-global env vars (NOT per-profile secrets) ──────────────────
# These are process/deployment-level settings, not profile credentials. They
# legitimately live in os.environ and must keep reading from it even in
# multiplex mode — routing them through the fail-closed path would wrongly
# crash. Anything matching is read from os.environ regardless of scope.
#
# Membership test is by exact name OR prefix (see _is_global_env). Keep this
# list tight: when in doubt a value is a profile secret, not a global.
# Names that ``_is_global_env`` matches exactly.
_GLOBAL_ENV_EXACT = frozenset({
    # Hermes runtime / deployment
    "HERMES_HOME", "HERMES_PROFILE", "HERMES_GATEWAY_LOCK_DIR",
    "HERMES_MAX_ITERATIONS", "HERMES_MAX_TOKENS", "HERMES_API_TIMEOUT",
    "HERMES_REDACT_SECRETS", "HERMES_NOUS_TIMEOUT_SECONDS",
    "_HERMES_GATEWAY",
    # OS / interpreter
    "PATH", "HOME", "USER", "LANG", "LC_ALL", "TZ", "PWD", "SHELL", "TMPDIR",
    "VIRTUAL_ENV", "PYTHONPATH", "SSL_CERT_FILE",
    # Kanban paths (per-board, not per-profile-secret)
    "HERMES_KANBAN_DB", "HERMES_KANBAN_WORKSPACES_ROOT", "HERMES_KANBAN_BOARD",
})
# Name prefixes that ``_is_global_env`` matches with ``str.startswith``.
_GLOBAL_ENV_PREFIXES = (
    "HERMES_KANBAN_",
    "HERMES_TELEGRAM_",  # tuning knobs (batch delays, fallback toggles) — NOT the token
    "TERMINAL_",  # terminal/sandbox backend settings
)
def _is_global_env(name: str) -> bool:
    """Tell whether ``name`` is a process-global setting rather than a profile secret.

    A name qualifies when it is listed in ``_GLOBAL_ENV_EXACT`` or begins with
    one of the ``_GLOBAL_ENV_PREFIXES``.
    """
    # ``str.startswith`` accepts the whole tuple of prefixes in one call.
    return name in _GLOBAL_ENV_EXACT or name.startswith(_GLOBAL_ENV_PREFIXES)
def get_secret(name: str, default: Optional[str] = None) -> Optional[str]:
    """Look up a credential by env-var name, respecting the active profile scope.

    Resolution order:

    1. Genuinely-global vars (``_is_global_env``) always come from
       ``os.environ`` -- they are deployment settings, not profile secrets.
    2. With a secret scope installed (multiplexed turn), the scope is
       authoritative: an absent key returns ``default`` and ``os.environ`` is
       never consulted, because in a multiplexer it may hold another
       profile's value.
    3. With no scope installed:
       - multiplex INACTIVE (default deployment): read ``os.environ``, the
         same as the legacy ``os.getenv`` behaviour.
       - multiplex ACTIVE: fail closed by raising ``UnscopedSecretError``.

    Args:
        name: Environment-variable style name of the credential.
        default: Value returned when the name is not found.

    Returns:
        The resolved value, or ``default`` when it is absent.

    Raises:
        UnscopedSecretError: A non-global name is read while multiplexing is
            active and no scope is installed.
    """
    if not _is_global_env(name):
        scope = _SECRET_SCOPE.get()
        if scope is not None:
            scoped_value = scope.get(name)
            return default if scoped_value is None else scoped_value
        if _MULTIPLEX_ACTIVE:
            raise UnscopedSecretError(
                f"get_secret({name!r}) called with no profile secret scope active "
                f"while multiplexing is on. This credential read must run inside a "
                f"set_secret_scope(...) block (the per-turn / per-adapter profile "
                f"scope). Reading os.environ here would risk leaking another "
                f"profile's value. See docs/design/multiplexing-gateway.md "
                f"(Workstream A)."
            )

    # Global names, and unscoped reads outside multiplex mode, use the process env.
    env_value = os.environ.get(name)
    return default if env_value is None else env_value
def _clean_env_value(raw_value: str) -> str:
    """Strip surrounding quotes and any trailing inline comment from a value."""
    value = raw_value.strip()

    if value[:1] in ("'", '"'):
        quote = value[0]
        closing = value.find(quote, 1)
        if closing != -1:
            trailer = value[closing + 1:].strip()
            # ``KEY="v"`` or ``KEY="v"  # note``: the quoted span is the value.
            if not trailer or trailer.startswith("#"):
                return value[1:closing]
        # Otherwise keep the long-standing rule: drop one pair of matching
        # outer quotes, and leave unbalanced input untouched.
        if len(value) >= 2 and value[-1] == quote:
            return value[1:-1]
        return value

    # Unquoted: a ``#`` preceded by whitespace starts an inline comment, while
    # a ``#`` embedded in the value (``pa#ss``) is part of it.
    for idx, char in enumerate(value):
        if char == "#" and idx > 0 and value[idx - 1].isspace():
            return value[:idx].rstrip()
    return value


def load_env_file(env_path: Path) -> Dict[str, str]:
    """Parse a ``.env`` file into a plain dict WITHOUT touching ``os.environ``.

    Used to load a profile's secrets into an isolated mapping for
    ``set_secret_scope``. Mirrors python-dotenv's basic parsing (KEY=VALUE,
    ``export`` prefix, full-line and inline ``#`` comments, optional matching
    quotes) but never mutates the process environment -- that isolation is the
    whole point.

    A leading UTF-8 byte-order mark is ignored, so it cannot become part of
    the first key.

    Args:
        env_path: Path of the ``.env`` file to read.

    Returns:
        A fresh ``{name: value}`` dict. It is empty when the file is missing,
        unreadable, or not valid UTF-8. Lines without ``=`` or with an empty
        key are skipped; a later duplicate key overrides an earlier one.
    """
    secrets: Dict[str, str] = {}
    try:
        # ``utf-8-sig`` decodes plain UTF-8 too and drops a BOM when present.
        text = env_path.read_text(encoding="utf-8-sig")
    except (OSError, UnicodeDecodeError):
        return secrets

    for raw in text.splitlines():
        line = raw.strip()
        if not line or line.startswith("#"):
            continue
        if line.startswith("export "):
            line = line[len("export "):].lstrip()
        key, separator, value = line.partition("=")
        if not separator:
            continue
        key = key.strip()
        if not key:
            continue
        secrets[key] = _clean_env_value(value)
    return secrets
def build_profile_secret_scope(hermes_home: Path) -> Dict[str, str]:
    """Load the secret mapping for the profile rooted at ``hermes_home``.

    Reads ``<home>/.env`` through ``load_env_file`` and returns the resulting
    fresh dict, ready to be installed with ``set_secret_scope``. Genuinely
    global vars are deliberately not copied in: ``get_secret`` reads those
    from ``os.environ`` directly, so the scope carries only what the profile's
    ``.env`` defines.
    """
    env_path = Path(hermes_home) / ".env"
    return load_env_file(env_path)

View file

@ -14,6 +14,7 @@ import { useSkinCommand } from '@/themes/use-skin-command'
import { formatRefValue } from '../components/assistant-ui/directive-text'
import { getCronJobs, getSessionMessages, listAllProfileSessions, type SessionInfo, triggerCronJob } from '../hermes'
import { type ChatMessage, chatMessageText, preserveLocalAssistantErrors, toChatMessages } from '../lib/chat-messages'
import { storedSessionIdForNotification } from '../lib/session-ids'
import {
isMessagingSource,
LOCAL_SESSION_SOURCE_IDS,
@ -276,16 +277,20 @@ export function DesktopController() {
}
}, [])
// Notification click: the main process already focused the window; jump to its session.
// Notification click: the main process already focused the window; jump to its
// session. Notifications are tagged with the gateway *runtime* session id, but
// the chat route is keyed by the *stored* id — navigating with the runtime id
// resumes a non-existent stored session ("session not found") and strands the
// user. Translate runtime -> stored before navigating.
useEffect(() => {
const unsubscribe = window.hermesDesktop?.onFocusSession?.(sessionId => {
if (sessionId) {
navigate(sessionRoute(sessionId))
navigate(sessionRoute(storedSessionIdForNotification(sessionId, runtimeIdByStoredSessionIdRef.current)))
}
})
return () => unsubscribe?.()
}, [navigate])
}, [navigate, runtimeIdByStoredSessionIdRef])
// Notification action button (Approve/Reject) — resolve in place, no navigation.
useEffect(() => {

View file

@ -32,6 +32,7 @@ import {
clearComposerAttachments,
type ComposerAttachment,
setComposerAttachmentUploadState,
setComposerDraft,
terminalContextBlocksFromDraft,
updateComposerAttachment
} from '@/store/composer'
@ -951,8 +952,26 @@ export function usePromptActions({
return
}
// send / prefill carry an optional `notice` (e.g. "⊙ Goal set …")
// that the backend wants shown as a system line before the message
// is acted on. Mirrors the TUI's createSlashHandler — without it a
// `/goal <text>` looked like it did nothing.
if ((dispatch.type === 'send' || dispatch.type === 'prefill') && dispatch.notice?.trim()) {
renderSlashOutput(dispatch.notice.trim())
}
const message = ('message' in dispatch ? dispatch.message : '')?.trim() ?? ''
// /undo returns a prefill directive: drop the backed-up message into
// the composer for editing instead of submitting it immediately.
if (dispatch.type === 'prefill') {
if (message) {
setComposerDraft(message)
}
return
}
if (!message) {
renderSlashOutput(
`/${name}: ${dispatch.type === 'skill' ? 'skill payload missing message' : 'empty message'}`

View file

@ -2,7 +2,7 @@ import { cleanup, fireEvent, render, screen, waitFor } from '@testing-library/re
import { atom } from 'nanostores'
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
import type { OAuthProvider } from '@/types/hermes'
import type { EnvVarInfo, OAuthProvider } from '@/types/hermes'
const listOAuthProviders = vi.fn()
const disconnectOAuthProvider = vi.fn()
@ -36,6 +36,25 @@ function provider(id: string, loggedIn: boolean, patch: Partial<OAuthProvider> =
}
}
// One `/api/env` row (an EnvVarInfo) for the API-keys view. Mirrors the
// `provider()` factory above: a valid base + per-test overrides, typed against
// the real response shape so it can't drift from EnvVarInfo.
function keyVar(patch: Partial<EnvVarInfo> = {}): EnvVarInfo {
  // Valid baseline row; any field present in `patch` overrides it below.
  const base: EnvVarInfo = {
    advanced: false,
    category: 'provider',
    description: '',
    is_password: true,
    is_set: false,
    provider: '',
    provider_label: '',
    redacted_value: null,
    tools: [],
    url: ''
  }

  return { ...base, ...patch }
}
beforeEach(() => {
onboarding.set({ manual: false })
getEnvVars.mockResolvedValue({})
@ -97,4 +116,56 @@ describe('ProvidersSettings', () => {
expect(screen.queryByRole('button', { name: 'Remove Qwen Code' })).toBeNull()
expect(screen.getByText(/managed by its own CLI/)).toBeTruthy()
})
it('renders a Keys card for a backend-tagged provider with no PROVIDER_GROUPS prefix', async () => {
// A provider the backend catalog tags (provider/provider_label) but that has
// no desktop PROVIDER_GROUPS prefix row must still render its own card —
// this is the GUI/CLI drift fix: membership comes from the backend, not
// from the hand-maintained prefix list.
getEnvVars.mockResolvedValue({
WIDGETAI_API_KEY: keyVar({
provider: 'widgetai',
provider_label: 'WidgetAI',
url: 'https://widgetai.example/keys'
})
})
listOAuthProviders.mockResolvedValue({ providers: [] })
const { ProvidersSettings } = await import('./providers-settings')
render(<ProvidersSettings onClose={vi.fn()} onViewChange={vi.fn()} view="keys" />)
expect(await screen.findByText('WidgetAI')).toBeTruthy()
})
it('orders API-key providers by priority then name, and filters them via search', async () => {
// These three providers have no curated PROVIDER_GROUPS priority, so they
// share the default priority and fall back to alphabetical among themselves
// (Acme, Middle, Zebra) — exercising the name tiebreak of the priority sort.
getEnvVars.mockResolvedValue({
ZEBRA_API_KEY: keyVar({ provider: 'zebra', provider_label: 'Zebra' }),
ACME_API_KEY: keyVar({ provider: 'acme', provider_label: 'Acme' }),
MIDDLE_API_KEY: keyVar({ provider: 'middle', provider_label: 'Middle' })
})
listOAuthProviders.mockResolvedValue({ providers: [] })
const { ProvidersSettings } = await import('./providers-settings')
render(<ProvidersSettings onClose={vi.fn()} onViewChange={vi.fn()} view="keys" />)
// Equal priority → alphabetical tiebreak: Acme, Middle, Zebra.
await screen.findByText('Acme')
const labels = screen.getAllByText(/Acme|Middle|Zebra/).map(el => el.textContent)
expect(labels).toEqual(['Acme', 'Middle', 'Zebra'])
// Typing narrows the list to matching providers only.
const search = screen.getByPlaceholderText('Search providers…')
fireEvent.change(search, { target: { value: 'mid' } })
await waitFor(() => expect(screen.queryByText('Acme')).toBeNull())
expect(screen.getByText('Middle')).toBeTruthy()
expect(screen.queryByText('Zebra')).toBeNull()
// A non-matching query shows the empty-state copy.
fireEvent.change(search, { target: { value: 'nonesuch-xyz' } })
expect(await screen.findByText('No providers match your search.')).toBeTruthy()
})
})

View file

@ -12,6 +12,7 @@ import {
sortProviders
} from '@/components/desktop-onboarding-overlay'
import { Button } from '@/components/ui/button'
import { SearchField } from '@/components/ui/search-field'
import { disconnectOAuthProvider, listOAuthProviders } from '@/hermes'
import { useI18n } from '@/i18n'
import { Check, ChevronDown, ChevronRight, KeyRound, Loader2, Terminal, Trash2 } from '@/lib/icons'
@ -45,8 +46,17 @@ export const PROVIDER_VIEWS = ['accounts', 'keys'] as const
export type ProviderView = (typeof PROVIDER_VIEWS)[number]
// Group the env catalog by provider — one ListRow per vendor plus optional
// advanced overrides (base URL, region, etc.). Groups without a key field and
// the "Other" bucket are skipped.
// advanced overrides (base URL, region, etc.). Groups without a key field are
// skipped.
//
// Grouping key precedence:
// 1. Backend `provider_label` / `provider` (from the unified provider catalog
// in hermes_cli/provider_catalog.py) — the SAME provider identity
// `hermes model` uses. This is authoritative: a provider tagged by the
// backend always renders a card, even with no PROVIDER_GROUPS row.
// 2. Desktop prefix match (`providerGroup`) — legacy fallback for provider
// env vars that predate the backend tagging.
// Only entries that resolve to neither (the "Other" bucket) are skipped.
function buildProviderKeyGroups(vars: Record<string, EnvVarInfo>): ProviderKeyGroup[] {
const buckets = new Map<string, [string, EnvVarInfo][]>()
@ -55,7 +65,9 @@ function buildProviderKeyGroups(vars: Record<string, EnvVarInfo>): ProviderKeyGr
continue
}
const name = providerGroup(key)
// Prefer the backend-supplied provider label/id so the Keys tab groups by
// the same identity the CLI picker uses; fall back to the prefix guess.
const name = info.provider_label?.trim() || info.provider?.trim() || providerGroup(key)
if (name === 'Other') {
continue
@ -73,6 +85,9 @@ function buildProviderKeyGroups(vars: Record<string, EnvVarInfo>): ProviderKeyGr
continue
}
// Presentation overlay (priority, blurb, docs) is keyed by the prefix-based
// group name; when the backend introduced this provider it may have no
// overlay entry, so fall back to the backend/env metadata for display.
const meta = providerMeta(name)
groups.push({
@ -131,6 +146,7 @@ function OAuthPicker({
const rest = featured ? ordered.filter(p => p.id !== FEATURED_ID) : ordered
// Keep connected accounts grouped and always visible; only the unconnected
// providers hide behind the disclosure, so the page leads with what's set up.
// Both lists preserve `sortProviders` order (curated priority, then name).
const connected = rest.filter(p => p.status?.logged_in)
const others = rest.filter(p => !p.status?.logged_in)
const collapsible = others.length > 0
@ -284,6 +300,8 @@ export function ProvidersSettings({ onClose, onViewChange, view }: ProvidersSett
const [oauthProviders, setOauthProviders] = useState<OAuthProvider[]>([])
const [openProvider, setOpenProvider] = useState<null | string>(null)
const [disconnecting, setDisconnecting] = useState<null | string>(null)
// Free-text filter for the API-keys view (provider name / env-var key / desc).
const [keyQuery, setKeyQuery] = useState('')
// The onboarding overlay owns the OAuth flow. Watch its `manual` flag so we
// re-read connection state when the user finishes (or dismisses) a sign-in
// they launched from this page — otherwise the cards keep their stale status.
@ -372,20 +390,49 @@ export function ProvidersSettings({ onClose, onViewChange, view }: ProvidersSett
const keyGroups = buildProviderKeyGroups(vars)
if (showApiKeys) {
const q = keyQuery.trim().toLowerCase()
const visibleGroups = q
? keyGroups.filter(group => {
const haystack = [
group.name,
group.description ?? '',
group.primary[0],
...group.advanced.map(([k]) => k)
]
return haystack.some(s => s.toLowerCase().includes(q))
})
: keyGroups
return (
<SettingsContent>
{keyGroups.length > 0 ? (
<div className="grid gap-2">
{keyGroups.map(group => (
<ProviderKeyRows
expanded={openProvider === group.name}
group={group}
key={group.name}
onExpand={() => setOpenProvider(group.name)}
onToggle={() => setOpenProvider(prev => (prev === group.name ? null : group.name))}
rowProps={rowProps}
/>
))}
<div className="grid gap-3">
<SearchField
aria-label={t.settings.providers.searchKeys}
containerClassName="w-full"
onChange={setKeyQuery}
placeholder={t.settings.providers.searchKeys}
value={keyQuery}
/>
{visibleGroups.length > 0 ? (
<div className="grid gap-2">
{visibleGroups.map(group => (
<ProviderKeyRows
expanded={openProvider === group.name}
group={group}
key={group.name}
onExpand={() => setOpenProvider(group.name)}
onToggle={() => setOpenProvider(prev => (prev === group.name ? null : group.name))}
rowProps={rowProps}
/>
))}
</div>
) : (
<div className="grid min-h-24 place-items-center px-4 py-6 text-center text-[length:var(--conversation-caption-font-size)] text-muted-foreground">
{t.settings.providers.noKeysMatch}
</div>
)}
</div>
) : (
<NoProviderKeys />

View file

@ -106,6 +106,13 @@ export interface SkillCommandDispatchResponse {
export interface SendCommandDispatchResponse {
type: 'send'
message: string
notice?: string
}
export interface PrefillCommandDispatchResponse {
type: 'prefill'
message: string
notice?: string
}
export type CommandDispatchResponse =
@ -113,6 +120,7 @@ export type CommandDispatchResponse =
| AliasCommandDispatchResponse
| SkillCommandDispatchResponse
| SendCommandDispatchResponse
| PrefillCommandDispatchResponse
export type SidebarNavId = 'artifacts' | 'command-center' | 'messaging' | 'new-session' | 'settings' | 'skills'

View file

@ -859,7 +859,10 @@ const ProcessNotificationNote: FC<{ text: string }> = ({ text }) => {
<summary className="cursor-pointer select-none text-muted-foreground/45 hover:text-muted-foreground/70">
output
</summary>
<pre className="mt-0.5 max-h-48 overflow-auto whitespace-pre-wrap font-mono text-[0.625rem] leading-4 text-muted-foreground/55">
<pre
className="mt-0.5 max-h-48 overflow-auto whitespace-pre-wrap font-mono text-[0.625rem] leading-4 text-muted-foreground/55"
data-selectable-text="true"
>
{detail}
</pre>
</details>

View file

@ -41,7 +41,11 @@ export function TerminalOutput({ className, text }: TerminalOutputProps) {
}, [text])
return (
<div className={cn('max-h-16 overflow-auto overscroll-contain', className)} ref={ref}>
<div
className={cn('max-h-16 overflow-auto overscroll-contain', className)}
data-selectable-text="true"
ref={ref}
>
<pre className="w-max min-w-full font-mono text-[0.5625rem] leading-[0.85rem] whitespace-pre text-muted-foreground/70">
{text}
</pre>

View file

@ -581,6 +581,8 @@ export const en: Translations = {
removedMessage: provider => `${provider} was removed.`,
failedRemove: provider => `Could not remove ${provider}`,
noProviderKeys: 'No provider API keys available.',
searchKeys: 'Search providers…',
noKeysMatch: 'No providers match your search.',
loading: 'Loading providers...'
},
sessions: {

View file

@ -700,6 +700,8 @@ export const ja = defineLocale({
removedMessage: provider => `${provider} を削除しました。`,
failedRemove: provider => `${provider} を削除できませんでした`,
noProviderKeys: '利用可能なプロバイダー API キーがありません。',
searchKeys: 'プロバイダーを検索…',
noKeysMatch: '一致するプロバイダーがありません。',
loading: 'プロバイダーを読み込み中...'
},
sessions: {

View file

@ -462,6 +462,8 @@ export interface Translations {
removedMessage: (provider: string) => string
failedRemove: (provider: string) => string
noProviderKeys: string
searchKeys: string
noKeysMatch: string
loading: string
}
sessions: {

View file

@ -677,6 +677,8 @@ export const zhHant = defineLocale({
removedMessage: provider => `${provider} 已移除。`,
failedRemove: provider => `無法移除 ${provider}`,
noProviderKeys: '沒有可用的提供方 API 金鑰。',
searchKeys: '搜尋提供方…',
noKeysMatch: '沒有符合的提供方。',
loading: '正在載入提供方...'
},
sessions: {

View file

@ -774,6 +774,8 @@ export const zh: Translations = {
removedMessage: provider => `${provider} 已移除。`,
failedRemove: provider => `无法移除 ${provider}`,
noProviderKeys: '没有可用的提供方 API 密钥。',
searchKeys: '搜索提供方…',
noKeysMatch: '没有匹配的提供方。',
loading: '正在加载提供方...'
},
sessions: {

View file

@ -2,7 +2,7 @@ import { describe, expect, it } from 'vitest'
import type { ComposerAttachment } from '@/store/composer'
import { coerceThinkingText, optimisticAttachmentRef } from './chat-runtime'
import { coerceThinkingText, optimisticAttachmentRef, parseCommandDispatch } from './chat-runtime'
const DATA_URL = 'data:image/png;base64,iVBORw0KGgoAAAANS'
@ -52,3 +52,31 @@ describe('coerceThinkingText', () => {
).toBe('')
})
})
// Pins how parseCommandDispatch handles `send` and `prefill` directives: the
// optional `notice` is carried through, and a prefill without a message is
// rejected (null).
describe('parseCommandDispatch', () => {
  it('keeps the notice on a send directive (e.g. /goal set)', () => {
    // The backend's /goal set returns {type:send, notice:"⊙ Goal set …", message}.
    // Dropping the notice made /goal look like it did nothing in the desktop app.
    const parsed = parseCommandDispatch({ type: 'send', notice: '⊙ Goal set', message: 'do the thing' })
    expect(parsed).toEqual({ type: 'send', message: 'do the thing', notice: '⊙ Goal set' })
  })
  it('keeps message-only send directives working (no notice)', () => {
    // With no notice in the input, the parsed result carries `notice: undefined`.
    expect(parseCommandDispatch({ type: 'send', message: 'hi' })).toEqual({
      type: 'send',
      message: 'hi',
      notice: undefined
    })
  })
  it('parses a prefill directive with its notice (e.g. /undo)', () => {
    const parsed = parseCommandDispatch({ type: 'prefill', notice: 'backed up 1 turn', message: 'edit me' })
    expect(parsed).toEqual({ type: 'prefill', message: 'edit me', notice: 'backed up 1 turn' })
  })
  it('rejects a prefill directive missing its message', () => {
    expect(parseCommandDispatch({ type: 'prefill', notice: 'x' })).toBeNull()
  })
})

View file

@ -238,7 +238,12 @@ export function parseCommandDispatch(raw: unknown): CommandDispatchResponse | nu
return typeof row.name === 'string' ? { type: 'skill', name: row.name, message: str(row.message) } : null
case 'send':
return typeof row.message === 'string' ? { type: 'send', message: row.message } : null
return typeof row.message === 'string' ? { type: 'send', message: row.message, notice: str(row.notice) } : null
case 'prefill':
return typeof row.message === 'string'
? { type: 'prefill', message: row.message, notice: str(row.notice) }
: null
default:
return null

View file

@ -0,0 +1,44 @@
import { describe, expect, it } from 'vitest'
import { storedSessionIdForNotification } from './session-ids'
// Covers the runtime -> stored session id translation. In every case the map
// passed in is keyed by stored id with the runtime id as its value.
describe('storedSessionIdForNotification', () => {
  it('translates a runtime id back to its stored id', () => {
    // The route is keyed by the stored id, but notifications carry the runtime
    // id. Resolving runtime -> stored keeps notification-click navigation from
    // resuming a non-existent stored session ("session not found").
    const map = new Map([['stored-abc', 'runtime-123']])
    expect(storedSessionIdForNotification('runtime-123', map)).toBe('stored-abc')
  })
  it('returns the id unchanged when no mapping is known', () => {
    // A notification for a session this window never opened may already carry a
    // stored id; let the resume/REST lookup handle it as-is.
    const map = new Map([['stored-abc', 'runtime-123']])
    expect(storedSessionIdForNotification('stored-xyz', map)).toBe('stored-xyz')
  })
  it('returns the id unchanged for an empty map', () => {
    expect(storedSessionIdForNotification('runtime-123', new Map())).toBe('runtime-123')
  })
  it('resolves the correct stored id among several sessions', () => {
    const map = new Map([
      ['stored-1', 'runtime-1'],
      ['stored-2', 'runtime-2'],
      ['stored-3', 'runtime-3']
    ])
    expect(storedSessionIdForNotification('runtime-2', map)).toBe('stored-2')
  })
  it('does not treat a stored id as a runtime id (keys are not matched)', () => {
    // The map is stored -> runtime. A value that only appears as a *key* must
    // not be rewritten, otherwise an already-stored id could be mangled.
    const map = new Map([['stored-1', 'runtime-1']])
    expect(storedSessionIdForNotification('stored-1', map)).toBe('stored-1')
  })
})

View file

@ -0,0 +1,26 @@
/**
 * Translate a notification's session id into the id the chat route is keyed by.
 *
 * Gateway events — and therefore native notifications — carry the *runtime*
 * session id, while the chat route is keyed by the *stored* session id
 * (`stored_session_id`). The two differ: a brand-new chat gets a runtime id
 * immediately but its stored id is only assigned when the first turn persists.
 * Navigating to a runtime id would therefore try to resume a stored session
 * that does not exist ("session not found").
 *
 * @param id - Session id taken from the notification (runtime or stored).
 * @param runtimeIdByStoredSessionId - Map of stored id -> runtime id. Only the
 *   values (runtime ids) are matched; keys are never compared against `id`.
 * @returns The stored id of the first entry whose runtime id equals `id`, or
 *   `id` unchanged when no entry matches — it may already be a stored id (e.g.
 *   a notification for a session this window never opened), in which case the
 *   normal resume/REST lookup handles it.
 */
export function storedSessionIdForNotification(
  id: string,
  runtimeIdByStoredSessionId: ReadonlyMap<string, string>
): string {
  // Reverse lookup: scan entries in insertion order and take the first hit.
  const hit = Array.from(runtimeIdByStoredSessionId.entries()).find(([, runtimeId]) => runtimeId === id)

  return hit === undefined ? id : hit[0]
}

View file

@ -680,6 +680,7 @@ textarea,
[contenteditable]:not([contenteditable='false']),
[data-slot='aui_user-message-root'],
[data-slot='aui_assistant-message-content'],
[data-slot='aui_system-message-root'],
[data-selectable-text='true'],
[data-selectable-text='true'] * {
-webkit-user-select: text;

View file

@ -108,6 +108,12 @@ export interface EnvVarInfo {
description: string
is_password: boolean
is_set: boolean
// Backend-derived provider grouping hints (from the unified provider catalog
// in hermes_cli/provider_catalog.py). When present, the Keys tab groups by
// this provider identity — the SAME one `hermes model` uses — instead of
// desktop-only env-var prefix guesses. Empty for non-provider env vars.
provider?: string
provider_label?: string
redacted_value: null | string
tools: string[]
url: null | string

58
cli.py
View file

@ -6959,24 +6959,43 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
self._close_model_picker()
def _handle_model_switch(self, cmd_original: str):
"""Handle /model command — switch model for this session.
"""Handle /model command — switch model.
Supports:
/model show current model + usage hints
/model <name> switch for this session only
/model <name> --global switch and persist to config.yaml
/model <name> switch model (persists by default)
/model <name> --session switch for this session only
/model <name> --global switch and persist (explicit)
/model <name> --provider <provider> switch provider + model
/model --provider <provider> switch to provider, auto-detect model
Persistence defaults to on (``model.persist_switch_by_default`` in
config.yaml, default True). Use ``--session`` for a one-off switch.
"""
from hermes_cli.model_switch import switch_model, parse_model_flags
from hermes_cli.model_switch import (
switch_model,
parse_model_flags,
resolve_persist_behavior,
)
from hermes_cli.providers import get_label
# Parse args from the original command
parts = cmd_original.split(None, 1) # split off '/model'
raw_args = parts[1].strip() if len(parts) > 1 else ""
# Parse --provider, --global, and --refresh flags
model_input, explicit_provider, persist_global, force_refresh = parse_model_flags(raw_args)
# Parse --provider, --global, --session, and --refresh flags
(
model_input,
explicit_provider,
is_global_flag,
force_refresh,
is_session,
) = parse_model_flags(raw_args)
# Resolve the effective persistence once: --session overrides the
# config-gated default, --global forces persist, otherwise defer to
# model.persist_switch_by_default (defaults to True so /model survives
# across sessions).
persist_global = resolve_persist_behavior(is_global_flag, is_session)
# --refresh: wipe the on-disk picker cache before building the
# provider list. Forces a live re-fetch of every authed provider's
@ -7024,7 +7043,8 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
if not providers:
_cprint(" No authenticated providers found.")
_cprint("")
_cprint(" /model <name> switch model")
_cprint(" /model <name> switch model (persists)")
_cprint(" /model <name> --session switch for this session only")
_cprint(" /model --provider <slug> switch provider")
_cprint(" /model --refresh re-fetch live model lists")
return
@ -7144,7 +7164,7 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
save_config_value("model.default", result.new_model)
if result.provider_changed:
save_config_value("model.provider", result.target_provider)
_cprint(" Saved to config.yaml (--global)")
_cprint(" Saved to config.yaml")
else:
_cprint(" (session only — add --global to persist)")
@ -11917,7 +11937,13 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
# --- /model picker modal ---
if self._model_picker_state:
try:
self._handle_model_picker_selection()
# Picker selections persist by default (same default as
# /model <name>); honour model.persist_switch_by_default.
from hermes_cli.model_switch import resolve_persist_behavior
self._handle_model_picker_selection(
persist_global=resolve_persist_behavior(False, False)
)
except Exception as _exc:
_cprint(f" ✗ Model selection failed: {_exc}")
self._close_model_picker()
@ -13527,13 +13553,13 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
style=style,
full_screen=False,
mouse_support=False,
# The status bar contains wall-clock read-outs (live prompt elapsed
# and idle-since-last-turn). Once a turn finishes there may be no
# further events to invalidate the app, so prompt_toolkit would keep
# rendering the first post-turn value (usually ``✓ 0s``) forever.
# A low-rate refresh keeps the clock honest without reintroducing a
# custom repaint thread or touching conversation state.
refresh_interval=1.0,
# Read from display.cli_refresh_interval (default 0 = disabled).
# When non-zero, prompt_toolkit redraws the UI on this cadence
# during idle, keeping wall-clock status-bar read-outs ticking.
# Set to 0 to suppress background redraws entirely — avoids
# fighting terminal auto-scroll in non-fullscreen mode (Xshell,
# iTerm2, Windows Terminal). See #48309.
refresh_interval=float(CLI_CONFIG.get("display", {}).get("cli_refresh_interval", 0)),
# Erase the live bottom chrome (status bar, input box, separator
# rules) on exit instead of freezing a final copy into scrollback.
# Without this, prompt_toolkit's render_as_done teardown repaints

View file

@ -15,6 +15,7 @@ import contextvars
import json
import logging
import os
import re
import shutil
import subprocess
import sys
@ -45,6 +46,59 @@ from hermes_time import now as _hermes_now
logger = logging.getLogger(__name__)
def _summarize_cron_failure_for_delivery(job: dict, error: str | None) -> str:
"""Return a compact one-line failure message for chat delivery.
Full details stay in the cron output directory and the logs. Chat should
show the operator what broke without dumping provider JSON, retry noise, or
stack traces into the delivery channel.
"""
job_name = job.get("name") or job.get("id") or "cron job"
text = (error or "unknown error").strip()
lower = text.lower()
# Provider/API failures are the common noisy path. Keep these short.
if "429" in text or "rate limit" in lower or "usage limit" in lower:
reason = "rate limit"
if "weekly usage limit" in lower:
reason = "weekly usage limit"
elif "quota" in lower:
reason = "quota limit"
return (
f"⚠️ Cron '{job_name}' failed: provider {reason}. "
"Fallback chain was exhausted or unavailable. "
"Full details saved in cron output."
)
if "readtimeout" in lower or "timed out" in lower or "timeout" in lower:
return (
f"⚠️ Cron '{job_name}' failed: provider timeout. "
"Fallback chain was exhausted or unavailable. "
"Full details saved in cron output."
)
# Match authentication/authorization wording at a word boundary and the
# 401/403 status codes as whole tokens, so "oauth", "4015" and similar do
# not trip a misleading auth message.
if re.search(r"authenticat|authoriz", lower) or re.search(r"\b(401|403)\b", text):
return (
f"⚠️ Cron '{job_name}' failed: provider authentication error. "
"Full details saved in cron output."
)
# Strip common exception wrappers and collapse provider payloads. Bound
# the input first so a multi-KB provider blob cannot slow the
# substitutions.
cleaned = re.sub(
r"^(RuntimeError|Exception|ValueError|HTTPStatusError):\s*",
"", text[:2000],
)
cleaned = re.sub(r"\s+", " ", cleaned).strip()
if len(cleaned) > 180:
cleaned = cleaned[:177].rstrip() + "..."
return f"⚠️ Cron '{job_name}' failed: {cleaned}"
class CronPromptInjectionBlocked(Exception):
"""Raised by _build_job_prompt when the fully-assembled prompt trips the
injection scanner. Caught in run_job so the operator sees a clean
@ -1992,7 +2046,7 @@ def run_one_job(job: dict, *, adapters=None, loop=None, verbose: bool = False) -
# Deliver the final response to the origin/target chat.
# If the agent responded with [SILENT], skip delivery (but
# output is already saved above). Failed jobs always deliver.
deliver_content = final_response if success else f"⚠️ Cron job '{job.get('name', job['id'])}' failed:\n{error}"
deliver_content = final_response if success else _summarize_cron_failure_for_delivery(job, error)
# Treat whitespace-only final responses the same as empty
# responses: do not deliver a blank message, and let the
# empty-response guard below mark the run as a soft failure.

View file

@ -545,6 +545,13 @@ class GatewayConfig:
thread_sessions_per_user: bool = False # When False (default), threads are shared across all participants
max_concurrent_sessions: Optional[int] = None # Positive int caps simultaneous active chat sessions
# Multi-profile multiplexing (opt-in; default off preserves one-gateway-per-profile).
# When True, the default profile's gateway serves inbound messages for every
# profile on the host: profiles are stamped into session keys and (in later
# phases) per-profile adapters/credentials are resolved. When False, the
# gateway behaves exactly as before — single HERMES_HOME, no profile stamping.
multiplex_profiles: bool = False
# Unauthorized DM policy
unauthorized_dm_behavior: str = "pair" # "pair" or "ignore"
@ -650,6 +657,7 @@ class GatewayConfig:
"group_sessions_per_user": self.group_sessions_per_user,
"thread_sessions_per_user": self.thread_sessions_per_user,
"max_concurrent_sessions": self.max_concurrent_sessions,
"multiplex_profiles": self.multiplex_profiles,
"unauthorized_dm_behavior": self.unauthorized_dm_behavior,
"streaming": self.streaming.to_dict(),
"session_store_max_age_days": self.session_store_max_age_days,
@ -695,7 +703,12 @@ class GatewayConfig:
group_sessions_per_user = data.get("group_sessions_per_user")
thread_sessions_per_user = data.get("thread_sessions_per_user")
multiplex_profiles = data.get("multiplex_profiles")
nested_gateway = data.get("gateway") if isinstance(data.get("gateway"), dict) else {}
if multiplex_profiles is None and isinstance(nested_gateway, dict):
# Also honor gateway.multiplex_profiles written by
# ``hermes config set gateway.multiplex_profiles true``.
multiplex_profiles = nested_gateway.get("multiplex_profiles")
if "max_concurrent_sessions" in data:
max_concurrent_raw = data.get("max_concurrent_sessions")
max_concurrent_key = "max_concurrent_sessions"
@ -732,6 +745,7 @@ class GatewayConfig:
stt_enabled=_coerce_bool(stt_enabled, True),
group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
multiplex_profiles=_coerce_bool(multiplex_profiles, False),
max_concurrent_sessions=max_concurrent_sessions,
unauthorized_dm_behavior=unauthorized_dm_behavior,
streaming=StreamingConfig.from_dict(data.get("streaming", {})),
@ -823,6 +837,13 @@ def load_gateway_config() -> GatewayConfig:
if "thread_sessions_per_user" in yaml_cfg:
gw_data["thread_sessions_per_user"] = yaml_cfg["thread_sessions_per_user"]
# Multiplexing flag: accept both the top-level key and the nested
# gateway.multiplex_profiles form (from_dict resolves the nested
# fallback, but surface the top-level key here for parity with the
# other session-scope flags above).
if "multiplex_profiles" in yaml_cfg:
gw_data["multiplex_profiles"] = yaml_cfg["multiplex_profiles"]
gateway_section = yaml_cfg.get("gateway")
if isinstance(gateway_section, dict) and "max_concurrent_sessions" in gateway_section:
gw_data["max_concurrent_sessions"] = gateway_section["max_concurrent_sessions"]
@ -2143,5 +2164,24 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
except Exception as e:
logger.debug("Plugin platform enable pass failed: %s", e)
# Relay (generic connector-fronted platform, EXPERIMENTAL). Enabled when a
# connector relay URL is configured via GATEWAY_RELAY_URL (env) or
# gateway.relay_url (config.yaml). The adapter is registered into the
# platform_registry at gateway startup (gateway.relay.register_relay_adapter)
# and dials OUT to the connector — so, like Telegram/Matrix, it has no public
# inbound port and just needs Platform.RELAY present+enabled in
# config.platforms for start_gateway()'s connect loop to bring it up. The
# connected-checker (Platform.RELAY in _PLATFORM_CONNECTED_CHECKERS) keys on
# extra["relay_url"], so mirror the URL into extra here.
relay_url_env = os.getenv("GATEWAY_RELAY_URL", "").strip()
relay_url_yaml = ""
existing_relay = config.platforms.get(Platform.RELAY)
if existing_relay is not None:
relay_url_yaml = str(existing_relay.extra.get("relay_url") or "").strip()
relay_url_val = relay_url_env or relay_url_yaml
if relay_url_val:
relay_config = _enable_from_env(Platform.RELAY)
relay_config.extra["relay_url"] = relay_url_val.rstrip("/")
for platform_config in config.platforms.values():
platform_config.extra.pop("_enabled_explicit", None)

View file

@ -23,6 +23,58 @@ from typing import Any, Optional
logger = logging.getLogger("gateway.run")
def _acquire_singleton_lock(lock_path) -> "tuple[Optional[object], str]":
"""Take an exclusive, non-blocking advisory lock for the sole dispatcher.
Only one gateway process machine-wide may run the embedded kanban
dispatcher: concurrent dispatchers double the reclaim frequency (each
runs its own ``release_stale_claims`` promote dispatch loop), double
claim-attempt events in the event log, and with ``wal_autocheckpoint=0``
concurrent manual WAL checkpoints can corrupt index pages. The
``dispatch_in_gateway`` config flag is the primary control; this lock is the
backstop that survives config drift and same-profile restart races.
Delegates to :func:`gateway.status._try_acquire_file_lock` (``fcntl`` on
POSIX, ``msvcrt`` on Windows) so the guard is cross-platform.
Returns ``(handle, "held")`` on success the caller keeps the file handle
for the process lifetime and **must** release it via
:func:`_release_singleton_lock` when done. ``(None, "contended")`` when
another process holds the lock (caller must NOT dispatch). ``(None,
"unavailable")`` when locking cannot be performed (non-POSIX filesystem
without flock, or the status.py helpers are unimportable) caller falls
back to config-only control.
"""
try:
from gateway.status import _try_acquire_file_lock # deferred; same package
except ImportError:
return None, "unavailable"
try:
Path(lock_path).parent.mkdir(parents=True, exist_ok=True)
handle = open(str(lock_path), "a+", encoding="utf-8")
except OSError:
return None, "unavailable"
if not _try_acquire_file_lock(handle):
handle.close()
return None, "contended"
return handle, "held"
def _release_singleton_lock(handle) -> None:
"""Release a dispatcher singleton lock acquired via :func:`_acquire_singleton_lock`."""
if handle is None:
return
try:
from gateway.status import _release_file_lock
_release_file_lock(handle)
except Exception:
pass
try:
handle.close()
except Exception:
pass
class GatewayKanbanWatchersMixin:
"""Kanban watcher / notifier / dispatcher loops for GatewayRunner."""
@ -606,6 +658,31 @@ class GatewayKanbanWatchersMixin:
logger.warning("kanban dispatcher: kanban_db not importable; dispatcher disabled")
return
# Single-dispatcher backstop. dispatch_in_gateway defaults to true, so a
# new profile gateway (or a same-profile restart race) can silently
# start a second dispatcher; concurrent dispatchers double reclaim
# frequency, double claim-attempt events, and — with
# wal_autocheckpoint=0 — concurrent manual WAL checkpoints can corrupt
# index pages. The lock lives at the machine-global kanban root
# (shared across profiles by design), so it serialises ALL gateways.
self._kanban_dispatcher_lock_handle = None
_lock_path = _kb.kanban_home() / "kanban" / ".dispatcher.lock"
_lock_handle, _lock_state = _acquire_singleton_lock(_lock_path)
if _lock_state == "contended":
logger.info(
"kanban dispatcher: another gateway already holds the dispatcher "
"lock (%s); this gateway will NOT dispatch.", _lock_path,
)
return
if _lock_state == "held":
self._kanban_dispatcher_lock_handle = _lock_handle # hold for process lifetime
logger.info("kanban dispatcher: holding singleton dispatcher lock (%s)", _lock_path)
else:
logger.warning(
"kanban dispatcher: advisory lock unavailable at %s; proceeding "
"on config control alone.", _lock_path,
)
try:
interval = float(kanban_cfg.get("dispatch_interval_seconds", 60) or 60)
except (ValueError, TypeError):
@ -1052,6 +1129,8 @@ class GatewayKanbanWatchersMixin:
last_warn_at = now
except asyncio.CancelledError:
logger.debug("kanban dispatcher: cancelled")
_release_singleton_lock(self._kanban_dispatcher_lock_handle)
self._kanban_dispatcher_lock_handle = None
raise
except Exception:
logger.exception("kanban dispatcher: unexpected watcher error")
@ -1062,3 +1141,6 @@ class GatewayKanbanWatchersMixin:
while slept < interval and self._running:
await asyncio.sleep(min(1.0, interval - slept))
slept += 1.0
_release_singleton_lock(self._kanban_dispatcher_lock_handle)
self._kanban_dispatcher_lock_handle = None

View file

@ -1043,7 +1043,13 @@ class APIServerAdapter(BasePlatformAdapter):
matching the semantics of the native gateway's ``session_key``.
"""
from run_agent import AIAgent
from gateway.run import _resolve_runtime_agent_kwargs, _resolve_gateway_model, _load_gateway_config, GatewayRunner
from gateway.run import (
_current_max_iterations,
_resolve_runtime_agent_kwargs,
_resolve_gateway_model,
_load_gateway_config,
GatewayRunner,
)
from hermes_cli.tools_config import _get_platform_tools
runtime_kwargs = _resolve_runtime_agent_kwargs()
@ -1053,7 +1059,7 @@ class APIServerAdapter(BasePlatformAdapter):
user_config = _load_gateway_config()
enabled_toolsets = sorted(_get_platform_tools(user_config, "api_server"))
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
max_iterations = _current_max_iterations()
# Load fallback provider chain so the API server platform has the
# same fallback behaviour as Telegram/Discord/Slack (fixes #4954).

View file

@ -57,6 +57,11 @@ from gateway.platforms.base import (
logger = logging.getLogger(__name__)
# Sentinel returned by _resolve_request_profile when a /p/<profile>/ prefix
# names a profile this gateway does not serve (→ 404). Distinct from None
# (no prefix / multiplexing off → handle as the default profile).
_PROFILE_REJECTED = object()
_BUILTIN_DELIVER_PLATFORMS = {
"telegram", "discord", "slack", "signal", "sms", "whatsapp",
"matrix", "mattermost", "homeassistant", "email", "dingtalk",
@ -189,6 +194,14 @@ class WebhookAdapter(BasePlatformAdapter):
app = web.Application()
app.router.add_get("/health", self._handle_health)
app.router.add_post("/webhooks/{route_name}", self._handle_webhook)
# Multi-profile multiplexing: a /p/<profile>/webhooks/<route> prefix
# routes the inbound event to that profile. Same handler; the profile is
# captured from the path and stamped onto the SessionSource so the agent
# turn resolves that profile's config/skills/credentials. Only honored
# when gateway.multiplex_profiles is on (the handler validates).
app.router.add_post(
"/p/{profile}/webhooks/{route_name}", self._handle_webhook
)
# Port conflict detection — fail fast if port is already in use
import socket as _socket
@ -397,6 +410,35 @@ class WebhookAdapter(BasePlatformAdapter):
except Exception as e:
logger.error("[webhook] Failed to reload dynamic routes: %s", e)
def _resolve_request_profile(self, request: "web.Request"):
"""Resolve + validate the /p/<profile>/ URL prefix on a webhook request.
Returns:
- ``None`` when no profile prefix is present, or multiplexing is off
(the prefix is ignored, request handled as the default profile).
- the profile name (str) when present, multiplexing is on, and the
profile is one this gateway serves.
- ``_PROFILE_REJECTED`` when a prefix is present but the profile is
unknown/unconfigured (handler returns 404).
"""
profile = (request.match_info.get("profile") or "").strip()
if not profile:
return None
runner = self.gateway_runner
cfg = getattr(runner, "config", None)
if not getattr(cfg, "multiplex_profiles", False):
# Prefix supplied but multiplexing is off — ignore it, behave as
# the single-profile gateway (don't 404 a would-be valid route).
return None
try:
from hermes_cli.profiles import profiles_to_serve
served = {name for name, _ in profiles_to_serve(multiplex=True)}
except Exception:
return _PROFILE_REJECTED
if profile not in served:
return _PROFILE_REJECTED
return profile
async def _handle_webhook(self, request: "web.Request") -> "web.Response":
"""POST /webhooks/{route_name} — receive and process a webhook event."""
# Hot-reload dynamic subscriptions on each request (mtime-gated, cheap)
@ -405,6 +447,13 @@ class WebhookAdapter(BasePlatformAdapter):
route_name = request.match_info.get("route_name", "")
route_config = self._routes.get(route_name)
# Multi-profile: resolve + validate the /p/<profile>/ prefix if present.
profile = self._resolve_request_profile(request)
if profile is _PROFILE_REJECTED:
return web.json_response(
{"error": "Unknown or unconfigured profile"}, status=404
)
if not route_config:
return web.json_response(
{"error": f"Unknown route: {route_name}"}, status=404
@ -641,6 +690,8 @@ class WebhookAdapter(BasePlatformAdapter):
user_id=f"webhook:{route_name}",
user_name=route_name,
)
if profile and isinstance(profile, str):
source.profile = profile
event = MessageEvent(
text=prompt,
message_type=MessageType.TEXT,

View file

@ -57,6 +57,13 @@ class RelayAdapter(BasePlatformAdapter):
self._transport = transport
# Capability surface read by stream_consumer (getattr(..., 4096)).
self.MAX_MESSAGE_LENGTH = descriptor.max_message_length
# chat_id -> guild_id (Discord) / workspace scope, learned from inbound
# events. The connector's egress guard resolves the owning tenant from
# the OUTBOUND action's metadata.guild_id; the gateway's generic delivery
# path (run.py _thread_metadata_for_source) only carries thread_id, so we
# re-attach the scope here from what we saw inbound. Keyed by chat_id
# (channel) since that's what send() receives. See routedEgressGuard.ts.
self._scope_by_chat: Dict[str, str] = {}
self.supports_code_blocks = descriptor.markdown_dialect not in ("", "plain")
# ── capability surface (from descriptor) ─────────────────────────────
@ -108,8 +115,35 @@ class RelayAdapter(BasePlatformAdapter):
async def _on_inbound(self, event) -> None:
    """Bridge a connector-delivered MessageEvent into the normal adapter path.

    The event's chat -> guild scope is recorded first (via ``_capture_scope``)
    and the event is then handed to ``handle_message``.
    """
    self._capture_scope(event)
    await self.handle_message(event)
def _capture_scope(self, event) -> None:
"""Remember chat_id -> guild scope from an inbound event so our outbound
(the agent's reply) can re-assert it for the connector's egress tenant
resolution. Never raises scope tracking must not break inbound."""
try:
src = getattr(event, "source", None)
scope = getattr(src, "guild_id", None) if src else None
chat = getattr(src, "chat_id", None) if src else None
if scope and chat:
self._scope_by_chat[str(chat)] = str(scope)
except Exception: # noqa: BLE001 - scope tracking must never break inbound
pass
def _with_scope(self, chat_id: str, metadata: Optional[Dict[str, Any]]) -> Dict[str, Any]:
"""Ensure the outbound metadata carries guild_id for the connector's
egress tenant resolution. The connector resolves the owning tenant from
metadata.guild_id (Discord); without it egress is declined as
'target not routed to an onboarded tenant'. No-op when we have no scope
for this chat (e.g. DMs) or it's already present."""
meta: Dict[str, Any] = dict(metadata or {})
if not meta.get("guild_id"):
scope = self._scope_by_chat.get(str(chat_id))
if scope:
meta["guild_id"] = scope
return meta
async def on_interrupt(self, session_key: str, chat_id: str) -> None:
"""Bridge a connector-delivered /stop into the adapter's interrupt path.
@ -140,7 +174,7 @@ class RelayAdapter(BasePlatformAdapter):
"chat_id": chat_id,
"content": content,
"reply_to": reply_to,
"metadata": metadata or {},
"metadata": self._with_scope(chat_id, metadata),
}
)
return SendResult(

View file

@ -54,6 +54,35 @@ _HANDSHAKE_TIMEOUT_S = 30.0
_OUTBOUND_TIMEOUT_S = 30.0
def _ws_dial_url(url: str) -> str:
"""Normalize a connector URL to the ``ws(s)://…/relay`` dial target.
The relay URL is configured once (``GATEWAY_RELAY_URL`` / ``gateway.relay_url``)
as the connector's BASE URL (e.g. ``https://connector.example``) and shared by
both the provision POST (which needs ``http(s):///relay/provision`` see
``_provision_url``) and the WS dial (which needs ``ws(s):///relay``, the path
the connector mounts its ``WebSocketServer`` on). Two normalizations, both
load-bearing:
- scheme: ``https -> wss``, ``http -> ws`` (``websockets.connect`` raises
"scheme isn't ws or wss" on an http(s) URL).
- path: ensure it ends in ``/relay`` (the connector returns HTTP 400 on an
upgrade to any other path, since the WS server is mounted at ``/relay``).
Idempotent: an already-``ws(s):///relay`` URL is returned unchanged, so a URL
configured WITH the scheme and/or ``/relay`` still works.
"""
raw = (url or "").strip()
if raw.startswith("https://"):
raw = "wss://" + raw[len("https://"):]
elif raw.startswith("http://"):
raw = "ws://" + raw[len("http://"):]
raw = raw.rstrip("/")
if not raw.endswith("/relay"):
raw = f"{raw}/relay"
return raw
def _event_from_wire(raw: Dict[str, Any]) -> MessageEvent:
"""Rebuild a MessageEvent from the connector's normalized inbound payload.
@ -118,7 +147,7 @@ class WebSocketRelayTransport:
"WebSocketRelayTransport requires the 'websockets' package "
"(install the messaging extra)."
)
self._url = url
self._url = _ws_dial_url(url)
self._platform = platform
self._bot_id = bot_id
self._connect_timeout_s = connect_timeout_s

View file

@ -195,6 +195,19 @@ def _gateway_platform_value(platform: Any) -> str:
return str(getattr(platform, "value", platform) or "").strip().lower()
def _non_conversational_metadata(
    metadata: Optional[Dict[str, Any]] = None,
    *,
    platform: Any = None,
) -> Optional[Dict[str, Any]]:
    """Flag a Discord lifecycle/status send as non-conversational.

    For the ``discord`` platform a shallow copy of ``metadata`` is returned
    with ``non_conversational`` set to ``True``. For every other platform the
    ``metadata`` argument is handed back untouched (the same object, possibly
    ``None``).
    """
    if _gateway_platform_value(platform) == "discord":
        # dict(mapping, key=value) copies first, so the caller's dict is not mutated.
        return dict(metadata or {}, non_conversational=True)
    return metadata
def _is_transient_network_error(exc: BaseException) -> bool:
"""Return True for transient network errors safe to log + swallow.
@ -1173,13 +1186,31 @@ def _reload_runtime_env_preserving_config_authority() -> None:
pick up rotated API keys. config.yaml remains authoritative for agent budget
settings such as agent.max_turns; otherwise a stale HERMES_MAX_ITERATIONS in
.env can replace the startup bridge on later turns.
In multiplex mode this is a NO-OP for the credential reload: secrets come
from the per-turn ``set_secret_scope`` (installed by ``_profile_runtime_scope``)
which loads the routed profile's ``.env`` into an isolated mapping. Mutating
the process-global ``os.environ`` here would defeat that isolation and leak
the default profile's keys to every profile's turns and subprocesses.
"""
from agent.secret_scope import is_multiplex_active
if is_multiplex_active():
# Credentials are resolved from the active profile's secret scope, not
# os.environ. Still honor config.yaml's agent.max_turns bridge below
# using the scoped home, but never reload .env into global env.
_bridge_max_turns_from_config(_hermes_home)
return
load_hermes_dotenv(
hermes_home=_hermes_home,
project_env=Path(__file__).resolve().parents[1] / '.env',
)
_bridge_max_turns_from_config(_hermes_home)
config_path = _hermes_home / 'config.yaml'
def _bridge_max_turns_from_config(home: "Path") -> None:
"""Bridge config.yaml agent.max_turns into HERMES_MAX_ITERATIONS (a global)."""
config_path = home / 'config.yaml'
if not config_path.exists():
return
try:
@ -1196,6 +1227,80 @@ def _reload_runtime_env_preserving_config_authority() -> None:
os.environ["HERMES_MAX_ITERATIONS"] = str(agent_cfg["max_turns"])
def _current_max_iterations() -> int:
    """Refresh the runtime env, then report the per-turn iteration budget.

    Reads ``HERMES_MAX_ITERATIONS`` after calling
    ``_reload_runtime_env_preserving_config_authority()``. Falls back to 90
    when the variable is unset or is not a valid integer.
    """
    _reload_runtime_env_preserving_config_authority()
    raw_budget = os.getenv("HERMES_MAX_ITERATIONS", "90")
    try:
        budget = int(raw_budget)
    except (TypeError, ValueError):
        budget = 90
    return budget
from contextlib import contextmanager as _contextmanager
# Platforms that bind a host TCP port (HTTP/webhook listeners). In a profile
# multiplexer the default profile owns the single shared listener and serves
# every profile through the /p/<profile>/ URL prefix, so a SECONDARY profile
# enabling one of these is always a misconfiguration: it would try to bind a
# port already held by the default's listener. We hard-error on it rather than
# silently dropping the adapter (see _start_one_profile_adapters).
# Stored as platform .value strings since the Platform enum is imported below.
_PORT_BINDING_PLATFORM_VALUES = frozenset({
"webhook",
"api_server",
"msgraph_webhook",
"feishu",
"wecom_callback",
"bluebubbles",
"sms",
})
class MultiplexConfigError(RuntimeError):
    """Raised when the profile-multiplexer configuration is invalid.

    This is a fail-fast startup error, not a transient adapter-connect
    failure. A transient failure is logged and the gateway stays alive to
    retry; a config error means the operator has to fix config.yaml, so
    startup is aborted cleanly instead.
    """
@_contextmanager
def _profile_runtime_scope(profile_home: "Path"):
    """Scope config/skills/memory AND credentials to a profile for one turn.

    Combines the two seams the multiplexer needs:

    1. ``set_hermes_home_override`` redirects ``get_hermes_home()`` (config,
       skills, memory, SOUL, sessions) to the profile's home. Contextvar, so
       it propagates into the agent worker thread via ``copy_context()``.
    2. ``set_secret_scope`` installs the profile's ``.env`` secrets as the
       authoritative credential source, so ``get_secret`` reads this profile's
       keys and never the process-global ``os.environ`` (which in a
       multiplexer may hold another profile's values).

    Only used on the multiplexed inbound path. Single-profile gateways never
    enter this scope, so their behavior is unchanged. Loading the profile's
    ``.env`` here does NOT mutate ``os.environ``: ``build_profile_secret_scope``
    returns an isolated dict, which is what keeps subprocesses (MCP, kanban)
    from inheriting cross-profile secrets.

    Args:
        profile_home: Home directory of the profile to scope to.

    Teardown runs in reverse order of setup (secret scope first, then the home
    override). The home override is also reset when building or installing the
    secret scope fails, so a bad profile ``.env`` cannot leave the override
    set for whatever runs next in this context.
    """
    from hermes_constants import set_hermes_home_override, reset_hermes_home_override
    from agent.secret_scope import (
        build_profile_secret_scope,
        set_secret_scope,
        reset_secret_scope,
    )

    home_token = set_hermes_home_override(str(profile_home))
    try:
        # Inside the outer try so a failure here still resets the home override.
        secret_token = set_secret_scope(build_profile_secret_scope(Path(profile_home)))
        try:
            yield
        finally:
            reset_secret_scope(secret_token)
    finally:
        reset_hermes_home_override(home_token)
# Matches a whole volume spec of the form ``<host>:<container>[:<options>]``.
# ``container`` must start with "/" and contain no ":"; the optional trailing
# ``options`` group contains no ":" either. ``host`` is a greedy ``.+`` and so
# may itself contain ":".
_DOCKER_VOLUME_SPEC_RE = re.compile(r"^(?P<host>.+):(?P<container>/[^:]+?)(?::(?P<options>[^:]+))?$")
# NOTE(review): presumably the container-side mount points treated as media
# output directories; the code that consumes this set is outside this chunk.
_DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS = {"/output", "/outputs"}
@ -2240,7 +2345,22 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
def __init__(self, config: Optional[GatewayConfig] = None):
global _gateway_runner_ref
self.config = config or load_gateway_config()
# Mark the process as a profile multiplexer when configured. This flips
# agent.secret_scope.get_secret() to fail-closed on any unscoped
# credential read, so a missed migration crashes loudly instead of
# leaking a cross-profile value (Workstream A). Inert when off.
try:
from agent.secret_scope import set_multiplex_active
set_multiplex_active(bool(getattr(self.config, "multiplex_profiles", False)))
except Exception:
logger.debug("could not set multiplex-active flag", exc_info=True)
self.adapters: Dict[Platform, BasePlatformAdapter] = {}
# Multi-profile multiplexing: adapters for NON-default profiles live
# here, keyed by profile name then Platform. self.adapters stays the
# default/active profile's map so the ~93 existing self.adapters[...]
# sites are untouched when multiplexing is off (this dict is empty).
# Populated by _start_secondary_profile_adapters().
self._profile_adapters: Dict[str, Dict[Platform, BasePlatformAdapter]] = {}
self._warn_if_docker_media_delivery_is_risky()
_gateway_runner_ref = _weakref.ref(self)
@ -2792,10 +2912,24 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
except Exception:
pass
config = getattr(self, "config", None)
# Mirror SessionStore._resolve_profile_for_key so this fallback path
# produces the same namespace as the primary path: None (legacy
# agent:main) unless multiplexing is on, then the active profile.
_profile = None
if getattr(config, "multiplex_profiles", False):
if source.profile:
_profile = source.profile
else:
try:
from hermes_cli.profiles import get_active_profile_name
_profile = get_active_profile_name() or "default"
except Exception:
_profile = None
return build_session_key(
source,
group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
thread_sessions_per_user=getattr(config, "thread_sessions_per_user", False),
profile=_profile,
)
def _telegram_topic_mode_enabled(self, source: SessionSource) -> bool:
@ -5335,7 +5469,30 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
"attempts": 1,
"next_retry": time.monotonic() + 30,
}
# Multi-profile multiplexing: bring up adapters for every OTHER profile
# this gateway serves. Each profile's adapters connect under that
# profile's home + credential scope and stamp their inbound events with
# the profile so the agent turn resolves correctly. No-op when off.
try:
_secondary_connected = await self._start_secondary_profile_adapters()
connected_count += _secondary_connected
except MultiplexConfigError as e:
# Invalid multiplexer config — abort startup cleanly so the operator
# fixes config.yaml rather than running a half-wired gateway.
reason = str(e)
logger.error("Gateway multiplexer config error: %s", reason)
try:
from gateway.status import write_runtime_status
write_runtime_status(gateway_state="startup_failed", exit_reason=reason)
except Exception:
pass
self._request_clean_exit(reason)
self._startup_restore_in_progress = False
return True
except Exception as e:
logger.error("Secondary-profile adapter startup failed: %s", e, exc_info=True)
if connected_count == 0:
if startup_nonretryable_errors:
reason = "; ".join(startup_nonretryable_errors)
@ -6342,6 +6499,22 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
time.monotonic() - _adapter_started_at,
e,
)
# Disconnect secondary-profile adapters (multiplex mode).
for _prof, _amap in list(getattr(self, "_profile_adapters", {}).items()):
for platform, adapter in list(_amap.items()):
try:
await adapter.cancel_background_tasks()
except Exception as e:
logger.debug("%s bg-cancel error (profile %s): %s", platform.value, _prof, e)
try:
await adapter.disconnect()
logger.info("%s disconnected (profile: %s)", platform.value, _prof)
except Exception as e:
logger.error("%s disconnect error (profile %s): %s", platform.value, _prof, e)
_amap.clear()
if hasattr(self, "_profile_adapters"):
self._profile_adapters.clear()
logger.info(
"Shutdown phase: all adapters disconnected at +%.2fs",
_phase_elapsed(),
@ -6511,6 +6684,175 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
"""Wait for shutdown signal."""
await self._shutdown_event.wait()
async def _start_secondary_profile_adapters(self) -> int:
"""Bring up adapters for every non-active profile this gateway serves.
Returns the number of secondary adapters that connected. No-op (returns
0) unless ``gateway.multiplex_profiles`` is on.
Each profile's adapters are created and connected under that profile's
HERMES_HOME + secret scope (``_profile_runtime_scope``), stored in
``self._profile_adapters[profile]``, and given a message handler that
stamps ``source.profile`` before delegating to the shared
``_handle_message`` so the agent turn resolves that profile's config,
skills, and credentials. Same-platform credential collisions (two
profiles polling the same bot token) are detected and refused here, the
only point that sees every profile's resolved credentials together.
"""
if not getattr(self.config, "multiplex_profiles", False):
return 0
try:
from hermes_cli.profiles import profiles_to_serve, get_active_profile_name
except Exception:
return 0
active = get_active_profile_name() or "default"
connected = 0
# (platform, token-fingerprint) -> profile that claimed it. Detects two
# profiles trying to poll the same bot credential (impossible to do
# concurrently). Seed with the active profile's adapters.
claimed: Dict[tuple, str] = {}
for _plat, _ad in self.adapters.items():
fp = self._adapter_credential_fingerprint(_ad)
if fp is not None:
claimed[(_plat, fp)] = active
for profile_name, profile_home in profiles_to_serve(multiplex=True):
if profile_name == active:
continue # handled by the primary startup loop
try:
connected += await self._start_one_profile_adapters(
profile_name, profile_home, claimed
)
except MultiplexConfigError:
# Config error (e.g. a secondary profile binding a port) is not
# transient — propagate so startup aborts cleanly instead of
# limping along with a half-configured multiplexer.
raise
except Exception as e:
logger.error(
"Failed to start adapters for profile '%s': %s",
profile_name, e, exc_info=True,
)
# Record served profiles in runtime status for `hermes status`.
try:
from gateway.status import write_runtime_status
served = [active] + sorted(self._profile_adapters.keys())
write_runtime_status(served_profiles=served)
except Exception:
logger.debug("could not record served_profiles", exc_info=True)
return connected
async def _start_one_profile_adapters(
    self, profile_name: str, profile_home: "Path", claimed: Dict[tuple, str]
) -> int:
    """Create+connect one profile's adapters under its runtime scope.

    Args:
        profile_name: Name of the secondary profile being started; used as
            the key into ``self._profile_adapters`` and stamped on inbound
            events via the per-profile message handler.
        profile_home: The profile's home directory, passed to
            ``_profile_runtime_scope`` while loading config, creating the
            adapter and connecting it.
        claimed: Shared ``(platform, credential fingerprint) -> profile name``
            map. It is read to detect duplicates and mutated in place when
            this profile claims a new credential.

    Returns:
        The number of this profile's adapters that connected successfully.

    Raises:
        MultiplexConfigError: when the profile enables a platform listed in
            ``_PORT_BINDING_PLATFORM_VALUES``.
    """
    from gateway.config import load_gateway_config
    # Load the gateway config while the profile's home/secret scope is active,
    # so the config read is the profile's own rather than the process's.
    with _profile_runtime_scope(profile_home):
        profile_cfg = load_gateway_config()
    # The entry is created up front, so the profile appears in
    # self._profile_adapters even if none of its adapters end up connecting.
    profile_map = self._profile_adapters.setdefault(profile_name, {})
    connected = 0
    for platform, platform_config in profile_cfg.platforms.items():
        if not platform_config.enabled:
            continue
        # A secondary profile must NOT enable a port-binding platform: the
        # default profile's listener already serves every profile via the
        # /p/<profile>/ prefix, so a second bind can only collide. This is a
        # config error, not a transient failure — fail fast and loud.
        # NOTE(review): this is raised from inside the loop, so platforms
        # listed earlier for this profile may already be connected by then.
        if platform.value in _PORT_BINDING_PLATFORM_VALUES:
            raise MultiplexConfigError(
                f"Profile '{profile_name}' enables the port-binding platform "
                f"'{platform.value}', but gateway.multiplex_profiles is on. The "
                f"default profile owns the single shared HTTP listener and "
                f"serves every profile through the /p/{profile_name}/ URL "
                f"prefix — a secondary profile cannot bind its own port. "
                f"Remove platforms.{platform.value} from profile "
                f"'{profile_name}'s config.yaml (configure it only on the "
                f"default profile)."
            )
        with _profile_runtime_scope(profile_home):
            adapter = self._create_adapter(platform, platform_config)
        if not adapter:
            continue
        # Same-token conflict detection — refuse a duplicate poll.
        fp = self._adapter_credential_fingerprint(adapter)
        if fp is not None:
            owner = claimed.get((platform, fp))
            if owner is not None:
                logger.error(
                    "Profile '%s' and '%s' both configure %s with the same "
                    "credential — refusing to start the duplicate (a single "
                    "bot token cannot be polled twice). Give each profile its "
                    "own %s credential.",
                    owner, profile_name, platform.value, platform.value,
                )
                await self._safe_adapter_disconnect(adapter, platform)
                continue
            # First claimant wins; later profiles with the same credential
            # take the refusal branch above.
            claimed[(platform, fp)] = profile_name
        # Stamp every inbound event from this adapter with its profile so
        # the agent turn (and session key) resolve to the right home.
        adapter.set_message_handler(
            self._make_profile_message_handler(profile_name)
        )
        # The remaining callbacks and the session store are the runner's
        # shared ones, not per-profile objects.
        adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
        adapter.set_session_store(self.session_store)
        adapter.set_busy_session_handler(self._handle_active_session_busy_message)
        adapter.set_topic_recovery_fn(self._recover_telegram_topic_thread_id)
        adapter._busy_text_mode = self._busy_text_mode
        try:
            with _profile_runtime_scope(profile_home):
                success = await self._connect_adapter_with_timeout(adapter, platform)
            if success:
                # Only adapters that actually connected are recorded/counted.
                profile_map[platform] = adapter
                connected += 1
                logger.info("%s connected (profile: %s)", platform.value, profile_name)
            else:
                logger.warning("%s failed to connect (profile: %s)", platform.value, profile_name)
                await self._safe_adapter_disconnect(adapter, platform)
        except Exception as e:
            # A connect error for one platform is logged and cleaned up; the
            # loop then continues with the profile's remaining platforms.
            logger.error("%s error (profile: %s): %s", platform.value, profile_name, e)
            await self._safe_adapter_disconnect(adapter, platform)
    return connected
def _make_profile_message_handler(self, profile_name: str):
"""Return a message handler that stamps source.profile then delegates."""
async def _handler(event):
try:
if getattr(event, "source", None) is not None and not event.source.profile:
event.source.profile = profile_name
except Exception:
pass
return await self._handle_message(event)
return _handler
@staticmethod
def _adapter_credential_fingerprint(adapter: Any) -> Optional[str]:
"""Return a stable, log-safe fingerprint of an adapter's credential.
Used only to detect two profiles claiming the same bot token. Returns a
salted hash (never the token itself) of the adapter's primary
credential, or None when no credential is discoverable (in which case
we don't attempt conflict detection for it).
"""
token = None
for attr in ("token", "bot_token", "_token", "api_token", "_bot_token"):
val = getattr(adapter, attr, None)
if isinstance(val, str) and val.strip():
token = val.strip()
break
if not token:
return None
import hashlib
return hashlib.sha256(("hermes-mux:" + token).encode("utf-8")).hexdigest()[:16]
def _create_adapter(
self,
platform: Platform,
@ -10633,7 +10975,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
disabled_toolsets = agent_cfg.get("disabled_toolsets") or None
pr = self._provider_routing
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
max_iterations = _current_max_iterations()
reasoning_config = self._resolve_session_reasoning_config(source=source)
self._reasoning_config = reasoning_config
self._service_tier = self._load_service_tier()
@ -11737,7 +12079,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
chunks = [clean[i:i + max_chunk] for i in range(0, len(clean), max_chunk)]
for chunk in chunks:
try:
await adapter.send(chat_id, f"```\n{chunk}\n```", metadata=metadata)
await adapter.send(
chat_id,
f"```\n{chunk}\n```",
metadata=_non_conversational_metadata(metadata, platform=platform),
)
except Exception as e:
logger.debug("Update stream send failed: %s", e)
@ -11760,12 +12106,16 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
exit_code_raw = exit_code_path.read_text().strip() or "1"
exit_code = int(exit_code_raw)
if exit_code == 0:
await adapter.send(chat_id, "✅ Hermes update finished.", metadata=metadata)
await adapter.send(
chat_id,
"✅ Hermes update finished.",
metadata=_non_conversational_metadata(metadata, platform=platform),
)
else:
await adapter.send(
chat_id,
"❌ Hermes update failed (exit code {}).".format(exit_code),
metadata=metadata,
metadata=_non_conversational_metadata(metadata, platform=platform),
)
logger.info("Update finished (exit=%s), notified %s", exit_code, session_key)
except Exception as e:
@ -11816,7 +12166,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
prompt=prompt_text,
default=default,
session_key=session_key,
metadata=metadata,
metadata=_non_conversational_metadata(metadata, platform=platform),
)
sent_buttons = True
except Exception as btn_err:
@ -11830,7 +12180,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
f"{prompt_text}{default_hint}\n\n"
f"Reply `{_p}approve` (yes) or `{_p}deny` (no), "
f"or type your answer directly.",
metadata=metadata,
metadata=_non_conversational_metadata(metadata, platform=platform),
)
# Keep the prompt marker on disk until the user
# answers. If the gateway restarts mid-prompt, the
@ -11854,7 +12204,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
await adapter.send(
chat_id,
"❌ Hermes update timed out after 30 minutes.",
metadata=metadata,
metadata=_non_conversational_metadata(metadata, platform=platform),
)
except Exception:
pass
@ -11960,7 +12310,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
msg = "✅ Hermes update finished successfully."
else:
msg = "❌ Hermes update failed. Check the gateway logs or run `hermes update` manually for details."
await adapter.send(chat_id, msg, metadata=metadata)
await adapter.send(
chat_id,
msg,
metadata=_non_conversational_metadata(metadata, platform=platform),
)
logger.info(
"Sent post-update notification to %s:%s (exit=%s)",
platform_str,
@ -12023,7 +12377,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
result = await adapter.send(
str(chat_id),
"♻ Gateway restarted successfully. Your session continues.",
metadata=metadata,
metadata=_non_conversational_metadata(metadata, platform=platform),
)
# adapter.send() catches provider errors (e.g. "Chat not found")
# and returns SendResult(success=False) rather than raising, so
@ -12090,9 +12444,21 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
adapter=adapter,
)
if metadata:
result = await adapter.send(str(home.chat_id), message, metadata=metadata)
result = await adapter.send(
str(home.chat_id),
message,
metadata=_non_conversational_metadata(metadata, platform=platform),
)
else:
result = await adapter.send(str(home.chat_id), message)
_startup_meta = _non_conversational_metadata(platform=platform)
if _startup_meta:
result = await adapter.send(
str(home.chat_id),
message,
metadata=_startup_meta,
)
else:
result = await adapter.send(str(home.chat_id), message)
if result is not None and getattr(result, "success", True) is False:
logger.warning(
"Home-channel startup notification failed for %s:%s: %s",
@ -12733,7 +13099,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
if adapter and chat_id:
try:
send_meta = {"thread_id": thread_id} if thread_id else None
await adapter.send(chat_id, message_text, metadata=send_meta)
await adapter.send(
chat_id,
message_text,
metadata=_non_conversational_metadata(send_meta, platform=platform_name),
)
except Exception as e:
logger.error("Watcher delivery error: %s", e)
break
@ -12754,7 +13124,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
if adapter and chat_id:
try:
send_meta = {"thread_id": thread_id} if thread_id else None
await adapter.send(chat_id, message_text, metadata=send_meta)
await adapter.send(
chat_id,
message_text,
metadata=_non_conversational_metadata(send_meta, platform=platform_name),
)
except Exception as e:
logger.error("Watcher delivery error: %s", e)
@ -13740,6 +14114,64 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
channel_prompt: Optional[str] = None,
persist_user_message: Optional[str] = None,
persist_user_timestamp: Optional[float] = None,
) -> Dict[str, Any]:
"""Profile-scoping wrapper around the agent run.
When multiplexing is active, resolve the inbound source's profile and
run the whole turn inside ``_profile_runtime_scope`` so config/skills/
memory resolve to that profile's home AND credentials resolve from that
profile's secret scope (never the process-global ``os.environ``). When
multiplexing is off this is a transparent pass-through — zero behavior
change for single-profile gateways.
"""
if not getattr(getattr(self, "config", None), "multiplex_profiles", False):
return await self._run_agent_inner(
message, context_prompt, history, source, session_id,
session_key=session_key, run_generation=run_generation,
_interrupt_depth=_interrupt_depth, event_message_id=event_message_id,
channel_prompt=channel_prompt, persist_user_message=persist_user_message,
persist_user_timestamp=persist_user_timestamp,
)
profile_home = self._resolve_profile_home_for_source(source)
with _profile_runtime_scope(profile_home):
return await self._run_agent_inner(
message, context_prompt, history, source, session_id,
session_key=session_key, run_generation=run_generation,
_interrupt_depth=_interrupt_depth, event_message_id=event_message_id,
channel_prompt=channel_prompt, persist_user_message=persist_user_message,
persist_user_timestamp=persist_user_timestamp,
)
def _resolve_profile_home_for_source(self, source: SessionSource) -> "Path":
    """Pick the profile home directory that should serve an inbound source.

    The profile name is chosen in this order: the whitespace-stripped
    ``source.profile`` (set by the /p/<profile>/ URL prefix or a
    per-credential adapter), then the active profile name, then
    ``"default"``. The name is mapped to a directory with
    ``get_profile_dir``.

    If resolving the name or its directory raises, the current
    ``get_hermes_home()`` is returned instead.
    """
    from hermes_cli.profiles import get_active_profile_name, get_profile_dir

    try:
        routed = (source.profile or "").strip()
        if routed:
            chosen = routed
        else:
            chosen = get_active_profile_name() or "default"
        return get_profile_dir(chosen)
    except Exception:
        from hermes_constants import get_hermes_home

        return get_hermes_home()
async def _run_agent_inner(
self,
message: str,
context_prompt: str,
history: List[Dict[str, Any]],
source: SessionSource,
session_id: str,
session_key: str = None,
run_generation: Optional[int] = None,
_interrupt_depth: int = 0,
event_message_id: Optional[str] = None,
channel_prompt: Optional[str] = None,
persist_user_message: Optional[str] = None,
persist_user_timestamp: Optional[float] = None,
) -> Dict[str, Any]:
"""
Run the agent with the given message and context.
@ -14135,6 +14567,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
if _progress_thread_id == source.thread_id
else {"thread_id": _progress_thread_id}
) if _progress_thread_id else None
_progress_metadata = _non_conversational_metadata(_progress_metadata, platform=source.platform)
_progress_reply_to = (
event_message_id
if source.platform in (Platform.FEISHU, Platform.MATTERMOST) and source.thread_id and event_message_id
@ -14581,9 +15014,6 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
# session_key is now set via contextvars in _set_session_env()
# (concurrency-safe). Keep os.environ as fallback for CLI/cron.
os.environ["HERMES_SESSION_KEY"] = session_key or ""
# Read from env var or use default (same as CLI)
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
# Map platform enum to the platform hint key the agent understands.
# Platform.LOCAL ("local") maps to "cli"; others pass through as-is.
@ -14598,10 +15028,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
if self._ephemeral_system_prompt:
combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip()
# Re-read .env and config for fresh credentials (gateway is long-lived,
# keys may change without restart). Keep config.yaml authoritative for
# runtime budget settings bridged into env vars.
_reload_runtime_env_preserving_config_authority()
max_iterations = _current_max_iterations()
try:
model, runtime_kwargs = self._resolve_session_agent_runtime(
@ -14799,6 +15226,9 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
except KeyError:
pass
self._init_cached_agent_for_turn(agent, _interrupt_depth)
# Refresh agent max_iterations from current config
# (cached agent may have been created with old config)
agent.max_iterations = max_iterations
logger.debug("Reusing cached agent for session %s", session_key)
if agent is None:
@ -14900,7 +15330,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
_status_adapter.send(
_status_chat_id,
message,
metadata=_status_thread_metadata,
metadata=_non_conversational_metadata(_status_thread_metadata, platform=source.platform),
),
_loop_for_step,
logger=logger,
@ -15742,7 +16172,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
_notify_res = await _notify_adapter.send(
source.chat_id,
_heartbeat_text,
metadata=_status_thread_metadata,
metadata=_non_conversational_metadata(_status_thread_metadata, platform=source.platform),
)
if getattr(_notify_res, "success", False) and getattr(
_notify_res, "message_id", None

View file

@ -92,6 +92,11 @@ class SessionSource:
parent_chat_id: Optional[str] = None # Parent channel when chat_id refers to a thread
message_id: Optional[str] = None # ID of the triggering message (for pin/reply/react)
role_authorized: bool = False # True when adapter granted access via role (not user ID)
# Profile this inbound message is routed to in a multiplexing gateway
# (from the /p/<profile>/ URL prefix or per-credential adapter ownership).
# None => the gateway's active/default profile. Drives both session-key
# namespacing and the per-turn config/credential scope.
profile: Optional[str] = None
@property
def description(self) -> str:
@ -135,6 +140,8 @@ class SessionSource:
d["parent_chat_id"] = self.parent_chat_id
if self.message_id:
d["message_id"] = self.message_id
if self.profile:
d["profile"] = self.profile
return d
@classmethod
@ -153,6 +160,7 @@ class SessionSource:
guild_id=data.get("guild_id"),
parent_chat_id=data.get("parent_chat_id"),
message_id=data.get("message_id"),
profile=data.get("profile"),
)
@ -615,15 +623,41 @@ def is_shared_multi_user_session(
return not group_sessions_per_user
def _session_key_namespace(profile: Optional[str]) -> str:
"""Return the ``agent:<ns>`` namespace prefix for a session key.
The historical key format is ``agent:main:<platform>:<chat_type>:...`` where
``main`` is a static namespace literal (NOT a branch name branching keys
off ``session_id``, not this slot). Multi-profile multiplexing reuses this
slot to carry the profile:
- default profile (or ``None``/``""``/``"default"``) ``agent:main``
BYTE-IDENTICAL to every key ever generated, so existing sessions and all
positional parsers (``parts[2]`` == platform, etc.) are unaffected.
- named profile ``coder`` ``agent:coder`` keeps the same positional
layout, just a different namespace, so two profiles serving the same
platform/chat never collide.
"""
if not profile or profile == "default":
return "agent:main"
return f"agent:{profile}"
def build_session_key(
source: SessionSource,
group_sessions_per_user: bool = True,
thread_sessions_per_user: bool = False,
profile: Optional[str] = None,
) -> str:
"""Build a deterministic session key from a message source.
This is the single source of truth for session key construction.
``profile`` selects the key namespace (see :func:`_session_key_namespace`).
It defaults to ``None`` — the legacy ``agent:main`` namespace — so callers
that don't multiplex produce byte-identical keys to before. Only the
multiplexing gateway passes a non-default profile.
DM rules:
- DMs include chat_id when present, so each private conversation is isolated.
- thread_id further differentiates threaded DMs within the same DM chat.
@ -643,6 +677,7 @@ def build_session_key(
shared session per chat.
- Without identifiers, messages fall back to one session per platform/chat_type.
"""
ns = _session_key_namespace(profile)
platform = source.platform.value
if source.chat_type == "dm":
dm_chat_id = source.chat_id
@ -651,12 +686,12 @@ def build_session_key(
if dm_chat_id:
if source.thread_id:
return f"agent:main:{platform}:dm:{dm_chat_id}:{source.thread_id}"
return f"agent:main:{platform}:dm:{dm_chat_id}"
return f"{ns}:{platform}:dm:{dm_chat_id}:{source.thread_id}"
return f"{ns}:{platform}:dm:{dm_chat_id}"
# No chat_id — fall back to the sender's own identifier before the
# bare per-platform sink. Without this, every DM from every user that
# arrives without a chat_id (non-standard adapters / synthetic sources)
# collapses into one shared "agent:main:<platform>:dm" session, and a
# collapses into one shared "<ns>:<platform>:dm" session, and a
# single cached agent ends up serving multiple people's conversations —
# cross-user history bleed. participant_id keeps DMs isolated per user.
dm_participant_id = source.user_id_alt or source.user_id
@ -667,11 +702,11 @@ def build_session_key(
)
if dm_participant_id:
if source.thread_id:
return f"agent:main:{platform}:dm:{dm_participant_id}:{source.thread_id}"
return f"agent:main:{platform}:dm:{dm_participant_id}"
return f"{ns}:{platform}:dm:{dm_participant_id}:{source.thread_id}"
return f"{ns}:{platform}:dm:{dm_participant_id}"
if source.thread_id:
return f"agent:main:{platform}:dm:{source.thread_id}"
return f"agent:main:{platform}:dm"
return f"{ns}:{platform}:dm:{source.thread_id}"
return f"{ns}:{platform}:dm"
participant_id = source.user_id_alt or source.user_id
if participant_id and source.platform == Platform.WHATSAPP:
@ -679,7 +714,7 @@ def build_session_key(
# single group member gets two isolated per-user sessions when the
# bridge reshuffles alias forms.
participant_id = canonical_whatsapp_identifier(str(participant_id)) or participant_id
key_parts = ["agent:main", platform, source.chat_type]
key_parts = [ns, platform, source.chat_type]
if source.chat_id:
key_parts.append(source.chat_id)
@ -775,12 +810,32 @@ class SessionStore:
logger.debug("Could not remove temp file %s: %s", tmp_path, e)
raise
def _resolve_profile_for_key(self, source: Optional[SessionSource] = None) -> Optional[str]:
"""Return the profile namespace for session keys, or None when off.
When ``multiplex_profiles`` is disabled (default), returns ``None`` so
keys stay in the legacy ``agent:main`` namespace byte-identical to
before. When enabled, prefers the profile the inbound source was routed
to (``source.profile`` set by the /p/<profile>/ URL prefix or
per-credential adapter), falling back to the active profile name.
"""
if not getattr(self.config, "multiplex_profiles", False):
return None
if source is not None and source.profile:
return source.profile
try:
from hermes_cli.profiles import get_active_profile_name
return get_active_profile_name() or "default"
except Exception:
return None
def _generate_session_key(self, source: SessionSource) -> str:
    """Build the session key for ``source`` from this store's configuration.

    Delegates to ``build_session_key`` with the store's per-user grouping
    settings (defaults: groups per user on, threads per user off) and the
    profile namespace chosen by ``_resolve_profile_for_key``.
    """
    cfg = self.config
    per_user_groups = getattr(cfg, "group_sessions_per_user", True)
    per_user_threads = getattr(cfg, "thread_sessions_per_user", False)
    key_profile = self._resolve_profile_for_key(source)
    return build_session_key(
        source,
        group_sessions_per_user=per_user_groups,
        thread_sessions_per_user=per_user_threads,
        profile=key_profile,
    )
def _is_session_expired(self, entry: SessionEntry) -> bool:

View file

@ -1030,12 +1030,13 @@ class GatewaySlashCommandsMixin:
)
async def _handle_model_command(self, event: MessageEvent) -> Optional[str]:
"""Handle /model command — switch model for this session.
"""Handle /model command — switch model.
Supports:
/model interactive picker (Telegram/Discord) or text list
/model <name> switch for this session only
/model <name> --global switch and persist to config.yaml
/model <name> switch model (persists by default)
/model <name> --session switch for this session only
/model <name> --global switch and persist (explicit)
/model <name> --provider <provider> switch provider + model
/model --provider <provider> switch to provider, auto-detect model
"""
@ -1043,6 +1044,7 @@ class GatewaySlashCommandsMixin:
import yaml
from hermes_cli.model_switch import (
switch_model as _switch_model, parse_model_flags,
resolve_persist_behavior,
list_authenticated_providers,
list_picker_providers,
)
@ -1050,8 +1052,15 @@ class GatewaySlashCommandsMixin:
raw_args = event.get_command_args().strip()
# Parse --provider, --global, and --refresh flags
model_input, explicit_provider, persist_global, force_refresh = parse_model_flags(raw_args)
# Parse --provider, --global, --session, and --refresh flags
(
model_input,
explicit_provider,
is_global_flag,
force_refresh,
is_session,
) = parse_model_flags(raw_args)
persist_global = resolve_persist_behavior(is_global_flag, is_session)
# --refresh: bust the disk cache so the picker shows live data.
if force_refresh:
@ -1362,7 +1371,7 @@ class GatewaySlashCommandsMixin:
# override rather than relying on cache signature mismatch detection.
self._evict_cached_agent(session_key)
# Persist to config if --global
# Persist to config (default) unless --session opted out
if persist_global:
try:
if config_path.exists():

View file

@ -14,6 +14,7 @@ concurrently under distinct configurations).
import hashlib
import json
import os
import shlex
import signal
import subprocess
import sys
@ -164,20 +165,86 @@ def _read_process_cmdline(pid: int) -> Optional[str]:
return None
def looks_like_gateway_command_line(command: str | None) -> bool:
"""Return True only for a real ``gateway run`` process command line.
Lifecycle decisions (is the gateway up? did restart relaunch it?) must not
fire on loose substring matches. The previous ``"... gateway" in cmdline``
test also matched ``hermes_cli.main gateway status`` and even unrelated
processes like ``python -m tui_gateway`` -- which made ``restart()`` race
against a still-draining old process and ``status``/``start`` report false
positives. This requires the actual ``gateway`` subcommand followed by
``run`` (or one of the gateway-dedicated entrypoints), excluding the other
``gateway`` management subcommands and any process that merely contains the
word "gateway".
Tokenizes quote-aware (``shlex``) so quoted Windows paths with spaces
(``"C:\\Program Files\\...\\hermes-gateway.exe"``) survive, and strips
``--profile``/``-p`` selectors from anywhere in argv -- Hermes's
``_apply_profile_override`` removes them before argparse, so the profile
flag (and a profile literally named ``gateway``) can legally appear on
either side of the ``gateway`` subcommand.
"""
if not command:
return False
try:
raw_tokens = shlex.split(command, posix=False)
except ValueError:
raw_tokens = command.split()
# Strip surrounding quotes, normalize slashes + case per token.
tokens = [t.strip("\"'").replace("\\", "/").lower() for t in raw_tokens]
if not tokens:
return False
# Gateway-dedicated entrypoints carry no subcommand to inspect.
for token in tokens:
if token == "gateway/run.py" or token.endswith("/gateway/run.py"):
return True
basename = token.rsplit("/", 1)[-1]
if basename in ("hermes-gateway", "hermes-gateway.exe"):
return True
joined = " ".join(tokens)
has_gateway_entry = (
"hermes_cli.main" in joined
or "hermes_cli/main.py" in joined
or any(t.rsplit("/", 1)[-1] in ("hermes", "hermes.exe") for t in tokens)
)
if not has_gateway_entry:
return False
# Drop profile selectors anywhere: --profile X / -p X / --profile=X / -p=X.
# This consumes a profile VALUE of "gateway" too, so the real subcommand
# token is the one we land on below.
filtered: list[str] = []
skip_next = False
for token in tokens:
if skip_next:
skip_next = False
continue
if token in ("--profile", "-p"):
skip_next = True
continue
if token.startswith("--profile=") or token.startswith("-p="):
continue
filtered.append(token)
for i, token in enumerate(filtered):
if token != "gateway":
continue
if i + 1 >= len(filtered):
return True # bare `hermes gateway` defaults to `run`
return filtered[i + 1] == "run"
return False
def _looks_like_gateway_process(pid: int) -> bool:
    """Return True when the live PID still looks like the Hermes gateway.

    The process command line is read with ``_read_process_cmdline`` and then
    classified by ``looks_like_gateway_command_line``. A missing or empty
    command line is treated as "not a gateway".

    Args:
        pid: Process id to inspect.

    Returns:
        ``True`` only when the command line passes the strict gateway matcher.
    """
    cmdline = _read_process_cmdline(pid)
    if not cmdline:
        return False
    # Delegate to the shared strict matcher. The previous substring patterns
    # (e.g. "hermes gateway") returned first and made this call unreachable;
    # a bare substring also matches sibling commands such as
    # ``hermes gateway status``.
    return looks_like_gateway_command_line(cmdline)
def _record_looks_like_gateway(record: dict[str, Any]) -> bool:
@ -189,15 +256,8 @@ def _record_looks_like_gateway(record: dict[str, Any]) -> bool:
if not isinstance(argv, list) or not argv:
return False
# Normalize Windows backslashes so patterns match cross-platform.
cmdline = " ".join(str(part) for part in argv).replace("\\", "/")
patterns = (
"hermes_cli.main gateway",
"hermes_cli/main.py gateway",
"hermes gateway",
"gateway/run.py",
)
return any(pattern in cmdline for pattern in patterns)
cmdline = " ".join(str(part) for part in argv)
return looks_like_gateway_command_line(cmdline)
def _build_pid_record() -> dict:
@ -515,6 +575,7 @@ def write_runtime_status(
platform_state: Any = _UNSET,
error_code: Any = _UNSET,
error_message: Any = _UNSET,
served_profiles: Any = _UNSET,
) -> None:
"""Persist gateway runtime health information for diagnostics/status."""
path = _get_runtime_status_path()
@ -535,6 +596,11 @@ def write_runtime_status(
payload["restart_requested"] = bool(restart_requested)
if active_agents is not _UNSET:
payload["active_agents"] = max(0, int(active_agents))
if served_profiles is not _UNSET:
# Profiles this gateway multiplexes (multi-profile mode). Absent/empty
# for a single-profile gateway. Lets `hermes status` show per-profile
# coverage without a second probe.
payload["served_profiles"] = list(served_profiles or [])
if platform is not _UNSET:
platform_payload = payload["platforms"].get(platform, {})

View file

@ -34,14 +34,38 @@ logger = logging.getLogger(__name__)
# ``hermes-agent`` is special-cased to root level only in ``_should_exclude``
# so that skill directories like ``skills/autonomous-ai-agents/hermes-agent/``
# are not accidentally excluded.
#
# The dependency/cache entries below matter for more than tidiness: without
# them a single plugin venv, MCP-server install, or pip/uv cache living under
# HERMES_HOME gets walked file-by-file, ballooning a backup to hundreds of
# thousands of entries that crawl for hours — the exact "backup stuck for
# days / 426543 files" symptom users hit. The dependency/test-env names mostly
# mirror ``agent.skill_utils.EXCLUDED_SKILL_DIRS`` (the project's canonical
# "regeneratable dir" set); ``.cache`` is an additional backup-only entry, as
# it names a broad regeneratable cache convention (pip/uv/etc.) that the skill
# scanner doesn't need to prune but a backup walk does. We deliberately do NOT
# exclude ``.archive`` here because the curator's ``skills/.archive/`` holds
# restorable user skills that must survive a backup.
_EXCLUDED_DIRS = {
"hermes-agent", # the codebase repo — re-clone instead
"__pycache__", # bytecode caches — regenerated on import
".git", # nested git dirs (profiles shouldn't have these, but safety)
"node_modules", # js deps if website/ somehow leaks in
"node_modules", # js deps — reinstalled on demand
"backups", # prior auto-backups — don't nest backups exponentially
"checkpoints", # session-local trajectory caches — regenerated per-session,
# session-hash-keyed so they don't port to another machine anyway
# Python dependency trees (plugin / MCP-server venvs under HERMES_HOME) —
# regenerated by reinstalling; never irreplaceable state.
".venv",
"venv",
"site-packages",
# Tool / build caches — all regeneratable.
".cache",
".tox",
".nox",
".pytest_cache",
".mypy_cache",
".ruff_cache",
}
# File-name suffixes to skip

View file

@ -123,8 +123,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
# Configuration
CommandDef("config", "Show current configuration", "Configuration",
cli_only=True),
CommandDef("model", "Switch model for this session", "Configuration",
args_hint="[model] [--provider name] [--global] [--refresh]"),
CommandDef("model", "Switch model (persists by default)", "Configuration",
args_hint="[model] [--provider name] [--global|--session] [--refresh]"),
CommandDef("codex-runtime", "Toggle codex app-server runtime for OpenAI/Codex models",
"Configuration", aliases=("codex_runtime",),
args_hint="[auto|codex_app_server]"),

View file

@ -1581,6 +1581,14 @@ DEFAULT_CONFIG = {
# TUI busy indicator style: kaomoji (default), emoji, unicode (braille
# spinner), or ascii. Live-swappable via `/indicator <style>`.
"tui_status_indicator": "kaomoji",
# Seconds between prompt_toolkit redraws in the classic CLI when idle.
# Default 1.0 keeps the wall-clock status-bar read-outs (idle-since-
# last-turn) ticking and keeps the bottom chrome alive during idle —
# without it prompt_toolkit stops repainting the status bar after a
# turn and it can go stale/disappear (#45592).
# Set 0 to disable the background refresh if it fights terminal
# auto-scroll in non-fullscreen mode on some emulators (#48309).
"cli_refresh_interval": 1.0,
"user_message_preview": { # CLI: how many submitted user-message lines to echo back in scrollback
"first_lines": 2,
"last_lines": 2,
@ -3453,6 +3461,7 @@ OPTIONAL_ENV_VARS = {
"Required scopes: chat:write, app_mentions:read, channels:history, groups:history, "
"im:history, im:read, im:write, users:read, files:read, files:write",
"prompt": "Slack Bot Token (xoxb-...)",
"help": "In your Slack app, add the required bot scopes, install the app to the workspace, then copy OAuth & Permissions > Bot User OAuth Token.",
"url": "https://api.slack.com/apps",
"password": True,
"category": "messaging",
@ -3462,10 +3471,19 @@ OPTIONAL_ENV_VARS = {
"App-Level Tokens. Also ensure Event Subscriptions include: message.im, "
"message.channels, message.groups, app_mention",
"prompt": "Slack App Token (xapp-...)",
"help": "In your Slack app, enable Socket Mode, then create Basic Information > App-Level Tokens with the connections:write scope.",
"url": "https://api.slack.com/apps",
"password": True,
"category": "messaging",
},
"SLACK_ALLOWED_USERS": {
"description": "Comma-separated Slack member IDs allowed to use Hermes, e.g. U01ABC2DEF3. Without this, Slack may connect but deny messages by default.",
"prompt": "Allowed Slack member IDs",
"help": "In Slack, open your profile, choose More or the three-dot menu, then Copy member ID. Add multiple IDs comma-separated.",
"url": "https://api.slack.com/apps",
"password": False,
"category": "messaging",
},
"MATTERMOST_URL": {
"description": "Mattermost server URL (e.g. https://mm.example.com)",
"prompt": "Mattermost server URL",

View file

@ -25,7 +25,7 @@ _GATEWAY_LIFECYCLE_PATTERNS = re.compile(
r"(?i)"
r"(hermes\s+gateway\s+(restart|stop|start))"
r"|(launchctl\s+(kickstart|unload|load|stop|restart)\s+.*hermes)"
r"|(systemctl\s+(restart|stop|start)\s+.*hermes)"
r"|(systemctl\s+(-\S+\s+)*(restart|stop|start)\s+.*hermes)"
r"|(p?kill\s+.*hermes.*gateway)"
)

View file

@ -191,10 +191,10 @@ _PRIVACY_NOTICE = """\
This will upload the following to a public paste service:
System info (OS, Python version, Hermes version, provider, which API keys
are configured NOT the actual keys)
Recent log lines (agent.log, errors.log, gateway.log, desktop.log may
contain conversation fragments and file paths)
Full agent.log, gateway.log, and desktop.log (up to 512 KB each likely
contains conversation content, tool outputs, and file paths)
Recent log lines (agent.log, errors.log, gateway.log, gui.log, desktop.log
may contain conversation fragments and file paths)
Full agent.log, gateway.log, gui.log, and desktop.log (up to 512 KB each
likely contains conversation content, tool outputs, and file paths)
Pastes auto-delete after 6 hours.
"""
@ -503,6 +503,9 @@ def _capture_default_log_snapshots(
"gateway": _capture_log_snapshot(
"gateway", tail_lines=errors_lines, redact=redact
),
"gui": _capture_log_snapshot(
"gui", tail_lines=errors_lines, redact=redact
),
"desktop": _capture_log_snapshot(
"desktop", tail_lines=errors_lines, redact=redact
),
@ -574,6 +577,10 @@ def collect_debug_report(
buf.write(log_snapshots["gateway"].tail_text)
buf.write("\n\n")
buf.write(f"--- gui.log (last {errors_lines} lines) ---\n")
buf.write(log_snapshots["gui"].tail_text)
buf.write("\n\n")
buf.write(f"--- desktop.log (last {errors_lines} lines) ---\n")
buf.write(log_snapshots["desktop"].tail_text)
buf.write("\n")
@ -639,6 +646,7 @@ def build_debug_share(
)
agent_log = log_snapshots["agent"].full_text
gateway_log = log_snapshots["gateway"].full_text
gui_log = log_snapshots["gui"].full_text
desktop_log = log_snapshots["desktop"].full_text
# Prepend dump header to each full log so every paste is self-contained.
@ -646,6 +654,8 @@ def build_debug_share(
agent_log = dump_text + "\n\n--- full agent.log ---\n" + agent_log
if gateway_log:
gateway_log = dump_text + "\n\n--- full gateway.log ---\n" + gateway_log
if gui_log:
gui_log = dump_text + "\n\n--- full gui.log ---\n" + gui_log
if desktop_log:
desktop_log = dump_text + "\n\n--- full desktop.log ---\n" + desktop_log
@ -657,6 +667,8 @@ def build_debug_share(
agent_log = _REDACTION_BANNER + agent_log
if gateway_log:
gateway_log = _REDACTION_BANNER + gateway_log
if gui_log:
gui_log = _REDACTION_BANNER + gui_log
if desktop_log:
desktop_log = _REDACTION_BANNER + desktop_log
@ -670,6 +682,7 @@ def build_debug_share(
for label, content in (
("agent.log", agent_log),
("gateway.log", gateway_log),
("gui.log", gui_log),
("desktop.log", desktop_log),
):
if not content:
@ -712,11 +725,14 @@ def run_debug_share(args):
)
agent_log = log_snapshots["agent"].full_text
gateway_log = log_snapshots["gateway"].full_text
gui_log = log_snapshots["gui"].full_text
desktop_log = log_snapshots["desktop"].full_text
if agent_log:
agent_log = dump_text + "\n\n--- full agent.log ---\n" + agent_log
if gateway_log:
gateway_log = dump_text + "\n\n--- full gateway.log ---\n" + gateway_log
if gui_log:
gui_log = dump_text + "\n\n--- full gui.log ---\n" + gui_log
if desktop_log:
desktop_log = dump_text + "\n\n--- full desktop.log ---\n" + desktop_log
if redact:
@ -725,12 +741,15 @@ def run_debug_share(args):
agent_log = _REDACTION_BANNER + agent_log
if gateway_log:
gateway_log = _REDACTION_BANNER + gateway_log
if gui_log:
gui_log = _REDACTION_BANNER + gui_log
if desktop_log:
desktop_log = _REDACTION_BANNER + desktop_log
print(report)
for title, body in (
("FULL agent.log", agent_log),
("FULL gateway.log", gateway_log),
("FULL gui.log", gui_log),
("FULL desktop.log", desktop_log),
):
if body:

View file

@ -319,23 +319,12 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li
# gateway. See #13242.
exclude_pids = exclude_pids | _get_ancestor_pids()
pids: list[int] = []
patterns = [
"hermes_cli.main gateway",
"hermes_cli.main --profile",
"hermes_cli.main -p",
"hermes_cli/main.py gateway",
"hermes_cli/main.py --profile",
"hermes_cli/main.py -p",
"hermes gateway",
# Windows: only match invocations that actually carry the ``gateway``
# subcommand or the gateway-dedicated console-script shim. Bare
# ``hermes.exe --profile`` / ``hermes.exe -p`` would also match
# ``hermes.exe --profile foo dashboard`` and other CLI subcommands,
# producing false-positive gateway PIDs (Copilot review).
"hermes.exe gateway",
"hermes-gateway.exe",
"gateway/run.py",
]
# Strict command-line matcher shared with gateway.status: requires the
# actual ``gateway run`` subcommand (or the dedicated entrypoints), so this
# scan no longer false-matches ``gateway status``/``dashboard`` siblings or
# unrelated processes like ``python -m tui_gateway``. Lazy import mirrors the
# circular-import avoidance used elsewhere in this module.
from gateway.status import looks_like_gateway_command_line
current_home = str(get_hermes_home().resolve())
current_home_lc = current_home.lower()
current_profile_arg = _profile_arg(current_home)
@ -430,8 +419,7 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li
current_cmd = line[len("CommandLine=") :]
elif line.startswith("ProcessId="):
pid_str = line[len("ProcessId=") :]
current_cmd_lc = current_cmd.lower()
if any(p in current_cmd_lc for p in patterns) and (
if looks_like_gateway_command_line(current_cmd) and (
all_profiles or _matches_current_profile(current_cmd)
):
try:
@ -456,8 +444,7 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li
with open(f"/proc/{pid}/cmdline", "rb") as _f:
cmdline = _f.read().decode("utf-8", errors="replace")
cmdline = cmdline.replace("\x00", " ")
cmdline_lc = cmdline.lower()
if any(p in cmdline_lc for p in patterns) and (
if looks_like_gateway_command_line(cmdline) and (
all_profiles or _matches_current_profile(cmdline)
):
_append_unique_pid(pids, pid, exclude_pids)
@ -500,8 +487,7 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li
if pid is None:
continue
command_lc = command.lower()
if any(pattern in command_lc for pattern in patterns) and (
if looks_like_gateway_command_line(command) and (
all_profiles or _matches_current_profile(command)
):
_append_unique_pid(pids, pid, exclude_pids)
@ -3865,6 +3851,86 @@ def _running_under_gateway_supervisor() -> bool:
return False
def _guard_named_profile_under_multiplexer(force: bool = False) -> None:
"""Refuse a named-profile gateway when a multiplexer is already serving it.
When the default profile's gateway runs with gateway.multiplex_profiles=on,
it is the sole inbound process for EVERY profile on the host. Starting a
separate gateway for a named profile would double-bind that profile's
platforms (two pollers on one bot token, port fights). In that mode a
named-profile ``hermes gateway run`` is always a misconfiguration, so we
hard-error with a pointer to the multiplexer. ``--force`` overrides.
Inert unless ALL of: (a) this invocation is a named profile, (b) a default-
profile gateway is running, (c) that gateway's config has multiplexing on.
"""
if force:
return
# (a) Are we a named profile? Default/custom-hash homes return "".
try:
suffix = _profile_suffix()
except Exception:
return
if not suffix:
return # default profile (or unrecognized) — this guard doesn't apply
try:
from hermes_constants import get_default_hermes_root
default_root = get_default_hermes_root()
# (b) Is the default-profile gateway running?
from gateway.status import get_running_pid as _default_running_pid # noqa
except Exception:
return
try:
import yaml as _yaml
from gateway.status import _read_pid_record # type: ignore
# (b) default gateway PID file present + alive
default_pid_path = default_root / "gateway.pid"
rec = _read_pid_record(default_pid_path)
if not rec:
return
from gateway.status import _pid_exists, _pid_from_record
pid = _pid_from_record(rec)
if not pid or not _pid_exists(pid):
return
# (c) default config has multiplexing on
cfg_path = default_root / "config.yaml"
if not cfg_path.exists():
return
with open(cfg_path, encoding="utf-8") as f:
cfg = _yaml.safe_load(f) or {}
multiplex = bool(
cfg.get("multiplex_profiles")
or (cfg.get("gateway", {}) or {}).get("multiplex_profiles")
)
if not multiplex:
return
except Exception:
logger.debug("Multiplexer-conflict probe failed", exc_info=True)
return
print_error(
f"The default gateway is running as a profile multiplexer and already "
f"serves profile '{suffix}'."
)
print(
" When gateway.multiplex_profiles is on, the default gateway is the\n"
" single inbound process for every profile. Starting a separate\n"
" gateway for this profile would double-bind its platforms (two\n"
" pollers on one bot token, port conflicts).\n"
)
print(" Manage the multiplexer instead (from the default profile):")
print()
print(" hermes gateway restart")
print()
print(" Pass --force to start a separate profile gateway anyway (not")
print(" recommended while the multiplexer is running).")
sys.exit(1)
def _guard_supervised_gateway_conflict(force: bool = False) -> None:
"""Refuse a foreground gateway when a service manager already supervises one.
@ -3977,6 +4043,7 @@ def run_gateway(verbose: int = 0, quiet: bool = False, replace: bool = False, fo
systemd/launchd service is already supervising this profile.
"""
_guard_official_docker_root_gateway()
_guard_named_profile_under_multiplexer(force=force)
_guard_supervised_gateway_conflict(force=force)
_guard_existing_gateway_process_conflict(replace=replace)
sys.path.insert(0, str(PROJECT_ROOT))

View file

@ -1302,10 +1302,54 @@ def stop() -> None:
print("✗ No gateway was running")
def _wait_for_gateway_absent(timeout_s: float = 30.0, interval_s: float = 0.5) -> bool:
    """Poll until no gateway process is detectable or the timeout runs out.

    ``stop()`` can return while the previous gateway is still draining
    in-flight agents, so a relaunch must not start until the old process is
    really gone. "Gone" means both ``get_running_pid()`` returns ``None`` and
    the ``_gateway_pids()`` scan is empty.

    Args:
        timeout_s: Maximum time to wait; at least one ``interval_s`` is always
            allowed.
        interval_s: Sleep between probes.

    Returns:
        ``True`` once both probes report no gateway, otherwise the result of
        one final probe after the deadline.
    """
    from gateway.status import get_running_pid

    def _gone() -> bool:
        # PID-file check first; the process scan only runs when that is clear.
        return get_running_pid() is None and not _gateway_pids()

    give_up_at = time.monotonic() + max(timeout_s, interval_s)
    while time.monotonic() < give_up_at:
        if _gone():
            return True
        time.sleep(interval_s)
    # One last look so a process that vanished during the final sleep counts.
    return _gone()
def restart() -> None:
    """Stop the gateway then start it again.

    Waits for the old gateway to be authoritatively gone before relaunching --
    otherwise ``start()``'s "already running" guard sees the still-draining old
    process and no-ops, and when that process later exits nothing replaces it (a
    silent outage). Fails loudly if the process can't be cleared or the relaunch
    doesn't produce a running gateway.

    Raises:
        RuntimeError: If a gateway is still detected after the force kill, or
            if no running gateway is observed after ``start()``.
    """
    _assert_windows()
    from hermes_cli.gateway import kill_gateway_processes

    stop()
    if not _wait_for_gateway_absent(timeout_s=30.0):
        print("⚠ Gateway still present after stop; forcing termination before restart...")
        kill_gateway_processes(all_profiles=False, force=True)
        if not _wait_for_gateway_absent(timeout_s=10.0):
            raise RuntimeError(
                "Gateway process still detected after force kill; refusing to "
                "start a duplicate. Investigate stray PIDs before retrying."
            )
    # Give Windows a moment to release the listening port.
    time.sleep(1.0)
    start()
    if not _wait_for_gateway_ready(timeout_s=15.0):
        raise RuntimeError(
            "Gateway restart did not produce a running gateway process. "
            "Check logs/gateway.log and run `hermes gateway status`."
        )

View file

@ -121,6 +121,16 @@ DEFAULT_CLAIM_TTL_SECONDS = 15 * 60
# effect of normal API traffic.
DEFAULT_CLAIM_HEARTBEAT_MAX_STALE_SECONDS = 60 * 60
# Extra claim lifetime granted when a reclaim is deferred because the previous
# host-local worker is still alive after we tried to terminate it. Releasing
# the claim at that point would spawn a duplicate next to the survivor — the
# runaway seen when a cgroup memory.high throttle parks a worker in
# uninterruptible (D) state, where a pending SIGKILL cannot be delivered until
# the throttle lifts. Holding the claim for a short grace and retrying on the
# next tick stops the duplication; with no duplicate spawned the pressure
# eases, the signal lands, and the following tick reclaims cleanly.
RECLAIM_DEFER_GRACE_SECONDS = 2 * 60
def _resolve_claim_ttl_seconds(ttl_seconds: Optional[int] = None) -> int:
"""Return the effective claim TTL, honoring the kanban env override.
@ -3286,6 +3296,14 @@ def release_stale_claims(
termination = _terminate_reclaimed_worker(
row["worker_pid"], row["claim_lock"], signal_fn=signal_fn,
)
# Never release a claim while our own worker is still alive: that would
# spawn a duplicate beside it. Hold the claim and retry next tick.
if _worker_survived_termination(termination):
_defer_reclaim_for_live_worker(
conn, row["id"], row["claim_lock"], now, termination,
reason="ttl_expired_worker_alive",
)
continue
with write_txn(conn):
cur = conn.execute(
"UPDATE tasks SET status = 'ready', claim_lock = NULL, "
@ -5113,7 +5131,13 @@ def _terminate_reclaimed_worker(
info["termination_attempted"] = True
try:
kill(int(pid), signal.SIGTERM)
except (ProcessLookupError, OSError):
except ProcessLookupError:
# Process is already gone — that's a successful termination, not a
# survival. Leaving terminated=False here would make the reclaim guard
# misread a dead worker as still-alive and defer forever.
info["terminated"] = True
return info
except OSError:
return info
for _ in range(10):
@ -5136,6 +5160,63 @@ def _terminate_reclaimed_worker(
return info
def _worker_survived_termination(termination: dict) -> bool:
"""True when we tried to kill our own host-local worker and it is still alive.
Reclaiming in this state would release the claim and let the dispatcher
spawn a second worker while the first is still running the duplication
loop. Only host-local workers we actually signalled count: a non-local
claim lock or a no-op attempt (no ``os.kill`` available) must fall through
to the normal release path, since we cannot manage that worker anyway.
"""
return bool(
termination.get("termination_attempted")
and termination.get("host_local")
and not termination.get("terminated")
)
def _defer_reclaim_for_live_worker(
    conn: sqlite3.Connection,
    task_id: str,
    claim_lock: Optional[str],
    now: int,
    termination: dict,
    *,
    reason: str,
) -> None:
    """Keep a claim alive when its worker survived termination.

    Sets ``claim_expires`` to ``now + RECLAIM_DEFER_GRACE_SECONDS`` on the task
    (and on its current run row, when there is one) so the task stays
    ``running`` and no duplicate is spawned, then appends a
    ``reclaim_deferred`` event describing the hold.

    Args:
        conn: Open database connection.
        task_id: Task whose claim is being held.
        claim_lock: Claim lock the task row must still carry; compared with
            ``IS`` so ``None`` matches a NULL column.
        now: Current timestamp in seconds, as used for ``claim_expires``.
        termination: Result mapping of the termination attempt; merged into
            the event payload (its keys win on collision).
        reason: Short machine-readable cause recorded in the event.
    """
    new_expiry = now + RECLAIM_DEFER_GRACE_SECONDS
    with write_txn(conn):
        updated = conn.execute(
            "UPDATE tasks SET claim_expires = ? "
            "WHERE id = ? AND status = 'running' AND claim_lock IS ?",
            (new_expiry, task_id, claim_lock),
        ).rowcount
        if updated != 1:
            # The row no longer matches (status or claim changed) — nothing
            # to hold and nothing to record.
            return
        run_id = _current_run_id(conn, task_id)
        if run_id is not None:
            conn.execute(
                "UPDATE task_runs SET claim_expires = ? WHERE id = ?",
                (new_expiry, run_id),
            )
        event = {
            "reason": reason,
            "claim_lock": claim_lock,
            "claim_expires_now": new_expiry,
            **termination,
        }
        _append_event(conn, task_id, "reclaim_deferred", event, run_id=run_id)
def heartbeat_worker(
conn: sqlite3.Connection,
task_id: str,
@ -5374,6 +5455,15 @@ def detect_stale_running(
pid, lock, signal_fn=signal_fn,
)
# Never release a claim while our own worker is still alive: that would
# spawn a duplicate beside it. Hold the claim and retry next tick.
if _worker_survived_termination(termination):
_defer_reclaim_for_live_worker(
conn, tid, lock, now, termination,
reason="heartbeat_stale_worker_alive",
)
continue
with write_txn(conn):
cur = conn.execute(
"UPDATE tasks SET status = 'ready', claim_lock = NULL, "

View file

@ -299,34 +299,46 @@ class ModelSwitchResult:
# Flag parsing
# ---------------------------------------------------------------------------
def parse_model_flags(raw_args: str) -> tuple[str, str, bool, bool]:
"""Parse --provider, --global, and --refresh flags from /model command args.
def parse_model_flags(raw_args: str) -> tuple[str, str, bool, bool, bool]:
"""Parse --provider, --global, --session, and --refresh flags from /model command args.
Returns (model_input, explicit_provider, is_global, force_refresh).
Returns ``(model_input, explicit_provider, is_global, force_refresh, is_session)``.
``is_global`` and ``is_session`` are independent flag presences; the
*effective* persistence decision is resolved by
:func:`resolve_persist_behavior` so the config-gated default
(``model.persist_switch_by_default``) is applied in one place.
Examples::
"sonnet" -> ("sonnet", "", False, False)
"sonnet --global" -> ("sonnet", "", True, False)
"sonnet --provider anthropic" -> ("sonnet", "anthropic", False, False)
"--provider my-ollama" -> ("", "my-ollama", False, False)
"--refresh" -> ("", "", False, True)
"sonnet --provider anthropic --global" -> ("sonnet", "anthropic", True, False)
"sonnet" -> ("sonnet", "", False, False, False)
"sonnet --global" -> ("sonnet", "", True, False, False)
"sonnet --session" -> ("sonnet", "", False, False, True)
"sonnet --provider anthropic" -> ("sonnet", "anthropic", False, False, False)
"--provider my-ollama" -> ("", "my-ollama", False, False, False)
"--refresh" -> ("", "", False, True, False)
"sonnet --provider anthropic --global" -> ("sonnet", "anthropic", True, False, False)
"""
is_global = False
explicit_provider = ""
force_refresh = False
is_session = False
# Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash)
# A single Unicode dash before a flag keyword becomes "--"
import re as _re
raw_args = _re.sub(r'[\u2012\u2013\u2014\u2015](provider|global|refresh)', r'--\1', raw_args)
raw_args = _re.sub(r'[\u2012\u2013\u2014\u2015](provider|global|session|refresh)', r'--\1', raw_args)
# Extract --global
if "--global" in raw_args:
is_global = True
raw_args = raw_args.replace("--global", "").strip()
# Extract --session (explicit session-only; overrides the persist default)
if "--session" in raw_args:
is_session = True
raw_args = raw_args.replace("--session", "").strip()
# Extract --refresh (bust the model picker disk cache before listing)
if "--refresh" in raw_args:
force_refresh = True
@ -345,7 +357,37 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool, bool]:
i += 1
model_input = " ".join(filtered).strip()
return (model_input, explicit_provider, is_global, force_refresh)
return (model_input, explicit_provider, is_global, force_refresh, is_session)
def resolve_persist_behavior(is_global: bool, is_session: bool) -> bool:
    """Decide whether a ``/model`` switch should persist to ``config.yaml``.

    Resolution order:

    1. ``--session`` explicitly opts out -> ``False`` (this session only).
    2. ``--global`` explicitly opts in -> ``True``.
    3. Otherwise defer to ``model.persist_switch_by_default`` in the loaded
       config, which defaults to ``True``.

    The config read is defensive: when ``model`` is not a dict (for example a
    flat string), or when loading the config raises, the built-in default
    (``True``) applies.

    Args:
        is_global: Whether ``--global`` was given.
        is_session: Whether ``--session`` was given; takes precedence over
            ``is_global``.

    Returns:
        ``True`` when the switch should be written to the config.
    """
    # An explicit flag settles it; --session beats --global when both are set.
    if is_session or is_global:
        return not is_session

    persist = True
    try:
        from hermes_cli.config import load_config

        model_section = load_config().get("model")
        if isinstance(model_section, dict):
            persist = bool(model_section.get("persist_switch_by_default", True))
    except Exception:
        # Best effort only: any failure reading config falls back to the default.
        persist = True
    return persist
# ---------------------------------------------------------------------------

View file

@ -29,7 +29,7 @@ import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path, PurePosixPath, PureWindowsPath
from typing import List, Optional
from typing import List, Optional, Tuple
from agent.skill_utils import is_excluded_skill_path
@ -781,6 +781,47 @@ def list_profiles() -> List[ProfileInfo]:
return profiles
def profiles_to_serve(multiplex: bool) -> List[Tuple[str, Path]]:
    """Return the ``(profile_name, hermes_home)`` pairs a gateway should serve.

    This is the single place that answers "which profiles does the inbound
    gateway handle", so callers never re-derive the set.

    - ``multiplex=False``: exactly one entry, for the *active* profile. The
      name is ``"default"`` when no active profile name is reported.
    - ``multiplex=True``: the default profile first, followed by every
      directory under the profiles root whose name is not ``"default"`` and
      matches ``_PROFILE_ID_RE``, in sorted order, each paired with its own
      directory.

    Deliberately lightweight: only a directory scan and name validation, with
    no per-profile config reads or gateway probes.

    Args:
        multiplex: Whether the gateway serves all profiles or only the active
            one.

    Returns:
        List of ``(name, home_path)`` tuples.
    """
    active = get_active_profile_name() or "default"
    if not multiplex:
        return [(active, get_profile_dir(active))]

    served: List[Tuple[str, Path]] = [("default", _get_default_hermes_home())]
    root = _get_profiles_root()
    if not root.is_dir():
        return served

    # "default" is already the first entry above, so a directory of that name
    # under the profiles root is ignored; other names must be valid profile ids.
    served.extend(
        (child.name, child)
        for child in sorted(root.iterdir())
        if child.is_dir()
        and child.name != "default"
        and _PROFILE_ID_RE.match(child.name)
    )
    return served
def create_profile(
name: str,
clone_from: Optional[str] = None,

View file

@ -0,0 +1,170 @@
"""Unified provider catalog — one source of truth for the provider universe.
The provider list shown by ``hermes model`` (CLI/TUI) and the desktop Settings
Providers tabs (Accounts + API keys) **must be the same set**. Historically
they were not: the CLI picker read :data:`hermes_cli.models.CANONICAL_PROVIDERS`
(which auto-extends from ``plugins/model-providers/<name>/``), while the desktop
tabs read separate hand-maintained lists (``_OAUTH_PROVIDER_CATALOG``,
``OPTIONAL_ENV_VARS`` + ``PROVIDER_GROUPS``) that nobody kept in sync. Every
provider added after those lists were written silently went missing from the
GUI — e.g. GitHub Copilot showing up only under "tools", or ``openai-api`` being
configurable from the CLI but not from the desktop app.
This module fixes that at the root: it derives ONE descriptor per provider from
the same universe ``hermes model`` renders (``CANONICAL_PROVIDERS``), joining:
* ``auth_type`` / ``api_key_env_vars`` / ``base_url_env_var`` from
:data:`hermes_cli.auth.PROVIDER_REGISTRY` (credential truth), and
* ``display_name`` / ``description`` / ``signup_url`` from the provider's
:class:`providers.base.ProviderProfile` when one exists, falling back to the
``CANONICAL_PROVIDERS`` entry's ``label`` / ``tui_desc`` and the
``OPTIONAL_ENV_VARS`` signup URL otherwise (many profiles leave these blank,
and four canonical providers have no profile at all — lmstudio, openai-api,
tencent-tokenhub, xai-oauth — so the fallbacks are load-bearing).
Each descriptor is tagged with the ``tab`` it belongs on (``keys`` vs
``accounts``) based purely on how the provider authenticates. The desktop
``/api/env`` and ``/api/providers/oauth`` endpoints derive their MEMBERSHIP from
this catalog; the old hand lists are demoted to presentation/override overlays
(bespoke OAuth flow + status resolvers, richer copy, icons, ordering) and no
longer decide which providers exist.
Parity contract (locked by tests): the union of the two tabs equals the
``CANONICAL_PROVIDERS`` universe, i.e. exactly what ``hermes model`` shows.
"""
from __future__ import annotations
from dataclasses import dataclass
# Auth types that authenticate via an account / sign-in flow rather than a
# pasted API key. These route to the desktop "Accounts" tab; everything else
# (api_key, and aws_sdk which is configured via AWS_REGION/AWS_PROFILE) routes
# to the "API keys" tab. Mirrors the auth_type strings used in
# hermes_cli.auth.PROVIDER_REGISTRY and providers.base.ProviderProfile.
_ACCOUNTS_AUTH_TYPES: frozenset[str] = frozenset(
{
"oauth_device_code",
"oauth_external",
"oauth_minimax",
"external_process", # copilot-acp: spawns `copilot --acp --stdio`
"copilot", # GitHub Copilot token / gh auth
}
)
@dataclass(frozen=True)
class ProviderDescriptor:
    """Immutable description of one provider, shared by the CLI picker and both GUI tabs."""

    # Canonical id, e.g. "google-gemini-cli".
    slug: str
    # Human display name.
    label: str
    # One-line description.
    description: str
    # api_key | oauth_* | external_process | copilot | aws_sdk
    auth_type: str
    # "keys" | "accounts"
    tab: str
    # Credential env vars (may be empty).
    api_key_env_vars: tuple[str, ...]
    # Base-URL override env var (may be "").
    base_url_env_var: str
    # Signup / console URL (may be "").
    signup_url: str
    # CANONICAL_PROVIDERS index — mirrors `hermes model`.
    order: int
def tab_for_auth_type(auth_type: str) -> str:
    """Map a provider's auth type to its desktop tab.

    Args:
        auth_type: The provider's auth type string.

    Returns:
        ``"accounts"`` when ``auth_type`` is in ``_ACCOUNTS_AUTH_TYPES``,
        otherwise ``"keys"``.
    """
    if auth_type in _ACCOUNTS_AUTH_TYPES:
        return "accounts"
    return "keys"
def _split_env_vars(env_vars: tuple[str, ...]) -> tuple[tuple[str, ...], str]:
"""Split a profile's ``env_vars`` into (api_key_vars, base_url_var)."""
keys = tuple(v for v in env_vars if not (v.endswith("_BASE_URL") or v.endswith("_URL")))
base = next((v for v in env_vars if v.endswith("_BASE_URL") or v.endswith("_URL")), "")
return keys, base
def provider_catalog() -> list[ProviderDescriptor]:
    """Build one :class:`ProviderDescriptor` per ``CANONICAL_PROVIDERS`` entry.

    Membership and ordering come from :data:`CANONICAL_PROVIDERS`. For each
    entry the auth type and credential env vars are taken from
    ``PROVIDER_REGISTRY`` when it has them, otherwise from the matching
    ``ProviderProfile``; display metadata prefers the profile and falls back
    to the canonical entry (and finally the slug) so a provider with no
    profile, or with blank profile metadata, still gets usable values.

    Returns:
        Descriptors in ``CANONICAL_PROVIDERS`` order.
    """
    from hermes_cli.models import CANONICAL_PROVIDERS

    # The three optional sources below are imported lazily and each failure is
    # absorbed on its own: this module sits on the import path of both the web
    # server and the CLI, and a broken provider plugin must not blank the
    # whole catalog.
    try:
        from hermes_cli.auth import PROVIDER_REGISTRY
    except Exception:
        PROVIDER_REGISTRY = {}
    try:
        from providers import list_providers

        profiles = {p.name: p for p in list_providers()}
    except Exception:
        profiles = {}
    try:
        from hermes_cli.config import OPTIONAL_ENV_VARS
    except Exception:
        OPTIONAL_ENV_VARS = {}

    def _attr(source: object, name: str):
        """Read *name* from *source*, or ``""`` when *source* is falsy."""
        return getattr(source, name, "") if source else ""

    descriptors: list[ProviderDescriptor] = []
    for position, canonical in enumerate(CANONICAL_PROVIDERS):
        slug = canonical.slug
        registry_cfg = PROVIDER_REGISTRY.get(slug)
        profile = profiles.get(slug)

        # Registry is authoritative for auth_type; then profile; then api_key.
        auth_type = (
            _attr(registry_cfg, "auth_type")
            or _attr(profile, "auth_type")
            or "api_key"
        )

        # Credential env vars: the registry already normalizes these, so use
        # it when populated; otherwise derive them from the profile's tuple.
        if registry_cfg and getattr(registry_cfg, "api_key_env_vars", ()):
            api_key_vars = tuple(registry_cfg.api_key_env_vars)
            base_url_var = getattr(registry_cfg, "base_url_env_var", "") or ""
        elif profile and getattr(profile, "env_vars", ()):
            api_key_vars, base_url_var = _split_env_vars(tuple(profile.env_vars))
        else:
            api_key_vars, base_url_var = (), ""

        label = _attr(profile, "display_name") or canonical.label or slug
        description = _attr(profile, "description") or canonical.tui_desc or label

        signup_url = _attr(profile, "signup_url") or ""
        if not signup_url and api_key_vars:
            # Fall back to the URL recorded for the primary credential var.
            env_info = OPTIONAL_ENV_VARS.get(api_key_vars[0]) or {}
            signup_url = env_info.get("url") or ""

        descriptors.append(
            ProviderDescriptor(
                slug=slug,
                label=label,
                description=description,
                auth_type=auth_type,
                tab=tab_for_auth_type(auth_type),
                api_key_env_vars=api_key_vars,
                base_url_env_var=base_url_var,
                signup_url=signup_url,
                order=position,
            )
        )
    return descriptors
def provider_catalog_by_slug() -> dict[str, ProviderDescriptor]:
    """Return the result of :func:`provider_catalog` indexed by ``slug``.

    If two descriptors share a slug, the later one wins.
    """
    indexed: dict[str, ProviderDescriptor] = {}
    for descriptor in provider_catalog():
        indexed[descriptor.slug] = descriptor
    return indexed

View file

@ -12,6 +12,7 @@ logger = logging.getLogger(__name__)
from hermes_cli import auth as auth_mod
from agent.credential_pool import CredentialPool, PooledCredential, get_custom_provider_pool_key, load_pool
from agent.secret_scope import get_secret as _get_secret
from hermes_cli.auth import (
AuthError,
DEFAULT_CODEX_BASE_URL,
@ -35,6 +36,19 @@ from hermes_constants import OPENROUTER_BASE_URL
from utils import base_url_host_matches, base_url_hostname, env_int
def _getenv(name: str, default: str = "") -> str:
    """Read *name* through the secret scope, keeping ``os.getenv``'s shape.

    Delegates to ``get_secret`` (imported here as ``_get_secret``) and
    substitutes *default* whenever that call yields ``None``, so callers keep
    the ``(name, default) -> str`` contract they had with ``os.getenv``.

    Per the module's design notes (not verifiable from this function alone):
    with multiplexing off this behaves like ``os.getenv``; with it on, reads
    are scope-aware and fail closed when unscoped, while genuinely global
    variables are still served from ``os.environ`` inside ``get_secret``.
    """
    resolved = _get_secret(name, default)
    if resolved is None:
        return default
    return resolved
def _normalize_custom_provider_name(value: str) -> str:
return value.strip().lower().replace(" ", "-")
@ -156,7 +170,7 @@ def _host_derived_api_key(base_url: str) -> str:
if sanitized in ("OPENAI", "OPENROUTER", "OLLAMA"):
return ""
env_name = f"{sanitized}_API_KEY"
return (os.getenv(env_name, "") or "").strip()
return (_getenv(env_name, "") or "").strip()
def _auto_detect_local_model(base_url: str) -> str:
@ -437,7 +451,7 @@ def resolve_requested_provider(requested: Optional[str] = None) -> str:
# Prefer the persisted config selection over any stale shell/.env
# provider override so chat uses the endpoint the user last saved.
env_provider = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
env_provider = _getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
if env_provider:
return env_provider
@ -542,7 +556,7 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
name_norm = _normalize_custom_provider_name(ep_name)
# Resolve the API key from the env var name stored in key_env
key_env = str(entry.get("key_env", "") or "").strip()
resolved_api_key = os.getenv(key_env, "").strip() if key_env else ""
resolved_api_key = _getenv(key_env, "").strip() if key_env else ""
# Fall back to inline api_key when key_env is absent or unresolvable
if not resolved_api_key:
resolved_api_key = str(entry.get("api_key", "") or "").strip()
@ -824,8 +838,8 @@ def _resolve_named_custom_runtime(
api_key_candidates = [
(explicit_api_key or "").strip(),
# Gate env key fallbacks on authoritative hosts (#28660)
(os.getenv("OPENAI_API_KEY", "").strip() if _da_is_openai_url else ""),
(os.getenv("OPENROUTER_API_KEY", "").strip() if _da_is_openrouter else ""),
(_getenv("OPENAI_API_KEY", "").strip() if _da_is_openai_url else ""),
(_getenv("OPENROUTER_API_KEY", "").strip() if _da_is_openrouter else ""),
# Bonus (#28660): derive `<VENDOR>_API_KEY` from the host so users
# who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the
# intuitive match without configuring `custom_providers` first.
@ -878,11 +892,11 @@ def _resolve_named_custom_runtime(
api_key_candidates = [
(explicit_api_key or "").strip(),
str(custom_provider.get("api_key", "") or "").strip(),
os.getenv(str(custom_provider.get("key_env", "") or "").strip(), "").strip(),
_getenv(str(custom_provider.get("key_env", "") or "").strip(), "").strip(),
# Gate provider env keys on their authoritative hosts — sending
# OPENAI_API_KEY to a local-llm endpoint leaks credentials (#28660).
(os.getenv("OPENAI_API_KEY", "").strip() if _cp_is_openai_url else ""),
(os.getenv("OPENROUTER_API_KEY", "").strip() if _cp_is_openrouter else ""),
(_getenv("OPENAI_API_KEY", "").strip() if _cp_is_openai_url else ""),
(_getenv("OPENROUTER_API_KEY", "").strip() if _cp_is_openrouter else ""),
# Bonus (#28660): derive `<VENDOR>_API_KEY` from the host as a final
# fallback when key_env wasn't set explicitly.
_host_derived_api_key(base_url),
@ -941,8 +955,8 @@ def _resolve_openrouter_runtime(
except Exception:
pass
env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()
env_custom_base_url = os.getenv("CUSTOM_BASE_URL", "").strip()
env_openrouter_base_url = _getenv("OPENROUTER_BASE_URL", "").strip()
env_custom_base_url = _getenv("CUSTOM_BASE_URL", "").strip()
# Use config base_url when available and the provider context matches.
# OPENAI_BASE_URL env var is no longer consulted — config.yaml is
@ -982,8 +996,8 @@ def _resolve_openrouter_runtime(
if _is_openrouter_context:
api_key_candidates = [
explicit_api_key,
os.getenv("OPENROUTER_API_KEY"),
os.getenv("OPENAI_API_KEY"),
_getenv("OPENROUTER_API_KEY"),
_getenv("OPENAI_API_KEY"),
]
else:
# Custom endpoint: use api_key from config when using config base_url (#1760).
@ -1003,9 +1017,9 @@ def _resolve_openrouter_runtime(
api_key_candidates = [
explicit_api_key,
(cfg_api_key if use_config_base_url else ""),
(os.getenv("OLLAMA_API_KEY") if _is_ollama_url else ""),
(os.getenv("OPENAI_API_KEY") if (_is_openai_url or _is_openai_azure) else ""),
(os.getenv("OPENROUTER_API_KEY") if _is_openrouter_url else ""),
(_getenv("OLLAMA_API_KEY") if _is_ollama_url else ""),
(_getenv("OPENAI_API_KEY") if (_is_openai_url or _is_openai_azure) else ""),
(_getenv("OPENROUTER_API_KEY") if _is_openrouter_url else ""),
# Bonus (#28660): derive `<VENDOR>_API_KEY` from the host so users
# who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the
# intuitive match. Helper returns "" for IPs/loopback and for env
@ -1108,7 +1122,7 @@ def _resolve_azure_foundry_runtime(
if inferred:
cfg_api_mode = inferred
env_base_url = os.getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/")
env_base_url = _getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/")
base_url = explicit_base_url_clean or cfg_base_url or env_base_url
if not base_url:
raise AuthError(
@ -1197,7 +1211,7 @@ def _resolve_azure_foundry_runtime(
except Exception:
api_key = ""
if not api_key:
api_key = os.getenv("AZURE_FOUNDRY_API_KEY", "").strip()
api_key = _getenv("AZURE_FOUNDRY_API_KEY", "").strip()
if not api_key:
raise AuthError(
"Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in "
@ -1297,7 +1311,7 @@ def _resolve_explicit_runtime(
expires_at = state.get("agent_key_expires_at") or state.get("expires_at")
if not api_key:
creds = resolve_nous_runtime_credentials(
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
timeout_seconds=float(_getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
)
api_key = creds.get("api_key", "")
expires_at = creds.get("expires_at")
@ -1326,7 +1340,7 @@ def _resolve_explicit_runtime(
if pconfig and pconfig.auth_type == "api_key":
env_url = ""
if pconfig.base_url_env_var:
env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")
env_url = _getenv(pconfig.base_url_env_var, "").strip().rstrip("/")
base_url = explicit_base_url
if not base_url:
@ -1398,8 +1412,8 @@ def resolve_runtime_provider(
if requested_provider == "anthropic" and "azure.com" in _eff_base:
_azure_key = (
(explicit_api_key or "").strip()
or os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
or os.getenv("ANTHROPIC_API_KEY", "").strip()
or _getenv("AZURE_ANTHROPIC_KEY", "").strip()
or _getenv("ANTHROPIC_API_KEY", "").strip()
)
return {
"provider": "anthropic",
@ -1454,8 +1468,8 @@ def resolve_runtime_provider(
if provider == "openrouter":
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
cfg_base_url = str(model_cfg.get("base_url") or "").strip()
env_openai_base_url = os.getenv("OPENAI_BASE_URL", "").strip()
env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()
env_openai_base_url = _getenv("OPENAI_BASE_URL", "").strip()
env_openrouter_base_url = _getenv("OPENROUTER_BASE_URL", "").strip()
has_custom_endpoint = bool(
explicit_base_url
or env_openai_base_url
@ -1511,7 +1525,7 @@ def resolve_runtime_provider(
if provider == "nous":
try:
creds = resolve_nous_runtime_credentials(
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
timeout_seconds=float(_getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
)
return {
"provider": "nous",
@ -1664,7 +1678,7 @@ def resolve_runtime_provider(
for hint_key in ("key_env", "api_key_env"):
env_var = str(model_cfg.get(hint_key) or "").strip()
if env_var:
token = os.getenv(env_var, "").strip()
token = _getenv(env_var, "").strip()
if token:
break
# Next: an inline api_key on the model config (useful in multi-profile
@ -1674,8 +1688,8 @@ def resolve_runtime_provider(
# Finally fall back to the historical fixed names.
if not token:
token = (
os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
or os.getenv("ANTHROPIC_API_KEY", "").strip()
_getenv("AZURE_ANTHROPIC_KEY", "").strip()
or _getenv("ANTHROPIC_API_KEY", "").strip()
)
if not token:
raise AuthError(

View file

@ -1554,6 +1554,7 @@ async def upload_managed_file_stream(
)
tmp_path = Path(tmp_name)
total = 0
renamed = False
try:
with os.fdopen(tmp_fd, "wb") as out:
while True:
@ -1565,16 +1566,21 @@ async def upload_managed_file_stream(
raise HTTPException(status_code=413, detail="File is too large")
out.write(chunk)
os.replace(tmp_path, target)
renamed = True
except HTTPException:
tmp_path.unlink(missing_ok=True)
raise
except PermissionError:
tmp_path.unlink(missing_ok=True)
raise HTTPException(status_code=403, detail="File is not writable")
except OSError as exc:
tmp_path.unlink(missing_ok=True)
raise HTTPException(status_code=500, detail=f"Could not write file: {exc}")
finally:
# Clean up the temp file on every non-success exit, including
# BaseException paths the `except` clauses above don't catch — most
# importantly asyncio.CancelledError when a browser aborts a large
# upload mid-stream (the exact NS-501 scenario). os.replace clears
# tmp_path on success, so only unlink when the rename didn't happen.
if not renamed:
tmp_path.unlink(missing_ok=True)
await file.close()
return {
@ -2316,6 +2322,43 @@ def _gateway_display_command(profile: Optional[str], verb: str) -> str:
return " ".join(["hermes", *_gateway_subcommand(profile, verb)])
# Slack member IDs (users U..., Enterprise Grid W...). Kept in sync with the
# frontend SLACK_MEMBER_ID_RE in web/src/pages/ChannelsPage.tsx.
_SLACK_MEMBER_ID_RE = re.compile(r"[UW][A-Z0-9]{2,}")
def _validate_messaging_env_value(platform_id: str, key: str, value: str) -> None:
"""Reject platform credentials that are clearly in the wrong field."""
if platform_id != "slack" or not value:
return
if key == "SLACK_BOT_TOKEN" and not value.startswith("xoxb-"):
raise HTTPException(
status_code=400,
detail="Slack Bot Token must start with xoxb-. Paste the bot token from OAuth & Permissions.",
)
if key == "SLACK_APP_TOKEN" and not value.startswith("xapp-"):
raise HTTPException(
status_code=400,
detail="Slack App Token must start with xapp-. Paste the app-level token from Basic Information > App-Level Tokens.",
)
if key == "SLACK_ALLOWED_USERS":
# Mirror the gateway's parse (gateway/platforms/slack.py): split on comma,
# strip, and drop empty entries so a trailing/interior comma isn't rejected
# here when the runtime would accept it. "*" is the allow-all wildcard.
user_ids = [part.strip() for part in value.split(",") if part.strip()]
invalid = [
user_id
for user_id in user_ids
if user_id != "*" and not _SLACK_MEMBER_ID_RE.fullmatch(user_id)
]
if invalid:
raise HTTPException(
status_code=400,
detail="Slack allowed user IDs must be comma-separated member IDs like U01ABC2DEF3.",
)
def _spawn_gateway_restart(profile: Optional[str] = None) -> Tuple[subprocess.Popen, bool]:
"""Spawn ``hermes gateway restart``, reusing an in-flight restart.
@ -3925,28 +3968,135 @@ async def update_config(body: ConfigUpdate, profile: Optional[str] = None):
raise HTTPException(status_code=500, detail="Internal server error")
def _catalog_provider_env_metadata() -> dict:
"""Map provider env vars → desktop card metadata, derived from the catalog.
Returns ``{env_var: {provider, provider_label, description, url, is_password,
advanced}}`` for every API-key provider in the unified ``provider_catalog()``
(i.e. the ``hermes model`` universe). This is what lets the desktop Keys tab
render a card for a provider even when its env var was never hand-added to
``OPTIONAL_ENV_VARS`` closing the drift where CLI-configurable providers
(openai-api, kilocode, novita, tencent-tokenhub, copilot, ) were missing
from the GUI.
Hand ``OPTIONAL_ENV_VARS`` prose is layered ON TOP of this in the endpoint;
this only supplies membership + grouping + sensible fallbacks.
"""
try:
from hermes_cli.provider_catalog import provider_catalog
except Exception:
return {}
# Env vars already declared with a NON-provider category (e.g. the shared
# GITHUB_TOKEN, which is a Skills-Hub "tool" credential) must not be
# promoted into a provider card. Copilot lists GITHUB_TOKEN among its auth
# aliases, but its provider card uses the provider-owned COPILOT_GITHUB_TOKEN.
try:
from hermes_cli.config import OPTIONAL_ENV_VARS as _OPT
except Exception:
_OPT = {}
_non_provider_keys = {
k for k, v in _OPT.items()
if (v or {}).get("category") and (v or {}).get("category") != "provider"
}
meta: dict = {}
for d in provider_catalog():
if d.tab != "keys":
continue
# API-key vars: the first is the primary (password) field; any aliases
# are kept as additional password fields so users can clear them too.
for env_var in d.api_key_env_vars:
if env_var in _non_provider_keys:
continue # don't hijack a shared tool/messaging credential
meta.setdefault(
env_var,
{
"provider": d.slug,
"provider_label": d.label,
"description": d.description,
"url": d.signup_url or None,
"is_password": True,
"advanced": False,
"category": "provider",
},
)
# Base-URL override is an advanced, non-secret field for the same card.
if d.base_url_env_var:
meta.setdefault(
d.base_url_env_var,
{
"provider": d.slug,
"provider_label": d.label,
"description": f"{d.label} base URL override",
"url": None,
"is_password": False,
"advanced": True,
"category": "provider",
},
)
# AWS-SDK providers (Bedrock) authenticate via the AWS credential chain
# rather than a pasted API key, so they have no api_key_env_vars. Tag
# their AWS_* settings to the provider card so they still appear on the
# Keys tab (otherwise Bedrock — a `hermes model` provider — would be
# invisible in the desktop app).
if d.auth_type == "aws_sdk":
for aws_var in ("AWS_REGION", "AWS_PROFILE"):
existing = meta.get(aws_var, {})
meta[aws_var] = {
"provider": d.slug,
"provider_label": d.label,
"description": existing.get("description") or f"{d.label} ({aws_var})",
"url": existing.get("url"),
"is_password": False,
"advanced": existing.get("advanced", True),
"category": "provider",
}
return meta
@app.get("/api/env")
async def get_env_vars(profile: Optional[str] = None):
with _profile_scope(profile):
env_on_disk = load_env()
channel_keys = _channel_managed_env_keys()
result = {}
for var_name, info in OPTIONAL_ENV_VARS.items():
catalog_meta = _catalog_provider_env_metadata()
def _row(var_name: str, info: dict) -> dict:
value = env_on_disk.get(var_name)
result[var_name] = {
cat_meta = catalog_meta.get(var_name) or {}
# Hand OPTIONAL_ENV_VARS prose wins where present; the catalog fills any
# gaps (description/url) and always supplies provider grouping hints.
return {
"is_set": bool(value),
"redacted_value": redact_key(value) if value else None,
"description": info.get("description", ""),
"url": info.get("url"),
"category": info.get("category", ""),
"is_password": info.get("password", False),
"description": info.get("description") or cat_meta.get("description", ""),
"url": info.get("url") if info.get("url") is not None else cat_meta.get("url"),
"category": info.get("category") or cat_meta.get("category", ""),
"is_password": info.get("password", cat_meta.get("is_password", False)),
"tools": info.get("tools", []),
"advanced": info.get("advanced", False),
"advanced": info.get("advanced", cat_meta.get("advanced", False)),
# True when this var is a messaging-platform credential owned by a
# Channels page card. The Keys/Env page uses this to hide it and
# avoid duplicating the (richer) Channels configuration UI.
"channel_managed": var_name in channel_keys,
# Provider grouping hints derived from the unified provider catalog
# so the desktop Keys tab groups by the SAME provider identity the
# CLI `hermes model` picker uses (not desktop-only prefix guesses).
"provider": cat_meta.get("provider", ""),
"provider_label": cat_meta.get("provider_label", ""),
}
result = {}
for var_name, info in OPTIONAL_ENV_VARS.items():
result[var_name] = _row(var_name, info)
# Synthesize rows for catalog provider env vars that have no hand entry in
# OPTIONAL_ENV_VARS — these are the providers that were CLI-configurable but
# invisible in the desktop app until now.
for var_name in catalog_meta:
if var_name not in result:
result[var_name] = _row(var_name, {})
return result
@ -4146,9 +4296,9 @@ _PLATFORM_OVERRIDES: dict[str, dict[str, Any]] = {
},
"slack": {
"name": "Slack",
"description": "Use Hermes from Slack via Socket Mode.",
"description": "Use Hermes from Slack via Socket Mode. Add allowed Slack member IDs so connected bots can respond.",
"docs_url": "https://api.slack.com/apps",
"env_vars": ("SLACK_BOT_TOKEN", "SLACK_APP_TOKEN"),
"env_vars": ("SLACK_BOT_TOKEN", "SLACK_APP_TOKEN", "SLACK_ALLOWED_USERS"),
"required_env": ("SLACK_BOT_TOKEN", "SLACK_APP_TOKEN"),
},
"mattermost": {
@ -4633,6 +4783,7 @@ def _messaging_env_info(key: str) -> dict[str, Any]:
return {
"description": info.get("description", ""),
"prompt": info.get("prompt", key),
"help": info.get("help", ""),
"url": info.get("url"),
"is_password": info.get("password", False),
"advanced": info.get("advanced", False),
@ -5212,6 +5363,7 @@ async def update_messaging_platform(
)
trimmed = value.strip()
if trimmed:
_validate_messaging_env_value(platform_id, key, trimmed)
save_env_value(key, trimmed)
if body.enabled is not None:
@ -5413,13 +5565,53 @@ def _claude_code_only_status() -> Dict[str, Any]:
return {"logged_in": False, "source": None}
# Provider catalog. The order matters — it's how we render the UI list.
# ``cli_command`` is what the dashboard surfaces as the copy-to-clipboard
# fallback while Phase 2 (in-browser flows) isn't built yet.
# ``flow`` describes the OAuth shape so the future modal can pick the
# right UI: ``pkce`` = open URL + paste callback code, ``device_code`` =
# show code + verification URL + poll, ``external`` = read-only (delegated
# to a third-party CLI like Claude Code or Qwen).
def _gemini_cli_status() -> Dict[str, Any]:
"""Status for the google-gemini-cli OAuth provider (Code Assist login)."""
try:
from hermes_cli import auth as hauth
raw = hauth.get_gemini_oauth_auth_status()
except Exception as e:
return {"logged_in": False, "error": str(e)}
return {
"logged_in": bool(raw.get("logged_in")),
"source": raw.get("source") or "google_oauth",
"source_label": raw.get("email") or raw.get("auth_file") or "Google Code Assist",
"token_preview": _truncate_token(raw.get("api_key")),
"expires_at": None,
"has_refresh_token": True,
}
def _copilot_acp_status() -> Dict[str, Any]:
"""Status for copilot-acp — credentials are owned by the Copilot CLI.
There is no cheap programmatic credential probe for the ACP subprocess, so
this is a read-only "managed by the Copilot CLI" card (like claude-code):
Hermes never claims a login state it can't verify.
"""
return {
"logged_in": False,
"source": "copilot_cli",
"source_label": "Managed by the GitHub Copilot CLI",
"token_preview": None,
"expires_at": None,
"has_refresh_token": False,
}
# Explicit, hand-tuned OAuth/account provider cards. These carry the bits that
# can't be derived from the unified provider catalog: the OAuth ``flow`` shape,
# the per-provider ``status_fn``, the ``cli_command`` fallback, and curated
# display order. They are the OVERRIDE BASE for ``_build_oauth_catalog()``,
# which unions them with every accounts-tab provider in ``provider_catalog()``
# so newly-added OAuth/external providers appear automatically (no hand edit).
# This tuple also still includes two entries that are NOT catalog providers but
# must show on the Accounts tab: the api-key Anthropic PKCE card and the
# synthetic ``claude-code`` subscription row.
# ``flow`` describes the OAuth shape so the modal can pick the right UI:
# ``pkce`` = open URL + paste callback code, ``device_code`` = show code +
# verification URL + poll, ``external`` = read-only (delegated to a third-party
# CLI like Claude Code or Qwen), ``loopback`` = 127.0.0.1 callback listener.
_OAUTH_PROVIDER_CATALOG: tuple[Dict[str, Any], ...] = (
{
"id": "nous",
@ -5469,6 +5661,22 @@ _OAUTH_PROVIDER_CATALOG: tuple[Dict[str, Any], ...] = (
"docs_url": "https://hermes-agent.nousresearch.com/docs/guides/xai-grok-oauth",
"status_fn": None, # dispatched via auth.get_xai_oauth_auth_status
},
{
"id": "google-gemini-cli",
"name": "Google Gemini (OAuth + Code Assist)",
"flow": "external",
"cli_command": "hermes auth add google-gemini-cli",
"docs_url": "https://ai.google.dev/gemini-api/docs",
"status_fn": _gemini_cli_status,
},
{
"id": "copilot-acp",
"name": "GitHub Copilot (ACP)",
"flow": "external",
"cli_command": "copilot /login",
"docs_url": "https://docs.github.com/en/copilot",
"status_fn": _copilot_acp_status,
},
# ── Anthropic / Claude entries sit at the bottom: the API-key path
# first, then the subscription OAuth path (which only works with extra
# usage credits on top of a Claude Max plan — see disclaimer in name).
@ -5555,6 +5763,31 @@ def _resolve_provider_status(provider_id: str, status_fn) -> Dict[str, Any]:
"has_refresh_token": True,
"last_refresh": raw.get("last_refresh"),
}
# No hand-written branch for this provider id: fall through to the
# canonical slug-driven dispatcher so accounts-tab providers derived
# from the unified catalog (which carry status_fn=None) still reflect
# real login state instead of rendering permanently logged-out. This
# closes the membership-auto-extends-but-status-doesn't gap: add an
# OAuth/account provider plugin and its card shows the right state.
raw = hauth.get_auth_status(provider_id)
if isinstance(raw, dict) and "logged_in" in raw:
return {
"logged_in": bool(raw.get("logged_in")),
"source": raw.get("source") or raw.get("provider") or provider_id,
"source_label": (
raw.get("source_label")
or raw.get("auth_store")
or raw.get("auth_store_path")
or raw.get("base_url")
or raw.get("name")
or ""
),
"token_preview": _truncate_token(
raw.get("access_token") or raw.get("api_key")
),
"expires_at": raw.get("expires_at") or raw.get("access_expires_at"),
"has_refresh_token": bool(raw.get("has_refresh_token")),
}
except Exception as e:
return {"logged_in": False, "error": str(e)}
return {"logged_in": False}
@ -5598,6 +5831,56 @@ def _oauth_provider_disconnect_hint(provider: Dict[str, Any], status: Dict[str,
return None
def _build_oauth_catalog() -> list[Dict[str, Any]]:
    """Assemble the provider rows shown on the Accounts tab.

    The result is the union of two sources, de-duplicated by id:

    1. ``_OAUTH_PROVIDER_CATALOG`` -- the hand-tuned cards, copied in their
       curated order. These carry the bespoke ``flow`` / ``status_fn`` /
       ``cli_command`` values and win whenever an id appears in both sources.
    2. Every ``provider_catalog()`` entry whose ``tab`` is ``"accounts"`` and
       whose slug is not already present, appended in catalog order with
       default values (``flow="external"``, ``status_fn=None``).

    Loading the unified catalog is best-effort: if it raises, the rows
    gathered so far are returned unchanged.
    """
    rows_by_id: dict[str, Dict[str, Any]] = {}

    # Explicit cards first; the first occurrence of an id is the one kept.
    for card in _OAUTH_PROVIDER_CATALOG:
        if card["id"] not in rows_by_id:
            rows_by_id[card["id"]] = dict(card)

    # Then any accounts-tab provider the explicit list does not cover, so a
    # newly added provider shows up without a hand edit.
    try:
        from hermes_cli.provider_catalog import provider_catalog

        for d in provider_catalog():
            if d.tab == "accounts" and d.slug not in rows_by_id:
                rows_by_id[d.slug] = {
                    "id": d.slug,
                    "name": d.label,
                    "flow": "external",
                    "cli_command": f"hermes auth add {d.slug}",
                    "docs_url": d.signup_url or "",
                    "status_fn": None,
                }
    except Exception:
        pass

    return list(rows_by_id.values())
@app.get("/api/providers/oauth")
async def list_oauth_providers(profile: Optional[str] = None):
"""Enumerate every OAuth-capable LLM provider with current status.
@ -5617,10 +5900,14 @@ async def list_oauth_providers(profile: Optional[str] = None):
token_preview last N chars of the token, never the full token
expires_at ISO timestamp string or null
has_refresh_token bool
Membership comes from _build_oauth_catalog(): the hand-tuned
_OAUTH_PROVIDER_CATALOG cards (which supply per-provider flow/status/cli
metadata) plus every accounts-tab provider in the unified provider_catalog(),
so this stays in sync with the `hermes model` picker.
"""
with _profile_scope(profile):
providers = []
for p in _OAUTH_PROVIDER_CATALOG:
for p in _build_oauth_catalog():
status = _resolve_provider_status(p["id"], p.get("status_fn"))
disconnect_hint = _oauth_provider_disconnect_hint(p, status)
providers.append({
@ -5647,7 +5934,7 @@ async def disconnect_oauth_provider(
_require_token(request)
with _profile_scope(profile):
catalog_by_id = {p["id"]: p for p in _OAUTH_PROVIDER_CATALOG}
catalog_by_id = {p["id"]: p for p in _build_oauth_catalog()}
provider = catalog_by_id.get(provider_id)
if provider is None:
raise HTTPException(
@ -10914,6 +11201,7 @@ def _resolve_chat_argv(
# the dashboard PTY path.
env.setdefault("HERMES_TUI_DISABLE_MOUSE", "1")
env.setdefault("HERMES_TUI_INLINE", "1")
env["HERMES_TUI_DASHBOARD"] = "1"
if profile_dir is not None:
env["HERMES_HOME"] = str(profile_dir)

View file

@ -1836,6 +1836,43 @@ class SessionDB:
return cleaned
def _is_compression_ancestor(
self, conn, *, ancestor_id: str, descendant_id: str
) -> bool:
"""Return True if *ancestor_id* is a compression predecessor of
*descendant_id* (walking parent links up the continuation chain).
The continuation edge is the canonical one shared with
:func:`_ephemeral_child_sql` / :meth:`set_session_archived`
(``_COMPRESSION_CHILD_SQL``): a parent child edge counts only when the
parent ended with ``end_reason = 'compression'`` and the child started
at or after the parent's ``ended_at``, which distinguishes continuations
from delegate subagents / branch children that also carry a
``parent_session_id``. Expressed as a single recursive CTE rather than a
per-hop Python walk so the edge definition lives in exactly one place.
"""
if not ancestor_id or not descendant_id or ancestor_id == descendant_id:
return False
# Walk parent links up from the descendant, following only compression
# continuation edges, and check whether ancestor_id is reached.
edge = _COMPRESSION_CHILD_SQL.format(a="child")
row = conn.execute(
f"""
WITH RECURSIVE ancestors(id) AS (
SELECT ?
UNION
SELECT parent.id
FROM ancestors a
JOIN sessions child ON child.id = a.id
JOIN sessions parent ON parent.id = child.parent_session_id
WHERE {edge}
)
SELECT 1 FROM ancestors WHERE id = ? AND id != ? LIMIT 1
""",
(descendant_id, ancestor_id, descendant_id),
).fetchone()
return row is not None
def set_session_title(self, session_id: str, title: str) -> bool:
"""Set or update a session's title.
@ -1854,9 +1891,29 @@ class SessionDB:
)
conflict = cursor.fetchone()
if conflict:
raise ValueError(
f"Title '{title}' is already in use by session {conflict['id']}"
)
conflict_id = conflict["id"]
# A compression continuation is the live, projected-forward
# head of its conversation; its compressed predecessors are
# ended and hidden from the session list (list_sessions_rich
# projects roots → tip). When the title that "conflicts" is
# held by such a hidden ancestor, the user has no way to free
# it — renaming the visible tip back to the base name would
# dead-end with "already in use by <session they can't see>".
# Treat this as a transfer: move the title off the ancestor
# onto the continuation. Uniqueness is preserved (still only
# one session carries the exact title) and the parent-link
# lineage is untouched.
if self._is_compression_ancestor(
conn, ancestor_id=conflict_id, descendant_id=session_id
):
conn.execute(
"UPDATE sessions SET title = NULL WHERE id = ?",
(conflict_id,),
)
else:
raise ValueError(
f"Title '{title}' is already in use by session {conflict_id}"
)
cursor = conn.execute(
"UPDATE sessions SET title = ? WHERE id = ?",
(title, session_id),

View file

@ -12,7 +12,6 @@
let
packages = builtins.attrValues self'.packages;
hermesNpmLib = self'.packages.default.passthru.hermesNpmLib;
fixLockfilesExe = pkgs.lib.getExe self'.packages.fix-lockfiles;
# Collect all packageJsonPath values from npm workspace packages.
npmPackageJsonPaths = builtins.filter (p: p != null) (
@ -33,7 +32,7 @@
shellHook = ''
echo "Hermes Agent dev shell"
${combinedNonNpm}
${hermesNpmLib.mkNpmDevShellHook npmPackageJsonPaths fixLockfilesExe}
${hermesNpmLib.mkNpmDevShellHook npmPackageJsonPaths}
echo "Ready. Run 'hermes' to start."
'';
};

View file

@ -2,8 +2,7 @@
#
# All npm packages in this repo are workspace members sharing a single
# root package-lock.json. mkNpmPassthru provides the shared src, npmDeps,
# npmRoot, and npmDepsFetcherVersion so individual .nix files don't
# duplicate them. One hash to rule them all.
# npmRoot, and npmConfigHook so individual .nix files don't duplicate them.
#
# mkNpmPassthru returns packageJsonPath (e.g. "ui-tui/package.json")
# instead of a per-package devShellHook. The root devshell hook
@ -19,28 +18,19 @@ let
# The workspace root — where the single package-lock.json lives.
src = ../.;
# Single npm deps fetch from the workspace root lockfile.
# All workspace packages share this derivation.
npmDepsHash = "sha256-kbjJksq7limRIYqP3DwI+GNgCXkG96tXcsQqmuEedxo=";
npmDeps = pkgs.fetchNpmDeps {
inherit src;
fetcherVersion = 2;
hash = npmDepsHash;
};
# npm dependencies for the workspace, shared by all members. importNpmLock
# resolves each package from the lockfile's own `integrity` hashes, so the
# lockfile is the single source of truth — no separate dependency hash to
# keep in sync with it.
npmDeps = pkgs.importNpmLock.importNpmLock { npmRoot = src; };
in
{
# Returns a buildNpmPackage-compatible attrs set that provides:
# src, npmDeps, npmRoot, npmDepsFetcherVersion
# patchPhase — ensures root lockfile has exactly one trailing newline
# nativeBuildInputs — [ updateLockfileScript ] (list, prepend with ++ for more)
# passthru.packageJsonPath — relative path to this workspace's package.json
# nodejs — fixed nodejs version for all packages we use in the repo
#
# NOTE: npmConfigHook runs `diff` between the source lockfile and the
# npm-deps cache lockfile. fetchNpmDeps preserves whatever trailing
# newlines the lockfile has. The patchPhase normalizes to exactly one
# trailing newline so both sides always match.
# src, npmDeps, npmRoot — workspace source + importNpmLock dep set
# npmConfigHook — importNpmLock's offline `npm install` hook
# nativeBuildInputs — [ updateLockfileScript ] (list, prepend with ++ for more)
# passthru.packageJsonPath — relative path to this workspace's package.json
# nodejs — fixed nodejs version for all packages we use in the repo
#
# Usage:
# npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; };
@ -62,35 +52,15 @@ in
in
{
inherit src npmDeps nodejs;
# importNpmLock's hook installs the rewritten lockfile (every `resolved`
# rewritten to a /nix/store file: path) into the unpacked workspace and
# runs `npm install` offline, so every workspace member's dependencies
# resolve without network access.
npmConfigHook = pkgs.importNpmLock.npmConfigHook;
npmRoot = ".";
npmDepsFetcherVersion = 2;
ELECTRON_SKIP_BINARY_DOWNLOAD = 1;
patchPhase = ''
runHook prePatch
# Normalize trailing newlines on the root lockfile so source and
# npm-deps always match, regardless of what fetchNpmDeps preserves.
sed -i -z 's/\\n*$/\\n/' package-lock.json
# Make npmConfigHook's byte-for-byte diff newline-agnostic by
# replacing its hardcoded /nix/store/.../diff with a wrapper that
# normalizes trailing newlines on both sides before comparing.
mkdir -p "$TMPDIR/bin"
cat > "$TMPDIR/bin/diff" << DIFFWRAP
#!/bin/sh
f1=\\$(mktemp) && sed -z 's/\\n*$/\\n/' "\\$1" > "\\$f1"
f2=\\$(mktemp) && sed -z 's/\\n*$/\\n/' "\\$2" > "\\$f2"
${pkgs.diffutils}/bin/diff "\\$f1" "\\$f2" && rc=0 || rc=\\$?
rm -f "\\$f1" "\\$f2"
exit \\$rc
DIFFWRAP
chmod +x "$TMPDIR/bin/diff"
export PATH="$TMPDIR/bin:$PATH"
runHook postPatch
'';
nativeBuildInputs = [
(pkgs.writeShellScriptBin "update_${attr}_lockfile" ''
set -euox pipefail
@ -104,7 +74,6 @@ in
CI=true ${pkgs.lib.getExe' nodejs "npm"} install --workspaces
${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json
# Hash lives in lib.nix — just rebuild to verify.
nix build .#${attr}
echo "Lockfile updated and build verified for .#${attr}"
'')
@ -120,12 +89,9 @@ in
# Takes a list of package.json relative paths (from mkNpmPassthru .passthru.packageJsonPath),
# stamps all of them, and if any changed:
# 1. Runs `npm i --package-lock-only` from root to update the lockfile
# 2. If the lockfile changed, runs `npm ci` + fix-lockfiles
#
# fixLockfilesExe: absolute path to the fix-lockfiles binary
# (from pkgs.lib.getExe self'.packages.fix-lockfiles in devShell.nix).
# 2. If the lockfile changed, runs `npm ci`
mkNpmDevShellHook =
packageJsonPaths: fixLockfilesExe:
packageJsonPaths:
pkgs.writeShellScript "npm-dev-hook" ''
REPO_ROOT=$(git rev-parse --show-toplevel)
@ -158,172 +124,4 @@ in
echo "$LOCK_STAMP_VALUE" > "$LOCK_STAMP"
fi
'';
# Build `fix-lockfiles` bin that checks/updates the single npmDepsHash
# fix-lockfiles --check # exit 1 if any hash is stale
# fix-lockfiles --apply # rewrite stale hashes in place
# fix-lockfiles # alias of --apply
# Writes machine-readable fields (stale, changed, report) to $GITHUB_OUTPUT
# when set, so CI workflows can post a sticky PR comment directly.
mkFixLockfiles =
{
attr, # flake package attr for fallback verification build, e.g. "tui"
}:
pkgs.writeShellScriptBin "fix-lockfiles" ''
set -uox pipefail
MODE="''${1:---apply}"
case "$MODE" in
--check|--apply) ;;
-h|--help)
echo "usage: fix-lockfiles [--check|--apply]"
exit 0 ;;
*)
echo "usage: fix-lockfiles [--check|--apply]" >&2
exit 2 ;;
esac
REPO_ROOT="$(git rev-parse --show-toplevel)"
cd "$REPO_ROOT"
# When running in GH Actions, emit Markdown links in the report pointing
# at the offending line of the nix file (and the lockfile) at the exact
# commit that was checked. LINK_SHA should be set by the workflow to the
# PR head SHA; falls back to GITHUB_SHA (which on pull_request is the
# test-merge commit, still browseable).
LINK_SERVER="''${GITHUB_SERVER_URL:-https://github.com}"
LINK_REPO="''${GITHUB_REPOSITORY:-}"
LINK_SHA="''${LINK_SHA:-''${GITHUB_SHA:-}}"
STALE=0
FIXED=0
REPORT=""
# All workspace packages share the root package-lock.json, so
# we only need to check the hash once.
LOCK_FILE="package-lock.json"
LIB_FILE="nix/lib.nix"
NEW_HASH=$(${pkgs.lib.getExe pkgs.prefetch-npm-deps} "$LOCK_FILE" 2>/dev/null)
if [ -z "$NEW_HASH" ]; then
echo "prefetch-npm-deps failed, falling back to nix build" >&2
OUTPUT=$(nix build ".#${attr}.npmDeps" --no-link --print-build-logs 2>&1)
STATUS=$?
if [ "$STATUS" -eq 0 ]; then
echo "ok (via nix build)"
exit 0
fi
NEW_HASH=$(echo "$OUTPUT" | awk '/got:/ {print $2; exit}')
if [ -z "$NEW_HASH" ]; then
if echo "$OUTPUT" | grep -qE "throttled|HTTP error 418|substituter .* is disabled|some outputs of .* are not valid"; then
echo "skipped (transient cache failure see primary nix build for real status)" >&2
echo "$OUTPUT" | tail -8 >&2
exit 0
fi
echo "build failed with no hash mismatch:" >&2
echo "$OUTPUT" | tail -40 >&2
exit 1
fi
fi
OLD_HASH=$(grep -oE 'npmDepsHash = "sha256-[^"]+"' "$LIB_FILE" | head -1 \
| sed -E 's/npmDepsHash = "(.*)"/\1/')
# prefetch-npm-deps says the hash already matches — but it only hashes the
# lockfile *contents* and can disagree with fetchNpmDeps + npmConfigHook,
# which validate the full source lockfile against the realized deps cache.
# Trusting prefetch alone produced false "ok" results while the actual
# build was broken (e.g. lockfile engines/os/cpu fields the pinned nixpkgs
# strips from the deps cache, tripping npmConfigHook). So when prefetch
# claims the hash is current, confirm with a real consumer build before
# believing it.
if [ "$NEW_HASH" = "$OLD_HASH" ]; then
if VERIFY_OUT=$(nix build ".#${attr}" --no-link --print-build-logs 2>&1); then
echo "ok"
if [ -n "''${GITHUB_OUTPUT:-}" ]; then
{ echo "stale=false"; echo "changed=false"; } >> "$GITHUB_OUTPUT"
fi
exit 0
fi
# Build failed despite a matching hash. A fixed-output 'got:' means
# prefetch genuinely disagreed with fetchNpmDeps — adopt the real hash
# and fall through to the stale-handling path below.
CORRECT_HASH=$(echo "$VERIFY_OUT" | awk '/got:/ {print $2; exit}')
if [ -n "$CORRECT_HASH" ]; then
echo "prefetch-npm-deps reported current ($OLD_HASH) but fetchNpmDeps wants $CORRECT_HASH" >&2
NEW_HASH="$CORRECT_HASH"
elif echo "$VERIFY_OUT" | grep -qE "throttled|HTTP error 418|substituter .* is disabled|some outputs of .* are not valid"; then
echo "skipped (transient cache failure see primary nix build for real status)" >&2
echo "$VERIFY_OUT" | tail -8 >&2
exit 0
else
# Not a stale-hash problem — surface it honestly instead of "ok".
echo "::error::nix build .#${attr} failed and it is NOT a stale npmDepsHash (no 'got:' hash in output)." >&2
echo "The committed lockfile may be incompatible with the pinned nixpkgs" >&2
echo "(e.g. engines/os/cpu fields that prefetch-npm-deps strips from the" >&2
echo "deps cache, tripping npmConfigHook). fix-lockfiles cannot repair this." >&2
echo "$VERIFY_OUT" | tail -40 >&2
if [ -n "''${GITHUB_OUTPUT:-}" ]; then
{ echo "stale=false"; echo "changed=false"; } >> "$GITHUB_OUTPUT"
fi
exit 1
fi
fi
HASH_LINE=$(grep -n 'npmDepsHash = "sha256-' "$LIB_FILE" | head -1 | cut -d: -f1)
echo "stale: $LIB_FILE:$HASH_LINE $OLD_HASH -> $NEW_HASH"
STALE=1
if [ -n "$LINK_REPO" ] && [ -n "$LINK_SHA" ]; then
LIB_URL="$LINK_SERVER/$LINK_REPO/blob/$LINK_SHA/$LIB_FILE#L$HASH_LINE"
LOCK_URL="$LINK_SERVER/$LINK_REPO/blob/$LINK_SHA/$LOCK_FILE"
REPORT="- [\`$LIB_FILE:$HASH_LINE\`]($LIB_URL): \`$OLD_HASH\` \`$NEW_HASH\` lockfile: [\`$LOCK_FILE\`]($LOCK_URL)"$'\\n'
else
REPORT="- \`$LIB_FILE:$HASH_LINE\`: \`$OLD_HASH\` \`$NEW_HASH\`"$'\\n'
fi
if [ "$MODE" = "--apply" ]; then
sed -i -E "s|npmDepsHash = \"sha256-[^\"]+\";|npmDepsHash = \"$NEW_HASH\";|" "$LIB_FILE"
if ! nix build ".#${attr}.npmDeps" --no-link --print-build-logs 2>/dev/null; then
# prefetch-npm-deps may disagree with fetchNpmDeps (it hashes
# the lockfile contents, not the full source tree). Extract the
# correct hash from the nix build error and retry.
RETRY_OUTPUT=$(nix build ".#${attr}.npmDeps" --no-link --print-build-logs 2>&1)
CORRECT_HASH=$(echo "$RETRY_OUTPUT" | awk '/got:/ {print $2; exit}')
if [ -n "$CORRECT_HASH" ]; then
echo "prefetch-npm-deps gave $NEW_HASH but nix wants $CORRECT_HASH retrying" >&2
sed -i -E "s|npmDepsHash = \"sha256-[^\"]+\";|npmDepsHash = \"$CORRECT_HASH\";|" "$LIB_FILE"
if ! nix build ".#${attr}.npmDeps" --no-link --print-build-logs; then
echo "verification build failed after hash retry" >&2
exit 1
fi
NEW_HASH="$CORRECT_HASH"
else
echo "verification build failed after hash update" >&2
exit 1
fi
fi
FIXED=1
echo "fixed"
fi
if [ -n "''${GITHUB_OUTPUT:-}" ]; then
{
[ "$STALE" -eq 1 ] && echo "stale=true" || echo "stale=false"
[ "$FIXED" -eq 1 ] && echo "changed=true" || echo "changed=false"
if [ -n "$REPORT" ]; then
echo "report<<REPORT_EOF"
printf "%s" "$REPORT"
echo "REPORT_EOF"
fi
} >> "$GITHUB_OUTPUT"
fi
if [ "$STALE" -eq 1 ] && [ "$MODE" = "--check" ]; then
echo
echo "Stale lockfile hash detected. Run:"
echo " nix run .#fix-lockfiles"
exit 1
fi
exit 0
'';
}

View file

@ -50,8 +50,6 @@
tui = hermesAgent.hermesTui;
web = hermesAgent.hermesWeb;
desktop = hermesAgent.hermesDesktop;
fix-lockfiles = hermesAgent.hermesNpmLib.mkFixLockfiles { attr = "tui"; };
};
};
}

View file

@ -144,4 +144,4 @@ Available in `hybrid` and `tools` memory modes:
## Client Version
Requires `hindsight-client >= 0.4.22`. The plugin auto-upgrades on session start if an older version is detected.
Requires `hindsight-client >= 0.6.1`. The plugin auto-upgrades on session start if an older version is detected.

View file

@ -50,7 +50,8 @@ logger = logging.getLogger(__name__)
_DEFAULT_API_URL = "https://api.hindsight.vectorize.io"
_DEFAULT_LOCAL_URL = "http://localhost:8888"
_MIN_CLIENT_VERSION = "0.4.22"
# Keep in sync with tools/lazy_deps.py ("memory.hindsight") and plugin.yaml.
_MIN_CLIENT_VERSION = "0.6.1"
_DEFAULT_TIMEOUT = 120 # seconds — cloud API can take 30-40s per request
_DEFAULT_IDLE_TIMEOUT = 300 # seconds — Hindsight embedded daemon default
# Mirrors hindsight-integrations/openclaw — Hindsight 0.5.0 added
@ -100,6 +101,17 @@ def _check_local_runtime() -> tuple[bool, str | None]:
return False, str(exc)
def _ensure_cloud_client_dependency() -> None:
"""Install the Hindsight cloud client lazily before importing it."""
try:
from tools.lazy_deps import ensure as _lazy_ensure
_lazy_ensure("memory.hindsight", prompt=False)
except ImportError:
pass
except Exception as exc:
raise ImportError(str(exc)) from exc
# ---------------------------------------------------------------------------
# Hindsight API capability probe — mirrors hindsight-integrations/openclaw.
# ---------------------------------------------------------------------------
@ -730,7 +742,6 @@ class HindsightMemoryProvider(MemoryProvider):
env_writes: dict = {}
# Step 2: Install/upgrade deps for selected mode
_MIN_CLIENT_VERSION = "0.4.22"
cloud_dep = f"hindsight-client>={_MIN_CLIENT_VERSION}"
local_dep = "hindsight-all"
if mode == "local_embedded":
@ -990,6 +1001,7 @@ class HindsightMemoryProvider(MemoryProvider):
kwargs["idle_timeout"] = idle_timeout
self._client = HindsightEmbedded(**kwargs)
else:
_ensure_cloud_client_dependency()
from hindsight_client import Hindsight
timeout = self._timeout or _DEFAULT_TIMEOUT
kwargs = {"base_url": self._api_url, "timeout": float(timeout)}

View file

@ -2,7 +2,7 @@ name: hindsight
version: 1.0.0
description: "Hindsight — long-term memory with knowledge graph, entity resolution, and multi-strategy retrieval."
pip_dependencies:
- "hindsight-client>=0.4.22"
- "hindsight-client>=0.6.1"
requires_env: []
hooks:
- on_session_end

View file

@ -45,10 +45,11 @@ from typing import Any, Callable, Dict, List, Optional, Set
from urllib.parse import urlparse
from urllib.request import url2pathname
from agent.message_content import flatten_message_text
from agent.memory_provider import MemoryProvider
from agent.skill_commands import extract_user_instruction_from_skill_message
from tools.registry import tool_error
from utils import atomic_json_write
from utils import atomic_json_write, env_var_enabled
logger = logging.getLogger(__name__)
@ -70,6 +71,7 @@ _TIMEOUT = 30.0
_SESSION_DRAIN_TIMEOUT = 10.0
_DEFERRED_COMMIT_TIMEOUT = (_TIMEOUT * 2) + 5.0
_REMOTE_RESOURCE_PREFIXES = ("http://", "https://", "git@", "ssh://", "git://")
_SYNC_TRACE_ENV = "HERMES_OPENVIKING_SYNC_TRACE"
# Maps the viking_remember `category` enum to a viking:// subdirectory.
# Keep in sync with REMEMBER_SCHEMA.parameters.properties.category.enum.
@ -156,6 +158,18 @@ def _derive_openviking_user_text(content: Any) -> str:
return extract_user_instruction_from_skill_message(content) or ""
def _sync_trace_enabled() -> bool:
    """Report whether the env var named by ``_SYNC_TRACE_ENV`` is enabled."""
    trace_requested = env_var_enabled(_SYNC_TRACE_ENV)
    return trace_requested
def _preview(value: Any, limit: int = 160) -> str:
text = "" if value is None else str(value)
text = text.replace("\n", "\\n")
if len(text) > limit:
return text[:limit] + "..."
return text
# ---------------------------------------------------------------------------
# Process-level atexit safety net — ensures pending sessions are committed
# even if shutdown_memory_provider is never called (e.g. gateway crash,
@ -488,6 +502,25 @@ ADD_RESOURCE_SCHEMA = {
}
# Recall tools (read-only) whose results we never re-ingest into OpenViking —
# echoing recalled memory back into the session transcript would re-store it.
# Write tools (viking_remember / viking_add_resource) are intentionally NOT
# here. Derived from the canonical schema names so renames can't desync.
_OPENVIKING_RECALL_TOOL_NAMES = {
SEARCH_SCHEMA["name"],
READ_SCHEMA["name"],
BROWSE_SCHEMA["name"],
}
# Canonical tool_status values emitted in OpenViking batch tool parts.
_TOOL_STATUS_COMPLETED = "completed"
_TOOL_STATUS_ERROR = "error"
_TOOL_STATUS_PENDING = "pending"
# Inbound status aliases (from varied tool-result shapes) -> canonical above.
_TOOL_STATUS_ERROR_ALIASES = {"error", "failed", "failure"}
_TOOL_STATUS_COMPLETED_ALIASES = {"completed", "complete", "success", "succeeded"}
def _zip_directory(dir_path: Path) -> Path:
"""Create a temporary zip file containing a directory tree."""
root = dir_path.resolve()
@ -2221,7 +2254,10 @@ class OpenVikingMemoryProvider(MemoryProvider):
def _commit_session(self, sid: str, turn_count: int, *, context: str) -> bool:
try:
self._client.post(f"/api/v1/sessions/{sid}/commit")
self._client.post(
f"/api/v1/sessions/{sid}/commit",
{"keep_recent_count": 0},
)
self._mark_session_committed(sid)
logger.info("OpenViking session %s committed %s (%d turns)", sid, context, turn_count)
return True
@ -2293,7 +2329,265 @@ class OpenVikingMemoryProvider(MemoryProvider):
with self._prefetch_lock:
self._prefetch_result = ""
def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
@staticmethod
def _message_text(content: Any) -> str:
    """Return the text of a message ``content`` value via ``flatten_message_text``."""
    flattened = flatten_message_text(content)
    return flattened
@classmethod
def _message_matches_text(cls, message: Dict[str, Any], expected: Any) -> bool:
    """Tell whether ``message``'s content text equals ``expected``.

    Both sides are reduced to text with ``_message_text`` and stripped before
    comparing. An ``expected`` value with no text never matches.
    """
    wanted = cls._message_text(expected).strip()
    if wanted:
        return cls._message_text(message.get("content")).strip() == wanted
    return False
@classmethod
def _extract_current_turn_messages(
    cls,
    messages: Optional[List[Dict[str, Any]]],
    user_content: str,
    assistant_content: str,
) -> List[Dict[str, Any]]:
    """Cut the just-finished turn out of a full message transcript.

    The end of the turn is the last assistant message whose text equals
    ``assistant_content``; failing that, the last assistant message; failing
    that, the final transcript entry. The start is the closest user message
    at or before that end whose text equals ``user_content``, or else the
    closest user message of any text.

    Returns:
        The dict entries between start and end (inclusive), or an empty list
        when the transcript is empty or holds no user message before the end.
    """
    if not messages:
        return []

    def last_index(role: str, upper: int, expected: Any, match_text: bool) -> Optional[int]:
        """Scan backwards from ``upper`` for a ``role`` dict, optionally text-matched."""
        for position in range(upper, -1, -1):
            candidate = messages[position]
            if not isinstance(candidate, dict) or candidate.get("role") != role:
                continue
            if not match_text or cls._message_matches_text(candidate, expected):
                return position
        return None

    final_index = len(messages) - 1

    # Locate the end of the turn: exact assistant match, then any assistant,
    # then simply the tail of the transcript.
    end_idx: Optional[int] = None
    if cls._message_text(assistant_content).strip():
        end_idx = last_index("assistant", final_index, assistant_content, True)
    if end_idx is None:
        end_idx = last_index("assistant", final_index, None, False)
    if end_idx is None:
        end_idx = final_index

    # Locate the start of the turn: exact user match, then any user message.
    start_idx: Optional[int] = None
    if cls._message_text(user_content).strip():
        start_idx = last_index("user", end_idx, user_content, True)
    if start_idx is None:
        start_idx = last_index("user", end_idx, None, False)
    if start_idx is None:
        return []

    turn_slice = messages[start_idx : end_idx + 1]
    return [entry for entry in turn_slice if isinstance(entry, dict)]
@staticmethod
def _tool_call_id(tool_call: Dict[str, Any]) -> str:
return str(tool_call.get("id") or tool_call.get("tool_call_id") or "")
@staticmethod
def _tool_call_name(tool_call: Dict[str, Any]) -> str:
function = tool_call.get("function")
if isinstance(function, dict):
return str(function.get("name") or "")
return str(tool_call.get("name") or "")
@staticmethod
def _is_openviking_recall_tool_name(tool_name: Any) -> bool:
    """Check ``tool_name`` (stripped, lowercased) against ``_OPENVIKING_RECALL_TOOL_NAMES``."""
    normalized = str(tool_name or "").strip().lower()
    return normalized in _OPENVIKING_RECALL_TOOL_NAMES
@staticmethod
def _tool_call_input(tool_call: Dict[str, Any]) -> Dict[str, Any]:
function = tool_call.get("function")
raw_args: Any = None
if isinstance(function, dict):
raw_args = function.get("arguments")
if raw_args is None:
raw_args = tool_call.get("args")
if raw_args is None:
return {}
if isinstance(raw_args, dict):
return raw_args
if isinstance(raw_args, str):
if not raw_args.strip():
return {}
try:
parsed = json.loads(raw_args)
except Exception:
return {"value": raw_args}
if isinstance(parsed, dict):
return parsed
return {"value": parsed}
return {"value": raw_args}
@classmethod
def _tool_result_status(cls, message: Dict[str, Any]) -> str:
    """Classify a tool-result message as error or completed.

    An explicit ``status`` / ``tool_status`` on the message wins when it is a
    known alias. Otherwise, if the message text decodes to a JSON object, that
    object is inspected: an error-alias ``status``, ``success`` being
    ``False``, a truthy ``error`` or a non-zero integer ``exit_code`` all
    count as an error. Everything else is reported as completed.
    """
    declared = str(message.get("status") or message.get("tool_status") or "").lower()
    if declared in _TOOL_STATUS_ERROR_ALIASES:
        return _TOOL_STATUS_ERROR
    if declared in _TOOL_STATUS_COMPLETED_ALIASES:
        return _TOOL_STATUS_COMPLETED

    body = cls._message_text(message.get("content")).strip()
    if not body:
        return _TOOL_STATUS_COMPLETED
    try:
        decoded = json.loads(body)
    except Exception:
        # Free-form output carries no structured failure signal.
        return _TOOL_STATUS_COMPLETED
    if not isinstance(decoded, dict):
        return _TOOL_STATUS_COMPLETED

    if str(decoded.get("status") or "").lower() in _TOOL_STATUS_ERROR_ALIASES:
        return _TOOL_STATUS_ERROR
    if decoded.get("success") is False:
        return _TOOL_STATUS_ERROR
    if decoded.get("error"):
        return _TOOL_STATUS_ERROR
    exit_code = decoded.get("exit_code")
    if isinstance(exit_code, int) and exit_code != 0:
        return _TOOL_STATUS_ERROR
    return _TOOL_STATUS_COMPLETED
@classmethod
def _messages_to_openviking_batch(
    cls,
    messages: List[Dict[str, Any]],
    *,
    assistant_peer_id: str = "",
) -> List[Dict[str, Any]]:
    """Translate transcript messages into OpenViking batch payload messages.

    Only ``user``, ``assistant`` and ``tool`` dict entries contribute. Text
    becomes a ``text`` part. Tool results are collected into ``tool`` parts
    and emitted as one assistant payload just before the next user/assistant
    message (or at the end). Assistant tool calls that have no result in
    ``messages`` are emitted as ``tool`` parts with pending status. Calls and
    results of recall tools are left out entirely. Assistant payloads carry
    ``peer_id`` when ``assistant_peer_id`` is non-blank.
    """
    peer_id = str(assistant_peer_id or "").strip()
    is_recall_tool = cls._is_openviking_recall_tool_name

    def result_id_of(entry: Dict[str, Any]) -> str:
        return str(entry.get("tool_call_id") or entry.get("id") or "")

    def as_payload(role: str, parts: List[Dict[str, Any]]) -> Dict[str, Any]:
        wrapped: Dict[str, Any] = {"role": role, "parts": parts}
        if peer_id and role == "assistant":
            wrapped["peer_id"] = peer_id
        return wrapped

    # Pass 1: index every identified tool call, note which calls already have
    # a result, and which ids belong to recall tools.
    calls_by_id: Dict[str, Dict[str, Any]] = {}
    answered_ids: set[str] = set()
    recall_ids: set[str] = set()
    for entry in messages:
        if not isinstance(entry, dict):
            continue
        entry_role = entry.get("role")
        if entry_role == "tool":
            result_id = result_id_of(entry)
            if result_id:
                answered_ids.add(result_id)
                if is_recall_tool(entry.get("name")):
                    recall_ids.add(result_id)
        elif entry_role == "assistant":
            for call in entry.get("tool_calls") or []:
                if not isinstance(call, dict):
                    continue
                call_id = cls._tool_call_id(call)
                if not call_id:
                    continue
                call_name = cls._tool_call_name(call)
                calls_by_id[call_id] = {
                    "tool_name": call_name,
                    "tool_input": cls._tool_call_input(call),
                }
                if is_recall_tool(call_name):
                    recall_ids.add(call_id)

    # Pass 2: build the payload in transcript order.
    batch: List[Dict[str, Any]] = []
    buffered_results: List[Dict[str, Any]] = []
    for entry in messages:
        if not isinstance(entry, dict):
            continue
        role = str(entry.get("role") or "")

        if role == "tool":
            result_id = result_id_of(entry)
            cached_call = calls_by_id.get(result_id, {})
            result_name = str(entry.get("name") or cached_call.get("tool_name") or "")
            if result_id in recall_ids or is_recall_tool(result_name):
                continue
            buffered_results.append({
                "type": "tool",
                "tool_id": result_id,
                "tool_name": result_name,
                "tool_input": cached_call.get("tool_input", {}),
                "tool_output": cls._message_text(entry.get("content")),
                "tool_status": cls._tool_result_status(entry),
            })
            continue

        # system/developer and any unknown role contribute nothing.
        if role not in ("user", "assistant"):
            continue

        # Emit buffered tool results ahead of the next conversational message.
        # A fresh list is bound because the old one now lives in the payload.
        if buffered_results:
            batch.append(as_payload("assistant", buffered_results))
            buffered_results = []

        parts: List[Dict[str, Any]] = []
        text = cls._message_text(entry.get("content"))
        if text:
            parts.append({"type": "text", "text": text})

        if role == "assistant":
            for call in entry.get("tool_calls") or []:
                if not isinstance(call, dict):
                    continue
                call_id = cls._tool_call_id(call)
                call_name = cls._tool_call_name(call)
                if call_id in recall_ids or is_recall_tool(call_name):
                    continue
                if call_id in answered_ids:
                    # Its result message produces the tool part instead.
                    continue
                # Calls with an id were parsed in pass 1; id-less calls were
                # not cached, so parse their arguments here.
                cached = calls_by_id.get(call_id) if call_id else None
                parts.append({
                    "type": "tool",
                    "tool_id": call_id,
                    "tool_name": call_name,
                    "tool_input": (
                        cls._tool_call_input(call) if cached is None else cached["tool_input"]
                    ),
                    "tool_status": _TOOL_STATUS_PENDING,
                })

        if parts:
            batch.append(as_payload(role, parts))

    if buffered_results:
        batch.append(as_payload("assistant", buffered_results))
    return batch
def sync_turn(
self,
user_content: str,
assistant_content: str,
*,
session_id: str = "",
messages: Optional[List[Dict[str, Any]]] = None,
) -> None:
"""Record the conversation turn in OpenViking's session (non-blocking)."""
if not self._client:
return
@ -2302,6 +2596,40 @@ class OpenVikingMemoryProvider(MemoryProvider):
if not user_content:
return
turn_messages = (
self._extract_current_turn_messages(messages, user_content, assistant_content)
if messages is not None
else []
)
if turn_messages:
turn_messages = [dict(message) for message in turn_messages]
for message in turn_messages:
if message.get("role") == "user":
message["content"] = user_content
break
batch_messages = self._messages_to_openviking_batch(
turn_messages,
assistant_peer_id=getattr(self, "_agent", _DEFAULT_AGENT),
)
if _sync_trace_enabled():
logger.info(
"OpenViking sync_turn trace: session_arg=%r cached_session=%r "
"messages_param_supported=true messages_present=%s message_count=%s "
"turn_message_count=%d batch_message_count=%d user_len=%d assistant_len=%d "
"user_preview=%r assistant_preview=%r",
session_id,
self._session_id,
messages is not None,
len(messages) if messages is not None else None,
len(turn_messages),
len(batch_messages),
len(str(user_content or "")),
len(str(assistant_content or "")),
_preview(user_content),
_preview(assistant_content),
)
# Snapshot the sid and bump the turn counter atomically so a
# concurrent on_session_switch/on_session_end can't interleave its
# snapshot+reset between the read and the increment (lost turn) and so
@ -2313,24 +2641,39 @@ class OpenVikingMemoryProvider(MemoryProvider):
self._turn_count += 1
def _sync():
try:
client = self._new_client()
def _post_turn(client: _VikingClient) -> None:
if batch_messages:
payload = {"messages": batch_messages}
if _sync_trace_enabled():
logger.info(
"OpenViking sync_turn trace: POST /api/v1/sessions/%s/messages/batch payload=%s",
sid,
json.dumps(payload, ensure_ascii=False),
)
try:
client.post(f"/api/v1/sessions/{sid}/messages/batch", payload)
return
except Exception as batch_error:
logger.warning(
"OpenViking structured sync failed; falling back to text sync: %s",
batch_error,
)
self._post_session_turn(
client,
sid,
user_content[:4000],
assistant_content[:4000],
self._message_text(assistant_content)[:4000],
)
try:
client = self._new_client()
_post_turn(client)
except Exception as e:
logger.debug("OpenViking sync_turn failed, reconnecting: %s", e)
try:
client = self._new_client()
self._post_session_turn(
client,
sid,
user_content[:4000],
assistant_content[:4000],
)
_post_turn(client)
except Exception as retry_error:
logger.warning("OpenViking sync_turn failed: %s", retry_error)

View file

@ -14,6 +14,7 @@ import hashlib
import json
import logging
import os
import re
import struct
import subprocess
import tempfile
@ -29,6 +30,7 @@ VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080}
_DISCORD_COMMAND_SYNC_POLICIES = {"safe", "bulk", "off"}
_DISCORD_COMMAND_SYNC_STATE_SUBDIR = "gateway"
_DISCORD_COMMAND_SYNC_STATE_FILENAME = "discord_command_sync_state.json"
_DISCORD_NONCONVERSATIONAL_STATE_FILENAME = "discord_nonconversational_messages.json"
_DISCORD_COMMAND_SYNC_MUTATION_INTERVAL_SECONDS = 4.5
_DISCORD_COMMAND_SYNC_MAX_RATE_LIMIT_SLEEP_SECONDS = 30.0
# Discord enforces a hard cap of 100 global application (slash) commands per
@ -37,6 +39,37 @@ _DISCORD_COMMAND_SYNC_MAX_RATE_LIMIT_SLEEP_SECONDS = 30.0
# every slash command — not just the overflow ones. We keep the desired set
# at or below this limit at registration time.
_DISCORD_MAX_APP_COMMANDS = 100
_DISCORD_NONCONVERSATIONAL_METADATA_KEYS = frozenset({
"non_conversational",
"non_conversational_history",
})
# Upgrade-bridge fallback only. The primary mechanism is the persisted
# non-conversational message-ID set populated from explicitly marked sends
# (metadata["non_conversational"]). These regexes exist solely to recognize
# status bumps emitted by an older gateway version that pre-dates the marking,
# so they don't partition history after an upgrade. New emitters should set the
# metadata flag, not rely on a regex here.
_DISCORD_NONCONVERSATIONAL_HISTORY_MESSAGE_PATTERNS = (
re.compile(r"^\s*💾\s*Self-improvement review:\s+\S[\s\S]*$", re.IGNORECASE),
# Legacy/background-review test doubles used this shorter form before the
# self-improvement prefix became the stable emitter contract.
re.compile(
r"^\s*💾\s+Skill\s+['\"].+?['\"]\s+(?:created|updated|improved|patched)\.?\s*$",
re.IGNORECASE,
),
re.compile(r"^\s*⏳\s+Working\s+—\s+\d+\s+min(?:\s|$)", re.IGNORECASE),
re.compile(
r"^\s*\[Background process\s+\S+\s+"
r"(?:finished with exit code|is still running~)[\s\S]*\]\s*$",
re.IGNORECASE,
),
re.compile(
r"^\s*(?:✅|❌)\s+Hermes update\s+"
r"(?:finished|failed|timed out)[\s\S]*$",
re.IGNORECASE,
),
re.compile(r"^\s*♻️?\s+Gateway\s+(?:restarted successfully|online\b)[\s\S]*$", re.IGNORECASE),
)
try:
import discord
@ -55,7 +88,6 @@ from pathlib import Path as _Path
sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
from gateway.config import Platform, PlatformConfig
import re
from gateway.platforms.helpers import MessageDeduplicator, ThreadParticipationTracker
from utils import atomic_json_write
@ -132,6 +164,73 @@ def _find_discord_windows_bundled_opus(discord_module: Any = None) -> Optional[s
return None
class _DiscordNonConversationalMessageTracker:
"""Persistent bounded set of Discord message IDs that are status noise."""
_MAX_TRACKED = 2000
def __init__(self, max_tracked: int = _MAX_TRACKED):
self._max_tracked = max_tracked
self._ids: dict[str, None] = dict.fromkeys(self._load())
def _state_path(self) -> _Path:
from hermes_constants import get_hermes_home
return (
get_hermes_home()
/ _DISCORD_COMMAND_SYNC_STATE_SUBDIR
/ _DISCORD_NONCONVERSATIONAL_STATE_FILENAME
)
def _load(self) -> list[str]:
path = self._state_path()
if not path.exists():
return []
try:
data = json.loads(path.read_text(encoding="utf-8"))
if isinstance(data, list):
return [str(message_id) for message_id in data if str(message_id).strip()]
except Exception:
logger.debug("[%s] Failed to load non-conversational Discord IDs", "Discord")
return []
def _save(self) -> None:
ids = list(self._ids)
if len(ids) > self._max_tracked:
ids = ids[-self._max_tracked:]
self._ids = dict.fromkeys(ids)
try:
atomic_json_write(self._state_path(), ids, indent=None)
except Exception:
logger.debug("[%s] Failed to save non-conversational Discord IDs", "Discord", exc_info=True)
def mark_many(self, message_ids: List[str]) -> None:
changed = False
for message_id in message_ids:
key = str(message_id or "").strip()
if key and key not in self._ids:
self._ids[key] = None
changed = True
if changed:
self._save()
def __contains__(self, message_id: str) -> bool:
return str(message_id or "") in self._ids
def _metadata_marks_nonconversational(metadata: Optional[Dict[str, Any]]) -> bool:
    """Tell whether send metadata flags the message as status-only.

    True when ``metadata`` is a dict holding a truthy value under any key in
    ``_DISCORD_NONCONVERSATIONAL_METADATA_KEYS``; False for anything else.
    """
    if isinstance(metadata, dict):
        for flag_key in _DISCORD_NONCONVERSATIONAL_METADATA_KEYS:
            if metadata.get(flag_key):
                return True
    return False
def _looks_like_nonconversational_history_message(content: str) -> bool:
    """Match ``content`` against the legacy status-message patterns.

    Returns True as soon as one pattern in
    ``_DISCORD_NONCONVERSATIONAL_HISTORY_MESSAGE_PATTERNS`` matches at the
    start of the text; falsy ``content`` is treated as an empty string.
    """
    candidate = content or ""
    for status_pattern in _DISCORD_NONCONVERSATIONAL_HISTORY_MESSAGE_PATTERNS:
        if status_pattern.match(candidate):
            return True
    return False
def _clean_discord_id(entry: str) -> str:
"""Strip common prefixes from a Discord user ID or username entry.
@ -681,6 +780,9 @@ class DiscordAdapter(BasePlatformAdapter):
# history backfill to skip the full scan on hot paths. Falls back to
# scanning channel.history() on cache miss (cold start / restart).
self._last_self_message_id: Dict[str, str] = {}
# Persistent set of bot-authored lifecycle/status message IDs that
# should not act as conversational history boundaries after restart.
self._nonconversational_messages = _DiscordNonConversationalMessageTracker()
def _handle_bot_task_done(self, task: asyncio.Task) -> None:
"""Surface post-startup discord.py task exits to the gateway supervisor.
@ -1577,6 +1679,7 @@ class DiscordAdapter(BasePlatformAdapter):
thread_id = None
if metadata and metadata.get("thread_id"):
thread_id = metadata["thread_id"]
nonconversational = _metadata_marks_nonconversational(metadata)
if thread_id:
# Fetch the thread directly — threads are addressed by their own ID.
@ -1654,7 +1757,10 @@ class DiscordAdapter(BasePlatformAdapter):
# backfill — avoids a full channel.history() scan on hot paths.
if message_ids:
_target_id = thread_id or chat_id
self._last_self_message_id[_target_id] = message_ids[-1]
if nonconversational:
self._nonconversational_messages.mark_many(message_ids)
elif not _looks_like_nonconversational_history_message(content):
self._last_self_message_id[_target_id] = message_ids[-1]
return SendResult(
success=True,
@ -4203,23 +4309,29 @@ class DiscordAdapter(BasePlatformAdapter):
after=_after_obj,
oldest_first=False,
):
# Skip system messages (pins, joins, thread renames, etc.)
if msg.type not in {discord.MessageType.default, discord.MessageType.reply}:
continue
content = getattr(msg, "clean_content", msg.content) or ""
if (
str(getattr(msg, "id", "")) in self._nonconversational_messages
or _looks_like_nonconversational_history_message(content)
):
continue
# Stop at our own message — this is the partition point.
# Everything before this is already in the session transcript.
# (Redundant when _after_obj is set, but needed for cold start.)
if msg.author == self._client.user:
break
# Skip system messages (pins, joins, thread renames, etc.)
if msg.type not in {discord.MessageType.default, discord.MessageType.reply}:
continue
# Respect DISCORD_ALLOW_BOTS for other bots.
# For history context, "mentions" is treated as "all" — we are
# deciding what context to show, not whether to respond.
if getattr(msg.author, "bot", False) and not include_other_bots:
continue
content = getattr(msg, "clean_content", msg.content) or ""
if not content and msg.attachments:
content = "(attachment)"
if not content:
@ -4566,6 +4678,13 @@ class DiscordAdapter(BasePlatformAdapter):
Open-ended mode (``choices`` empty/None): renders the question as
plain embed text — no buttons. The gateway's text-intercept captures
the next message in this session and resolves the clarify.
Choice normalisation: ``choices`` may contain bare strings OR dicts
(LLMs sometimes emit ``[{"description": "..."}]`` instead of bare
strings, which would otherwise render as raw Python repr on the
button label). Dict choices are unwrapped against the canonical
LLM tool-call keys ``label``, ``description``, ``text``, ``title``
in that order. Dicts with none of those keys are dropped.
"""
if not self._client or not DISCORD_AVAILABLE:
return SendResult(success=False, error="Not connected")
@ -4591,8 +4710,37 @@ class DiscordAdapter(BasePlatformAdapter):
color=discord.Color.orange(),
)
# Normalise choices: LLMs sometimes emit `[{"description": "..."}]`
# instead of bare strings, which would render as raw Python repr on
# the button label. Unwrap the common shapes, then stringify.
def _flatten_choice(c):
if c is None:
return ""
if isinstance(c, str):
return c.strip()
if isinstance(c, dict):
# Prefer the canonical LLM tool-call user-facing keys
# in the order the LLM is most likely to emit them.
# 'name' and 'value' are deliberately NOT here: they're
# Discord-component-shaped fields that could appear in
# dicts that aren't meant to be choices (e.g., a
# developer-error wiring that passes a Button-shaped
# object). Picking them would leak raw enum values
# or 4-char model identifiers onto user-facing buttons.
# If a dict has none of the canonical keys, drop it
# rather than picking some random field — a garbage
# button label is worse than no button at all.
for key in ("label", "description", "text", "title"):
v = c.get(key)
if isinstance(v, str) and v.strip():
return v.strip()
return ""
if isinstance(c, (list, tuple)):
return " ".join(_flatten_choice(x) for x in c).strip()
return str(c).strip()
clean_choices = [
str(c).strip() for c in (choices or []) if c is not None and str(c).strip()
s for s in (_flatten_choice(c) for c in (choices or [])) if s
]
# Discord allows up to 5 buttons per row, 5 rows per view = 25.
# We reserve one slot for the "Other" button, so cap at 24 choices.
@ -4657,6 +4805,8 @@ class DiscordAdapter(BasePlatformAdapter):
)
msg = await channel.send(embed=embed, view=view)
view._message = msg # store for on_timeout expiration editing
if _metadata_marks_nonconversational(metadata):
self._nonconversational_messages.mark_many([str(msg.id)])
return SendResult(success=True, message_id=str(msg.id))
except Exception as e:
return SendResult(success=False, error=str(e))
@ -6129,10 +6279,47 @@ def _define_discord_view_classes() -> None:
self.resolved = False
for index, choice in enumerate(self.choices):
# Discord button labels are capped at 80 chars.
label_body = choice if len(choice) <= 75 else choice[:72] + "..."
# Discord button labels are capped at 80 chars. On mobile the
# visible width is much narrower (often <40 chars before it
# wraps to 2 lines and the second line gets cut off), so we
# cap aggressively and cut at a word boundary when possible
# to keep the trailing text readable.
#
# Cut strategy (most-preferred to least-preferred):
# 1. Last space in the trailing half of the budget
# (cleanest word boundary)
# 2. Last soft boundary in the trailing half of the
# budget (hyphen, comma, period, paren)
# 3. Hard cut at the budget limit (last resort)
prefix = f"{index + 1}. "
budget = 80 - len(prefix)
if len(choice) <= budget:
label_body = choice
else:
truncated = choice[: budget - 1].rstrip()
cut_at = -1
# 1. Last space in the trailing half of the budget.
space = truncated.rfind(" ")
if space >= budget // 2:
cut_at = space
# 2. Soft boundary — only if no word boundary found.
# Find the latest soft boundary in the trailing half
# of the budget; that maximizes preserved text length.
# Cut AT the soft boundary (inclusive) so the label
# ends on the soft char (e.g. "-" or ",") rather than
# on the alpha char that followed it.
if cut_at < 0:
latest_soft = max(
(truncated.rfind(s) for s in ("-", ",", ".", ")")),
default=-1,
)
if latest_soft >= budget // 2:
cut_at = latest_soft + 1
if cut_at > 0:
truncated = truncated[:cut_at]
label_body = truncated.rstrip() + ""
button = discord.ui.Button(
label=f"{index + 1}. {label_body}",
label=f"{prefix}{label_body}",
style=discord.ButtonStyle.primary,
custom_id=f"clarify:{clarify_id}:{index}",
)

View file

@ -258,7 +258,7 @@ youtube = [
# `hermes dashboard` (localhost SPA + API). Not in core to keep the default install lean.
# starlette==1.0.1 pinned for CVE-2026-48710 (BadHost) — fastapi pulls Starlette
# transitively and pre-1.0.1 is the vulnerable range. See the mcp extra above.
web = ["fastapi==0.133.1", "uvicorn[standard]==0.41.0", "starlette==1.0.1", "python-multipart==0.0.20"]
web = ["fastapi==0.133.1", "uvicorn[standard]==0.41.0", "starlette==1.0.1", "python-multipart==0.0.27"]
all = [
# Policy (2026-05-12): `[all]` includes only extras that genuinely
# CAN'T be lazy-installed via `tools/lazy_deps.py` — i.e. things every

View file

@ -4076,11 +4076,13 @@ class AIAgent:
# Defensive: strip Responses-only kwargs that can leak in under an
# api_mode-flip race (the Anthropic SDK raises a non-retryable
# TypeError on them). See #31673.
from agent.anthropic_adapter import sanitize_anthropic_kwargs
sanitize_anthropic_kwargs(
api_kwargs, log_prefix=getattr(self, "log_prefix", "")
from agent.anthropic_adapter import create_anthropic_message
return create_anthropic_message(
self._anthropic_client,
api_kwargs,
log_prefix=getattr(self, "log_prefix", ""),
prefer_stream=not bool(getattr(self, "_disable_streaming", False)),
)
return self._anthropic_client.messages.create(**api_kwargs)
def _rebuild_anthropic_client(self) -> None:
"""Rebuild the Anthropic client after an interrupt or stale call.

View file

@ -45,6 +45,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
# Auto-extracted from noreply emails + manual overrides
AUTHOR_MAP = {
"charles@salesondemand.io": "salesondemandio",
"victor@rocketfueldev.com": "victor-kyriazakos",
"87440198+JoaoMarcos44@users.noreply.github.com": "JoaoMarcos44",
"286497132+srojk34@users.noreply.github.com": "srojk34",
@ -56,6 +57,8 @@ AUTHOR_MAP = {
"despitemeguru@gmail.com": "definitelynotguru",
"chaslui@outlook.com": "ChasLui",
"rio.jeong@thebytesize.ai": "rio-jeong",
"cdddo@users.noreply.github.com": "Cdddo",
"carlos.dddo@gmail.com": "Cdddo",
"yehaotian@xuanshudeMac-mini.local": "ArcanePivot",
"dbeyer7@gmail.com": "benegessarit",
"264773240+MrDiamondBallz@users.noreply.github.com": "MrDiamondBallz",
@ -103,6 +106,7 @@ AUTHOR_MAP = {
"290859878+synapsesx@users.noreply.github.com": "synapsesx",
"157689911+itsflownium@users.noreply.github.com": "itsflownium",
"dirtyren@users.noreply.github.com": "dirtyren",
"johnjacobkenny@users.noreply.github.com": "johnjacobkenny",
"chanyoung.kim@nota.ai": "channkim",
"stevenn.damatoo@gmail.com": "x1erra",
"evansrory@gmail.com": "zimigit2020",
@ -415,6 +419,7 @@ AUTHOR_MAP = {
"androidhtml@yandex.com": "hllqkb",
"25840394+Bongulielmi@users.noreply.github.com": "Bongulielmi",
"jonathan.troyer@overmatch.com": "JTroyerOvermatch",
"53142663+tt-a1i@users.noreply.github.com": "tt-a1i", # PR #48933 (SSE-only Anthropic stream aggregation, #48923)
"harryykyle1@gmail.com": "hharry11",
"wysie@users.noreply.github.com": "wysie",
"ronhi@buildabear1.localdomain": "RonHillDev", # PR #29523 salvage (machine-local commit email)
@ -1528,6 +1533,7 @@ AUTHOR_MAP = {
"erik.engervall@gmail.com": "erikengervall", # PR #28774 (firecrawl integration tag)
"egilewski@egilewski.com": "egilewski", # PR #30432 (MEDIA path traversal fix, GHSA-jmf9-9729-7pp8)
"edison@mcclean.codes": "McClean-Edison", # PR #29817 (register_auxiliary_task plugin API)
"OYLFLMH@users.noreply.github.com": "OYLFLMH", # PR #48312 salvage (cli_refresh_interval config, #48309)
"zhangsamuel12@gmail.com": "SamuelZ12", # PR #7480 (show recap after in-session resume)
"490408354@qq.com": "daizhonggeng", # PR #9020 (numbered /resume selection)
"claw@openclaw.ai": "wanwan2qq", # PR #10215 (strip brackets/quotes from /resume; gateway session-ID lookup)
@ -1577,6 +1583,8 @@ AUTHOR_MAP = {
"sunsky.lau@gmail.com": "liuhao1024", # PR #45494 salvage (claim session slot before auto-resume task; #45456)
"andrewdmwalker@gmail.com": "capt-marbles", # PR #38440 salvage (resolve xAI OAuth credentials across profiles; #43589)
"infinitycrew39@gmail.com": "infinitycrew39", # PR #47945 salvage (scope langfuse trace state by turn/request ids; #48292)
"eurekaxun@163.com": "huangxun375-stack", # PR #37251 / #48894 structured OpenViking sync
"218421507+Sahil-SS9@users.noreply.github.com": "Sahil-SS9", # PR #48466/#44919/#44909/#42209 salvage (cron/checkpoint/kanban/skill)
}

View file

@ -87,8 +87,20 @@ toolsets (so they can `git`, `read_file`, and `search_files`/grep).
Tell each reviewer to:
- Search the existing codebase for evidence (don't reason from the diff alone).
- Report findings as a concrete list: `file:line → problem → suggested fix`.
- Rank each finding `high` / `medium` / `low` confidence.
- **Apply Chesterton's Fence:** before flagging anything for removal, run
`git blame` on the line to understand why it exists. If you can't determine
the original purpose, mark it `confidence: low` — don't guess.
- Report findings as structured output with confidence and risk:
```
file:line → problem → suggested fix | confidence: high/medium/low | risk: SAFE/CAREFUL/RISKY
```
- **SAFE** = proven not to affect behavior (unused imports, commented-out
code, pass-through wrappers). Auto-apply these.
- **CAREFUL** = improves without changing semantics (rename local variable,
flatten nested ternary, extract helper). Apply with test verification.
- **RISKY** = may change behavior or break public contracts (N+1
restructuring, public API rename, memory lifecycle change). Flag for
human review — do NOT auto-apply.
- Skip nits and style-only churn. Only flag things that materially improve
the code.
@ -112,7 +124,11 @@ Pass these three goals (drop any the user's focus excludes):
> blocks that should share an abstraction); leaky abstractions (exposing
> internals, breaking an existing encapsulation boundary); stringly-typed
> code (raw strings where a constant/enum/registry already exists — check the
> canonical registries before flagging). For each, give the concrete refactor.
> canonical registries before flagging); AI-generated slop patterns (extra
> comments restating obvious code like `// increment counter` above `count++`;
> unnecessary defensive null-checks on already-validated inputs; `as any`
> casts that bypass the type system; patterns inconsistent with the rest of
> the file). For each, give the concrete refactor.
**Reviewer 3 — Efficiency**
> Review this diff for efficiency problems. Look for: unnecessary work
@ -122,8 +138,10 @@ Pass these three goals (drop any the user's focus excludes):
> TOCTOU anti-patterns (existence pre-checks before an op instead of doing
> the op and handling the error); memory issues (unbounded growth, missing
> cleanup, listener/handle leaks); overly broad reads (loading whole files
> when a slice would do). For each, give the concrete fix and why it's faster
> or lighter.
> when a slice would do); silent failures (empty catch blocks, ignored error
> returns, `except: pass`, `.catch(() => {})` with no handling, error
> propagation gaps — these hide bugs and should at minimum log before
> swallowing). For each, give the concrete fix and why it's faster or safer.
### Phase 3 — Aggregate and apply
@ -138,13 +156,22 @@ Wait for all three to return (batch mode returns them together).
Don't apply a perf "fix" that hurts clarity unless the path is genuinely
hot. When two suggestions are mutually exclusive and both defensible, pick
the one that touches less code and note the alternative.
4. **Apply** the surviving fixes directly with `patch` / `write_file` — unless
the user asked for a dry run, in which case present the list and ask first.
4. **Apply in risk-tier order:**
- **SAFE first** (auto-apply): unused imports, commented-out code,
pass-through wrappers, redundant type assertions. Run tests after.
- **CAREFUL next** (apply with verification, one file at a time): rename
locals, flatten ternaries, extract helpers, consolidate dupes. Run tests
after each file. Revert any that break.
- **RISKY last** (flag for review — do NOT auto-apply): N+1 restructuring,
public API changes, concurrency fixes, error-handling changes. Present
each with risk description and test coverage status.
If the user opted for a dry run, present all three tiers and apply nothing.
5. **Verify** you didn't break anything: run the project's targeted tests for
the touched files (not the full suite), and re-run any linter/type check the
repo uses. If a fix breaks a test, revert that one fix and report it.
6. **Summarize** what you changed: a short list of applied fixes grouped by
reviewer category, plus any findings you deliberately skipped and why.
reviewer category and risk tier, plus any findings you deliberately skipped
and why.
## Pitfalls
@ -166,6 +193,16 @@ Wait for all three to return (batch mode returns them together).
- **Large diffs blow context.** If the diff is huge, scope it down before
delegating — three subagents each carrying a 5000-line diff is expensive and
may truncate.
- **Over-trusting dead code tools.** `knip`, `ts-prune`, and `depcheck` flag
exports that ARE used dynamically (string-based imports, reflection). Always
grep for the symbol name before removing — a clean tool report is not proof.
- **Renaming without checking public contracts.** Export names, API route
paths, DB column names, and config keys are contracts — even if the name is
bad, renaming breaks consumers. Tag public-contract changes as RISKY; never
auto-rename them.
- **Removing "unnecessary" error handling.** An empty catch block or ignored
error might be intentional — the error is expected and benign in that
context. Flag it, don't remove it; let the human decide.
## Related

View file

@ -38,6 +38,20 @@ def _jwt_with_claims(claims: dict) -> str:
return f"{header}.{payload}.sig"
class _FakeAnthropicStream:
def __init__(self, final_message):
self._final_message = final_message
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
def get_final_message(self):
return self._final_message
@pytest.fixture(autouse=True)
def _clean_env(monkeypatch):
"""Strip provider env vars so each test starts clean."""
@ -990,6 +1004,37 @@ class TestVisionClientFallback:
assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
assert model == "claude-haiku-4-5-20251001"
def test_anthropic_auxiliary_client_aggregates_stream_response(self):
    """The auxiliary client must read its reply from ``messages.stream``.

    ``messages.create`` is rigged to return raw event-stream text; the test
    asserts it is never called and that the content and usage come from the
    streamed final message.
    """
    from agent.auxiliary_client import AnthropicAuxiliaryClient

    text_block = SimpleNamespace(type="text", text="streamed aux response")
    token_usage = SimpleNamespace(input_tokens=3, output_tokens=4)
    streamed_message = SimpleNamespace(
        content=[text_block],
        stop_reason="end_turn",
        usage=token_usage,
    )
    stream_mock = MagicMock(return_value=_FakeAnthropicStream(streamed_message))
    create_mock = MagicMock(return_value="raw event-stream text")
    fake_messages = SimpleNamespace(stream=stream_mock, create=create_mock)
    sdk_client = SimpleNamespace(messages=fake_messages)

    aux_client = AnthropicAuxiliaryClient(
        sdk_client,
        "claude-sonnet-4-20250514",
        "sk-test",
        "https://sse-only.example/v1",
    )
    result = aux_client.chat.completions.create(
        messages=[{"role": "user", "content": "summarize"}],
        max_tokens=16,
    )

    fake_messages.stream.assert_called_once()
    fake_messages.create.assert_not_called()
    assert result.choices[0].message.content == "streamed aux response"
    usage = result.usage
    assert usage.prompt_tokens == 3
    assert usage.completion_tokens == 4
class TestAuxiliaryPoolAwareness:
def test_try_nous_uses_pool_entry(self):

View file

@ -0,0 +1,25 @@
from __future__ import annotations
from types import SimpleNamespace
from agent.message_content import flatten_message_text
def test_flatten_message_text_accepts_chat_and_responses_text_parts():
    """Dict parts typed text/input_text/output_text/summary_text are joined by newlines."""
    typed_texts = [
        ("text", "chat text"),
        ("input_text", "user text"),
        ("output_text", "assistant text"),
        ("summary_text", "summary text"),
    ]
    parts = [{"type": kind, "text": body} for kind, body in typed_texts]
    expected = "\n".join(body for _, body in typed_texts)

    assert flatten_message_text(parts) == expected
def test_flatten_message_text_accepts_object_parts():
    """An attribute-style part and a dict with only ``content`` both contribute text."""
    object_part = SimpleNamespace(type="output_text", text="object text")
    legacy_part = {"content": "legacy content"}

    flattened = flatten_message_text([object_part, legacy_part])

    assert flattened == "object text\nlegacy content"

View file

@ -0,0 +1,130 @@
"""Tests for the profile-scoped credential primitive (Workstream A / Phase 2)."""
import pytest
from agent import secret_scope as ss
@pytest.fixture(autouse=True)
def _reset_multiplex():
    """Switch multiplexing off before and after every test (the flag is global)."""

    def _turn_off():
        ss.set_multiplex_active(False)

    _turn_off()
    yield
    _turn_off()
class TestMultiplexInactiveBackwardCompat:
    """With multiplexing off, ``get_secret`` reads straight from ``os.environ``."""

    def test_reads_environ(self, monkeypatch):
        name, value = "ANTHROPIC_API_KEY", "sk-test"
        monkeypatch.setenv(name, value)
        assert ss.get_secret(name) == value

    def test_missing_returns_default(self, monkeypatch):
        name = "NOPE_KEY"
        monkeypatch.delenv(name, raising=False)
        assert ss.get_secret(name) is None
        assert ss.get_secret(name, "fallback") == "fallback"

    def test_no_raise_without_scope(self, monkeypatch):
        name = "SOME_KEY"
        monkeypatch.delenv(name, raising=False)
        # Multiplex is off, so an unscoped read is allowed and yields the default.
        assert ss.get_secret(name) is None
class TestMultiplexActiveFailClosed:
    """With multiplexing on, an unscoped secret read raises rather than leaking."""

    def test_unscoped_read_raises(self, monkeypatch):
        name = "ANTHROPIC_API_KEY"
        monkeypatch.setenv(name, "sk-leaky")
        ss.set_multiplex_active(True)
        with pytest.raises(ss.UnscopedSecretError):
            ss.get_secret(name)

    def test_scoped_read_uses_scope_not_environ(self, monkeypatch):
        name = "ANTHROPIC_API_KEY"
        monkeypatch.setenv(name, "sk-from-environ")
        ss.set_multiplex_active(True)
        scope_token = ss.set_secret_scope({name: "sk-from-scope"})
        try:
            assert ss.get_secret(name) == "sk-from-scope"
        finally:
            ss.reset_secret_scope(scope_token)

    def test_scoped_missing_key_returns_default_not_environ(self, monkeypatch):
        # The value is present in os.environ, but an active scope is
        # authoritative: a key missing from the scope must NOT fall through
        # to the (cross-profile) environment.
        foreign = "OPENAI_API_KEY"
        monkeypatch.setenv(foreign, "sk-other-profile")
        ss.set_multiplex_active(True)
        scope_token = ss.set_secret_scope({"ANTHROPIC_API_KEY": "sk-mine"})
        try:
            assert ss.get_secret(foreign) is None
            assert ss.get_secret(foreign, "d") == "d"
        finally:
            ss.reset_secret_scope(scope_token)

    def test_global_env_still_reads_environ_under_multiplex(self, monkeypatch):
        monkeypatch.setenv("HERMES_HOME", "/opt/data")
        ss.set_multiplex_active(True)
        # Multiplex on and no scope set; HERMES_HOME is global, so no raise.
        home = ss.get_secret("HERMES_HOME")
        assert home == "/opt/data"

    def test_kanban_prefix_is_global(self, monkeypatch):
        monkeypatch.setenv("HERMES_KANBAN_DB", "/x/kanban.db")
        ss.set_multiplex_active(True)
        db_path = ss.get_secret("HERMES_KANBAN_DB")
        assert db_path == "/x/kanban.db"
class TestScopeIsolation:
    """A nested scope shadows the outer one and the outer value returns on reset."""

    def test_nested_scopes_restore(self):
        ss.set_multiplex_active(True)
        outer_token = ss.set_secret_scope({"K": "a"})
        try:
            assert ss.get_secret("K") == "a"
            inner_token = ss.set_secret_scope({"K": "b"})
            try:
                assert ss.get_secret("K") == "b"
            finally:
                ss.reset_secret_scope(inner_token)
            # Back in the outer scope once the inner token is reset.
            assert ss.get_secret("K") == "a"
        finally:
            ss.reset_secret_scope(outer_token)
class TestEnvFileParsing:
    """``load_env_file`` returns parsed pairs and leaves ``os.environ`` alone."""

    def test_parses_basic(self, tmp_path):
        env_lines = [
            "# comment",
            "ANTHROPIC_API_KEY=sk-abc",
            "export OPENAI_API_KEY=sk-def",
            'QUOTED="quoted-value"',
            "SINGLE='single'",
            "",
            "BAD_LINE_NO_EQUALS",
        ]
        env_path = tmp_path / ".env"
        env_path.write_text("\n".join(env_lines) + "\n")

        parsed = ss.load_env_file(env_path)

        expected = {
            "ANTHROPIC_API_KEY": "sk-abc",
            "OPENAI_API_KEY": "sk-def",
            "QUOTED": "quoted-value",
            "SINGLE": "single",
        }
        assert parsed == expected

    def test_does_not_mutate_environ(self, tmp_path, monkeypatch):
        import os

        monkeypatch.delenv("ZZZ_KEY", raising=False)
        env_path = tmp_path / ".env"
        env_path.write_text("ZZZ_KEY=secret\n")
        ss.load_env_file(env_path)
        assert "ZZZ_KEY" not in os.environ

    def test_missing_file_returns_empty(self, tmp_path):
        absent = tmp_path / "nope.env"
        assert ss.load_env_file(absent) == {}

    def test_build_profile_secret_scope(self, tmp_path):
        (tmp_path / ".env").write_text("ANTHROPIC_API_KEY=sk-profile\n")
        scope = ss.build_profile_secret_scope(tmp_path)
        assert scope == {"ANTHROPIC_API_KEY": "sk-profile"}

View file

@ -534,6 +534,14 @@ def pytest_configure(config): # noqa: D401 — pytest hook
"behaviour — e.g. PTY tests that signal their own child).",
)
# The pyproject addopts pin ``--timeout-method=signal`` relies on
# ``signal.SIGALRM``, which does not exist on Windows — pytest-timeout
# raises AttributeError at timer setup and the whole run aborts before any
# test executes. Fall back to the thread-based timer on Windows so the
# suite runs natively there (POSIX keeps the more reliable signal method).
if sys.platform == "win32" and getattr(config.option, "timeout_method", None) == "signal":
config.option.timeout_method = "thread"
@pytest.fixture(autouse=True)
def _live_system_guard(request, monkeypatch):

View file

@ -75,3 +75,68 @@ async def test_send_without_transport_returns_failure():
result = await a.send("chat1", "hello")
assert result.success is False
assert result.error == "no transport"
class _CaptureTransport:
"""Minimal RelayTransport stand-in that records the outbound action."""
def __init__(self):
self.sent = None
def set_inbound_handler(self, h): # noqa: D401
self._h = h
async def send_outbound(self, action):
self.sent = action
return {"success": True, "message_id": "m1"}
def _make_event(chat_id="chan-1", guild_id="guild-9"):
    """Build a text ``MessageEvent`` ("hi") from a relay channel source.

    The source carries the given ``chat_id`` and ``guild_id`` with
    ``chat_type="channel"`` on ``Platform.RELAY``.
    """
    from gateway.platforms.base import MessageEvent, MessageType
    from gateway.session import SessionSource

    source = SessionSource(
        platform=Platform.RELAY,
        chat_id=chat_id,
        chat_type="channel",
        guild_id=guild_id,
    )
    event = MessageEvent(text="hi", source=source, message_type=MessageType.TEXT)
    return event
@pytest.mark.asyncio
async def test_send_reattaches_guild_id_from_inbound_scope():
    """A send to a chat seen inbound carries that chat's guild_id in metadata.

    The connector's egress guard resolves the owning tenant from
    metadata.guild_id, and the gateway's generic delivery path drops it, so
    the relay adapter has to re-attach the guild scope it learned from the
    inbound event. Regression for live 'discord egress declined: target not
    routed to an onboarded tenant'.
    """
    transport = _CaptureTransport()
    adapter = RelayAdapter(
        PlatformConfig(), make_desc(platform="discord"), transport=transport
    )
    # Stand in for the connector delivering an inbound message in
    # guild-9 / chan-1: run only the scope capture, not the whole
    # handle_message pipeline.
    inbound = _make_event(chat_id="chan-1", guild_id="guild-9")
    adapter._capture_scope(inbound)

    await adapter.send("chan-1", "the reply")

    outbound_metadata = transport.sent["metadata"]
    assert outbound_metadata.get("guild_id") == "guild-9"
@pytest.mark.asyncio
async def test_send_without_known_scope_omits_guild_id():
    """A chat never seen inbound (e.g. a DM) is sent without any guild_id.

    The adapter must not invent a scope it was never given.
    """
    transport = _CaptureTransport()
    adapter = RelayAdapter(
        PlatformConfig(), make_desc(platform="discord"), transport=transport
    )

    await adapter.send("unknown-chat", "hi")

    outbound_metadata = transport.sent["metadata"]
    assert "guild_id" not in outbound_metadata
@pytest.mark.asyncio
async def test_send_preserves_explicit_guild_id():
    """A caller-supplied metadata.guild_id wins over the captured scope."""
    transport = _CaptureTransport()
    adapter = RelayAdapter(
        PlatformConfig(), make_desc(platform="discord"), transport=transport
    )
    adapter._capture_scope(_make_event(chat_id="chan-1", guild_id="guild-9"))

    explicit_metadata = {"guild_id": "explicit-1"}
    await adapter.send("chan-1", "hi", metadata=explicit_metadata)

    assert transport.sent["metadata"]["guild_id"] == "explicit-1"

View file

@ -177,3 +177,25 @@ async def test_disconnect_fails_pending_waiters_cleanly(server):
# After disconnect, an outbound returns a structured failure rather than hanging.
result = await t.send_outbound({"op": "send", "chat_id": "c", "content": "x"})
assert result["success"] is False
def test_https_url_normalized_to_wss():
    """An http(s):// base URL becomes a ws(s):// URL ending in /relay.

    The relay URL is configured once as the http(s):// BASE (for the
    provision POST), but websockets.connect needs ws(s):// and the connector
    mounts its WS server at /relay, so the transport must convert the scheme
    AND ensure the /relay path. Regression for the live staging failures
    'scheme isn't ws or wss' then 'server rejected WebSocket connection:
    HTTP 400' (wrong path).
    """
    secure = WebSocketRelayTransport("https://connector.example", "discord", "b")
    assert secure._url == "wss://connector.example/relay"

    plain = WebSocketRelayTransport("http://connector.local:8080", "discord", "b")
    assert plain._url == "ws://connector.local:8080/relay"
def test_ws_dial_url_idempotent_with_scheme_and_path():
    """URLs already using ws(s):// and/or ending in /relay are not double-converted."""
    # (configured URL, expected dial URL). /relay is never appended twice,
    # and a trailing slash after /relay is dropped.
    cases = [
        ("wss://connector.example/relay", "wss://connector.example/relay"),
        ("https://connector.example/relay/", "wss://connector.example/relay"),
        ("ws://127.0.0.1:9", "ws://127.0.0.1:9/relay"),
    ]
    for configured, expected in cases:
        transport = WebSocketRelayTransport(configured, "discord", "b")
        assert transport._url == expected

View file

@ -337,6 +337,40 @@ class TestAdapterInit:
assert isinstance(agent, FakeAgent)
assert captured["reasoning_config"] == {"enabled": True, "effort": "xhigh"}
def test_create_agent_refreshes_max_iterations_from_runtime_config(self, monkeypatch):
    """``_create_agent`` builds the agent with ``max_iterations=200``.

    Every gateway config lookup is patched so the runtime configuration
    reports 200; the fake agent class records the kwargs it was built with.
    """
    captured = {}

    class FakeAgent:
        def __init__(self, **kwargs):
            captured.update(kwargs)

    runtime_kwargs = {
        "provider": "openai",
        "base_url": "https://example.test/v1",
        "api_mode": "chat_completions",
    }
    # Applied in this order; each entry is (dotted target, replacement).
    patches = [
        ("run_agent.AIAgent", FakeAgent),
        # Hand out a fresh copy per call so callers cannot share state.
        ("gateway.run._resolve_runtime_agent_kwargs", lambda: dict(runtime_kwargs)),
        ("gateway.run._resolve_gateway_model", lambda: "gpt-5"),
        ("gateway.run._load_gateway_config", lambda: {"agent": {"max_turns": 200}}),
        ("gateway.run.GatewayRunner._load_reasoning_config", staticmethod(lambda: {})),
        ("gateway.run.GatewayRunner._load_fallback_model", staticmethod(lambda: None)),
        ("gateway.run._current_max_iterations", lambda: 200),
        ("hermes_cli.tools_config._get_platform_tools", lambda *_: set()),
    ]
    for target, replacement in patches:
        monkeypatch.setattr(target, replacement)

    adapter = APIServerAdapter(PlatformConfig(enabled=True))
    monkeypatch.setattr(adapter, "_ensure_session_db", lambda: None)

    built = adapter._create_agent(session_id="api-session")

    assert isinstance(built, FakeAgent)
    assert captured["max_iterations"] == 200
# ---------------------------------------------------------------------------
# Auth checking

View file

@ -0,0 +1,92 @@
"""Regression tests for PR #48127: cached agent max_iterations refresh.
When a long-lived gateway reuses an agent from its cache, the agent must run
the *current* configured iteration budget — not the budget it was constructed
with on the first turn of that session. Two pieces make that true:
1. ``GatewayRunner._init_cached_agent_for_turn`` must NOT reset
``max_iterations`` itself (the gateway refreshes it explicitly right after,
from current config). If this helper ever started clobbering it, the
gateway's refresh would be silently undone.
2. The per-turn budget object is rebuilt from ``agent.max_iterations`` at the
start of every turn (``agent/turn_context.py`` -> ``IterationBudget``), so
refreshing ``max_iterations`` on the cached agent is sufficient to change
the operative cap the agent loop checks.
These tests exercise the real code paths rather than asserting a plain
assignment, so they fail if either contract regresses.
"""
import time
from types import SimpleNamespace
from agent.iteration_budget import IterationBudget
def _make_cached_agent(max_iterations: int) -> SimpleNamespace:
    """Return a stand-in cached agent holding only the attributes the helpers touch."""
    agent = SimpleNamespace()
    agent._last_activity_ts = time.time() - 1000
    agent._last_activity_desc = "previous turn"
    agent._api_call_count = 42
    agent._last_flushed_db_idx = 5
    agent.max_iterations = max_iterations
    # The turn loop checks both api_call_count >= max_iterations AND
    # iteration_budget.remaining <= 0 (turn_finalizer.py), so the budget has
    # to reflect the cap too. It starts at the stale value so a test can
    # prove a refresh propagates.
    agent.iteration_budget = IterationBudget(max_iterations)
    return agent
def test_init_cached_agent_for_turn_does_not_touch_max_iterations():
    """The per-turn reset helper resets turn state but not ``max_iterations``.

    The gateway refreshes max_iterations explicitly right after calling this
    helper, so a reset inside the helper would undo that refresh.
    """
    from gateway.run import GatewayRunner

    cached = _make_cached_agent(90)
    GatewayRunner._init_cached_agent_for_turn(cached, interrupt_depth=0)

    # Per-turn bookkeeping is back at its starting values...
    assert cached._api_call_count == 0
    assert cached._last_activity_desc == "starting new turn (cached)"
    assert cached._last_flushed_db_idx == 0
    # ...while the configured cap is exactly what it was before the call.
    assert cached.max_iterations == 90
def test_init_cached_agent_preserves_max_iterations_on_interrupt_depth():
    """With ``interrupt_depth=1`` the helper still leaves ``max_iterations`` alone."""
    from gateway.run import GatewayRunner

    cached = _make_cached_agent(200)
    GatewayRunner._init_cached_agent_for_turn(cached, interrupt_depth=1)

    # The activity description is kept for the inactivity watchdog (#15654)...
    assert cached._last_activity_desc == "previous turn"
    # ...and the cap is unchanged.
    assert cached.max_iterations == 200
def test_refreshed_max_iterations_propagates_to_turn_budget():
    """A budget rebuilt from a refreshed ``max_iterations`` carries the new cap.

    The gateway sets ``agent.max_iterations = max_iterations`` on cache
    reuse, and the new turn's setup then rebuilds ``iteration_budget`` from
    it. The cached agent here starts at 90 and ends capped at 200.
    """
    stale_cap, fresh_cap = 90, 200
    cached = _make_cached_agent(stale_cap)
    assert cached.iteration_budget.max_total == stale_cap

    # Gateway refresh on cache reuse:
    cached.max_iterations = fresh_cap
    # Start-of-turn budget rebuild (agent/turn_context.py:166):
    cached.iteration_budget = IterationBudget(cached.max_iterations)

    rebuilt = cached.iteration_budget
    assert rebuilt.max_total == fresh_cap
    assert rebuilt.remaining == fresh_cap

View file

@ -311,6 +311,55 @@ class TestLoadGatewayConfig:
assert config.quick_commands == {"limits": {"type": "exec", "command": "echo ok"}}
def test_relay_platform_enabled_from_env_url(self, tmp_path, monkeypatch):
    """GATEWAY_RELAY_URL must enable Platform.RELAY in config.platforms.

    start_gateway()'s connect loop only dials platforms listed there.
    Registering the adapter in the platform_registry is NOT enough — the
    connect loop iterates config.platforms, so an un-enabled RELAY never
    connects (the 'relay registered but no inbound' bug).
    """
    home_dir = tmp_path / ".hermes"
    home_dir.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(home_dir))
    monkeypatch.setenv("GATEWAY_RELAY_URL", "https://connector.example/relay/")

    loaded = load_gateway_config()

    assert Platform.RELAY in loaded.platforms
    relay_cfg = loaded.platforms[Platform.RELAY]
    assert relay_cfg.enabled is True
    # The trailing slash is stripped and the URL is mirrored into ``extra``
    # for the connected-checker.
    mirrored_url = relay_cfg.extra.get("relay_url")
    assert mirrored_url == "https://connector.example/relay"
    assert Platform.RELAY in loaded.get_connected_platforms()
def test_relay_platform_absent_when_url_unset(self, tmp_path, monkeypatch):
    """Without a relay URL there is no RELAY platform entry.

    Direct/single-tenant gateways are therefore unaffected.
    """
    home_dir = tmp_path / ".hermes"
    home_dir.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(home_dir))
    monkeypatch.delenv("GATEWAY_RELAY_URL", raising=False)

    loaded = load_gateway_config()

    assert Platform.RELAY not in loaded.platforms
def test_relay_platform_enabled_from_config_yaml(self, tmp_path, monkeypatch):
    """gateway.relay_url in config.yaml also enables RELAY (env-less path)."""
    home_dir = tmp_path / ".hermes"
    home_dir.mkdir()

    yaml_text = "gateway:\n platforms:\n relay:\n extra:\n relay_url: https://connector.example/relay\n"
    (home_dir / "config.yaml").write_text(yaml_text, encoding="utf-8")

    monkeypatch.setenv("HERMES_HOME", str(home_dir))
    # Make sure only the YAML file can supply the relay URL.
    monkeypatch.delenv("GATEWAY_RELAY_URL", raising=False)

    loaded = load_gateway_config()

    assert Platform.RELAY in loaded.platforms
    relay_cfg = loaded.platforms[Platform.RELAY]
    assert relay_cfg.enabled is True
def test_bridges_group_sessions_per_user_from_config_yaml(self, tmp_path, monkeypatch):
hermes_home = tmp_path / ".hermes"
hermes_home.mkdir()

View file

@ -122,13 +122,56 @@ class TestClarifyChoiceViewConstruction:
clarify_id="cidZ",
allowed_user_ids=set(),
)
# 75 chars + 3 ellipsis chars in the body, plus "1. " prefix
# 78 chars + single-char ellipsis in the body, plus "1. " prefix.
# Uses U+2026 (…) instead of "..." to fit the 80-char Discord cap.
first_label = view.children[0].label
assert first_label.startswith("1. ")
assert first_label.endswith("...")
assert first_label.endswith("\u2026")
# Final label total <= 80 (Discord cap on button labels)
assert len(first_label) <= 80
def test_truncates_long_choice_label_breaks_on_word_boundary(self):
    """A long choice containing spaces is cut at the last whole word.

    This keeps the trailing text readable on Discord mobile.
    """
    long_choice = (
        "Tight, well-illustrated, covers all 3 audiences "
        "(patients, families, curious general readers)"
    )

    rendered = ClarifyChoiceView(
        choices=[long_choice],
        clarify_id="cidW",
        allowed_user_ids=set(),
    )
    button_text = rendered.children[0].label

    assert button_text.startswith("1. ")
    assert button_text.endswith("\u2026")
    # No mid-word fragment before the ellipsis.
    without_ellipsis = button_text.rstrip("\u2026")
    assert not without_ellipsis.endswith("(")
def test_truncates_long_no_space_choice_on_soft_boundary(self):
    """A space-free choice must still be cut on a soft boundary, not mid-word.

    The input is four 30-character runs joined by hyphens:
    30 + 1 + 30 + 1 + 30 + 1 + 30 = 123 characters, with the hyphens at
    indices 30, 61 and 92. Index 76 therefore lies inside the run of 'c'
    characters (a mid-word alpha), so the test only passes if the renderer
    actively searches backward for a soft character rather than blindly
    cutting at the budget limit.
    """
    runs = ("a" * 30, "b" * 30, "c" * 30, "d" * 30)
    long_choice = "-".join(runs)

    rendered = ClarifyChoiceView(
        choices=[long_choice],
        clarify_id="cidSB",
        allowed_user_ids=set(),
    )
    first_label = rendered.children[0].label

    assert first_label.endswith("\u2026")
    assert len(first_label) <= 80

    # Drop the "1. " prefix and the trailing ellipsis to inspect the cut point.
    body = first_label[len("1. "):].rstrip("\u2026")
    last_char = body[-1]
    soft_boundaries = {"-", ",", ".", ")", " "}
    assert last_char in soft_boundaries, (
        f"Label cuts mid-word at {last_char!r}: {first_label!r}"
    )
# ===========================================================================
# Choice callback → resolve_gateway_clarify
@ -404,3 +447,134 @@ class TestDiscordSendClarify:
# Only 1 real choice + 1 Other = 2 children
assert len(view.children) == 2
assert "real-choice" in view.children[0].label
@pytest.mark.asyncio
async def test_unwraps_dict_choices_to_description(self):
    """Dict-shaped choices are unwrapped to their inner text.

    LLMs sometimes emit ``[{"description": "..."}]`` instead of bare strings.
    The renderer must unwrap common dict shapes, not ``str()`` the whole dict
    into a Python repr on the button label.
    """
    adapter = _make_adapter()
    posted = MagicMock()
    posted.id = 555
    channel = MagicMock()
    channel.send = AsyncMock(return_value=posted)
    adapter._client.get_channel = MagicMock(return_value=channel)

    malformed = [
        {"description": "Tight, well-illustrated"},
        {"label": "Use label key"},
        {"text": "Use text key"},
        "normal-string",  # strings still pass through
    ]
    await adapter.send_clarify(
        chat_id="9001",
        question="?",
        choices=malformed,
        clarify_id="cidU",
        session_key="sk-U",
    )

    view = channel.send.call_args.kwargs["view"]
    labels = [button.label for button in view.children[:-1]]  # exclude Other

    # No raw Python repr should leak onto any label.
    repr_markers = ("{'", "':")
    for label in labels:
        for marker in repr_markers:
            assert marker not in label

    # Each dict unwrapped to its inner string.
    expected_fragments = (
        "Tight, well-illustrated",
        "Use label key",
        "Use text key",
        "normal-string",
    )
    for fragment in expected_fragments:
        assert any(fragment in lbl for lbl in labels)
@pytest.mark.asyncio
async def test_unwrap_prefers_description_over_name_in_multi_key_dict(self):
    """With both 'name' and 'description' present, 'description' is shown.

    'name' is often a short identifier in OpenAI-style tool calls, while
    'description' is the user-facing text. The user should never see a short
    model identifier on a button label.
    """
    adapter = _make_adapter()
    posted = MagicMock()
    posted.id = 666
    channel = MagicMock()
    channel.send = AsyncMock(return_value=posted)
    adapter._client.get_channel = MagicMock(return_value=channel)

    await adapter.send_clarify(
        chat_id="9001",
        question="?",
        choices=[{"name": "tight", "description": "Tight, well-illustrated"}],
        clarify_id="cidN",
        session_key="sk-N",
    )

    view = channel.send.call_args.kwargs["view"]
    choice_label = view.children[0].label
    assert "Tight, well-illustrated" in choice_label

    # The 'name' value (a short identifier) must NOT have leaked.
    _, after_prefix = choice_label.split("1. ", 1)
    body = after_prefix.rstrip("\u2026")
    assert "tight" not in body, f"'name' leaked onto button: {choice_label!r}"
@pytest.mark.asyncio
async def test_unwrap_prefers_label_over_description(self):
    """With both 'label' and 'description' present, 'label' wins.

    'label' is the canonical short user-facing text in most LLM tool
    conventions; 'description' is the longer explanation.
    """
    adapter = _make_adapter()
    posted = MagicMock()
    posted.id = 777
    channel = MagicMock()
    channel.send = AsyncMock(return_value=posted)
    adapter._client.get_channel = MagicMock(return_value=channel)

    await adapter.send_clarify(
        chat_id="9001",
        question="?",
        choices=[{"label": "Short", "description": "Long verbose explanation"}],
        clarify_id="cidL",
        session_key="sk-L",
    )

    view = channel.send.call_args.kwargs["view"]
    choice_label = view.children[0].label
    assert "Short" in choice_label
    # The longer description must NOT have leaked.
    assert "Long verbose" not in choice_label, (
        f"'description' leaked over 'label': {choice_label!r}"
    )
@pytest.mark.asyncio
async def test_unwrap_does_not_pick_value_or_name_alone(self):
    """Dicts carrying only 'name' or only 'value' yield no button.

    'name' and 'value' are Discord-component-shaped fields that could
    accidentally appear in dicts not intended as choices (e.g. a developer
    error in the gateway wiring). Only the well-known LLM tool-call keys
    (label, description, text, title) should win.
    """
    adapter = _make_adapter()
    posted = MagicMock()
    posted.id = 888
    channel = MagicMock()
    channel.send = AsyncMock(return_value=posted)
    adapter._client.get_channel = MagicMock(return_value=channel)

    mixed_choices = [
        {"name": "only_name_here"},  # should be filtered out
        {"value": "only_value_here"},  # should be filtered out
        {"description": "real choice"},
    ]
    await adapter.send_clarify(
        chat_id="9001",
        question="?",
        choices=mixed_choices,
        clarify_id="cidNV",
        session_key="sk-NV",
    )

    view = channel.send.call_args.kwargs["view"]
    choice_labels = [button.label for button in view.children[:-1]]  # exclude Other

    # Only the well-formed dict survives.
    assert len(choice_labels) == 1, (
        f"Expected 1 choice, got {len(choice_labels)}: {choice_labels!r}"
    )
    assert "real choice" in choice_labels[0]
    for label in choice_labels:
        assert "only_name_here" not in label, f"name leaked: {label!r}"
        assert "only_value_here" not in label, f"value leaked: {label!r}"

View file

@ -666,6 +666,70 @@ async def test_fetch_channel_context_stops_at_self_message_and_reverses_to_chron
)
@pytest.mark.asyncio
async def test_fetch_channel_context_skips_self_improvement_boundary_message(adapter, monkeypatch):
    """Delayed harness status bumps must not hide messages after the real reply."""
    monkeypatch.setenv("DISCORD_ALLOW_BOTS", "all")
    adapter.config.extra["history_backfill_limit"] = 10

    codex = SimpleNamespace(id=55, display_name="Codex", name="Codex", bot=True)
    human = SimpleNamespace(id=56, display_name="Alice", name="Alice", bot=False)
    me = adapter._client.user

    # (author, content, msg_id) rows, listed from msg_id 9 down to 1.
    rows = [
        (me, "arbitrary lifecycle text from a metadata-marked send", 9),
        (
            me,
            "[Background process bg-123 finished with exit code 0~ Here's the final output:\nok]",
            8,
        ),
        (codex, "♻ Gateway restarted successfully. Your session continues.", 7),
        (codex, "💾 Self-improvement review: Memory updated", 6),
        (human, "question after reply", 5),
        (
            me,
            "💾 Self-improvement review: Skill 'hermes-gateway-display-config' patched",
            4,
        ),
        (codex, "Codex final answer", 3),
        (human, "prompt before reply", 2),
        (me, "our prior response", 1),
    ]
    history = [
        make_history_message(author=author, content=content, msg_id=msg_id)
        for author, content, msg_id in rows
    ]
    channel = FakeHistoryChannel(history, channel_id=123)

    # Message 9 is flagged explicitly as non-conversational.
    adapter._nonconversational_messages.mark_many(["9"])

    trigger = make_message(channel=channel, content="trigger")
    result = await adapter._fetch_channel_context(channel, before=trigger)

    expected = (
        "[Recent channel messages]\n"
        "[Alice] prompt before reply\n"
        "[Codex [bot]] Codex final answer\n"
        "[Alice] question after reply"
    )
    assert result == expected
def test_nonconversational_fallback_requires_self_improvement_emoji():
    """The fallback matcher accepts the 💾-prefixed heading but not the bare one."""
    looks_nonconversational = discord_platform._looks_like_nonconversational_history_message

    with_emoji = "💾 Self-improvement review: Memory updated"
    assert looks_nonconversational(with_emoji)

    without_emoji = "Self-improvement review: this is a normal assistant heading"
    assert not looks_nonconversational(without_emoji)
@pytest.mark.asyncio
async def test_fetch_channel_context_skips_other_bots_when_allow_bots_none(adapter, monkeypatch):
monkeypatch.setenv("DISCORD_ALLOW_BOTS", "none")
@ -801,6 +865,33 @@ async def test_fetch_channel_context_ignores_stale_cache(adapter, monkeypatch):
assert recorded_after["value"] is None
@pytest.mark.asyncio
async def test_discord_send_does_not_cache_nonconversational_status_as_history_boundary(adapter):
    """Automated status notifications should not move the backfill boundary."""

    class SendingChannel(FakeTextChannel):
        """Channel stub whose ``send`` always reports message id 222."""

        async def send(self, content, reference=None):
            return SimpleNamespace(id=222)

    channel = SendingChannel(channel_id=777)

    def _lookup(channel_id):
        # Only channel 777 is known to the fake client.
        if channel_id == 777:
            return channel
        return None

    adapter._client = SimpleNamespace(
        user=adapter._client.user,
        get_channel=_lookup,
        fetch_channel=AsyncMock(return_value=channel),
    )
    adapter._last_self_message_id["777"] = "111"

    outcome = await adapter.send(
        "777",
        "arbitrary lifecycle text from gateway",
        metadata={"non_conversational": True},
    )

    assert outcome.success is True
    # The previous boundary is kept; the new id is only recorded as
    # non-conversational.
    assert adapter._last_self_message_id["777"] == "111"
    assert "222" in adapter._nonconversational_messages
@pytest.mark.asyncio
async def test_discord_shared_channel_backfill_prepends_context(adapter, monkeypatch):
monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true")
@ -925,5 +1016,3 @@ async def test_discord_auto_thread_skips_backfill(adapter, monkeypatch):
adapter._auto_create_thread.assert_awaited_once()
adapter._fetch_channel_context.assert_not_awaited()

View file

@ -0,0 +1,60 @@
"""Tests for the strict gateway command-line matcher.
Regression guard for the Windows ``hermes gateway restart`` silent-outage bug:
the previous loose substring match (``"... gateway" in cmdline``) false-matched
``gateway status``/``dashboard`` siblings and unrelated processes such as
``python -m tui_gateway``, which let ``restart()`` race a still-draining old
process and ``status``/``start`` report false positives.
"""
from __future__ import annotations
import pytest
from gateway.status import looks_like_gateway_command_line as matches
# Command lines the matcher must recognise as a gateway "run" process.
ACCEPT = [
    "pythonw.exe -m hermes_cli.main gateway run",
    r"C:\Users\me\hermes\venv\Scripts\pythonw.exe -m hermes_cli.main gateway run",
    "python -m hermes_cli.main --profile work gateway run",
    "python -m hermes_cli.main gateway run --replace",
    "python -m hermes_cli/main.py gateway run",
    "python gateway/run.py",
    "hermes-gateway.exe",
    "hermes gateway",  # bare `hermes gateway` defaults to run
    "hermes gateway run",
    # Profile selector AFTER the `gateway` token (argv is profile-position
    # agnostic — _apply_profile_override strips --profile/-p anywhere).
    "hermes gateway --profile work run",
    "python -m hermes_cli.main gateway -p work run",
    "hermes gateway --profile=work run",
    # A profile literally NAMED "gateway".
    "hermes -p gateway gateway run",
    "python -m hermes_cli.main --profile gateway gateway run",
    # Quoted Windows paths with spaces (shlex-aware tokenization).
    r'"C:\Program Files\Hermes\hermes-gateway.exe"',
    r'"C:\Program Files\Hermes\gateway\run.py" run',
    r'"C:\Program Files\Py\pythonw.exe" -m hermes_cli.main gateway run',
]

# Inputs the matcher must refuse, including empty and missing command lines.
REJECT = [
    "python -m tui_gateway",  # unrelated module
    "python -m hermes_cli.main gateway status",  # other subcommand
    "python -m hermes_cli.main gateway restart",
    "python -m hermes_cli.main gateway stop",
    "python -m hermes_cli.main --profile x dashboard",  # non-gateway subcommand
    "some random python -m mygateway thing",
    "",
    None,
]
@pytest.mark.parametrize("cmd", ACCEPT)
def test_accepts_real_gateway_run(cmd):
    """Every ACCEPT entry must make the matcher return exactly ``True``."""
    verdict = matches(cmd)
    assert verdict is True
@pytest.mark.parametrize("cmd", REJECT)
def test_rejects_non_gateway_run(cmd):
    """Every REJECT entry must make the matcher return exactly ``False``."""
    verdict = matches(cmd)
    assert verdict is False

View file

@ -43,3 +43,27 @@ def test_watcher_loops_are_coroutines():
# The two long-running watchers are async loops.
assert inspect.iscoroutinefunction(GatewayKanbanWatchersMixin._kanban_notifier_watcher)
assert inspect.iscoroutinefunction(GatewayKanbanWatchersMixin._kanban_dispatcher_watcher)
def test_singleton_dispatcher_lock_is_exclusive(tmp_path):
    """Only one holder of the dispatcher lock at a time.

    This is the backstop that stops concurrent dispatchers double reclaiming
    and corrupting shared kanban SQLite index pages under
    wal_autocheckpoint=0.

    Every acquired handle is released in a ``finally`` clause so a failing
    assertion cannot leave the lock held for the rest of the test session.
    """
    from gateway.kanban_watchers import _acquire_singleton_lock, _release_singleton_lock

    lock = tmp_path / "kanban" / ".dispatcher.lock"

    h1, st1 = _acquire_singleton_lock(lock)
    h2 = None
    try:
        assert st1 == "held" and h1 is not None
        # A second acquire while the first is held must be refused, not granted.
        h2, st2 = _acquire_singleton_lock(lock)
        assert st2 == "contended" and h2 is None
    finally:
        # Release whatever was actually granted, including an h2 that was
        # handed out by mistake.
        for handle in (h2, h1):
            if handle is not None:
                _release_singleton_lock(handle)

    # Releasing the first lets a fresh acquire succeed (lock is reusable).
    h3, st3 = _acquire_singleton_lock(lock)
    try:
        assert st3 == "held" and h3 is not None
    finally:
        if h3 is not None:
            _release_singleton_lock(h3)

View file

@ -156,3 +156,46 @@ async def test_model_global_persists_when_config_has_proper_dict_model(tmp_path,
written = yaml.safe_load(cfg_path.read_text(encoding="utf-8"))
assert written["model"]["default"] == "gpt-5.5"
assert written["model"]["provider"] == "openrouter"
@pytest.mark.asyncio
async def test_model_no_flag_persists_by_default(tmp_path, monkeypatch):
    """A plain ``/model X`` (no --global) now persists to config.yaml.

    This is the user-facing fix: switching models in one session survives
    into the next without re-typing the switch every time.
    """
    starting_model = {"default": "old-model", "provider": "openai-codex"}
    cfg_path = _setup_isolated_home(tmp_path, monkeypatch, starting_model)

    runner = _make_runner()
    event = _make_event("/model gpt-5.5")
    reply = await runner._handle_model_command(event)

    assert reply is not None
    assert "gpt-5.5" in reply

    on_disk = yaml.safe_load(cfg_path.read_text(encoding="utf-8"))
    assert on_disk["model"]["default"] == "gpt-5.5"
@pytest.mark.asyncio
async def test_model_session_flag_does_not_persist(tmp_path, monkeypatch):
    """``/model X --session`` opts out of persistence even under the new default."""
    starting_model = {"default": "old-model", "provider": "openai-codex"}
    cfg_path = _setup_isolated_home(tmp_path, monkeypatch, starting_model)

    runner = _make_runner()
    event = _make_event("/model gpt-5.5 --session")
    reply = await runner._handle_model_command(event)

    assert reply is not None
    assert "gpt-5.5" in reply

    # Config untouched — the session override is in-memory only.
    on_disk = yaml.safe_load(cfg_path.read_text(encoding="utf-8"))
    assert on_disk["model"]["default"] == "old-model"

View file

@ -0,0 +1,136 @@
"""Phase 3: secondary-profile adapter registry + same-token conflict detection."""
import pytest
from gateway.run import GatewayRunner
class _FakeAdapter:
def __init__(self, token=None):
self.token = token
class TestCredentialFingerprint:
    """Checks on ``GatewayRunner._adapter_credential_fingerprint``."""

    def test_none_without_token(self):
        """An adapter with no token has no fingerprint."""
        tokenless = _FakeAdapter()
        assert GatewayRunner._adapter_credential_fingerprint(tokenless) is None

    def test_stable_and_log_safe(self):
        """Equal tokens give equal 16-character fingerprints without the raw token."""
        first_adapter = _FakeAdapter(token="secret-bot-token")
        fp1 = GatewayRunner._adapter_credential_fingerprint(first_adapter)
        second_adapter = _FakeAdapter(token="secret-bot-token")
        fp2 = GatewayRunner._adapter_credential_fingerprint(second_adapter)

        assert fp1 == fp2  # stable
        assert "secret-bot-token" not in (fp1 or "")  # never the raw token
        assert len(fp1) == 16

    def test_distinct_tokens_distinct_fp(self):
        """Different tokens must not share a fingerprint."""
        fingerprint = GatewayRunner._adapter_credential_fingerprint
        fp_a = fingerprint(_FakeAdapter(token="tok-A"))
        fp_b = fingerprint(_FakeAdapter(token="tok-B"))
        assert fp_a != fp_b

    def test_reads_alt_attrs(self):
        """A credential stored under ``bot_token`` is also fingerprinted."""

        class _AltAdapter:
            def __init__(self):
                self.bot_token = "alt-token"

        alt = _AltAdapter()
        assert GatewayRunner._adapter_credential_fingerprint(alt) is not None
class TestProfileMessageHandler:
    """Checks on the handler built by ``_make_profile_message_handler``."""

    @staticmethod
    def _recording_runner(seen):
        """Build a bare runner whose ``_handle_message`` records the profile it sees."""
        runner = GatewayRunner.__new__(GatewayRunner)

        async def _fake_handle(event):
            seen["profile"] = event.source.profile
            return "ok"

        runner._handle_message = _fake_handle
        return runner

    @pytest.mark.asyncio
    async def test_stamps_profile_on_unstamped_source(self):
        """A source without a profile is stamped with the handler's profile."""
        seen = {}
        runner = self._recording_runner(seen)
        handler = runner._make_profile_message_handler("coder")

        class _Src:
            profile = None

        class _Evt:
            source = _Src()

        result = await handler(_Evt())

        assert result == "ok"
        assert seen["profile"] == "coder"

    @pytest.mark.asyncio
    async def test_does_not_override_existing_profile(self):
        """A source that already carries a profile keeps it."""
        seen = {}
        runner = self._recording_runner(seen)
        handler = runner._make_profile_message_handler("coder")

        class _Src:
            profile = "writer"  # already stamped (e.g. by URL prefix)

        class _Evt:
            source = _Src()

        await handler(_Evt())

        assert seen["profile"] == "writer"
class TestPortBindingHardError:
    """A secondary profile enabling a port-binding platform aborts startup."""

    @pytest.mark.asyncio
    async def test_secondary_webhook_raises(self, monkeypatch):
        """Enabling webhook in a secondary profile raises MultiplexConfigError."""
        from gateway.run import MultiplexConfigError
        from gateway.config import GatewayConfig, Platform, PlatformConfig

        runner = GatewayRunner.__new__(GatewayRunner)
        runner.config = GatewayConfig(multiplex_profiles=True)
        runner._profile_adapters = {}

        # The reviewer profile config enables webhook (a port-binding platform).
        webhook_cfg = PlatformConfig(enabled=True, extra={"port": 8644})
        reviewer_cfg = GatewayConfig(multiplex_profiles=True)
        reviewer_cfg.platforms = {Platform.WEBHOOK: webhook_cfg}
        monkeypatch.setattr(
            "gateway.config.load_gateway_config", lambda: reviewer_cfg
        )

        with pytest.raises(MultiplexConfigError) as ei:
            await runner._start_one_profile_adapters("reviewer", "/tmp/x", {})

        # The error text must name both the platform and the profile.
        message = str(ei.value)
        assert "webhook" in message
        assert "reviewer" in message

    @pytest.mark.asyncio
    async def test_secondary_non_binding_platform_ok(self, monkeypatch):
        """A non-port-binding platform (e.g. telegram) is NOT rejected."""
        from gateway.config import GatewayConfig, Platform, PlatformConfig

        runner = GatewayRunner.__new__(GatewayRunner)
        runner.config = GatewayConfig(multiplex_profiles=True)
        runner._profile_adapters = {}

        telegram_cfg = PlatformConfig(enabled=True, token="t")
        reviewer_cfg = GatewayConfig(multiplex_profiles=True)
        reviewer_cfg.platforms = {Platform.TELEGRAM: telegram_cfg}
        monkeypatch.setattr(
            "gateway.config.load_gateway_config", lambda: reviewer_cfg
        )
        # _create_adapter returns None here (no real telegram token wiring), so
        # the loop simply connects nothing — the key assertion is NO raise.
        monkeypatch.setattr(runner, "_create_adapter", lambda p, c: None)

        connected = await runner._start_one_profile_adapters("reviewer", "/tmp/x", {})

        assert connected == 0  # nothing connected, but no MultiplexConfigError

    def test_port_binding_set_covers_known_listeners(self):
        """Every adapter that binds a TCP port must be in the guard set."""
        from gateway.run import _PORT_BINDING_PLATFORM_VALUES

        known_listeners = (
            "webhook",
            "api_server",
            "msgraph_webhook",
            "feishu",
            "wecom_callback",
            "bluebubbles",
            "sms",
        )
        for platform_value in known_listeners:
            assert platform_value in _PORT_BINDING_PLATFORM_VALUES

View file

@ -0,0 +1,88 @@
"""End-to-end credential isolation proof for multiplex mode (Workstream A).
These exercise the REAL resolution path (runtime_provider, secret scope, MCP
interpolation) rather than mocking it, proving the property that matters: two
profiles with different keys never see each other's, and an unscoped read in
multiplex mode fails closed instead of leaking.
"""
import pytest
from agent import secret_scope as ss
@pytest.fixture(autouse=True)
def _reset(monkeypatch):
    """Switch multiplex mode off before and after every test in this module."""
    ss.set_multiplex_active(False)  # setup
    yield
    ss.set_multiplex_active(False)  # teardown
class TestRuntimeProviderUsesScope:
    """hermes_cli.runtime_provider._getenv resolves through the secret scope."""

    def test_getenv_reads_scope_under_multiplex(self, monkeypatch):
        """The scoped value is returned even when a different one is in os.environ."""
        from hermes_cli.runtime_provider import _getenv

        monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-global-leak")
        ss.set_multiplex_active(True)
        scope_token = ss.set_secret_scope({"ANTHROPIC_API_KEY": "sk-profileA"})
        try:
            resolved = _getenv("ANTHROPIC_API_KEY")
            assert resolved == "sk-profileA"
        finally:
            ss.reset_secret_scope(scope_token)

    def test_getenv_two_profiles_isolated(self, monkeypatch):
        """Consecutive scopes each yield only their own key."""
        from hermes_cli.runtime_provider import _getenv

        ss.set_multiplex_active(True)
        for profile_key in ("sk-A", "sk-B"):
            scope_token = ss.set_secret_scope({"OPENAI_API_KEY": profile_key})
            try:
                assert _getenv("OPENAI_API_KEY") == profile_key
            finally:
                ss.reset_secret_scope(scope_token)

    def test_getenv_fails_closed_unscoped(self, monkeypatch):
        """With multiplex on and no scope set, the read raises instead of leaking."""
        from hermes_cli.runtime_provider import _getenv

        monkeypatch.setenv("OPENROUTER_API_KEY", "sk-leak")
        ss.set_multiplex_active(True)

        with pytest.raises(ss.UnscopedSecretError):
            _getenv("OPENROUTER_API_KEY")

    def test_getenv_global_var_still_reads_environ(self, monkeypatch):
        """A global (non-secret) variable needs no scope and does not raise."""
        from hermes_cli.runtime_provider import _getenv

        monkeypatch.setenv("HERMES_MAX_ITERATIONS", "42")
        ss.set_multiplex_active(True)

        resolved = _getenv("HERMES_MAX_ITERATIONS")
        assert resolved == "42"
class TestMcpInterpolationUsesScope:
    """MCP config ${VAR} interpolation resolves through the secret scope."""

    def test_interpolation_reads_scope(self, monkeypatch):
        """Under multiplex the placeholder takes the scoped value, not os.environ's."""
        from tools.mcp_tool import _interpolate_env_vars

        monkeypatch.setenv("MY_MCP_TOKEN", "global-token")
        ss.set_multiplex_active(True)
        scope_token = ss.set_secret_scope({"MY_MCP_TOKEN": "profile-token"})
        try:
            raw_cfg = {"env": {"TOKEN": "${MY_MCP_TOKEN}"}}
            resolved_cfg = _interpolate_env_vars(raw_cfg)
            assert resolved_cfg == {"env": {"TOKEN": "profile-token"}}
        finally:
            ss.reset_secret_scope(scope_token)

    def test_interpolation_unset_keeps_placeholder(self, monkeypatch):
        """Multiplex off: an unset variable keeps its literal placeholder (legacy behavior)."""
        from tools.mcp_tool import _interpolate_env_vars

        monkeypatch.delenv("UNSET_MCP_VAR", raising=False)

        placeholder = "${UNSET_MCP_VAR}"
        assert _interpolate_env_vars(placeholder) == "${UNSET_MCP_VAR}"

    def test_interpolation_off_reads_environ(self, monkeypatch):
        """Multiplex off: legacy os.environ resolution is used."""
        from tools.mcp_tool import _interpolate_env_vars

        monkeypatch.setenv("MY_MCP_TOKEN", "env-token")

        resolved = _interpolate_env_vars("${MY_MCP_TOKEN}")
        assert resolved == "env-token"

View file

@ -0,0 +1,73 @@
"""Phase 1: HTTP-inbound /p/<profile>/ routing for the webhook adapter."""
import pytest
from gateway.config import GatewayConfig, Platform
from gateway.session import SessionSource, build_session_key
class TestSessionSourceProfileField:
    """Serialization and session-key behaviour of ``SessionSource.profile``."""

    def test_profile_roundtrips(self):
        """``profile`` survives a to_dict / from_dict round trip."""
        # Use WEBHOOK when the enum defines it, otherwise fall back to TELEGRAM.
        platform = getattr(Platform, "WEBHOOK", Platform.TELEGRAM)
        original = SessionSource(
            platform=platform,
            chat_id="c1",
            chat_type="webhook",
            profile="coder",
        )

        restored = SessionSource.from_dict(original.to_dict())

        assert restored.profile == "coder"

    def test_profile_absent_not_serialized(self):
        """A source built without a profile emits no 'profile' key."""
        source = SessionSource(platform=Platform.TELEGRAM, chat_id="c1", chat_type="dm")
        serialized = source.to_dict()
        assert "profile" not in serialized

    def test_source_profile_drives_session_key_namespace(self):
        """The key namespace follows the profile passed to build_session_key."""
        source = SessionSource(platform=Platform.TELEGRAM, chat_id="99", chat_type="dm")
        # build_session_key takes profile explicitly; the adapter passes
        # source.profile through. Verify the namespace follows it.
        key = build_session_key(source, profile="coder")
        assert key == "agent:coder:telegram:dm:99"
class TestWebhookProfileResolution:
    """_resolve_request_profile validates the /p/<profile>/ prefix."""

    def _adapter(self, multiplex: bool, served=("default", "coder")):
        """Return ``(adapter, request class, rejected sentinel, served names)``.

        The adapter is created without running ``__init__``; only
        ``gateway_runner`` is attached, because the tests call nothing but
        ``_resolve_request_profile``.
        """
        from gateway.platforms.webhook import WebhookAdapter, _PROFILE_REJECTED

        class _FakeReq:
            """Request stub exposing only ``match_info``."""

            def __init__(self, profile):
                if profile is not None:
                    self.match_info = {"profile": profile}
                else:
                    self.match_info = {}

        cfg = GatewayConfig(multiplex_profiles=multiplex)

        class _Runner:
            config = cfg

        # Construct minimally; we only call _resolve_request_profile.
        adapter = WebhookAdapter.__new__(WebhookAdapter)
        adapter.gateway_runner = _Runner()
        return adapter, _FakeReq, _PROFILE_REJECTED, served

    def test_no_prefix_returns_none(self):
        """A request without a profile segment resolves to None."""
        adapter, Req, _REJ, _ = self._adapter(multiplex=True)
        request = Req(None)
        assert adapter._resolve_request_profile(request) is None

    def test_prefix_ignored_when_multiplex_off(self):
        """With multiplexing off even a bogus profile is ignored (not 404'd)."""
        adapter, Req, _REJ, _ = self._adapter(multiplex=False)
        request = Req("anything")
        assert adapter._resolve_request_profile(request) is None

    def test_known_profile_accepted(self, monkeypatch):
        """A served profile name is returned unchanged."""
        adapter, Req, _REJ, served = self._adapter(multiplex=True)
        monkeypatch.setattr(
            "hermes_cli.profiles.profiles_to_serve",
            lambda multiplex: [(name, None) for name in served],
        )
        request = Req("coder")
        assert adapter._resolve_request_profile(request) == "coder"

    def test_unknown_profile_rejected(self, monkeypatch):
        """A name outside the served set yields the rejected sentinel."""
        adapter, Req, REJ, served = self._adapter(multiplex=True)
        monkeypatch.setattr(
            "hermes_cli.profiles.profiles_to_serve",
            lambda multiplex: [(name, None) for name in served],
        )
        request = Req("ghost")
        assert adapter._resolve_request_profile(request) is REJ

View file

@ -0,0 +1,55 @@
"""Phase 4: lifecycle guard + per-profile observability."""
import pytest
class TestServedProfilesStatus:
    """``served_profiles`` handling in the gateway runtime status record."""

    def test_write_and_read_served_profiles(self, tmp_path, monkeypatch):
        """A written ``served_profiles`` list is read back unchanged."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        import importlib
        import gateway.status as status

        # Reload so the module is re-evaluated with the patched HERMES_HOME.
        importlib.reload(status)
        try:
            profiles = ["default", "coder"]
            status.write_runtime_status(
                gateway_state="running", served_profiles=profiles
            )
            record = status.read_runtime_status()
            assert record.get("served_profiles") == ["default", "coder"]
        finally:
            importlib.reload(status)

    def test_served_profiles_absent_by_default(self, tmp_path, monkeypatch):
        """Omitting ``served_profiles`` leaves the key out of the record."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        import importlib
        import gateway.status as status

        importlib.reload(status)
        try:
            status.write_runtime_status(gateway_state="running")
            record = status.read_runtime_status()
            assert "served_profiles" not in record
        finally:
            importlib.reload(status)
class TestNamedProfileMultiplexerGuard:
    """_guard_named_profile_under_multiplexer is inert unless all conditions hold."""

    def test_inert_for_default_profile(self, monkeypatch):
        """An empty profile suffix (default profile) means the guard does not apply."""
        from hermes_cli import gateway as gw

        monkeypatch.setattr(gw, "_profile_suffix", lambda: "")

        # Passing means the call returned without raising.
        gw._guard_named_profile_under_multiplexer(force=False)

    def test_force_bypasses(self, monkeypatch):
        """``force=True`` returns immediately even for a named profile."""
        from hermes_cli import gateway as gw

        monkeypatch.setattr(gw, "_profile_suffix", lambda: "coder")

        gw._guard_named_profile_under_multiplexer(force=True)

    def test_inert_when_no_default_gateway_running(self, monkeypatch, tmp_path):
        """A named profile is allowed when no default gateway is running."""
        from hermes_cli import gateway as gw

        monkeypatch.setattr(gw, "_profile_suffix", lambda: "coder")
        monkeypatch.setattr(
            "hermes_constants.get_default_hermes_root", lambda: tmp_path
        )

        # No gateway.pid in tmp_path => no running default gateway => no raise.
        gw._guard_named_profile_under_multiplexer(force=False)

View file

@ -0,0 +1,165 @@
"""Phase 0 foundations for multi-profile gateway multiplexing.
Covers the three Phase 0 deliverables:
1. ``gateway.multiplex_profiles`` config flag (default False, round-trips).
2. ``hermes_cli.profiles.profiles_to_serve`` enumeration.
3. Profile-stamped ``build_session_key`` that is BYTE-IDENTICAL when the
flag is off (the orphan-every-session guard) and namespace-segmented when
on, without disturbing the positional key layout downstream parsers rely
on.
"""
import pytest
from unittest.mock import patch
from gateway.config import GatewayConfig, Platform
from gateway.session import SessionSource, SessionStore, build_session_key
def _src(**kw) -> SessionSource:
    """Build a SessionSource, defaulting to a Telegram DM with chat id "99".

    Any keyword supplied by the caller overrides the matching default.
    """
    defaults = {
        "platform": Platform.TELEGRAM,
        "chat_id": "99",
        "chat_type": "dm",
    }
    return SessionSource(**{**defaults, **kw})
@pytest.mark.parametrize("profile", [None, "default"])
class TestSessionKeyByteIdenticalWhenOff:
    """The non-negotiable guard: with no profile (or 'default'), every key is
    byte-for-byte what it was before Phase 0. A diff here orphans every
    existing session on upgrade.

    The ``profile`` parametrization is applied once at class level, so each
    test below runs for both ``None`` and ``"default"``.
    """

    def test_dm_with_chat_id(self, profile):
        source = _src(chat_id="99", chat_type="dm")
        key = build_session_key(source, profile=profile)
        assert key == "agent:main:telegram:dm:99"

    def test_dm_with_thread(self, profile):
        source = _src(chat_id="99", chat_type="dm", thread_id="t1")
        key = build_session_key(source, profile=profile)
        assert key == "agent:main:telegram:dm:99:t1"

    def test_dm_without_chat_id_falls_back_to_user(self, profile):
        source = _src(chat_id="", chat_type="dm", user_id="jordan")
        key = build_session_key(source, profile=profile)
        assert key == "agent:main:telegram:dm:jordan"

    def test_group_per_user(self, profile):
        source = _src(
            platform=Platform.DISCORD, chat_id="g1", chat_type="group", user_id="alice"
        )
        key = build_session_key(source, profile=profile)
        assert key == "agent:main:discord:group:g1:alice"

    def test_group_shared_when_disabled(self, profile):
        source = _src(
            platform=Platform.DISCORD, chat_id="g1", chat_type="group", user_id="alice"
        )
        key = build_session_key(source, group_sessions_per_user=False, profile=profile)
        assert key == "agent:main:discord:group:g1"
class TestSessionKeyNamespacedWhenOn:
    """A named profile occupies the namespace slot, isolating its sessions."""

    def test_named_profile_dm(self):
        source = _src(chat_id="99", chat_type="dm")
        key = build_session_key(source, profile="coder")
        assert key == "agent:coder:telegram:dm:99"

    def test_named_profile_group_per_user(self):
        source = _src(
            platform=Platform.DISCORD, chat_id="g1", chat_type="group", user_id="alice"
        )
        key = build_session_key(source, profile="coder")
        assert key == "agent:coder:discord:group:g1:alice"

    def test_two_profiles_same_chat_do_not_collide(self):
        """Three profiles on the same chat yield three distinct keys."""
        source = _src(chat_id="99", chat_type="dm")
        keys = {
            build_session_key(source, profile=name)
            for name in ("default", "coder", "writer")
        }
        assert len(keys) == 3

    def test_positional_layout_preserved_for_parsers(self):
        """Downstream parsers split on ':' and read parts[2]=platform,
        parts[3]=chat_type, parts[4]=chat_id (see qqbot adapter
        _parse_gateway_session_key). The profile must occupy parts[1] only."""
        source = _src(
            platform=Platform.DISCORD, chat_id="g1", chat_type="group", user_id="alice"
        )
        parts = build_session_key(source, profile="coder").split(":")
        expected_prefix = [
            "agent",
            "coder",  # namespace slot (was always 'main')
            "discord",  # platform — unchanged offset
            "group",  # chat_type — unchanged offset
            "g1",  # chat_id — unchanged offset
        ]
        assert parts[:5] == expected_prefix

    def test_default_namespace_layout_matches_named(self):
        """Default and named keys differ ONLY in parts[1]."""
        source = _src(
            platform=Platform.SLACK, chat_id="c1", chat_type="channel", user_id="u1"
        )
        default_parts = build_session_key(source, profile="default").split(":")
        named_parts = build_session_key(source, profile="coder").split(":")

        assert default_parts[0] == named_parts[0] == "agent"
        assert default_parts[1] == "main" and named_parts[1] == "coder"
        # Everything after the namespace is identical.
        assert default_parts[2:] == named_parts[2:]
class TestMultiplexConfigFlag:
    """``GatewayConfig.multiplex_profiles`` defaults to off and round-trips
    through ``to_dict`` / ``from_dict``."""

    def test_default_is_false(self):
        config = GatewayConfig()
        assert config.multiplex_profiles is False

    def test_to_dict_includes_flag(self):
        serialized = GatewayConfig().to_dict()
        assert serialized["multiplex_profiles"] is False

    def test_from_dict_top_level(self):
        raw = {"multiplex_profiles": True}
        assert GatewayConfig.from_dict(raw).multiplex_profiles is True

    def test_from_dict_nested_gateway(self):
        # The flag is also accepted under a nested "gateway" section.
        raw = {"gateway": {"multiplex_profiles": True}}
        assert GatewayConfig.from_dict(raw).multiplex_profiles is True

    def test_from_dict_coerces_truthy_string(self):
        raw = {"multiplex_profiles": "true"}
        assert GatewayConfig.from_dict(raw).multiplex_profiles is True

    def test_roundtrip(self):
        original = GatewayConfig(multiplex_profiles=True)
        restored = GatewayConfig.from_dict(original.to_dict())
        assert restored.multiplex_profiles is True
class TestSessionStoreProfileResolution:
    """``SessionStore._generate_session_key`` follows ``multiplex_profiles``:
    legacy namespace when the flag is off, active-profile namespace when on."""

    def _store(self, tmp_path, **cfg_kw):
        """Create a ``SessionStore`` with ``_ensure_loaded`` patched out
        during construction, then mark it loaded with no database."""
        config = GatewayConfig(**cfg_kw)
        with patch("gateway.session.SessionStore._ensure_loaded"):
            store = SessionStore(sessions_dir=tmp_path, config=config)
        store._db = None
        store._loaded = True
        return store

    def test_flag_off_uses_legacy_namespace(self, tmp_path):
        store = self._store(tmp_path)  # multiplex_profiles defaults False
        source = _src(chat_id="99", chat_type="dm")
        assert store._generate_session_key(source) == "agent:main:telegram:dm:99"
        assert store._generate_session_key(source) == build_session_key(source)

    def test_flag_off_resolve_profile_is_none(self, tmp_path):
        store = self._store(tmp_path)
        resolved = store._resolve_profile_for_key()
        assert resolved is None

    def test_flag_on_uses_active_profile_namespace(self, tmp_path):
        store = self._store(tmp_path, multiplex_profiles=True)
        source = _src(chat_id="99", chat_type="dm")
        with patch(
            "hermes_cli.profiles.get_active_profile_name",
            return_value="coder",
        ):
            key = store._generate_session_key(source)
            assert key == "agent:coder:telegram:dm:99"

    def test_flag_on_default_profile_stays_legacy(self, tmp_path):
        store = self._store(tmp_path, multiplex_profiles=True)
        source = _src(chat_id="99", chat_type="dm")
        with patch(
            "hermes_cli.profiles.get_active_profile_name",
            return_value="default",
        ):
            key = store._generate_session_key(source)
            assert key == "agent:main:telegram:dm:99"

View file

@ -51,3 +51,18 @@ def test_reload_runtime_env_keeps_env_max_iterations_when_config_omits_key(
gateway_run._reload_runtime_env_preserving_config_authority()
assert os.environ["HERMES_MAX_ITERATIONS"] == "123"
def test_current_max_iterations_reloads_before_reading(monkeypatch) -> None:
    """``_current_max_iterations`` must return the value present after the
    runtime-env reload hook has run, not the value from before it."""
    # Registering the variable with monkeypatch restores it at teardown,
    # even though the fake reload below writes os.environ directly.
    monkeypatch.setenv("HERMES_MAX_ITERATIONS", "90")

    def _fake_reload() -> None:
        os.environ.update({"HERMES_MAX_ITERATIONS": "200"})

    monkeypatch.setattr(
        gateway_run,
        "_reload_runtime_env_preserving_config_authority",
        _fake_reload,
    )

    observed = gateway_run._current_max_iterations()
    assert observed == 200

View file

@ -153,6 +153,39 @@ class TestShouldExclude:
assert not _should_exclude(Path("skills/autonomous-ai-agents/hermes-agent/SKILL.md"))
assert not _should_exclude(Path("skills/autonomous-ai-agents/hermes-agent/sub/item.txt"))
@pytest.mark.parametrize(
    "rel",
    [
        "plugins/my-plugin/.venv/lib/python3.12/site-packages/x/__init__.py",
        "plugins/my-plugin/venv/bin/python",
        "mcp/server/site-packages/pkg/mod.py",
        ".cache/uv/wheels/abc.whl",
        "plugins/p/.cache/pip/http/deadbeef",
        ".tox/py312/log.txt",
        ".nox/tests/bin/pytest",
        "plugins/p/.pytest_cache/v/cache/lastfailed",
        ".mypy_cache/3.12/agent.meta.json",
        ".ruff_cache/0.4.0/abc",
    ],
)
def test_excludes_regeneratable_dependency_and_cache_dirs(self, rel):
    """Paths inside Python dependency trees and tool caches under
    HERMES_HOME must be excluded — these are what balloon a backup to
    hundreds of thousands of files."""
    from hermes_cli.backup import _should_exclude

    candidate = Path(rel)
    assert _should_exclude(candidate)
def test_does_not_exclude_curator_archive(self):
    """skills/.archive/ holds restorable archived skills and MUST survive
    a backup — it is intentionally NOT in the exclusion set."""
    from hermes_cli.backup import _should_exclude

    archived_skill = Path("skills/.archive/old-skill/SKILL.md")
    assert not _should_exclude(archived_skill)
def test_does_not_exclude_legit_files_resembling_cache_names(self):
    """Only directory-component matches are excluded; ordinary files whose
    names merely resemble ``venv`` or ``cache`` are kept."""
    from hermes_cli.backup import _should_exclude

    lookalikes = (
        "skills/my-skill/venv-notes.md",
        "memories/cache.json",
    )
    for rel in lookalikes:
        assert not _should_exclude(Path(rel))
# ---------------------------------------------------------------------------
# Backup tests
# ---------------------------------------------------------------------------
@ -272,6 +305,37 @@ class TestBackup:
agent_files = [n for n in names if "hermes-agent" in n]
assert agent_files == [], f"hermes-agent files leaked into backup: {agent_files}"
def test_excludes_dependency_and_cache_trees(self, tmp_path, monkeypatch):
    """A plugin venv / site-packages / uv cache under HERMES_HOME must not
    appear in the backup archive, while real data (skills, config) must.

    This is the regression guard for the ballooning-backup bug.
    """
    home_dir = tmp_path / ".hermes"
    home_dir.mkdir()
    _make_hermes_tree(home_dir)

    # Simulate the heavy regeneratable trees that ballooned the backup.
    dep_dir = home_dir / "plugins" / "heavy" / ".venv" / "lib" / "site-packages" / "dep"
    dep_dir.mkdir(parents=True)
    (dep_dir / "__init__.py").write_text("# dep\n")
    wheel_dir = home_dir / ".cache" / "uv" / "wheels"
    wheel_dir.mkdir(parents=True)
    (wheel_dir / "abc.whl").write_bytes(b"\x00")

    monkeypatch.setenv("HERMES_HOME", str(home_dir))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)

    archive_path = tmp_path / "backup.zip"
    # Imported only after the environment is patched, as in the original.
    from hermes_cli.backup import run_backup

    run_backup(Namespace(output=str(archive_path)))

    with zipfile.ZipFile(archive_path, "r") as zf:
        names = zf.namelist()

    forbidden_markers = (".venv", "site-packages", ".cache")
    leaked = [
        n for n in names
        if any(marker in n for marker in forbidden_markers)
    ]
    assert leaked == [], f"regeneratable trees leaked into backup: {leaked}"

    # Real data still present.
    for expected in ("skills/my-skill/SKILL.md", "config.yaml"):
        assert expected in names
def test_includes_nested_hermes_agent_in_skills(self, tmp_path, monkeypatch):
"""Backup includes skills/.../hermes-agent/ but NOT root hermes-agent/."""
hermes_home = tmp_path / ".hermes"

View file

@ -955,6 +955,17 @@ class TestInterimAssistantMessageConfig:
assert raw["display"]["interim_assistant_messages"] is True
class TestCliRefreshIntervalConfig:
    """Pin the default of ``display.cli_refresh_interval`` (#45592 / #48309)."""

    def test_default_config_enables_cli_refresh_interval(self):
        """The default is 1.0 so the idle status-bar clock keeps ticking and
        the bottom chrome stays alive during idle (#45592). Users on
        emulators where the periodic redraw fights auto-scroll can set it
        to 0 (#48309)."""
        display_defaults = DEFAULT_CONFIG["display"]
        assert display_defaults["cli_refresh_interval"] == 1.0
class TestDiscordChannelPromptsConfig:
def test_default_config_includes_discord_channel_prompts(self):
assert DEFAULT_CONFIG["discord"]["channel_prompts"] == {}

View file

@ -31,6 +31,9 @@ def hermes_home(tmp_path, monkeypatch):
(logs_dir / "gateway.log").write_text(
"2026-04-12 17:00:10 INFO gateway.run: started\n"
)
(logs_dir / "gui.log").write_text(
"2026-04-12 17:00:12 INFO hermes_cli.web_server: dashboard request\n"
)
(logs_dir / "desktop.log").write_text(
"2026-04-12 17:00:15 INFO desktop: backend spawned\n"
)
@ -454,6 +457,15 @@ class TestCollectDebugReport:
assert "--- gateway.log" in report
def test_report_includes_gui_log(self, hermes_home):
    """The debug report has a ``gui.log`` section containing that log's text."""
    from hermes_cli.debug import collect_debug_report

    with patch("hermes_cli.dump.run_dump"):
        report = collect_debug_report(log_lines=50)

    # Section header first, then a line written by the fixture.
    for needle in ("--- gui.log", "dashboard request"):
        assert needle in report
def test_report_includes_desktop_log(self, hermes_home):
from hermes_cli.debug import collect_debug_report
@ -538,8 +550,8 @@ class TestRunDebugShare:
assert "FULL agent.log" in out
assert "FULL gateway.log" in out
def test_share_uploads_four_pastes(self, hermes_home, capsys):
"""Successful share uploads report + agent.log + gateway.log + desktop.log."""
def test_share_uploads_five_pastes(self, hermes_home, capsys):
"""Successful share uploads report + agent.log + gateway.log + gui.log + desktop.log."""
from hermes_cli.debug import run_debug_share
args = MagicMock()
@ -561,15 +573,17 @@ class TestRunDebugShare:
run_debug_share(args)
out = capsys.readouterr().out
# Should have 4 uploads: report, agent.log, gateway.log, desktop.log
assert call_count[0] == 4
# Should have 5 uploads: report, agent.log, gateway.log, gui.log, desktop.log
assert call_count[0] == 5
assert "paste.rs/paste1" in out # Report
assert "paste.rs/paste2" in out # agent.log
assert "paste.rs/paste3" in out # gateway.log
assert "paste.rs/paste4" in out # desktop.log
assert "paste.rs/paste4" in out # gui.log
assert "paste.rs/paste5" in out # desktop.log
assert "Report" in out
assert "agent.log" in out
assert "gateway.log" in out
assert "gui.log" in out
assert "desktop.log" in out
# Each log paste should start with the dump header
@ -579,7 +593,10 @@ class TestRunDebugShare:
gateway_paste = uploaded_content[2]
assert "--- hermes dump ---" in gateway_paste
assert "--- full gateway.log ---" in gateway_paste
desktop_paste = uploaded_content[3]
gui_paste = uploaded_content[3]
assert "--- hermes dump ---" in gui_paste
assert "--- full gui.log ---" in gui_paste
desktop_paste = uploaded_content[4]
assert "--- hermes dump ---" in desktop_paste
assert "--- full desktop.log ---" in desktop_paste

View file

@ -6,6 +6,7 @@ Covers:
- _contains_gateway_lifecycle_command pattern matching
"""
import json
import os
from argparse import Namespace
@ -250,3 +251,109 @@ class TestGatewaySelfTargetingGuard:
args = Namespace(gateway_command="restart", all=False, system=False)
with pytest.raises(_Reached):
gw.gateway_command(args)
# ---------------------------------------------------------------------------
# Defense 3: terminal_tool hard-blocks gateway lifecycle commands inside gateway
# ---------------------------------------------------------------------------
class TestTerminalToolGatewayLifecycleGuard:
    """``terminal_tool`` must refuse gateway lifecycle commands when
    ``_HERMES_GATEWAY=1``.

    Issue #37453: ``systemctl --user restart hermes-gateway`` runs as a child
    of the gateway process. When systemd delivers SIGTERM the gateway kills
    its own restart command mid-execution — the service may never restart.
    The guard must fire before execution, unconditionally (``force=True``
    cannot bypass it).
    """

    def _make_fake_env(self):
        """Return an environment whose ``execute`` fails the test if reached."""

        class _FakeEnv:
            env = {}

            def execute(self, command, **kwargs):  # pragma: no cover
                raise AssertionError("execute must not be reached")

        return _FakeEnv()

    def _make_recording_env(self, calls, output):
        """Return an environment that appends each command to *calls* and
        reports success with *output*."""

        class _FakeEnv:
            env = {}

            def execute(self, command, **kwargs):
                calls.append(command)
                return {"output": output, "returncode": 0}

        return _FakeEnv()

    def _minimal_config(self):
        """Smallest env config dict used to stand in for ``_get_env_config``."""
        return {
            "env_type": "local",
            "cwd": "/tmp",
            "timeout": 60,
            "lifetime_seconds": 3600,
        }

    def _patch_env(self, monkeypatch, fake_env, *, inside_gateway: bool):
        """Install *fake_env* as the ``"default"`` environment in
        ``tools.terminal_tool`` and set or clear ``_HERMES_GATEWAY``."""
        import tools.terminal_tool as tt

        eid = "default"
        replacements = {
            "_active_environments": {eid: fake_env},
            "_last_activity": {eid: 0.0},
            "_task_env_overrides": {},
            "_get_env_config": self._minimal_config,
        }
        for attr_name, replacement in replacements.items():
            monkeypatch.setattr(tt, attr_name, replacement)

        if not inside_gateway:
            monkeypatch.delenv("_HERMES_GATEWAY", raising=False)
        else:
            monkeypatch.setenv("_HERMES_GATEWAY", "1")

    @pytest.mark.parametrize("cmd", [
        "systemctl restart hermes-gateway",
        "systemctl --user restart hermes-gateway",
        "systemctl stop hermes-gateway.service",
        "hermes gateway restart",
        "launchctl kickstart gui/501/ai.hermes.gateway",
        "pkill -f hermes.*gateway",
    ])
    def test_blocks_lifecycle_commands_inside_gateway(self, monkeypatch, cmd):
        import tools.terminal_tool as tt

        self._patch_env(monkeypatch, self._make_fake_env(), inside_gateway=True)
        raw = tt.terminal_tool(command=cmd)
        result = json.loads(raw)
        assert result["exit_code"] == 1
        assert "Blocked" in result["error"]

    def test_force_true_cannot_bypass_block(self, monkeypatch):
        import tools.terminal_tool as tt

        self._patch_env(monkeypatch, self._make_fake_env(), inside_gateway=True)
        raw = tt.terminal_tool(
            command="systemctl restart hermes-gateway", force=True
        )
        result = json.loads(raw)
        assert result["exit_code"] == 1
        assert "Blocked" in result["error"]

    def test_safe_systemctl_commands_pass_through(self, monkeypatch):
        """Non-hermes systemctl commands must not be blocked by this guard."""
        import tools.terminal_tool as tt

        calls = []
        env = self._make_recording_env(calls, "Active: running")
        self._patch_env(monkeypatch, env, inside_gateway=True)
        monkeypatch.setattr(
            tt, "_check_all_guards", lambda cmd, env: {"approved": True}
        )

        result = json.loads(tt.terminal_tool(command="systemctl status nginx"))
        assert result["exit_code"] == 0
        assert calls == ["systemctl status nginx"]

    def test_guard_inactive_outside_gateway(self, monkeypatch):
        """Without _HERMES_GATEWAY=1 the lifecycle guard must not fire."""
        import tools.terminal_tool as tt

        calls = []
        env = self._make_recording_env(calls, "restarting...")
        self._patch_env(monkeypatch, env, inside_gateway=False)
        monkeypatch.setattr(
            tt, "_check_all_guards", lambda cmd, env: {"approved": True}
        )

        result = json.loads(
            tt.terminal_tool(command="systemctl restart hermes-gateway")
        )
        # Outside the gateway the lifecycle guard doesn't block — the normal
        # approval flow handles it (here mocked as approved).
        assert result["exit_code"] == 0
        assert calls == ["systemctl restart hermes-gateway"]

View file

@ -505,6 +505,171 @@ def test_stale_claim_with_live_pid_uses_env_ttl_override(
assert task.claim_expires > int(time.time()) + 3000
def test_stale_claim_deferred_when_live_worker_survives_termination(
    kanban_home, monkeypatch,
):
    """A TTL-expired claim whose worker survives the kill must NOT be released.

    Releasing would let the dispatcher spawn a duplicate beside the still-alive
    worker — the runaway seen when a cgroup memory.high throttle parks a worker
    in uninterruptible (D) state, where a pending SIGKILL cannot land. The claim
    is held (extended) and retried next tick instead.
    """
    import hermes_cli.kanban_db as _kb

    # Stubbed terminator result: kill attempted on this host, worker survived.
    kill_failed = {
        "termination_attempted": True,
        "host_local": True,
        "terminated": False,
    }

    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")
        host, _, _ = _kb._claimer_id().partition(":")
        kb.claim_task(conn, task_id, claimer=f"{host}:worker")
        kb._set_worker_pid(conn, task_id, 12345)

        old_expires = int(time.time()) - 60
        # Heartbeat stale by > 1h so the live-pid EXTEND branch is skipped and
        # the terminate path (the wedged-worker case) runs.
        stale_heartbeat = int(time.time()) - 7200
        conn.execute(
            "UPDATE tasks SET claim_expires = ?, last_heartbeat_at = ? "
            "WHERE id = ?",
            (old_expires, stale_heartbeat, task_id),
        )

        monkeypatch.setattr(_kb, "_pid_alive", lambda _pid: True)
        monkeypatch.setattr(
            _kb, "_terminate_reclaimed_worker",
            lambda *a, **k: dict(kill_failed),
        )

        reclaimed = kb.release_stale_claims(conn, signal_fn=lambda _p, _s: None)
        assert reclaimed == 0
        assert kb.get_task(conn, task_id).status == "running"

        pid_row = conn.execute(
            "SELECT worker_pid FROM tasks WHERE id = ?", (task_id,),
        ).fetchone()
        assert pid_row[0] == 12345  # worker not orphaned

        expires_row = conn.execute(
            "SELECT claim_expires FROM tasks WHERE id = ?", (task_id,),
        ).fetchone()
        assert expires_row[0] > old_expires  # claim held, not released

        event_rows = conn.execute(
            "SELECT kind FROM task_events WHERE task_id = ?", (task_id,),
        ).fetchall()
        kinds = [row["kind"] for row in event_rows]
        assert "reclaim_deferred" in kinds
        assert "reclaimed" not in kinds
def test_stale_claim_reclaimed_when_termination_succeeds(
    kanban_home, monkeypatch,
):
    """When the worker is actually killed, the claim is released as before."""
    import hermes_cli.kanban_db as _kb

    # Stubbed terminator result: kill attempted on this host and succeeded.
    kill_succeeded = {
        "termination_attempted": True,
        "host_local": True,
        "terminated": True,
    }

    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")
        host, _, _ = _kb._claimer_id().partition(":")
        kb.claim_task(conn, task_id, claimer=f"{host}:worker")
        kb._set_worker_pid(conn, task_id, 12345)

        expired_at = int(time.time()) - 60
        stale_heartbeat = int(time.time()) - 7200
        conn.execute(
            "UPDATE tasks SET claim_expires = ?, last_heartbeat_at = ? "
            "WHERE id = ?",
            (expired_at, stale_heartbeat, task_id),
        )

        monkeypatch.setattr(_kb, "_pid_alive", lambda _pid: False)
        monkeypatch.setattr(
            _kb, "_terminate_reclaimed_worker",
            lambda *a, **k: dict(kill_succeeded),
        )

        reclaimed = kb.release_stale_claims(conn, signal_fn=lambda _p, _s: None)
        assert reclaimed == 1
        assert kb.get_task(conn, task_id).status == "ready"
def test_stale_claim_released_when_worker_not_host_local(
    kanban_home, monkeypatch,
):
    """The defer guard only holds OUR own surviving workers.

    A claim we cannot manage (different host, or no kill attempted) must still
    be released, otherwise a foreign-host claim could strand a task forever.
    """
    import hermes_cli.kanban_db as _kb

    # Stubbed terminator result: nothing attempted, worker not on this host.
    not_ours = {
        "termination_attempted": False,
        "host_local": False,
        "terminated": False,
    }

    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")
        host, _, _ = _kb._claimer_id().partition(":")
        kb.claim_task(conn, task_id, claimer=f"{host}:worker")
        kb._set_worker_pid(conn, task_id, 12345)

        expired_at = int(time.time()) - 60
        stale_heartbeat = int(time.time()) - 7200
        conn.execute(
            "UPDATE tasks SET claim_expires = ?, last_heartbeat_at = ? "
            "WHERE id = ?",
            (expired_at, stale_heartbeat, task_id),
        )

        monkeypatch.setattr(_kb, "_pid_alive", lambda _pid: True)
        monkeypatch.setattr(
            _kb, "_terminate_reclaimed_worker",
            lambda *a, **k: dict(not_ours),
        )

        reclaimed = kb.release_stale_claims(conn, signal_fn=lambda _p, _s: None)
        assert reclaimed == 1
        assert kb.get_task(conn, task_id).status == "ready"
def test_detect_stale_defers_when_live_worker_survives(kanban_home, monkeypatch):
    """detect_stale_running must also hold the claim when the worker survives."""
    import hermes_cli.kanban_db as _kb

    # Stubbed terminator result: kill attempted on this host, worker survived.
    kill_failed = {
        "termination_attempted": True,
        "host_local": True,
        "terminated": False,
    }

    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="wedged", assignee="worker")
        kb.claim_task(conn, task_id)
        kb._set_worker_pid(conn, task_id, os.getpid())

        # Backdate the task and its current run beyond the 4h timeout used
        # below, and clear the heartbeat.
        five_hours_ago = int(time.time()) - (5 * 3600)
        with kb.write_txn(conn):
            conn.execute(
                "UPDATE tasks SET started_at = ?, last_heartbeat_at = NULL "
                "WHERE id = ?",
                (five_hours_ago, task_id),
            )
            conn.execute(
                "UPDATE task_runs SET started_at = ? "
                "WHERE id = (SELECT current_run_id FROM tasks WHERE id = ?)",
                (five_hours_ago, task_id),
            )

        monkeypatch.setattr(_kb, "_pid_alive", lambda _pid: True)
        monkeypatch.setattr(
            _kb, "_terminate_reclaimed_worker",
            lambda *a, **k: dict(kill_failed),
        )

        stale = kb.detect_stale_running(
            conn, stale_timeout_seconds=14400, signal_fn=lambda p, s: None,
        )
        assert stale == []
        assert kb.get_task(conn, task_id).status == "running"

        event_rows = conn.execute(
            "SELECT kind FROM task_events WHERE task_id = ?", (task_id,),
        ).fetchall()
        kinds = [row["kind"] for row in event_rows]
        assert "reclaim_deferred" in kinds
def test_stale_claim_reclaim_event_records_diagnostic_payload(
kanban_home, monkeypatch,
):

View file

@ -55,10 +55,12 @@ def test_prompt_toolkit_model_picker_defers_confirmation_off_key_handler(monkeyp
lambda *_args: captured.setdefault("ran_inline", True)
)
_bound(cli_mod.HermesCLI._handle_model_picker_selection, self_)()
# The key handler now resolves persistence via resolve_persist_behavior,
# which defaults to True (persist-by-default). Simulate that call.
_bound(cli_mod.HermesCLI._handle_model_picker_selection, self_)(persist_global=True)
assert self_._model_picker_state is None
assert captured["started"] is True
assert captured["daemon"] is True
assert captured["args"] == (result, False)
assert captured["args"] == (result, True)
assert "ran_inline" not in captured

View file

@ -0,0 +1,122 @@
"""Tests for persist-by-default model switching.
Covers:
- ``parse_model_flags`` recognises ``--session`` (and keeps ``--global``).
- ``resolve_persist_behavior`` applies the config-gated default and the
``--session`` / ``--global`` overrides.
- The default (no flags) persists, which is the user-facing fix: a plain
``/model <name>`` survives across sessions.
"""
from unittest.mock import patch
from hermes_cli.model_switch import parse_model_flags, resolve_persist_behavior
# ---------------------------------------------------------------------------
# parse_model_flags
# ---------------------------------------------------------------------------
class TestParseModelFlagsSession:
    """``parse_model_flags`` returns a 5-tuple.

    Judging by the cases below, the positions appear to be
    (model, provider, global, refresh, session) — confirm against
    ``hermes_cli.model_switch``.
    """

    def test_no_flags(self):
        parsed = parse_model_flags("sonnet")
        assert parsed == ("sonnet", "", False, False, False)

    def test_global_flag(self):
        parsed = parse_model_flags("sonnet --global")
        assert parsed == ("sonnet", "", True, False, False)

    def test_session_flag(self):
        expected = ("sonnet", "", False, False, True)
        assert parse_model_flags("sonnet --session") == expected

    def test_session_with_provider(self):
        expected = ("sonnet", "anthropic", False, False, True)
        parsed = parse_model_flags("sonnet --provider anthropic --session")
        assert parsed == expected

    def test_refresh_flag_still_parsed(self):
        parsed = parse_model_flags("--refresh")
        assert parsed == ("", "", False, True, False)

    def test_unicode_dash_session_normalized(self):
        # Telegram/iOS auto-converts -- to en/em dashes.
        expected = ("sonnet", "", False, False, True)
        assert parse_model_flags("sonnet \u2013session") == expected
# ---------------------------------------------------------------------------
# resolve_persist_behavior
# ---------------------------------------------------------------------------
class TestResolvePersistBehavior:
    """``resolve_persist_behavior`` combines the explicit flags with the
    ``model.persist_switch_by_default`` config key.

    The two positional arguments look like (global flag, session flag),
    going by the comments on each case — confirm against the implementation.
    """

    def test_session_flag_always_session_only(self):
        # --session opts out even if the config default is True.
        config = {"model": {"persist_switch_by_default": True}}
        with _config(config):
            assert resolve_persist_behavior(False, True) is False

    def test_global_flag_always_persists(self):
        # --global forces persist even if the config default is False.
        config = {"model": {"persist_switch_by_default": False}}
        with _config(config):
            assert resolve_persist_behavior(True, False) is True

    def test_default_persists_when_config_missing(self):
        # No model section at all → built-in default (True).
        with _config({}):
            assert resolve_persist_behavior(False, False) is True

    def test_default_persists_when_key_true(self):
        config = {"model": {"persist_switch_by_default": True}}
        with _config(config):
            assert resolve_persist_behavior(False, False) is True

    def test_default_session_only_when_key_false(self):
        config = {"model": {"persist_switch_by_default": False}}
        with _config(config):
            assert resolve_persist_behavior(False, False) is False

    def test_default_when_model_is_flat_string(self):
        # Fresh install: ``model: ""`` (not a dict) → built-in default True.
        with _config({"model": ""}):
            assert resolve_persist_behavior(False, False) is True

    def test_session_overrides_global_when_both_set(self):
        # --session is the explicit opt-out and wins over --global.
        config = {"model": {"persist_switch_by_default": True}}
        with _config(config):
            assert resolve_persist_behavior(True, True) is False
# ---------------------------------------------------------------------------
# helper
# ---------------------------------------------------------------------------
class _config:
"""Context manager that patches ``load_config`` to return a fixed dict."""
def __init__(self, cfg: dict):
self.cfg = cfg
def __enter__(self):
self._patch = patch(
"hermes_cli.config.load_config",
return_value=self.cfg,
)
# resolve_persist_behavior imports load_config lazily inside the
# function, so patching the source module is sufficient.
self._patch.start()
return self
def __exit__(self, *exc):
self._patch.stop()

View file

@ -35,6 +35,7 @@ from hermes_cli.profiles import (
has_bundled_skills_opt_out,
NO_BUNDLED_SKILLS_MARKER,
backfill_profile_envs,
profiles_to_serve,
)
from hermes_cli.config import DEFAULT_CONFIG
@ -1487,3 +1488,48 @@ class TestEdgeCases:
delete_profile("coder", yes=True)
assert get_active_profile() == "default"
class TestProfilesToServe:
    """``profiles_to_serve(multiplex)`` — the gateway's profile-enumeration
    chokepoint. It returns ``(name, home)`` pairs."""

    def test_off_returns_only_active_default(self, profile_env):
        served = profiles_to_serve(multiplex=False)
        assert len(served) == 1
        profile_name, profile_home = served[0]
        assert profile_name == "default"
        assert profile_home == _get_default_hermes_home()

    def test_off_returns_only_active_named(self, profile_env, monkeypatch):
        # A named profile's gateway runs with HERMES_HOME pointing at the
        # profile dir; get_active_profile_name() infers the name from there.
        create_profile("coder", no_alias=True)
        monkeypatch.setenv("HERMES_HOME", str(get_profile_dir("coder")))
        served = profiles_to_serve(multiplex=False)
        assert len(served) == 1
        first = served[0]
        assert first[0] == "coder"
        assert first[1] == get_profile_dir("coder")

    def test_on_returns_default_plus_all_named(self, profile_env):
        for profile_name in ("coder", "writer"):
            create_profile(profile_name, no_alias=True)
        homes_by_name = dict(profiles_to_serve(multiplex=True))
        assert homes_by_name.keys() == {"default", "coder", "writer"}
        assert homes_by_name["default"] == _get_default_hermes_home()
        assert homes_by_name["coder"] == get_profile_dir("coder")

    def test_on_default_always_first(self, profile_env):
        create_profile("coder", no_alias=True)
        served = profiles_to_serve(multiplex=True)
        first_name = served[0][0]
        assert first_name == "default"

    def test_on_active_profile_does_not_change_set(self, profile_env):
        """Enumeration is independent of which profile is active."""
        create_profile("coder", no_alias=True)
        set_active_profile("coder")
        homes_by_name = dict(profiles_to_serve(multiplex=True))
        assert homes_by_name.keys() == {"default", "coder"}

    def test_on_no_named_profiles_returns_just_default(self, profile_env):
        served = profiles_to_serve(multiplex=True)
        names = [pair[0] for pair in served]
        assert names == ["default"]

View file

@ -0,0 +1,127 @@
"""Tests for the unified provider catalog (hermes_cli.provider_catalog).
These are invariant tests, not snapshots: they assert the parity *contract*
between what ``hermes model`` shows (``CANONICAL_PROVIDERS``) and what the
catalog exposes, plus how each provider's ``auth_type`` maps to a desktop tab —
never a specific provider count or a frozen vendor list (both change over time).
"""
from hermes_cli.models import CANONICAL_PROVIDERS
from hermes_cli.provider_catalog import (
ProviderDescriptor,
provider_catalog,
provider_catalog_by_slug,
tab_for_auth_type,
)
def test_catalog_covers_every_hermes_model_provider():
    """PARITY CONTRACT: every ``CANONICAL_PROVIDERS`` slug is in the catalog."""
    catalog_slugs = {descriptor.slug for descriptor in provider_catalog()}
    for entry in CANONICAL_PROVIDERS:
        present = entry.slug in catalog_slugs
        assert present, (
            f"{entry.slug} is shown in `hermes model` but missing from provider_catalog()"
        )
def test_catalog_has_no_providers_outside_hermes_model():
    """Every catalog slug must also be a ``CANONICAL_PROVIDERS`` slug."""
    canonical_slugs = {entry.slug for entry in CANONICAL_PROVIDERS}
    for d in provider_catalog():
        known = d.slug in canonical_slugs
        assert known, f"{d.slug} in catalog but not in CANONICAL_PROVIDERS"
def test_every_descriptor_lands_on_exactly_one_known_tab():
    """Each descriptor's ``tab`` is either ``"keys"`` or ``"accounts"``."""
    known_tabs = {"keys", "accounts"}
    for d in provider_catalog():
        assert d.tab in known_tabs, f"{d.slug} has bad tab {d.tab!r}"
def test_descriptor_count_matches_canonical():
    """One descriptor per canonical entry (no dupes, no drops)."""
    descriptors = provider_catalog()
    total = len(descriptors)
    assert total == len(CANONICAL_PROVIDERS)
    # A duplicate slug would shrink the set below the list length.
    unique_slugs = {descriptor.slug for descriptor in descriptors}
    assert len(unique_slugs) == total
def test_profileless_providers_still_present():
    """Providers without a ProviderProfile must still resolve via fallbacks.

    lmstudio / openai-api / tencent-tokenhub / xai-oauth have no profile on
    main; they exist only as registry + canonical entries. The catalog must
    not require a profile to include a provider, and each must still carry
    a non-empty label and description.
    """
    profileless = ("lmstudio", "openai-api", "tencent-tokenhub", "xai-oauth")
    by = provider_catalog_by_slug()
    for slug in profileless:
        assert slug in by, f"{slug} dropped from catalog (profile-less provider)"
        descriptor = by[slug]
        assert descriptor.label, f"{slug} has empty label despite canonical fallback"
        assert descriptor.description, f"{slug} has empty description despite fallback"
def test_api_key_providers_route_to_keys_oauth_to_accounts():
    """Spot-check tab routing for a few known providers."""
    by = provider_catalog_by_slug()
    expected_tabs = {
        # api_key → keys
        "kilocode": "keys",
        "openai-api": "keys",
        # account / sign-in flows → accounts
        "google-gemini-cli": "accounts",
        "copilot-acp": "accounts",
    }
    for slug, tab in expected_tabs.items():
        assert by[slug].tab == tab
def test_copilot_surfaces_as_a_provider_with_its_own_token_var():
    """Regression for the reported bug: a GitHub Copilot login showed up under
    tools, never as a provider, because the shared GITHUB_TOKEN is tool-category.

    Copilot authenticates via the `copilot`/api_key path, so it belongs on the
    keys tab — but its PRIMARY credential var must be the provider-owned
    COPILOT_GITHUB_TOKEN, not the shared tool-category GITHUB_TOKEN. That is what
    lets the desktop render Copilot as its own provider card.
    """
    by = provider_catalog_by_slug()
    assert "copilot" in by

    copilot = by["copilot"]
    assert copilot.tab == "keys"

    env_vars = copilot.api_key_env_vars
    assert env_vars, "Copilot must expose a credential env var"
    primary = env_vars[0]
    assert primary == "COPILOT_GITHUB_TOKEN", (
        "Copilot's primary var must be the provider-owned token, not shared GITHUB_TOKEN"
    )
def test_bedrock_routes_to_keys():
    """Bedrock is aws_sdk (AWS_REGION/AWS_PROFILE), configured on the keys tab."""
    bedrock = provider_catalog_by_slug()["bedrock"]
    assert bedrock.tab == "keys"
def test_api_key_providers_expose_a_credential_env_var():
    """Every provider with ``auth_type == "api_key"`` must surface at least
    one env var to write the key into (otherwise the GUI can't configure it).

    Exemptions: ``aws_sdk`` providers are skipped by the ``auth_type`` filter
    (bedrock uses AWS_REGION/AWS_PROFILE), and the ``custom``
    bring-your-own-endpoint pseudo-provider is skipped by slug because it is
    configured inline via the local-endpoint flow rather than a fixed env var.
    """
    exempt = {"custom"}
    for d in provider_catalog():
        if d.auth_type != "api_key" or d.slug in exempt:
            continue
        assert d.api_key_env_vars, f"{d.slug} is api_key but exposes no env var"
def test_order_mirrors_canonical_declaration():
    """Descriptors are numbered 0..n-1 and listed in the same slug order as
    ``CANONICAL_PROVIDERS``."""
    descriptors = provider_catalog()
    for position, descriptor in enumerate(descriptors):
        assert descriptor.order == position

    catalog_slugs = [descriptor.slug for descriptor in descriptors]
    canonical_slugs = [entry.slug for entry in CANONICAL_PROVIDERS]
    assert catalog_slugs == canonical_slugs
def test_descriptors_are_provider_descriptor_instances():
    """``provider_catalog()`` yields only ``ProviderDescriptor`` objects."""
    for descriptor in provider_catalog():
        is_descriptor = isinstance(descriptor, ProviderDescriptor)
        assert is_descriptor
def test_tab_for_auth_type_helper():
    """Pin the ``auth_type`` → tab mapping for each auth type checked here."""
    expected_tab = {
        "api_key": "keys",
        "aws_sdk": "keys",
        "oauth_external": "accounts",
        "oauth_device_code": "accounts",
        "copilot": "accounts",
        "external_process": "accounts",
    }
    for auth_type, tab in expected_tab.items():
        assert tab_for_auth_type(auth_type) == tab

View file

@ -0,0 +1,90 @@
"""End-to-end provider parity contract: the desktop Providers tabs must show
the SAME provider universe as ``hermes model`` (the CLI/TUI picker).
This is the single load-bearing invariant of the unified provider catalog:
keys(/api/env provider rows) ∪ ids(/api/providers/oauth) ⊇ CANONICAL_PROVIDERS
i.e. every provider the CLI picker offers is configurable from the desktop app,
on one of the two Providers sub-tabs (API keys or Accounts). It is asserted as
an invariant against the real FastAPI endpoints (not a snapshot / count), so it
can never silently drift again when a provider plugin is added.
"""
from fastapi.testclient import TestClient
from hermes_cli.models import CANONICAL_PROVIDERS
from hermes_cli.provider_catalog import provider_catalog
from hermes_cli.web_server import _SESSION_TOKEN, app
client = TestClient(app)
HEADERS = {"X-Hermes-Session-Token": _SESSION_TOKEN}
# `custom` is the bring-your-own-endpoint pseudo-provider configured inline via
# the model picker's local-endpoint flow, not a fixed credential card. It is in
# the CLI picker's universe but intentionally has no dedicated Providers-tab
# card. Exempt it from the union check.
_EXEMPT = {"custom"}
# Providers that legitimately offer BOTH auth methods and so intentionally
# appear on both desktop tabs (an API-key card AND an account sign-in card).
# Anthropic supports a direct API key (Keys tab) and a subscription OAuth /
# Claude Code login (Accounts tab); surfacing both is correct, not a bug.
_DUAL_TAB = {"anthropic"}
def _keys_tab_providers() -> set[str]:
    """Return the provider slugs that have at least one card on the desktop API-keys tab.

    Reads ``/api/env`` and collects the ``provider`` value of every row whose
    ``category`` is ``"provider"``; rows with a missing or empty ``provider``
    are ignored.

    Raises:
        AssertionError: if the endpoint does not answer with HTTP 200.
    """
    resp = client.get("/api/env", headers=HEADERS)
    # Surface a broken endpoint with its response body instead of an opaque
    # AttributeError/KeyError raised while walking an unexpected payload.
    assert resp.status_code == 200, resp.text
    return {
        info.get("provider")
        for info in resp.json().values()
        if info.get("category") == "provider" and info.get("provider")
    }
def _accounts_tab_providers() -> set[str]:
    """Return the provider slugs offered on the desktop Accounts tab.

    Reads ``/api/providers/oauth`` and collects the ``id`` of every entry in
    its ``providers`` list.

    Raises:
        AssertionError: if the endpoint does not answer with HTTP 200.
    """
    resp = client.get("/api/providers/oauth", headers=HEADERS)
    # Fail with the response body rather than a bare KeyError on "providers"
    # when the endpoint returns an error payload.
    assert resp.status_code == 200, resp.text
    return {p["id"] for p in resp.json()["providers"]}
def test_every_hermes_model_provider_is_configurable_in_desktop():
    """PARITY CONTRACT: GUI (keys ∪ accounts) ⊇ `hermes model` universe.

    Every slug in ``CANONICAL_PROVIDERS`` that is not listed in ``_EXEMPT``
    must appear on at least one of the two desktop Providers tabs.
    """
    configurable = _keys_tab_providers() | _accounts_tab_providers()

    missing = []
    for entry in CANONICAL_PROVIDERS:
        slug = entry.slug
        if slug in _EXEMPT or slug in configurable:
            continue
        missing.append(slug)

    failure_message = (
        "providers shown in `hermes model` but not configurable in the desktop "
        f"Providers tabs: {missing}"
    )
    assert not missing, failure_message
def test_each_provider_lands_on_the_tab_its_auth_type_dictates():
    """Cross-check the catalog's ``tab`` routing against the live endpoints.

    A non-exempt descriptor routed to ``"keys"`` that exposes at least one
    API-key env var must show up under /api/env; one routed to ``"accounts"``
    must show up under /api/providers/oauth.
    """
    keys_tab = _keys_tab_providers()
    accounts_tab = _accounts_tab_providers()

    for descriptor in provider_catalog():
        slug = descriptor.slug
        if slug in _EXEMPT:
            continue
        if descriptor.tab == "accounts":
            assert slug in accounts_tab, f"{slug} (accounts tab) missing from /api/providers/oauth"
        elif descriptor.tab == "keys" and descriptor.api_key_env_vars:
            assert slug in keys_tab, f"{slug} (keys tab) missing from /api/env"
def test_no_provider_appears_on_both_tabs():
    """No provider is duplicated across the two desktop tabs.

    Slugs listed in ``_EXEMPT`` or ``_DUAL_TAB`` are allowed to appear on both
    tabs; any other slug present on both is reported as a failure.
    """
    on_both_tabs = _keys_tab_providers().intersection(_accounts_tab_providers())
    permitted = _EXEMPT | _DUAL_TAB
    overlap = on_both_tabs - permitted
    assert not overlap, f"providers appearing on BOTH desktop tabs: {sorted(overlap)}"

View file

@ -470,6 +470,39 @@ def test_xai_oauth_listed_as_loopback_flow():
assert "grok" in providers["xai-oauth"]["name"].lower()
def test_accounts_offers_every_oauth_provider_from_catalog():
    """PARITY CONTRACT: the Accounts endpoint offers every accounts-tab catalog provider.

    Each descriptor in the unified catalog whose ``tab`` is ``"accounts"`` must
    have its slug among the ids returned by ``/api/providers/oauth``.
    """
    from hermes_cli.provider_catalog import provider_catalog

    response = client.get("/api/providers/oauth", headers=HEADERS)
    assert response.status_code == 200, response.text
    offered_ids = {entry["id"] for entry in response.json()["providers"]}

    accounts_descriptors = (d for d in provider_catalog() if d.tab == "accounts")
    for descriptor in accounts_descriptors:
        assert descriptor.slug in offered_ids, (
            f"{descriptor.slug} is an accounts-tab provider in `hermes model` but is "
            f"missing from the desktop Accounts tab (/api/providers/oauth)"
        )
def test_gemini_cli_and_copilot_acp_now_in_accounts():
    """Regression: google-gemini-cli and copilot-acp both get an Accounts card.

    Also pins the copilot-acp card as ``flow == "external"`` and not
    disconnectable.
    """
    response = client.get("/api/providers/oauth", headers=HEADERS)
    assert response.status_code == 200, response.text
    cards_by_id = {entry["id"]: entry for entry in response.json()["providers"]}

    for expected_id in ("google-gemini-cli", "copilot-acp"):
        assert expected_id in cards_by_id

    # copilot-acp is managed by an external CLI: read-only card, not auto-removable.
    copilot_acp = cards_by_id["copilot-acp"]
    assert copilot_acp["flow"] == "external"
    assert copilot_acp["disconnectable"] is False
def test_oauth_catalog_marks_external_providers_not_disconnectable():
"""External CLI credentials are visible in Accounts but cannot be removed by Hermes."""
resp = client.get("/api/providers/oauth", headers=HEADERS)
@ -804,3 +837,56 @@ def test_unknown_pkce_provider_rejected_cleanly():
# 4xx — what we MUST NOT see is a 200 with claude.ai in the body.
assert resp.status_code >= 400, resp.text
assert "claude.ai" not in resp.text.lower()
def test_status_falls_through_to_generic_dispatcher_for_catalog_only_provider():
    """A slug with no dedicated status branch reports the generic dispatcher's status.

    ``hermes_cli.auth.get_auth_status`` is patched to describe a logged-in
    provider; ``_resolve_provider_status`` (called with ``None`` as its second
    argument) must then reflect that status, expose only a preview of the
    access token, and pass through expiry and refresh-token information.
    """
    import hermes_cli.web_server as ws

    secret_token = "sk-future-secret-token-xyz"
    generic_status = {
        "logged_in": True,
        "provider": "some-future-oauth",
        "name": "Future OAuth Provider",
        "access_token": secret_token,
        "expires_at": "2026-12-01T00:00:00Z",
        "has_refresh_token": True,
    }

    with patch("hermes_cli.auth.get_auth_status", return_value=generic_status):
        resolved = ws._resolve_provider_status("some-future-oauth", None)

    assert resolved["logged_in"] is True
    assert resolved["source"] == "some-future-oauth"
    assert resolved["source_label"] == "Future OAuth Provider"
    # Token is previewed, never returned whole.
    assert resolved["token_preview"] and secret_token not in resolved["token_preview"]
    assert resolved["expires_at"] == "2026-12-01T00:00:00Z"
    assert resolved["has_refresh_token"] is True
def test_status_hardcoded_branch_wins_over_generic_fallback():
    """The ``nous`` slug still resolves through its own status function.

    With ``get_nous_auth_status`` patched, the resolved status carries the
    ``nous_portal`` source and the portal URL as its label.
    """
    import hermes_cli.web_server as ws

    nous_status = {"logged_in": True, "portal_base_url": "https://portal.test"}
    with patch("hermes_cli.auth.get_nous_auth_status", return_value=nous_status):
        resolved = ws._resolve_provider_status("nous", None)

    assert resolved["source"] == "nous_portal"
    assert resolved["source_label"] == "https://portal.test"
def test_status_unknown_provider_degrades_to_logged_out():
    """When the generic dispatcher reports logged-out, so does the resolved status."""
    import hermes_cli.web_server as ws

    logged_out = {"logged_in": False}
    with patch("hermes_cli.auth.get_auth_status", return_value=logged_out):
        resolved = ws._resolve_provider_status("totally-unknown", None)

    assert resolved["logged_in"] is False

View file

@ -1299,6 +1299,57 @@ class TestWebServerEndpoints:
for key, info in data.items():
assert info["channel_managed"] is (key in channel_keys)
def test_get_env_vars_surfaces_catalog_providers(self):
    """Each keys-tab catalog provider shows up in /api/env as a provider card.

    For every catalog descriptor routed to the ``"keys"`` tab that declares at
    least one API-key env var, the first (primary) env var must be present in
    the /api/env payload, categorised as ``"provider"`` and tagged with the
    descriptor's slug and label.
    """
    from hermes_cli.provider_catalog import provider_catalog

    env_rows = self.client.get("/api/env").json()

    for descriptor in provider_catalog():
        if descriptor.tab == "keys" and descriptor.api_key_env_vars:
            # The PRIMARY credential var must surface as this provider's card.
            # (Shared aliases like GITHUB_TOKEN are intentionally left on their
            # existing tool category and not hijacked — see the copilot test.)
            primary = descriptor.api_key_env_vars[0]
            assert primary in env_rows, f"{primary} ({descriptor.slug}) missing from /api/env"
            row = env_rows[primary]
            assert row["category"] == "provider"
            assert row["provider"] == descriptor.slug
            assert row["provider_label"] == descriptor.label
def test_get_env_vars_provider_rows_carry_grouping_hints(self):
    """The OPENAI_API_KEY row is tagged with its owning provider and category."""
    env_rows = self.client.get("/api/env").json()

    # OPENAI_API_KEY is a hand-listed protected var AND a catalog provider;
    # it must come back tagged to the openai-api provider.
    openai_row = env_rows["OPENAI_API_KEY"]
    assert openai_row["provider"] == "openai-api"
    assert openai_row["category"] == "provider"
def test_get_env_vars_copilot_uses_provider_token_not_shared_github_token(self):
    """COPILOT_GITHUB_TOKEN belongs to the copilot provider; GITHUB_TOKEN does not."""
    env_rows = self.client.get("/api/env").json()

    copilot_row = env_rows["COPILOT_GITHUB_TOKEN"]
    assert copilot_row["provider"] == "copilot"
    assert copilot_row["category"] == "provider"

    # Shared GITHUB_TOKEN must NOT be hijacked into the copilot provider card.
    shared_row = env_rows.get("GITHUB_TOKEN", {})
    assert shared_row.get("provider", "") != "copilot"
def test_get_env_vars_bedrock_aws_vars_tagged_to_provider(self):
    """AWS_REGION and AWS_PROFILE are tagged to the bedrock provider in /api/env."""
    env_rows = self.client.get("/api/env").json()

    region_row = env_rows["AWS_REGION"]
    assert region_row["provider"] == "bedrock"
    assert region_row["category"] == "provider"
    assert env_rows["AWS_PROFILE"]["provider"] == "bedrock"
def test_platform_scoped_messaging_env_vars_are_channel_managed(self):
from hermes_cli.web_server import (
_MESSAGING_KEYS_PAGE_KEYS,
@ -1552,6 +1603,27 @@ class TestWebServerEndpoints:
assert telegram["enabled"] is False
assert any(field["key"] == "TELEGRAM_BOT_TOKEN" and field["required"] for field in telegram["env_vars"])
def test_slack_messaging_platform_exposes_user_allowlist(self):
    """The Slack platform entry documents and exposes the SLACK_ALLOWED_USERS field.

    Checks the platform description, the presence of the three Slack env
    fields, and the prompt/description/help texts attached to them.
    """
    response = self.client.get("/api/messaging/platforms")
    assert response.status_code == 200

    slack = next(
        entry for entry in response.json()["platforms"] if entry["id"] == "slack"
    )
    fields_by_key = {field["key"]: field for field in slack["env_vars"]}

    assert "allowed Slack member IDs" in slack["description"]
    required_keys = {"SLACK_BOT_TOKEN", "SLACK_APP_TOKEN", "SLACK_ALLOWED_USERS"}
    assert required_keys.issubset(fields_by_key)

    allowlist = fields_by_key["SLACK_ALLOWED_USERS"]
    assert allowlist["prompt"] == "Allowed Slack member IDs"
    assert allowlist["is_password"] is False
    assert "member IDs" in allowlist["description"]

    assert "Bot User OAuth Token" in fields_by_key["SLACK_BOT_TOKEN"]["help"]
    assert "App-Level Tokens" in fields_by_key["SLACK_APP_TOKEN"]["help"]
    assert "Copy member ID" in allowlist["help"]
def test_weixin_messaging_metadata_describes_personal_ilink_setup(self):
resp = self.client.get("/api/messaging/platforms")
@ -1628,6 +1700,70 @@ class TestWebServerEndpoints:
telegram = next(platform for platform in status if platform["id"] == "telegram")
assert telegram["enabled"] is False
def test_update_messaging_platform_saves_slack_allowed_users(self):
    """A comma-separated list of Slack member IDs is accepted and persisted to env."""
    from hermes_cli.config import load_env

    allowed_users = "U01ABC2DEF3,U04XYZ5LMN6"
    response = self.client.put(
        "/api/messaging/platforms/slack",
        json={"env": {"SLACK_ALLOWED_USERS": allowed_users}},
    )

    assert response.status_code == 200
    assert load_env()["SLACK_ALLOWED_USERS"] == allowed_users
def test_update_messaging_platform_rejects_swapped_slack_bot_token(self):
    """An ``xapp-`` value in SLACK_BOT_TOKEN is rejected with a 400 that mentions ``xoxb-``."""
    payload = {"env": {"SLACK_BOT_TOKEN": "xapp-wrong-token-type"}}
    response = self.client.put("/api/messaging/platforms/slack", json=payload)

    assert response.status_code == 400
    assert "xoxb-" in response.json()["detail"]
def test_update_messaging_platform_rejects_swapped_slack_app_token(self):
    """An ``xoxb-`` value in SLACK_APP_TOKEN is rejected with a 400 that mentions ``xapp-``."""
    payload = {"env": {"SLACK_APP_TOKEN": "xoxb-wrong-token-type"}}
    response = self.client.put("/api/messaging/platforms/slack", json=payload)

    assert response.status_code == 400
    assert "xapp-" in response.json()["detail"]
def test_update_messaging_platform_rejects_invalid_slack_allowed_users(self):
    """A SLACK_ALLOWED_USERS list containing a malformed entry is rejected with a 400."""
    payload = {"env": {"SLACK_ALLOWED_USERS": "U01ABC2DEF3,not-a-user"}}
    response = self.client.put("/api/messaging/platforms/slack", json=payload)

    assert response.status_code == 400
    assert "member IDs" in response.json()["detail"]
def test_update_messaging_platform_accepts_slack_allowed_users_wildcard(self):
    """The ``*`` wildcard is accepted for SLACK_ALLOWED_USERS and stored verbatim."""
    # "*" is the gateway's allow-all wildcard (gateway/platforms/slack.py),
    # so the dashboard must accept it rather than rejecting it as malformed.
    from hermes_cli.config import load_env

    wildcard = "*"
    response = self.client.put(
        "/api/messaging/platforms/slack",
        json={"env": {"SLACK_ALLOWED_USERS": wildcard}},
    )

    assert response.status_code == 200
    assert load_env()["SLACK_ALLOWED_USERS"] == wildcard
def test_update_messaging_platform_accepts_slack_allowed_users_trailing_comma(self):
    """Empty entries (interior or trailing commas) do not cause a rejection."""
    # The gateway drops empty entries (gateway/platforms/slack.py), so a
    # trailing/interior comma must not be rejected by the dashboard.
    from hermes_cli.config import load_env

    with_empty_entries = "U01ABC2DEF3,,W04XYZ5LMN6,"
    response = self.client.put(
        "/api/messaging/platforms/slack",
        json={"env": {"SLACK_ALLOWED_USERS": with_empty_entries}},
    )

    assert response.status_code == 200
    assert load_env()["SLACK_ALLOWED_USERS"] == with_empty_entries
def test_messaging_platform_test_reports_missing_required_setup(self):
resp = self.client.put("/api/messaging/platforms/discord", json={"enabled": True})
assert resp.status_code == 200
@ -5062,6 +5198,7 @@ class TestPtyWebSocket:
_argv, _cwd, env = self.ws_module._resolve_chat_argv()
assert env["HERMES_TUI_DASHBOARD"] == "1"
assert env["HERMES_TUI_INLINE"] == "1"
assert env["HERMES_TUI_DISABLE_MOUSE"] == "1"

View file

@ -436,3 +436,55 @@ def test_stream_upload_large_file_under_cap_succeeds(forced_files_client, monkey
assert created.status_code == 200
assert file_path.stat().st_size == len(payload)
assert file_path.read_bytes() == payload
def test_stream_upload_cleans_temp_on_cancellation(forced_files_client):
    """An upload aborted by ``asyncio.CancelledError`` leaves no file behind.

    The endpoint coroutine is driven directly with ``asyncio.run`` so the
    ``CancelledError`` raised by the fake upload's second ``read`` reaches the
    test. Afterwards neither the target file nor any sibling whose name
    contains ``.upload`` may exist in the target directory.
    """
    import asyncio

    _client, root = forced_files_client
    destination = root / "out" / "aborted.bin"
    destination.parent.mkdir(parents=True, exist_ok=True)

    class _AbortingUpload:
        """Upload stand-in: the first read yields a chunk, every later read is cancelled."""

        filename = "aborted.bin"

        def __init__(self):
            self._first_chunk_served = False

        async def read(self, _size):
            if self._first_chunk_served:
                raise asyncio.CancelledError()
            self._first_chunk_served = True
            return b"partial chunk before the client vanished"

        async def close(self):
            return None

    upload_coroutine = web_server.upload_managed_file_stream(
        request=SimpleNamespace(),
        file=_AbortingUpload(),
        path=str(destination),
        overwrite=True,
    )
    with pytest.raises(asyncio.CancelledError):
        asyncio.run(upload_coroutine)

    # No partial data was promoted into place ...
    assert not destination.exists()
    # ... and no .upload temp file was left behind.
    leftovers = [
        entry.name for entry in destination.parent.iterdir() if ".upload" in entry.name
    ]
    assert leftovers == [], f"temp upload files leaked on cancellation: {leftovers}"

View file

@ -265,6 +265,355 @@ class TestOpenVikingSkillQuerySafety:
assert RecordingVikingClient.calls == []
class TestOpenVikingTurnConversion:
    """Tests for slicing a transcript into the current turn and converting it to an OpenViking batch."""

    @staticmethod
    def _call(call_id, tool_name, arguments):
        """Build one function-style tool call whose arguments are JSON-encoded."""
        return {
            "id": call_id,
            "type": "function",
            "function": {
                "name": tool_name,
                "arguments": json.dumps(arguments),
            },
        }

    @staticmethod
    def _result(call_id, tool_name, content):
        """Build one ``tool`` role message answering ``call_id``."""
        return {
            "role": "tool",
            "tool_call_id": call_id,
            "name": tool_name,
            "content": content,
        }

    def test_extract_current_turn_anchors_on_latest_matching_user_and_assistant(self):
        """The extracted turn starts at the LAST matching user message, not an earlier duplicate."""
        question = "Please inspect the repository for assemble hooks."
        answer = "The current main does not expose assemble."
        transcript = [
            {"role": "user", "content": question},
            {"role": "assistant", "content": "Earlier answer."},
            {"role": "user", "content": question},
            {
                "role": "assistant",
                "content": "I will search the codebase.",
                "tool_calls": [
                    self._call("call_rg_1", "shell_command", {"command": "rg assemble"}),
                ],
            },
            self._result(
                "call_rg_1",
                "shell_command",
                "agent/context_engine.py: no preassemble hook",
            ),
            {"role": "assistant", "content": answer},
        ]

        extracted = OpenVikingMemoryProvider._extract_current_turn_messages(
            transcript, question, answer
        )

        assert extracted == transcript[2:]

    def test_messages_to_openviking_batch_coalesces_tool_results(self):
        """A tool call and its result become one assistant message holding a completed tool part."""
        conversation = [
            {"role": "user", "content": "Please inspect the repository for assemble hooks."},
            {
                "role": "assistant",
                "content": "I will search the codebase.",
                "tool_calls": [
                    self._call("call_rg_1", "shell_command", {"command": "rg assemble"}),
                ],
            },
            self._result(
                "call_rg_1",
                "shell_command",
                "agent/context_engine.py: no preassemble hook",
            ),
            {"role": "assistant", "content": "The current main does not expose assemble."},
        ]

        converted = OpenVikingMemoryProvider._messages_to_openviking_batch(conversation)

        roles = [entry["role"] for entry in converted]
        assert roles == ["user", "assistant", "assistant", "assistant"]

        user_entry, intro_entry, tool_entry, final_entry = converted
        assert user_entry["parts"] == [
            {"type": "text", "text": "Please inspect the repository for assemble hooks."}
        ]
        assert intro_entry["parts"] == [
            {"type": "text", "text": "I will search the codebase."}
        ]
        assert tool_entry["parts"] == [
            {
                "type": "tool",
                "tool_id": "call_rg_1",
                "tool_name": "shell_command",
                "tool_input": {"command": "rg assemble"},
                "tool_output": "agent/context_engine.py: no preassemble hook",
                "tool_status": "completed",
            }
        ]
        assert final_entry["parts"] == [
            {"type": "text", "text": "The current main does not expose assemble."}
        ]

    def test_messages_to_openviking_batch_marks_json_tool_error_results(self):
        """A tool result whose JSON body carries an ``error`` is recorded with status ``error``."""
        error_body = json.dumps({"error": "File not found", "exit_code": 1})
        conversation = [
            {"role": "user", "content": "Check the file."},
            {
                "role": "assistant",
                "content": "",
                "tool_calls": [
                    self._call("call_read_1", "read_file", {"path": "missing.md"}),
                ],
            },
            self._result("call_read_1", "read_file", error_body),
        ]

        converted = OpenVikingMemoryProvider._messages_to_openviking_batch(conversation)

        tool_entry = converted[1]
        assert tool_entry["role"] == "assistant"
        assert tool_entry["parts"] == [
            {
                "type": "tool",
                "tool_id": "call_read_1",
                "tool_name": "read_file",
                "tool_input": {"path": "missing.md"},
                "tool_output": json.dumps({"error": "File not found", "exit_code": 1}),
                "tool_status": "error",
            }
        ]

    def test_messages_to_openviking_batch_keeps_pending_tool_call_without_result(self):
        """A tool call with no result yet stays in the batch as a ``pending`` tool part."""
        conversation = [
            {"role": "user", "content": "Start a long running check."},
            {
                "role": "assistant",
                "content": "Starting it now.",
                "tool_calls": [
                    self._call("call_long_1", "long_check", {"target": "repo"}),
                ],
            },
        ]

        converted = OpenVikingMemoryProvider._messages_to_openviking_batch(conversation)

        assert converted[1]["parts"] == [
            {"type": "text", "text": "Starting it now."},
            {
                "type": "tool",
                "tool_id": "call_long_1",
                "tool_name": "long_check",
                "tool_input": {"target": "repo"},
                "tool_status": "pending",
            },
        ]

    def test_messages_to_openviking_batch_coalesces_adjacent_tool_results(self):
        """Two back-to-back tool results land as two parts of a single assistant message."""
        conversation = [
            {"role": "user", "content": "Run both tools."},
            {
                "role": "assistant",
                "content": "",
                "tool_calls": [
                    self._call("call_a", "first_tool", {"x": 1}),
                    self._call("call_b", "second_tool", {"y": 2}),
                ],
            },
            self._result("call_a", "first_tool", "a"),
            self._result("call_b", "second_tool", "b"),
            {"role": "assistant", "content": "Done."},
        ]

        converted = OpenVikingMemoryProvider._messages_to_openviking_batch(conversation)

        roles = [entry["role"] for entry in converted]
        assert roles == ["user", "assistant", "assistant"]
        assert converted[1]["parts"] == [
            {
                "type": "tool",
                "tool_id": "call_a",
                "tool_name": "first_tool",
                "tool_input": {"x": 1},
                "tool_output": "a",
                "tool_status": "completed",
            },
            {
                "type": "tool",
                "tool_id": "call_b",
                "tool_name": "second_tool",
                "tool_input": {"y": 2},
                "tool_output": "b",
                "tool_status": "completed",
            },
        ]

    def test_messages_to_openviking_batch_skips_openviking_recall_tool_results(self):
        """Calls to the viking recall tools are left out of the batch; other tool calls are kept."""
        recalled_payload = json.dumps({
            "results": [
                {
                    "uri": "viking://user/hermes/memories/context",
                    "abstract": "Old OpenViking memory content",
                }
            ]
        })

        for recall_tool_name in ("viking_search", "viking_read", "viking_browse"):
            conversation = [
                {"role": "user", "content": "What did we decide about context assembly?"},
                {
                    "role": "assistant",
                    "content": "",
                    "tool_calls": [
                        self._call(
                            "call_recall_1",
                            recall_tool_name,
                            {"query": "context assembly decision"},
                        ),
                        self._call(
                            "call_shell_1",
                            "shell_command",
                            {"command": "rg preassemble"},
                        ),
                    ],
                },
                self._result("call_recall_1", recall_tool_name, recalled_payload),
                self._result(
                    "call_shell_1",
                    "shell_command",
                    "plugins/memory/openviking/__init__.py",
                ),
                {"role": "assistant", "content": "We decided to keep sync_turn scoped to ingestion."},
            ]

            converted = OpenVikingMemoryProvider._messages_to_openviking_batch(conversation)

            roles = [entry["role"] for entry in converted]
            assert roles == ["user", "assistant", "assistant"]
            assert converted[1]["parts"] == [
                {
                    "type": "tool",
                    "tool_id": "call_shell_1",
                    "tool_name": "shell_command",
                    "tool_input": {"command": "rg preassemble"},
                    "tool_output": "plugins/memory/openviking/__init__.py",
                    "tool_status": "completed",
                }
            ]

            serialized = json.dumps(converted)
            assert recall_tool_name not in serialized
            assert "Old OpenViking memory content" not in serialized

    def test_messages_to_openviking_batch_empty_tool_id_does_not_drop_other_results(self):
        """An empty-id recall result is excluded without taking other empty-id results with it."""
        # A recall tool result that arrives with an empty tool_call_id must not
        # poison the skip set with "" and silently drop unrelated tool results
        # that also lack an id. Empty tool_call_id is reachable in the canonical
        # transcript (agent_runtime_helpers defaults it to "").
        conversation = [
            {"role": "user", "content": "What did we decide?"},
            {
                "role": "assistant",
                "content": "",
                "tool_calls": [
                    self._call("", "viking_search", {"query": "decision"}),
                ],
            },
            self._result("", "viking_search", json.dumps({"results": ["recall stuff"]})),
            self._result("", "shell_command", "important shell output"),
            {"role": "assistant", "content": "done"},
        ]

        converted = OpenVikingMemoryProvider._messages_to_openviking_batch(conversation)
        serialized = json.dumps(converted)

        # The unrelated (empty-id) shell result must survive.
        assert "important shell output" in serialized
        # The recall tool result must still be excluded.
        assert "recall stuff" not in serialized
        assert "viking_search" not in serialized

    def test_messages_to_openviking_batch_preserves_responses_text_parts(self):
        """``input_text`` / ``output_text`` content parts are converted to plain ``text`` parts."""
        conversation = [
            {"role": "user", "content": [{"type": "input_text", "text": "hello"}]},
            {"role": "assistant", "content": [{"type": "output_text", "text": "answer"}]},
        ]

        converted = OpenVikingMemoryProvider._messages_to_openviking_batch(conversation)

        expected = [
            {"role": "user", "parts": [{"type": "text", "text": "hello"}]},
            {"role": "assistant", "parts": [{"type": "text", "text": "answer"}]},
        ]
        assert converted == expected

    def test_messages_to_openviking_batch_adds_assistant_peer_id_when_requested(self):
        """Passing ``assistant_peer_id`` stamps ``peer_id`` on the assistant message only."""
        conversation = [
            {"role": "user", "content": "hello"},
            {"role": "assistant", "content": "answer"},
        ]

        converted = OpenVikingMemoryProvider._messages_to_openviking_batch(
            conversation,
            assistant_peer_id="hermes",
        )

        expected = [
            {"role": "user", "parts": [{"type": "text", "text": "hello"}]},
            {"role": "assistant", "parts": [{"type": "text", "text": "answer"}], "peer_id": "hermes"},
        ]
        assert converted == expected
class TestOpenVikingRead:
def test_overview_read_normalizes_uri_and_unwraps_result(self):
provider = OpenVikingMemoryProvider()

View file

@ -83,6 +83,66 @@ def _make_mock_client():
return client
def _provider_for_mode(tmp_path, monkeypatch, mode: str):
    """Return an initialized Hindsight provider for ``mode`` with no client pre-seeded.

    Writes ``hindsight/config.json`` under ``tmp_path``, points the plugin's
    ``get_hermes_home`` at ``tmp_path``, then constructs and initializes the
    provider.
    """
    hindsight_dir = tmp_path / "hindsight"
    hindsight_dir.mkdir(parents=True, exist_ok=True)
    settings = {
        "mode": mode,
        "apiKey": "test-key",
        "api_url": "http://localhost:9999",
        "bank_id": "test-bank",
        "budget": "mid",
        "memory_mode": "hybrid",
    }
    (hindsight_dir / "config.json").write_text(json.dumps(settings))

    monkeypatch.setattr("plugins.memory.hindsight.get_hermes_home", lambda: tmp_path)

    provider = HindsightMemoryProvider()
    provider.initialize(
        session_id="test-session",
        hermes_home=str(tmp_path),
        platform="cli",
    )
    return provider
def _assert_cloud_client_lazy_installed_before_import(tmp_path, monkeypatch, mode: str):
    """Assert that ``_get_client`` ensures the lazy dependency before importing it.

    ``tools.lazy_deps.ensure`` is replaced by a recorder and ``__import__`` is
    guarded so that importing ``hindsight_client`` only succeeds once exactly
    one ``ensure("memory.hindsight", False)`` call has been recorded. The
    resulting client must be the fake class built with the configured URL,
    timeout and API key.
    """
    import builtins

    provider = _provider_for_mode(tmp_path, monkeypatch, mode)
    expected_ensure_calls = [("memory.hindsight", False)]
    ensure_calls = []

    def fake_ensure(feature, prompt=True):
        ensure_calls.append((feature, prompt))

    class FakeHindsight:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    real_import = builtins.__import__

    def guarded_import(name, globals=None, locals=None, fromlist=(), level=0):
        # Everything except the lazily-installed package imports normally.
        if name != "hindsight_client":
            return real_import(name, globals, locals, fromlist, level)
        # The package only "exists" once ensure() has been called as expected.
        if ensure_calls == expected_ensure_calls:
            return SimpleNamespace(Hindsight=FakeHindsight)
        raise ModuleNotFoundError("No module named 'hindsight_client'")

    monkeypatch.setattr("tools.lazy_deps.ensure", fake_ensure)
    monkeypatch.setattr(builtins, "__import__", guarded_import)

    client = provider._get_client()

    assert ensure_calls == [("memory.hindsight", False)]
    assert isinstance(client, FakeHindsight)
    assert client.kwargs == {
        "base_url": "http://localhost:9999",
        "timeout": 120.0,
        "api_key": "test-key",
    }
class _FakeSessionDB:
def __init__(self, messages=None):
self._messages = list(messages or [])
@ -232,6 +292,14 @@ class TestSchemas:
class TestConfig:
def test_cloud_client_lazy_installs_dependency_before_import(self, tmp_path, monkeypatch):
    """In ``cloud`` mode the client dependency is ensured before it is imported."""
    _assert_cloud_client_lazy_installed_before_import(
        tmp_path,
        monkeypatch,
        mode="cloud",
    )
def test_local_external_client_lazy_installs_dependency_before_import(self, tmp_path, monkeypatch):
    """In ``local_external`` mode the client dependency is ensured before it is imported."""
    _assert_cloud_client_lazy_installed_before_import(
        tmp_path,
        monkeypatch,
        mode="local_external",
    )
def test_default_values(self, provider):
assert provider._auto_retain is True
assert provider._auto_recall is True

View file

@ -1975,7 +1975,10 @@ def test_on_session_switch_commits_old_session_and_rotates_id():
provider.on_session_switch("new-sid", parent_session_id="old-sid")
provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
provider._client.post.assert_called_once_with(
"/api/v1/sessions/old-sid/commit",
{"keep_recent_count": 0},
)
assert provider._session_id == "new-sid"
assert provider._turn_count == 0
@ -1998,7 +2001,10 @@ def test_on_session_switch_commits_pending_tokens_without_turn_count():
provider.on_session_switch("new-sid")
provider._client.get.assert_called_once_with("/api/v1/sessions/old-sid")
provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
provider._client.post.assert_called_once_with(
"/api/v1/sessions/old-sid/commit",
{"keep_recent_count": 0},
)
assert provider._session_id == "new-sid"
assert provider._turn_count == 0
@ -2051,7 +2057,10 @@ def test_on_session_switch_waits_for_inflight_sync_thread():
provider.on_session_switch("new-sid")
assert join_calls, "expected on_session_switch to join the in-flight sync thread"
provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
provider._client.post.assert_called_once_with(
"/api/v1/sessions/old-sid/commit",
{"keep_recent_count": 0},
)
def test_on_session_switch_noop_on_empty_new_id():
@ -2186,6 +2195,78 @@ def test_sync_turn_retries_batch_write_with_fresh_client():
)]
def test_sync_turn_structured_messages_include_assistant_peer_id():
    """``sync_turn`` with structured messages posts one batch whose assistant entries carry ``peer_id``.

    The module's ``_VikingClient`` is swapped for a recorder for the duration
    of the call, and the single recorded POST is compared against the expected
    batch payload for session ``sid-structured``.
    """
    import plugins.memory.openviking as _mod

    provider = OpenVikingMemoryProvider()
    provider._client = MagicMock()
    provider._endpoint = "http://test"
    provider._api_key = ""
    provider._account = "acct"
    provider._user = "usr"
    provider._agent = "hermes"
    provider._session_id = "sid-structured"

    posted = []

    class StubClient:
        """Accepts any constructor arguments and records every POST."""

        def __init__(self, *a, **kw):
            pass

        def post(self, path, payload=None, **kwargs):
            posted.append((path, payload))
            return {}

    shell_call = {
        "id": "call-1",
        "type": "function",
        "function": {"name": "shell_command", "arguments": json.dumps({"cmd": "pwd"})},
    }
    messages = [
        {"role": "user", "content": [{"type": "input_text", "text": "u"}]},
        {"role": "assistant", "content": "Looking.", "tool_calls": [shell_call]},
        {"role": "tool", "tool_call_id": "call-1", "name": "shell_command", "content": "ok"},
        {"role": "assistant", "content": [{"type": "output_text", "text": "a"}]},
    ]

    # Swap the client class by hand and always restore it, even on failure.
    original_client_cls = _mod._VikingClient
    _mod._VikingClient = StubClient
    try:
        provider.sync_turn("u", "a", messages=messages)
        assert provider._drain_writers("sid-structured", timeout=2.0)
    finally:
        _mod._VikingClient = original_client_cls

    expected_batch = [
        {"role": "user", "parts": [{"type": "text", "text": "u"}]},
        {"role": "assistant", "parts": [{"type": "text", "text": "Looking."}], "peer_id": "hermes"},
        {
            "role": "assistant",
            "parts": [
                {
                    "type": "tool",
                    "tool_id": "call-1",
                    "tool_name": "shell_command",
                    "tool_input": {"cmd": "pwd"},
                    "tool_output": "ok",
                    "tool_status": "completed",
                }
            ],
            "peer_id": "hermes",
        },
        {"role": "assistant", "parts": [{"type": "text", "text": "a"}], "peer_id": "hermes"},
    ]
    assert posted == [(
        "/api/v1/sessions/sid-structured/messages/batch",
        {"messages": expected_batch},
    )]
def test_sync_turn_noop_when_session_id_blank():
provider = OpenVikingMemoryProvider()
provider._client = MagicMock()
@ -2206,7 +2287,10 @@ def test_on_session_end_marks_session_clean_after_successful_commit():
provider.on_session_end([])
provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
provider._client.post.assert_called_once_with(
"/api/v1/sessions/old-sid/commit",
{"keep_recent_count": 0},
)
assert provider._turn_count == 0
@ -2228,7 +2312,10 @@ def test_on_session_end_commits_pending_tokens_without_turn_count():
provider.on_session_end([])
provider._client.get.assert_called_once_with("/api/v1/sessions/old-sid")
provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
provider._client.post.assert_called_once_with(
"/api/v1/sessions/old-sid/commit",
{"keep_recent_count": 0},
)
def test_end_then_switch_does_not_double_commit():
@ -2241,7 +2328,10 @@ def test_end_then_switch_does_not_double_commit():
provider.on_session_switch("new-sid", parent_session_id="old-sid")
# Exactly one commit call, on the OLD session, fired by on_session_end.
provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
provider._client.post.assert_called_once_with(
"/api/v1/sessions/old-sid/commit",
{"keep_recent_count": 0},
)
assert provider._session_id == "new-sid"
assert provider._turn_count == 0
@ -2253,7 +2343,10 @@ def test_end_then_switch_with_pending_tokens_does_not_double_commit():
provider.on_session_end([])
provider.on_session_switch("new-sid", parent_session_id="old-sid")
provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
provider._client.post.assert_called_once_with(
"/api/v1/sessions/old-sid/commit",
{"keep_recent_count": 0},
)
assert provider._session_id == "new-sid"
assert provider._turn_count == 0
@ -2400,7 +2493,10 @@ def test_on_session_switch_does_not_block_caller_on_slow_drain():
# Let the finalizer finish so it doesn't leak past the test.
release_drain.set()
assert provider._drain_finalizers(timeout=5.0)
provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
provider._client.post.assert_called_once_with(
"/api/v1/sessions/old-sid/commit",
{"keep_recent_count": 0},
)
def test_on_session_switch_defers_old_commit_to_finalizer_thread():
@ -2415,7 +2511,7 @@ def test_on_session_switch_defers_old_commit_to_finalizer_thread():
committed = threading.Event()
drain_timeouts = []
def fake_post(path):
def fake_post(path, payload=None):
committed.set()
return {}
@ -2433,7 +2529,10 @@ def test_on_session_switch_defers_old_commit_to_finalizer_thread():
assert provider._turn_count == 0
# The old-session commit lands on the finalizer thread, not inline.
assert committed.wait(timeout=5.0), "old session was not finalized off-thread"
provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
provider._client.post.assert_called_once_with(
"/api/v1/sessions/old-sid/commit",
{"keep_recent_count": 0},
)
# The finalizer drains with the deferred (longer) budget, not inline 10s.
assert drain_timeouts == [_DEFERRED_COMMIT_TIMEOUT]

View file

@ -12,7 +12,7 @@ Verifies that:
from __future__ import annotations
from unittest.mock import patch
from unittest.mock import MagicMock, patch
import pytest
@ -148,6 +148,17 @@ class TestRunConversationCodexPath:
and m.get("content") == "echo: hello"]
assert final, f"expected final assistant message in {msgs}"
def test_projected_messages_are_synced_to_external_memory(self, fake_session):
    """The memory manager's ``sync_all`` is called once with the conversation's messages.

    The background review is patched out; the ``messages`` keyword passed to
    ``sync_all`` must equal the ``messages`` entry of the conversation result.
    """
    agent = _make_codex_agent()
    memory_manager = MagicMock()
    memory_manager.build_system_prompt.return_value = ""
    agent._memory_manager = memory_manager

    with patch.object(agent, "_spawn_background_review", return_value=None):
        outcome = agent.run_conversation("hello")

    # Re-read the attribute from the agent, as the check applies to whatever
    # manager the agent holds after the run.
    sync_all = agent._memory_manager.sync_all
    sync_all.assert_called_once()
    assert sync_all.call_args.kwargs["messages"] == outcome["messages"]
def test_nudge_counters_tick(self, fake_session):
"""The skill nudge counter must accumulate tool_iterations across
turns. The memory nudge counter is gated on memory being configured

Some files were not shown because too many files have changed in this diff Show more