diff --git a/gateway/run.py b/gateway/run.py index 63f3f4062..ed257607f 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -3632,6 +3632,8 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew resolved_session_key = None model = _resolve_gateway_model(user_config) + if resolved_session_key: + self._rehydrate_session_model_override(resolved_session_key) override = self._session_model_overrides.get(resolved_session_key) if resolved_session_key else None if override: override_model = override.get("model", model) @@ -7448,6 +7450,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew _update_prompt_pending.pop(key, None) with self.session_store._lock: entry.expiry_finalized = True + # Session finalization is a conversation boundary — + # drop the persisted /model override too so a later + # message doesn't rehydrate it after the in-memory + # override was popped above. + entry.model_override = None self.session_store._save() logger.debug( "Session expiry finalized for %s", @@ -15157,6 +15164,63 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew ) return hashlib.sha256(blob.encode()).hexdigest()[:16] + def _rehydrate_session_model_override(self, session_key: str) -> None: + """Lazily restore a persisted /model override after a gateway restart. + + ``_session_model_overrides`` is in-memory only, so before persistence + a restart silently reverted every session to the global default model. + The non-secret parts (model/provider/base_url) are written through to + the session store when /model runs (and cleared on /new); here we read + them back on first use and re-resolve credentials via the normal + runtime provider resolution — api_key is never persisted to disk. + + No-op when an in-memory override already exists (live state wins) or + when the store has nothing persisted (e.g. the user ran /new, which + clears both the in-memory dict and the persisted field). + """ + if session_key in self._session_model_overrides: + return + store = getattr(self, "session_store", None) + if store is None: + return + try: + persisted = store.get_model_override(session_key) + except Exception: + logger.debug( + "Failed to read persisted session model override", exc_info=True + ) + return + if not persisted: + return + override: Dict[str, Any] = { + "model": persisted.get("model"), + "provider": persisted.get("provider"), + "base_url": persisted.get("base_url"), + } + provider = persisted.get("provider") + if provider: + # Re-resolve credentials for the persisted provider. On failure + # (e.g. credentials were removed since the switch) keep the + # credential-less override — _resolve_session_agent_runtime falls + # back to env-based resolution and applies model/provider on top. + try: + runtime = _resolve_runtime_agent_kwargs_for_provider(provider) + override["api_key"] = runtime.get("api_key") + override["api_mode"] = runtime.get("api_mode") + if not override.get("base_url"): + override["base_url"] = runtime.get("base_url") + except Exception: + logger.debug( + "Credential re-resolution failed for persisted override " + "(provider=%s); using credential-less override", + provider, exc_info=True, + ) + self._session_model_overrides[session_key] = override + logger.info( + "Rehydrated persisted /model override for session=%s: model=%s provider=%s", + session_key, override.get("model"), provider or "", + ) + def _apply_session_model_override( self, session_key: str, model: str, runtime_kwargs: dict ) -> tuple: diff --git a/gateway/session.py b/gateway/session.py index c741cfba5..fd2fae87f 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -574,6 +574,31 @@ def build_session_context_prompt( return "\n".join(lines) +# Keys of a /model session override that are safe to persist to disk. +# ``api_key`` (and anything else, e.g. ``api_mode`` which is re-derived from +# provider resolution) is intentionally excluded: credentials must NEVER be +# written to sessions.json. On rehydration after a gateway restart the +# runner re-resolves credentials via the normal runtime provider resolution. +PERSISTABLE_MODEL_OVERRIDE_KEYS = ("model", "provider", "base_url") + + +def sanitize_model_override(override: Optional[Dict[str, Any]]) -> Optional[Dict[str, str]]: + """Return a copy of *override* containing only persistable, non-secret keys. + + Returns ``None`` when the input is empty/not a dict or no persistable + values remain, so callers can store the result directly on + ``SessionEntry.model_override``. + """ + if not isinstance(override, dict): + return None + cleaned = { + k: str(v) + for k, v in override.items() + if k in PERSISTABLE_MODEL_OVERRIDE_KEYS and v not in (None, "") + } + return cleaned or None + + @dataclass class SessionEntry: """ @@ -644,6 +669,15 @@ class SessionEntry: resume_reason: Optional[str] = None # e.g. "restart_timeout" last_resume_marked_at: Optional[datetime] = None + # Session-scoped /model override (model/provider/base_url ONLY — never + # credentials). ``_session_model_overrides`` in the gateway runner is + # in-memory, so before this field a gateway restart silently reverted + # every session to the global default model. api_key/api_mode are + # re-resolved through the normal runtime provider resolution when the + # override is rehydrated after a restart and are never written to disk + # (see sanitize_model_override / SessionStore.set_model_override). + model_override: Optional[Dict[str, str]] = None + def to_dict(self) -> Dict[str, Any]: result = { "session_key": self.session_key, @@ -675,6 +709,10 @@ class SessionEntry: "auto_reset_reason": self.auto_reset_reason, "reset_had_activity": self.reset_had_activity, } + if self.model_override: + # Defence-in-depth: strip credentials even if a caller stored an + # unsanitized dict directly on the entry. + result["model_override"] = sanitize_model_override(self.model_override) if self.origin: result["origin"] = self.origin.to_dict() return result @@ -736,6 +774,7 @@ class SessionEntry: was_auto_reset=data.get("was_auto_reset", False), auto_reset_reason=data.get("auto_reset_reason"), reset_had_activity=data.get("reset_had_activity", False), + model_override=sanitize_model_override(data.get("model_override")), ) @@ -1515,6 +1554,37 @@ class SessionStore: entry.origin, ) + def set_model_override( + self, session_key: str, override: Optional[Dict[str, Any]] + ) -> None: + """Persist (or clear) the session-scoped /model override. + + Only non-secret keys (model/provider/base_url — see + ``sanitize_model_override``) are written; ``api_key``/``api_mode`` + are re-resolved at rehydration time via the normal runtime provider + resolution. Pass ``None`` (or a dict with no persistable values) + to clear the persisted override, e.g. on /new. + """ + with self._lock: + self._ensure_loaded_locked() + entry = self._entries.get(session_key) + if entry is None: + return + cleaned = sanitize_model_override(override) + if entry.model_override == cleaned: + return + entry.model_override = cleaned + self._save() + + def get_model_override(self, session_key: str) -> Optional[Dict[str, str]]: + """Return the persisted /model override for *session_key*, if any.""" + with self._lock: + self._ensure_loaded_locked() + entry = self._entries.get(session_key) + if entry is None: + return None + return dict(entry.model_override) if entry.model_override else None + def suspend_session(self, session_key: str) -> bool: """Mark a session as suspended so it auto-resets on next access. diff --git a/gateway/slash_commands.py b/gateway/slash_commands.py index 341efb4b0..066911dcf 100644 --- a/gateway/slash_commands.py +++ b/gateway/slash_commands.py @@ -1597,6 +1597,20 @@ class GatewaySlashCommandsMixin: "api_mode": result.api_mode, } + # Write-through the non-secret parts to the session + # store so the picked model survives a gateway restart + # (api_key is never persisted). + try: + _self.session_store.set_model_override( + _session_key, + _self._session_model_overrides[_session_key], + ) + except Exception: + logger.debug( + "Failed to persist session model override", + exc_info=True, + ) + # Evict cached agent so the next turn creates a fresh # agent from the override rather than relying on the # stale cache signature to trigger a rebuild. @@ -1831,6 +1845,19 @@ class GatewaySlashCommandsMixin: "api_mode": result.api_mode, } + # Write-through the non-secret parts (model/provider/base_url) to + # the session store so the override survives a gateway restart. + # api_key/api_mode are never persisted — they are re-resolved via + # runtime provider resolution on rehydration. + try: + self.session_store.set_model_override( + session_key, self._session_model_overrides[session_key] + ) + except Exception: + logger.debug( + "Failed to persist session model override", exc_info=True + ) + # Evict cached agent so the next turn creates a fresh agent from the # override rather than relying on cache signature mismatch detection. self._evict_cached_agent(session_key) diff --git a/scripts/release.py b/scripts/release.py index b03fcb543..5f676add6 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -52,6 +52,7 @@ AUTHOR_MAP = { "r0gersm1th@users.noreply.github.com": "r0gersm1th", # PR #3219 salvage (whatsapp bridge: resolve LID sender IDs to phone numbers in the message payload so phone-based allowlists match; commit authored by collaborator r0gersm1th, PR by @ajmeese7) "louis@letsfive.io": "Mibayy", # PR #3296 salvage (status: provider label honors config.yaml model.base_url, not just OPENAI_BASE_URL env) "me@keslerm.com": "keslerm", # PR #3459 salvage (gateway: 'log' tool_progress mode — silent in chat, tool calls appended to ~/.hermes/logs/tool_calls.log via rotating handler; duplicate of #3458 by @dlkakbs who submitted 4 min earlier — both credited) + "david.d.zhang@gmail.com": "Git-on-my-level", # PR #3659 salvage (gateway: persist per-session /model overrides across gateway restarts) "tarunravi@gmail.com": "tarunravi", # PR #2696 salvage (api-server: inline MEDIA: image tags as base64 data URLs in final responses so remote OpenAI-compatible frontends can render server-local screenshots; the PR's tool-progress-streaming and SSE-sentinel pieces were independently superseded on main) "aqdrgg19@gmail.com": "VolodymyrBg", # PR #2861 salvage (webhook: drop the unused full request payload from retained _delivery_info entries — up to ~1MB dead weight per delivery for the 1h idempotency TTL) "ohyes9711@gmail.com": "CharmingGroot", # PR #2794 salvage (email: guard msg_data[0][1] against malformed IMAP fetch structures so one bad response can't abort the batch and permanently lose seen-marked messages; Message-ID domain falls back to localhost when EMAIL_ADDRESS lacks '@') diff --git a/tests/gateway/test_session_model_override_persistence.py b/tests/gateway/test_session_model_override_persistence.py new file mode 100644 index 000000000..cd4039725 --- /dev/null +++ b/tests/gateway/test_session_model_override_persistence.py @@ -0,0 +1,234 @@ +"""Per-session /model overrides must survive gateway restarts (#3659 salvage). + +``GatewayRunner._session_model_overrides`` is in-memory, so before persistence +a gateway restart silently reverted every session to the global default model. +The non-secret parts (model/provider/base_url) are now written through to the +session store (``SessionEntry.model_override`` in sessions.json) and lazily +rehydrated on first use after a restart, with credentials re-resolved through +the normal runtime provider resolution. + +Covers: + - the override survives a simulated restart (a second SessionStore instance + reading the same sessions dir, and a fresh runner rehydrating from it) + - /new (SessionStore.reset_session) clears the persisted override so a + restart cannot resurrect it + - api_key is NEVER serialized to sessions.json +""" +import json +from unittest.mock import patch + +import pytest + +from gateway.config import GatewayConfig, Platform +from gateway.session import ( + SessionEntry, + SessionSource, + SessionStore, + sanitize_model_override, +) + +OVERRIDE = { + "model": "gpt-5o", + "provider": "openai", + "api_key": "sk-SUPER-SECRET-do-not-persist", + "base_url": "https://api.openai.example/v1", + "api_mode": "responses", +} + + +def _make_source() -> SessionSource: + return SessionSource( + platform=Platform.TELEGRAM, + user_id="u1", + chat_id="c1", + user_name="tester", + chat_type="dm", + ) + + +@pytest.fixture +def store_factory(tmp_path, monkeypatch): + """Build SessionStores over a shared sessions dir, without SQLite.""" + + def _raise(): + raise RuntimeError("SQLite disabled in test") + + import hermes_state + + monkeypatch.setattr(hermes_state, "SessionDB", _raise) + + def _make() -> SessionStore: + store = SessionStore(sessions_dir=tmp_path, config=GatewayConfig()) + assert store._db is None + return store + + return _make + + +def _sessions_json(tmp_path) -> str: + return (tmp_path / "sessions.json").read_text(encoding="utf-8") + + +def test_override_persists_and_survives_restart(store_factory, tmp_path): + store = store_factory() + entry = store.get_or_create_session(_make_source()) + session_key = entry.session_key + + store.set_model_override(session_key, OVERRIDE) + + # Simulated restart: a brand-new store instance reads the same dir. + store2 = store_factory() + persisted = store2.get_model_override(session_key) + assert persisted == { + "model": "gpt-5o", + "provider": "openai", + "base_url": "https://api.openai.example/v1", + } + + +def test_api_key_never_serialized(store_factory, tmp_path): + store = store_factory() + entry = store.get_or_create_session(_make_source()) + + store.set_model_override(entry.session_key, OVERRIDE) + + raw = _sessions_json(tmp_path) + assert "sk-SUPER-SECRET-do-not-persist" not in raw + assert "api_key" not in raw + # api_mode is re-derived from provider resolution; not persisted either. + data = json.loads(raw) + stored = data[entry.session_key]["model_override"] + assert set(stored) == {"model", "provider", "base_url"} + + +def test_from_dict_strips_api_key_from_tampered_json(): + """Even a hand-edited sessions.json with an api_key must not load one.""" + store_entry = SessionEntry.from_dict( + { + "session_key": "k1", + "session_id": "s1", + "created_at": "2026-01-01T00:00:00", + "updated_at": "2026-01-01T00:00:00", + "model_override": { + "model": "m1", + "provider": "p1", + "api_key": "sk-injected", + "api_mode": "chat_completions", + }, + } + ) + assert store_entry.model_override == {"model": "m1", "provider": "p1"} + + +def test_new_clears_persisted_override(store_factory, tmp_path): + """/new resets the session; the persisted override must not survive it.""" + store = store_factory() + entry = store.get_or_create_session(_make_source()) + session_key = entry.session_key + + store.set_model_override(session_key, OVERRIDE) + assert store.get_model_override(session_key) is not None + + # /new path -> SessionStore.reset_session creates a fresh entry. + new_entry = store.reset_session(session_key) + assert new_entry is not None + assert store.get_model_override(session_key) is None + + # Restart after /new must NOT resurrect the override. + store2 = store_factory() + assert store2.get_model_override(session_key) is None + assert "gpt-5o" not in _sessions_json(tmp_path) + + +def _make_runner(store): + from gateway.run import GatewayRunner + + runner = object.__new__(GatewayRunner) + runner._session_model_overrides = {} + runner.session_store = store + return runner + + +def test_runner_rehydrates_override_after_restart(store_factory): + store = store_factory() + entry = store.get_or_create_session(_make_source()) + session_key = entry.session_key + store.set_model_override(session_key, OVERRIDE) + + # Simulated restart: fresh store + fresh runner with an empty in-memory + # override map, credentials re-resolved via runtime provider resolution. + runner = _make_runner(store_factory()) + with patch( + "gateway.run._resolve_runtime_agent_kwargs_for_provider", + return_value={ + "api_key": "sk-fresh-from-keychain", + "api_mode": "responses", + "base_url": "https://api.openai.example/v1", + "provider": "openai", + }, + ): + runner._rehydrate_session_model_override(session_key) + + override = runner._session_model_overrides[session_key] + assert override["model"] == "gpt-5o" + assert override["provider"] == "openai" + assert override["base_url"] == "https://api.openai.example/v1" + # Credentials come from live resolution, never from disk. + assert override["api_key"] == "sk-fresh-from-keychain" + assert override["api_mode"] == "responses" + + +def test_runner_rehydrate_keeps_live_override(store_factory): + """An in-memory override (live gateway state) always wins over disk.""" + store = store_factory() + entry = store.get_or_create_session(_make_source()) + session_key = entry.session_key + store.set_model_override(session_key, OVERRIDE) + + runner = _make_runner(store) + live = {"model": "live-model", "provider": "anthropic"} + runner._session_model_overrides[session_key] = live + + runner._rehydrate_session_model_override(session_key) + + assert runner._session_model_overrides[session_key] is live + + +def test_runner_rehydrate_noop_without_persisted_override(store_factory): + store = store_factory() + entry = store.get_or_create_session(_make_source()) + + runner = _make_runner(store) + runner._rehydrate_session_model_override(entry.session_key) + + assert runner._session_model_overrides == {} + + +def test_runner_rehydrate_survives_credential_resolution_failure(store_factory): + """Missing credentials degrade to a credential-less override, not a crash.""" + store = store_factory() + entry = store.get_or_create_session(_make_source()) + session_key = entry.session_key + store.set_model_override(session_key, OVERRIDE) + + runner = _make_runner(store) + with patch( + "gateway.run._resolve_runtime_agent_kwargs_for_provider", + side_effect=RuntimeError("no credentials"), + ): + runner._rehydrate_session_model_override(session_key) + + override = runner._session_model_overrides[session_key] + assert override["model"] == "gpt-5o" + assert override.get("api_key") is None + + +def test_sanitize_model_override(): + assert sanitize_model_override(None) is None + assert sanitize_model_override({}) is None + assert sanitize_model_override({"api_key": "sk-x", "api_mode": "chat"}) is None + assert sanitize_model_override(OVERRIDE) == { + "model": "gpt-5o", + "provider": "openai", + "base_url": "https://api.openai.example/v1", + }