From 5a6720b884eb9ab373da8986a0b7ddb571e312e7 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 2 Jul 2026 15:18:05 -0500 Subject: [PATCH] fix(desktop,tui-gateway,zai): stop thinking-off from reverting to medium MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A Z.ai desktop user reported thinking reverting to medium after one turn, burning ~200% of a week's credits in 4 days despite reasoning_effort: false in config.yaml. Four compounding bugs: - _session_info reported reasoning_effort "" for disabled reasoning, indistinguishable from unset — the desktop adopted it after the first turn, wiping its sticky "thinking off" pick so every later chat reverted to the default effort. - config.set key=reasoning always wrote agent.reasoning_effort to global config.yaml, so every desktop model-menu selection (preset.effort ?? 'medium') clobbered the user's configured value. Now session-scoped like the messaging gateway's /reasoning, landing on create_reasoning_override so lazily-built sessions keep it too. - YAML `reasoning_effort: false`/`off`/`no` (boolean False) was coerced to "" by every loader's `str(x or "")`, silently re-enabling thinking. parse_reasoning_effort now treats False/"false"/"disabled" as {"enabled": False}; loaders (tui gateway, gateway, cli, cron, delegate) pass the raw value through. The desktop config reader also crashed on the boolean (false.trim()), aborting voice/STT settings. - The zai provider profile never sent thinking on the wire, and GLM-4.5+ defaults to thinking ON server-side — so disabling reasoning was a silent no-op on direct Z.ai, the actual token burner. The profile now emits extra_body.thinking {"type": "enabled"|"disabled"} for thinking-capable GLM models, mirroring the DeepSeek profile. 
Also: /new (session reset) now carries reasoning_config across the rebuild like model_override; config.get reasoning prefers the session's live value and maps a config False to "none"; Settings shows "Off" instead of a blank select for hand-written false. --- .../app/session/hooks/use-hermes-config.ts | 19 ++- .../src/app/settings/model-settings.tsx | 10 +- cli.py | 10 +- cron/scheduler.py | 8 +- gateway/run.py | 7 +- hermes_constants.py | 16 +- plugins/model-providers/zai/__init__.py | 62 +++++++- .../model_providers/test_zai_profile.py | 141 ++++++++++++++++++ tests/test_hermes_constants.py | 12 ++ .../test_reasoning_session_scope.py | 121 +++++++++++++++ tools/delegate_tool.py | 7 +- tui_gateway/server.py | 89 +++++++---- 12 files changed, 455 insertions(+), 47 deletions(-) create mode 100644 tests/plugins/model_providers/test_zai_profile.py create mode 100644 tests/tui_gateway/test_reasoning_session_scope.py diff --git a/apps/desktop/src/app/session/hooks/use-hermes-config.ts b/apps/desktop/src/app/session/hooks/use-hermes-config.ts index 16242ba71..fe2a42d46 100644 --- a/apps/desktop/src/app/session/hooks/use-hermes-config.ts +++ b/apps/desktop/src/app/session/hooks/use-hermes-config.ts @@ -21,6 +21,23 @@ function recordingLimit(value: unknown) { return typeof value === 'number' && Number.isFinite(value) && value > 0 ? value : DEFAULT_VOICE_SECONDS } +/** config.yaml hands back whatever the user wrote — `reasoning_effort: false` + * (or `off`/`no`, which YAML also parses to boolean false) means thinking + * disabled, and a bare boolean must not throw on `.trim()`. */ +function normalizeConfigEffort(value: unknown): string { + if (value === false) { + return 'none' + } + + if (typeof value !== 'string') { + return '' + } + + const effort = value.trim().toLowerCase() + + return effort === 'false' || effort === 'disabled' ? 
'none' : effort +} + interface HermesConfigOptions { activeSessionIdRef: MutableRefObject refreshProjectBranch: (cwd: string) => Promise @@ -60,7 +77,7 @@ export function useHermesConfig({ activeSessionIdRef, refreshProjectBranch }: He void refreshProjectBranch($currentCwd.get() || cwd) } - const reasoning = (config.agent?.reasoning_effort ?? '').trim() + const reasoning = normalizeConfigEffort(config.agent?.reasoning_effort) const tier = (config.agent?.service_tier ?? '').trim() setCurrentReasoningEffort(prev => (activeSessionIdRef.current ? prev : reasoning)) diff --git a/apps/desktop/src/app/settings/model-settings.tsx b/apps/desktop/src/app/settings/model-settings.tsx index 8230519f4..6459370dc 100644 --- a/apps/desktop/src/app/settings/model-settings.tsx +++ b/apps/desktop/src/app/settings/model-settings.tsx @@ -307,10 +307,12 @@ export function ModelSettings({ onMainModelChanged }: ModelSettingsProps) { const reasoningSupported = mainCaps?.reasoning ?? true const fastSupported = mainCaps?.fast ?? false - const effortValue = - String(getNested(config ?? {}, 'agent.reasoning_effort') ?? '') - .trim() - .toLowerCase() || 'medium' + // Hand-written `reasoning_effort: false`/`off` reaches us as boolean false + // ("false" once stringified) — show it as Off, not an empty select. + const rawEffort = String(getNested(config ?? {}, 'agent.reasoning_effort') ?? '') + .trim() + .toLowerCase() + const effortValue = rawEffort === 'false' || rawEffort === 'disabled' ? 'none' : rawEffort || 'medium' const fastOn = isFastTier(getNested(config ?? 
{}, 'agent.service_tier')) diff --git a/cli.py b/cli.py index c3f438690..d2dbbbb01 100644 --- a/cli.py +++ b/cli.py @@ -334,11 +334,15 @@ def _resolve_prefill_messages_file(config: Dict[str, Any]) -> str: return "" -def _parse_reasoning_config(effort: str) -> dict | None: - """Parse a reasoning effort level into an OpenRouter reasoning config dict.""" +def _parse_reasoning_config(effort) -> dict | None: + """Parse a reasoning effort level into an OpenRouter reasoning config dict. + + Accepts the raw config value (string or YAML boolean — ``false``/``off`` + parse as thinking disabled, see parse_reasoning_effort). + """ from hermes_constants import parse_reasoning_effort result = parse_reasoning_effort(effort) - if effort and effort.strip() and result is None: + if effort and str(effort).strip() and result is None: logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort) return result diff --git a/cron/scheduler.py b/cron/scheduler.py index 4c764bd13..e072fce7f 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -2620,10 +2620,12 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: except Exception: pass - # Reasoning config from config.yaml + # Reasoning config from config.yaml (raw value — a YAML boolean False + # means thinking disabled, see parse_reasoning_effort) from hermes_constants import parse_reasoning_effort - effort = str(_cfg.get("agent", {}).get("reasoning_effort", "")).strip() - reasoning_config = parse_reasoning_effort(effort) + reasoning_config = parse_reasoning_effort( + _cfg.get("agent", {}).get("reasoning_effort", "") + ) # Prefill messages from env or config.yaml. 
The top-level # prefill_messages_file key is canonical; agent.prefill_messages_file is diff --git a/gateway/run.py b/gateway/run.py index ed257607f..cf6dae7d8 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -4643,9 +4643,12 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew """ from hermes_constants import parse_reasoning_effort cfg = _load_gateway_runtime_config() - effort = str(cfg_get(cfg, "agent", "reasoning_effort", default="") or "").strip() + # Keep the raw value — coercing with ``or ""`` turns a YAML boolean + # False (``reasoning_effort: false``/``off``/``no``) into "", silently + # re-enabling thinking for users who explicitly disabled it. + effort = cfg_get(cfg, "agent", "reasoning_effort", default="") result = parse_reasoning_effort(effort) - if effort and effort.strip() and result is None: + if effort and str(effort).strip() and result is None: logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort) return result diff --git a/hermes_constants.py b/hermes_constants.py index 526bb0ed4..c0f4d48e1 100644 --- a/hermes_constants.py +++ b/hermes_constants.py @@ -794,18 +794,26 @@ def apply_subprocess_home_env(env: dict[str, str]) -> None: VALID_REASONING_EFFORTS = ("minimal", "low", "medium", "high", "xhigh") -def parse_reasoning_effort(effort: str) -> dict | None: +def parse_reasoning_effort(effort) -> dict | None: """Parse a reasoning effort level into a config dict. Valid levels: "none", "minimal", "low", "medium", "high", "xhigh". Returns None when the input is empty or unrecognized (caller uses default). - Returns {"enabled": False} for "none". + Returns {"enabled": False} for "none" (aliases: "false", "disabled", and + YAML boolean False — users write ``reasoning_effort: false``/``off``/``no`` + in config.yaml and YAML hands us a bool, which must mean disabled, not + "fall back to the default and keep thinking"). Returns {"enabled": True, "effort": <level>} for valid effort levels. 
""" - if not effort or not effort.strip(): + if effort is False: + return {"enabled": False} + if effort is None or effort is True: + return None + effort = str(effort) + if not effort.strip(): return None effort = effort.strip().lower() - if effort == "none": + if effort in {"none", "false", "disabled"}: return {"enabled": False} if effort in VALID_REASONING_EFFORTS: return {"enabled": True, "effort": effort} diff --git a/plugins/model-providers/zai/__init__.py b/plugins/model-providers/zai/__init__.py index 9fcdb2bec..7a53ec166 100644 --- a/plugins/model-providers/zai/__init__.py +++ b/plugins/model-providers/zai/__init__.py @@ -1,9 +1,67 @@ -"""ZAI / GLM provider profile.""" +"""ZAI / GLM provider profile. + +Z.AI's GLM-4.5-and-later chat models default to thinking-mode ON when the +request omits ``thinking``. Hermes' ``reasoning_config = {"enabled": False}`` +was previously a silent no-op on this route — the base profile emits nothing, +so users who turned thinking off (desktop toggle, ``/reasoning none``, +``reasoning_effort: none``/``false`` in config.yaml) kept burning thinking +tokens on every turn. + +:meth:`ZaiProfile.build_api_kwargs_extras` translates the Hermes reasoning +config into the wire shape Z.AI's OpenAI-compat endpoint expects: + + {"extra_body": {"thinking": {"type": "enabled" | "disabled"}}} + +When no reasoning preference is set (``reasoning_config is None``) the field +is omitted so the server default applies, matching prior behavior. GLM +models before 4.5 (e.g. ``glm-4-9b``) don't accept ``thinking`` and are left +untouched. 
+""" + +from __future__ import annotations + +import re +from typing import Any from providers import register_provider from providers.base import ProviderProfile -zai = ProviderProfile( +_GLM_VERSION_RE = re.compile(r"^glm-(\d+)(?:\.(\d+))?") + + +def _model_supports_thinking(model: str | None) -> bool: + """GLM thinking-capable model families: glm-4.5 and later (4.5, 4.6, 5…).""" + m = (model or "").strip().lower() + match = _GLM_VERSION_RE.match(m) + if not match: + return False + major = int(match.group(1)) + minor = int(match.group(2) or 0) + return (major, minor) >= (4, 5) + + +class ZaiProfile(ProviderProfile): + """Z.AI / GLM — extra_body.thinking enabled/disabled.""" + + def build_api_kwargs_extras( + self, *, reasoning_config: dict | None = None, model: str | None = None, **context + ) -> tuple[dict[str, Any], dict[str, Any]]: + extra_body: dict[str, Any] = {} + top_level: dict[str, Any] = {} + + if not _model_supports_thinking(model): + return extra_body, top_level + + # Only emit when the user expressed a preference; omitting the field + # keeps the server default (enabled) exactly as before. + if isinstance(reasoning_config, dict): + enabled = reasoning_config.get("enabled") is not False + extra_body["thinking"] = {"type": "enabled" if enabled else "disabled"} + + return extra_body, top_level + + +zai = ZaiProfile( name="zai", aliases=("glm", "z-ai", "z.ai", "zhipu"), env_vars=("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), diff --git a/tests/plugins/model_providers/test_zai_profile.py b/tests/plugins/model_providers/test_zai_profile.py new file mode 100644 index 000000000..feb209c88 --- /dev/null +++ b/tests/plugins/model_providers/test_zai_profile.py @@ -0,0 +1,141 @@ +"""Unit tests for the Z.AI / GLM provider profile's thinking-mode wiring. + +Z.AI's GLM-4.5-and-later chat models default to thinking-mode ON when the +request omits ``thinking``. 
Before the profile emitted the parameter, +``reasoning_config = {"enabled": False}`` was a silent no-op on the direct +Z.AI route — users who turned thinking off kept burning thinking tokens on +every turn (the desktop "thinking reverts to medium" report). + +These tests pin the profile's wire-shape contract so Z.AI requests stay +correctly shaped without going live. +""" + +from __future__ import annotations + +import pytest + + +@pytest.fixture +def zai_profile(): + """Resolve the registered Z.AI profile through the real discovery path.""" + # ``model_tools`` triggers plugin discovery on import, which is what + # registers the Z.AI profile in the global provider registry. + import model_tools # noqa: F401 + import providers + + profile = providers.get_provider_profile("zai") + assert profile is not None, "zai provider profile must be registered" + return profile + + +class TestZaiThinkingWireShape: + """``build_api_kwargs_extras`` produces Z.AI's exact wire format.""" + + def test_no_preference_omits_thinking(self, zai_profile): + """No reasoning_config → omit ``thinking`` so the server default + applies (matches prior behavior for users with no preference).""" + extra_body, top_level = zai_profile.build_api_kwargs_extras( + reasoning_config=None, model="glm-5" + ) + assert extra_body == {} + assert top_level == {} + + def test_enabled_sends_enabled_marker(self, zai_profile): + extra_body, top_level = zai_profile.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": "medium"}, model="glm-5" + ) + assert extra_body == {"thinking": {"type": "enabled"}} + assert top_level == {} + + def test_explicitly_disabled_sends_disabled_marker(self, zai_profile): + """``reasoning_config.enabled=False`` → ``thinking.type=disabled``. + + The crucial bit is that the parameter is *sent* at all — GLM defaults + to thinking-on when ``thinking`` is absent, so an unsent disable + burns thinking tokens forever. 
+ """ + extra_body, top_level = zai_profile.build_api_kwargs_extras( + reasoning_config={"enabled": False}, model="glm-5" + ) + assert extra_body == {"thinking": {"type": "disabled"}} + assert top_level == {} + + def test_no_effort_levels_leak_to_top_level(self, zai_profile): + """GLM has no effort knob — never emit ``reasoning_effort``.""" + for effort in ("minimal", "low", "medium", "high", "xhigh"): + _, top_level = zai_profile.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": effort}, model="glm-5.2" + ) + assert top_level == {} + + +class TestZaiModelGating: + """GLM 4.5+ get thinking; earlier GLM models are left untouched.""" + + @pytest.mark.parametrize( + "model", + [ + "glm-4.5", + "glm-4.5-air", + "glm-4.5-flash", + "glm-4.6", + "glm-5", + "glm-5.2", + "GLM-5", # case-insensitive + ], + ) + def test_thinking_capable_models_emit_thinking(self, zai_profile, model): + extra_body, _ = zai_profile.build_api_kwargs_extras( + reasoning_config={"enabled": False}, model=model + ) + assert extra_body == {"thinking": {"type": "disabled"}} + + @pytest.mark.parametrize( + "model", + [ + "glm-4-9b", # pre-4.5, no thinking param + "glm-4", + "glm-3-turbo", + "", # bare/unknown + None, # missing + "charglm-3", # non-GLM-versioned id + ], + ) + def test_non_thinking_models_emit_nothing(self, zai_profile, model): + extra_body, top_level = zai_profile.build_api_kwargs_extras( + reasoning_config={"enabled": False}, model=model + ) + assert extra_body == {} + assert top_level == {} + + +class TestZaiFullKwargsIntegration: + """End-to-end: the transport's full kwargs carry the thinking marker.""" + + def test_disabled_reaches_the_wire(self, zai_profile): + from agent.transports.chat_completions import ChatCompletionsTransport + + kwargs = ChatCompletionsTransport().build_kwargs( + model="glm-5", + messages=[{"role": "user", "content": "ping"}], + tools=None, + provider_profile=zai_profile, + reasoning_config={"enabled": False}, + 
base_url="https://api.z.ai/api/paas/v4", + provider_name="zai", + ) + assert kwargs["extra_body"]["thinking"] == {"type": "disabled"} + + def test_no_preference_keeps_wire_clean(self, zai_profile): + from agent.transports.chat_completions import ChatCompletionsTransport + + kwargs = ChatCompletionsTransport().build_kwargs( + model="glm-5", + messages=[{"role": "user", "content": "ping"}], + tools=None, + provider_profile=zai_profile, + reasoning_config=None, + base_url="https://api.z.ai/api/paas/v4", + provider_name="zai", + ) + assert "thinking" not in kwargs.get("extra_body", {}) diff --git a/tests/test_hermes_constants.py b/tests/test_hermes_constants.py index 8635c6827..f23bed43a 100644 --- a/tests/test_hermes_constants.py +++ b/tests/test_hermes_constants.py @@ -436,6 +436,18 @@ class TestParseReasoningEffort: """The literal "none" disables reasoning explicitly.""" assert parse_reasoning_effort("none") == {"enabled": False} + @pytest.mark.parametrize("value", [False, "false", "FALSE", "disabled", " Disabled "]) + def test_false_aliases_disable_reasoning(self, value): + """YAML `reasoning_effort: false`/`off`/`no` reaches loaders as a + boolean; users also hand-write "false"/"disabled". 
All must mean + disabled — not "unset, fall back to the default and keep thinking".""" + assert parse_reasoning_effort(value) == {"enabled": False} + + @pytest.mark.parametrize("value", [None, True]) + def test_non_string_non_false_returns_none(self, value): + """None and boolean True fall back to the caller default.""" + assert parse_reasoning_effort(value) is None + @pytest.mark.parametrize("level", list(VALID_REASONING_EFFORTS)) def test_each_valid_level(self, level): """Every level listed in VALID_REASONING_EFFORTS is accepted as-is.""" diff --git a/tests/tui_gateway/test_reasoning_session_scope.py b/tests/tui_gateway/test_reasoning_session_scope.py new file mode 100644 index 000000000..0c560cd80 --- /dev/null +++ b/tests/tui_gateway/test_reasoning_session_scope.py @@ -0,0 +1,121 @@ +"""Reasoning-effort session scoping in the TUI gateway (desktop backend). + +Covers the "desktop reverts thinking to medium after one turn" report: + +1. ``_session_info`` must report ``reasoning_effort: "none"`` when reasoning + is disabled — reporting ``""`` (indistinguishable from "unset") made the + desktop adopt the empty value after the first turn, wiping its sticky + "thinking off" pick so every later chat reverted to the default effort. + +2. ``config.set key=reasoning`` with a live session must be session-scoped: + it must NOT rewrite the global ``agent.reasoning_effort`` in config.yaml + (the desktop model menu applies a per-model preset on every selection, + which was silently clobbering the user's configured value), and it must + land on ``create_reasoning_override`` so lazily-built sessions (agent not + constructed until the first prompt) don't drop the change. + +3. ``_load_reasoning_config`` must honor a YAML boolean False + (``reasoning_effort: false`` / ``off`` / ``no``) as thinking-disabled. 
+""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import patch + +import tui_gateway.server as server +from tui_gateway.server import _session_info + + +def _agent(reasoning_config): + return SimpleNamespace( + reasoning_config=reasoning_config, + service_tier=None, + model="glm-5", + provider="zai", + session_id="sess-key", + ) + + +class TestSessionInfoReasoningEffort: + """Disabled reasoning must be reported as 'none', never ''.""" + + def test_disabled_reports_none(self) -> None: + info = _session_info(_agent({"enabled": False})) + assert info["reasoning_effort"] == "none" + + def test_enabled_reports_effort(self) -> None: + info = _session_info(_agent({"enabled": True, "effort": "high"})) + assert info["reasoning_effort"] == "high" + + def test_unset_reports_empty(self) -> None: + info = _session_info(_agent(None)) + assert info["reasoning_effort"] == "" + + +class TestConfigSetReasoningSessionScope: + """Session-targeted reasoning changes must not touch global config.""" + + def _dispatch(self, params: dict) -> dict: + handler = server._methods["config.set"] + return handler("rid-1", params) + + def test_session_scoped_set_skips_global_write(self) -> None: + agent = _agent(None) + session = {"session_key": "k1", "agent": agent} + with patch.dict(server._sessions, {"s1": session}, clear=False), \ + patch.object(server, "_write_config_key") as write_key, \ + patch.object(server, "_persist_live_session_runtime"), \ + patch.object(server, "_emit"): + resp = self._dispatch( + {"key": "reasoning", "session_id": "s1", "value": "none"} + ) + assert resp["result"]["value"] == "none" + assert agent.reasoning_config == {"enabled": False} + write_key.assert_not_called() + + def test_session_scoped_set_updates_create_override_for_lazy_session(self) -> None: + """A pre-build (agent=None) session must keep the change for the + deferred agent build instead of dropping it.""" + session = {"session_key": "k2", "agent": None} + 
with patch.dict(server._sessions, {"s2": session}, clear=False), \ + patch.object(server, "_write_config_key") as write_key: + resp = self._dispatch( + {"key": "reasoning", "session_id": "s2", "value": "high"} + ) + assert resp["result"]["value"] == "high" + assert session["create_reasoning_override"] == { + "enabled": True, + "effort": "high", + } + write_key.assert_not_called() + + def test_no_session_persists_globally(self) -> None: + with patch.object(server, "_write_config_key") as write_key: + resp = self._dispatch({"key": "reasoning", "value": "low"}) + assert resp["result"]["value"] == "low" + write_key.assert_called_once_with("agent.reasoning_effort", "low") + + def test_unknown_value_rejected(self) -> None: + resp = self._dispatch({"key": "reasoning", "value": "bogus"}) + assert "error" in resp + + +class TestLoadReasoningConfigYamlBoolean: + """YAML `reasoning_effort: false` means disabled, not default.""" + + def test_boolean_false_disables(self) -> None: + with patch.object( + server, "_load_cfg", return_value={"agent": {"reasoning_effort": False}} + ): + assert server._load_reasoning_config() == {"enabled": False} + + def test_string_false_disables(self) -> None: + with patch.object( + server, "_load_cfg", return_value={"agent": {"reasoning_effort": "false"}} + ): + assert server._load_reasoning_config() == {"enabled": False} + + def test_unset_returns_default(self) -> None: + with patch.object(server, "_load_cfg", return_value={"agent": {}}): + assert server._load_reasoning_config() is None diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 2895733ab..b3172e51a 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -1255,8 +1255,11 @@ def _build_child_agent( parent_reasoning = getattr(parent_agent, "reasoning_config", None) child_reasoning = parent_reasoning try: - delegation_effort = str(delegation_cfg.get("reasoning_effort") or "").strip() - if delegation_effort: + # Keep the raw value — ``str(x or "")`` would coerce a 
YAML boolean + # False (``reasoning_effort: false``) to "" and inherit the parent + # instead of disabling thinking for children. + delegation_effort = delegation_cfg.get("reasoning_effort") + if delegation_effort or delegation_effort is False: from hermes_constants import parse_reasoning_effort parsed = parse_reasoning_effort(delegation_effort) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 6bd1ed13a..faa09ff18 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -2318,10 +2318,12 @@ def _display_mouse_tracking(display: dict) -> str: def _load_reasoning_config() -> dict | None: from hermes_constants import parse_reasoning_effort - effort = str( - (_load_cfg().get("agent") or {}).get("reasoning_effort", "") or "" - ).strip() - return parse_reasoning_effort(effort) + # Pass the raw value through — ``or ""`` would coerce a YAML boolean + # False (``reasoning_effort: false``/``off``/``no``) to "", silently + # re-enabling thinking for users who explicitly turned it off. + return parse_reasoning_effort( + (_load_cfg().get("agent") or {}).get("reasoning_effort", "") + ) def _load_service_tier() -> str | None: @@ -3095,11 +3097,15 @@ def _session_info(agent, session: dict | None = None) -> dict: personality = (session or {}).get("personality", cfg_personality) reasoning_config = getattr(agent, "reasoning_config", None) reasoning_effort = "" - if ( - isinstance(reasoning_config, dict) - and reasoning_config.get("enabled") is not False - ): - reasoning_effort = str(reasoning_config.get("effort", "") or "") + if isinstance(reasoning_config, dict): + if reasoning_config.get("enabled") is False: + # Disabled must be distinguishable from unset ("" = provider + # default). Reporting "" here made the desktop adopt the empty + # value after the first turn, wiping its sticky "thinking off" + # pick and re-creating every later chat at the default effort. 
+ reasoning_effort = "none" + else: + reasoning_effort = str(reasoning_config.get("effort", "") or "") service_tier = getattr(agent, "service_tier", None) or "" # Effective approval-bypass state — the same three sources that # check_all_command_guards() ORs together: persistent config @@ -4055,15 +4061,21 @@ def _preview_restart_callbacks(parent: str, task_id: str) -> dict: def _reset_session_agent(sid: str, session: dict) -> dict: tokens = _set_session_context(session["session_key"]) try: + # Preserve this session's chosen model AND reasoning across /new so a + # reset doesn't silently revert to global config (or to a model + # another session set). See the cross-session-contamination note in + # _apply_model_switch. + reset_kw = {"model_override": session.get("model_override")} + old_reasoning = getattr(session.get("agent"), "reasoning_config", None) + if old_reasoning is None: + old_reasoning = session.get("create_reasoning_override") + if isinstance(old_reasoning, dict): + reset_kw["reasoning_config_override"] = old_reasoning new_agent = _make_agent( sid, session["session_key"], session_id=session["session_key"], - # Preserve this session's chosen model across /new so a reset - # doesn't silently revert to global config (or to a model another - # session set). See the cross-session-contamination note in - # _apply_model_switch. 
- model_override=session.get("model_override"), + **reset_kw, ) finally: _clear_session_context(tokens) @@ -10093,15 +10105,23 @@ def _(rid, params: dict) -> dict: parsed = parse_reasoning_effort(arg) if parsed is None: return _err(rid, 4002, f"unknown reasoning value: {value}") - _write_config_key("agent.reasoning_effort", arg) - if session and session.get("agent") is not None: - session["agent"].reasoning_config = parsed - _persist_live_session_runtime(session) - _emit( - "session.info", - params.get("session_id", ""), - _session_info(session["agent"], session), - ) + if session is not None: + # Session-scoped, like the messaging gateway's `/reasoning + # <level>` (global persistence is `--global` / Settings → + # Model territory). Writing config.yaml here let every + # desktop model-menu selection rewrite the user's global + # agent.reasoning_effort to the preset default. + session["create_reasoning_override"] = parsed + if session.get("agent") is not None: + session["agent"].reasoning_config = parsed + _persist_live_session_runtime(session) + _emit( + "session.info", + params.get("session_id", ""), + _session_info(session["agent"], session), + ) + else: + _write_config_key("agent.reasoning_effort", arg) return _ok(rid, {"key": key, "value": arg}) except Exception as e: return _err(rid, 5001, str(e)) @@ -10776,9 +10796,26 @@ def _(rid, params: dict) -> dict: ) if key == "reasoning": cfg = _load_cfg() - effort = str( - (cfg.get("agent") or {}).get("reasoning_effort", "medium") or "medium" - ) + effort = "" + # Prefer the session's live value — `config.set reasoning` is + # session-scoped, so the global key may not reflect this chat. 
+ session = _sessions.get(params.get("session_id", "")) + live = getattr((session or {}).get("agent"), "reasoning_config", None) + if live is None and session is not None: + live = session.get("create_reasoning_override") + if isinstance(live, dict): + if live.get("enabled") is False: + effort = "none" + else: + effort = str(live.get("effort", "") or "") + if not effort: + raw_effort = (cfg.get("agent") or {}).get("reasoning_effort", "") + if raw_effort is False: + # YAML `reasoning_effort: false`/`off`/`no` — thinking + # disabled, not "unset, show the medium default". + effort = "none" + else: + effort = str(raw_effort or "medium") display = ( "show" if bool((cfg.get("display") or {}).get("show_reasoning", False))