fix(desktop,tui-gateway,zai): stop thinking-off from reverting to medium

A Z.ai desktop user reported thinking reverting to medium after one turn, burning ~200% of a week's credits in 4 days despite `reasoning_effort: false` in config.yaml. Four compounding bugs:

- _session_info reported reasoning_effort "" for disabled reasoning, indistinguishable from unset — the desktop adopted it after the first turn, wiping its sticky "thinking off" pick so every later chat reverted to the default effort.

- config.set key=reasoning always wrote agent.reasoning_effort to global config.yaml, so every desktop model-menu selection (preset.effort ?? 'medium') clobbered the user's configured value. It is now session-scoped like the messaging gateway's /reasoning, landing on create_reasoning_override so lazily-built sessions keep it too.

- YAML `reasoning_effort: false`/`off`/`no` (boolean False) was coerced to "" by every loader's `str(x or "")`, silently re-enabling thinking. parse_reasoning_effort now treats False/"false"/"disabled" as {"enabled": False}; the loaders (tui gateway, gateway, cli, cron, delegate) pass the raw value through. The desktop config reader also crashed on the boolean (false.trim()), aborting voice/STT settings.

- The zai provider profile never sent thinking on the wire, and GLM-4.5+ defaults to thinking ON server-side — so disabling reasoning was a silent no-op on direct Z.ai, the actual token burner. The profile now emits extra_body.thinking {"type": "enabled"|"disabled"} for thinking-capable GLM models, mirroring the DeepSeek profile.

Also: /new (session reset) now carries reasoning_config across the rebuild like model_override; config.get reasoning prefers the session's live value and maps a config False to "none"; Settings shows "Off" instead of a blank select for a hand-written false.
2026-07-02 15:18:05 -05:00 · 2026-07-02 15:18:05 -05:00 · 5a6720b884
commit 5a6720b884
parent c3f06a8fda
12 changed files with 455 additions and 47 deletions
--- a/apps/desktop/src/app/session/hooks/use-hermes-config.ts
+++ b/apps/desktop/src/app/session/hooks/use-hermes-config.ts
@ -21,6 +21,23 @@ function recordingLimit(value: unknown) {
  return typeof value === 'number' && Number.isFinite(value) && value > 0 ? value : DEFAULT_VOICE_SECONDS
 }

+/** config.yaml hands back whatever the user wrote — `reasoning_effort: false`
+ *  (or `off`/`no`, which YAML also parses to boolean false) means thinking
+ *  disabled, and a bare boolean must not throw on `.trim()`. */
+function normalizeConfigEffort(value: unknown): string {
+  if (value === false) {
+    return 'none'
+  }
+
+  if (typeof value !== 'string') {
+    return ''
+  }
+
+  const effort = value.trim().toLowerCase()
+
+  return effort === 'false' || effort === 'disabled' ? 'none' : effort
+}
+
 interface HermesConfigOptions {
  activeSessionIdRef: MutableRefObject<string | null>
  refreshProjectBranch: (cwd: string) => Promise<void>
@ -60,7 +77,7 @@ export function useHermesConfig({ activeSessionIdRef, refreshProjectBranch }: He
        void refreshProjectBranch($currentCwd.get() || cwd)
      }

-      const reasoning = (config.agent?.reasoning_effort ?? '').trim()
+      const reasoning = normalizeConfigEffort(config.agent?.reasoning_effort)
      const tier = (config.agent?.service_tier ?? '').trim()

      setCurrentReasoningEffort(prev => (activeSessionIdRef.current ? prev : reasoning))
--- a/apps/desktop/src/app/settings/model-settings.tsx
+++ b/apps/desktop/src/app/settings/model-settings.tsx
@ -307,10 +307,12 @@ export function ModelSettings({ onMainModelChanged }: ModelSettingsProps) {
  const reasoningSupported = mainCaps?.reasoning ?? true
  const fastSupported = mainCaps?.fast ?? false

-  const effortValue =
-    String(getNested(config ?? {}, 'agent.reasoning_effort') ?? '')
-      .trim()
-      .toLowerCase() || 'medium'
+  // Hand-written `reasoning_effort: false`/`off` reaches us as boolean false
+  // ("false" once stringified) — show it as Off, not an empty select.
+  const rawEffort = String(getNested(config ?? {}, 'agent.reasoning_effort') ?? '')
+    .trim()
+    .toLowerCase()
+  const effortValue = rawEffort === 'false' || rawEffort === 'disabled' ? 'none' : rawEffort || 'medium'

  const fastOn = isFastTier(getNested(config ?? {}, 'agent.service_tier'))

--- a/cli.py
+++ b/cli.py
@ -334,11 +334,15 @@ def _resolve_prefill_messages_file(config: Dict[str, Any]) -> str:
    return ""


-def _parse_reasoning_config(effort: str) -> dict | None:
-    """Parse a reasoning effort level into an OpenRouter reasoning config dict."""
+def _parse_reasoning_config(effort) -> dict | None:
+    """Parse a reasoning effort level into an OpenRouter reasoning config dict.
+
+    Accepts the raw config value (string or YAML boolean — ``false``/``off``
+    parse as thinking disabled, see parse_reasoning_effort).
+    """
    from hermes_constants import parse_reasoning_effort
    result = parse_reasoning_effort(effort)
-    if effort and effort.strip() and result is None:
+    if effort and str(effort).strip() and result is None:
        logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort)
    return result

--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@ -2620,10 +2620,12 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        except Exception:
            pass

-        # Reasoning config from config.yaml
+        # Reasoning config from config.yaml (raw value — a YAML boolean False
+        # means thinking disabled, see parse_reasoning_effort)
        from hermes_constants import parse_reasoning_effort
-        effort = str(_cfg.get("agent", {}).get("reasoning_effort", "")).strip()
-        reasoning_config = parse_reasoning_effort(effort)
+        reasoning_config = parse_reasoning_effort(
+            _cfg.get("agent", {}).get("reasoning_effort", "")
+        )

        # Prefill messages from env or config.yaml. The top-level
        # prefill_messages_file key is canonical; agent.prefill_messages_file is
--- a/gateway/run.py
+++ b/gateway/run.py
@ -4643,9 +4643,12 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
        """
        from hermes_constants import parse_reasoning_effort
        cfg = _load_gateway_runtime_config()
-        effort = str(cfg_get(cfg, "agent", "reasoning_effort", default="") or "").strip()
+        # Keep the raw value — coercing with ``or ""`` turns a YAML boolean
+        # False (``reasoning_effort: false``/``off``/``no``) into "", silently
+        # re-enabling thinking for users who explicitly disabled it.
+        effort = cfg_get(cfg, "agent", "reasoning_effort", default="")
        result = parse_reasoning_effort(effort)
-        if effort and effort.strip() and result is None:
+        if effort and str(effort).strip() and result is None:
            logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort)
        return result

--- a/hermes_constants.py
+++ b/hermes_constants.py
@ -794,18 +794,26 @@ def apply_subprocess_home_env(env: dict[str, str]) -> None:
 VALID_REASONING_EFFORTS = ("minimal", "low", "medium", "high", "xhigh")


-def parse_reasoning_effort(effort: str) -> dict | None:
+def parse_reasoning_effort(effort) -> dict | None:
    """Parse a reasoning effort level into a config dict.

    Valid levels: "none", "minimal", "low", "medium", "high", "xhigh".
    Returns None when the input is empty or unrecognized (caller uses default).
-    Returns {"enabled": False} for "none".
+    Returns {"enabled": False} for "none" (aliases: "false", "disabled", and
+    YAML boolean False — users write ``reasoning_effort: false``/``off``/``no``
+    in config.yaml and YAML hands us a bool, which must mean disabled, not
+    "fall back to the default and keep thinking").
    Returns {"enabled": True, "effort": <level>} for valid effort levels.
    """
-    if not effort or not effort.strip():
+    if effort is False:
+        return {"enabled": False}
+    if effort is None or effort is True:
+        return None
+    effort = str(effort)
+    if not effort.strip():
        return None
    effort = effort.strip().lower()
-    if effort == "none":
+    if effort in {"none", "false", "disabled"}:
        return {"enabled": False}
    if effort in VALID_REASONING_EFFORTS:
        return {"enabled": True, "effort": effort}
--- a/plugins/model-providers/zai/init.py
+++ b/plugins/model-providers/zai/init.py
@ -1,9 +1,67 @@
-"""ZAI / GLM provider profile."""
+"""ZAI / GLM provider profile.
+
+Z.AI's GLM-4.5-and-later chat models default to thinking-mode ON when the
+request omits ``thinking``.  Hermes' ``reasoning_config = {"enabled": False}``
+was previously a silent no-op on this route — the base profile emits nothing,
+so users who turned thinking off (desktop toggle, ``/reasoning none``,
+``reasoning_effort: none``/``false`` in config.yaml) kept burning thinking
+tokens on every turn.
+
+:meth:`ZaiProfile.build_api_kwargs_extras` translates the Hermes reasoning
+config into the wire shape Z.AI's OpenAI-compat endpoint expects:
+
+    {"extra_body": {"thinking": {"type": "enabled" | "disabled"}}}
+
+When no reasoning preference is set (``reasoning_config is None``) the field
+is omitted so the server default applies, matching prior behavior.  GLM
+models before 4.5 (e.g. ``glm-4-9b``) don't accept ``thinking`` and are left
+untouched.
+"""
+
+from __future__ import annotations
+
+import re
+from typing import Any

 from providers import register_provider
 from providers.base import ProviderProfile

-zai = ProviderProfile(
+_GLM_VERSION_RE = re.compile(r"^glm-(\d+)(?:\.(\d+))?")
+
+
+def _model_supports_thinking(model: str | None) -> bool:
+    """GLM thinking-capable model families: glm-4.5 and later (4.5, 4.6, 5…)."""
+    m = (model or "").strip().lower()
+    match = _GLM_VERSION_RE.match(m)
+    if not match:
+        return False
+    major = int(match.group(1))
+    minor = int(match.group(2) or 0)
+    return (major, minor) >= (4, 5)
+
+
+class ZaiProfile(ProviderProfile):
+    """Z.AI / GLM — extra_body.thinking enabled/disabled."""
+
+    def build_api_kwargs_extras(
+        self, *, reasoning_config: dict | None = None, model: str | None = None, **context
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        extra_body: dict[str, Any] = {}
+        top_level: dict[str, Any] = {}
+
+        if not _model_supports_thinking(model):
+            return extra_body, top_level
+
+        # Only emit when the user expressed a preference; omitting the field
+        # keeps the server default (enabled) exactly as before.
+        if isinstance(reasoning_config, dict):
+            enabled = reasoning_config.get("enabled") is not False
+            extra_body["thinking"] = {"type": "enabled" if enabled else "disabled"}
+
+        return extra_body, top_level
+
+
+zai = ZaiProfile(
    name="zai",
    aliases=("glm", "z-ai", "z.ai", "zhipu"),
    env_vars=("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"),
--- a/tests/plugins/model_providers/test_zai_profile.py
+++ b/tests/plugins/model_providers/test_zai_profile.py
@ -0,0 +1,141 @@
+"""Unit tests for the Z.AI / GLM provider profile's thinking-mode wiring.
+
+Z.AI's GLM-4.5-and-later chat models default to thinking-mode ON when the
+request omits ``thinking``.  Before the profile emitted the parameter,
+``reasoning_config = {"enabled": False}`` was a silent no-op on the direct
+Z.AI route — users who turned thinking off kept burning thinking tokens on
+every turn (the desktop "thinking reverts to medium" report).
+
+These tests pin the profile's wire-shape contract so Z.AI requests stay
+correctly shaped without going live.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+
+@pytest.fixture
+def zai_profile():
+    """Resolve the registered Z.AI profile through the real discovery path."""
+    # ``model_tools`` triggers plugin discovery on import, which is what
+    # registers the Z.AI profile in the global provider registry.
+    import model_tools  # noqa: F401
+    import providers
+
+    profile = providers.get_provider_profile("zai")
+    assert profile is not None, "zai provider profile must be registered"
+    return profile
+
+
+class TestZaiThinkingWireShape:
+    """``build_api_kwargs_extras`` produces Z.AI's exact wire format."""
+
+    def test_no_preference_omits_thinking(self, zai_profile):
+        """No reasoning_config → omit ``thinking`` so the server default
+        applies (matches prior behavior for users with no preference)."""
+        extra_body, top_level = zai_profile.build_api_kwargs_extras(
+            reasoning_config=None, model="glm-5"
+        )
+        assert extra_body == {}
+        assert top_level == {}
+
+    def test_enabled_sends_enabled_marker(self, zai_profile):
+        extra_body, top_level = zai_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": True, "effort": "medium"}, model="glm-5"
+        )
+        assert extra_body == {"thinking": {"type": "enabled"}}
+        assert top_level == {}
+
+    def test_explicitly_disabled_sends_disabled_marker(self, zai_profile):
+        """``reasoning_config.enabled=False`` → ``thinking.type=disabled``.
+
+        The crucial bit is that the parameter is *sent* at all — GLM defaults
+        to thinking-on when ``thinking`` is absent, so an unsent disable
+        burns thinking tokens forever.
+        """
+        extra_body, top_level = zai_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": False}, model="glm-5"
+        )
+        assert extra_body == {"thinking": {"type": "disabled"}}
+        assert top_level == {}
+
+    def test_no_effort_levels_leak_to_top_level(self, zai_profile):
+        """GLM has no effort knob — never emit ``reasoning_effort``."""
+        for effort in ("minimal", "low", "medium", "high", "xhigh"):
+            _, top_level = zai_profile.build_api_kwargs_extras(
+                reasoning_config={"enabled": True, "effort": effort}, model="glm-5.2"
+            )
+            assert top_level == {}
+
+
+class TestZaiModelGating:
+    """GLM 4.5+ get thinking; earlier GLM models are left untouched."""
+
+    @pytest.mark.parametrize(
+        "model",
+        [
+            "glm-4.5",
+            "glm-4.5-air",
+            "glm-4.5-flash",
+            "glm-4.6",
+            "glm-5",
+            "glm-5.2",
+            "GLM-5",  # case-insensitive
+        ],
+    )
+    def test_thinking_capable_models_emit_thinking(self, zai_profile, model):
+        extra_body, _ = zai_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": False}, model=model
+        )
+        assert extra_body == {"thinking": {"type": "disabled"}}
+
+    @pytest.mark.parametrize(
+        "model",
+        [
+            "glm-4-9b",   # pre-4.5, no thinking param
+            "glm-4",
+            "glm-3-turbo",
+            "",            # bare/unknown
+            None,          # missing
+            "charglm-3",  # non-GLM-versioned id
+        ],
+    )
+    def test_non_thinking_models_emit_nothing(self, zai_profile, model):
+        extra_body, top_level = zai_profile.build_api_kwargs_extras(
+            reasoning_config={"enabled": False}, model=model
+        )
+        assert extra_body == {}
+        assert top_level == {}
+
+
+class TestZaiFullKwargsIntegration:
+    """End-to-end: the transport's full kwargs carry the thinking marker."""
+
+    def test_disabled_reaches_the_wire(self, zai_profile):
+        from agent.transports.chat_completions import ChatCompletionsTransport
+
+        kwargs = ChatCompletionsTransport().build_kwargs(
+            model="glm-5",
+            messages=[{"role": "user", "content": "ping"}],
+            tools=None,
+            provider_profile=zai_profile,
+            reasoning_config={"enabled": False},
+            base_url="https://api.z.ai/api/paas/v4",
+            provider_name="zai",
+        )
+        assert kwargs["extra_body"]["thinking"] == {"type": "disabled"}
+
+    def test_no_preference_keeps_wire_clean(self, zai_profile):
+        from agent.transports.chat_completions import ChatCompletionsTransport
+
+        kwargs = ChatCompletionsTransport().build_kwargs(
+            model="glm-5",
+            messages=[{"role": "user", "content": "ping"}],
+            tools=None,
+            provider_profile=zai_profile,
+            reasoning_config=None,
+            base_url="https://api.z.ai/api/paas/v4",
+            provider_name="zai",
+        )
+        assert "thinking" not in kwargs.get("extra_body", {})
--- a/tests/test_hermes_constants.py
+++ b/tests/test_hermes_constants.py
@ -436,6 +436,18 @@ class TestParseReasoningEffort:
        """The literal "none" disables reasoning explicitly."""
        assert parse_reasoning_effort("none") == {"enabled": False}

+    @pytest.mark.parametrize("value", [False, "false", "FALSE", "disabled", " Disabled "])
+    def test_false_aliases_disable_reasoning(self, value):
+        """YAML `reasoning_effort: false`/`off`/`no` reaches loaders as a
+        boolean; users also hand-write "false"/"disabled". All must mean
+        disabled — not "unset, fall back to the default and keep thinking"."""
+        assert parse_reasoning_effort(value) == {"enabled": False}
+
+    @pytest.mark.parametrize("value", [None, True])
+    def test_non_string_non_false_returns_none(self, value):
+        """None and boolean True fall back to the caller default."""
+        assert parse_reasoning_effort(value) is None
+
    @pytest.mark.parametrize("level", list(VALID_REASONING_EFFORTS))
    def test_each_valid_level(self, level):
        """Every level listed in VALID_REASONING_EFFORTS is accepted as-is."""
--- a/tests/tui_gateway/test_reasoning_session_scope.py
+++ b/tests/tui_gateway/test_reasoning_session_scope.py
@ -0,0 +1,121 @@
+"""Reasoning-effort session scoping in the TUI gateway (desktop backend).
+
+Covers the "desktop reverts thinking to medium after one turn" report:
+
+1. ``_session_info`` must report ``reasoning_effort: "none"`` when reasoning
+   is disabled — reporting ``""`` (indistinguishable from "unset") made the
+   desktop adopt the empty value after the first turn, wiping its sticky
+   "thinking off" pick so every later chat reverted to the default effort.
+
+2. ``config.set key=reasoning`` with a live session must be session-scoped:
+   it must NOT rewrite the global ``agent.reasoning_effort`` in config.yaml
+   (the desktop model menu applies a per-model preset on every selection,
+   which was silently clobbering the user's configured value), and it must
+   land on ``create_reasoning_override`` so lazily-built sessions (agent not
+   constructed until the first prompt) don't drop the change.
+
+3. ``_load_reasoning_config`` must honor a YAML boolean False
+   (``reasoning_effort: false`` / ``off`` / ``no``) as thinking-disabled.
+"""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from unittest.mock import patch
+
+import tui_gateway.server as server
+from tui_gateway.server import _session_info
+
+
+def _agent(reasoning_config):
+    return SimpleNamespace(
+        reasoning_config=reasoning_config,
+        service_tier=None,
+        model="glm-5",
+        provider="zai",
+        session_id="sess-key",
+    )
+
+
+class TestSessionInfoReasoningEffort:
+    """Disabled reasoning must be reported as 'none', never ''."""
+
+    def test_disabled_reports_none(self) -> None:
+        info = _session_info(_agent({"enabled": False}))
+        assert info["reasoning_effort"] == "none"
+
+    def test_enabled_reports_effort(self) -> None:
+        info = _session_info(_agent({"enabled": True, "effort": "high"}))
+        assert info["reasoning_effort"] == "high"
+
+    def test_unset_reports_empty(self) -> None:
+        info = _session_info(_agent(None))
+        assert info["reasoning_effort"] == ""
+
+
+class TestConfigSetReasoningSessionScope:
+    """Session-targeted reasoning changes must not touch global config."""
+
+    def _dispatch(self, params: dict) -> dict:
+        handler = server._methods["config.set"]
+        return handler("rid-1", params)
+
+    def test_session_scoped_set_skips_global_write(self) -> None:
+        agent = _agent(None)
+        session = {"session_key": "k1", "agent": agent}
+        with patch.dict(server._sessions, {"s1": session}, clear=False), \
+                patch.object(server, "_write_config_key") as write_key, \
+                patch.object(server, "_persist_live_session_runtime"), \
+                patch.object(server, "_emit"):
+            resp = self._dispatch(
+                {"key": "reasoning", "session_id": "s1", "value": "none"}
+            )
+        assert resp["result"]["value"] == "none"
+        assert agent.reasoning_config == {"enabled": False}
+        write_key.assert_not_called()
+
+    def test_session_scoped_set_updates_create_override_for_lazy_session(self) -> None:
+        """A pre-build (agent=None) session must keep the change for the
+        deferred agent build instead of dropping it."""
+        session = {"session_key": "k2", "agent": None}
+        with patch.dict(server._sessions, {"s2": session}, clear=False), \
+                patch.object(server, "_write_config_key") as write_key:
+            resp = self._dispatch(
+                {"key": "reasoning", "session_id": "s2", "value": "high"}
+            )
+        assert resp["result"]["value"] == "high"
+        assert session["create_reasoning_override"] == {
+            "enabled": True,
+            "effort": "high",
+        }
+        write_key.assert_not_called()
+
+    def test_no_session_persists_globally(self) -> None:
+        with patch.object(server, "_write_config_key") as write_key:
+            resp = self._dispatch({"key": "reasoning", "value": "low"})
+        assert resp["result"]["value"] == "low"
+        write_key.assert_called_once_with("agent.reasoning_effort", "low")
+
+    def test_unknown_value_rejected(self) -> None:
+        resp = self._dispatch({"key": "reasoning", "value": "bogus"})
+        assert "error" in resp
+
+
+class TestLoadReasoningConfigYamlBoolean:
+    """YAML `reasoning_effort: false` means disabled, not default."""
+
+    def test_boolean_false_disables(self) -> None:
+        with patch.object(
+            server, "_load_cfg", return_value={"agent": {"reasoning_effort": False}}
+        ):
+            assert server._load_reasoning_config() == {"enabled": False}
+
+    def test_string_false_disables(self) -> None:
+        with patch.object(
+            server, "_load_cfg", return_value={"agent": {"reasoning_effort": "false"}}
+        ):
+            assert server._load_reasoning_config() == {"enabled": False}
+
+    def test_unset_returns_default(self) -> None:
+        with patch.object(server, "_load_cfg", return_value={"agent": {}}):
+            assert server._load_reasoning_config() is None
--- a/tools/delegate_tool.py
+++ b/tools/delegate_tool.py
@ -1255,8 +1255,11 @@ def _build_child_agent(
    parent_reasoning = getattr(parent_agent, "reasoning_config", None)
    child_reasoning = parent_reasoning
    try:
-        delegation_effort = str(delegation_cfg.get("reasoning_effort") or "").strip()
-        if delegation_effort:
+        # Keep the raw value — ``str(x or "")`` would coerce a YAML boolean
+        # False (``reasoning_effort: false``) to "" and inherit the parent
+        # instead of disabling thinking for children.
+        delegation_effort = delegation_cfg.get("reasoning_effort")
+        if delegation_effort or delegation_effort is False:
            from hermes_constants import parse_reasoning_effort

            parsed = parse_reasoning_effort(delegation_effort)
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@ -2318,10 +2318,12 @@ def _display_mouse_tracking(display: dict) -> str:
 def _load_reasoning_config() -> dict | None:
    from hermes_constants import parse_reasoning_effort

-    effort = str(
-        (_load_cfg().get("agent") or {}).get("reasoning_effort", "") or ""
-    ).strip()
-    return parse_reasoning_effort(effort)
+    # Pass the raw value through — ``or ""`` would coerce a YAML boolean
+    # False (``reasoning_effort: false``/``off``/``no``) to "", silently
+    # re-enabling thinking for users who explicitly turned it off.
+    return parse_reasoning_effort(
+        (_load_cfg().get("agent") or {}).get("reasoning_effort", "")
+    )


 def _load_service_tier() -> str | None:
@ -3095,11 +3097,15 @@ def _session_info(agent, session: dict | None = None) -> dict:
    personality = (session or {}).get("personality", cfg_personality)
    reasoning_config = getattr(agent, "reasoning_config", None)
    reasoning_effort = ""
-    if (
-        isinstance(reasoning_config, dict)
-        and reasoning_config.get("enabled") is not False
-    ):
-        reasoning_effort = str(reasoning_config.get("effort", "") or "")
+    if isinstance(reasoning_config, dict):
+        if reasoning_config.get("enabled") is False:
+            # Disabled must be distinguishable from unset ("" = provider
+            # default). Reporting "" here made the desktop adopt the empty
+            # value after the first turn, wiping its sticky "thinking off"
+            # pick and re-creating every later chat at the default effort.
+            reasoning_effort = "none"
+        else:
+            reasoning_effort = str(reasoning_config.get("effort", "") or "")
    service_tier = getattr(agent, "service_tier", None) or ""
    # Effective approval-bypass state — the same three sources that
    # check_all_command_guards() ORs together: persistent config
@ -4055,15 +4061,21 @@ def _preview_restart_callbacks(parent: str, task_id: str) -> dict:
 def _reset_session_agent(sid: str, session: dict) -> dict:
    tokens = _set_session_context(session["session_key"])
    try:
+        # Preserve this session's chosen model AND reasoning across /new so a
+        # reset doesn't silently revert to global config (or to a model
+        # another session set). See the cross-session-contamination note in
+        # _apply_model_switch.
+        reset_kw = {"model_override": session.get("model_override")}
+        old_reasoning = getattr(session.get("agent"), "reasoning_config", None)
+        if old_reasoning is None:
+            old_reasoning = session.get("create_reasoning_override")
+        if isinstance(old_reasoning, dict):
+            reset_kw["reasoning_config_override"] = old_reasoning
        new_agent = _make_agent(
            sid,
            session["session_key"],
            session_id=session["session_key"],
-            # Preserve this session's chosen model across /new so a reset
-            # doesn't silently revert to global config (or to a model another
-            # session set). See the cross-session-contamination note in
-            # _apply_model_switch.
-            model_override=session.get("model_override"),
+            **reset_kw,
        )
    finally:
        _clear_session_context(tokens)
@ -10093,15 +10105,23 @@ def _(rid, params: dict) -> dict:
            parsed = parse_reasoning_effort(arg)
            if parsed is None:
                return _err(rid, 4002, f"unknown reasoning value: {value}")
-            _write_config_key("agent.reasoning_effort", arg)
-            if session and session.get("agent") is not None:
-                session["agent"].reasoning_config = parsed
-                _persist_live_session_runtime(session)
-                _emit(
-                    "session.info",
-                    params.get("session_id", ""),
-                    _session_info(session["agent"], session),
-                )
+            if session is not None:
+                # Session-scoped, like the messaging gateway's `/reasoning
+                # <level>` (global persistence is `--global` / Settings →
+                # Model territory). Writing config.yaml here let every
+                # desktop model-menu selection rewrite the user's global
+                # agent.reasoning_effort to the preset default.
+                session["create_reasoning_override"] = parsed
+                if session.get("agent") is not None:
+                    session["agent"].reasoning_config = parsed
+                    _persist_live_session_runtime(session)
+                    _emit(
+                        "session.info",
+                        params.get("session_id", ""),
+                        _session_info(session["agent"], session),
+                    )
+            else:
+                _write_config_key("agent.reasoning_effort", arg)
            return _ok(rid, {"key": key, "value": arg})
        except Exception as e:
            return _err(rid, 5001, str(e))
@ -10776,9 +10796,26 @@ def _(rid, params: dict) -> dict:
        )
    if key == "reasoning":
        cfg = _load_cfg()
-        effort = str(
-            (cfg.get("agent") or {}).get("reasoning_effort", "medium") or "medium"
-        )
+        effort = ""
+        # Prefer the session's live value — `config.set reasoning` is
+        # session-scoped, so the global key may not reflect this chat.
+        session = _sessions.get(params.get("session_id", ""))
+        live = getattr((session or {}).get("agent"), "reasoning_config", None)
+        if live is None and session is not None:
+            live = session.get("create_reasoning_override")
+        if isinstance(live, dict):
+            if live.get("enabled") is False:
+                effort = "none"
+            else:
+                effort = str(live.get("effort", "") or "")
+        if not effort:
+            raw_effort = (cfg.get("agent") or {}).get("reasoning_effort", "")
+            if raw_effort is False:
+                # YAML `reasoning_effort: false`/`off`/`no` — thinking
+                # disabled, not "unset, show the medium default".
+                effort = "none"
+            else:
+                effort = str(raw_effort or "medium")
        display = (
            "show"
            if bool((cfg.get("display") or {}).get("show_reasoning", False))