fix(desktop,tui-gateway,zai): stop thinking-off from reverting to medium
A Z.ai desktop user reported thinking reverting to medium after one turn,
burning ~200% of a week's credits in 4 days despite reasoning_effort: false
in config.yaml. Four compounding bugs:
- _session_info reported reasoning_effort "" for disabled reasoning,
indistinguishable from unset — the desktop adopted it after the first
turn, wiping its sticky "thinking off" pick so every later chat
reverted to the default effort.
- config.set key=reasoning always wrote agent.reasoning_effort to global
config.yaml, so every desktop model-menu selection (preset.effort ??
'medium') clobbered the user's configured value. Now session-scoped
like the messaging gateway's /reasoning, landing on
create_reasoning_override so lazily-built sessions keep it too.
- YAML `reasoning_effort: false`/`off`/`no` (boolean False) was coerced
to "" by every loader's `str(x or "")`, silently re-enabling thinking.
parse_reasoning_effort now treats False/"false"/"disabled" as
{"enabled": False}; loaders (tui gateway, gateway, cli, cron,
delegate) pass the raw value through. The desktop config reader also
crashed on the boolean (false.trim()), aborting voice/STT settings.
- The zai provider profile never sent thinking on the wire, and GLM-4.5+
defaults to thinking ON server-side — so disabling reasoning was a
silent no-op on direct Z.ai, the actual token burner. The profile now
emits extra_body.thinking {"type": "enabled"|"disabled"} for
thinking-capable GLM models, mirroring the DeepSeek profile.
Also: /new (session reset) now carries reasoning_config across the
rebuild like model_override; config.get reasoning prefers the session's
live value and maps a config False to "none"; Settings shows "Off"
instead of a blank select for hand-written false.
This commit is contained in:
parent
c3f06a8fda
commit
5a6720b884
12 changed files with 455 additions and 47 deletions
|
|
@ -21,6 +21,23 @@ function recordingLimit(value: unknown) {
|
|||
return typeof value === 'number' && Number.isFinite(value) && value > 0 ? value : DEFAULT_VOICE_SECONDS
|
||||
}
|
||||
|
||||
/**
 * Canonicalise the raw `agent.reasoning_effort` value read from config.yaml.
 *
 * The config hands back whatever the user wrote, so the value is not always a
 * string: `reasoning_effort: false` (and `off`/`no`, which YAML also parses to
 * boolean false) means thinking is disabled. Checking the type first keeps a
 * bare boolean from ever reaching `.trim()`.
 *
 * @param value - Raw config value of unknown type.
 * @returns `'none'` for boolean `false` and for the strings `"false"` /
 *   `"disabled"` (ignoring case and surrounding whitespace); the trimmed,
 *   lower-cased text for any other string; `''` for every other type.
 */
function normalizeConfigEffort(value: unknown): string {
  if (typeof value === 'string') {
    const normalized = value.trim().toLowerCase()

    switch (normalized) {
      case 'false':
      case 'disabled':
        return 'none'
      default:
        return normalized
    }
  }

  // Non-strings: only a literal boolean false carries meaning (disabled).
  return value === false ? 'none' : ''
}
|
||||
|
||||
interface HermesConfigOptions {
|
||||
activeSessionIdRef: MutableRefObject<string | null>
|
||||
refreshProjectBranch: (cwd: string) => Promise<void>
|
||||
|
|
@ -60,7 +77,7 @@ export function useHermesConfig({ activeSessionIdRef, refreshProjectBranch }: He
|
|||
void refreshProjectBranch($currentCwd.get() || cwd)
|
||||
}
|
||||
|
||||
const reasoning = (config.agent?.reasoning_effort ?? '').trim()
|
||||
const reasoning = normalizeConfigEffort(config.agent?.reasoning_effort)
|
||||
const tier = (config.agent?.service_tier ?? '').trim()
|
||||
|
||||
setCurrentReasoningEffort(prev => (activeSessionIdRef.current ? prev : reasoning))
|
||||
|
|
|
|||
|
|
@ -307,10 +307,12 @@ export function ModelSettings({ onMainModelChanged }: ModelSettingsProps) {
|
|||
const reasoningSupported = mainCaps?.reasoning ?? true
|
||||
const fastSupported = mainCaps?.fast ?? false
|
||||
|
||||
const effortValue =
|
||||
String(getNested(config ?? {}, 'agent.reasoning_effort') ?? '')
|
||||
.trim()
|
||||
.toLowerCase() || 'medium'
|
||||
// Hand-written `reasoning_effort: false`/`off` reaches us as boolean false
|
||||
// ("false" once stringified) — show it as Off, not an empty select.
|
||||
const rawEffort = String(getNested(config ?? {}, 'agent.reasoning_effort') ?? '')
|
||||
.trim()
|
||||
.toLowerCase()
|
||||
const effortValue = rawEffort === 'false' || rawEffort === 'disabled' ? 'none' : rawEffort || 'medium'
|
||||
|
||||
const fastOn = isFastTier(getNested(config ?? {}, 'agent.service_tier'))
|
||||
|
||||
|
|
|
|||
10
cli.py
10
cli.py
|
|
@ -334,11 +334,15 @@ def _resolve_prefill_messages_file(config: Dict[str, Any]) -> str:
|
|||
return ""
|
||||
|
||||
|
||||
def _parse_reasoning_config(effort: str) -> dict | None:
|
||||
"""Parse a reasoning effort level into an OpenRouter reasoning config dict."""
|
||||
def _parse_reasoning_config(effort) -> dict | None:
|
||||
"""Parse a reasoning effort level into an OpenRouter reasoning config dict.
|
||||
|
||||
Accepts the raw config value (string or YAML boolean — ``false``/``off``
|
||||
parse as thinking disabled, see parse_reasoning_effort).
|
||||
"""
|
||||
from hermes_constants import parse_reasoning_effort
|
||||
result = parse_reasoning_effort(effort)
|
||||
if effort and effort.strip() and result is None:
|
||||
if effort and str(effort).strip() and result is None:
|
||||
logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort)
|
||||
return result
|
||||
|
||||
|
|
|
|||
|
|
@ -2620,10 +2620,12 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
# Reasoning config from config.yaml
|
||||
# Reasoning config from config.yaml (raw value — a YAML boolean False
|
||||
# means thinking disabled, see parse_reasoning_effort)
|
||||
from hermes_constants import parse_reasoning_effort
|
||||
effort = str(_cfg.get("agent", {}).get("reasoning_effort", "")).strip()
|
||||
reasoning_config = parse_reasoning_effort(effort)
|
||||
reasoning_config = parse_reasoning_effort(
|
||||
_cfg.get("agent", {}).get("reasoning_effort", "")
|
||||
)
|
||||
|
||||
# Prefill messages from env or config.yaml. The top-level
|
||||
# prefill_messages_file key is canonical; agent.prefill_messages_file is
|
||||
|
|
|
|||
|
|
@ -4643,9 +4643,12 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
"""
|
||||
from hermes_constants import parse_reasoning_effort
|
||||
cfg = _load_gateway_runtime_config()
|
||||
effort = str(cfg_get(cfg, "agent", "reasoning_effort", default="") or "").strip()
|
||||
# Keep the raw value — coercing with ``or ""`` turns a YAML boolean
|
||||
# False (``reasoning_effort: false``/``off``/``no``) into "", silently
|
||||
# re-enabling thinking for users who explicitly disabled it.
|
||||
effort = cfg_get(cfg, "agent", "reasoning_effort", default="")
|
||||
result = parse_reasoning_effort(effort)
|
||||
if effort and effort.strip() and result is None:
|
||||
if effort and str(effort).strip() and result is None:
|
||||
logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort)
|
||||
return result
|
||||
|
||||
|
|
|
|||
|
|
@ -794,18 +794,26 @@ def apply_subprocess_home_env(env: dict[str, str]) -> None:
|
|||
VALID_REASONING_EFFORTS = ("minimal", "low", "medium", "high", "xhigh")
|
||||
|
||||
|
||||
def parse_reasoning_effort(effort: str) -> dict | None:
|
||||
def parse_reasoning_effort(effort) -> dict | None:
|
||||
"""Parse a reasoning effort level into a config dict.
|
||||
|
||||
Valid levels: "none", "minimal", "low", "medium", "high", "xhigh".
|
||||
Returns None when the input is empty or unrecognized (caller uses default).
|
||||
Returns {"enabled": False} for "none".
|
||||
Returns {"enabled": False} for "none" (aliases: "false", "disabled", and
|
||||
YAML boolean False — users write ``reasoning_effort: false``/``off``/``no``
|
||||
in config.yaml and YAML hands us a bool, which must mean disabled, not
|
||||
"fall back to the default and keep thinking").
|
||||
Returns {"enabled": True, "effort": <level>} for valid effort levels.
|
||||
"""
|
||||
if not effort or not effort.strip():
|
||||
if effort is False:
|
||||
return {"enabled": False}
|
||||
if effort is None or effort is True:
|
||||
return None
|
||||
effort = str(effort)
|
||||
if not effort.strip():
|
||||
return None
|
||||
effort = effort.strip().lower()
|
||||
if effort == "none":
|
||||
if effort in {"none", "false", "disabled"}:
|
||||
return {"enabled": False}
|
||||
if effort in VALID_REASONING_EFFORTS:
|
||||
return {"enabled": True, "effort": effort}
|
||||
|
|
|
|||
|
|
@ -1,9 +1,67 @@
|
|||
"""ZAI / GLM provider profile."""
|
||||
"""ZAI / GLM provider profile.
|
||||
|
||||
Z.AI's GLM-4.5-and-later chat models default to thinking-mode ON when the
|
||||
request omits ``thinking``. Hermes' ``reasoning_config = {"enabled": False}``
|
||||
was previously a silent no-op on this route — the base profile emits nothing,
|
||||
so users who turned thinking off (desktop toggle, ``/reasoning none``,
|
||||
``reasoning_effort: none``/``false`` in config.yaml) kept burning thinking
|
||||
tokens on every turn.
|
||||
|
||||
:meth:`ZaiProfile.build_api_kwargs_extras` translates the Hermes reasoning
|
||||
config into the wire shape Z.AI's OpenAI-compat endpoint expects:
|
||||
|
||||
{"extra_body": {"thinking": {"type": "enabled" | "disabled"}}}
|
||||
|
||||
When no reasoning preference is set (``reasoning_config is None``) the field
|
||||
is omitted so the server default applies, matching prior behavior. GLM
|
||||
models before 4.5 (e.g. ``glm-4-9b``) don't accept ``thinking`` and are left
|
||||
untouched.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
zai = ProviderProfile(
|
||||
_GLM_VERSION_RE = re.compile(r"^glm-(\d+)(?:\.(\d+))?")
|
||||
|
||||
|
||||
def _model_supports_thinking(model: str | None) -> bool:
|
||||
"""GLM thinking-capable model families: glm-4.5 and later (4.5, 4.6, 5…)."""
|
||||
m = (model or "").strip().lower()
|
||||
match = _GLM_VERSION_RE.match(m)
|
||||
if not match:
|
||||
return False
|
||||
major = int(match.group(1))
|
||||
minor = int(match.group(2) or 0)
|
||||
return (major, minor) >= (4, 5)
|
||||
|
||||
|
||||
class ZaiProfile(ProviderProfile):
    """Z.AI / GLM profile that maps the reasoning config onto ``extra_body.thinking``."""

    def build_api_kwargs_extras(
        self, *, reasoning_config: dict | None = None, model: str | None = None, **context
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Build the provider-specific request extras as ``(extra_body, top_level)``.

        ``top_level`` is always empty. ``extra_body`` carries
        ``{"thinking": {"type": ...}}`` only when ``model`` passes
        ``_model_supports_thinking`` and ``reasoning_config`` is a dict. The
        type is ``"disabled"`` when the config's ``"enabled"`` entry is exactly
        ``False`` and ``"enabled"`` otherwise. Extra keyword context is
        accepted and ignored.
        """
        top_level: dict[str, Any] = {}

        # Models that fail the capability check never receive the field.
        if not _model_supports_thinking(model):
            return {}, top_level

        # No expressed preference: omit the field so the server-side default
        # keeps applying, exactly as it did before this profile emitted anything.
        if not isinstance(reasoning_config, dict):
            return {}, top_level

        thinking_off = reasoning_config.get("enabled") is False
        marker = "disabled" if thinking_off else "enabled"
        return {"thinking": {"type": marker}}, top_level
|
||||
|
||||
|
||||
zai = ZaiProfile(
|
||||
name="zai",
|
||||
aliases=("glm", "z-ai", "z.ai", "zhipu"),
|
||||
env_vars=("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"),
|
||||
|
|
|
|||
141
tests/plugins/model_providers/test_zai_profile.py
Normal file
141
tests/plugins/model_providers/test_zai_profile.py
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
"""Unit tests for the Z.AI / GLM provider profile's thinking-mode wiring.
|
||||
|
||||
Z.AI's GLM-4.5-and-later chat models default to thinking-mode ON when the
|
||||
request omits ``thinking``. Before the profile emitted the parameter,
|
||||
``reasoning_config = {"enabled": False}`` was a silent no-op on the direct
|
||||
Z.AI route — users who turned thinking off kept burning thinking tokens on
|
||||
every turn (the desktop "thinking reverts to medium" report).
|
||||
|
||||
These tests pin the profile's wire-shape contract so Z.AI requests stay
|
||||
correctly shaped without going live.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def zai_profile():
    """Return the Z.AI profile as registered through normal plugin discovery."""
    # Importing ``model_tools`` is what runs plugin discovery, and discovery
    # is what places the Z.AI profile in the global provider registry.
    import model_tools  # noqa: F401
    from providers import get_provider_profile

    resolved = get_provider_profile("zai")
    assert resolved is not None, "zai provider profile must be registered"
    return resolved
|
||||
|
||||
|
||||
class TestZaiThinkingWireShape:
    """Pin the exact ``(extra_body, top_level)`` pair that
    ``build_api_kwargs_extras`` returns for Z.AI."""

    def test_no_preference_omits_thinking(self, zai_profile):
        """``reasoning_config=None`` emits nothing, leaving the server
        default in force (unchanged for users with no preference)."""
        body, top = zai_profile.build_api_kwargs_extras(
            model="glm-5", reasoning_config=None
        )
        assert body == {}
        assert top == {}

    def test_enabled_sends_enabled_marker(self, zai_profile):
        """An enabled config is sent as ``thinking.type=enabled``."""
        config = {"enabled": True, "effort": "medium"}
        body, top = zai_profile.build_api_kwargs_extras(
            model="glm-5", reasoning_config=config
        )
        assert body == {"thinking": {"type": "enabled"}}
        assert top == {}

    def test_explicitly_disabled_sends_disabled_marker(self, zai_profile):
        """``reasoning_config.enabled=False`` becomes ``thinking.type=disabled``.

        What matters is that the field is *present*: GLM treats a missing
        ``thinking`` as thinking-on, so a disable that is never sent keeps
        burning thinking tokens.
        """
        config = {"enabled": False}
        body, top = zai_profile.build_api_kwargs_extras(
            model="glm-5", reasoning_config=config
        )
        assert body == {"thinking": {"type": "disabled"}}
        assert top == {}

    def test_no_effort_levels_leak_to_top_level(self, zai_profile):
        """GLM has no effort knob, so ``reasoning_effort`` must never be emitted."""
        levels = ("minimal", "low", "medium", "high", "xhigh")
        for level in levels:
            config = {"enabled": True, "effort": level}
            _, top = zai_profile.build_api_kwargs_extras(
                model="glm-5.2", reasoning_config=config
            )
            assert top == {}
|
||||
|
||||
|
||||
class TestZaiModelGating:
    """Only GLM 4.5 and newer receive ``thinking``; every other id gets nothing."""

    @pytest.mark.parametrize(
        "model",
        [
            "glm-4.5",
            "glm-4.5-air",
            "glm-4.5-flash",
            "glm-4.6",
            "glm-5",
            "glm-5.2",
            "GLM-5",  # matching ignores case
        ],
    )
    def test_thinking_capable_models_emit_thinking(self, zai_profile, model):
        """A disabled config reaches the wire for each thinking-capable id."""
        disabled = {"enabled": False}
        body, _ = zai_profile.build_api_kwargs_extras(
            model=model, reasoning_config=disabled
        )
        assert body == {"thinking": {"type": "disabled"}}

    @pytest.mark.parametrize(
        "model",
        [
            "glm-4-9b",  # older than 4.5: no thinking parameter
            "glm-4",
            "glm-3-turbo",
            "",  # bare / unknown id
            None,  # no model supplied
            "charglm-3",  # id without a leading GLM version
        ],
    )
    def test_non_thinking_models_emit_nothing(self, zai_profile, model):
        """Ids outside the thinking-capable set produce two empty dicts."""
        disabled = {"enabled": False}
        body, top = zai_profile.build_api_kwargs_extras(
            model=model, reasoning_config=disabled
        )
        assert body == {}
        assert top == {}
|
||||
|
||||
|
||||
class TestZaiFullKwargsIntegration:
    """End to end: the kwargs built by the chat-completions transport carry
    (or omit) the thinking marker."""

    @staticmethod
    def _transport_kwargs(profile, reasoning_config):
        """Build full request kwargs for ``glm-5`` with the given reasoning config."""
        from agent.transports.chat_completions import ChatCompletionsTransport

        return ChatCompletionsTransport().build_kwargs(
            model="glm-5",
            messages=[{"role": "user", "content": "ping"}],
            tools=None,
            provider_profile=profile,
            reasoning_config=reasoning_config,
            base_url="https://api.z.ai/api/paas/v4",
            provider_name="zai",
        )

    def test_disabled_reaches_the_wire(self, zai_profile):
        """A disabled config shows up as ``extra_body.thinking.type=disabled``."""
        built = self._transport_kwargs(zai_profile, {"enabled": False})
        assert built["extra_body"]["thinking"] == {"type": "disabled"}

    def test_no_preference_keeps_wire_clean(self, zai_profile):
        """With no reasoning config, ``thinking`` is absent from ``extra_body``."""
        built = self._transport_kwargs(zai_profile, None)
        assert "thinking" not in built.get("extra_body", {})
|
||||
|
|
@ -436,6 +436,18 @@ class TestParseReasoningEffort:
|
|||
"""The literal "none" disables reasoning explicitly."""
|
||||
assert parse_reasoning_effort("none") == {"enabled": False}
|
||||
|
||||
@pytest.mark.parametrize("value", [False, "false", "FALSE", "disabled", " Disabled "])
def test_false_aliases_disable_reasoning(self, value):
    """A YAML boolean false (``reasoning_effort: false``/``off``/``no``) and
    the hand-written strings "false"/"disabled" must all parse as disabled,
    never as "unset, use the default and keep thinking"."""
    disabled = {"enabled": False}
    assert parse_reasoning_effort(value) == disabled
|
||||
|
||||
@pytest.mark.parametrize("value", [None, True])
def test_non_string_non_false_returns_none(self, value):
    """``None`` and boolean ``True`` parse to ``None`` (caller's default applies)."""
    parsed = parse_reasoning_effort(value)
    assert parsed is None
|
||||
|
||||
@pytest.mark.parametrize("level", list(VALID_REASONING_EFFORTS))
|
||||
def test_each_valid_level(self, level):
|
||||
"""Every level listed in VALID_REASONING_EFFORTS is accepted as-is."""
|
||||
|
|
|
|||
121
tests/tui_gateway/test_reasoning_session_scope.py
Normal file
121
tests/tui_gateway/test_reasoning_session_scope.py
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
"""Reasoning-effort session scoping in the TUI gateway (desktop backend).
|
||||
|
||||
Covers the "desktop reverts thinking to medium after one turn" report:
|
||||
|
||||
1. ``_session_info`` must report ``reasoning_effort: "none"`` when reasoning
|
||||
is disabled — reporting ``""`` (indistinguishable from "unset") made the
|
||||
desktop adopt the empty value after the first turn, wiping its sticky
|
||||
"thinking off" pick so every later chat reverted to the default effort.
|
||||
|
||||
2. ``config.set key=reasoning`` with a live session must be session-scoped:
|
||||
it must NOT rewrite the global ``agent.reasoning_effort`` in config.yaml
|
||||
(the desktop model menu applies a per-model preset on every selection,
|
||||
which was silently clobbering the user's configured value), and it must
|
||||
land on ``create_reasoning_override`` so lazily-built sessions (agent not
|
||||
constructed until the first prompt) don't drop the change.
|
||||
|
||||
3. ``_load_reasoning_config`` must honor a YAML boolean False
|
||||
(``reasoning_effort: false`` / ``off`` / ``no``) as thinking-disabled.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch
|
||||
|
||||
import tui_gateway.server as server
|
||||
from tui_gateway.server import _session_info
|
||||
|
||||
|
||||
def _agent(reasoning_config):
|
||||
return SimpleNamespace(
|
||||
reasoning_config=reasoning_config,
|
||||
service_tier=None,
|
||||
model="glm-5",
|
||||
provider="zai",
|
||||
session_id="sess-key",
|
||||
)
|
||||
|
||||
|
||||
class TestSessionInfoReasoningEffort:
    """``_session_info`` reports disabled reasoning as 'none', never as ''."""

    @staticmethod
    def _reported_effort(reasoning_config):
        """Return the ``reasoning_effort`` field reported for a stub agent."""
        return _session_info(_agent(reasoning_config))["reasoning_effort"]

    def test_disabled_reports_none(self) -> None:
        assert self._reported_effort({"enabled": False}) == "none"

    def test_enabled_reports_effort(self) -> None:
        assert self._reported_effort({"enabled": True, "effort": "high"}) == "high"

    def test_unset_reports_empty(self) -> None:
        assert self._reported_effort(None) == ""
|
||||
|
||||
|
||||
class TestConfigSetReasoningSessionScope:
    """A reasoning change aimed at a session must leave global config alone."""

    def _dispatch(self, params: dict) -> dict:
        """Invoke the registered ``config.set`` handler with a fixed request id."""
        return server._methods["config.set"]("rid-1", params)

    def test_session_scoped_set_skips_global_write(self) -> None:
        """With a live agent, the change lands on the agent and nothing is
        written to the global config key."""
        live_agent = _agent(None)
        entry = {"session_key": "k1", "agent": live_agent}
        request = {"key": "reasoning", "session_id": "s1", "value": "none"}
        with patch.dict(server._sessions, {"s1": entry}, clear=False):
            with patch.object(server, "_write_config_key") as global_write, \
                    patch.object(server, "_persist_live_session_runtime"), \
                    patch.object(server, "_emit"):
                reply = self._dispatch(request)
                assert reply["result"]["value"] == "none"
                assert live_agent.reasoning_config == {"enabled": False}
                global_write.assert_not_called()

    def test_session_scoped_set_updates_create_override_for_lazy_session(self) -> None:
        """A session whose agent is not built yet (agent=None) must store the
        change for the deferred build rather than lose it."""
        entry = {"session_key": "k2", "agent": None}
        request = {"key": "reasoning", "session_id": "s2", "value": "high"}
        with patch.dict(server._sessions, {"s2": entry}, clear=False):
            with patch.object(server, "_write_config_key") as global_write:
                reply = self._dispatch(request)
                assert reply["result"]["value"] == "high"
                expected_override = {"enabled": True, "effort": "high"}
                assert entry["create_reasoning_override"] == expected_override
                global_write.assert_not_called()

    def test_no_session_persists_globally(self) -> None:
        """Without a session id the value is written to the global config key."""
        with patch.object(server, "_write_config_key") as global_write:
            reply = self._dispatch({"key": "reasoning", "value": "low"})
            assert reply["result"]["value"] == "low"
            global_write.assert_called_once_with("agent.reasoning_effort", "low")

    def test_unknown_value_rejected(self) -> None:
        """An unrecognised effort value produces an error response."""
        reply = self._dispatch({"key": "reasoning", "value": "bogus"})
        assert "error" in reply
|
||||
|
||||
|
||||
class TestLoadReasoningConfigYamlBoolean:
    """A YAML ``reasoning_effort: false`` must load as disabled, not as the default."""

    @staticmethod
    def _load_with(agent_section: dict):
        """Run ``_load_reasoning_config`` against a stubbed config whose
        ``agent`` section is ``agent_section``."""
        stub_cfg = {"agent": agent_section}
        with patch.object(server, "_load_cfg", return_value=stub_cfg):
            return server._load_reasoning_config()

    def test_boolean_false_disables(self) -> None:
        assert self._load_with({"reasoning_effort": False}) == {"enabled": False}

    def test_string_false_disables(self) -> None:
        assert self._load_with({"reasoning_effort": "false"}) == {"enabled": False}

    def test_unset_returns_default(self) -> None:
        assert self._load_with({}) is None
|
||||
|
|
@ -1255,8 +1255,11 @@ def _build_child_agent(
|
|||
parent_reasoning = getattr(parent_agent, "reasoning_config", None)
|
||||
child_reasoning = parent_reasoning
|
||||
try:
|
||||
delegation_effort = str(delegation_cfg.get("reasoning_effort") or "").strip()
|
||||
if delegation_effort:
|
||||
# Keep the raw value — ``str(x or "")`` would coerce a YAML boolean
|
||||
# False (``reasoning_effort: false``) to "" and inherit the parent
|
||||
# instead of disabling thinking for children.
|
||||
delegation_effort = delegation_cfg.get("reasoning_effort")
|
||||
if delegation_effort or delegation_effort is False:
|
||||
from hermes_constants import parse_reasoning_effort
|
||||
|
||||
parsed = parse_reasoning_effort(delegation_effort)
|
||||
|
|
|
|||
|
|
@ -2318,10 +2318,12 @@ def _display_mouse_tracking(display: dict) -> str:
|
|||
def _load_reasoning_config() -> dict | None:
|
||||
from hermes_constants import parse_reasoning_effort
|
||||
|
||||
effort = str(
|
||||
(_load_cfg().get("agent") or {}).get("reasoning_effort", "") or ""
|
||||
).strip()
|
||||
return parse_reasoning_effort(effort)
|
||||
# Pass the raw value through — ``or ""`` would coerce a YAML boolean
|
||||
# False (``reasoning_effort: false``/``off``/``no``) to "", silently
|
||||
# re-enabling thinking for users who explicitly turned it off.
|
||||
return parse_reasoning_effort(
|
||||
(_load_cfg().get("agent") or {}).get("reasoning_effort", "")
|
||||
)
|
||||
|
||||
|
||||
def _load_service_tier() -> str | None:
|
||||
|
|
@ -3095,11 +3097,15 @@ def _session_info(agent, session: dict | None = None) -> dict:
|
|||
personality = (session or {}).get("personality", cfg_personality)
|
||||
reasoning_config = getattr(agent, "reasoning_config", None)
|
||||
reasoning_effort = ""
|
||||
if (
|
||||
isinstance(reasoning_config, dict)
|
||||
and reasoning_config.get("enabled") is not False
|
||||
):
|
||||
reasoning_effort = str(reasoning_config.get("effort", "") or "")
|
||||
if isinstance(reasoning_config, dict):
|
||||
if reasoning_config.get("enabled") is False:
|
||||
# Disabled must be distinguishable from unset ("" = provider
|
||||
# default). Reporting "" here made the desktop adopt the empty
|
||||
# value after the first turn, wiping its sticky "thinking off"
|
||||
# pick and re-creating every later chat at the default effort.
|
||||
reasoning_effort = "none"
|
||||
else:
|
||||
reasoning_effort = str(reasoning_config.get("effort", "") or "")
|
||||
service_tier = getattr(agent, "service_tier", None) or ""
|
||||
# Effective approval-bypass state — the same three sources that
|
||||
# check_all_command_guards() ORs together: persistent config
|
||||
|
|
@ -4055,15 +4061,21 @@ def _preview_restart_callbacks(parent: str, task_id: str) -> dict:
|
|||
def _reset_session_agent(sid: str, session: dict) -> dict:
|
||||
tokens = _set_session_context(session["session_key"])
|
||||
try:
|
||||
# Preserve this session's chosen model AND reasoning across /new so a
|
||||
# reset doesn't silently revert to global config (or to a model
|
||||
# another session set). See the cross-session-contamination note in
|
||||
# _apply_model_switch.
|
||||
reset_kw = {"model_override": session.get("model_override")}
|
||||
old_reasoning = getattr(session.get("agent"), "reasoning_config", None)
|
||||
if old_reasoning is None:
|
||||
old_reasoning = session.get("create_reasoning_override")
|
||||
if isinstance(old_reasoning, dict):
|
||||
reset_kw["reasoning_config_override"] = old_reasoning
|
||||
new_agent = _make_agent(
|
||||
sid,
|
||||
session["session_key"],
|
||||
session_id=session["session_key"],
|
||||
# Preserve this session's chosen model across /new so a reset
|
||||
# doesn't silently revert to global config (or to a model another
|
||||
# session set). See the cross-session-contamination note in
|
||||
# _apply_model_switch.
|
||||
model_override=session.get("model_override"),
|
||||
**reset_kw,
|
||||
)
|
||||
finally:
|
||||
_clear_session_context(tokens)
|
||||
|
|
@ -10093,15 +10105,23 @@ def _(rid, params: dict) -> dict:
|
|||
parsed = parse_reasoning_effort(arg)
|
||||
if parsed is None:
|
||||
return _err(rid, 4002, f"unknown reasoning value: {value}")
|
||||
_write_config_key("agent.reasoning_effort", arg)
|
||||
if session and session.get("agent") is not None:
|
||||
session["agent"].reasoning_config = parsed
|
||||
_persist_live_session_runtime(session)
|
||||
_emit(
|
||||
"session.info",
|
||||
params.get("session_id", ""),
|
||||
_session_info(session["agent"], session),
|
||||
)
|
||||
if session is not None:
|
||||
# Session-scoped, like the messaging gateway's `/reasoning
|
||||
# <level>` (global persistence is `--global` / Settings →
|
||||
# Model territory). Writing config.yaml here let every
|
||||
# desktop model-menu selection rewrite the user's global
|
||||
# agent.reasoning_effort to the preset default.
|
||||
session["create_reasoning_override"] = parsed
|
||||
if session.get("agent") is not None:
|
||||
session["agent"].reasoning_config = parsed
|
||||
_persist_live_session_runtime(session)
|
||||
_emit(
|
||||
"session.info",
|
||||
params.get("session_id", ""),
|
||||
_session_info(session["agent"], session),
|
||||
)
|
||||
else:
|
||||
_write_config_key("agent.reasoning_effort", arg)
|
||||
return _ok(rid, {"key": key, "value": arg})
|
||||
except Exception as e:
|
||||
return _err(rid, 5001, str(e))
|
||||
|
|
@ -10776,9 +10796,26 @@ def _(rid, params: dict) -> dict:
|
|||
)
|
||||
if key == "reasoning":
|
||||
cfg = _load_cfg()
|
||||
effort = str(
|
||||
(cfg.get("agent") or {}).get("reasoning_effort", "medium") or "medium"
|
||||
)
|
||||
effort = ""
|
||||
# Prefer the session's live value — `config.set reasoning` is
|
||||
# session-scoped, so the global key may not reflect this chat.
|
||||
session = _sessions.get(params.get("session_id", ""))
|
||||
live = getattr((session or {}).get("agent"), "reasoning_config", None)
|
||||
if live is None and session is not None:
|
||||
live = session.get("create_reasoning_override")
|
||||
if isinstance(live, dict):
|
||||
if live.get("enabled") is False:
|
||||
effort = "none"
|
||||
else:
|
||||
effort = str(live.get("effort", "") or "")
|
||||
if not effort:
|
||||
raw_effort = (cfg.get("agent") or {}).get("reasoning_effort", "")
|
||||
if raw_effort is False:
|
||||
# YAML `reasoning_effort: false`/`off`/`no` — thinking
|
||||
# disabled, not "unset, show the medium default".
|
||||
effort = "none"
|
||||
else:
|
||||
effort = str(raw_effort or "medium")
|
||||
display = (
|
||||
"show"
|
||||
if bool((cfg.get("display") or {}).get("show_reasoning", False))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue