hermes-agent/tests/hermes_cli/test_codex_runtime_switch.py
snav 35eb93c8df fix(codex-runtime): re-running /codex-runtime codex_app_server when already enabled now triggers migration
The /codex-runtime slash command short-circuits with "openai_runtime
already set" when invoked with the same value as the current config,
and crucially skips the entire migration block below. The check
conflates two things: (a) "the config value is correct" and (b) "the
world state (managed block in ~/.codex/config.toml, hermes-tools MCP
callback, plugin discovery) is converged".

Common footgun this exposes: a user who pre-sets
`model.openai_runtime: codex_app_server` directly in config.yaml
(a reasonable thing to do) and then runs /codex-runtime codex_app_server
to trigger migration sees "already set" and silently gets no migration.
~/.codex/config.toml never receives the managed block, the hermes-tools
MCP callback never registers, and codex falls through to its default
runtime instead of the app-server one — a visibly successful but
functionally partial setup.

The migration is idempotent by design (it replaces its own managed
block in place between MIGRATION_MARKER and MIGRATION_END_MARKER), so
re-running it is safe and cheap. Fix the short-circuit to fall through
to migration when re-applying codex_app_server while skipping the
config persist (no value-level change needed). The disable case
(re-applying "auto") still short-circuits because disabling doesn't
touch ~/.codex/config.toml at all.

The user-visible message changes to "openai_runtime already set to
codex_app_server — re-applying migration" so re-runs surface what
happened.

Regression test (test_reapply_codex_app_server_runs_migration) asserts:
- migrate() was called when re-applying
- persist_callback was NOT called (no config write on no-op transitions)
- migration output (MCP servers, sandbox default) surfaces in the
  user-visible message
- requires_new_session is True so callers know to /reset

Verified RED→GREEN: the test fails on origin/main with
"migration must run on reapply, not just first enable" and passes with
this fix. Full test_codex_runtime_switch.py suite: 31 passed.
2026-07-01 23:51:54 +05:30

288 lines
12 KiB
Python

"""Tests for the /codex-runtime slash-command shared logic.
These cover the pure-Python state machine; CLI and gateway handlers are
tested separately because they involve config persistence and prompt
formatting that's surface-specific."""
from __future__ import annotations
from unittest.mock import patch
import pytest
from hermes_cli import codex_runtime_switch as crs
class TestParseArgs:
    """Checks for ``crs.parse_args``: alias resolution and rejection of unknown input."""

    # (raw argument, runtime value parse_args is expected to resolve it to)
    _ACCEPTED = [
        ("", None),
        (" ", None),
        ("auto", "auto"),
        ("codex_app_server", "codex_app_server"),
        ("on", "codex_app_server"),
        ("off", "auto"),
        ("codex", "codex_app_server"),
        ("default", "auto"),
        ("hermes", "auto"),
        ("ENABLE", "codex_app_server"),  # case-insensitive
        ("DiSaBlE", "auto"),
    ]

    @pytest.mark.parametrize("arg,expected", _ACCEPTED)
    def test_valid_args(self, arg, expected):
        """Every accepted spelling parses without errors to the expected value."""
        parsed, problems = crs.parse_args(arg)
        assert problems == []
        assert parsed == expected

    def test_invalid_arg_returns_error(self):
        """An unrecognized word yields no value and an "Unknown runtime" error."""
        parsed, problems = crs.parse_args("turbo")
        assert parsed is None
        assert problems
        assert "Unknown runtime" in problems[0]
class TestGetCurrentRuntime:
    """Checks for ``crs.get_current_runtime`` across well-formed and malformed configs."""

    def test_default_when_unset(self):
        """A missing, empty, or blank runtime setting reads back as "auto"."""
        unset_configs = (
            {},
            {"model": {}},
            {"model": {"openai_runtime": ""}},
        )
        for config in unset_configs:
            assert crs.get_current_runtime(config) == "auto"

    def test_unrecognized_falls_back_to_auto(self):
        """A value outside the known runtimes reads back as "auto"."""
        config = {"model": {"openai_runtime": "garbage"}}
        assert crs.get_current_runtime(config) == "auto"

    def test_explicit_codex(self):
        """An explicit codex_app_server setting is returned unchanged."""
        config = {"model": {"openai_runtime": "codex_app_server"}}
        assert crs.get_current_runtime(config) == "codex_app_server"

    def test_handles_non_dict_config(self):
        """Non-dict configs, or a non-dict "model" section, read back as "auto"."""
        malformed_configs = (None, "notadict", {"model": "notadict"})
        for config in malformed_configs:
            runtime = crs.get_current_runtime(config)  # type: ignore[arg-type]
            assert runtime == "auto"
class TestSetRuntime:
    """Checks for ``crs.set_runtime``: in-place mutation, return value, validation."""

    def test_creates_model_section_if_missing(self):
        """Setting a runtime on an empty config creates the "model" section."""
        config = {}
        previous = crs.set_runtime(config, "codex_app_server")
        assert previous == "auto"
        assert config["model"]["openai_runtime"] == "codex_app_server"

    def test_returns_previous_value(self):
        """The runtime that was in effect before the call is returned."""
        config = {"model": {"openai_runtime": "codex_app_server"}}
        previous = crs.set_runtime(config, "auto")
        assert previous == "codex_app_server"
        assert config["model"]["openai_runtime"] == "auto"

    def test_invalid_value_raises(self):
        """An unknown runtime name is rejected with ValueError."""
        empty_config = {}
        with pytest.raises(ValueError):
            crs.set_runtime(empty_config, "garbage")
class TestApply:
    """Tests for ``crs.apply``: state report, enable/disable, persistence, migration.

    Tests that enable ``codex_app_server`` patch
    ``hermes_cli.codex_runtime_plugin_migration.migrate`` so that no test can
    reach the real migration (see the isolation note in
    ``test_enable_succeeds_when_codex_present``).
    """

    @staticmethod
    def _stub_migration_report(mig):
        """Fill the patched ``migrate`` mock's return value with a fixed report.

        The report describes one user MCP server ("filesystem") plus the
        internal "hermes-tools" entry, no plugins, no errors, and a
        ":workspace" permissions default.
        """
        report = mig.return_value
        report.migrated = ["filesystem", "hermes-tools"]
        report.migrated_plugins = []
        report.plugin_query_error = None
        report.wrote_permissions_default = ":workspace"
        report.errors = []
        report.target_path = "/fake/.codex/config.toml"

    def test_read_only_call_reports_state(self):
        """Calling apply() with no new value reports the current runtime and
        the codex version without changing anything."""
        cfg = {"model": {"openai_runtime": "codex_app_server"}}
        with patch.object(crs, "check_codex_binary_ok",
                          return_value=(True, "0.130.0")):
            r = crs.apply(cfg, None)
        assert r.success
        assert r.new_value == "codex_app_server"
        assert r.old_value == "codex_app_server"
        assert "codex_app_server" in r.message
        assert "0.130.0" in r.message

    def test_no_change_when_already_set(self):
        """Re-applying "auto" when already on "auto" short-circuits."""
        cfg = {"model": {"openai_runtime": "auto"}}
        r = crs.apply(cfg, "auto")
        assert r.success
        assert r.message == "openai_runtime already set to auto"

    def test_reapply_codex_app_server_runs_migration(self):
        """Re-applying codex_app_server when already enabled must still
        run the migration. Common footgun: user pre-sets
        `openai_runtime: codex_app_server` in config.yaml, then runs
        /codex-runtime codex_app_server expecting the migration. Without
        this, the slash command short-circuits with "already set" and
        ~/.codex/config.toml never gets the hermes-tools MCP callback
        or plugin migration — silent partial setup.
        """
        cfg = {
            "model": {"openai_runtime": "codex_app_server"},
            "mcp_servers": {
                "filesystem": {"command": "npx", "args": ["-y", "fs-server"]},
            },
        }
        persisted = {}

        def persist(c):
            persisted.update(c)

        with patch.object(crs, "check_codex_binary_ok",
                          return_value=(True, "0.130.0")), \
             patch("hermes_cli.codex_runtime_plugin_migration.migrate") as mig:
            self._stub_migration_report(mig)
            r = crs.apply(cfg, "codex_app_server",
                          persist_callback=persist)
        assert r.success
        assert mig.called, "migration must run on reapply, not just first enable"
        # Re-apply should signal "already set" but still announce migration ran
        assert "already set" in r.message
        assert "re-applying migration" in r.message
        # Migration output still surfaces
        assert "Migrated 1 MCP server" in r.message
        assert "filesystem" in r.message
        assert "Default sandbox: :workspace" in r.message
        # No config write needed when value is unchanged — the persist
        # callback should NOT have fired (avoids spurious config.yaml mtimes
        # on every re-apply).
        assert persisted == {}, (
            "persist_callback fired despite no config-value change"
        )
        # Caller still needs a fresh session for the cached agent to pick
        # up any migration-driven changes.
        assert r.requires_new_session is True

    def test_enable_blocked_when_codex_missing(self):
        """Enabling fails, with install guidance, when the codex binary
        check reports failure; the config is left untouched."""
        cfg = {}
        with patch.object(crs, "check_codex_binary_ok",
                          return_value=(False, "codex not found")):
            r = crs.apply(cfg, "codex_app_server")
        assert r.success is False
        assert "Cannot enable" in r.message
        assert "npm i -g @openai/codex" in r.message
        # Config NOT mutated on failure
        assert cfg.get("model", {}).get("openai_runtime") in {None, ""}

    def test_enable_succeeds_when_codex_present(self):
        """Enabling with a working codex binary updates the config, invokes
        the persist callback, and asks for a new session."""
        cfg = {}
        persisted = {}

        def persist(c):
            persisted.update(c)

        # Patch migrate so this test doesn't reach into the user's real
        # ~/.codex/config.toml. See issue #26250 Bug C — without this patch,
        # crs.apply() invokes the real migrate() which writes to
        # Path.home() / ".codex" using whatever HERMES_HOME the running pytest
        # session has set, leaking pytest tempdir paths into the user's
        # codex config.
        with patch.object(crs, "check_codex_binary_ok",
                          return_value=(True, "0.130.0")), \
             patch("hermes_cli.codex_runtime_plugin_migration.migrate"):
            r = crs.apply(cfg, "codex_app_server", persist_callback=persist)
        assert r.success
        assert r.new_value == "codex_app_server"
        assert r.old_value == "auto"
        assert r.requires_new_session is True
        assert "via MCP" in r.message  # hermes-tools callback message
        assert cfg["model"]["openai_runtime"] == "codex_app_server"
        assert persisted["model"]["openai_runtime"] == "codex_app_server"

    def test_disable_does_not_check_binary(self):
        """Switching from codex_app_server back to "auto" succeeds with the
        binary check replaced by a bare mock."""
        cfg = {"model": {"openai_runtime": "codex_app_server"}}
        with patch.object(crs, "check_codex_binary_ok") as bin_check:
            r = crs.apply(cfg, "auto")
        assert r.success
        # Binary check is irrelevant when disabling — should not be called
        # with the codex_app_server enable-gate signature.
        # NOTE(review): bin_check is never asserted on; consider adding
        # `bin_check.assert_not_called()` once it is confirmed that apply()
        # skips the binary check on the disable path.
        assert r.new_value == "auto"
        assert r.old_value == "codex_app_server"

    def test_persist_callback_failure_reported(self):
        """An exception raised by the persist callback is reported as a
        failed apply() whose message carries the error text."""
        cfg = {}

        def persist_boom(c):
            raise OSError("disk full")

        # migrate is patched here too so this enable attempt cannot reach the
        # real migrate(), whatever order apply() uses for persisting and
        # migrating (same isolation concern as in
        # test_enable_succeeds_when_codex_present).
        with patch.object(crs, "check_codex_binary_ok",
                          return_value=(True, "0.130.0")), \
             patch("hermes_cli.codex_runtime_plugin_migration.migrate"):
            r = crs.apply(cfg, "codex_app_server", persist_callback=persist_boom)
        assert r.success is False
        assert "persist failed" in r.message
        assert "disk full" in r.message

    def test_enable_triggers_mcp_migration(self):
        """Enabling codex_app_server should auto-migrate Hermes mcp_servers
        to ~/.codex/config.toml so the spawned subprocess sees them."""
        cfg = {
            "mcp_servers": {
                "filesystem": {"command": "npx", "args": ["-y", "fs-server"]},
            }
        }
        with patch.object(crs, "check_codex_binary_ok",
                          return_value=(True, "0.130.0")), \
             patch("hermes_cli.codex_runtime_plugin_migration.migrate") as mig:
            self._stub_migration_report(mig)
            r = crs.apply(cfg, "codex_app_server")
        assert r.success
        assert mig.called  # migration was triggered
        # User MCP servers are reported (excluding internal hermes-tools)
        assert "Migrated 1 MCP server" in r.message
        assert "filesystem" in r.message
        # Permissions default surfaces
        assert "Default sandbox: :workspace" in r.message
        # Hermes tool callback announcement
        assert "via MCP" in r.message

    def test_disable_does_not_trigger_migration(self):
        """Switching back to auto must not write to ~/.codex/."""
        cfg = {
            "model": {"openai_runtime": "codex_app_server"},
            "mcp_servers": {"x": {"command": "y"}},
        }
        with patch("hermes_cli.codex_runtime_plugin_migration.migrate") as mig:
            r = crs.apply(cfg, "auto")
        assert r.success
        assert not mig.called  # disabling does not migrate

    def test_migration_failure_does_not_block_enable(self):
        """If MCP migration raises, the runtime change still proceeds —
        users can manually re-run migration later."""
        cfg = {"mcp_servers": {"x": {"command": "y"}}}
        with patch.object(crs, "check_codex_binary_ok",
                          return_value=(True, "0.130.0")), \
             patch("hermes_cli.codex_runtime_plugin_migration.migrate",
                   side_effect=RuntimeError("disk full")):
            r = crs.apply(cfg, "codex_app_server")
        assert r.success  # change still applied
        assert r.new_value == "codex_app_server"
        assert "MCP migration skipped" in r.message
        assert "disk full" in r.message

    def test_binary_check_cached_within_apply(self):
        """check_codex_binary_ok is invoked at most once per apply() call.
        The enable path has three sites that need the version (state report,
        enable gate, success message). Without caching, a single
        /codex-runtime invocation spawns `codex --version` three times.
        Regression guard against a refactor that drops the cache.
        """
        cfg = {}
        with patch.object(crs, "check_codex_binary_ok",
                          return_value=(True, "0.130.0")) as bin_check, \
             patch("hermes_cli.codex_runtime_plugin_migration.migrate"):
            r = crs.apply(cfg, "codex_app_server")
        assert r.success
        assert bin_check.call_count == 1, (
            f"check_codex_binary_ok was called {bin_check.call_count} time(s); "
            "should be cached and called exactly once per apply()"
        )

    def test_binary_check_cached_on_read_only_call(self):
        """Read-only call (new_value=None) calls the binary check exactly
        once and reuses the result for the message."""
        cfg = {"model": {"openai_runtime": "codex_app_server"}}
        with patch.object(crs, "check_codex_binary_ok",
                          return_value=(True, "0.130.0")) as bin_check:
            crs.apply(cfg, None)
        assert bin_check.call_count == 1