From 35eb93c8df3297d30a3ef9650be667080e222c8a Mon Sep 17 00:00:00 2001 From: snav Date: Fri, 15 May 2026 15:05:09 -0400 Subject: [PATCH] fix(codex-runtime): re-running /codex-runtime codex_app_server when already enabled now triggers migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The /codex-runtime slash command short-circuits with "openai_runtime already set" when invoked with the same value as the current config, and crucially skips the entire migration block below. The check conflates two things: (a) "the config value is correct" and (b) "the world state (managed block in ~/.codex/config.toml, hermes-tools MCP callback, plugin discovery) is converged". Common footgun this exposes: a user who pre-sets `model.openai_runtime: codex_app_server` directly in config.yaml (reasonable thing to do) and then runs /codex-runtime codex_app_server to trigger migration sees "already set" and silently gets no migration. ~/.codex/config.toml never receives the managed block, the hermes-tools MCP callback never registers, and codex falls through to its default runtime instead of the app-server one — visibly successful but functionally partial setup. The migration is idempotent by design (it replaces its own managed block in place between MIGRATION_MARKER and MIGRATION_END_MARKER), so re-running it is safe and cheap. Fix the short-circuit to fall through to migration when re-applying codex_app_server while skipping the config persist (no value-level change needed). The disable case (re-applying "auto") still short-circuits because disabling doesn't touch ~/.codex/config.toml at all. The user-visible message changes to "openai_runtime already set to codex_app_server — re-applying migration" so re-runs surface what happened. Regression test (test_reapply_codex_app_server_runs_migration) asserts: - migrate() was called when re-applying - persist_callback was NOT called (no config write on no-op transitions) - migration output (MCP servers, sandbox default) surfaces in the user-visible message - requires_new_session is True so callers know to /reset Verified RED→GREEN: the test fails on origin/main with "migration must run on reapply, not just first enable" and passes with this fix. Full test_codex_runtime_switch.py suite: 31 passed. --- hermes_cli/codex_runtime_switch.py | 51 ++++++++++++------- tests/hermes_cli/test_codex_runtime_switch.py | 50 ++++++++++++++++++ 2 files changed, 84 insertions(+), 17 deletions(-) diff --git a/hermes_cli/codex_runtime_switch.py b/hermes_cli/codex_runtime_switch.py index 98b40b1e8..e12434627 100644 --- a/hermes_cli/codex_runtime_switch.py +++ b/hermes_cli/codex_runtime_switch.py @@ -144,8 +144,19 @@ def apply( codex_version=ver if ok else None, ) - # No change requested - if new_value == current: + # No-config-change paths. For `auto` we return immediately — disabling + # doesn't touch ~/.codex/. For `codex_app_server`, we fall through to + # the migration block below: the config value is already correct, but + # the world state (managed block in ~/.codex/config.toml, hermes-tools + # MCP callback, plugin discovery) may be stale or missing — common + # footgun when users pre-set `openai_runtime: codex_app_server` in + # config.yaml without ever running the slash command. The migration is + # idempotent by design (it replaces its own managed block in place), so + # re-running is cheap and safe. + reapplying_enable = ( + new_value == current and new_value == "codex_app_server" + ) + if new_value == current and not reapplying_enable: return CodexRuntimeStatus( success=True, new_value=current, @@ -172,22 +183,28 @@ def apply( codex_version=None, ) - set_runtime(config, new_value) - if persist_callback is not None: - try: - persist_callback(config) - except Exception as exc: - logger.exception("failed to persist openai_runtime change") - return CodexRuntimeStatus( - success=False, - new_value=new_value, - old_value=current, - message=f"updated config in memory but persist failed: {exc}", - ) + if not reapplying_enable: + set_runtime(config, new_value) + if persist_callback is not None: + try: + persist_callback(config) + except Exception as exc: + logger.exception("failed to persist openai_runtime change") + return CodexRuntimeStatus( + success=False, + new_value=new_value, + old_value=current, + message=f"updated config in memory but persist failed: {exc}", + ) - msg_lines = [ - f"openai_runtime: {current} → {new_value}", - ] + if reapplying_enable: + msg_lines = [ + f"openai_runtime already set to {current} — re-applying migration", + ] + else: + msg_lines = [ + f"openai_runtime: {current} → {new_value}", + ] if new_value == "codex_app_server": ok, ver = _check_binary_cached() if ok: diff --git a/tests/hermes_cli/test_codex_runtime_switch.py b/tests/hermes_cli/test_codex_runtime_switch.py index a0b4aa5fd..d9f53f048 100644 --- a/tests/hermes_cli/test_codex_runtime_switch.py +++ b/tests/hermes_cli/test_codex_runtime_switch.py @@ -96,6 +96,56 @@ class TestApply: assert r.success assert r.message == "openai_runtime already set to auto" + def test_reapply_codex_app_server_runs_migration(self): + """Re-applying codex_app_server when already enabled must still + run the migration. Common footgun: user pre-sets + `openai_runtime: codex_app_server` in config.yaml, then runs + /codex-runtime codex_app_server expecting the migration. Without + this, the slash command short-circuits with "already set" and + ~/.codex/config.toml never gets the hermes-tools MCP callback + or plugin migration — silent partial setup. + """ + cfg = { + "model": {"openai_runtime": "codex_app_server"}, + "mcp_servers": { + "filesystem": {"command": "npx", "args": ["-y", "fs-server"]}, + }, + } + persisted = {} + + def persist(c): + persisted.update(c) + + with patch.object(crs, "check_codex_binary_ok", + return_value=(True, "0.130.0")), \ + patch("hermes_cli.codex_runtime_plugin_migration.migrate") as mig: + mig.return_value.migrated = ["filesystem", "hermes-tools"] + mig.return_value.migrated_plugins = [] + mig.return_value.plugin_query_error = None + mig.return_value.wrote_permissions_default = ":workspace" + mig.return_value.errors = [] + mig.return_value.target_path = "/fake/.codex/config.toml" + r = crs.apply(cfg, "codex_app_server", + persist_callback=persist) + assert r.success + assert mig.called, "migration must run on reapply, not just first enable" + # Re-apply should signal "already set" but still announce migration ran + assert "already set" in r.message + assert "re-applying migration" in r.message + # Migration output still surfaces + assert "Migrated 1 MCP server" in r.message + assert "filesystem" in r.message + assert "Default sandbox: :workspace" in r.message + # No config write needed when value is unchanged — the persist + # callback should NOT have fired (avoids spurious config.yaml mtimes + # on every re-apply). + assert persisted == {}, ( + "persist_callback fired despite no config-value change" + ) + # Caller still needs a fresh session for the cached agent to pick + # up any migration-driven changes. + assert r.requires_new_session is True + def test_enable_blocked_when_codex_missing(self): cfg = {} with patch.object(crs, "check_codex_binary_ok",