fix(cron): use shared get_fallback_chain in job runner (#36734)

Cron's job runner was the last entry point that still treated
fallback_providers and fallback_model as mutually exclusive: whenever
fallback_providers was set, the legacy fallback_model was silently
dropped. Every other entry point (cli, gateway, oneshot, fallback_cmd,
tui_gateway, auxiliary_client) already merges both keys via
get_fallback_chain(). This commit aligns cron with them at both call
sites: the auth-fallback resolution loop and the
AIAgent(fallback_model=...) argument.

Co-authored-by: xxxigm <tuancanhnguyen706@gmail.com>
This commit is contained in:
xxxigm 2026-07-01 01:08:03 -07:00 committed by Teknium
parent 5505dbbf43
commit 32bc36522e
2 changed files with 33 additions and 5 deletions

View file

@ -41,6 +41,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
from hermes_constants import get_hermes_home
from hermes_cli._subprocess_compat import windows_hide_flags
from hermes_cli.config import load_config, _expand_env_vars
from hermes_cli.fallback_config import get_fallback_chain
from hermes_time import now as _hermes_now
logger = logging.getLogger(__name__)
@ -2370,12 +2371,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
except AuthError as auth_exc:
# Primary provider auth failed — try fallback chain before giving up.
logger.warning("Job '%s': primary auth failed (%s), trying fallback", job_id, auth_exc)
fb = _cfg.get("fallback_providers") or _cfg.get("fallback_model")
fb_list = (fb if isinstance(fb, list) else [fb]) if fb else []
fb_list = get_fallback_chain(_cfg)
runtime = None
for entry in fb_list:
if not isinstance(entry, dict):
continue
try:
fb_kwargs = {"requested": entry.get("provider")}
if entry.get("base_url"):
@ -2447,7 +2445,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
f"(or pin the original values to keep them). See #44585."
)
fallback_model = _cfg.get("fallback_providers") or _cfg.get("fallback_model") or None
fallback_model = get_fallback_chain(_cfg) or None
credential_pool = None
runtime_provider = str(runtime.get("provider") or "").strip().lower()
if runtime_provider:

View file

@ -1926,6 +1926,36 @@ class TestRunJobConfigEnvVarExpansion:
"config.yaml ${VAR} in fallback_providers was not expanded."
)
def test_fallback_chain_merges_providers_and_legacy_model(self, tmp_path, monkeypatch):
"""Cron uses get_fallback_chain so legacy fallback_model is not dropped.

Writes a config.yaml that sets both ``fallback_providers`` (one entry,
model ``gpt-4o-mini``) and the legacy ``fallback_model`` (model
``claude-sonnet-4-6``), runs ``run_job`` with its collaborators patched
out, and asserts that the ``fallback_model`` keyword argument handed to
``AIAgent`` lists both models, with the ``fallback_providers`` entry first.

NOTE(review): ``monkeypatch`` is accepted but not used in this body.
"""
# Both keys are present on purpose: the regression being guarded against is
# the legacy key disappearing whenever fallback_providers is also set.
(tmp_path / "config.yaml").write_text(
"fallback_providers:\n"
" - provider: openrouter\n"
" model: gpt-4o-mini\n"
"fallback_model:\n"
" provider: anthropic\n"
" model: claude-sonnet-4-6\n"
)
job = {"id": "fb-merge", "name": "fallback merge", "prompt": "hi"}
fake_db = MagicMock()
# Point the scheduler's home at tmp_path (presumably where run_job looks for
# config.yaml -- confirm against run_job) and stub origin resolution, dotenv
# loading, the session DB, runtime-provider resolution and the agent class.
with patch("cron.scheduler._hermes_home", tmp_path), \
patch("cron.scheduler._resolve_origin", return_value=None), \
patch("dotenv.load_dotenv"), \
patch("hermes_state.SessionDB", return_value=fake_db), \
patch("hermes_cli.runtime_provider.resolve_runtime_provider",
return_value=self._RUNTIME), \
patch("run_agent.AIAgent") as mock_agent_cls:
mock_agent = MagicMock()
mock_agent.run_conversation.return_value = {"final_response": "ok"}
mock_agent_cls.return_value = mock_agent
run_job(job)
# Inspect the keyword arguments AIAgent was constructed with; a missing or
# falsy fallback_model is treated as an empty chain.
fb = mock_agent_cls.call_args.kwargs.get("fallback_model") or []
# Only dict entries contribute a model name; order is part of the assertion.
models = [e.get("model") for e in fb if isinstance(e, dict)]
assert models == ["gpt-4o-mini", "claude-sonnet-4-6"]
def test_unexpanded_ref_passthrough_when_var_unset(self, tmp_path, monkeypatch):
"""When the env var is not set, the literal ${VAR} is kept verbatim (not crashed)."""
(tmp_path / "config.yaml").write_text("model: ${_HERMES_TEST_CRON_UNSET_VAR}\n")