fix(vision): read auxiliary model from config.yaml before env var

The _handle_vision_analyze and _handle_video_analyze handlers now read the model name from config.yaml (auxiliary.vision.model / auxiliary.video.model, with video falling back to the vision setting) before falling back to the AUXILIARY_VISION_MODEL / AUXILIARY_VIDEO_MODEL env vars. This matches the existing config-first pattern for timeout and temperature in the same file. Fixes #53749
2026-06-28 01:41:23 +08:00 · 2026-06-28 01:41:23 +08:00 · 149641485c
commit 149641485c
parent 25aa626cb4
2 changed files with 67 additions and 2 deletions
--- a/tests/tools/test_vision_tools.py
+++ b/tests/tools/test_vision_tools.py
@@ -261,6 +261,48 @@ class TestHandleVisionAnalyze:
            # (the centralized call_llm router picks the default)
            assert model is None

+    def test_config_yaml_model_takes_priority_over_env(self):
+        """config.yaml auxiliary.vision.model should be preferred over env var."""
+        with (
+            patch(
+                "tools.vision_tools.vision_analyze_tool", new_callable=AsyncMock
+            ) as mock_tool,
+            patch(
+                "hermes_cli.config.load_config",
+                return_value={"auxiliary": {"vision": {"model": "qwen3.7-plus"}}},
+            ),
+            patch.dict(os.environ, {"AUXILIARY_VISION_MODEL": "env-model"}),
+        ):
+            mock_tool.return_value = json.dumps({"result": "ok"})
+            coro = _handle_vision_analyze(
+                {"image_url": "https://example.com/img.png", "question": "test"}
+            )
+            coro.close()
+            call_args = mock_tool.call_args
+            model = call_args[0][2]  # third positional arg
+            assert model == "qwen3.7-plus"
+
+    def test_env_var_used_when_config_missing_model(self):
+        """Env var should be used when config.yaml has no auxiliary.vision.model."""
+        with (
+            patch(
+                "tools.vision_tools.vision_analyze_tool", new_callable=AsyncMock
+            ) as mock_tool,
+            patch(
+                "hermes_cli.config.load_config",
+                return_value={"auxiliary": {"vision": {}}},
+            ),
+            patch.dict(os.environ, {"AUXILIARY_VISION_MODEL": "fallback-model"}),
+        ):
+            mock_tool.return_value = json.dumps({"result": "ok"})
+            coro = _handle_vision_analyze(
+                {"image_url": "https://example.com/img.png", "question": "test"}
+            )
+            coro.close()
+            call_args = mock_tool.call_args
+            model = call_args[0][2]
+            assert model == "fallback-model"
+
    def test_empty_args_graceful(self):
        """Missing keys should default to empty strings, not raise."""
        with patch(
--- a/tools/vision_tools.py
+++ b/tools/vision_tools.py
@@ -1356,7 +1356,18 @@ async def _handle_vision_analyze(args: Dict[str, Any], **kw: Any) -> str:
        "Fully describe and explain everything about this image, then answer the "
        f"following question:\n\n{question}"
    )
-    model = os.getenv("AUXILIARY_VISION_MODEL", "").strip() or None
+    # Prefer config.yaml auxiliary.vision.model; the env var is only a legacy fallback.
+    model = None
+    try:
+        from hermes_cli.config import cfg_get, load_config
+        _cfg = load_config()
+        _vmodel = cfg_get(_cfg, "auxiliary", "vision", "model")
+        if _vmodel:
+            model = str(_vmodel).strip() or None
+    except Exception:
+        pass
+    if not model:
+        model = os.getenv("AUXILIARY_VISION_MODEL", "").strip() or None
    return await vision_analyze_tool(image_url, full_prompt, model)


@@ -1718,7 +1729,19 @@ def _handle_video_analyze(args: Dict[str, Any], **kw: Any) -> Awaitable[str]:
        "including visual content, motion, audio cues, text overlays, and scene "
        f"transitions. Then answer the following question:\n\n{question}"
    )
-    model = os.getenv("AUXILIARY_VIDEO_MODEL", "").strip() or os.getenv("AUXILIARY_VISION_MODEL", "").strip() or None
+    # Prefer config.yaml auxiliary.video.model (falling back to vision);
+    # env vars are only a legacy fallback, consulted when config has no model.
+    model = None
+    try:
+        from hermes_cli.config import cfg_get, load_config
+        _cfg = load_config()
+        _vmodel = cfg_get(_cfg, "auxiliary", "video", "model") or cfg_get(_cfg, "auxiliary", "vision", "model")
+        if _vmodel:
+            model = str(_vmodel).strip() or None
+    except Exception:
+        pass
+    if not model:
+        model = os.getenv("AUXILIARY_VIDEO_MODEL", "").strip() or os.getenv("AUXILIARY_VISION_MODEL", "").strip() or None
    return video_analyze_tool(video_url, full_prompt, model)