hermes-agent/tests/hermes_cli/test_moa_config.py
Teknium 372f8195c7
fix(moa): default temperatures to unset — provider default, like single-model agents (#57440)
A single-model Hermes agent never sends temperature; the provider default
applies. MoA hardcoded reference_temperature=0.6 / aggregator_temperature=0.4,
and the coercion float(preset.get(key, 0.6) or 0.6) made unset IMPOSSIBLE to
express: absent, null, empty, and even an explicit 0 all collapsed to the
baked-in default. Every MoA advisor and aggregator therefore ran at 0.6/0.4
while the same model running solo used the provider default — silently
skewing solo-vs-MoA comparisons and overriding provider-tuned defaults.

- moa_config normalization: temperatures coerce to None when absent/blank/
  invalid (new _coerce_float_or_none); explicit values incl. 0 honored.
- moa_loop: _preset_temperature() resolves preset values; None flows to
  call_llm, which already omits the parameter when None (same contract as
  max_tokens). Aggregator still inherits the acting agent's own configured
  temperature when the preset doesn't pin one.
- conversation_loop (context-mode MoA): same resolution, no more hardcoded
  0.6/0.4 at the call site.
- DEFAULT_CONFIG preset + web_server payload models + docs updated: unset
  is the default, pinning stays available.
2026-07-03 00:22:49 -07:00

283 lines
10 KiB
Python

from hermes_cli.moa_config import (
DEFAULT_MOA_AGGREGATOR,
DEFAULT_MOA_PRESET_NAME,
DEFAULT_MOA_REFERENCE_MODELS,
build_moa_turn_prompt,
decode_moa_turn,
exact_moa_preset_name,
normalize_moa_config,
resolve_moa_preset,
set_active_moa_preset,
)
def test_normalize_moa_config_uses_default_named_preset():
    """An empty config normalizes to exactly one preset under the default name.

    The top-level ``reference_models`` and ``aggregator`` keys must equal the
    module defaults as well.
    """
    normalized = normalize_moa_config({})
    assert normalized["default_preset"] == DEFAULT_MOA_PRESET_NAME
    preset_names = list(normalized["presets"])
    assert preset_names == [DEFAULT_MOA_PRESET_NAME]
    assert normalized["reference_models"] == DEFAULT_MOA_REFERENCE_MODELS
    assert normalized["aggregator"] == DEFAULT_MOA_AGGREGATOR
def test_normalize_moa_config_preserves_named_presets():
    """Named presets and the chosen ``default_preset`` survive normalization.

    The top-level ``reference_models`` key is expected to carry the models of
    the ``coding`` preset, which is the configured default.
    """
    coding_preset = {
        "reference_models": [{"provider": "openai-codex", "model": "gpt-5.5"}],
        "aggregator": {"provider": "openrouter", "model": "anthropic/claude-opus-4.8"},
    }
    review_preset = {
        "reference_models": [{"provider": "openrouter", "model": "deepseek/deepseek-v4-pro"}],
        "aggregator": {"provider": "openrouter", "model": "anthropic/claude-opus-4.8"},
    }
    raw_config = {
        "default_preset": "coding",
        "presets": {"coding": coding_preset, "review": review_preset},
    }

    normalized = normalize_moa_config(raw_config)

    assert normalized["default_preset"] == "coding"
    assert set(normalized["presets"]) == {"coding", "review"}
    # Compare against a fresh literal so the expectation is independent of
    # whatever normalization does to the input objects.
    expected_refs = [{"provider": "openai-codex", "model": "gpt-5.5"}]
    assert normalized["reference_models"] == expected_refs
def test_legacy_flat_config_becomes_default_preset():
    """A flat config without ``presets`` is folded into the default preset."""
    flat_config = {
        "reference_models": [{"provider": "openai-codex", "model": "gpt-5.5"}],
        "aggregator": {"provider": "openrouter", "model": "anthropic/claude-opus-4.8"},
    }
    normalized = normalize_moa_config(flat_config)
    default_preset = normalized["presets"][DEFAULT_MOA_PRESET_NAME]
    expected_refs = [{"provider": "openai-codex", "model": "gpt-5.5"}]
    assert default_preset["reference_models"] == expected_refs
def test_normalize_moa_config_tolerates_non_numeric_values():
    """Garbage numeric fields in a hand-edited config.yaml must not raise.

    ``normalize_moa_config`` has to absorb non-numeric strings instead of
    crashing with ``ValueError``: ``max_tokens`` falls back to 4096, while the
    two temperature fields fall back to ``None``.
    """
    broken_fields = {
        "max_tokens": "notanumber",
        "reference_temperature": "hot",
        "aggregator_temperature": "",
    }
    normalized = normalize_moa_config({"presets": {"broken": broken_fields}})
    broken_preset = normalized["presets"]["broken"]

    assert broken_preset["max_tokens"] == 4096
    # An unparseable or blank temperature means "do not send the parameter, let
    # the provider default apply" (same as a single-model agent) -- it must
    # never turn into a hardcoded sampling value.
    assert broken_preset["reference_temperature"] is None
    assert broken_preset["aggregator_temperature"] is None
def test_normalize_moa_config_tolerates_non_list_reference_models():
    """A scalar ``reference_models`` falls back to the default model list.

    A hand-edited preset holding a bare number where a list is expected must
    normalize cleanly rather than raise ``TypeError`` -- the counterpart of the
    tolerance applied to non-numeric scalar fields.
    """
    raw_config = {"presets": {"broken": {"reference_models": 2}}}
    normalized = normalize_moa_config(raw_config)
    broken_preset = normalized["presets"]["broken"]
    assert broken_preset["reference_models"] == DEFAULT_MOA_REFERENCE_MODELS
def test_normalize_moa_config_wraps_bare_dict_reference_models():
    """A lone reference slot given as a dict is wrapped into a one-item list."""
    bare_slot = {"provider": "openai", "model": "gpt-4o"}
    raw_config = {"presets": {"p": {"reference_models": bare_slot}}}
    normalized = normalize_moa_config(raw_config)
    wrapped = normalized["presets"]["p"]["reference_models"]
    assert wrapped == [{"provider": "openai", "model": "gpt-4o"}]
def test_normalize_moa_config_coerces_numeric_strings():
    """Numeric strings (e.g. after a YAML round-trip) are parsed as numbers."""
    stringly_typed = {"max_tokens": "8192", "reference_temperature": "0.9"}
    normalized = normalize_moa_config(stringly_typed)
    default_preset = normalized["presets"][DEFAULT_MOA_PRESET_NAME]
    assert default_preset["max_tokens"] == 8192
    assert default_preset["reference_temperature"] == 0.9
def test_normalize_moa_config_coerces_float_max_tokens():
    """Float-valued ``max_tokens`` must normalize to a genuine ``int``.

    Two inputs are covered: a float (``4096.0``, as YAML may produce) and a
    fractional numeric string (``"4096.5"``), both expected to yield ``4096``.
    """
    from_float = normalize_moa_config({"max_tokens": 4096.0})
    float_result = from_float["presets"][DEFAULT_MOA_PRESET_NAME]["max_tokens"]
    assert float_result == 4096
    # ``4096.0 == 4096`` is True in Python, so equality alone cannot show that
    # the value was actually coerced; pin the type explicitly.
    assert isinstance(float_result, int)

    from_string = normalize_moa_config({"max_tokens": "4096.5"})
    string_result = from_string["presets"][DEFAULT_MOA_PRESET_NAME]["max_tokens"]
    assert string_result == 4096
    assert isinstance(string_result, int)
def test_exact_preset_matching_is_not_fuzzy():
    """Only a verbatim preset name matches; prefixes and longer phrases do not."""
    presets_config = {"presets": {"coding": {}, "review": {}}}

    verbatim = exact_moa_preset_name(presets_config, "coding")
    assert verbatim == "coding"

    prefix_only = exact_moa_preset_name(presets_config, "cod")
    assert prefix_only is None

    embedded_in_phrase = exact_moa_preset_name(presets_config, "coding please fix this")
    assert embedded_in_phrase is None
def test_exact_preset_matching_skips_disabled_presets():
    """Presets with ``enabled: false`` never match the bare-name switch path.

    Regression for #55187: when presets were disabled, a plain model switch
    whose name happened to equal a preset key (e.g. ``default``) silently moved
    the session onto the MoA virtual provider. The per-preset ``enabled``
    opt-out has to gate this implicit match.
    """
    disabled = {"enabled": False}
    presets_config = {"presets": {"default": dict(disabled), "klo": dict(disabled)}}

    default_match = exact_moa_preset_name(presets_config, "default")
    assert default_match is None

    klo_match = exact_moa_preset_name(presets_config, "klo")
    assert klo_match is None
def test_exact_preset_matching_allows_enabled_presets():
    """Enabled presets keep matching the bare-name switch path.

    Covers an explicit ``enabled: True``, an explicit ``enabled: False``
    sibling that must stay unmatched, and a preset without the key at all.
    """
    mixed_config = {
        "presets": {
            "fast": {"enabled": True},
            "slow": {"enabled": False},
        },
    }
    fast_match = exact_moa_preset_name(mixed_config, "fast")
    assert fast_match == "fast"
    slow_match = exact_moa_preset_name(mixed_config, "slow")
    assert slow_match is None

    # A preset with no ``enabled`` key counts as enabled and still matches.
    implicit_config = {"presets": {"x": {}}}
    assert exact_moa_preset_name(implicit_config, "x") == "x"
def test_active_preset_toggle_validation():
    """Selecting a preset records it in ``active_preset``; ``""`` clears it."""
    starting_config = {
        "default_preset": "coding",
        "presets": {"coding": {}, "review": {}},
    }

    switched_on = set_active_moa_preset(starting_config, "review")
    assert switched_on["active_preset"] == "review"

    # Feed the previous result back in so the toggle-off starts from an
    # already-active state.
    switched_off = set_active_moa_preset(switched_on, "")
    assert switched_off["active_preset"] == ""
def test_resolve_moa_preset_returns_requested_model_set():
    """Resolving by name yields that preset's models, not another preset's."""
    raw_presets = {
        "coding": {
            "reference_models": [{"provider": "openai-codex", "model": "gpt-5.5"}],
        },
        "review": {
            "reference_models": [{"provider": "openrouter", "model": "deepseek/deepseek-v4-pro"}],
        },
    }
    normalized = normalize_moa_config({"presets": raw_presets})

    review = resolve_moa_preset(normalized, "review")

    expected_refs = [{"provider": "openrouter", "model": "deepseek/deepseek-v4-pro"}]
    assert review["reference_models"] == expected_refs
def test_build_moa_turn_prompt_encodes_one_shot_default_preset():
    """A built MoA turn prompt decodes back to the user text plus a config.

    With no preset given, the decoded config must carry the default reference
    models.
    """
    user_text = "write a file then inspect it"
    encoded = build_moa_turn_prompt(user_text)

    decoded = decode_moa_turn(encoded)
    recovered_text, recovered_cfg = decoded

    assert recovered_text == "write a file then inspect it"
    assert recovered_cfg is not None
    assert recovered_cfg["reference_models"] == DEFAULT_MOA_REFERENCE_MODELS
def test_moa_provider_rejected_as_reference_slot():
    """Reference slots naming the ``moa`` virtual provider are discarded.

    This keeps a preset from recursively referencing another MoA run; the
    remaining, ordinary slot must be left in place.
    """
    recursive_slot = {"provider": "moa", "model": "default"}
    ordinary_slot = {"provider": "openrouter", "model": "deepseek/deepseek-v4-pro"}
    preset_body = {
        "reference_models": [recursive_slot, ordinary_slot],
        "aggregator": {"provider": "openrouter", "model": "anthropic/claude-opus-4.8"},
    }

    normalized = normalize_moa_config({"presets": {"p": preset_body}})
    surviving = normalized["presets"]["p"]["reference_models"]

    assert {"provider": "moa", "model": "default"} not in surviving
    assert surviving == [{"provider": "openrouter", "model": "deepseek/deepseek-v4-pro"}]
def test_moa_provider_rejected_as_aggregator_slot():
    """An aggregator naming the ``moa`` virtual provider is replaced.

    The slot must fall back to the default aggregator so that aggregation can
    never recurse into another MoA run.
    """
    preset_body = {
        "reference_models": [{"provider": "openrouter", "model": "deepseek/deepseek-v4-pro"}],
        "aggregator": {"provider": "moa", "model": "default"},
    }

    normalized = normalize_moa_config({"presets": {"p": preset_body}})
    resolved_aggregator = normalized["presets"]["p"]["aggregator"]

    assert resolved_aggregator["provider"] != "moa"
    assert resolved_aggregator == DEFAULT_MOA_AGGREGATOR
def test_moa_provider_rejected_case_insensitive():
    """Case variants of the ``moa`` provider name, like ``MoA``, are also blocked.

    The aggregator slot must fall back to the default aggregator exactly as it
    does for the lowercase spelling.
    """
    cfg = normalize_moa_config(
        {"presets": {"p": {"aggregator": {"provider": "MoA", "model": "default"}}}}
    )
    aggregator = cfg["presets"]["p"]["aggregator"]
    # Compare case-insensitively: with a ``MoA`` input, a plain ``!= "moa"``
    # check would pass even if the slot had been kept verbatim, so it would
    # not exercise the behavior this test is named after.
    assert str(aggregator["provider"]).lower() != "moa"
    assert aggregator == DEFAULT_MOA_AGGREGATOR
def _preset(**extra):
base = {
"reference_models": [{"provider": "openrouter", "model": "anthropic/claude-opus-4.8"}],
"aggregator": {"provider": "openrouter", "model": "anthropic/claude-opus-4.8"},
}
base.update(extra)
return {"default_preset": "p", "presets": {"p": base}}
def test_reference_max_tokens_defaults_to_none_uncapped():
    """Without ``reference_max_tokens`` the resolved value is ``None`` (no cap).

    Presets that never set the field therefore keep their previous uncapped
    advisor behavior instead of silently regressing.
    """
    raw_config = _preset()
    resolved = resolve_moa_preset(raw_config, "p")
    assert resolved["reference_max_tokens"] is None
def test_reference_max_tokens_positive_value_preserved():
    """A positive cap comes out of ``resolve_moa_preset`` unchanged."""
    raw_config = _preset(reference_max_tokens=600)
    resolved = resolve_moa_preset(raw_config, "p")
    assert resolved["reference_max_tokens"] == 600
def test_reference_max_tokens_invalid_falls_back_to_none():
    """Non-positive or non-numeric caps resolve to ``None`` (uncapped).

    Such values must neither clamp advisors to a nonsense limit nor crash.
    """
    invalid_caps = (0, -5, "abc", "", None)
    for invalid_cap in invalid_caps:
        resolved = resolve_moa_preset(_preset(reference_max_tokens=invalid_cap), "p")
        # The offending input doubles as the assertion message so a failure
        # identifies which value slipped through.
        assert resolved["reference_max_tokens"] is None, invalid_cap
def test_reference_max_tokens_string_number_coerced():
    """A numeric string such as ``'600'`` from a hand-edited config.yaml is parsed."""
    raw_config = _preset(reference_max_tokens="600")
    resolved = resolve_moa_preset(raw_config, "p")
    assert resolved["reference_max_tokens"] == 600
def test_reference_max_tokens_in_flattened_view():
    """The flattened compatibility view surfaces ``reference_max_tokens``.

    Dashboard/desktop callers read the active preset's cap from the top level
    of the normalized config.
    """
    raw_config = _preset(reference_max_tokens=750)
    normalized = normalize_moa_config(raw_config)
    assert normalized["reference_max_tokens"] == 750