hermes-agent/tests/hermes_cli/test_moa_config.py
Teknium 543d305bbb
feat(moa): add reference_max_tokens to cap advisor output and cut turn latency (#56756)
MoA per-turn latency is dominated by advisor GENERATION: turn wall time
correlates ~0.88 with output tokens and ~-0.03 with input tokens (measured over
52 turns). Each turn waits for the slowest advisor to finish writing, and
advisors were uncapped — writing multi-thousand-token essays the aggregator
only needs the gist of.

Add an opt-in per-preset reference_max_tokens knob (mirrors reference_temperature)
that caps ADVISOR output only; the acting aggregator is never capped. Default
None = uncapped, so existing presets are byte-for-byte unchanged (no regression).
Wired through both MoA execution paths (MoAChatCompletions.create and
aggregate_moa_context).

E2E: same task, closed preset uncapped vs reference_max_tokens=600 -> 59s to 33s
(~44% faster), final answer identical/correct.

- hermes_cli/moa_config.py: _coerce_int_or_none helper + reference_max_tokens
  in _normalize_preset/_default_preset/flattened view
- agent/moa_loop.py: read preset.reference_max_tokens, pass to reference fan-out
- agent/conversation_loop.py: pass reference_max_tokens on the per-turn path
- tests + docs
2026-07-02 00:16:35 -07:00

280 lines
10 KiB
Python

from hermes_cli.moa_config import (
DEFAULT_MOA_AGGREGATOR,
DEFAULT_MOA_PRESET_NAME,
DEFAULT_MOA_REFERENCE_MODELS,
build_moa_turn_prompt,
decode_moa_turn,
exact_moa_preset_name,
normalize_moa_config,
resolve_moa_preset,
set_active_moa_preset,
)
def test_normalize_moa_config_uses_default_named_preset():
    """An empty config normalizes to exactly one preset under the default name.

    The flattened top-level view must also expose the default reference
    models and the default aggregator.
    """
    normalized = normalize_moa_config({})
    preset_names = list(normalized["presets"])

    assert normalized["default_preset"] == DEFAULT_MOA_PRESET_NAME
    assert preset_names == [DEFAULT_MOA_PRESET_NAME]
    assert normalized["reference_models"] == DEFAULT_MOA_REFERENCE_MODELS
    assert normalized["aggregator"] == DEFAULT_MOA_AGGREGATOR
def test_normalize_moa_config_preserves_named_presets():
    """Explicit named presets survive normalization.

    Both preset names are kept, the chosen default preset is preserved, and
    the flattened ``reference_models`` reflects the default preset's models.
    """
    # Each preset gets its own freshly-built dicts so nothing is shared
    # between the two preset definitions.
    coding_preset = {
        "reference_models": [{"provider": "openai-codex", "model": "gpt-5.5"}],
        "aggregator": {"provider": "openrouter", "model": "anthropic/claude-opus-4.8"},
    }
    review_preset = {
        "reference_models": [{"provider": "openrouter", "model": "deepseek/deepseek-v4-pro"}],
        "aggregator": {"provider": "openrouter", "model": "anthropic/claude-opus-4.8"},
    }
    raw_config = {
        "default_preset": "coding",
        "presets": {"coding": coding_preset, "review": review_preset},
    }

    normalized = normalize_moa_config(raw_config)

    assert normalized["default_preset"] == "coding"
    assert set(normalized["presets"]) == {"coding", "review"}
    assert normalized["reference_models"] == [{"provider": "openai-codex", "model": "gpt-5.5"}]
def test_legacy_flat_config_becomes_default_preset():
    """A flat config without a ``presets`` mapping lands in the default preset.

    The top-level ``reference_models`` must show up under the preset keyed by
    ``DEFAULT_MOA_PRESET_NAME``.
    """
    flat_config = {
        "reference_models": [{"provider": "openai-codex", "model": "gpt-5.5"}],
        "aggregator": {"provider": "openrouter", "model": "anthropic/claude-opus-4.8"},
    }

    normalized = normalize_moa_config(flat_config)
    default_preset = normalized["presets"][DEFAULT_MOA_PRESET_NAME]

    assert default_preset["reference_models"] == [{"provider": "openai-codex", "model": "gpt-5.5"}]
def test_normalize_moa_config_tolerates_non_numeric_values():
    """Garbage numeric fields in a hand-edited config.yaml degrade to defaults.

    Non-numeric strings must not make normalize_moa_config raise ValueError;
    each affected field falls back to its default value instead.
    """
    broken_fields = {
        "max_tokens": "notanumber",
        "reference_temperature": "hot",
        "aggregator_temperature": "",
    }

    normalized = normalize_moa_config({"presets": {"broken": broken_fields}})
    repaired = normalized["presets"]["broken"]

    assert repaired["max_tokens"] == 4096
    assert repaired["reference_temperature"] == 0.6
    assert repaired["aggregator_temperature"] == 0.4
def test_normalize_moa_config_tolerates_non_list_reference_models():
    """A scalar ``reference_models`` degrades to the default model list.

    Mirrors the tolerance for non-numeric scalar fields: a hand-edited scalar
    must not crash normalize_moa_config with TypeError.
    """
    raw_config = {"presets": {"broken": {"reference_models": 2}}}

    repaired = normalize_moa_config(raw_config)["presets"]["broken"]

    assert repaired["reference_models"] == DEFAULT_MOA_REFERENCE_MODELS
def test_normalize_moa_config_wraps_bare_dict_reference_models():
    """A lone reference slot given as a dict (no list wrapper) is wrapped in a list."""
    bare_slot = {"provider": "openai", "model": "gpt-4o"}
    raw_config = {"presets": {"p": {"reference_models": bare_slot}}}

    normalized = normalize_moa_config(raw_config)

    assert normalized["presets"]["p"]["reference_models"] == [
        {"provider": "openai", "model": "gpt-4o"}
    ]
def test_normalize_moa_config_coerces_numeric_strings():
    """Numeric strings (e.g. after a YAML round-trip) are coerced to numbers."""
    stringly_typed = {"max_tokens": "8192", "reference_temperature": "0.9"}

    default_preset = normalize_moa_config(stringly_typed)["presets"][DEFAULT_MOA_PRESET_NAME]

    assert default_preset["max_tokens"] == 8192
    assert default_preset["reference_temperature"] == 0.9
def test_normalize_moa_config_coerces_float_max_tokens():
    """A float ``max_tokens`` (4096.0 from YAML) or a float string becomes an int.

    The fractional string "4096.5" is expected to resolve to 4096 as well.
    """
    from_float = normalize_moa_config({"max_tokens": 4096.0})
    assert from_float["presets"][DEFAULT_MOA_PRESET_NAME]["max_tokens"] == 4096

    from_float_string = normalize_moa_config({"max_tokens": "4096.5"})
    assert from_float_string["presets"][DEFAULT_MOA_PRESET_NAME]["max_tokens"] == 4096
def test_exact_preset_matching_is_not_fuzzy():
    """Only the exact preset key matches; prefixes and longer sentences do not."""
    presets_config = {"presets": {"coding": {}, "review": {}}}

    exact_hit = exact_moa_preset_name(presets_config, "coding")
    assert exact_hit == "coding"

    # Neither a prefix of a key nor text that merely starts with a key matches.
    for near_miss in ("cod", "coding please fix this"):
        assert exact_moa_preset_name(presets_config, near_miss) is None
def test_exact_preset_matching_skips_disabled_presets():
    """Disabled presets are invisible to the implicit bare-name switch path.

    Regression for #55187: with ``enabled: false`` presets, a plain model
    switch whose name collided with a preset key (e.g. ``default``) silently
    pivoted the session onto the MoA virtual provider. The per-preset
    ``enabled`` opt-out must gate this implicit match.
    """
    all_disabled = {
        "presets": {
            "default": {"enabled": False},
            "klo": {"enabled": False},
        },
    }

    for preset_key in ("default", "klo"):
        assert exact_moa_preset_name(all_disabled, preset_key) is None
def test_exact_preset_matching_allows_enabled_presets():
    """Enabled presets, explicit or implicit, still match by bare name."""
    mixed_config = {
        "presets": {
            "fast": {"enabled": True},
            "slow": {"enabled": False},
        },
    }

    fast_match = exact_moa_preset_name(mixed_config, "fast")
    assert fast_match == "fast"

    slow_match = exact_moa_preset_name(mixed_config, "slow")
    assert slow_match is None

    # A preset without an ``enabled`` key counts as enabled and matches.
    implicit_config = {"presets": {"x": {}}}
    assert exact_moa_preset_name(implicit_config, "x") == "x"
def test_active_preset_toggle_validation():
    """Setting a preset name activates it; setting an empty string clears it."""
    base_config = {"default_preset": "coding", "presets": {"coding": {}, "review": {}}}

    switched_on = set_active_moa_preset(base_config, "review")
    assert switched_on["active_preset"] == "review"

    # Feed the activated config back in and clear the selection.
    switched_off = set_active_moa_preset(switched_on, "")
    assert switched_off["active_preset"] == ""
def test_resolve_moa_preset_returns_requested_model_set():
    """Resolving a preset by name yields that preset's own reference models."""
    raw_presets = {
        "coding": {"reference_models": [{"provider": "openai-codex", "model": "gpt-5.5"}]},
        "review": {"reference_models": [{"provider": "openrouter", "model": "deepseek/deepseek-v4-pro"}]},
    }
    normalized = normalize_moa_config({"presets": raw_presets})

    resolved_review = resolve_moa_preset(normalized, "review")

    assert resolved_review["reference_models"] == [
        {"provider": "openrouter", "model": "deepseek/deepseek-v4-pro"}
    ]
def test_build_moa_turn_prompt_encodes_one_shot_default_preset():
    """A built turn prompt round-trips through decode_moa_turn.

    Decoding returns the original user text plus a config whose reference
    models are the defaults.
    """
    user_text = "write a file then inspect it"

    encoded = build_moa_turn_prompt(user_text)
    recovered_text, recovered_cfg = decode_moa_turn(encoded)

    assert recovered_text == "write a file then inspect it"
    assert recovered_cfg is not None
    assert recovered_cfg["reference_models"] == DEFAULT_MOA_REFERENCE_MODELS
def test_moa_provider_rejected_as_reference_slot():
    """Reference slots naming the ``moa`` virtual provider are dropped.

    This keeps a preset from recursively referencing another MoA run; the
    remaining, legitimate slot is kept.
    """
    preset_body = {
        "reference_models": [
            {"provider": "moa", "model": "default"},
            {"provider": "openrouter", "model": "deepseek/deepseek-v4-pro"},
        ],
        "aggregator": {"provider": "openrouter", "model": "anthropic/claude-opus-4.8"},
    }

    normalized = normalize_moa_config({"presets": {"p": preset_body}})
    surviving_refs = normalized["presets"]["p"]["reference_models"]

    assert {"provider": "moa", "model": "default"} not in surviving_refs
    assert surviving_refs == [{"provider": "openrouter", "model": "deepseek/deepseek-v4-pro"}]
def test_moa_provider_rejected_as_aggregator_slot():
    """An aggregator naming the ``moa`` virtual provider is replaced by the default.

    The preset must never end up with a recursive MoA aggregator.
    """
    preset_body = {
        "reference_models": [{"provider": "openrouter", "model": "deepseek/deepseek-v4-pro"}],
        "aggregator": {"provider": "moa", "model": "default"},
    }

    normalized = normalize_moa_config({"presets": {"p": preset_body}})
    effective_aggregator = normalized["presets"]["p"]["aggregator"]

    assert effective_aggregator["provider"] != "moa"
    assert effective_aggregator == DEFAULT_MOA_AGGREGATOR
def test_moa_provider_rejected_case_insensitive():
    """Case variants like ``MoA`` are also blocked.

    The provider is lower-cased before comparison. Comparing the raw value to
    the lowercase literal would pass vacuously if normalization left the
    mixed-case ``MoA`` input untouched, since ``"MoA" != "moa"``.
    """
    cfg = normalize_moa_config(
        {"presets": {"p": {"aggregator": {"provider": "MoA", "model": "default"}}}}
    )
    aggregator = cfg["presets"]["p"]["aggregator"]

    # Case-insensitive check so any spelling of the virtual provider is caught.
    assert str(aggregator["provider"]).lower() != "moa"
    assert aggregator == DEFAULT_MOA_AGGREGATOR
def _preset(**extra):
base = {
"reference_models": [{"provider": "openrouter", "model": "anthropic/claude-opus-4.8"}],
"aggregator": {"provider": "openrouter", "model": "anthropic/claude-opus-4.8"},
}
base.update(extra)
return {"default_preset": "p", "presets": {"p": base}}
def test_reference_max_tokens_defaults_to_none_uncapped():
    """Without a configured cap, reference_max_tokens resolves to None.

    None means uncapped, so presets that never set the knob keep their prior
    advisor behavior with no silent regression.
    """
    resolved = resolve_moa_preset(_preset(), "p")
    advisor_cap = resolved["reference_max_tokens"]
    assert advisor_cap is None
def test_reference_max_tokens_positive_value_preserved():
    """A positive advisor cap survives resolve_moa_preset unchanged."""
    raw_config = _preset(reference_max_tokens=600)
    resolved = resolve_moa_preset(raw_config, "p")
    assert resolved["reference_max_tokens"] == 600
def test_reference_max_tokens_invalid_falls_back_to_none():
    """Non-positive / non-numeric caps degrade to None (uncapped) rather than
    clamping advisors to a nonsense value or crashing."""
    for bad in (0, -5, "abc", "", None):
        p = resolve_moa_preset(_preset(reference_max_tokens=bad), "p")
        # repr() keeps the failing input identifiable even for "", None and 0,
        # which would otherwise render as an empty or ambiguous message.
        assert p["reference_max_tokens"] is None, (
            f"reference_max_tokens={bad!r} should resolve to None"
        )
def test_reference_max_tokens_string_number_coerced():
    """A numeric string such as '600' from a hand-edited config.yaml becomes an int."""
    raw_config = _preset(reference_max_tokens="600")
    resolved = resolve_moa_preset(raw_config, "p")
    assert resolved["reference_max_tokens"] == 600
def test_reference_max_tokens_in_flattened_view():
    """The flattened compatibility view carries the active preset's cap.

    Dashboard/desktop callers read reference_max_tokens from the top level of
    the normalized config.
    """
    raw_config = _preset(reference_max_tokens=750)
    flattened = normalize_moa_config(raw_config)
    assert flattened["reference_max_tokens"] == 750