diff --git a/hermes_cli/config.py b/hermes_cli/config.py index ac2dedd71..4dc42e702 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -2118,8 +2118,11 @@ DEFAULT_CONFIG = { # (floor 30s) to enforce a hard cap. "reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium", # "low", "minimal", "none" (empty = inherit parent's level) - "max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling - "max_async_children": 3, # max concurrent background (background=true) subagents; new dispatches rejected at capacity + "max_concurrent_children": 3, # unified concurrency cap: max parallel children per batch + # AND max concurrent background (background=true) + # delegation units. New async dispatches beyond the cap + # fall back to synchronous execution. Floor of 1, no ceiling. + # (Replaces the deprecated max_async_children.) # Orchestrator role controls (see tools/delegate_tool.py:_get_max_spawn_depth # and _get_orchestrator_enabled). Floored at 1, no upper ceiling — # raise deliberately, each level multiplies API cost. @@ -3114,7 +3117,7 @@ DEFAULT_CONFIG = { }, # Config schema version - bump this when adding new required fields - "_config_version": 32, + "_config_version": 33, } # ============================================================================= @@ -5690,6 +5693,41 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A "legacy surface-aware behavior." ) + # ── Version 32 → 33: unify delegation concurrency caps ── + # delegation.max_async_children is deprecated: max_concurrent_children now + # caps both a single batch's parallelism and concurrent background + # delegation units. Fold a raised max_async_children into + # max_concurrent_children (take the max so nobody loses headroom), then + # drop the stale key. + if current_ver < 33: + config = read_raw_config() + raw_deleg = config.get("delegation") + if isinstance(raw_deleg, dict) and "max_async_children" in raw_deleg: + old_async = raw_deleg.pop("max_async_children") + try: + old_async_i = int(old_async) + except (TypeError, ValueError): + old_async_i = None + if old_async_i is not None and old_async_i > 3: + try: + cur_children = int(raw_deleg.get("max_concurrent_children", 3)) + except (TypeError, ValueError): + cur_children = 3 + if old_async_i > cur_children: + raw_deleg["max_concurrent_children"] = old_async_i + results["config_added"].append( + f"delegation.max_concurrent_children={old_async_i} " + f"(folded from deprecated max_async_children)" + ) + config["delegation"] = raw_deleg + _persist_migration(config) + if not quiet: + print( + " ✓ Removed deprecated delegation.max_async_children — " + "delegation.max_concurrent_children now caps background " + "delegations too." + ) + # ── Post-migration: disable exfiltration-shaped MCP stdio entries ── # Users can hand-edit mcp_servers, and older installs may already contain a # malicious entry. Preserve the stanza for auditability but mark it diff --git a/tests/hermes_cli/test_config.py b/tests/hermes_cli/test_config.py index 836d7bba2..41eef04a1 100644 --- a/tests/hermes_cli/test_config.py +++ b/tests/hermes_cli/test_config.py @@ -1528,6 +1528,61 @@ class TestVerifyOnStopMigration: raw = yaml.safe_load((tmp_path / "config.yaml").read_text()) assert raw["agent"]["verify_on_stop"] is True +class TestDelegationCapUnificationMigration: + """v32 → v33: fold deprecated max_async_children into max_concurrent_children.""" + + def _write(self, tmp_path, body): + (tmp_path / "config.yaml").write_text(body, encoding="utf-8") + + def test_stale_default_key_removed(self, tmp_path): + with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}): + self._write( + tmp_path, + "_config_version: 32\ndelegation:\n max_async_children: 3\n" + " max_concurrent_children: 15\n", + ) + migrate_config(interactive=False, quiet=True) + raw = yaml.safe_load((tmp_path / "config.yaml").read_text()) + assert "max_async_children" not in raw["delegation"] + # Default-valued (3) async cap must not shrink a raised children cap. + assert raw["delegation"]["max_concurrent_children"] == 15 + + def test_raised_async_cap_folded_into_children_cap(self, tmp_path): + with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}): + self._write( + tmp_path, + "_config_version: 32\ndelegation:\n max_async_children: 20\n" + " max_concurrent_children: 5\n", + ) + migrate_config(interactive=False, quiet=True) + raw = yaml.safe_load((tmp_path / "config.yaml").read_text()) + assert "max_async_children" not in raw["delegation"] + assert raw["delegation"]["max_concurrent_children"] == 20 + + def test_higher_children_cap_wins(self, tmp_path): + with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}): + self._write( + tmp_path, + "_config_version: 32\ndelegation:\n max_async_children: 8\n" + " max_concurrent_children: 15\n", + ) + migrate_config(interactive=False, quiet=True) + raw = yaml.safe_load((tmp_path / "config.yaml").read_text()) + assert "max_async_children" not in raw["delegation"] + assert raw["delegation"]["max_concurrent_children"] == 15 + + def test_no_delegation_section_is_noop(self, tmp_path): + with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}): + self._write(tmp_path, "_config_version: 32\nmodel:\n provider: openrouter\n") + migrate_config(interactive=False, quiet=True) + raw = yaml.safe_load((tmp_path / "config.yaml").read_text()) + # Migration must not materialize a delegation section it never had. + assert "delegation" not in raw + + def test_default_config_has_no_max_async_children(self): + assert "max_async_children" not in DEFAULT_CONFIG["delegation"] + + class TestConfigNormalizationDoesNotOverwriteUserValues: """Regression tests for #27354.""" diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py index 7cd6bf500..d35d3627e 100644 --- a/tests/tools/test_delegate.py +++ b/tests/tools/test_delegate.py @@ -2473,6 +2473,29 @@ class TestConcurrencyDefaults(unittest.TestCase): self.assertEqual(_get_max_concurrent_children(), 6) +class TestAsyncCapUnified(unittest.TestCase): + """max_async_children is deprecated: the async cap IS max_concurrent_children.""" + + @patch("tools.delegate_tool._load_config", + return_value={"max_concurrent_children": 15}) + def test_async_cap_follows_concurrent_children(self, mock_cfg): + from tools.delegate_tool import _get_max_async_children + self.assertEqual(_get_max_async_children(), 15) + + @patch("tools.delegate_tool._load_config", + return_value={"max_concurrent_children": 15, "max_async_children": 3}) + def test_stale_max_async_children_ignored(self, mock_cfg): + """A leftover max_async_children in config must not shrink the cap.""" + from tools.delegate_tool import _get_max_async_children + self.assertEqual(_get_max_async_children(), 15) + + @patch("tools.delegate_tool._load_config", return_value={}) + def test_default_matches_concurrent_children_default(self, mock_cfg): + from tools.delegate_tool import _get_max_async_children + with patch.dict(os.environ, {}, clear=True): + self.assertEqual(_get_max_async_children(), _get_max_concurrent_children()) + + # ========================================================================= # max_spawn_depth clamping # ========================================================================= diff --git a/tools/async_delegation.py b/tools/async_delegation.py index 6e0d515a6..460660675 100644 --- a/tools/async_delegation.py +++ b/tools/async_delegation.py @@ -221,7 +221,7 @@ def dispatch_async_delegation( f"Async delegation capacity reached ({max_async_children} " f"running). Wait for one to finish (its result will re-enter " f"the chat), or run this task synchronously " - f"(background=false). Raise delegation.max_async_children in " + f"(background=false). Raise delegation.max_concurrent_children in " f"config.yaml to allow more concurrent background subagents." ), } @@ -402,7 +402,7 @@ def dispatch_async_delegation_batch( "error": ( f"Async delegation capacity reached ({max_async_children} " f"running). Wait for one to finish (its result will re-enter " - f"the chat), or raise delegation.max_async_children in " + f"the chat), or raise delegation.max_concurrent_children in " f"config.yaml to allow more concurrent background units." ), } diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 893502ec0..c96cb4c99 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -392,36 +392,34 @@ def _get_max_concurrent_children() -> int: return _DEFAULT_MAX_CONCURRENT_CHILDREN -_DEFAULT_MAX_ASYNC_CHILDREN = 3 +_LEGACY_MAX_ASYNC_WARNED = False def _get_max_async_children() -> int: - """Read delegation.max_async_children from config (floor 1, no ceiling). + """Concurrency cap for background (``background=true``) delegations. - Caps how many background (``background=true``) subagents can run at once. - When at capacity, a new async dispatch is REJECTED (not queued) so a - runaway model can't pile up unbounded background work. Separate from - max_concurrent_children, which bounds a single synchronous batch. + DEPRECATED KNOB: ``delegation.max_async_children`` has been unified into + ``delegation.max_concurrent_children`` — one cap governs both a single + synchronous batch's parallelism and how many background delegation units + may run at once. When at capacity, a new async dispatch is REJECTED (not + queued) so a runaway model can't pile up unbounded background work; the + caller falls back to running the work synchronously. + + A leftover ``max_async_children`` in config.yaml is ignored (the config + migration removes it, folding a raised value into + ``max_concurrent_children``); we log a one-time deprecation warning if + one is still present. """ + global _LEGACY_MAX_ASYNC_WARNED cfg = _load_config() - val = cfg.get("max_async_children") - if val is not None: - try: - return max(1, int(val)) - except (TypeError, ValueError): - logger.warning( - "delegation.max_async_children=%r is not a valid integer; " - "using default %d", - val, _DEFAULT_MAX_ASYNC_CHILDREN, - ) - return _DEFAULT_MAX_ASYNC_CHILDREN - env_val = os.getenv("DELEGATION_MAX_ASYNC_CHILDREN") - if env_val: - try: - return max(1, int(env_val)) - except (TypeError, ValueError): - return _DEFAULT_MAX_ASYNC_CHILDREN - return _DEFAULT_MAX_ASYNC_CHILDREN + if cfg.get("max_async_children") is not None and not _LEGACY_MAX_ASYNC_WARNED: + _LEGACY_MAX_ASYNC_WARNED = True + logger.warning( + "delegation.max_async_children is deprecated and ignored; " + "delegation.max_concurrent_children now caps background " + "delegations too. Remove the stale key from config.yaml." + ) + return _get_max_concurrent_children() def _get_child_timeout() -> Optional[float]: @@ -2875,7 +2873,16 @@ def delegate_task( "batch synchronously instead.", dispatch.get("error", "rejected"), ) - return json.dumps(_execute_and_aggregate(), ensure_ascii=False) + _cap_result = _execute_and_aggregate() + if isinstance(_cap_result, dict): + _cap_result["note"] = ( + "The background delegation pool was at capacity " + "(delegation.max_concurrent_children), so the subagent(s) ran " + "SYNCHRONOUSLY and the result is included above. Raise " + "delegation.max_concurrent_children in config.yaml to allow " + "more concurrent background delegations." + ) + return json.dumps(_cap_result, ensure_ascii=False) # ----- Synchronous path ----- return json.dumps(_execute_and_aggregate(), ensure_ascii=False)