feat(delegation): unify concurrency caps — deprecate max_async_children (#56955)

delegation.max_concurrent_children is now the single cap for both a
batch's parallelism and concurrent background delegation units.

- _get_max_async_children() delegates to _get_max_concurrent_children();
  a leftover max_async_children key logs a one-time deprecation warning
- config v32→33 migration removes the stale key, folding a raised
  max_async_children into max_concurrent_children (max wins, no lost
  headroom)
- capacity error messages now point at max_concurrent_children
- pool-at-capacity sync fallback now attaches an explanatory note so
  the model/user know why the call blocked instead of dispatching async

Previously users who raised max_concurrent_children (e.g. to 15) still
hit the invisible default-3 async cap: the 4th background delegate_task
silently ran inline, blocking the turn with no signal.
This commit is contained in:
Teknium 2026-07-02 02:53:39 -07:00 committed by GitHub
parent 14639ded77
commit 6e369a3762
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 153 additions and 30 deletions

View file

@ -2118,8 +2118,11 @@ DEFAULT_CONFIG = {
# (floor 30s) to enforce a hard cap.
"reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium",
# "low", "minimal", "none" (empty = inherit parent's level)
"max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling
"max_async_children": 3, # max concurrent background (background=true) subagents; new dispatches rejected at capacity
"max_concurrent_children": 3, # unified concurrency cap: max parallel children per batch
# AND max concurrent background (background=true)
# delegation units. New async dispatches beyond the cap
# fall back to synchronous execution. Floor of 1, no ceiling.
# (Replaces the deprecated max_async_children.)
# Orchestrator role controls (see tools/delegate_tool.py:_get_max_spawn_depth
# and _get_orchestrator_enabled). Floored at 1, no upper ceiling —
# raise deliberately, each level multiplies API cost.
@ -3114,7 +3117,7 @@ DEFAULT_CONFIG = {
},
# Config schema version - bump this when adding new required fields
"_config_version": 32,
"_config_version": 33,
}
# =============================================================================
@ -5690,6 +5693,41 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
"legacy surface-aware behavior."
)
# ── Version 32 → 33: unify delegation concurrency caps ──
# delegation.max_async_children is deprecated: max_concurrent_children now
# caps both a single batch's parallelism and concurrent background
# delegation units. Fold a raised max_async_children into
# max_concurrent_children (take the max so nobody loses headroom), then
# drop the stale key.
if current_ver < 33:
config = read_raw_config()
raw_deleg = config.get("delegation")
if isinstance(raw_deleg, dict) and "max_async_children" in raw_deleg:
old_async = raw_deleg.pop("max_async_children")
try:
old_async_i = int(old_async)
except (TypeError, ValueError):
old_async_i = None
if old_async_i is not None and old_async_i > 3:
try:
cur_children = int(raw_deleg.get("max_concurrent_children", 3))
except (TypeError, ValueError):
cur_children = 3
if old_async_i > cur_children:
raw_deleg["max_concurrent_children"] = old_async_i
results["config_added"].append(
f"delegation.max_concurrent_children={old_async_i} "
f"(folded from deprecated max_async_children)"
)
config["delegation"] = raw_deleg
_persist_migration(config)
if not quiet:
print(
" ✓ Removed deprecated delegation.max_async_children — "
"delegation.max_concurrent_children now caps background "
"delegations too."
)
# ── Post-migration: disable exfiltration-shaped MCP stdio entries ──
# Users can hand-edit mcp_servers, and older installs may already contain a
# malicious entry. Preserve the stanza for auditability but mark it

View file

@ -1528,6 +1528,61 @@ class TestVerifyOnStopMigration:
raw = yaml.safe_load((tmp_path / "config.yaml").read_text())
assert raw["agent"]["verify_on_stop"] is True
class TestDelegationCapUnificationMigration:
"""v32 → v33: fold deprecated max_async_children into max_concurrent_children."""
def _write(self, tmp_path, body):
(tmp_path / "config.yaml").write_text(body, encoding="utf-8")
def test_stale_default_key_removed(self, tmp_path):
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
self._write(
tmp_path,
"_config_version: 32\ndelegation:\n max_async_children: 3\n"
" max_concurrent_children: 15\n",
)
migrate_config(interactive=False, quiet=True)
raw = yaml.safe_load((tmp_path / "config.yaml").read_text())
assert "max_async_children" not in raw["delegation"]
# Default-valued (3) async cap must not shrink a raised children cap.
assert raw["delegation"]["max_concurrent_children"] == 15
def test_raised_async_cap_folded_into_children_cap(self, tmp_path):
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
self._write(
tmp_path,
"_config_version: 32\ndelegation:\n max_async_children: 20\n"
" max_concurrent_children: 5\n",
)
migrate_config(interactive=False, quiet=True)
raw = yaml.safe_load((tmp_path / "config.yaml").read_text())
assert "max_async_children" not in raw["delegation"]
assert raw["delegation"]["max_concurrent_children"] == 20
def test_higher_children_cap_wins(self, tmp_path):
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
self._write(
tmp_path,
"_config_version: 32\ndelegation:\n max_async_children: 8\n"
" max_concurrent_children: 15\n",
)
migrate_config(interactive=False, quiet=True)
raw = yaml.safe_load((tmp_path / "config.yaml").read_text())
assert "max_async_children" not in raw["delegation"]
assert raw["delegation"]["max_concurrent_children"] == 15
def test_no_delegation_section_is_noop(self, tmp_path):
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
self._write(tmp_path, "_config_version: 32\nmodel:\n provider: openrouter\n")
migrate_config(interactive=False, quiet=True)
raw = yaml.safe_load((tmp_path / "config.yaml").read_text())
# Migration must not materialize a delegation section it never had.
assert "delegation" not in raw
def test_default_config_has_no_max_async_children(self):
assert "max_async_children" not in DEFAULT_CONFIG["delegation"]
class TestConfigNormalizationDoesNotOverwriteUserValues:
"""Regression tests for #27354."""

View file

@ -2473,6 +2473,29 @@ class TestConcurrencyDefaults(unittest.TestCase):
self.assertEqual(_get_max_concurrent_children(), 6)
class TestAsyncCapUnified(unittest.TestCase):
"""max_async_children is deprecated: the async cap IS max_concurrent_children."""
@patch("tools.delegate_tool._load_config",
return_value={"max_concurrent_children": 15})
def test_async_cap_follows_concurrent_children(self, mock_cfg):
from tools.delegate_tool import _get_max_async_children
self.assertEqual(_get_max_async_children(), 15)
@patch("tools.delegate_tool._load_config",
return_value={"max_concurrent_children": 15, "max_async_children": 3})
def test_stale_max_async_children_ignored(self, mock_cfg):
"""A leftover max_async_children in config must not shrink the cap."""
from tools.delegate_tool import _get_max_async_children
self.assertEqual(_get_max_async_children(), 15)
@patch("tools.delegate_tool._load_config", return_value={})
def test_default_matches_concurrent_children_default(self, mock_cfg):
from tools.delegate_tool import _get_max_async_children
with patch.dict(os.environ, {}, clear=True):
self.assertEqual(_get_max_async_children(), _get_max_concurrent_children())
# =========================================================================
# max_spawn_depth clamping
# =========================================================================

View file

@ -221,7 +221,7 @@ def dispatch_async_delegation(
f"Async delegation capacity reached ({max_async_children} "
f"running). Wait for one to finish (its result will re-enter "
f"the chat), or run this task synchronously "
f"(background=false). Raise delegation.max_async_children in "
f"(background=false). Raise delegation.max_concurrent_children in "
f"config.yaml to allow more concurrent background subagents."
),
}
@ -402,7 +402,7 @@ def dispatch_async_delegation_batch(
"error": (
f"Async delegation capacity reached ({max_async_children} "
f"running). Wait for one to finish (its result will re-enter "
f"the chat), or raise delegation.max_async_children in "
f"the chat), or raise delegation.max_concurrent_children in "
f"config.yaml to allow more concurrent background units."
),
}

View file

@ -392,36 +392,34 @@ def _get_max_concurrent_children() -> int:
return _DEFAULT_MAX_CONCURRENT_CHILDREN
_DEFAULT_MAX_ASYNC_CHILDREN = 3
_LEGACY_MAX_ASYNC_WARNED = False
def _get_max_async_children() -> int:
"""Read delegation.max_async_children from config (floor 1, no ceiling).
"""Concurrency cap for background (``background=true``) delegations.
Caps how many background (``background=true``) subagents can run at once.
When at capacity, a new async dispatch is REJECTED (not queued) so a
runaway model can't pile up unbounded background work. Separate from
max_concurrent_children, which bounds a single synchronous batch.
DEPRECATED KNOB: ``delegation.max_async_children`` has been unified into
``delegation.max_concurrent_children`` one cap governs both a single
synchronous batch's parallelism and how many background delegation units
may run at once. When at capacity, a new async dispatch is REJECTED (not
queued) so a runaway model can't pile up unbounded background work; the
caller falls back to running the work synchronously.
A leftover ``max_async_children`` in config.yaml is ignored (the config
migration removes it, folding a raised value into
``max_concurrent_children``); we log a one-time deprecation warning if
one is still present.
"""
global _LEGACY_MAX_ASYNC_WARNED
cfg = _load_config()
val = cfg.get("max_async_children")
if val is not None:
try:
return max(1, int(val))
except (TypeError, ValueError):
logger.warning(
"delegation.max_async_children=%r is not a valid integer; "
"using default %d",
val, _DEFAULT_MAX_ASYNC_CHILDREN,
)
return _DEFAULT_MAX_ASYNC_CHILDREN
env_val = os.getenv("DELEGATION_MAX_ASYNC_CHILDREN")
if env_val:
try:
return max(1, int(env_val))
except (TypeError, ValueError):
return _DEFAULT_MAX_ASYNC_CHILDREN
return _DEFAULT_MAX_ASYNC_CHILDREN
if cfg.get("max_async_children") is not None and not _LEGACY_MAX_ASYNC_WARNED:
_LEGACY_MAX_ASYNC_WARNED = True
logger.warning(
"delegation.max_async_children is deprecated and ignored; "
"delegation.max_concurrent_children now caps background "
"delegations too. Remove the stale key from config.yaml."
)
return _get_max_concurrent_children()
def _get_child_timeout() -> Optional[float]:
@ -2875,7 +2873,16 @@ def delegate_task(
"batch synchronously instead.",
dispatch.get("error", "rejected"),
)
return json.dumps(_execute_and_aggregate(), ensure_ascii=False)
_cap_result = _execute_and_aggregate()
if isinstance(_cap_result, dict):
_cap_result["note"] = (
"The background delegation pool was at capacity "
"(delegation.max_concurrent_children), so the subagent(s) ran "
"SYNCHRONOUSLY and the result is included above. Raise "
"delegation.max_concurrent_children in config.yaml to allow "
"more concurrent background delegations."
)
return json.dumps(_cap_result, ensure_ascii=False)
# ----- Synchronous path -----
return json.dumps(_execute_and_aggregate(), ensure_ascii=False)