hermes-agent/tests/tools/test_delegate.py
Eugeniusz Gilewski e4dbb67bf5 fix(security): remove model-controlled delegate ACP transport
Source: https://github.com/NousResearch/hermes-agent/pull/52346
Related prior work: https://github.com/NousResearch/hermes-agent/pull/39462
Related prior work: https://github.com/NousResearch/hermes-agent/pull/27426
Maintainer direction: https://github.com/NousResearch/hermes-agent/pull/52346#issuecomment-4854881612

Remove acp_command and acp_args from the model-facing delegate_task schema and
dispatch paths. Child agents can still use ACP subprocess transport when it
comes from trusted delegation config or parent inheritance, but a model tool
call can no longer choose the command or arguments that reach child
construction.

This is salvageable because the risky boundary is model control over child ACP
transport, not ACP itself. The patch follows the maintainer direction from the
source discussion by preserving trusted ACP configuration and prior integration
work while removing the untrusted tool-call fields from both top-level and
per-task delegate inputs.

Reproduced on main by passing acp_command through delegate_task and observing it
reach _build_child_agent. Verified after the fix that model dispatch strips the
hidden top-level fields and per-task hidden fields are ignored before child
construction.

Co-authored-by: Carlosian <claudlos@agentmail.to>
Co-authored-by: ssiweifnag <120658181+ssiweifnag@users.noreply.github.com>
Co-authored-by: nikshepsvn <23241247+nikshepsvn@users.noreply.github.com>
2026-07-03 03:27:47 -07:00

3034 lines
127 KiB
Python

#!/usr/bin/env python3
"""
Tests for the subagent delegation tool.
Uses mock AIAgent instances to test the delegation logic without
requiring API keys or real LLM calls.
Run with: python -m pytest tests/test_delegate.py -v
or: python tests/test_delegate.py
"""
import json
import os
import threading
import time
import unittest
from unittest.mock import MagicMock, patch
from tools.delegate_tool import (
DELEGATE_BLOCKED_TOOLS,
DELEGATE_TASK_SCHEMA,
DelegateEvent,
_get_max_concurrent_children,
_LEGACY_EVENT_MAP,
MAX_DEPTH,
check_delegate_requirements,
delegate_task,
_build_child_agent,
_build_child_progress_callback,
_build_child_system_prompt,
_extract_output_tail,
_strip_blocked_tools,
_resolve_child_credential_pool,
_resolve_delegation_credentials,
_inherit_parent_base_url,
)
def _make_mock_parent(depth=0):
"""Create a mock parent agent with the fields delegate_task expects."""
parent = MagicMock()
parent.base_url = "https://openrouter.ai/api/v1"
parent.api_key="***"
parent.provider = "openrouter"
parent.api_mode = "chat_completions"
parent.model = "anthropic/claude-sonnet-4"
parent.platform = "cli"
parent.providers_allowed = None
parent.providers_ignored = None
parent.providers_order = None
parent.provider_sort = None
parent._session_db = None
parent._delegate_depth = depth
parent._active_children = []
parent._active_children_lock = threading.Lock()
parent._print_fn = None
parent.tool_progress_callback = None
parent.thinking_callback = None
return parent
class TestDelegateRequirements(unittest.TestCase):
def test_always_available(self):
self.assertTrue(check_delegate_requirements())
def test_schema_valid(self):
self.assertEqual(DELEGATE_TASK_SCHEMA["name"], "delegate_task")
props = DELEGATE_TASK_SCHEMA["parameters"]["properties"]
self.assertIn("goal", props)
self.assertIn("tasks", props)
self.assertIn("context", props)
# toolsets is intentionally NOT exposed to the model — subagents always
# inherit the parent's toolsets. Letting the model name toolsets was a
# capability-selection surface the model should not control.
self.assertNotIn("toolsets", props)
self.assertNotIn("toolsets", props["tasks"]["items"]["properties"])
# max_iterations is intentionally NOT exposed to the model — it's
# config-authoritative via delegation.max_iterations so users get
# predictable budgets.
self.assertNotIn("max_iterations", props)
# ACP subprocess transport is operator-controlled via config.yaml, not
# model-controlled via delegate_task arguments.
self.assertNotIn("acp_command", props)
self.assertNotIn("acp_args", props)
self.assertNotIn("acp_command", props["tasks"]["items"]["properties"])
self.assertNotIn("acp_args", props["tasks"]["items"]["properties"])
self.assertNotIn("maxItems", props["tasks"]) # removed — limit is now runtime-configurable
def test_schema_description_advertises_runtime_limits(self):
"""The model must see the user's actual concurrency / spawn-depth caps,
not the framework defaults. Without this, models that read 'default 3'
will self-cap below the user's real limit.
"""
from tools.delegate_tool import (
_build_dynamic_schema_overrides,
_get_max_concurrent_children,
_get_max_spawn_depth,
)
overrides = _build_dynamic_schema_overrides()
max_children = _get_max_concurrent_children()
max_depth = _get_max_spawn_depth()
desc = overrides["description"]
tasks_desc = overrides["parameters"]["properties"]["tasks"]["description"]
role_desc = overrides["parameters"]["properties"]["role"]["description"]
# Top-level description names the user's concurrency limit explicitly.
self.assertIn(f"up to {max_children}", desc)
# Top-level description names the user's spawn-depth limit explicitly.
self.assertIn(f"max_spawn_depth={max_depth}", desc)
# tasks parameter description repeats the concurrency cap.
self.assertIn(f"up to {max_children}", tasks_desc)
# role parameter description names the spawn-depth limit.
self.assertIn(f"max_spawn_depth={max_depth}", role_desc)
# The misleading "default 3" / "default 2" wording is gone from
# every dynamic surface (model-facing).
for surface in (desc, tasks_desc, role_desc):
self.assertNotIn("default 3", surface)
self.assertNotIn("default 2", surface)
def test_schema_overrides_applied_via_get_definitions(self):
"""Registry.get_definitions() must apply dynamic_schema_overrides so
the model API call sees current values, not the static import-time text.
"""
from tools.registry import registry
defs = registry.get_definitions({"delegate_task"})
self.assertEqual(len(defs), 1)
fn = defs[0]["function"]
# Description should mention the user's actual limits, not "default 3".
from tools.delegate_tool import (
_get_max_concurrent_children,
_get_max_spawn_depth,
)
self.assertIn(f"up to {_get_max_concurrent_children()}", fn["description"])
self.assertIn(f"max_spawn_depth={_get_max_spawn_depth()}", fn["description"])
class TestChildSystemPrompt(unittest.TestCase):
def test_goal_only(self):
prompt = _build_child_system_prompt("Fix the tests")
self.assertIn("Fix the tests", prompt)
self.assertIn("YOUR TASK", prompt)
self.assertNotIn("CONTEXT", prompt)
def test_goal_with_context(self):
prompt = _build_child_system_prompt("Fix the tests", "Error: assertion failed in test_foo.py line 42")
self.assertIn("Fix the tests", prompt)
self.assertIn("CONTEXT", prompt)
self.assertIn("assertion failed", prompt)
def test_empty_context_ignored(self):
prompt = _build_child_system_prompt("Do something", " ")
self.assertNotIn("CONTEXT", prompt)
class TestStripBlockedTools(unittest.TestCase):
def test_removes_blocked_toolsets(self):
result = _strip_blocked_tools(["terminal", "file", "delegation", "clarify", "memory", "code_execution"])
self.assertEqual(sorted(result), ["file", "terminal"])
def test_preserves_allowed_toolsets(self):
result = _strip_blocked_tools(["terminal", "file", "web", "browser"])
self.assertEqual(sorted(result), ["browser", "file", "terminal", "web"])
def test_empty_input(self):
result = _strip_blocked_tools([])
self.assertEqual(result, [])
def test_strips_cronjob_toolset(self):
"""Regression for issue #43466: child subagents must not inherit
the cronjob toolset from a parent running on a gateway platform.
Without this guard, a delegated child could schedule new cron jobs
under the parent's identity.
"""
result = _strip_blocked_tools(
["terminal", "file", "cronjob", "web"]
)
self.assertNotIn("cronjob", result)
self.assertIn("terminal", result)
self.assertIn("file", result)
self.assertIn("web", result)
def test_strip_set_derived_from_blocklist(self):
"""The strip set must be derived from DELEGATE_BLOCKED_TOOLS so a
new blocked tool can't silently leak through as a toolset name
(regression for issue #43466's 'more robust variant' suggestion).
"""
from tools.delegate_tool import TOOLSETS, _strip_blocked_tools
# Every toolset whose tools are ALL in the blocklist should be stripped
for name, defn in TOOLSETS.items():
tools = defn.get("tools", [])
if tools and all(t in DELEGATE_BLOCKED_TOOLS for t in tools):
self.assertNotIn(
name,
_strip_blocked_tools([name, "terminal"]),
f"Toolset {name!r} (tools={tools}) is fully blocked "
f"but was not stripped",
)
class TestDelegateTask(unittest.TestCase):
def test_no_parent_agent(self):
result = json.loads(delegate_task(goal="test"))
self.assertIn("error", result)
self.assertIn("parent agent", result["error"])
def test_depth_limit(self):
parent = _make_mock_parent(depth=2)
result = json.loads(delegate_task(goal="test", parent_agent=parent))
self.assertIn("error", result)
self.assertIn("depth limit", result["error"].lower())
def test_no_goal_or_tasks(self):
parent = _make_mock_parent()
result = json.loads(delegate_task(parent_agent=parent))
self.assertIn("error", result)
def test_empty_goal(self):
parent = _make_mock_parent()
result = json.loads(delegate_task(goal=" ", parent_agent=parent))
self.assertIn("error", result)
def test_task_missing_goal(self):
parent = _make_mock_parent()
result = json.loads(delegate_task(tasks=[{"context": "no goal here"}], parent_agent=parent))
self.assertIn("error", result)
@patch("tools.delegate_tool._run_single_child")
def test_single_task_mode(self, mock_run):
mock_run.return_value = {
"task_index": 0, "status": "completed",
"summary": "Done!", "api_calls": 3, "duration_seconds": 5.0
}
parent = _make_mock_parent()
result = json.loads(delegate_task(goal="Fix tests", context="error log...", parent_agent=parent))
self.assertIn("results", result)
self.assertEqual(len(result["results"]), 1)
self.assertEqual(result["results"][0]["status"], "completed")
self.assertEqual(result["results"][0]["summary"], "Done!")
mock_run.assert_called_once()
@patch("tools.delegate_tool._run_single_child")
def test_batch_mode(self, mock_run):
mock_run.side_effect = [
{"task_index": 0, "status": "completed", "summary": "Result A", "api_calls": 2, "duration_seconds": 3.0},
{"task_index": 1, "status": "completed", "summary": "Result B", "api_calls": 4, "duration_seconds": 6.0},
]
parent = _make_mock_parent()
tasks = [
{"goal": "Research topic A"},
{"goal": "Research topic B"},
]
result = json.loads(delegate_task(tasks=tasks, parent_agent=parent))
self.assertIn("results", result)
self.assertEqual(len(result["results"]), 2)
self.assertEqual(result["results"][0]["summary"], "Result A")
self.assertEqual(result["results"][1]["summary"], "Result B")
self.assertIn("total_duration_seconds", result)
@patch("tools.delegate_tool._run_single_child")
def test_batch_mode_accepts_json_string_tasks(self, mock_run):
mock_run.side_effect = [
{
"task_index": 0,
"status": "completed",
"summary": "Result A",
"api_calls": 2,
"duration_seconds": 3.0,
},
{
"task_index": 1,
"status": "completed",
"summary": "Result B",
"api_calls": 4,
"duration_seconds": 6.0,
},
]
parent = _make_mock_parent()
tasks = json.dumps(
[
{"goal": "Research topic A"},
{"goal": "Research topic B"},
]
)
result = json.loads(delegate_task(tasks=tasks, parent_agent=parent))
self.assertIn("results", result)
self.assertEqual(len(result["results"]), 2)
self.assertEqual(result["results"][0]["summary"], "Result A")
self.assertEqual(result["results"][1]["summary"], "Result B")
@patch("tools.delegate_tool._run_single_child")
def test_batch_mode_rejects_non_object_tasks(self, mock_run):
parent = _make_mock_parent()
result = json.loads(
delegate_task(tasks=["not a task object"], parent_agent=parent)
)
self.assertIn("error", result)
self.assertIn("Task 0 must be an object", result["error"])
mock_run.assert_not_called()
@patch("tools.delegate_tool._run_single_child")
def test_batch_mode_rejects_malformed_json_string_tasks(self, mock_run):
parent = _make_mock_parent()
result = json.loads(
delegate_task(tasks='[{"goal": "bad}', parent_agent=parent)
)
self.assertIn("error", result)
self.assertIn("could not be parsed as JSON", result["error"])
mock_run.assert_not_called()
@patch("tools.delegate_tool._run_single_child")
def test_batch_capped_at_3(self, mock_run):
mock_run.return_value = {
"task_index": 0, "status": "completed",
"summary": "Done", "api_calls": 1, "duration_seconds": 1.0
}
parent = _make_mock_parent()
limit = _get_max_concurrent_children()
tasks = [{"goal": f"Task {i}"} for i in range(limit + 2)]
result = json.loads(delegate_task(tasks=tasks, parent_agent=parent))
# Should return an error instead of silently truncating
self.assertIn("error", result)
self.assertIn("Too many tasks", result["error"])
mock_run.assert_not_called()
@patch("tools.delegate_tool._run_single_child")
def test_batch_ignores_toplevel_goal(self, mock_run):
"""When tasks array is provided, top-level goal/context/toolsets are ignored."""
mock_run.return_value = {
"task_index": 0, "status": "completed",
"summary": "Done", "api_calls": 1, "duration_seconds": 1.0
}
parent = _make_mock_parent()
result = json.loads(delegate_task(
goal="This should be ignored",
tasks=[{"goal": "Actual task"}],
parent_agent=parent,
))
# The mock was called with the tasks array item, not the top-level goal
call_args = mock_run.call_args
self.assertEqual(call_args.kwargs.get("goal") or call_args[1].get("goal", call_args[0][1] if len(call_args[0]) > 1 else None), "Actual task")
@patch("tools.delegate_tool._run_single_child")
def test_failed_child_included_in_results(self, mock_run):
mock_run.return_value = {
"task_index": 0, "status": "error",
"summary": None, "error": "Something broke",
"api_calls": 0, "duration_seconds": 0.5
}
parent = _make_mock_parent()
result = json.loads(delegate_task(goal="Break things", parent_agent=parent))
self.assertEqual(result["results"][0]["status"], "error")
self.assertIn("Something broke", result["results"][0]["error"])
def test_depth_increments(self):
"""Verify child gets parent's depth + 1."""
parent = _make_mock_parent(depth=0)
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.run_conversation.return_value = {
"final_response": "done", "completed": True, "api_calls": 1
}
MockAgent.return_value = mock_child
delegate_task(goal="Test depth", parent_agent=parent)
self.assertEqual(mock_child._delegate_depth, 1)
def test_active_children_tracking(self):
"""Verify children are registered/unregistered for interrupt propagation."""
parent = _make_mock_parent(depth=0)
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.run_conversation.return_value = {
"final_response": "done", "completed": True, "api_calls": 1
}
MockAgent.return_value = mock_child
delegate_task(goal="Test tracking", parent_agent=parent)
self.assertEqual(len(parent._active_children), 0)
def test_child_inherits_runtime_credentials(self):
parent = _make_mock_parent(depth=0)
parent.base_url = "https://chatgpt.com/backend-api/codex"
parent.api_key="***"
parent.provider = "openai-codex"
parent.api_mode = "codex_responses"
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.run_conversation.return_value = {
"final_response": "ok",
"completed": True,
"api_calls": 1,
}
MockAgent.return_value = mock_child
delegate_task(goal="Test runtime inheritance", parent_agent=parent)
_, kwargs = MockAgent.call_args
self.assertEqual(kwargs["base_url"], parent.base_url)
self.assertEqual(kwargs["api_key"], parent.api_key)
self.assertEqual(kwargs["provider"], parent.provider)
self.assertEqual(kwargs["api_mode"], parent.api_mode)
def test_child_inherits_parent_print_fn(self):
parent = _make_mock_parent(depth=0)
sink = MagicMock()
parent._print_fn = sink
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
MockAgent.return_value = mock_child
_build_child_agent(
task_index=0,
goal="Keep stdout clean",
context=None,
toolsets=None,
model=None,
max_iterations=10,
parent_agent=parent,
task_count=1,
)
self.assertIs(mock_child._print_fn, sink)
def test_child_uses_thinking_callback_when_progress_callback_available(self):
parent = _make_mock_parent(depth=0)
parent.tool_progress_callback = MagicMock()
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
MockAgent.return_value = mock_child
_build_child_agent(
task_index=0,
goal="Avoid raw child spinners",
context=None,
toolsets=None,
model=None,
max_iterations=10,
parent_agent=parent,
task_count=1,
)
self.assertTrue(callable(mock_child.thinking_callback))
mock_child.thinking_callback("deliberating...")
parent.tool_progress_callback.assert_not_called()
class TestToolNamePreservation(unittest.TestCase):
"""Verify _last_resolved_tool_names is restored after subagent runs."""
def test_global_tool_names_restored_after_delegation(self):
"""The process-global _last_resolved_tool_names must be restored
after a subagent completes so the parent's execute_code sandbox
generates correct imports."""
import model_tools
parent = _make_mock_parent(depth=0)
original_tools = ["terminal", "read_file", "web_search", "execute_code", "delegate_task"]
model_tools._last_resolved_tool_names = list(original_tools)
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.run_conversation.return_value = {
"final_response": "done", "completed": True, "api_calls": 1,
}
MockAgent.return_value = mock_child
delegate_task(goal="Test tool preservation", parent_agent=parent)
self.assertEqual(model_tools._last_resolved_tool_names, original_tools)
def test_global_tool_names_restored_after_child_failure(self):
"""Even when the child agent raises, the global must be restored."""
import model_tools
parent = _make_mock_parent(depth=0)
original_tools = ["terminal", "read_file", "web_search"]
model_tools._last_resolved_tool_names = list(original_tools)
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.run_conversation.side_effect = RuntimeError("boom")
MockAgent.return_value = mock_child
result = json.loads(delegate_task(goal="Crash test", parent_agent=parent))
self.assertEqual(result["results"][0]["status"], "error")
self.assertEqual(model_tools._last_resolved_tool_names, original_tools)
def test_build_child_agent_does_not_raise_name_error(self):
"""Regression: _build_child_agent must not reference _saved_tool_names.
The bug introduced by the e7844e9c merge conflict: line 235 inside
_build_child_agent read `list(_saved_tool_names)` where that variable
is only defined later in _run_single_child. Calling _build_child_agent
standalone (without _run_single_child's scope) must never raise NameError.
"""
parent = _make_mock_parent(depth=0)
with patch("run_agent.AIAgent"):
try:
_build_child_agent(
task_index=0,
goal="regression check",
context=None,
toolsets=None,
model=None,
max_iterations=10,
parent_agent=parent,
task_count=1,
)
except NameError as exc:
self.fail(
f"_build_child_agent raised NameError — "
f"_saved_tool_names leaked back into wrong scope: {exc}"
)
def test_build_child_agent_ignores_acp_command_when_binary_missing(self):
"""Stale delegation.command config must not force ACP subprocess mode."""
parent = _make_mock_parent(depth=0)
# The crash scenario is a TG/cron agent on a host with no ACP CLI —
# parent itself has no acp_command, so clearing the override must NOT
# fall through to a stray parent value.
parent.acp_command = None
parent.acp_args = []
captured = {}
with patch("run_agent.AIAgent") as MockAgent, \
patch("shutil.which", return_value=None) as mock_which:
mock_child = MagicMock()
MockAgent.return_value = mock_child
_build_child_agent(
task_index=0,
goal="search X for crypto twitter",
context=None,
toolsets=None,
model=None,
max_iterations=10,
parent_agent=parent,
task_count=1,
override_acp_command="copilot",
override_acp_args=["--foo"],
)
_, kwargs = MockAgent.call_args
captured["provider"] = kwargs.get("provider")
captured["acp_command"] = kwargs.get("acp_command")
captured["acp_args"] = kwargs.get("acp_args")
mock_which.assert_called_with("copilot")
self.assertNotEqual(
captured["provider"],
"copilot-acp",
"missing acp_command binary must NOT force copilot-acp provider",
)
self.assertIsNone(captured["acp_command"])
self.assertEqual(captured["acp_args"], [])
def test_build_child_agent_honors_acp_command_when_binary_present(self):
"""When the acp_command binary exists on PATH, behavior is unchanged:
provider is forced to copilot-acp and command/args propagate to the
child agent. Guards against the missing-binary check accidentally
breaking working ACP delegation setups.
"""
parent = _make_mock_parent(depth=0)
captured = {}
with patch("run_agent.AIAgent") as MockAgent, \
patch("shutil.which", return_value="/usr/local/bin/copilot"):
mock_child = MagicMock()
MockAgent.return_value = mock_child
_build_child_agent(
task_index=0,
goal="copilot path",
context=None,
toolsets=None,
model=None,
max_iterations=10,
parent_agent=parent,
task_count=1,
override_acp_command="copilot",
override_acp_args=["--foo"],
)
_, kwargs = MockAgent.call_args
captured["provider"] = kwargs.get("provider")
captured["acp_command"] = kwargs.get("acp_command")
self.assertEqual(captured["provider"], "copilot-acp")
self.assertEqual(captured["acp_command"], "copilot")
def test_schema_never_exposes_acp_transport_fields(self):
"""delegate_task must never make ACP transport model-facing."""
from tools.delegate_tool import _build_dynamic_schema_overrides
with patch("shutil.which", return_value="/usr/local/bin/copilot"):
overrides = _build_dynamic_schema_overrides()
props = overrides["parameters"]["properties"]
self.assertNotIn("acp_command", props)
self.assertNotIn("acp_args", props)
task_item_props = props["tasks"]["items"]["properties"]
self.assertNotIn("acp_command", task_item_props)
self.assertNotIn("acp_args", task_item_props)
def test_saved_tool_names_set_on_child_before_run(self):
"""_run_single_child must set _delegate_saved_tool_names on the child
from model_tools._last_resolved_tool_names before run_conversation."""
import model_tools
parent = _make_mock_parent(depth=0)
expected_tools = ["read_file", "web_search", "execute_code"]
model_tools._last_resolved_tool_names = list(expected_tools)
captured = {}
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
def capture_and_return(user_message, task_id=None, stream_callback=None):
captured["saved"] = list(mock_child._delegate_saved_tool_names)
return {"final_response": "ok", "completed": True, "api_calls": 1}
mock_child.run_conversation.side_effect = capture_and_return
MockAgent.return_value = mock_child
delegate_task(goal="capture test", parent_agent=parent)
self.assertEqual(captured["saved"], expected_tools)
class TestDelegateObservability(unittest.TestCase):
"""Tests for enriched metadata returned by _run_single_child."""
def test_observability_fields_present(self):
"""Completed child should return tool_trace, tokens, model, exit_reason."""
parent = _make_mock_parent(depth=0)
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.model = "claude-sonnet-4-6"
mock_child.session_prompt_tokens = 5000
mock_child.session_completion_tokens = 1200
mock_child.run_conversation.return_value = {
"final_response": "done",
"completed": True,
"interrupted": False,
"api_calls": 3,
"messages": [
{"role": "user", "content": "do something"},
{"role": "assistant", "tool_calls": [
{"id": "tc_1", "function": {"name": "web_search", "arguments": '{"query": "test"}'}}
]},
{"role": "tool", "tool_call_id": "tc_1", "content": '{"results": [1,2,3]}'},
{"role": "assistant", "content": "done"},
],
}
MockAgent.return_value = mock_child
result = json.loads(delegate_task(goal="Test observability", parent_agent=parent))
entry = result["results"][0]
# Core observability fields
self.assertEqual(entry["model"], "claude-sonnet-4-6")
self.assertEqual(entry["exit_reason"], "completed")
self.assertEqual(entry["tokens"]["input"], 5000)
self.assertEqual(entry["tokens"]["output"], 1200)
# Tool trace
self.assertEqual(len(entry["tool_trace"]), 1)
self.assertEqual(entry["tool_trace"][0]["tool"], "web_search")
self.assertIn("args_bytes", entry["tool_trace"][0])
self.assertIn("result_bytes", entry["tool_trace"][0])
self.assertEqual(entry["tool_trace"][0]["status"], "ok")
def test_tool_trace_handles_list_content_blocks(self):
"""Tool-result content blocks should not crash observability metadata."""
parent = _make_mock_parent(depth=0)
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.model = "claude-sonnet-4-6"
mock_child.session_prompt_tokens = 0
mock_child.session_completion_tokens = 0
mock_child.run_conversation.return_value = {
"final_response": "done",
"completed": True,
"interrupted": False,
"api_calls": 1,
"messages": [
{"role": "assistant", "tool_calls": [
{"id": "tc_1", "function": {"name": "image_generate", "arguments": '{"prompt": "x"}'}}
]},
{"role": "tool", "tool_call_id": "tc_1", "content": [
{"type": "text", "text": '{"success": true}'},
]},
],
}
MockAgent.return_value = mock_child
result = json.loads(delegate_task(goal="Test list content", parent_agent=parent))
trace = result["results"][0]["tool_trace"]
self.assertEqual(trace[0]["tool"], "image_generate")
self.assertEqual(trace[0]["status"], "ok")
self.assertGreater(trace[0]["result_bytes"], 0)
def test_output_tail_flattens_list_content_blocks(self):
"""_extract_output_tail (live overlay) must flatten content-block lists
so error markers buried inside blocks are detected and previews are
real text, not a "[{'type': 'text'...}]" repr blob."""
result = {
"messages": [
{"role": "assistant", "tool_calls": [
{"id": "t1", "function": {"name": "terminal", "arguments": "{}"}}
]},
{"role": "tool", "tool_call_id": "t1", "content": [
{"type": "text", "text": "Error: command not found"},
]},
{"role": "assistant", "tool_calls": [
{"id": "t2", "function": {"name": "vision", "arguments": "{}"}}
]},
{"role": "tool", "tool_call_id": "t2", "content": [
{"type": "text", "text": "all good"},
{"type": "image_url", "image_url": {"url": "data:x"}},
]},
]
}
tail = _extract_output_tail(result, max_entries=8, max_chars=600)
by_tool = {t["tool"]: t for t in tail}
# Block-wrapped error is correctly flagged (crude str() would miss it).
self.assertTrue(by_tool["terminal"]["is_error"])
self.assertEqual(by_tool["terminal"]["preview"], "Error: command not found")
# Non-error multimodal result is not flagged, and the text is readable.
self.assertFalse(by_tool["vision"]["is_error"])
self.assertIn("all good", by_tool["vision"]["preview"])
# No raw content-block repr leaked into any preview.
for entry in tail:
self.assertNotIn("'type'", entry["preview"])
def test_tool_trace_detects_error(self):
"""Tool results containing 'error' should be marked as error status."""
parent = _make_mock_parent(depth=0)
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.model = "claude-sonnet-4-6"
mock_child.session_prompt_tokens = 0
mock_child.session_completion_tokens = 0
mock_child.run_conversation.return_value = {
"final_response": "failed",
"completed": True,
"interrupted": False,
"api_calls": 1,
"messages": [
{"role": "assistant", "tool_calls": [
{"id": "tc_1", "function": {"name": "terminal", "arguments": '{"cmd": "ls"}'}}
]},
{"role": "tool", "tool_call_id": "tc_1", "content": "Error: command not found"},
],
}
MockAgent.return_value = mock_child
result = json.loads(delegate_task(goal="Test error trace", parent_agent=parent))
trace = result["results"][0]["tool_trace"]
self.assertEqual(trace[0]["status"], "error")
def test_parallel_tool_calls_paired_correctly(self):
"""Parallel tool calls should each get their own result via tool_call_id matching."""
parent = _make_mock_parent(depth=0)
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.model = "claude-sonnet-4-6"
mock_child.session_prompt_tokens = 3000
mock_child.session_completion_tokens = 800
mock_child.run_conversation.return_value = {
"final_response": "done",
"completed": True,
"interrupted": False,
"api_calls": 1,
"messages": [
{"role": "assistant", "tool_calls": [
{"id": "tc_a", "function": {"name": "web_search", "arguments": '{"q": "a"}'}},
{"id": "tc_b", "function": {"name": "web_search", "arguments": '{"q": "b"}'}},
{"id": "tc_c", "function": {"name": "terminal", "arguments": '{"cmd": "ls"}'}},
]},
{"role": "tool", "tool_call_id": "tc_a", "content": '{"ok": true}'},
{"role": "tool", "tool_call_id": "tc_b", "content": "Error: rate limited"},
{"role": "tool", "tool_call_id": "tc_c", "content": "file1.txt\nfile2.txt"},
{"role": "assistant", "content": "done"},
],
}
MockAgent.return_value = mock_child
result = json.loads(delegate_task(goal="Test parallel", parent_agent=parent))
trace = result["results"][0]["tool_trace"]
# All three tool calls should have results
self.assertEqual(len(trace), 3)
# First: web_search → ok
self.assertEqual(trace[0]["tool"], "web_search")
self.assertEqual(trace[0]["status"], "ok")
self.assertIn("result_bytes", trace[0])
# Second: web_search → error
self.assertEqual(trace[1]["tool"], "web_search")
self.assertEqual(trace[1]["status"], "error")
self.assertIn("result_bytes", trace[1])
# Third: terminal → ok
self.assertEqual(trace[2]["tool"], "terminal")
self.assertEqual(trace[2]["status"], "ok")
self.assertIn("result_bytes", trace[2])
def test_exit_reason_interrupted(self):
"""Interrupted child should report exit_reason='interrupted'."""
parent = _make_mock_parent(depth=0)
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.model = "claude-sonnet-4-6"
mock_child.session_prompt_tokens = 0
mock_child.session_completion_tokens = 0
mock_child.run_conversation.return_value = {
"final_response": "",
"completed": False,
"interrupted": True,
"api_calls": 2,
"messages": [],
}
MockAgent.return_value = mock_child
result = json.loads(delegate_task(goal="Test interrupt", parent_agent=parent))
self.assertEqual(result["results"][0]["exit_reason"], "interrupted")
def test_exit_reason_max_iterations(self):
"""Child that didn't complete and wasn't interrupted hit max_iterations."""
parent = _make_mock_parent(depth=0)
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.model = "claude-sonnet-4-6"
mock_child.session_prompt_tokens = 0
mock_child.session_completion_tokens = 0
mock_child.run_conversation.return_value = {
"final_response": "",
"completed": False,
"interrupted": False,
"api_calls": 50,
"messages": [],
}
MockAgent.return_value = mock_child
result = json.loads(delegate_task(goal="Test max iter", parent_agent=parent))
self.assertEqual(result["results"][0]["exit_reason"], "max_iterations")
def test_empty_sentinel_marks_status_failed(self):
"""Regression: a child that returns the literal '(empty)' sentinel
(emitted by run_agent.py when the LLM returns empty responses after
retries — e.g. transport misrouting) must be reported as failed, not
silently accepted as a completed delegation. Otherwise the parent
surfaces an empty string as if the subagent succeeded."""
parent = _make_mock_parent(depth=0)
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.model = "claude-sonnet-4-6"
mock_child.session_prompt_tokens = 0
mock_child.session_completion_tokens = 0
mock_child.run_conversation.return_value = {
"final_response": "(empty)",
"completed": True,
"interrupted": False,
"api_calls": 4,
"messages": [],
}
MockAgent.return_value = mock_child
result = json.loads(delegate_task(goal="Test empty sentinel", parent_agent=parent))
self.assertEqual(result["results"][0]["status"], "failed")
class TestSubagentCostRollup(unittest.TestCase):
"""Port of Kilo-Org/kilocode#9448 — parent's session_estimated_cost_usd
must include subagent spend, not just the parent's own API calls."""
def _make_parent_with_cost_counters(self, depth=0, starting_cost=0.0):
parent = _make_mock_parent(depth=depth)
# The fields AIAgent exposes and the footer reads from. Set real
# floats/strings so the rollup can add to them rather than tripping
# on MagicMock auto-attrs.
parent.session_estimated_cost_usd = starting_cost
parent.session_cost_status = "unknown"
parent.session_cost_source = "none"
return parent
def test_single_child_cost_folded_into_parent(self):
parent = self._make_parent_with_cost_counters(starting_cost=0.10)
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.model = "claude-sonnet-4-6"
mock_child.session_prompt_tokens = 1000
mock_child.session_completion_tokens = 200
mock_child.session_estimated_cost_usd = 0.42
mock_child.run_conversation.return_value = {
"final_response": "done",
"completed": True,
"interrupted": False,
"api_calls": 2,
"messages": [],
}
MockAgent.return_value = mock_child
result = json.loads(delegate_task(goal="do stuff", parent_agent=parent))
# Parent footer must reflect parent_cost + child_cost.
self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.52, places=6)
# Rollup must strip the internal field before serialising to the model.
self.assertNotIn("_child_cost_usd", result["results"][0])
self.assertNotIn("_child_role", result["results"][0])
def test_batch_children_costs_sum_into_parent(self):
parent = self._make_parent_with_cost_counters(starting_cost=0.00)
with patch("tools.delegate_tool._run_single_child") as mock_run:
mock_run.side_effect = [
{
"task_index": 0,
"status": "completed",
"summary": "A",
"api_calls": 2,
"duration_seconds": 1.0,
"_child_role": "leaf",
"_child_cost_usd": 0.15,
},
{
"task_index": 1,
"status": "completed",
"summary": "B",
"api_calls": 2,
"duration_seconds": 1.0,
"_child_role": "leaf",
"_child_cost_usd": 0.27,
},
{
"task_index": 2,
"status": "failed",
"summary": "",
"error": "boom",
"api_calls": 0,
"duration_seconds": 0.1,
"_child_role": "leaf",
"_child_cost_usd": 0.03,
},
]
result = json.loads(
delegate_task(
tasks=[{"goal": "A"}, {"goal": "B"}, {"goal": "C"}],
parent_agent=parent,
)
)
# 0.15 + 0.27 + 0.03 even though one child failed — the API calls it
# made before failing still cost money.
self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.45, places=6)
# cost_source promoted from "none" since the parent had no direct spend.
self.assertEqual(parent.session_cost_source, "subagent")
self.assertEqual(parent.session_cost_status, "estimated")
# All internal fields stripped from results.
for entry in result["results"]:
self.assertNotIn("_child_cost_usd", entry)
self.assertNotIn("_child_role", entry)
def test_zero_cost_children_leave_parent_source_untouched(self):
"""If every child reports 0 cost (e.g. free local model), we should
not invent a fake 'subagent' source — the parent's 'none' stays."""
parent = self._make_parent_with_cost_counters(starting_cost=0.00)
with patch("tools.delegate_tool._run_single_child") as mock_run:
mock_run.return_value = {
"task_index": 0,
"status": "completed",
"summary": "done",
"api_calls": 1,
"duration_seconds": 0.5,
"_child_role": "leaf",
"_child_cost_usd": 0.0,
}
delegate_task(goal="free local run", parent_agent=parent)
self.assertEqual(parent.session_estimated_cost_usd, 0.0)
self.assertEqual(parent.session_cost_source, "none")
def test_parent_with_real_source_not_overwritten(self):
"""If the parent already has its own cost billed (cost_source != 'none'),
adding subagent cost must not clobber the existing source label."""
parent = self._make_parent_with_cost_counters(starting_cost=0.20)
parent.session_cost_status = "exact"
parent.session_cost_source = "openrouter"
with patch("tools.delegate_tool._run_single_child") as mock_run:
mock_run.return_value = {
"task_index": 0,
"status": "completed",
"summary": "done",
"api_calls": 1,
"duration_seconds": 0.5,
"_child_role": "leaf",
"_child_cost_usd": 0.30,
}
delegate_task(goal="billed run", parent_agent=parent)
self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.50, places=6)
# Real source label preserved.
self.assertEqual(parent.session_cost_source, "openrouter")
self.assertEqual(parent.session_cost_status, "exact")
def test_rollup_tolerates_missing_cost_fields(self):
"""Older fixtures / fabricated error entries may not carry
_child_cost_usd. Rollup must degrade to zero-add silently."""
parent = self._make_parent_with_cost_counters(starting_cost=0.10)
with patch("tools.delegate_tool._run_single_child") as mock_run:
mock_run.return_value = {
"task_index": 0,
"status": "completed",
"summary": "done",
"api_calls": 1,
"duration_seconds": 0.5,
# no _child_role, no _child_cost_usd
}
result = json.loads(delegate_task(goal="legacy", parent_agent=parent))
# Parent cost unchanged.
self.assertEqual(parent.session_estimated_cost_usd, 0.10)
self.assertEqual(len(result["results"]), 1)
class TestBlockedTools(unittest.TestCase):
def test_blocked_tools_constant(self):
for tool in ["delegate_task", "clarify", "memory", "send_message", "execute_code"]:
self.assertIn(tool, DELEGATE_BLOCKED_TOOLS)
def test_constants(self):
from tools.delegate_tool import (
_get_max_spawn_depth, _get_orchestrator_enabled,
_MIN_SPAWN_DEPTH,
)
self.assertEqual(_get_max_concurrent_children(), 3)
self.assertEqual(MAX_DEPTH, 1)
self.assertEqual(_get_max_spawn_depth(), 1) # default: flat
self.assertTrue(_get_orchestrator_enabled()) # default
self.assertEqual(_MIN_SPAWN_DEPTH, 1)
class TestDelegationCredentialResolution(unittest.TestCase):
"""Tests for provider:model credential resolution in delegation config."""
def test_no_provider_returns_none_credentials(self):
"""When delegation.provider is empty, all credentials are None (inherit parent)."""
parent = _make_mock_parent(depth=0)
cfg = {"model": "", "provider": ""}
creds = _resolve_delegation_credentials(cfg, parent)
self.assertIsNone(creds["provider"])
self.assertIsNone(creds["base_url"])
self.assertIsNone(creds["api_key"])
self.assertIsNone(creds["api_mode"])
self.assertIsNone(creds["model"])
def test_model_only_no_provider(self):
"""When only model is set (no provider), model is returned but credentials are None."""
parent = _make_mock_parent(depth=0)
cfg = {"model": "google/gemini-3-flash-preview", "provider": ""}
creds = _resolve_delegation_credentials(cfg, parent)
self.assertEqual(creds["model"], "google/gemini-3-flash-preview")
self.assertIsNone(creds["provider"])
self.assertIsNone(creds["base_url"])
self.assertIsNone(creds["api_key"])
def test_direct_endpoint_uses_configured_base_url_and_api_key(self):
parent = _make_mock_parent(depth=0)
cfg = {
"model": "qwen2.5-coder",
"provider": "openrouter",
"base_url": "http://localhost:1234/v1",
"api_key": "local-key",
}
creds = _resolve_delegation_credentials(cfg, parent)
self.assertEqual(creds["model"], "qwen2.5-coder")
self.assertEqual(creds["provider"], "custom")
self.assertEqual(creds["base_url"], "http://localhost:1234/v1")
self.assertEqual(creds["api_key"], "local-key")
self.assertEqual(creds["api_mode"], "chat_completions")
def test_direct_endpoint_auto_detects_anthropic_messages_suffix(self):
# Issue #10213: Azure AI Foundry exposes Anthropic-compatible models at
# a /anthropic URL suffix. Subagents must pick anthropic_messages
# automatically, matching the main agent's runtime resolver.
parent = _make_mock_parent(depth=0)
cfg = {
"model": "claude-opus-4-6",
"provider": "custom",
"base_url": "https://myfoundry.services.ai.azure.com/anthropic",
"api_key": "foundry-key",
}
creds = _resolve_delegation_credentials(cfg, parent)
self.assertEqual(creds["provider"], "custom")
self.assertEqual(creds["base_url"], "https://myfoundry.services.ai.azure.com/anthropic")
self.assertEqual(creds["api_key"], "foundry-key")
self.assertEqual(creds["api_mode"], "anthropic_messages")
def test_direct_endpoint_honors_explicit_api_mode(self):
# When delegation.api_mode is set explicitly, it overrides URL-based
# detection so users can force a transport on non-standard endpoints.
parent = _make_mock_parent(depth=0)
cfg = {
"model": "claude-opus-4-6",
"provider": "custom",
"base_url": "https://proxy.example.com/v1",
"api_key": "proxy-key",
"api_mode": "anthropic_messages",
}
creds = _resolve_delegation_credentials(cfg, parent)
self.assertEqual(creds["api_mode"], "anthropic_messages")
def test_direct_endpoint_explicit_api_mode_overrides_url_detection(self):
# Explicit api_mode in config always wins over auto-detection.
parent = _make_mock_parent(depth=0)
cfg = {
"model": "claude-opus-4-6",
"provider": "custom",
"base_url": "https://myfoundry.services.ai.azure.com/anthropic",
"api_key": "foundry-key",
"api_mode": "chat_completions",
}
creds = _resolve_delegation_credentials(cfg, parent)
self.assertEqual(creds["api_mode"], "chat_completions")
def test_direct_endpoint_invalid_api_mode_falls_back_to_detection(self):
# An invalid api_mode string must not break detection; fall back to URL heuristic.
parent = _make_mock_parent(depth=0)
cfg = {
"model": "claude-opus-4-6",
"provider": "custom",
"base_url": "https://myfoundry.services.ai.azure.com/anthropic",
"api_key": "foundry-key",
"api_mode": "garbage",
}
creds = _resolve_delegation_credentials(cfg, parent)
self.assertEqual(creds["api_mode"], "anthropic_messages")
def test_direct_endpoint_returns_none_api_key_when_not_configured(self):
# When base_url is set without api_key, api_key should be None so
# _build_child_agent inherits the parent's key (effective_api_key = override or parent).
parent = _make_mock_parent(depth=0)
cfg = {
"model": "qwen2.5-coder",
"base_url": "http://localhost:1234/v1",
}
with patch.dict(os.environ, {"OPENAI_API_KEY": "env-openai-key"}, clear=False):
creds = _resolve_delegation_credentials(cfg, parent)
self.assertIsNone(creds["api_key"])
self.assertEqual(creds["provider"], "custom")
def test_direct_endpoint_no_raise_when_only_provider_env_key_present(self):
# Even if OPENAI_API_KEY is absent, no ValueError — _build_child_agent uses parent key.
parent = _make_mock_parent(depth=0)
cfg = {
"model": "qwen2.5-coder",
"base_url": "http://localhost:1234/v1",
}
with patch.dict(
os.environ,
{
"OPENROUTER_API_KEY": "env-openrouter-key",
"OPENAI_API_KEY": "",
},
clear=False,
):
creds = _resolve_delegation_credentials(cfg, parent)
self.assertIsNone(creds["api_key"])
self.assertEqual(creds["provider"], "custom")
@patch("hermes_cli.runtime_provider.resolve_runtime_provider")
def test_provider_resolution_failure_raises_valueerror(self, mock_resolve):
"""When provider resolution fails, ValueError is raised with helpful message."""
mock_resolve.side_effect = RuntimeError("OPENROUTER_API_KEY not set")
parent = _make_mock_parent(depth=0)
cfg = {"model": "some-model", "provider": "openrouter"}
with self.assertRaises(ValueError) as ctx:
_resolve_delegation_credentials(cfg, parent)
self.assertIn("openrouter", str(ctx.exception).lower())
self.assertIn("Cannot resolve", str(ctx.exception))
@patch("hermes_cli.runtime_provider.resolve_runtime_provider")
def test_provider_resolves_but_no_api_key_raises(self, mock_resolve):
"""When provider resolves but has no API key, ValueError is raised."""
mock_resolve.return_value = {
"provider": "openrouter",
"base_url": "https://openrouter.ai/api/v1",
"api_key": "",
"api_mode": "chat_completions",
}
parent = _make_mock_parent(depth=0)
cfg = {"model": "some-model", "provider": "openrouter"}
with self.assertRaises(ValueError) as ctx:
_resolve_delegation_credentials(cfg, parent)
self.assertIn("no API key", str(ctx.exception))
def test_missing_config_keys_inherit_parent(self):
"""When config dict has no model/provider keys at all, inherits parent."""
parent = _make_mock_parent(depth=0)
cfg = {"max_iterations": 45}
creds = _resolve_delegation_credentials(cfg, parent)
self.assertIsNone(creds["model"])
self.assertIsNone(creds["provider"])
@patch("hermes_cli.runtime_provider.resolve_runtime_provider")
def test_named_custom_provider_preserves_provider_name(self, mock_resolve):
"""Named custom provider (e.g. crof.ai) resolves to 'custom' at runtime level
but the subagent must retain the original provider identity so that
resolve_provider_client routes to the correct endpoint on retry/fallback.
Regression test for #26954.
"""
mock_resolve.return_value = {
"provider": "custom", # runtime marks it as "custom" type
"model": "deepseek-v4-pro-CEER",
"base_url": "https://api.crof.ai/v1",
"api_key": "crof-key-abc",
"api_mode": "chat_completions",
}
parent = _make_mock_parent(depth=0)
cfg = {"model": "deepseek-v4-pro-CEER", "provider": "crof.ai"}
creds = _resolve_delegation_credentials(cfg, parent)
# The key assertion: subagent must keep "crof.ai", NOT "custom"
self.assertEqual(creds["provider"], "crof.ai")
self.assertEqual(creds["model"], "deepseek-v4-pro-CEER")
self.assertEqual(creds["base_url"], "https://api.crof.ai/v1")
self.assertEqual(creds["api_key"], "crof-key-abc")
# Verify resolve_runtime_provider was called with the configured name
mock_resolve.assert_called_once_with(
requested="crof.ai", target_model="deepseek-v4-pro-CEER"
)
@patch("hermes_cli.runtime_provider.resolve_runtime_provider")
def test_standard_provider_not_overwritten_by_configured_name(self, mock_resolve):
"""Standard (non-custom) providers must still return runtime identity,
not the configured name, to preserve existing behaviour for openrouter,
nous, etc.
"""
mock_resolve.return_value = {
"provider": "openrouter",
"model": "anthropic/claude-sonnet-4",
"base_url": "https://openrouter.ai/api/v1",
"api_key": "or-key-xyz",
"api_mode": "chat_completions",
}
parent = _make_mock_parent(depth=0)
cfg = {"model": "anthropic/claude-sonnet-4", "provider": "openrouter"}
creds = _resolve_delegation_credentials(cfg, parent)
# Standard provider returns its own name, not "custom"
self.assertEqual(creds["provider"], "openrouter")
@patch("hermes_cli.runtime_provider.resolve_runtime_provider")
def test_custom_provider_with_empty_configured_provider_falls_back_to_runtime(self, mock_resolve):
"""When configured_provider is empty/None, the early return kicks in and
we return provider=None regardless of what runtime resolved. The runtime
path is only reached when configured_provider is a non-empty string.
"""
mock_resolve.return_value = {
"provider": "custom",
"model": "some-model",
"base_url": "https://fallback.example.com/v1",
"api_key": "key-fallback",
"api_mode": "chat_completions",
}
parent = _make_mock_parent(depth=0)
cfg = {"model": "some-model", "provider": ""}
creds = _resolve_delegation_credentials(cfg, parent)
# Empty provider → early return with None (child inherits parent)
self.assertIsNone(creds["provider"])
@patch("hermes_cli.runtime_provider.resolve_runtime_provider")
def test_runtime_missing_provider_key_returns_none(self, mock_resolve):
"""When resolve_runtime_provider returns a dict without 'provider' key,
the result must be None regardless of configured_provider.
This protects against malformed runtime responses.
"""
mock_resolve.return_value = {
# deliberately missing "provider"
"model": "some-model",
"base_url": "https://example.com/v1",
"api_key": "key-123",
"api_mode": "chat_completions",
}
parent = _make_mock_parent(depth=0)
cfg = {"model": "some-model", "provider": "crof.ai"}
creds = _resolve_delegation_credentials(cfg, parent)
self.assertIsNone(creds["provider"])
@patch("hermes_cli.runtime_provider.resolve_runtime_provider")
def test_bedrock_provider_with_base_url_uses_runtime_resolver(self, mock_resolve):
"""Regression: provider=bedrock + base_url set must NOT fall through the
direct-base_url branch (which would force provider='custom' +
chat_completions and silently misroute OpenAI JSON to the Bedrock
native endpoint, returning empty responses)."""
mock_resolve.return_value = {
"provider": "bedrock",
"base_url": "https://bedrock-runtime.us-west-2.amazonaws.com",
"api_key": "aws-resolved-key",
"api_mode": "bedrock_converse",
}
parent = _make_mock_parent(depth=0)
cfg = {
"model": "us.anthropic.claude-sonnet-4-6",
"provider": "bedrock",
"base_url": "https://bedrock-runtime.us-west-2.amazonaws.com",
}
creds = _resolve_delegation_credentials(cfg, parent)
# Must use Bedrock, not 'custom'
self.assertEqual(creds["provider"], "bedrock")
self.assertEqual(creds["api_mode"], "bedrock_converse")
mock_resolve.assert_called_once()
self.assertEqual(mock_resolve.call_args.kwargs.get("requested"), "bedrock")
class TestDelegationProviderIntegration(unittest.TestCase):
"""Integration tests: delegation config → _run_single_child → AIAgent construction."""
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_config_provider_credentials_reach_child_agent(self, mock_creds, mock_cfg):
"""When delegation.provider is configured, child agent gets resolved credentials."""
mock_cfg.return_value = {
"max_iterations": 45,
"model": "google/gemini-3-flash-preview",
"provider": "openrouter",
}
mock_creds.return_value = {
"model": "google/gemini-3-flash-preview",
"provider": "openrouter",
"base_url": "https://openrouter.ai/api/v1",
"api_key": "sk-or-delegation-key",
"api_mode": "chat_completions",
}
parent = _make_mock_parent(depth=0)
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.run_conversation.return_value = {
"final_response": "done", "completed": True, "api_calls": 1
}
MockAgent.return_value = mock_child
delegate_task(goal="Test provider routing", parent_agent=parent)
_, kwargs = MockAgent.call_args
self.assertEqual(kwargs["model"], "google/gemini-3-flash-preview")
self.assertEqual(kwargs["provider"], "openrouter")
self.assertEqual(kwargs["base_url"], "https://openrouter.ai/api/v1")
self.assertEqual(kwargs["api_key"], "sk-or-delegation-key")
self.assertEqual(kwargs["api_mode"], "chat_completions")
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_cross_provider_delegation(self, mock_creds, mock_cfg):
"""Parent on Nous, subagent on OpenRouter — full credential switch."""
mock_cfg.return_value = {
"max_iterations": 45,
"model": "google/gemini-3-flash-preview",
"provider": "openrouter",
}
mock_creds.return_value = {
"model": "google/gemini-3-flash-preview",
"provider": "openrouter",
"base_url": "https://openrouter.ai/api/v1",
"api_key": "sk-or-key",
"api_mode": "chat_completions",
}
parent = _make_mock_parent(depth=0)
parent.provider = "nous"
parent.base_url = "https://inference-api.nousresearch.com/v1"
parent.api_key = "nous-key-abc"
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.run_conversation.return_value = {
"final_response": "done", "completed": True, "api_calls": 1
}
MockAgent.return_value = mock_child
delegate_task(goal="Cross-provider test", parent_agent=parent)
_, kwargs = MockAgent.call_args
# Child should use OpenRouter, NOT Nous
self.assertEqual(kwargs["provider"], "openrouter")
self.assertEqual(kwargs["base_url"], "https://openrouter.ai/api/v1")
self.assertEqual(kwargs["api_key"], "sk-or-key")
self.assertNotEqual(kwargs["base_url"], parent.base_url)
self.assertNotEqual(kwargs["api_key"], parent.api_key)
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_provider_override_clears_parent_openrouter_filters(
self, mock_creds, mock_cfg
):
"""Delegated provider should not inherit parent provider-preference filters."""
mock_cfg.return_value = {
"max_iterations": 45,
"model": "google/gemini-3-flash-preview",
"provider": "openrouter",
}
mock_creds.return_value = {
"model": "google/gemini-3-flash-preview",
"provider": "openrouter",
"base_url": "https://openrouter.ai/api/v1",
"api_key": "sk-or-key",
"api_mode": "chat_completions",
}
parent = _make_mock_parent(depth=0)
parent.providers_allowed = ["anthropic/claude-3.5-sonnet"]
parent.providers_ignored = ["openai/gpt-4o-mini"]
parent.providers_order = ["google/gemini-2.5-pro"]
parent.provider_sort = "price"
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.run_conversation.return_value = {
"final_response": "done",
"completed": True,
"api_calls": 1,
}
MockAgent.return_value = mock_child
delegate_task(goal="Cross-provider test", parent_agent=parent)
_, kwargs = MockAgent.call_args
self.assertEqual(kwargs["provider"], "openrouter")
self.assertIsNone(kwargs["providers_allowed"])
self.assertIsNone(kwargs["providers_ignored"])
self.assertIsNone(kwargs["providers_order"])
self.assertIsNone(kwargs["provider_sort"])
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_direct_endpoint_credentials_reach_child_agent(self, mock_creds, mock_cfg):
mock_cfg.return_value = {
"max_iterations": 45,
"model": "qwen2.5-coder",
"base_url": "http://localhost:1234/v1",
"api_key": "local-key",
}
mock_creds.return_value = {
"model": "qwen2.5-coder",
"provider": "custom",
"base_url": "http://localhost:1234/v1",
"api_key": "local-key",
"api_mode": "chat_completions",
}
parent = _make_mock_parent(depth=0)
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.run_conversation.return_value = {
"final_response": "done", "completed": True, "api_calls": 1
}
MockAgent.return_value = mock_child
delegate_task(goal="Direct endpoint test", parent_agent=parent)
_, kwargs = MockAgent.call_args
self.assertEqual(kwargs["model"], "qwen2.5-coder")
self.assertEqual(kwargs["provider"], "custom")
self.assertEqual(kwargs["base_url"], "http://localhost:1234/v1")
self.assertEqual(kwargs["api_key"], "local-key")
self.assertEqual(kwargs["api_mode"], "chat_completions")
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_empty_config_inherits_parent(self, mock_creds, mock_cfg):
"""When delegation config is empty, child inherits parent credentials."""
mock_cfg.return_value = {"max_iterations": 45, "model": "", "provider": ""}
mock_creds.return_value = {
"model": None,
"provider": None,
"base_url": None,
"api_key": None,
"api_mode": None,
}
parent = _make_mock_parent(depth=0)
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.run_conversation.return_value = {
"final_response": "done", "completed": True, "api_calls": 1
}
MockAgent.return_value = mock_child
delegate_task(goal="Test inherit", parent_agent=parent)
_, kwargs = MockAgent.call_args
self.assertEqual(kwargs["model"], parent.model)
self.assertEqual(kwargs["provider"], parent.provider)
self.assertEqual(kwargs["base_url"], parent.base_url)
def test_inherit_parent_base_url_prefers_client_kwargs(self):
parent = _make_mock_parent(depth=0)
parent.base_url = "https://openrouter.ai/api/v1"
parent._client_kwargs = {
"api_key": "no-key-required",
"base_url": "http://localhost:11434/v1",
}
self.assertEqual(
_inherit_parent_base_url(parent, parent.base_url),
"http://localhost:11434/v1",
)
def test_build_child_agent_inherits_active_client_endpoint(self):
"""Regression: stale parent.base_url must not route subagents to OpenRouter."""
parent = _make_mock_parent(depth=0)
parent.provider = "ollama"
parent.base_url = "https://openrouter.ai/api/v1"
parent.api_key = "ollama"
parent._client_kwargs = {
"api_key": "no-key-required",
"base_url": "http://localhost:11434/v1",
}
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
MockAgent.return_value = mock_child
_build_child_agent(
task_index=0,
goal="Use local Ollama",
context=None,
toolsets=["terminal"],
model=None,
max_iterations=10,
parent_agent=parent,
task_count=1,
)
_, kwargs = MockAgent.call_args
self.assertEqual(kwargs["base_url"], "http://localhost:11434/v1")
self.assertEqual(kwargs["api_key"], "ollama")
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_credential_error_returns_json_error(self, mock_creds, mock_cfg):
"""When credential resolution fails, delegate_task returns a JSON error."""
mock_cfg.return_value = {"model": "bad-model", "provider": "nonexistent"}
mock_creds.side_effect = ValueError(
"Cannot resolve delegation provider 'nonexistent': Unknown provider"
)
parent = _make_mock_parent(depth=0)
result = json.loads(delegate_task(goal="Should fail", parent_agent=parent))
self.assertIn("error", result)
self.assertIn("Cannot resolve", result["error"])
self.assertIn("nonexistent", result["error"])
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_batch_mode_all_children_get_credentials(self, mock_creds, mock_cfg):
"""In batch mode, all children receive the resolved credentials."""
mock_cfg.return_value = {
"max_iterations": 45,
"model": "meta-llama/llama-4-scout",
"provider": "openrouter",
}
mock_creds.return_value = {
"model": "meta-llama/llama-4-scout",
"provider": "openrouter",
"base_url": "https://openrouter.ai/api/v1",
"api_key": "sk-or-batch",
"api_mode": "chat_completions",
}
parent = _make_mock_parent(depth=0)
# Patch _build_child_agent since credentials are now passed there
# (agents are built in the main thread before being handed to workers)
with patch("tools.delegate_tool._build_child_agent") as mock_build, \
patch("tools.delegate_tool._run_single_child") as mock_run:
mock_child = MagicMock()
mock_build.return_value = mock_child
mock_run.return_value = {
"task_index": 0, "status": "completed",
"summary": "Done", "api_calls": 1, "duration_seconds": 1.0
}
tasks = [{"goal": "Task A"}, {"goal": "Task B"}]
delegate_task(tasks=tasks, parent_agent=parent)
self.assertEqual(mock_build.call_count, 2)
for call in mock_build.call_args_list:
self.assertEqual(call.kwargs.get("model"), "meta-llama/llama-4-scout")
self.assertEqual(call.kwargs.get("override_provider"), "openrouter")
self.assertEqual(call.kwargs.get("override_base_url"), "https://openrouter.ai/api/v1")
self.assertEqual(call.kwargs.get("override_api_key"), "sk-or-batch")
self.assertEqual(call.kwargs.get("override_api_mode"), "chat_completions")
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_delegation_acp_runtime_reaches_child_agent(self, mock_creds, mock_cfg):
"""Resolved ACP runtime command/args must be forwarded to child agents."""
mock_cfg.return_value = {
"max_iterations": 45,
"model": "copilot-model",
"provider": "copilot-acp",
}
mock_creds.return_value = {
"model": "copilot-model",
"provider": "copilot-acp",
"base_url": "acp://copilot",
"api_key": "copilot-acp",
"api_mode": "chat_completions",
"command": "custom-copilot",
"args": ["--stdio-custom"],
}
parent = _make_mock_parent(depth=0)
with patch("tools.delegate_tool._build_child_agent") as mock_build, \
patch("tools.delegate_tool._run_single_child") as mock_run:
mock_child = MagicMock()
mock_build.return_value = mock_child
mock_run.return_value = {
"task_index": 0, "status": "completed",
"summary": "Done", "api_calls": 1, "duration_seconds": 1.0
}
delegate_task(goal="ACP delegation test", parent_agent=parent)
_, kwargs = mock_build.call_args
self.assertEqual(kwargs.get("override_provider"), "copilot-acp")
self.assertEqual(kwargs.get("override_base_url"), "acp://copilot")
self.assertEqual(kwargs.get("override_api_key"), "copilot-acp")
self.assertEqual(kwargs.get("override_api_mode"), "chat_completions")
self.assertEqual(kwargs.get("override_acp_command"), "custom-copilot")
self.assertEqual(kwargs.get("override_acp_args"), ["--stdio-custom"])
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_model_only_no_provider_inherits_parent_credentials(self, mock_creds, mock_cfg):
"""Setting only model (no provider) changes model but keeps parent credentials."""
mock_cfg.return_value = {
"max_iterations": 45,
"model": "google/gemini-3-flash-preview",
"provider": "",
}
mock_creds.return_value = {
"model": "google/gemini-3-flash-preview",
"provider": None,
"base_url": None,
"api_key": None,
"api_mode": None,
}
parent = _make_mock_parent(depth=0)
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.run_conversation.return_value = {
"final_response": "done", "completed": True, "api_calls": 1
}
MockAgent.return_value = mock_child
delegate_task(goal="Model only test", parent_agent=parent)
_, kwargs = MockAgent.call_args
# Model should be overridden
self.assertEqual(kwargs["model"], "google/gemini-3-flash-preview")
# But provider/base_url/api_key should inherit from parent
self.assertEqual(kwargs["provider"], parent.provider)
self.assertEqual(kwargs["base_url"], parent.base_url)
class TestChildCredentialPoolResolution(unittest.TestCase):
def test_same_provider_shares_parent_pool(self):
parent = _make_mock_parent()
mock_pool = MagicMock()
parent._credential_pool = mock_pool
result = _resolve_child_credential_pool("openrouter", parent)
self.assertIs(result, mock_pool)
def test_no_provider_inherits_parent_pool(self):
parent = _make_mock_parent()
mock_pool = MagicMock()
parent._credential_pool = mock_pool
result = _resolve_child_credential_pool(None, parent)
self.assertIs(result, mock_pool)
def test_different_provider_loads_own_pool(self):
parent = _make_mock_parent()
parent._credential_pool = MagicMock()
mock_pool = MagicMock()
mock_pool.has_credentials.return_value = True
with patch("agent.credential_pool.load_pool", return_value=mock_pool):
result = _resolve_child_credential_pool("anthropic", parent)
self.assertIs(result, mock_pool)
def test_different_provider_empty_pool_returns_none(self):
parent = _make_mock_parent()
parent._credential_pool = MagicMock()
mock_pool = MagicMock()
mock_pool.has_credentials.return_value = False
with patch("agent.credential_pool.load_pool", return_value=mock_pool):
result = _resolve_child_credential_pool("anthropic", parent)
self.assertIsNone(result)
def test_different_provider_load_failure_returns_none(self):
parent = _make_mock_parent()
parent._credential_pool = MagicMock()
with patch("agent.credential_pool.load_pool", side_effect=Exception("disk error")):
result = _resolve_child_credential_pool("anthropic", parent)
self.assertIsNone(result)
# --- Custom-endpoint identity resolution (issue #7833) ---
def test_custom_different_endpoint_does_not_inherit_parent_pool(self):
"""A child on custom endpoint B must not inherit the parent's custom
endpoint A pool just because both normalize to provider='custom'."""
parent = _make_mock_parent()
parent.provider = "custom"
parent.base_url = "https://endpoint-a.example.com/v1"
parent._credential_pool = MagicMock(name="parent_custom_a_pool")
child_pool = MagicMock(name="endpoint_b_pool")
child_pool.has_credentials.return_value = True
def fake_key(base_url, provider_name=None):
return {
"https://endpoint-a.example.com/v1": "custom:endpoint-a",
"https://endpoint-b.example.com/v1": "custom:endpoint-b",
}.get(base_url)
with patch("agent.credential_pool.get_custom_provider_pool_key", side_effect=fake_key), \
patch("agent.credential_pool.load_pool", return_value=child_pool) as load_mock:
result = _resolve_child_credential_pool(
"custom", parent, "https://endpoint-b.example.com/v1"
)
# Loaded the child's OWN endpoint pool, not the parent's.
load_mock.assert_called_once_with("custom:endpoint-b")
self.assertIs(result, child_pool)
self.assertIsNot(result, parent._credential_pool)
def test_custom_same_endpoint_shares_parent_pool(self):
"""A child on the SAME custom endpoint as the parent reuses the parent's
pool so rotation/cooldown state stays synchronized."""
parent = _make_mock_parent()
parent.provider = "custom"
parent.base_url = "https://endpoint-a.example.com/v1"
parent._credential_pool = MagicMock(name="parent_custom_a_pool")
with patch(
"agent.credential_pool.get_custom_provider_pool_key",
return_value="custom:endpoint-a",
):
result = _resolve_child_credential_pool(
"custom", parent, "https://endpoint-a.example.com/v1"
)
self.assertIs(result, parent._credential_pool)
def test_custom_unregistered_endpoint_returns_none(self):
"""A raw delegation.base_url with no matching custom_providers entry
must NOT inherit the parent's pool — return None so the child keeps its
fixed delegated credential."""
parent = _make_mock_parent()
parent.provider = "custom"
parent.base_url = "https://endpoint-a.example.com/v1"
parent._credential_pool = MagicMock(name="parent_custom_a_pool")
with patch(
"agent.credential_pool.get_custom_provider_pool_key",
return_value=None,
):
result = _resolve_child_credential_pool(
"custom", parent, "https://raw-unregistered.example.com/v1"
)
self.assertIsNone(result)
def test_build_child_agent_assigns_parent_pool_when_shared(self):
parent = _make_mock_parent()
mock_pool = MagicMock()
parent._credential_pool = mock_pool
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
MockAgent.return_value = mock_child
_build_child_agent(
task_index=0,
goal="Test pool assignment",
context=None,
toolsets=["terminal"],
model=None,
max_iterations=10,
parent_agent=parent,
task_count=1,
)
self.assertEqual(mock_child._credential_pool, mock_pool)
@patch("tools.delegate_tool._load_config", return_value={})
def test_build_child_agent_preserves_mcp_toolsets_by_default(self, mock_cfg):
parent = _make_mock_parent()
parent.enabled_toolsets = ["web", "browser", "mcp-MiniMax"]
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
MockAgent.return_value = mock_child
_build_child_agent(
task_index=0,
goal="Test narrowed toolsets",
context=None,
toolsets=["web", "browser"],
model=None,
max_iterations=10,
parent_agent=parent,
task_count=1,
)
self.assertEqual(
MockAgent.call_args[1]["enabled_toolsets"],
["web", "browser", "mcp-MiniMax"],
)
@patch(
"tools.delegate_tool._load_config",
return_value={"inherit_mcp_toolsets": False},
)
def test_build_child_agent_strict_intersection_when_opted_out(self, mock_cfg):
parent = _make_mock_parent()
parent.enabled_toolsets = ["web", "browser", "mcp-MiniMax"]
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
MockAgent.return_value = mock_child
_build_child_agent(
task_index=0,
goal="Test narrowed toolsets",
context=None,
toolsets=["web", "browser"],
model=None,
max_iterations=10,
parent_agent=parent,
task_count=1,
)
self.assertEqual(
MockAgent.call_args[1]["enabled_toolsets"],
["web", "browser"],
)
class TestChildCredentialLeasing(unittest.TestCase):
def test_run_single_child_acquires_and_releases_lease(self):
from tools.delegate_tool import _run_single_child
leased_entry = MagicMock()
leased_entry.id = "cred-b"
child = MagicMock()
child._credential_pool = MagicMock()
child._credential_pool.acquire_lease.return_value = "cred-b"
child._credential_pool.current.return_value = leased_entry
child.run_conversation.return_value = {
"final_response": "done",
"completed": True,
"interrupted": False,
"api_calls": 1,
"messages": [],
}
result = _run_single_child(
task_index=0,
goal="Investigate rate limits",
child=child,
parent_agent=_make_mock_parent(),
)
self.assertEqual(result["status"], "completed")
child._credential_pool.acquire_lease.assert_called_once_with()
child._swap_credential.assert_called_once_with(leased_entry)
child._credential_pool.release_lease.assert_called_once_with("cred-b")
def test_run_single_child_releases_lease_after_failure(self):
from tools.delegate_tool import _run_single_child
child = MagicMock()
child._credential_pool = MagicMock()
child._credential_pool.acquire_lease.return_value = "cred-a"
child._credential_pool.current.return_value = MagicMock(id="cred-a")
child.run_conversation.side_effect = RuntimeError("boom")
result = _run_single_child(
task_index=1,
goal="Trigger failure",
child=child,
parent_agent=_make_mock_parent(),
)
self.assertEqual(result["status"], "error")
child._credential_pool.release_lease.assert_called_once_with("cred-a")
class TestDelegateHeartbeat(unittest.TestCase):
"""Heartbeat propagates child activity to parent during delegation.
Without the heartbeat, the gateway inactivity timeout fires because the
parent's _last_activity_ts freezes when delegate_task starts.
"""
def test_heartbeat_touches_parent_activity_during_child_run(self):
"""Parent's _touch_activity is called while child.run_conversation blocks."""
from tools.delegate_tool import _run_single_child
parent = _make_mock_parent()
touch_calls = []
parent._touch_activity = lambda desc: touch_calls.append(desc)
child = MagicMock()
child.get_activity_summary.return_value = {
"current_tool": "terminal",
"api_call_count": 3,
"max_iterations": 50,
"last_activity_desc": "executing tool: terminal",
}
# Make run_conversation block long enough for heartbeats to fire
def slow_run(**kwargs):
time.sleep(0.25)
return {"final_response": "done", "completed": True, "api_calls": 3}
child.run_conversation.side_effect = slow_run
# Patch the heartbeat interval to fire quickly
with patch("tools.delegate_tool._HEARTBEAT_INTERVAL", 0.05):
_run_single_child(
task_index=0,
goal="Test heartbeat",
child=child,
parent_agent=parent,
)
# Heartbeat should have fired at least once during the 0.25s sleep
self.assertGreater(len(touch_calls), 0,
"Heartbeat did not propagate activity to parent")
# Verify the description includes child's current tool detail
self.assertTrue(
any("terminal" in desc for desc in touch_calls),
f"Heartbeat descriptions should include child tool info: {touch_calls}")
def test_heartbeat_stops_after_child_completes(self):
"""Heartbeat thread is cleaned up when the child finishes."""
from tools.delegate_tool import _run_single_child
parent = _make_mock_parent()
touch_calls = []
parent._touch_activity = lambda desc: touch_calls.append(desc)
child = MagicMock()
child.get_activity_summary.return_value = {
"current_tool": None,
"api_call_count": 1,
"max_iterations": 50,
"last_activity_desc": "done",
}
child.run_conversation.return_value = {
"final_response": "done", "completed": True, "api_calls": 1,
}
with patch("tools.delegate_tool._HEARTBEAT_INTERVAL", 0.05):
_run_single_child(
task_index=0,
goal="Test cleanup",
child=child,
parent_agent=parent,
)
# Record count after completion, wait, and verify no more calls
count_after = len(touch_calls)
time.sleep(0.15)
self.assertEqual(len(touch_calls), count_after,
"Heartbeat continued firing after child completed")
def test_heartbeat_stops_after_child_error(self):
"""Heartbeat thread is cleaned up even when the child raises."""
from tools.delegate_tool import _run_single_child
parent = _make_mock_parent()
touch_calls = []
parent._touch_activity = lambda desc: touch_calls.append(desc)
child = MagicMock()
child.get_activity_summary.return_value = {
"current_tool": "web_search",
"api_call_count": 2,
"max_iterations": 50,
"last_activity_desc": "executing tool: web_search",
}
def slow_fail(**kwargs):
time.sleep(0.15)
raise RuntimeError("network timeout")
child.run_conversation.side_effect = slow_fail
with patch("tools.delegate_tool._HEARTBEAT_INTERVAL", 0.05):
result = _run_single_child(
task_index=0,
goal="Test error cleanup",
child=child,
parent_agent=parent,
)
self.assertEqual(result["status"], "error")
# Verify heartbeat stopped
count_after = len(touch_calls)
time.sleep(0.15)
self.assertEqual(len(touch_calls), count_after,
"Heartbeat continued firing after child error")
def test_heartbeat_includes_child_activity_desc_when_no_tool(self):
"""When child has no current_tool, heartbeat uses last_activity_desc."""
from tools.delegate_tool import _run_single_child
parent = _make_mock_parent()
touch_calls = []
parent._touch_activity = lambda desc: touch_calls.append(desc)
child = MagicMock()
child.get_activity_summary.return_value = {
"current_tool": None,
"api_call_count": 5,
"max_iterations": 90,
"last_activity_desc": "API call #5 completed",
}
def slow_run(**kwargs):
time.sleep(0.15)
return {"final_response": "done", "completed": True, "api_calls": 5}
child.run_conversation.side_effect = slow_run
with patch("tools.delegate_tool._HEARTBEAT_INTERVAL", 0.05):
_run_single_child(
task_index=0,
goal="Test desc fallback",
child=child,
parent_agent=parent,
)
self.assertGreater(len(touch_calls), 0)
self.assertTrue(
any("API call #5 completed" in desc for desc in touch_calls),
f"Heartbeat should include last_activity_desc: {touch_calls}")
def test_heartbeat_does_not_trip_idle_stale_while_inside_tool(self):
"""A long-running tool (no iteration advance, but current_tool set)
must not be flagged stale at the idle threshold.
Bug #13041: when a child is legitimately busy inside a slow tool
(terminal command, browser fetch), api_call_count does not advance.
The previous stale check treated this as idle and stopped the
heartbeat after 5 cycles (~150s), letting the gateway kill the
session. The fix uses a much higher in-tool threshold and only
applies the tight idle threshold when current_tool is None.
"""
from tools.delegate_tool import _run_single_child
parent = _make_mock_parent()
touch_calls = []
parent._touch_activity = lambda desc: touch_calls.append(desc)
child = MagicMock()
# Child is stuck inside a single terminal call for the whole run.
# api_call_count never advances, current_tool is always set.
child.get_activity_summary.return_value = {
"current_tool": "terminal",
"api_call_count": 1,
"max_iterations": 50,
"last_activity_desc": "executing tool: terminal",
}
def slow_run(**kwargs):
# Long enough to exceed the OLD idle threshold (5 cycles) at
# the patched interval, but shorter than the new in-tool
# threshold.
time.sleep(0.4)
return {"final_response": "done", "completed": True, "api_calls": 1}
child.run_conversation.side_effect = slow_run
# Use tiny thresholds so the assertion is scheduler-robust in CI:
# if idle rules were used for in-tool work, heartbeat would stop after
# ~2 cycles. The in-tool branch should keep touching well past that.
with (
patch("tools.delegate_tool._HEARTBEAT_INTERVAL", 0.05),
patch("tools.delegate_tool._HEARTBEAT_STALE_CYCLES_IDLE", 2),
patch("tools.delegate_tool._HEARTBEAT_STALE_CYCLES_IN_TOOL", 40),
):
_run_single_child(
task_index=0,
goal="Test long-running tool",
child=child,
parent_agent=parent,
)
# If idle-threshold logic applied, we'd cap around 2 touches; prove we
# continued beyond that while inside a long-running tool.
self.assertGreater(
len(touch_calls), 2,
f"Heartbeat stopped too early while child was inside a tool; "
f"got {len(touch_calls)} touches over 0.4s at 0.05s interval",
)
class TestDelegationReasoningEffort(unittest.TestCase):
"""Tests for delegation.reasoning_effort config override."""
@patch("tools.delegate_tool._load_config")
@patch("run_agent.AIAgent")
def test_inherits_parent_reasoning_when_no_override(self, MockAgent, mock_cfg):
"""With no delegation.reasoning_effort, child inherits parent's config."""
mock_cfg.return_value = {"max_iterations": 50, "reasoning_effort": ""}
MockAgent.return_value = MagicMock()
parent = _make_mock_parent()
parent.reasoning_config = {"enabled": True, "effort": "xhigh"}
_build_child_agent(
task_index=0, goal="test", context=None, toolsets=None,
model=None, max_iterations=50, parent_agent=parent,
task_count=1,
)
call_kwargs = MockAgent.call_args[1]
self.assertEqual(call_kwargs["reasoning_config"], {"enabled": True, "effort": "xhigh"})
@patch("tools.delegate_tool._load_config")
@patch("run_agent.AIAgent")
def test_override_reasoning_effort_from_config(self, MockAgent, mock_cfg):
"""delegation.reasoning_effort overrides the parent's level."""
mock_cfg.return_value = {"max_iterations": 50, "reasoning_effort": "low"}
MockAgent.return_value = MagicMock()
parent = _make_mock_parent()
parent.reasoning_config = {"enabled": True, "effort": "xhigh"}
_build_child_agent(
task_index=0, goal="test", context=None, toolsets=None,
model=None, max_iterations=50, parent_agent=parent,
task_count=1,
)
call_kwargs = MockAgent.call_args[1]
self.assertEqual(call_kwargs["reasoning_config"], {"enabled": True, "effort": "low"})
@patch("tools.delegate_tool._load_config")
@patch("run_agent.AIAgent")
def test_override_reasoning_effort_none_disables(self, MockAgent, mock_cfg):
"""delegation.reasoning_effort: 'none' disables thinking for subagents."""
mock_cfg.return_value = {"max_iterations": 50, "reasoning_effort": "none"}
MockAgent.return_value = MagicMock()
parent = _make_mock_parent()
parent.reasoning_config = {"enabled": True, "effort": "high"}
_build_child_agent(
task_index=0, goal="test", context=None, toolsets=None,
model=None, max_iterations=50, parent_agent=parent,
task_count=1,
)
call_kwargs = MockAgent.call_args[1]
self.assertEqual(call_kwargs["reasoning_config"], {"enabled": False})
@patch("tools.delegate_tool._load_config")
@patch("run_agent.AIAgent")
def test_invalid_reasoning_effort_falls_back_to_parent(self, MockAgent, mock_cfg):
"""Invalid delegation.reasoning_effort falls back to parent's config."""
mock_cfg.return_value = {"max_iterations": 50, "reasoning_effort": "banana"}
MockAgent.return_value = MagicMock()
parent = _make_mock_parent()
parent.reasoning_config = {"enabled": True, "effort": "medium"}
_build_child_agent(
task_index=0, goal="test", context=None, toolsets=None,
model=None, max_iterations=50, parent_agent=parent,
task_count=1,
)
call_kwargs = MockAgent.call_args[1]
self.assertEqual(call_kwargs["reasoning_config"], {"enabled": True, "effort": "medium"})
# =========================================================================
# Dispatch helper, progress events, concurrency
# =========================================================================
class TestDispatchDelegateTask(unittest.TestCase):
"""Tests for the _dispatch_delegate_task helper and full param forwarding."""
def test_model_acp_args_not_forwarded(self):
"""The live model dispatch path strips hidden ACP transport args."""
import run_agent
captured = {}
def fake_delegate_task(**kwargs):
captured.update(kwargs)
return "{}"
parent = _make_mock_parent(depth=0)
with patch("tools.delegate_tool.delegate_task", fake_delegate_task):
run_agent.AIAgent._dispatch_delegate_task(
parent,
{
"goal": "test",
"acp_command": "claude",
"acp_args": ["--acp", "--stdio"],
"tasks": [
{
"goal": "nested",
"acp_command": "codex",
"acp_args": ["--acp"],
},
],
},
)
self.assertNotIn("acp_command", captured)
self.assertNotIn("acp_args", captured)
self.assertEqual(captured["goal"], "test")
self.assertNotIn("acp_command", captured["tasks"][0])
self.assertNotIn("acp_args", captured["tasks"][0])
class TestDelegateEventEnum(unittest.TestCase):
"""Tests for DelegateEvent enum and back-compat aliases."""
def test_enum_values_are_strings(self):
for event in DelegateEvent:
self.assertIsInstance(event.value, str)
self.assertTrue(event.value.startswith("delegate."))
def test_legacy_map_covers_all_old_names(self):
expected_legacy = {"_thinking", "reasoning.available",
"tool.started", "tool.completed", "subagent_progress"}
self.assertEqual(set(_LEGACY_EVENT_MAP.keys()), expected_legacy)
def test_legacy_map_values_are_delegate_events(self):
for old_name, event in _LEGACY_EVENT_MAP.items():
self.assertIsInstance(event, DelegateEvent)
def test_progress_callback_normalises_tool_started(self):
"""_build_child_progress_callback handles tool.started via enum."""
parent = _make_mock_parent()
parent._delegate_spinner = MagicMock()
parent.tool_progress_callback = MagicMock()
cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
self.assertIsNotNone(cb)
cb("tool.started", tool_name="terminal", preview="ls")
parent._delegate_spinner.print_above.assert_called()
def test_progress_callback_normalises_thinking(self):
"""Both _thinking and reasoning.available route to TASK_THINKING."""
parent = _make_mock_parent()
parent._delegate_spinner = MagicMock()
parent.tool_progress_callback = None
cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
cb("_thinking", tool_name=None, preview="pondering...")
assert any("💭" in str(c) for c in parent._delegate_spinner.print_above.call_args_list)
parent._delegate_spinner.print_above.reset_mock()
cb("reasoning.available", tool_name=None, preview="hmm")
assert any("💭" in str(c) for c in parent._delegate_spinner.print_above.call_args_list)
def test_progress_callback_tool_completed_is_noop(self):
"""tool.completed is normalised but produces no display output."""
parent = _make_mock_parent()
parent._delegate_spinner = MagicMock()
parent.tool_progress_callback = None
cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
cb("tool.completed", tool_name="terminal")
parent._delegate_spinner.print_above.assert_not_called()
def test_progress_callback_ignores_unknown_events(self):
"""Unknown event types are silently ignored."""
parent = _make_mock_parent()
parent._delegate_spinner = MagicMock()
cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
# Should not raise
cb("some.unknown.event", tool_name="x")
parent._delegate_spinner.print_above.assert_not_called()
def test_progress_callback_accepts_enum_value_directly(self):
"""cb(DelegateEvent.TASK_THINKING, ...) must route to the thinking
branch. Pre-fix the callback only handled legacy strings via
_LEGACY_EVENT_MAP.get and silently dropped enum-typed callers."""
parent = _make_mock_parent()
parent._delegate_spinner = MagicMock()
parent.tool_progress_callback = None
cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
cb(DelegateEvent.TASK_THINKING, preview="pondering")
# If the enum was accepted, the thinking emoji got printed.
assert any(
"💭" in str(c)
for c in parent._delegate_spinner.print_above.call_args_list
)
def test_progress_callback_accepts_new_style_string(self):
"""cb('delegate.task_thinking', ...) — the string form of the
enum value — must route to the thinking branch too, so new-style
emitters don't have to import DelegateEvent."""
parent = _make_mock_parent()
parent._delegate_spinner = MagicMock()
cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
cb("delegate.task_thinking", preview="hmm")
assert any(
"💭" in str(c)
for c in parent._delegate_spinner.print_above.call_args_list
)
def test_progress_callback_task_progress_not_misrendered(self):
"""'subagent_progress' (legacy name for TASK_PROGRESS) carries a
pre-batched summary in the tool_name slot. Before the fix, this
fell through to the TASK_TOOL_STARTED rendering path, treating
the summary string as a tool name. After the fix: distinct
render (no tool-start emoji lookup) and pass-through relay
upward (no re-batching).
Regression path only reachable once nested orchestration is
enabled: nested orchestrators relay subagent_progress from
grandchildren upward through this callback.
"""
parent = _make_mock_parent()
parent._delegate_spinner = MagicMock()
parent.tool_progress_callback = MagicMock()
cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
cb("subagent_progress", tool_name="🔀 [1] terminal, file")
# Spinner gets a distinct 🔀-prefixed line, NOT a tool emoji
# followed by the summary string as if it were a tool name.
calls = parent._delegate_spinner.print_above.call_args_list
self.assertTrue(any("🔀 🔀 [1] terminal, file" in str(c) for c in calls))
# Parent callback receives the relay (pass-through, no re-batching).
parent.tool_progress_callback.assert_called_once()
# No '⚡' tool-start emoji should appear — that's the pre-fix bug.
self.assertFalse(any("" in str(c) for c in calls))
class TestConcurrencyDefaults(unittest.TestCase):
"""Tests for the concurrency default and no hard ceiling."""
@patch("tools.delegate_tool._load_config", return_value={})
def test_default_is_three(self, mock_cfg):
# Clear env var if set
with patch.dict(os.environ, {}, clear=True):
self.assertEqual(_get_max_concurrent_children(), 3)
@patch("tools.delegate_tool._load_config",
return_value={"max_concurrent_children": 10})
def test_no_upper_ceiling(self, mock_cfg):
"""Users can raise concurrency as high as they want — no hard cap."""
self.assertEqual(_get_max_concurrent_children(), 10)
@patch("tools.delegate_tool._load_config",
return_value={"max_concurrent_children": 100})
def test_very_high_values_honored(self, mock_cfg):
self.assertEqual(_get_max_concurrent_children(), 100)
@patch("tools.delegate_tool._load_config",
return_value={"max_concurrent_children": 0})
def test_zero_clamped_to_one(self, mock_cfg):
"""Floor of 1 is enforced; zero or negative values raise to 1."""
self.assertEqual(_get_max_concurrent_children(), 1)
@patch("tools.delegate_tool._load_config", return_value={})
def test_env_var_honored_uncapped(self, mock_cfg):
with patch.dict(os.environ, {"DELEGATION_MAX_CONCURRENT_CHILDREN": "12"}):
self.assertEqual(_get_max_concurrent_children(), 12)
@patch("tools.delegate_tool._load_config",
return_value={"max_concurrent_children": 6})
def test_configured_value_returned(self, mock_cfg):
self.assertEqual(_get_max_concurrent_children(), 6)
class TestAsyncCapUnified(unittest.TestCase):
"""max_async_children is deprecated: the async cap IS max_concurrent_children."""
@patch("tools.delegate_tool._load_config",
return_value={"max_concurrent_children": 15})
def test_async_cap_follows_concurrent_children(self, mock_cfg):
from tools.delegate_tool import _get_max_async_children
self.assertEqual(_get_max_async_children(), 15)
@patch("tools.delegate_tool._load_config",
return_value={"max_concurrent_children": 15, "max_async_children": 3})
def test_stale_max_async_children_ignored(self, mock_cfg):
"""A leftover max_async_children in config must not shrink the cap."""
from tools.delegate_tool import _get_max_async_children
self.assertEqual(_get_max_async_children(), 15)
@patch("tools.delegate_tool._load_config", return_value={})
def test_default_matches_concurrent_children_default(self, mock_cfg):
from tools.delegate_tool import _get_max_async_children
with patch.dict(os.environ, {}, clear=True):
self.assertEqual(_get_max_async_children(), _get_max_concurrent_children())
# =========================================================================
# max_spawn_depth clamping
# =========================================================================
class TestMaxSpawnDepth(unittest.TestCase):
"""Tests for _get_max_spawn_depth clamping and fallback behavior."""
@patch("tools.delegate_tool._load_config", return_value={})
def test_max_spawn_depth_defaults_to_1(self, mock_cfg):
from tools.delegate_tool import _get_max_spawn_depth
self.assertEqual(_get_max_spawn_depth(), 1)
@patch("tools.delegate_tool._load_config",
return_value={"max_spawn_depth": 0})
def test_max_spawn_depth_clamped_below_one(self, mock_cfg):
import logging
from tools.delegate_tool import _get_max_spawn_depth
with self.assertLogs("tools.delegate_tool", level=logging.WARNING) as cm:
result = _get_max_spawn_depth()
self.assertEqual(result, 1)
self.assertTrue(any("below floor 1" in m for m in cm.output))
@patch("tools.delegate_tool._load_config",
return_value={"max_spawn_depth": 99})
def test_max_spawn_depth_no_upper_ceiling(self, mock_cfg):
"""No upper ceiling — high values pass through unchanged (cost is the limiter)."""
from tools.delegate_tool import _get_max_spawn_depth
self.assertEqual(_get_max_spawn_depth(), 99)
@patch("tools.delegate_tool._load_config",
return_value={"max_spawn_depth": "not-a-number"})
def test_max_spawn_depth_invalid_falls_back_to_default(self, mock_cfg):
from tools.delegate_tool import _get_max_spawn_depth
self.assertEqual(_get_max_spawn_depth(), 1)
# =========================================================================
# role param plumbing
# =========================================================================
#
# These tests cover the schema + signature + stash plumbing of the role
# param. The full role-honoring behavior (toolset re-add, role-aware
# prompt) lives in TestOrchestratorRoleBehavior below; these tests only
# assert on _delegate_role stashing and on the schema shape.
class TestOrchestratorRoleSchema(unittest.TestCase):
"""Tests that the role param reaches the child via dispatch."""
@patch("tools.delegate_tool._resolve_delegation_credentials")
@patch("tools.delegate_tool._load_config",
return_value={"max_spawn_depth": 2})
def _run_with_mock_child(self, role_arg, mock_cfg, mock_creds):
mock_creds.return_value = {
"provider": None, "base_url": None,
"api_key": None, "api_mode": None, "model": None,
}
parent = _make_mock_parent(depth=0)
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.run_conversation.return_value = {
"final_response": "done", "completed": True,
"api_calls": 1, "messages": [],
}
mock_child._delegate_saved_tool_names = []
mock_child._credential_pool = None
mock_child.session_prompt_tokens = 0
mock_child.session_completion_tokens = 0
mock_child.model = "test"
MockAgent.return_value = mock_child
kwargs = {"goal": "test", "parent_agent": parent}
if role_arg is not _SENTINEL:
kwargs["role"] = role_arg
delegate_task(**kwargs)
return mock_child
def test_default_role_is_leaf(self):
child = self._run_with_mock_child(_SENTINEL)
self.assertEqual(child._delegate_role, "leaf")
def test_explicit_orchestrator_role_stashed(self):
"""role='orchestrator' reaches _build_child_agent and is stashed.
Full behavior (toolset re-add) lands in commit 3; commit 2 only
verifies the plumbing."""
child = self._run_with_mock_child("orchestrator")
self.assertEqual(child._delegate_role, "orchestrator")
def test_unknown_role_coerces_to_leaf(self):
"""role='nonsense' → _normalize_role warns and returns 'leaf'."""
import logging
with self.assertLogs("tools.delegate_tool", level=logging.WARNING) as cm:
child = self._run_with_mock_child("nonsense")
self.assertEqual(child._delegate_role, "leaf")
self.assertTrue(any("coercing" in m.lower() for m in cm.output))
def test_schema_has_role_top_level_and_per_task(self):
from tools.delegate_tool import DELEGATE_TASK_SCHEMA
props = DELEGATE_TASK_SCHEMA["parameters"]["properties"]
self.assertIn("role", props)
self.assertEqual(props["role"]["enum"], ["leaf", "orchestrator"])
task_props = props["tasks"]["items"]["properties"]
self.assertIn("role", task_props)
self.assertEqual(task_props["role"]["enum"], ["leaf", "orchestrator"])
def test_schema_omits_acp_transport_fields(self):
from tools.delegate_tool import DELEGATE_TASK_SCHEMA
props = DELEGATE_TASK_SCHEMA["parameters"]["properties"]
task_props = props["tasks"]["items"]["properties"]
self.assertNotIn("acp_command", props)
self.assertNotIn("acp_args", props)
self.assertNotIn("acp_command", task_props)
self.assertNotIn("acp_args", task_props)
# Sentinel used to distinguish "role kwarg omitted" from "role=None".
_SENTINEL = object()
# =========================================================================
# role-honoring behavior
# =========================================================================
def _make_role_mock_child():
"""Helper: mock child with minimal fields for delegate_task to process."""
mock_child = MagicMock()
mock_child.run_conversation.return_value = {
"final_response": "done", "completed": True,
"api_calls": 1, "messages": [],
}
mock_child._delegate_saved_tool_names = []
mock_child._credential_pool = None
mock_child.session_prompt_tokens = 0
mock_child.session_completion_tokens = 0
mock_child.model = "test"
return mock_child
class TestOrchestratorRoleBehavior(unittest.TestCase):
"""Tests that role='orchestrator' actually changes toolset + prompt."""
@patch("tools.delegate_tool._resolve_delegation_credentials")
@patch("tools.delegate_tool._load_config",
return_value={"max_spawn_depth": 2})
def test_orchestrator_role_keeps_delegation_at_depth_1(
self, mock_cfg, mock_creds
):
"""role='orchestrator' + depth-0 parent with max_spawn_depth=2 →
child at depth 1 gets 'delegation' in enabled_toolsets (can
further delegate). Requires max_spawn_depth>=2 since the new
default is 1 (flat)."""
mock_creds.return_value = {
"provider": None, "base_url": None,
"api_key": None, "api_mode": None, "model": None,
}
parent = _make_mock_parent(depth=0)
parent.enabled_toolsets = ["terminal", "file"]
with patch("run_agent.AIAgent") as MockAgent:
mock_child = _make_role_mock_child()
MockAgent.return_value = mock_child
delegate_task(goal="test", role="orchestrator", parent_agent=parent)
kwargs = MockAgent.call_args[1]
self.assertIn("delegation", kwargs["enabled_toolsets"])
self.assertEqual(mock_child._delegate_role, "orchestrator")
@patch("tools.delegate_tool._resolve_delegation_credentials")
@patch("tools.delegate_tool._load_config",
return_value={"max_spawn_depth": 2})
def test_orchestrator_blocked_at_max_spawn_depth(
self, mock_cfg, mock_creds
):
"""Parent at depth 1 with max_spawn_depth=2 spawns child
at depth 2 (the floor); role='orchestrator' degrades to leaf."""
mock_creds.return_value = {
"provider": None, "base_url": None,
"api_key": None, "api_mode": None, "model": None,
}
parent = _make_mock_parent(depth=1)
parent.enabled_toolsets = ["terminal", "delegation"]
with patch("run_agent.AIAgent") as MockAgent:
mock_child = _make_role_mock_child()
MockAgent.return_value = mock_child
delegate_task(goal="test", role="orchestrator", parent_agent=parent)
kwargs = MockAgent.call_args[1]
self.assertNotIn("delegation", kwargs["enabled_toolsets"])
self.assertEqual(mock_child._delegate_role, "leaf")
@patch("tools.delegate_tool._resolve_delegation_credentials")
@patch("tools.delegate_tool._load_config", return_value={})
def test_orchestrator_blocked_at_default_flat_depth(
self, mock_cfg, mock_creds
):
"""With default max_spawn_depth=1 (flat), role='orchestrator'
on a depth-0 parent produces a depth-1 child that is already at
the floor — the role degrades to 'leaf' and the delegation
toolset is stripped. This is the new default posture."""
mock_creds.return_value = {
"provider": None, "base_url": None,
"api_key": None, "api_mode": None, "model": None,
}
parent = _make_mock_parent(depth=0)
parent.enabled_toolsets = ["terminal", "file", "delegation"]
with patch("run_agent.AIAgent") as MockAgent:
mock_child = _make_role_mock_child()
MockAgent.return_value = mock_child
delegate_task(goal="test", role="orchestrator", parent_agent=parent)
kwargs = MockAgent.call_args[1]
self.assertNotIn("delegation", kwargs["enabled_toolsets"])
self.assertEqual(mock_child._delegate_role, "leaf")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_orchestrator_enabled_false_forces_leaf(self, mock_creds):
"""Kill switch delegation.orchestrator_enabled=false overrides
role='orchestrator'."""
mock_creds.return_value = {
"provider": None, "base_url": None,
"api_key": None, "api_mode": None, "model": None,
}
parent = _make_mock_parent(depth=0)
parent.enabled_toolsets = ["terminal", "delegation"]
with patch("tools.delegate_tool._load_config",
return_value={"orchestrator_enabled": False}):
with patch("run_agent.AIAgent") as MockAgent:
mock_child = _make_role_mock_child()
MockAgent.return_value = mock_child
delegate_task(goal="test", role="orchestrator",
parent_agent=parent)
kwargs = MockAgent.call_args[1]
self.assertNotIn("delegation", kwargs["enabled_toolsets"])
self.assertEqual(mock_child._delegate_role, "leaf")
# ── Role-aware system prompt ────────────────────────────────────────
def test_leaf_prompt_does_not_mention_delegation(self):
prompt = _build_child_system_prompt(
"Fix tests", role="leaf",
max_spawn_depth=2, child_depth=1,
)
self.assertNotIn("delegate_task", prompt)
self.assertNotIn("Orchestrator Role", prompt)
def test_orchestrator_prompt_mentions_delegation_capability(self):
prompt = _build_child_system_prompt(
"Survey approaches", role="orchestrator",
max_spawn_depth=2, child_depth=1,
)
self.assertIn("delegate_task", prompt)
self.assertIn("Orchestrator Role", prompt)
# Depth/max-depth note present and literal:
self.assertIn("depth 1", prompt)
self.assertIn("max_spawn_depth=2", prompt)
def test_orchestrator_prompt_at_depth_floor_says_children_are_leaves(self):
"""With max_spawn_depth=2 and child_depth=1, the orchestrator's
own children would be at depth 2 (the floor) → must be leaves."""
prompt = _build_child_system_prompt(
"Survey", role="orchestrator",
max_spawn_depth=2, child_depth=1,
)
self.assertIn("MUST be leaves", prompt)
def test_orchestrator_prompt_below_floor_allows_more_nesting(self):
"""With max_spawn_depth=3 and child_depth=1, the orchestrator's
own children can themselves be orchestrators (depth 2 < 3)."""
prompt = _build_child_system_prompt(
"Deep work", role="orchestrator",
max_spawn_depth=3, child_depth=1,
)
self.assertIn("can themselves be orchestrators", prompt)
# ── Batch mode and intersection ─────────────────────────────────────
@patch("tools.delegate_tool._resolve_delegation_credentials")
@patch("tools.delegate_tool._load_config",
return_value={"max_spawn_depth": 2})
def test_batch_mode_per_task_role_override(self, mock_cfg, mock_creds):
"""Per-task role beats top-level; no top-level role → "leaf".
tasks=[{role:'orchestrator'},{role:'leaf'},{}] → first gets
delegation, second and third don't. Requires max_spawn_depth>=2
(raised explicitly here) since the new default is 1 (flat).
"""
mock_creds.return_value = {
"provider": None, "base_url": None,
"api_key": None, "api_mode": None, "model": None,
}
parent = _make_mock_parent(depth=0)
parent.enabled_toolsets = ["terminal", "file", "delegation"]
built_toolsets = []
def _factory(*a, **kw):
m = _make_role_mock_child()
built_toolsets.append(kw.get("enabled_toolsets"))
return m
with patch("run_agent.AIAgent", side_effect=_factory):
delegate_task(
tasks=[
{"goal": "A", "role": "orchestrator"},
{"goal": "B", "role": "leaf"},
{"goal": "C"}, # no role → falls back to top_role (leaf)
],
parent_agent=parent,
)
self.assertIn("delegation", built_toolsets[0])
self.assertNotIn("delegation", built_toolsets[1])
self.assertNotIn("delegation", built_toolsets[2])
@patch("tools.delegate_tool._resolve_delegation_credentials")
@patch("tools.delegate_tool._load_config",
return_value={"max_spawn_depth": 2})
def test_intersection_preserves_delegation_bound(
self, mock_cfg, mock_creds
):
"""Design decision: orchestrator capability is granted by role,
NOT inherited from the parent's toolset. A parent without
'delegation' in its enabled_toolsets can still spawn an
orchestrator child — the re-add in _build_child_agent runs
unconditionally for orchestrators (when max_spawn_depth allows).
If you want to change to "parent must have delegation too",
update _build_child_agent to check parent_toolsets before the
re-add and update this test to match.
"""
mock_creds.return_value = {
"provider": None, "base_url": None,
"api_key": None, "api_mode": None, "model": None,
}
parent = _make_mock_parent(depth=0)
parent.enabled_toolsets = ["terminal", "file"] # no delegation
with patch("run_agent.AIAgent") as MockAgent:
mock_child = _make_role_mock_child()
MockAgent.return_value = mock_child
delegate_task(goal="test", role="orchestrator",
parent_agent=parent)
self.assertIn("delegation", MockAgent.call_args[1]["enabled_toolsets"])
class TestOrchestratorEndToEnd(unittest.TestCase):
"""End-to-end: parent -> orchestrator -> two-leaf nested orchestration.
Covers the acceptance gate: parent delegates to an orchestrator
child; the orchestrator delegates to two leaf grandchildren; the
role/toolset/depth chain all resolve correctly.
Mock strategy: a single AIAgent patch with a side_effect factory
that keys on the child's ephemeral_system_prompt — orchestrator
prompts contain the string "Orchestrator Role" (see
_build_child_system_prompt), leaves don't. The orchestrator
mock's run_conversation recursively calls delegate_task with
tasks=[{goal:...},{goal:...}] to spawn two leaves. This keeps
the test in one patch context and avoids depth-indexed nesting.
"""
@patch("tools.delegate_tool._resolve_delegation_credentials")
@patch("tools.delegate_tool._load_config",
return_value={"max_spawn_depth": 2})
def test_end_to_end_nested_orchestration(self, mock_cfg, mock_creds):
mock_creds.return_value = {
"provider": None, "base_url": None,
"api_key": None, "api_mode": None, "model": None,
}
parent = _make_mock_parent(depth=0)
parent.enabled_toolsets = ["terminal", "file", "delegation"]
# (enabled_toolsets, _delegate_role) for each agent built
built_agents: list = []
# Keep the orchestrator mock around so the re-entrant delegate_task
# can reach it via closure.
orch_mock = {}
def _factory(*a, **kw):
prompt = kw.get("ephemeral_system_prompt", "") or ""
is_orchestrator = "Orchestrator Role" in prompt
m = _make_role_mock_child()
built_agents.append({
"enabled_toolsets": list(kw.get("enabled_toolsets") or []),
"is_orchestrator_prompt": is_orchestrator,
})
if is_orchestrator:
# Prepare the orchestrator mock as a parent-capable object
# so the nested delegate_task call succeeds.
m._delegate_depth = 1
m._delegate_role = "orchestrator"
m._active_children = []
m._active_children_lock = threading.Lock()
m._session_db = None
m.platform = "cli"
m.enabled_toolsets = ["terminal", "file", "delegation"]
m.api_key = "***"
m.base_url = ""
m.provider = None
m.api_mode = None
m.providers_allowed = None
m.providers_ignored = None
m.providers_order = None
m.provider_sort = None
m._print_fn = None
m.tool_progress_callback = None
m.thinking_callback = None
orch_mock["agent"] = m
def _orchestrator_run(user_message=None, task_id=None, stream_callback=None):
# Re-entrant: orchestrator spawns two leaves
delegate_task(
tasks=[{"goal": "leaf-A"}, {"goal": "leaf-B"}],
parent_agent=m,
)
return {
"final_response": "orchestrated 2 workers",
"completed": True, "api_calls": 1,
"messages": [],
}
m.run_conversation.side_effect = _orchestrator_run
return m
with patch("run_agent.AIAgent", side_effect=_factory) as MockAgent:
delegate_task(
goal="top-level orchestration",
role="orchestrator",
parent_agent=parent,
)
# 1 orchestrator + 2 leaf grandchildren = 3 agents
self.assertEqual(MockAgent.call_count, 3)
# First built = the orchestrator (parent's direct child)
self.assertIn("delegation", built_agents[0]["enabled_toolsets"])
self.assertTrue(built_agents[0]["is_orchestrator_prompt"])
# Next two = leaves (grandchildren)
self.assertNotIn("delegation", built_agents[1]["enabled_toolsets"])
self.assertFalse(built_agents[1]["is_orchestrator_prompt"])
self.assertNotIn("delegation", built_agents[2]["enabled_toolsets"])
self.assertFalse(built_agents[2]["is_orchestrator_prompt"])
class TestSubagentApprovalCallback(unittest.TestCase):
"""Subagent worker threads must have a non-interactive approval callback
installed so dangerous-command prompts don't fall back to input() and
deadlock the parent's prompt_toolkit TUI.
Governed by delegation.subagent_auto_approve:
false (default) → _subagent_auto_deny
true → _subagent_auto_approve
"""
def test_auto_deny_returns_deny(self):
from tools.delegate_tool import _subagent_auto_deny
self.assertEqual(
_subagent_auto_deny("rm -rf /tmp/x", "dangerous"),
"deny",
)
def test_auto_approve_returns_once(self):
from tools.delegate_tool import _subagent_auto_approve
self.assertEqual(
_subagent_auto_approve("rm -rf /tmp/x", "dangerous"),
"once",
)
@patch("tools.delegate_tool._load_config", return_value={})
def test_getter_defaults_to_deny(self, _mock_cfg):
from tools.delegate_tool import (
_get_subagent_approval_callback,
_subagent_auto_deny,
)
self.assertIs(_get_subagent_approval_callback(), _subagent_auto_deny)
@patch(
"tools.delegate_tool._load_config",
return_value={"subagent_auto_approve": False},
)
def test_getter_explicit_false_is_deny(self, _mock_cfg):
from tools.delegate_tool import (
_get_subagent_approval_callback,
_subagent_auto_deny,
)
self.assertIs(_get_subagent_approval_callback(), _subagent_auto_deny)
@patch(
"tools.delegate_tool._load_config",
return_value={"subagent_auto_approve": True},
)
def test_getter_true_is_approve(self, _mock_cfg):
from tools.delegate_tool import (
_get_subagent_approval_callback,
_subagent_auto_approve,
)
self.assertIs(_get_subagent_approval_callback(), _subagent_auto_approve)
@patch(
"tools.delegate_tool._load_config",
return_value={"subagent_auto_approve": "yes"},
)
def test_getter_truthy_string_is_approve(self, _mock_cfg):
"""is_truthy_value accepts 'yes'/'1'/'true' as truthy."""
from tools.delegate_tool import (
_get_subagent_approval_callback,
_subagent_auto_approve,
)
self.assertIs(_get_subagent_approval_callback(), _subagent_auto_approve)
def test_executor_initializer_installs_callback_in_worker(self):
"""The initializer sets the callback on the worker thread's TLS,
not the parent's — verifies the fix actually scopes to workers.
"""
from concurrent.futures import ThreadPoolExecutor
from tools.terminal_tool import (
set_approval_callback as _set_cb,
_get_approval_callback,
)
from tools.delegate_tool import _subagent_auto_deny
# Parent thread has no callback.
_set_cb(None)
self.assertIsNone(_get_approval_callback())
seen = []
def worker():
seen.append(_get_approval_callback())
with ThreadPoolExecutor(
max_workers=1,
initializer=_set_cb,
initargs=(_subagent_auto_deny,),
) as executor:
executor.submit(worker).result()
self.assertEqual(seen, [_subagent_auto_deny])
# Parent's callback slot is still empty (TLS isolates threads).
self.assertIsNone(_get_approval_callback())
class TestFallbackModelInheritance(unittest.TestCase):
"""Subagents must inherit the parent's fallback provider chain."""
def test_child_inherits_fallback_chain(self):
"""_build_child_agent passes parent._fallback_chain as fallback_model."""
parent = _make_mock_parent(depth=0)
fallback_entry = {"provider": "openrouter", "model": "gpt-4o-mini", "api_key": "sk-or-x"}
parent._fallback_chain = [fallback_entry]
with patch("run_agent.AIAgent") as MockAgent:
MockAgent.return_value = MagicMock()
_build_child_agent(
task_index=0,
goal="test fallback inheritance",
context=None,
toolsets=None,
model=None,
max_iterations=10,
parent_agent=parent,
task_count=1,
)
_, kwargs = MockAgent.call_args
self.assertEqual(kwargs["fallback_model"], [fallback_entry])
def test_child_gets_no_fallback_when_parent_chain_empty(self):
"""When parent._fallback_chain is empty, fallback_model is None."""
parent = _make_mock_parent(depth=0)
parent._fallback_chain = []
with patch("run_agent.AIAgent") as MockAgent:
MockAgent.return_value = MagicMock()
_build_child_agent(
task_index=0,
goal="test no fallback",
context=None,
toolsets=None,
model=None,
max_iterations=10,
parent_agent=parent,
task_count=1,
)
_, kwargs = MockAgent.call_args
self.assertIsNone(kwargs["fallback_model"])
if __name__ == "__main__":
unittest.main()