diff --git a/run_agent.py b/run_agent.py index aaafd469a..cdd82f459 100644 --- a/run_agent.py +++ b/run_agent.py @@ -5623,7 +5623,10 @@ class AIAgent: New DELEGATE_TASK_SCHEMA fields only need to be added here to reach all invocation paths (concurrent, sequential, inline). """ - from tools.delegate_tool import delegate_task as _delegate_task + from tools.delegate_tool import ( + _strip_model_hidden_task_fields, + delegate_task as _delegate_task, + ) # Delegations from the top-level MODEL always run in the background — # the model does not get to choose. delegate_task returns immediately # with a handle (one per task) and each subagent's result re-enters the @@ -5639,10 +5642,8 @@ class AIAgent: return _delegate_task( goal=function_args.get("goal"), context=function_args.get("context"), - tasks=function_args.get("tasks"), + tasks=_strip_model_hidden_task_fields(function_args.get("tasks")), max_iterations=function_args.get("max_iterations"), - acp_command=function_args.get("acp_command"), - acp_args=function_args.get("acp_args"), role=function_args.get("role"), background=(not _is_subagent), parent_agent=self, diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py index d35d3627e..5830706bf 100644 --- a/tests/tools/test_delegate.py +++ b/tests/tools/test_delegate.py @@ -78,6 +78,12 @@ class TestDelegateRequirements(unittest.TestCase): # config-authoritative via delegation.max_iterations so users get # predictable budgets. self.assertNotIn("max_iterations", props) + # ACP subprocess transport is operator-controlled via config.yaml, not + # model-controlled via delegate_task arguments. + self.assertNotIn("acp_command", props) + self.assertNotIn("acp_args", props) + self.assertNotIn("acp_command", props["tasks"]["items"]["properties"]) + self.assertNotIn("acp_args", props["tasks"]["items"]["properties"]) self.assertNotIn("maxItems", props["tasks"]) # removed — limit is now runtime-configurable def test_schema_description_advertises_runtime_limits(self): @@ -522,16 +528,7 @@ class TestToolNamePreservation(unittest.TestCase): ) def test_build_child_agent_ignores_acp_command_when_binary_missing(self): - """Regression: _build_child_agent must not force provider='copilot-acp' - when the override_acp_command binary is not on PATH. - - Without this guard, a model that hallucinates - ``delegate_task(acp_command="copilot")`` on a host without the Copilot - CLI installed (Railway / headless containers / fresh VPS) would route - the subagent through CopilotACPClient, which spawns the binary via - subprocess and raises RuntimeError. After 3 retries the asyncio loop - teardown can take the entire gateway down. - """ + """Stale delegation.command config must not force ACP subprocess mode.""" parent = _make_mock_parent(depth=0) # The crash scenario is a TG/cron agent on a host with no ACP CLI — # parent itself has no acp_command, so clearing the override must NOT @@ -606,68 +603,20 @@ class TestToolNamePreservation(unittest.TestCase): self.assertEqual(captured["provider"], "copilot-acp") self.assertEqual(captured["acp_command"], "copilot") - def test_schema_prunes_acp_command_when_no_acp_binary(self): - """Schema-level defense: delegate_task tool schema must NOT advertise - acp_command / acp_args to the model when no ACP binary is installed. - - Headless deploys (Railway / Fly / Docker / fresh VPS) typically have - none of copilot / claude / codex. Without the schema prune, models - occasionally hallucinate ``acp_command="copilot"`` from the field's - description and crash subagent runs. - """ + def test_schema_never_exposes_acp_transport_fields(self): + """delegate_task must never make ACP transport model-facing.""" from tools.delegate_tool import _build_dynamic_schema_overrides - with patch("tools.delegate_tool._acp_binary_available", return_value=False): + with patch("shutil.which", return_value="/usr/local/bin/copilot"): overrides = _build_dynamic_schema_overrides() props = overrides["parameters"]["properties"] - self.assertNotIn("acp_command", props, "top-level acp_command must be pruned") - self.assertNotIn("acp_args", props, "top-level acp_args must be pruned") + self.assertNotIn("acp_command", props) + self.assertNotIn("acp_args", props) task_item_props = props["tasks"]["items"]["properties"] - self.assertNotIn( - "acp_command", task_item_props, "per-task acp_command must be pruned" - ) - self.assertNotIn( - "acp_args", task_item_props, "per-task acp_args must be pruned" - ) - - def test_schema_keeps_acp_command_when_binary_available(self): - """Backward compat: when an ACP CLI IS on PATH, schema is unchanged. - Users with working ACP setups must still be able to invoke it. - """ - from tools.delegate_tool import _build_dynamic_schema_overrides - - with patch("tools.delegate_tool._acp_binary_available", return_value=True): - overrides = _build_dynamic_schema_overrides() - - props = overrides["parameters"]["properties"] - self.assertIn("acp_command", props) - self.assertIn("acp_args", props) - - task_item_props = props["tasks"]["items"]["properties"] - self.assertIn("acp_command", task_item_props) - self.assertIn("acp_args", task_item_props) - - def test_acp_binary_available_checks_known_clis(self): - """_acp_binary_available must check the known ACP CLI names via - shutil.which — guards against typos or accidental list trimming. - """ - from tools.delegate_tool import _KNOWN_ACP_BINARIES, _acp_binary_available - - self.assertIn("copilot", _KNOWN_ACP_BINARIES) - - calls = [] - - def fake_which(name): - calls.append(name) - return None - - with patch("shutil.which", side_effect=fake_which): - self.assertFalse(_acp_binary_available()) - - for name in _KNOWN_ACP_BINARIES: - self.assertIn(name, calls) + self.assertNotIn("acp_command", task_item_props) + self.assertNotIn("acp_args", task_item_props) def test_saved_tool_names_set_on_child_before_run(self): """_run_single_child must set _delegate_saved_tool_names on the child @@ -2281,37 +2230,39 @@ class TestDelegationReasoningEffort(unittest.TestCase): class TestDispatchDelegateTask(unittest.TestCase): """Tests for the _dispatch_delegate_task helper and full param forwarding.""" - @patch("tools.delegate_tool._load_config", return_value={}) - @patch("tools.delegate_tool._resolve_delegation_credentials") - def test_acp_args_forwarded(self, mock_creds, mock_cfg): - """Both acp_command and acp_args reach delegate_task via the helper.""" - mock_creds.return_value = { - "provider": None, "base_url": None, - "api_key": None, "api_mode": None, "model": None, - } - parent = _make_mock_parent(depth=0) - with patch("tools.delegate_tool._build_child_agent") as mock_build: - mock_child = MagicMock() - mock_child.run_conversation.return_value = { - "final_response": "done", "completed": True, - "api_calls": 1, "messages": [], - } - mock_child._delegate_saved_tool_names = [] - mock_child._credential_pool = None - mock_child.session_prompt_tokens = 0 - mock_child.session_completion_tokens = 0 - mock_child.model = "test" - mock_build.return_value = mock_child + def test_model_acp_args_not_forwarded(self): + """The live model dispatch path strips hidden ACP transport args.""" + import run_agent - delegate_task( - goal="test", - acp_command="claude", - acp_args=["--acp", "--stdio"], - parent_agent=parent, + captured = {} + + def fake_delegate_task(**kwargs): + captured.update(kwargs) + return "{}" + + parent = _make_mock_parent(depth=0) + with patch("tools.delegate_tool.delegate_task", fake_delegate_task): + run_agent.AIAgent._dispatch_delegate_task( + parent, + { + "goal": "test", + "acp_command": "claude", + "acp_args": ["--acp", "--stdio"], + "tasks": [ + { + "goal": "nested", + "acp_command": "codex", + "acp_args": ["--acp"], + }, + ], + }, ) - _, kwargs = mock_build.call_args - self.assertEqual(kwargs["override_acp_command"], "claude") - self.assertEqual(kwargs["override_acp_args"], ["--acp", "--stdio"]) + + self.assertNotIn("acp_command", captured) + self.assertNotIn("acp_args", captured) + self.assertEqual(captured["goal"], "test") + self.assertNotIn("acp_command", captured["tasks"][0]) + self.assertNotIn("acp_args", captured["tasks"][0]) class TestDelegateEventEnum(unittest.TestCase): """Tests for DelegateEvent enum and back-compat aliases.""" @@ -2600,31 +2551,15 @@ class TestOrchestratorRoleSchema(unittest.TestCase): self.assertIn("role", task_props) self.assertEqual(task_props["role"]["enum"], ["leaf", "orchestrator"]) - def test_acp_command_description_has_do_not_set_guidance(self): - # acp_command/acp_args descriptions must NOT bias the model toward - # assuming an ACP CLI (Claude, Copilot, etc.) is installed. They must - # carry explicit "do not set unless told" guidance so the model doesn't - # hallucinate ACP availability (#22013). + def test_schema_omits_acp_transport_fields(self): from tools.delegate_tool import DELEGATE_TASK_SCHEMA props = DELEGATE_TASK_SCHEMA["parameters"]["properties"] - top_acp_desc = props["acp_command"]["description"] - self.assertIn("Do NOT set", top_acp_desc) - self.assertIn("explicitly told you", top_acp_desc) - task_props = props["tasks"]["items"]["properties"] - per_task_acp_desc = task_props["acp_command"]["description"] - self.assertIn("Do NOT set", per_task_acp_desc) - - def test_acp_command_description_has_no_claude_as_example(self): - # Descriptions must not list 'claude' as a canonical example value — - # that directly primes the model to attempt Claude ACP even when it is - # not installed (#22013). - from tools.delegate_tool import DELEGATE_TASK_SCHEMA - props = DELEGATE_TASK_SCHEMA["parameters"]["properties"] - top_acp_desc = props["acp_command"]["description"].lower() - self.assertNotIn("e.g. 'claude'", top_acp_desc) - self.assertNotIn("e.g. \"claude\"", top_acp_desc) + self.assertNotIn("acp_command", props) + self.assertNotIn("acp_args", props) + self.assertNotIn("acp_command", task_props) + self.assertNotIn("acp_args", task_props) # Sentinel used to distinguish "role kwarg omitted" from "role=None". diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index b3172e51a..2baf4da30 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -1055,7 +1055,7 @@ def _build_child_agent( override_base_url: Optional[str] = None, override_api_key: Optional[str] = None, override_api_mode: Optional[str] = None, - # ACP transport overrides — lets a non-ACP parent spawn ACP child agents + # ACP transport overrides from trusted delegation config. override_acp_command: Optional[str] = None, override_acp_args: Optional[List[str]] = None, # Per-call role controlling whether the child can further delegate. @@ -1212,11 +1212,9 @@ def _build_child_agent( effective_api_mode = None # force re-derivation from provider's defaults else: effective_api_mode = getattr(parent_agent, "api_mode", None) - # Defensive: validate override_acp_command exists on PATH before honoring - # it. Models occasionally pass acp_command="copilot" / "claude" / etc. in - # delegate_task tool calls despite the schema saying not to, which forces - # the subagent onto the copilot-acp transport below and crashes the - # gateway when the binary is missing (e.g. headless container deploys). + # Defensive: validate trusted delegation.command exists on PATH before + # honoring it. Stale config should not force a child onto the ACP transport + # and then fail at subprocess startup. if override_acp_command: import shutil as _shutil @@ -2346,8 +2344,6 @@ def delegate_task( context: Optional[str] = None, tasks: Optional[List[Dict[str, Any]]] = None, max_iterations: Optional[int] = None, - acp_command: Optional[str] = None, - acp_args: Optional[List[str]] = None, role: Optional[str] = None, background: Optional[bool] = None, parent_agent=None, @@ -2486,7 +2482,6 @@ def delegate_task( children = [] try: for i, t in enumerate(task_list): - task_acp_args = t.get("acp_args") if "acp_args" in t else None # Per-task role beats top-level; normalise again so unknown # per-task values warn and degrade to leaf uniformly. effective_role = _normalize_role(t.get("role") or top_role) @@ -2505,14 +2500,8 @@ def delegate_task( override_base_url=creds["base_url"], override_api_key=creds["api_key"], override_api_mode=creds["api_mode"], - override_acp_command=t.get("acp_command") - or acp_command - or creds.get("command"), - override_acp_args=( - task_acp_args - if task_acp_args is not None - else (acp_args if acp_args is not None else creds.get("args")) - ), + override_acp_command=creds.get("command"), + override_acp_args=creds.get("args"), role=effective_role, ) # Override with correct parent tool names (before child construction mutated global) @@ -3292,30 +3281,6 @@ def _build_role_param_description() -> str: ) -# Known ACP-compatible CLIs that delegate_task can shell out to. Kept -# narrow on purpose: only the ones agent/copilot_acp_client.py and friends -# actually understand. Add new entries here when a new ACP CLI ships. -_KNOWN_ACP_BINARIES: tuple[str, ...] = ("copilot", "claude", "codex") - - -def _acp_binary_available() -> bool: - """True iff at least one known ACP CLI is on PATH. - - Used to gate inclusion of ``acp_command`` / ``acp_args`` in the - delegate_task schema. On headless hosts (Railway / Fly / Docker / - fresh VPS) without any of these binaries, exposing the fields invites - the model to hallucinate ``acp_command="copilot"`` from the schema's - description, which used to crash subagent runs and take the gateway - down. Pruning the fields from the schema removes the temptation. - - Not cached: ``shutil.which`` is cheap and we want the schema to react - to mid-session installs without forcing a process restart. - """ - import shutil as _shutil - - return any(_shutil.which(name) for name in _KNOWN_ACP_BINARIES) - - def _build_dynamic_schema_overrides() -> dict: """Return per-call schema overrides reflecting current config. @@ -3333,24 +3298,6 @@ def _build_dynamic_schema_overrides() -> dict: overrides_params["properties"]["tasks"]["description"] = _build_tasks_param_description() overrides_params["properties"]["role"]["description"] = _build_role_param_description() - # Prune ACP overrides from the schema when no known ACP CLI is on PATH. - # The runtime guard in _build_child_agent remains as defense-in-depth for - # internal callers / tests / future code paths that skip the schema layer. - if not _acp_binary_available(): - overrides_params["properties"].pop("acp_command", None) - overrides_params["properties"].pop("acp_args", None) - tasks_schema = dict(overrides_params["properties"].get("tasks", {})) - if "items" in tasks_schema: - items = dict(tasks_schema["items"]) - if "properties" in items: - items["properties"] = { - k: v - for k, v in items["properties"].items() - if k not in ("acp_command", "acp_args") - } - tasks_schema["items"] = items - overrides_params["properties"]["tasks"] = tasks_schema - return { "description": _build_top_level_description(), "parameters": overrides_params, @@ -3401,19 +3348,6 @@ DELEGATE_TASK_SCHEMA = { "type": "string", "description": "Task-specific context", }, - "acp_command": { - "type": "string", - "description": ( - "Per-task ACP command override (e.g. 'copilot'). " - "Overrides the top-level acp_command for this task only. " - "Do NOT set unless the user explicitly told you an ACP CLI is installed." - ), - }, - "acp_args": { - "type": "array", - "items": {"type": "string"}, - "description": "Per-task ACP args override. Leave empty unless acp_command is set.", - }, "role": { "type": "string", "enum": ["leaf", "orchestrator"], @@ -3444,28 +3378,6 @@ DELEGATE_TASK_SCHEMA = { "compatibility." ), }, - "acp_command": { - "type": "string", - "description": ( - "Override ACP command for child agents (e.g. 'copilot'). " - "When set, children use ACP subprocess transport instead of inheriting " - "the parent's transport. Requires an ACP-compatible CLI " - "(currently GitHub Copilot CLI via 'copilot --acp --stdio'). " - "See agent/copilot_acp_client.py for the implementation. " - "IMPORTANT: Do NOT set this unless the user has explicitly told you " - "a specific ACP-compatible CLI is installed and configured. " - "Leave empty to use the parent's default transport (Hermes subagents)." - ), - }, - "acp_args": { - "type": "array", - "items": {"type": "string"}, - "description": ( - "Arguments for the ACP command (default: ['--acp', '--stdio']). " - "Only used when acp_command is set. " - "Leave empty unless acp_command is explicitly provided." - ), - }, }, "required": [], }, @@ -3492,6 +3404,28 @@ def _model_background_value(args: dict, parent_agent=None) -> bool: return not is_subagent +_MODEL_HIDDEN_TASK_FIELDS = {"acp_command", "acp_args"} + + +def _strip_model_hidden_task_fields(tasks: Any) -> Any: + if not isinstance(tasks, list): + return tasks + stripped_tasks = [] + changed = False + for task in tasks: + if not isinstance(task, dict): + stripped_tasks.append(task) + continue + stripped = { + key: value + for key, value in task.items() + if key not in _MODEL_HIDDEN_TASK_FIELDS + } + changed = changed or len(stripped) != len(task) + stripped_tasks.append(stripped) + return stripped_tasks if changed else tasks + + registry.register( name="delegate_task", toolset="delegation", @@ -3499,10 +3433,8 @@ registry.register( handler=lambda args, **kw: delegate_task( goal=args.get("goal"), context=args.get("context"), - tasks=args.get("tasks"), + tasks=_strip_model_hidden_task_fields(args.get("tasks")), max_iterations=args.get("max_iterations"), - acp_command=args.get("acp_command"), - acp_args=args.get("acp_args"), role=args.get("role"), background=_model_background_value(args, kw.get("parent_agent")), parent_agent=kw.get("parent_agent"),