From 8f2131190632ea09cbacdb45568b05709bace4e8 Mon Sep 17 00:00:00 2001 From: Justin Ohms Date: Tue, 12 May 2026 10:33:00 -0700 Subject: [PATCH] fix(delegation): route native-SDK providers through runtime resolver; fail on '(empty)' sentinel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related bugs caused subagent delegation to silently return empty summaries with 0 tokens when the user configured delegation.provider=bedrock alongside delegation.base_url=https://bedrock-runtime..amazonaws.com. Root cause #1 — misrouting in _resolve_delegation_credentials(): The configured_base_url branch unconditionally forced provider='custom' and api_mode='chat_completions', only specializing for chatgpt.com, anthropic, and kimi hosts. Bedrock (and other native-SDK providers) fell through as 'custom' + chat_completions, which then POSTed OpenAI-shaped JSON at Bedrock's native API. Bedrock rejected the payload and returned nothing, which looked like an empty LLM response to the child agent. Fix: when provider is one of {bedrock, vertex, google, google-genai}, skip the base_url short-circuit and fall through to resolve_runtime_provider(), which knows how to construct the proper SDK client. base_url can still be forwarded through that path for regional overrides. Root cause #2 — '(empty)' sentinel accepted as success: After N retries of empty LLM responses, run_agent.py emits the literal string '(empty)' as final_response. _run_single_child then hit `elif summary:` — '(empty)' is truthy, so status became 'completed' and the parent surfaced a blank result with no error. Users saw api_calls=4, tokens=0, duration~0.4s, status=completed. Fix: treat final_response.strip() == '(empty)' as a failure so the parent surfaces it instead of silently accepting zero-content 'success'. Both paths were reproduced in a live Hermes TUI session on us-west-2 Bedrock (provider=bedrock, model=us.anthropic.claude-sonnet-4-6) and are covered by new tests in tests/tools/test_delegate.py. --- tests/tools/test_delegate.py | 51 ++++++++++++++++++++++++++++++++++++ tools/delegate_tool.py | 21 +++++++++++++-- 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py index 0fa8a965c..ac3790849 100644 --- a/tests/tools/test_delegate.py +++ b/tests/tools/test_delegate.py @@ -918,6 +918,31 @@ class TestDelegateObservability(unittest.TestCase): result = json.loads(delegate_task(goal="Test max iter", parent_agent=parent)) self.assertEqual(result["results"][0]["exit_reason"], "max_iterations") + def test_empty_sentinel_marks_status_failed(self): + """Regression: a child that returns the literal '(empty)' sentinel + (emitted by run_agent.py when the LLM returns empty responses after + retries — e.g. transport misrouting) must be reported as failed, not + silently accepted as a completed delegation. Otherwise the parent + surfaces an empty string as if the subagent succeeded.""" + parent = _make_mock_parent(depth=0) + + with patch("run_agent.AIAgent") as MockAgent: + mock_child = MagicMock() + mock_child.model = "claude-sonnet-4-6" + mock_child.session_prompt_tokens = 0 + mock_child.session_completion_tokens = 0 + mock_child.run_conversation.return_value = { + "final_response": "(empty)", + "completed": True, + "interrupted": False, + "api_calls": 4, + "messages": [], + } + MockAgent.return_value = mock_child + + result = json.loads(delegate_task(goal="Test empty sentinel", parent_agent=parent)) + self.assertEqual(result["results"][0]["status"], "failed") + class TestSubagentCostRollup(unittest.TestCase): """Port of Kilo-Org/kilocode#9448 — parent's session_estimated_cost_usd @@ -1341,6 +1366,32 @@ class TestDelegationCredentialResolution(unittest.TestCase): creds = _resolve_delegation_credentials(cfg, parent) self.assertIsNone(creds["provider"]) + @patch("hermes_cli.runtime_provider.resolve_runtime_provider") + def test_bedrock_provider_with_base_url_uses_runtime_resolver(self, mock_resolve): + """Regression: provider=bedrock + base_url set must NOT fall through the + direct-base_url branch (which would force provider='custom' + + chat_completions and silently misroute OpenAI JSON to the Bedrock + native endpoint, returning empty responses).""" + mock_resolve.return_value = { + "provider": "bedrock", + "base_url": "https://bedrock-runtime.us-west-2.amazonaws.com", + "api_key": "aws-resolved-key", + "api_mode": "bedrock_converse", + } + parent = _make_mock_parent(depth=0) + cfg = { + "model": "us.anthropic.claude-sonnet-4-6", + "provider": "bedrock", + "base_url": "https://bedrock-runtime.us-west-2.amazonaws.com", + } + creds = _resolve_delegation_credentials(cfg, parent) + # Must use Bedrock, not 'custom' + self.assertEqual(creds["provider"], "bedrock") + self.assertEqual(creds["api_mode"], "bedrock_converse") + mock_resolve.assert_called_once() + self.assertEqual(mock_resolve.call_args.kwargs.get("requested"), "bedrock") + + class TestDelegationProviderIntegration(unittest.TestCase): """Integration tests: delegation config → _run_single_child → AIAgent construction.""" diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 8a5a060fd..17b9435a0 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -2051,9 +2051,16 @@ def _run_single_child( interrupted = result.get("interrupted", False) api_calls = result.get("api_calls", 0) + # The child emits the literal "(empty)" sentinel (see run_agent.py) when + # it gives up after repeated empty-LLM-response retries — typically a + # transport bug (misrouted provider, adapter returning empty + # ChatCompletion, etc.). Treat it as a failure so the parent surfaces + # it instead of silently accepting zero-content "success". + _empty_sentinel = summary.strip() == "(empty)" + if interrupted: status = "interrupted" - elif summary: + elif summary and not _empty_sentinel: # A summary means the subagent produced usable output. # exit_reason ("completed" vs "max_iterations") already # tells the parent *how* the task ended. @@ -3000,7 +3007,17 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict: configured_api_key = str(cfg.get("api_key") or "").strip() or None configured_api_mode = str(cfg.get("api_mode") or "").strip().lower() or None - if configured_base_url: + # Native-SDK providers (Bedrock, Vertex, Google GenAI) speak their own + # wire protocol — they cannot be reached via OpenAI chat_completions against + # a base_url. For these, always fall through to resolve_runtime_provider() + # so the proper SDK path is taken. The configured base_url is still + # forwarded through runtime-provider resolution when applicable (e.g. a + # custom Bedrock regional endpoint). + _NATIVE_SDK_PROVIDERS = {"bedrock", "vertex", "google", "google-genai"} + _provider_lower = (configured_provider or "").strip().lower() + _is_native_sdk_provider = _provider_lower in _NATIVE_SDK_PROVIDERS + + if configured_base_url and not _is_native_sdk_provider: # When delegation.api_key is not set, return None so _build_child_agent # falls back to the parent agent's API key via the credential inheritance # path (effective_api_key = override_api_key or parent_api_key). This