fix(delegation): route native-SDK providers through runtime resolver; fail on '(empty)' sentinel
Two related bugs caused subagent delegation to silently return empty summaries with 0 tokens when the user configured delegation.provider=bedrock alongside delegation.base_url=https://bedrock-runtime.<region>.amazonaws.com. Root cause #1 — misrouting in _resolve_delegation_credentials(): The configured_base_url branch unconditionally forced provider='custom' and api_mode='chat_completions', only specializing for chatgpt.com, anthropic, and kimi hosts. Bedrock (and other native-SDK providers) fell through as 'custom' + chat_completions, which then POSTed OpenAI-shaped JSON at Bedrock's native API. Bedrock rejected the payload and returned nothing, which looked like an empty LLM response to the child agent. Fix: when provider is one of {bedrock, vertex, google, google-genai}, skip the base_url short-circuit and fall through to resolve_runtime_provider(), which knows how to construct the proper SDK client. base_url can still be forwarded through that path for regional overrides. Root cause #2 — '(empty)' sentinel accepted as success: After N retries of empty LLM responses, run_agent.py emits the literal string '(empty)' as final_response. _run_single_child then hit `elif summary:` — '(empty)' is truthy, so status became 'completed' and the parent surfaced a blank result with no error. Users saw api_calls=4, tokens=0, duration~0.4s, status=completed. Fix: treat final_response.strip() == '(empty)' as a failure so the parent surfaces it instead of silently accepting zero-content 'success'. Both paths were reproduced in a live Hermes TUI session on us-west-2 Bedrock (provider=bedrock, model=us.anthropic.claude-sonnet-4-6) and are covered by new tests in tests/tools/test_delegate.py.
This commit is contained in:
parent
852c9b3cb2
commit
8f21311906
2 changed files with 70 additions and 2 deletions
|
|
@ -918,6 +918,31 @@ class TestDelegateObservability(unittest.TestCase):
|
|||
result = json.loads(delegate_task(goal="Test max iter", parent_agent=parent))
|
||||
self.assertEqual(result["results"][0]["exit_reason"], "max_iterations")
|
||||
|
||||
def test_empty_sentinel_marks_status_failed(self):
|
||||
"""Regression: a child that returns the literal '(empty)' sentinel
|
||||
(emitted by run_agent.py when the LLM returns empty responses after
|
||||
retries — e.g. transport misrouting) must be reported as failed, not
|
||||
silently accepted as a completed delegation. Otherwise the parent
|
||||
surfaces an empty string as if the subagent succeeded."""
|
||||
parent = _make_mock_parent(depth=0)
|
||||
|
||||
with patch("run_agent.AIAgent") as MockAgent:
|
||||
mock_child = MagicMock()
|
||||
mock_child.model = "claude-sonnet-4-6"
|
||||
mock_child.session_prompt_tokens = 0
|
||||
mock_child.session_completion_tokens = 0
|
||||
mock_child.run_conversation.return_value = {
|
||||
"final_response": "(empty)",
|
||||
"completed": True,
|
||||
"interrupted": False,
|
||||
"api_calls": 4,
|
||||
"messages": [],
|
||||
}
|
||||
MockAgent.return_value = mock_child
|
||||
|
||||
result = json.loads(delegate_task(goal="Test empty sentinel", parent_agent=parent))
|
||||
self.assertEqual(result["results"][0]["status"], "failed")
|
||||
|
||||
|
||||
class TestSubagentCostRollup(unittest.TestCase):
|
||||
"""Port of Kilo-Org/kilocode#9448 — parent's session_estimated_cost_usd
|
||||
|
|
@ -1341,6 +1366,32 @@ class TestDelegationCredentialResolution(unittest.TestCase):
|
|||
creds = _resolve_delegation_credentials(cfg, parent)
|
||||
self.assertIsNone(creds["provider"])
|
||||
|
||||
@patch("hermes_cli.runtime_provider.resolve_runtime_provider")
|
||||
def test_bedrock_provider_with_base_url_uses_runtime_resolver(self, mock_resolve):
|
||||
"""Regression: provider=bedrock + base_url set must NOT fall through the
|
||||
direct-base_url branch (which would force provider='custom' +
|
||||
chat_completions and silently misroute OpenAI JSON to the Bedrock
|
||||
native endpoint, returning empty responses)."""
|
||||
mock_resolve.return_value = {
|
||||
"provider": "bedrock",
|
||||
"base_url": "https://bedrock-runtime.us-west-2.amazonaws.com",
|
||||
"api_key": "aws-resolved-key",
|
||||
"api_mode": "bedrock_converse",
|
||||
}
|
||||
parent = _make_mock_parent(depth=0)
|
||||
cfg = {
|
||||
"model": "us.anthropic.claude-sonnet-4-6",
|
||||
"provider": "bedrock",
|
||||
"base_url": "https://bedrock-runtime.us-west-2.amazonaws.com",
|
||||
}
|
||||
creds = _resolve_delegation_credentials(cfg, parent)
|
||||
# Must use Bedrock, not 'custom'
|
||||
self.assertEqual(creds["provider"], "bedrock")
|
||||
self.assertEqual(creds["api_mode"], "bedrock_converse")
|
||||
mock_resolve.assert_called_once()
|
||||
self.assertEqual(mock_resolve.call_args.kwargs.get("requested"), "bedrock")
|
||||
|
||||
|
||||
|
||||
class TestDelegationProviderIntegration(unittest.TestCase):
|
||||
"""Integration tests: delegation config → _run_single_child → AIAgent construction."""
|
||||
|
|
|
|||
|
|
@ -2051,9 +2051,16 @@ def _run_single_child(
|
|||
interrupted = result.get("interrupted", False)
|
||||
api_calls = result.get("api_calls", 0)
|
||||
|
||||
# The child emits the literal "(empty)" sentinel (see run_agent.py) when
|
||||
# it gives up after repeated empty-LLM-response retries — typically a
|
||||
# transport bug (misrouted provider, adapter returning empty
|
||||
# ChatCompletion, etc.). Treat it as a failure so the parent surfaces
|
||||
# it instead of silently accepting zero-content "success".
|
||||
_empty_sentinel = summary.strip() == "(empty)"
|
||||
|
||||
if interrupted:
|
||||
status = "interrupted"
|
||||
elif summary:
|
||||
elif summary and not _empty_sentinel:
|
||||
# A summary means the subagent produced usable output.
|
||||
# exit_reason ("completed" vs "max_iterations") already
|
||||
# tells the parent *how* the task ended.
|
||||
|
|
@ -3000,7 +3007,17 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
|
|||
configured_api_key = str(cfg.get("api_key") or "").strip() or None
|
||||
configured_api_mode = str(cfg.get("api_mode") or "").strip().lower() or None
|
||||
|
||||
if configured_base_url:
|
||||
# Native-SDK providers (Bedrock, Vertex, Google GenAI) speak their own
|
||||
# wire protocol — they cannot be reached via OpenAI chat_completions against
|
||||
# a base_url. For these, always fall through to resolve_runtime_provider()
|
||||
# so the proper SDK path is taken. The configured base_url is still
|
||||
# forwarded through runtime-provider resolution when applicable (e.g. a
|
||||
# custom Bedrock regional endpoint).
|
||||
_NATIVE_SDK_PROVIDERS = {"bedrock", "vertex", "google", "google-genai"}
|
||||
_provider_lower = (configured_provider or "").strip().lower()
|
||||
_is_native_sdk_provider = _provider_lower in _NATIVE_SDK_PROVIDERS
|
||||
|
||||
if configured_base_url and not _is_native_sdk_provider:
|
||||
# When delegation.api_key is not set, return None so _build_child_agent
|
||||
# falls back to the parent agent's API key via the credential inheritance
|
||||
# path (effective_api_key = override_api_key or parent_api_key). This
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue