diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py index 5664c7450..49bc91f44 100644 --- a/agent/transports/chat_completions.py +++ b/agent/transports/chat_completions.py @@ -682,11 +682,21 @@ class ChatCompletionsTransport(ProviderTransport): if isinstance(_msg_extra, dict): refusal = _msg_extra.get("refusal") if isinstance(refusal, str) and refusal.strip(): - if not (isinstance(content, str) and content.strip()): - content = refusal - if finish_reason in (None, "stop"): - finish_reason = "content_filter" + # Record the refusal explanation regardless — it's useful provider + # metadata even when the model also returned a usable payload. provider_data["refusal"] = refusal + _has_text = isinstance(content, str) and content.strip() + _has_tool_calls = bool(tool_calls) + # Only promote to a terminal ``content_filter`` when the refusal is + # the *sole* payload — no visible text and no tool calls. A response + # that carries real content (or tool calls) alongside a refusal note + # is a normal, usable turn: surfacing it as a failed safety refusal + # would discard the model's actual work. In the empty-payload case, + # adopt the refusal as content so the loop has something to show. + if not _has_text and not _has_tool_calls: + content = refusal + if finish_reason in (None, "stop"): + finish_reason = "content_filter" return NormalizedResponse( content=content, diff --git a/tests/agent/transports/test_chat_completions.py b/tests/agent/transports/test_chat_completions.py index 588b019d7..da642e2ae 100644 --- a/tests/agent/transports/test_chat_completions.py +++ b/tests/agent/transports/test_chat_completions.py @@ -923,8 +923,10 @@ class TestChatCompletionsNormalize: assert nr.content is None def test_refusal_does_not_clobber_existing_content(self, transport): - """If the model emitted partial text *and* a refusal, keep the visible - text as content but still flag the refusal via content_filter.""" + """If the model emitted real text *and* a refusal note, the turn is a + normal usable response: keep the visible text, record the refusal in + provider_data, and do NOT promote to a terminal content_filter (which + would discard the model's actual work by reframing it as a failure).""" r = SimpleNamespace( choices=[SimpleNamespace( message=SimpleNamespace( @@ -937,7 +939,32 @@ class TestChatCompletionsNormalize: ) nr = transport.normalize_response(r) assert nr.content == "partial answer" - assert nr.finish_reason == "content_filter" + assert nr.finish_reason == "stop" + assert nr.provider_data == {"refusal": "cannot continue"} + + def test_refusal_with_tool_calls_is_not_promoted(self, transport): + """A response that carries tool calls alongside a refusal note is a + usable tool turn — record the refusal but keep the tool calls and do + NOT terminate it as a content_filter refusal.""" + tc = SimpleNamespace( + id="call_1", type="function", + function=SimpleNamespace(name="do_thing", arguments="{}"), + ) + r = SimpleNamespace( + choices=[SimpleNamespace( + message=SimpleNamespace( + content=None, tool_calls=[tc], + reasoning_content=None, refusal="cannot continue", + ), + finish_reason="tool_calls", + )], + usage=None, + ) + nr = transport.normalize_response(r) + # Tool calls survive; finish reason is untouched; content not clobbered. + assert nr.tool_calls and nr.tool_calls[0].name == "do_thing" + assert nr.finish_reason == "tool_calls" + assert nr.content in (None, "") assert nr.provider_data == {"refusal": "cannot continue"}