fix(agent): preserve final_response on failure returns
AIAgent.run_conversation() promises a dict with final_response, but 16 terminal-failure branches returned dicts that either omitted the key or set it to None. Callers that index result['final_response'] directly (run_agent.py chat() + the __main__ printer) turn a real provider/context failure into an opaque KeyError instead of surfacing the actionable error. Every offending branch already carried usable 'error' text, so this mirrors that text into final_response for all 16 sites (8 that omitted the key, 8 that returned None). Adds an AST regression test that fails if any run_conversation() dict return omits final_response or sets it to a literal None, and tightens the invalid-response test to assert final_response == error.
This commit is contained in:
parent
43edbae638
commit
053424c486
2 changed files with 89 additions and 30 deletions
|
|
@ -1454,11 +1454,13 @@ def run_conversation(
|
|||
agent._emit_status(f"❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up.")
|
||||
logger.error(f"{agent.log_prefix}Invalid API response after {max_retries} retries.")
|
||||
agent._persist_session(messages, conversation_history)
|
||||
_final_response = f"Invalid API response after {max_retries} retries: {_failure_hint}"
|
||||
return {
|
||||
"final_response": _final_response,
|
||||
"messages": messages,
|
||||
"completed": False,
|
||||
"api_calls": api_call_count,
|
||||
"error": f"Invalid API response after {max_retries} retries: {_failure_hint}",
|
||||
"error": _final_response,
|
||||
"failed": True # Mark as failure for filtering
|
||||
}
|
||||
|
||||
|
|
@ -1891,18 +1893,19 @@ def run_conversation(
|
|||
)
|
||||
agent._cleanup_task_resources(effective_task_id)
|
||||
agent._persist_session(messages, conversation_history)
|
||||
_final_response = (
|
||||
"Stream repeatedly dropped mid tool-call (network); "
|
||||
"the tool was not executed"
|
||||
if _is_stub_stall
|
||||
else "Response truncated due to output length limit"
|
||||
)
|
||||
return {
|
||||
"final_response": None,
|
||||
"final_response": _final_response,
|
||||
"messages": messages,
|
||||
"api_calls": api_call_count,
|
||||
"completed": False,
|
||||
"partial": True,
|
||||
"error": (
|
||||
"Stream repeatedly dropped mid tool-call (network); "
|
||||
"the tool was not executed"
|
||||
if _is_stub_stall
|
||||
else "Response truncated due to output length limit"
|
||||
),
|
||||
"error": _final_response,
|
||||
}
|
||||
|
||||
# If we have prior messages, roll back to last complete state
|
||||
|
|
@ -1914,7 +1917,7 @@ def run_conversation(
|
|||
agent._persist_session(messages, conversation_history)
|
||||
|
||||
return {
|
||||
"final_response": None,
|
||||
"final_response": "Response truncated due to output length limit",
|
||||
"messages": rolled_back_messages,
|
||||
"api_calls": api_call_count,
|
||||
"completed": False,
|
||||
|
|
@ -1927,7 +1930,7 @@ def run_conversation(
|
|||
agent._vprint(f"{agent.log_prefix}❌ First response truncated - cannot recover", force=True)
|
||||
agent._persist_session(messages, conversation_history)
|
||||
return {
|
||||
"final_response": None,
|
||||
"final_response": "First response truncated due to output length limit",
|
||||
"messages": messages,
|
||||
"api_calls": api_call_count,
|
||||
"completed": False,
|
||||
|
|
@ -2917,15 +2920,17 @@ def run_conversation(
|
|||
f"auto-compaction disabled — not compressing."
|
||||
)
|
||||
agent._persist_session(messages, conversation_history)
|
||||
_final_response = (
|
||||
"Context overflow and auto-compaction is disabled "
|
||||
"(compression.enabled: false). Run /compress to compact manually, "
|
||||
"/new to start fresh, or switch to a larger-context model."
|
||||
)
|
||||
return {
|
||||
"final_response": _final_response,
|
||||
"messages": messages,
|
||||
"completed": False,
|
||||
"api_calls": api_call_count,
|
||||
"error": (
|
||||
"Context overflow and auto-compaction is disabled "
|
||||
"(compression.enabled: false). Run /compress to compact manually, "
|
||||
"/new to start fresh, or switch to a larger-context model."
|
||||
),
|
||||
"error": _final_response,
|
||||
"partial": True,
|
||||
"failed": True,
|
||||
"compaction_disabled": True,
|
||||
|
|
@ -3200,11 +3205,13 @@ def run_conversation(
|
|||
agent._vprint(f"{agent.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
|
||||
logger.error(f"{agent.log_prefix}413 compression failed after {max_compression_attempts} attempts.")
|
||||
agent._persist_session(messages, conversation_history)
|
||||
_final_response = f"Request payload too large: max compression attempts ({max_compression_attempts}) reached."
|
||||
return {
|
||||
"final_response": _final_response,
|
||||
"messages": messages,
|
||||
"completed": False,
|
||||
"api_calls": api_call_count,
|
||||
"error": f"Request payload too large: max compression attempts ({max_compression_attempts}) reached.",
|
||||
"error": _final_response,
|
||||
"partial": True,
|
||||
"failed": True,
|
||||
"compression_exhausted": True,
|
||||
|
|
@ -3244,11 +3251,13 @@ def run_conversation(
|
|||
agent._vprint(f"{agent.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
|
||||
logger.error(f"{agent.log_prefix}413 payload too large. Cannot compress further.")
|
||||
agent._persist_session(messages, conversation_history)
|
||||
_final_response = "Request payload too large (413). Cannot compress further."
|
||||
return {
|
||||
"final_response": _final_response,
|
||||
"messages": messages,
|
||||
"completed": False,
|
||||
"api_calls": api_call_count,
|
||||
"error": "Request payload too large (413). Cannot compress further.",
|
||||
"error": _final_response,
|
||||
"partial": True,
|
||||
"failed": True,
|
||||
"compression_exhausted": True,
|
||||
|
|
@ -3297,11 +3306,13 @@ def run_conversation(
|
|||
agent._vprint(f"{agent.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
|
||||
logger.error(f"{agent.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
|
||||
agent._persist_session(messages, conversation_history)
|
||||
_final_response = f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached."
|
||||
return {
|
||||
"final_response": _final_response,
|
||||
"messages": messages,
|
||||
"completed": False,
|
||||
"api_calls": api_call_count,
|
||||
"error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.",
|
||||
"error": _final_response,
|
||||
"partial": True,
|
||||
"failed": True,
|
||||
"compression_exhausted": True,
|
||||
|
|
@ -3336,14 +3347,16 @@ def run_conversation(
|
|||
f"(max_tokens over provider cap): {error_msg[:200]}"
|
||||
)
|
||||
agent._persist_session(messages, conversation_history)
|
||||
_final_response = (
|
||||
"max_tokens exceeds the provider's output cap for this model. "
|
||||
"Lower model.max_tokens in config.yaml."
|
||||
)
|
||||
return {
|
||||
"final_response": _final_response,
|
||||
"messages": messages,
|
||||
"completed": False,
|
||||
"api_calls": api_call_count,
|
||||
"error": (
|
||||
"max_tokens exceeds the provider's output cap for this model. "
|
||||
"Lower model.max_tokens in config.yaml."
|
||||
),
|
||||
"error": _final_response,
|
||||
"partial": True,
|
||||
"failed": True,
|
||||
}
|
||||
|
|
@ -3405,11 +3418,13 @@ def run_conversation(
|
|||
agent._vprint(f"{agent.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
|
||||
logger.error(f"{agent.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
|
||||
agent._persist_session(messages, conversation_history)
|
||||
_final_response = f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached."
|
||||
return {
|
||||
"final_response": _final_response,
|
||||
"messages": messages,
|
||||
"completed": False,
|
||||
"api_calls": api_call_count,
|
||||
"error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.",
|
||||
"error": _final_response,
|
||||
"partial": True,
|
||||
"failed": True,
|
||||
"compression_exhausted": True,
|
||||
|
|
@ -3448,11 +3463,13 @@ def run_conversation(
|
|||
agent._vprint(f"{agent.log_prefix} 💡 The conversation has accumulated too much content. Try /new to start fresh, or /compress to manually trigger compression.", force=True)
|
||||
logger.error(f"{agent.log_prefix}Context length exceeded: {new_tokens:,} tokens. Cannot compress further.")
|
||||
agent._persist_session(messages, conversation_history)
|
||||
_final_response = f"Context length exceeded ({new_tokens:,} tokens). Cannot compress further."
|
||||
return {
|
||||
"final_response": _final_response,
|
||||
"messages": messages,
|
||||
"completed": False,
|
||||
"api_calls": api_call_count,
|
||||
"error": f"Context length exceeded ({new_tokens:,} tokens). Cannot compress further.",
|
||||
"error": _final_response,
|
||||
"partial": True,
|
||||
"failed": True,
|
||||
"compression_exhausted": True,
|
||||
|
|
@ -3668,7 +3685,7 @@ def run_conversation(
|
|||
error_detail=_nonretryable_summary,
|
||||
)
|
||||
return {
|
||||
"final_response": None,
|
||||
"final_response": _nonretryable_summary,
|
||||
"messages": messages,
|
||||
"api_calls": api_call_count,
|
||||
"completed": False,
|
||||
|
|
@ -4125,7 +4142,7 @@ def run_conversation(
|
|||
agent._persist_session(messages, conversation_history)
|
||||
|
||||
return {
|
||||
"final_response": None,
|
||||
"final_response": "Incomplete REASONING_SCRATCHPAD after 2 retries",
|
||||
"messages": rolled_back_messages,
|
||||
"api_calls": api_call_count,
|
||||
"completed": False,
|
||||
|
|
@ -4185,7 +4202,7 @@ def run_conversation(
|
|||
agent._codex_incomplete_retries = 0
|
||||
agent._persist_session(messages, conversation_history)
|
||||
return {
|
||||
"final_response": None,
|
||||
"final_response": "Codex response remained incomplete after 3 continuation attempts",
|
||||
"messages": messages,
|
||||
"api_calls": api_call_count,
|
||||
"completed": False,
|
||||
|
|
@ -4231,13 +4248,14 @@ def run_conversation(
|
|||
agent._vprint(f"{agent.log_prefix}❌ Max retries (3) for invalid tool calls exceeded. Stopping as partial.", force=True)
|
||||
agent._invalid_tool_retries = 0
|
||||
agent._persist_session(messages, conversation_history)
|
||||
_final_response = f"Model generated invalid tool call: {invalid_preview}"
|
||||
return {
|
||||
"final_response": None,
|
||||
"final_response": _final_response,
|
||||
"messages": messages,
|
||||
"api_calls": api_call_count,
|
||||
"completed": False,
|
||||
"partial": True,
|
||||
"error": f"Model generated invalid tool call: {invalid_preview}"
|
||||
"error": _final_response
|
||||
}
|
||||
|
||||
assistant_msg = agent._build_assistant_message(assistant_message, finish_reason)
|
||||
|
|
@ -4321,7 +4339,7 @@ def run_conversation(
|
|||
agent._cleanup_task_resources(effective_task_id)
|
||||
agent._persist_session(messages, conversation_history)
|
||||
return {
|
||||
"final_response": None,
|
||||
"final_response": "Response truncated due to output length limit",
|
||||
"messages": messages,
|
||||
"api_calls": api_call_count,
|
||||
"completed": False,
|
||||
|
|
|
|||
|
|
@ -55,6 +55,46 @@ def test_is_destructive_command_treats_install_as_mutating():
|
|||
assert run_agent._is_destructive_command("install template.env .env") is True
|
||||
|
||||
|
||||
def test_run_conversation_dict_returns_include_final_response():
|
||||
"""Structurally enforce final_response on dict returns from run_conversation().
|
||||
|
||||
This parses source, including nested helpers, so it requires the .py file
|
||||
to be available. It guards key presence and literal None values; runtime
|
||||
tests still cover branch-specific values.
|
||||
"""
|
||||
from agent import conversation_loop
|
||||
|
||||
try:
|
||||
source = inspect.getsource(conversation_loop.run_conversation)
|
||||
except OSError as exc:
|
||||
pytest.skip(f"run_conversation source is unavailable: {exc}")
|
||||
tree = ast.parse(source)
|
||||
missing = []
|
||||
literal_none = []
|
||||
for node in ast.walk(tree):
|
||||
if not isinstance(node, ast.Return) or not isinstance(node.value, ast.Dict):
|
||||
continue
|
||||
keys = [
|
||||
key.value if isinstance(key, ast.Constant) else None
|
||||
for key in node.value.keys
|
||||
]
|
||||
if "final_response" not in keys:
|
||||
missing.append(node.lineno)
|
||||
continue
|
||||
value = node.value.values[keys.index("final_response")]
|
||||
if isinstance(value, ast.Constant) and value.value is None:
|
||||
literal_none.append(node.lineno)
|
||||
|
||||
assert missing == [], (
|
||||
"run_conversation() dict returns must preserve the final_response "
|
||||
f"contract; missing at source-local lines {missing}"
|
||||
)
|
||||
assert literal_none == [], (
|
||||
"run_conversation() dict returns must expose actionable final_response "
|
||||
f"text instead of literal None; literal None at source-local lines {literal_none}"
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def agent():
|
||||
"""Minimal AIAgent with mocked OpenAI client and tool loading."""
|
||||
|
|
@ -5296,6 +5336,7 @@ class TestRetryExhaustion:
|
|||
assert result.get("failed") is True
|
||||
assert "error" in result
|
||||
assert "Invalid API response" in result["error"]
|
||||
assert result.get("final_response") == result["error"]
|
||||
|
||||
def test_content_filter_refusal_surfaced_not_retried(self, agent):
|
||||
"""A model refusal must be surfaced immediately, NOT laundered into
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue