Merge branch 'main' into rewbs/tool-use-charge-to-subscription
This commit is contained in:
commit
a2e56d044b
175 changed files with 18848 additions and 3772 deletions
432
run_agent.py
432
run_agent.py
|
|
@ -89,7 +89,7 @@ from agent.model_metadata import (
|
|||
)
|
||||
from agent.context_compressor import ContextCompressor
|
||||
from agent.prompt_caching import apply_anthropic_cache_control
|
||||
from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS
|
||||
from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS
|
||||
from agent.usage_pricing import estimate_usage_cost, normalize_usage
|
||||
from agent.display import (
|
||||
KawaiiSpinner, build_tool_preview as _build_tool_preview,
|
||||
|
|
@ -321,8 +321,12 @@ def _extract_parallel_scope_path(tool_name: str, function_args: dict) -> Path |
|
|||
if not isinstance(raw_path, str) or not raw_path.strip():
|
||||
return None
|
||||
|
||||
expanded = Path(raw_path).expanduser()
|
||||
if expanded.is_absolute():
|
||||
return Path(os.path.abspath(str(expanded)))
|
||||
|
||||
# Avoid resolve(); the file may not exist yet.
|
||||
return Path(raw_path).expanduser()
|
||||
return Path(os.path.abspath(str(Path.cwd() / expanded)))
|
||||
|
||||
|
||||
def _paths_overlap(left: Path, right: Path) -> bool:
|
||||
|
|
@ -468,7 +472,7 @@ class AIAgent:
|
|||
acp_args: list[str] | None = None,
|
||||
command: str = None,
|
||||
args: list[str] | None = None,
|
||||
model: str = "anthropic/claude-opus-4.6", # OpenRouter format
|
||||
model: str = "",
|
||||
max_iterations: int = 90, # Default tool-calling iterations (shared with subagents)
|
||||
tool_delay: float = 1.0,
|
||||
enabled_toolsets: List[str] = None,
|
||||
|
|
@ -487,6 +491,8 @@ class AIAgent:
|
|||
provider_data_collection: str = None,
|
||||
session_id: str = None,
|
||||
tool_progress_callback: callable = None,
|
||||
tool_start_callback: callable = None,
|
||||
tool_complete_callback: callable = None,
|
||||
thinking_callback: callable = None,
|
||||
reasoning_callback: callable = None,
|
||||
clarify_callback: callable = None,
|
||||
|
|
@ -506,9 +512,11 @@ class AIAgent:
|
|||
honcho_config=None,
|
||||
iteration_budget: "IterationBudget" = None,
|
||||
fallback_model: Dict[str, Any] = None,
|
||||
credential_pool=None,
|
||||
checkpoints_enabled: bool = False,
|
||||
checkpoint_max_snapshots: int = 50,
|
||||
pass_session_id: bool = False,
|
||||
persist_session: bool = True,
|
||||
):
|
||||
"""
|
||||
Initialize the AI Agent.
|
||||
|
|
@ -574,13 +582,14 @@ class AIAgent:
|
|||
self.background_review_callback = None # Optional sync callback for gateway delivery
|
||||
self.skip_context_files = skip_context_files
|
||||
self.pass_session_id = pass_session_id
|
||||
self.persist_session = persist_session
|
||||
self._credential_pool = credential_pool
|
||||
self.log_prefix_chars = log_prefix_chars
|
||||
self.log_prefix = f"{log_prefix} " if log_prefix else ""
|
||||
# Store effective base URL for feature detection (prompt caching, reasoning, etc.)
|
||||
# When no base_url is provided, the client defaults to OpenRouter, so reflect that here.
|
||||
self.base_url = base_url or OPENROUTER_BASE_URL
|
||||
self.base_url = base_url or ""
|
||||
provider_name = provider.strip().lower() if isinstance(provider, str) and provider.strip() else None
|
||||
self.provider = provider_name or "openrouter"
|
||||
self.provider = provider_name or ""
|
||||
self.acp_command = acp_command or command
|
||||
self.acp_args = list(acp_args or args or [])
|
||||
if api_mode in {"chat_completions", "codex_responses", "anthropic_messages"}:
|
||||
|
|
@ -617,6 +626,8 @@ class AIAgent:
|
|||
).start()
|
||||
|
||||
self.tool_progress_callback = tool_progress_callback
|
||||
self.tool_start_callback = tool_start_callback
|
||||
self.tool_complete_callback = tool_complete_callback
|
||||
self.thinking_callback = thinking_callback
|
||||
self.reasoning_callback = reasoning_callback
|
||||
self._reasoning_deltas_fired = False # Set by _fire_reasoning_delta, reset per API call
|
||||
|
|
@ -1386,6 +1397,7 @@ class AIAgent:
|
|||
content = re.sub(r'<thinking>.*?</thinking>', '', content, flags=re.DOTALL | re.IGNORECASE)
|
||||
content = re.sub(r'<reasoning>.*?</reasoning>', '', content, flags=re.DOTALL)
|
||||
content = re.sub(r'<REASONING_SCRATCHPAD>.*?</REASONING_SCRATCHPAD>', '', content, flags=re.DOTALL)
|
||||
content = re.sub(r'</?(?:think|thinking|reasoning|REASONING_SCRATCHPAD)>\s*', '', content, flags=re.IGNORECASE)
|
||||
return content
|
||||
|
||||
def _looks_like_codex_intermediate_ack(
|
||||
|
|
@ -1701,7 +1713,10 @@ class AIAgent:
|
|||
"""Save session state to both JSON log and SQLite on any exit path.
|
||||
|
||||
Ensures conversations are never lost, even on errors or early returns.
|
||||
Skipped when ``persist_session=False`` (ephemeral helper flows).
|
||||
"""
|
||||
if not self.persist_session:
|
||||
return
|
||||
self._apply_persist_user_message_override(messages)
|
||||
self._session_messages = messages
|
||||
self._save_session_log(messages)
|
||||
|
|
@ -3234,9 +3249,10 @@ class AIAgent:
|
|||
"model": model,
|
||||
"instructions": instructions,
|
||||
"input": normalized_input,
|
||||
"tools": normalized_tools,
|
||||
"store": False,
|
||||
}
|
||||
if normalized_tools is not None:
|
||||
normalized["tools"] = normalized_tools
|
||||
|
||||
# Pass through reasoning config
|
||||
reasoning = api_kwargs.get("reasoning")
|
||||
|
|
@ -3481,14 +3497,33 @@ class AIAgent:
|
|||
|
||||
@staticmethod
|
||||
def _is_openai_client_closed(client: Any) -> bool:
|
||||
"""Check if an OpenAI client is closed.
|
||||
|
||||
Handles both property and method forms of is_closed:
|
||||
- httpx.Client.is_closed is a bool property
|
||||
- openai.OpenAI.is_closed is a method returning bool
|
||||
|
||||
Prior bug: getattr(client, "is_closed", False) returned the bound method,
|
||||
which is always truthy, causing unnecessary client recreation on every call.
|
||||
"""
|
||||
from unittest.mock import Mock
|
||||
|
||||
if isinstance(client, Mock):
|
||||
return False
|
||||
if bool(getattr(client, "is_closed", False)):
|
||||
return True
|
||||
|
||||
is_closed_attr = getattr(client, "is_closed", None)
|
||||
if is_closed_attr is not None:
|
||||
# Handle method (openai SDK) vs property (httpx)
|
||||
if callable(is_closed_attr):
|
||||
if is_closed_attr():
|
||||
return True
|
||||
elif bool(is_closed_attr):
|
||||
return True
|
||||
|
||||
http_client = getattr(client, "_client", None)
|
||||
return bool(getattr(http_client, "is_closed", False))
|
||||
if http_client is not None:
|
||||
return bool(getattr(http_client, "is_closed", False))
|
||||
return False
|
||||
|
||||
def _create_openai_client(self, client_kwargs: dict, *, reason: str, shared: bool) -> Any:
|
||||
if self.provider == "copilot-acp" or str(client_kwargs.get("base_url", "")).startswith("acp://copilot"):
|
||||
|
|
@ -3511,15 +3546,78 @@ class AIAgent:
|
|||
)
|
||||
return client
|
||||
|
||||
@staticmethod
|
||||
def _force_close_tcp_sockets(client: Any) -> int:
|
||||
"""Force-close underlying TCP sockets to prevent CLOSE-WAIT accumulation.
|
||||
|
||||
When a provider drops a connection mid-stream, httpx's ``client.close()``
|
||||
performs a graceful shutdown which leaves sockets in CLOSE-WAIT until the
|
||||
OS times them out (often minutes). This method walks the httpx transport
|
||||
pool and issues ``socket.shutdown(SHUT_RDWR)`` + ``socket.close()`` to
|
||||
force an immediate TCP RST, freeing the file descriptors.
|
||||
|
||||
Returns the number of sockets force-closed.
|
||||
"""
|
||||
import socket as _socket
|
||||
|
||||
closed = 0
|
||||
try:
|
||||
http_client = getattr(client, "_client", None)
|
||||
if http_client is None:
|
||||
return 0
|
||||
transport = getattr(http_client, "_transport", None)
|
||||
if transport is None:
|
||||
return 0
|
||||
pool = getattr(transport, "_pool", None)
|
||||
if pool is None:
|
||||
return 0
|
||||
# httpx uses httpcore connection pools; connections live in
|
||||
# _connections (list) or _pool (list) depending on version.
|
||||
connections = (
|
||||
getattr(pool, "_connections", None)
|
||||
or getattr(pool, "_pool", None)
|
||||
or []
|
||||
)
|
||||
for conn in list(connections):
|
||||
stream = (
|
||||
getattr(conn, "_network_stream", None)
|
||||
or getattr(conn, "_stream", None)
|
||||
)
|
||||
if stream is None:
|
||||
continue
|
||||
sock = getattr(stream, "_sock", None)
|
||||
if sock is None:
|
||||
sock = getattr(stream, "stream", None)
|
||||
if sock is not None:
|
||||
sock = getattr(sock, "_sock", None)
|
||||
if sock is None:
|
||||
continue
|
||||
try:
|
||||
sock.shutdown(_socket.SHUT_RDWR)
|
||||
except OSError:
|
||||
pass
|
||||
try:
|
||||
sock.close()
|
||||
except OSError:
|
||||
pass
|
||||
closed += 1
|
||||
except Exception as exc:
|
||||
logger.debug("Force-close TCP sockets sweep error: %s", exc)
|
||||
return closed
|
||||
|
||||
def _close_openai_client(self, client: Any, *, reason: str, shared: bool) -> None:
|
||||
if client is None:
|
||||
return
|
||||
# Force-close TCP sockets first to prevent CLOSE-WAIT accumulation,
|
||||
# then do the graceful SDK-level close.
|
||||
force_closed = self._force_close_tcp_sockets(client)
|
||||
try:
|
||||
client.close()
|
||||
logger.info(
|
||||
"OpenAI client closed (%s, shared=%s) %s",
|
||||
"OpenAI client closed (%s, shared=%s, tcp_force_closed=%d) %s",
|
||||
reason,
|
||||
shared,
|
||||
force_closed,
|
||||
self._client_log_context(),
|
||||
)
|
||||
except Exception as exc:
|
||||
|
|
@ -3564,6 +3662,76 @@ class AIAgent:
|
|||
with self._openai_client_lock():
|
||||
return self.client
|
||||
|
||||
def _cleanup_dead_connections(self) -> bool:
|
||||
"""Detect and clean up dead TCP connections on the primary client.
|
||||
|
||||
Inspects the httpx connection pool for sockets in unhealthy states
|
||||
(CLOSE-WAIT, errors). If any are found, force-closes all sockets
|
||||
and rebuilds the primary client from scratch.
|
||||
|
||||
Returns True if dead connections were found and cleaned up.
|
||||
"""
|
||||
client = getattr(self, "client", None)
|
||||
if client is None:
|
||||
return False
|
||||
try:
|
||||
http_client = getattr(client, "_client", None)
|
||||
if http_client is None:
|
||||
return False
|
||||
transport = getattr(http_client, "_transport", None)
|
||||
if transport is None:
|
||||
return False
|
||||
pool = getattr(transport, "_pool", None)
|
||||
if pool is None:
|
||||
return False
|
||||
connections = (
|
||||
getattr(pool, "_connections", None)
|
||||
or getattr(pool, "_pool", None)
|
||||
or []
|
||||
)
|
||||
dead_count = 0
|
||||
for conn in list(connections):
|
||||
# Check for connections that are idle but have closed sockets
|
||||
stream = (
|
||||
getattr(conn, "_network_stream", None)
|
||||
or getattr(conn, "_stream", None)
|
||||
)
|
||||
if stream is None:
|
||||
continue
|
||||
sock = getattr(stream, "_sock", None)
|
||||
if sock is None:
|
||||
sock = getattr(stream, "stream", None)
|
||||
if sock is not None:
|
||||
sock = getattr(sock, "_sock", None)
|
||||
if sock is None:
|
||||
continue
|
||||
# Probe socket health with a non-blocking recv peek
|
||||
import socket as _socket
|
||||
try:
|
||||
sock.setblocking(False)
|
||||
data = sock.recv(1, _socket.MSG_PEEK | _socket.MSG_DONTWAIT)
|
||||
if data == b"":
|
||||
dead_count += 1
|
||||
except BlockingIOError:
|
||||
pass # No data available — socket is healthy
|
||||
except OSError:
|
||||
dead_count += 1
|
||||
finally:
|
||||
try:
|
||||
sock.setblocking(True)
|
||||
except OSError:
|
||||
pass
|
||||
if dead_count > 0:
|
||||
logger.warning(
|
||||
"Found %d dead connection(s) in client pool — rebuilding client",
|
||||
dead_count,
|
||||
)
|
||||
self._replace_primary_openai_client(reason="dead_connection_cleanup")
|
||||
return True
|
||||
except Exception as exc:
|
||||
logger.debug("Dead connection check error: %s", exc)
|
||||
return False
|
||||
|
||||
def _create_request_openai_client(self, *, reason: str) -> Any:
|
||||
from unittest.mock import Mock
|
||||
|
||||
|
|
@ -3579,6 +3747,8 @@ class AIAgent:
|
|||
|
||||
def _run_codex_stream(self, api_kwargs: dict, client: Any = None, on_first_delta: callable = None):
|
||||
"""Execute one streaming Responses API request and return the final response."""
|
||||
import httpx as _httpx
|
||||
|
||||
active_client = client or self._ensure_primary_openai_client(reason="codex_stream_direct")
|
||||
max_stream_retries = 1
|
||||
has_tool_calls = False
|
||||
|
|
@ -3612,6 +3782,22 @@ class AIAgent:
|
|||
if reasoning_text:
|
||||
self._fire_reasoning_delta(reasoning_text)
|
||||
return stream.get_final_response()
|
||||
except (_httpx.RemoteProtocolError, _httpx.ReadTimeout, _httpx.ConnectError, ConnectionError) as exc:
|
||||
if attempt < max_stream_retries:
|
||||
logger.debug(
|
||||
"Codex Responses stream transport failed (attempt %s/%s); retrying. %s error=%s",
|
||||
attempt + 1,
|
||||
max_stream_retries + 1,
|
||||
self._client_log_context(),
|
||||
exc,
|
||||
)
|
||||
continue
|
||||
logger.debug(
|
||||
"Codex Responses stream transport failed; falling back to create(stream=True). %s error=%s",
|
||||
self._client_log_context(),
|
||||
exc,
|
||||
)
|
||||
return self._run_codex_create_stream_fallback(api_kwargs, client=active_client)
|
||||
except RuntimeError as exc:
|
||||
err_text = str(exc)
|
||||
missing_completed = "response.completed" in err_text
|
||||
|
|
@ -3774,6 +3960,100 @@ class AIAgent:
|
|||
self._is_anthropic_oauth = _is_oauth_token(new_token)
|
||||
return True
|
||||
|
||||
def _apply_client_headers_for_base_url(self, base_url: str) -> None:
|
||||
from agent.auxiliary_client import _OR_HEADERS
|
||||
|
||||
normalized = (base_url or "").lower()
|
||||
if "openrouter" in normalized:
|
||||
self._client_kwargs["default_headers"] = dict(_OR_HEADERS)
|
||||
elif "api.githubcopilot.com" in normalized:
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
self._client_kwargs["default_headers"] = copilot_default_headers()
|
||||
elif "api.kimi.com" in normalized:
|
||||
self._client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.3"}
|
||||
else:
|
||||
self._client_kwargs.pop("default_headers", None)
|
||||
|
||||
def _swap_credential(self, entry) -> None:
|
||||
runtime_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
|
||||
runtime_base = getattr(entry, "runtime_base_url", None) or getattr(entry, "base_url", None) or self.base_url
|
||||
|
||||
if self.api_mode == "anthropic_messages":
|
||||
from agent.anthropic_adapter import build_anthropic_client, _is_oauth_token
|
||||
|
||||
try:
|
||||
self._anthropic_client.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
self._anthropic_api_key = runtime_key
|
||||
self._anthropic_base_url = runtime_base
|
||||
self._anthropic_client = build_anthropic_client(runtime_key, runtime_base)
|
||||
self._is_anthropic_oauth = _is_oauth_token(runtime_key) if self.provider == "anthropic" else False
|
||||
self.api_key = runtime_key
|
||||
self.base_url = runtime_base
|
||||
return
|
||||
|
||||
self.api_key = runtime_key
|
||||
self.base_url = runtime_base.rstrip("/") if isinstance(runtime_base, str) else runtime_base
|
||||
self._client_kwargs["api_key"] = self.api_key
|
||||
self._client_kwargs["base_url"] = self.base_url
|
||||
self._apply_client_headers_for_base_url(self.base_url)
|
||||
self._replace_primary_openai_client(reason="credential_rotation")
|
||||
|
||||
def _recover_with_credential_pool(
|
||||
self,
|
||||
*,
|
||||
status_code: Optional[int],
|
||||
has_retried_429: bool,
|
||||
) -> tuple[bool, bool]:
|
||||
"""Attempt credential recovery via pool rotation.
|
||||
|
||||
Returns (recovered, has_retried_429).
|
||||
On 429: first occurrence retries same credential (sets flag True).
|
||||
second consecutive 429 rotates to next credential (resets flag).
|
||||
On 402: immediately rotates (billing exhaustion won't resolve with retry).
|
||||
On 401: attempts token refresh before rotating.
|
||||
"""
|
||||
pool = self._credential_pool
|
||||
if pool is None or status_code is None:
|
||||
return False, has_retried_429
|
||||
|
||||
if status_code == 402:
|
||||
next_entry = pool.mark_exhausted_and_rotate(status_code=402)
|
||||
if next_entry is not None:
|
||||
logger.info(f"Credential 402 (billing) — rotated to pool entry {getattr(next_entry, 'id', '?')}")
|
||||
self._swap_credential(next_entry)
|
||||
return True, False
|
||||
return False, has_retried_429
|
||||
|
||||
if status_code == 429:
|
||||
if not has_retried_429:
|
||||
return False, True
|
||||
next_entry = pool.mark_exhausted_and_rotate(status_code=429)
|
||||
if next_entry is not None:
|
||||
logger.info(f"Credential 429 (rate limit) — rotated to pool entry {getattr(next_entry, 'id', '?')}")
|
||||
self._swap_credential(next_entry)
|
||||
return True, False
|
||||
return False, True
|
||||
|
||||
if status_code == 401:
|
||||
refreshed = pool.try_refresh_current()
|
||||
if refreshed is not None:
|
||||
logger.info(f"Credential 401 — refreshed pool entry {getattr(refreshed, 'id', '?')}")
|
||||
self._swap_credential(refreshed)
|
||||
return True, has_retried_429
|
||||
# Refresh failed — rotate to next credential instead of giving up.
|
||||
# The failed entry is already marked exhausted by try_refresh_current().
|
||||
next_entry = pool.mark_exhausted_and_rotate(status_code=401)
|
||||
if next_entry is not None:
|
||||
logger.info(f"Credential 401 (refresh failed) — rotated to pool entry {getattr(next_entry, 'id', '?')}")
|
||||
self._swap_credential(next_entry)
|
||||
return True, False
|
||||
|
||||
return False, has_retried_429
|
||||
|
||||
def _anthropic_messages_create(self, api_kwargs: dict):
|
||||
if self.api_mode == "anthropic_messages":
|
||||
self._try_refresh_anthropic_client_credentials()
|
||||
|
|
@ -4243,6 +4523,11 @@ class AIAgent:
|
|||
type(e).__name__,
|
||||
e,
|
||||
)
|
||||
self._emit_status(
|
||||
f"⚠️ Connection to provider dropped "
|
||||
f"({type(e).__name__}). Reconnecting… "
|
||||
f"(attempt {_stream_attempt + 2}/{_max_stream_retries + 1})"
|
||||
)
|
||||
# Close the stale request client before retry
|
||||
stale = request_client_holder.get("client")
|
||||
if stale is not None:
|
||||
|
|
@ -4250,7 +4535,21 @@ class AIAgent:
|
|||
stale, reason="stream_retry_cleanup"
|
||||
)
|
||||
request_client_holder["client"] = None
|
||||
# Also rebuild the primary client to purge
|
||||
# any dead connections from the pool.
|
||||
try:
|
||||
self._replace_primary_openai_client(
|
||||
reason="stream_retry_pool_cleanup"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
continue
|
||||
self._emit_status(
|
||||
"❌ Connection to provider failed after "
|
||||
f"{_max_stream_retries + 1} attempts. "
|
||||
"The provider may be experiencing issues — "
|
||||
"try again in a moment."
|
||||
)
|
||||
logger.warning(
|
||||
"Streaming exhausted %s retries on transient error, "
|
||||
"falling back to non-streaming: %s",
|
||||
|
|
@ -4322,6 +4621,12 @@ class AIAgent:
|
|||
self._close_request_openai_client(rc, reason="stale_stream_kill")
|
||||
except Exception:
|
||||
pass
|
||||
# Rebuild the primary client too — its connection pool
|
||||
# may hold dead sockets from the same provider outage.
|
||||
try:
|
||||
self._replace_primary_openai_client(reason="stale_stream_pool_cleanup")
|
||||
except Exception:
|
||||
pass
|
||||
# Reset the timer so we don't kill repeatedly while
|
||||
# the inner thread processes the closure.
|
||||
last_chunk_time["t"] = time.time()
|
||||
|
|
@ -4722,6 +5027,19 @@ class AIAgent:
|
|||
tool_call.pop("call_id", None)
|
||||
tool_call.pop("response_item_id", None)
|
||||
|
||||
# GPT-5 and Codex models respond better to 'developer' than 'system'
|
||||
# for instruction-following. Swap the role at the API boundary so
|
||||
# internal message representation stays uniform ("system").
|
||||
_model_lower = (self.model or "").lower()
|
||||
if (
|
||||
sanitized_messages
|
||||
and sanitized_messages[0].get("role") == "system"
|
||||
and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS)
|
||||
):
|
||||
# Shallow-copy the list + first message only — rest stays shared.
|
||||
sanitized_messages = list(sanitized_messages)
|
||||
sanitized_messages[0] = {**sanitized_messages[0], "role": "developer"}
|
||||
|
||||
provider_preferences = {}
|
||||
if self.providers_allowed:
|
||||
provider_preferences["only"] = self.providers_allowed
|
||||
|
|
@ -5244,6 +5562,15 @@ class AIAgent:
|
|||
if _post_progress < 0.85:
|
||||
self._context_pressure_warned = False
|
||||
|
||||
# Clear the file-read dedup cache. After compression the original
|
||||
# read content is summarised away — if the model re-reads the same
|
||||
# file it needs the full content, not a "file unchanged" stub.
|
||||
try:
|
||||
from tools.file_tools import reset_file_dedup
|
||||
reset_file_dedup(task_id)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return compressed, new_system_prompt
|
||||
|
||||
def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
|
||||
|
|
@ -5408,7 +5735,7 @@ class AIAgent:
|
|||
args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str
|
||||
print(f" 📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}")
|
||||
|
||||
for _, name, args in parsed_calls:
|
||||
for tc, name, args in parsed_calls:
|
||||
if self.tool_progress_callback:
|
||||
try:
|
||||
preview = _build_tool_preview(name, args)
|
||||
|
|
@ -5416,6 +5743,13 @@ class AIAgent:
|
|||
except Exception as cb_err:
|
||||
logging.debug(f"Tool progress callback error: {cb_err}")
|
||||
|
||||
for tc, name, args in parsed_calls:
|
||||
if self.tool_start_callback:
|
||||
try:
|
||||
self.tool_start_callback(tc.id, name, args)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool start callback error: {cb_err}")
|
||||
|
||||
# ── Concurrent execution ─────────────────────────────────────────
|
||||
# Each slot holds (function_name, function_args, function_result, duration, error_flag)
|
||||
results = [None] * num_tools
|
||||
|
|
@ -5486,6 +5820,12 @@ class AIAgent:
|
|||
response_preview = function_result[:self.log_prefix_chars] + "..." if len(function_result) > self.log_prefix_chars else function_result
|
||||
print(f" ✅ Tool {i+1} completed in {tool_duration:.2f}s - {response_preview}")
|
||||
|
||||
if self.tool_complete_callback:
|
||||
try:
|
||||
self.tool_complete_callback(tc.id, name, args, function_result)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool complete callback error: {cb_err}")
|
||||
|
||||
# Truncate oversized results
|
||||
MAX_TOOL_RESULT_CHARS = 100_000
|
||||
if len(function_result) > MAX_TOOL_RESULT_CHARS:
|
||||
|
|
@ -5574,6 +5914,12 @@ class AIAgent:
|
|||
except Exception as cb_err:
|
||||
logging.debug(f"Tool progress callback error: {cb_err}")
|
||||
|
||||
if self.tool_start_callback:
|
||||
try:
|
||||
self.tool_start_callback(tool_call.id, function_name, function_args)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool start callback error: {cb_err}")
|
||||
|
||||
# Checkpoint: snapshot working dir before file-mutating tools
|
||||
if function_name in ("write_file", "patch") and self._checkpoint_mgr.enabled:
|
||||
try:
|
||||
|
|
@ -5738,6 +6084,12 @@ class AIAgent:
|
|||
logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
|
||||
logging.debug(f"Tool result ({len(function_result)} chars): {function_result}")
|
||||
|
||||
if self.tool_complete_callback:
|
||||
try:
|
||||
self.tool_complete_callback(tool_call.id, function_name, function_args, function_result)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool complete callback error: {cb_err}")
|
||||
|
||||
# Guard against tools returning absurdly large content that would
|
||||
# blow up the context window. 100K chars ≈ 25K tokens — generous
|
||||
# enough for any reasonable tool output but prevents catastrophic
|
||||
|
|
@ -6076,6 +6428,20 @@ class AIAgent:
|
|||
self._last_content_with_tools = None
|
||||
self._mute_post_response = False
|
||||
self._surrogate_sanitized = False
|
||||
|
||||
# Pre-turn connection health check: detect and clean up dead TCP
|
||||
# connections left over from provider outages or dropped streams.
|
||||
# This prevents the next API call from hanging on a zombie socket.
|
||||
if self.api_mode != "anthropic_messages":
|
||||
try:
|
||||
if self._cleanup_dead_connections():
|
||||
self._emit_status(
|
||||
"🔌 Detected stale connections from a previous provider "
|
||||
"issue — cleaned up automatically. Proceeding with fresh "
|
||||
"connection."
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
# NOTE: _turns_since_memory and _iters_since_skill are NOT reset here.
|
||||
# They are initialized in __init__ and must persist across run_conversation
|
||||
# calls so that nudge logic accumulates correctly in CLI mode.
|
||||
|
|
@ -6254,6 +6620,12 @@ class AIAgent:
|
|||
)
|
||||
if len(messages) >= _orig_len:
|
||||
break # Cannot compress further
|
||||
# Compression created a new session — clear the history
|
||||
# reference so _flush_messages_to_session_db writes ALL
|
||||
# compressed messages to the new session's SQLite, not
|
||||
# skipping them because conversation_history is still the
|
||||
# pre-compression length.
|
||||
conversation_history = None
|
||||
# Re-estimate after compression
|
||||
_preflight_tokens = estimate_request_tokens_rough(
|
||||
messages,
|
||||
|
|
@ -6453,6 +6825,7 @@ class AIAgent:
|
|||
codex_auth_retry_attempted = False
|
||||
anthropic_auth_retry_attempted = False
|
||||
nous_auth_retry_attempted = False
|
||||
has_retried_429 = False
|
||||
restart_with_compressed_messages = False
|
||||
restart_with_length_continuation = False
|
||||
|
||||
|
|
@ -6888,6 +7261,7 @@ class AIAgent:
|
|||
if not self.quiet_mode:
|
||||
self._vprint(f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)")
|
||||
|
||||
has_retried_429 = False # Reset on success
|
||||
break # Success, exit retry loop
|
||||
|
||||
except InterruptedError:
|
||||
|
|
@ -6930,6 +7304,12 @@ class AIAgent:
|
|||
# prompt or prefill. Fall through to normal error path.
|
||||
|
||||
status_code = getattr(api_error, "status_code", None)
|
||||
recovered_with_pool, has_retried_429 = self._recover_with_credential_pool(
|
||||
status_code=status_code,
|
||||
has_retried_429=has_retried_429,
|
||||
)
|
||||
if recovered_with_pool:
|
||||
continue
|
||||
if (
|
||||
self.api_mode == "codex_responses"
|
||||
and self.provider == "openai-codex"
|
||||
|
|
@ -7038,10 +7418,17 @@ class AIAgent:
|
|||
or "quota" in error_msg
|
||||
)
|
||||
if is_rate_limited and self._fallback_index < len(self._fallback_chain):
|
||||
self._emit_status("⚠️ Rate limited — switching to fallback provider...")
|
||||
if self._try_activate_fallback():
|
||||
retry_count = 0
|
||||
continue
|
||||
# Don't eagerly fallback if credential pool rotation may
|
||||
# still recover. The pool's retry-then-rotate cycle needs
|
||||
# at least one more attempt to fire — jumping to a fallback
|
||||
# provider here short-circuits it.
|
||||
pool = self._credential_pool
|
||||
pool_may_recover = pool is not None and pool.has_available()
|
||||
if not pool_may_recover:
|
||||
self._emit_status("⚠️ Rate limited — switching to fallback provider...")
|
||||
if self._try_activate_fallback():
|
||||
retry_count = 0
|
||||
continue
|
||||
|
||||
is_payload_too_large = (
|
||||
status_code == 413
|
||||
|
|
@ -7054,6 +7441,7 @@ class AIAgent:
|
|||
compression_attempts += 1
|
||||
if compression_attempts > max_compression_attempts:
|
||||
self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.", force=True)
|
||||
self._vprint(f"{self.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
|
||||
logging.error(f"{self.log_prefix}413 compression failed after {max_compression_attempts} attempts.")
|
||||
self._persist_session(messages, conversation_history)
|
||||
return {
|
||||
|
|
@ -7078,6 +7466,7 @@ class AIAgent:
|
|||
break
|
||||
else:
|
||||
self._vprint(f"{self.log_prefix}❌ Payload too large and cannot compress further.", force=True)
|
||||
self._vprint(f"{self.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
|
||||
logging.error(f"{self.log_prefix}413 payload too large. Cannot compress further.")
|
||||
self._persist_session(messages, conversation_history)
|
||||
return {
|
||||
|
|
@ -7154,6 +7543,7 @@ class AIAgent:
|
|||
compression_attempts += 1
|
||||
if compression_attempts > max_compression_attempts:
|
||||
self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True)
|
||||
self._vprint(f"{self.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
|
||||
logging.error(f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
|
||||
self._persist_session(messages, conversation_history)
|
||||
return {
|
||||
|
|
@ -7180,7 +7570,7 @@ class AIAgent:
|
|||
else:
|
||||
# Can't compress further and already at minimum tier
|
||||
self._vprint(f"{self.log_prefix}❌ Context length exceeded and cannot compress further.", force=True)
|
||||
self._vprint(f"{self.log_prefix} 💡 The conversation has accumulated too much content.", force=True)
|
||||
self._vprint(f"{self.log_prefix} 💡 The conversation has accumulated too much content. Try /new to start fresh, or /compress to manually trigger compression.", force=True)
|
||||
logging.error(f"{self.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot compress further.")
|
||||
self._persist_session(messages, conversation_history)
|
||||
return {
|
||||
|
|
@ -7769,6 +8159,10 @@ class AIAgent:
|
|||
approx_tokens=self.context_compressor.last_prompt_tokens,
|
||||
task_id=effective_task_id,
|
||||
)
|
||||
# Compression created a new session — clear history so
|
||||
# _flush_messages_to_session_db writes compressed messages
|
||||
# to the new session (see preflight compression comment).
|
||||
conversation_history = None
|
||||
|
||||
# Save session log incrementally (so progress is visible even if interrupted)
|
||||
self._session_messages = messages
|
||||
|
|
@ -8129,9 +8523,9 @@ class AIAgent:
|
|||
|
||||
def main(
|
||||
query: str = None,
|
||||
model: str = "anthropic/claude-opus-4.6",
|
||||
model: str = "",
|
||||
api_key: str = None,
|
||||
base_url: str = "https://openrouter.ai/api/v1",
|
||||
base_url: str = "",
|
||||
max_turns: int = 10,
|
||||
enabled_toolsets: str = None,
|
||||
disabled_toolsets: str = None,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue