diff --git a/run_agent.py b/run_agent.py index 720bc19dc..482878019 100644 --- a/run_agent.py +++ b/run_agent.py @@ -5816,6 +5816,17 @@ class AIAgent: return primary_client with self._openai_client_lock(): request_kwargs = dict(self._client_kwargs) + # Per-request OpenAI-wire clients (used by both the non-streaming + # chat-completions path and the streaming chat-completions path + # in `_interruptible_api_call`) should not run the SDK's built-in + # retry loop: the agent's outer loop owns retries with credential + # rotation, provider fallback, and backoff that the SDK can't + # see. Leaving SDK retries on (default 2) compounds with our outer + # retries and lets a single hung provider request stretch to ~3x + # the per-call timeout before our stale detector reports it. + # Shared/primary clients and Anthropic / Bedrock paths are + # unaffected (they don't go through here). + request_kwargs["max_retries"] = 0 if ( base_url_host_matches(str(request_kwargs.get("base_url", "")), "api.githubcopilot.com") and self._api_kwargs_have_image_parts(api_kwargs or {})