fix(web_tools): delegate backend availability to provider registry

Plugin-registered web providers (registered via agent.web_search_registry)
were invisible to the tool-availability gate: _is_backend_available() was a
hardcoded env-var if-chain that returned False for any name outside the eight
built-in backends. Because check_web_api_key() is the check_fn for both
web_search and web_extract, a working custom provider with no built-in creds
left both tools filtered out of the toolset entirely.

Fix at the single chokepoint: _is_backend_available() now delegates non-legacy
backend names to the registered provider's is_available(), falling back to the
legacy built-in probes for known names and unregistered providers. Because
_get_backend(), _get_capability_backend(), and check_web_api_key() all resolve
availability through this one function, the fix cascades to every caller —
including the per-capability extract selection that produced a dead-end
'search-only' error (#32698). The two remaining hardcoded whitelist
early-returns (_get_backend, check_web_api_key) now also accept registered
names, and both walk registered providers as a final fallback so a custom
backend still resolves when no built-in has credentials.

Built-in backend priority is preserved unchanged: the registry is consulted
only for names outside _LEGACY_WEB_BACKENDS.

Fixes #28651
Fixes #31873
Fixes #32698
This commit is contained in:
kshitijk4poor 2026-07-03 19:54:09 +05:30 committed by kshitij
parent def6d6fe1b
commit 0a9d42ce40

View file

@ -136,6 +136,65 @@ def _load_web_config() -> dict:
except (ImportError, Exception):
return {}
# The built-in web backends whose availability is driven by hardcoded
# env-var / package / OAuth probes below. Any name NOT in this set is a
# candidate plugin-registered provider and must be resolved through the
# web_search_registry (``is_available()``) instead. Kept as a single named
# constant so the whitelist early-returns and the availability chokepoint
# stay in sync.
_LEGACY_WEB_BACKENDS = frozenset(
{"parallel", "firecrawl", "tavily", "exa", "searxng", "brave-free", "ddgs", "xai"}
)
def _registered_web_provider(backend: str):
"""Return a plugin-registered web provider by name, or ``None``.
Consults ``agent.web_search_registry`` so backends contributed by the
plugin system (which are absent from :data:`_LEGACY_WEB_BACKENDS`) are
discoverable during availability/selection resolution. Returns ``None``
on any lookup failure so callers can fall through to legacy checks.
"""
if not backend:
return None
try:
from agent.web_search_registry import get_provider
return get_provider(backend)
except Exception as exc: # noqa: BLE001 — registry optional; never fatal
logger.debug("web provider registry lookup failed for %r: %s", backend, exc)
return None
def _registered_web_provider_available(backend: str):
"""Availability of a *registered* web provider, or ``None`` if unregistered.
Returns ``True``/``False`` when *backend* names a registered provider
(calling its ``is_available()``), or ``None`` when it isn't registered —
letting the caller fall through to the legacy built-in probes.
"""
provider = _registered_web_provider(backend)
if provider is None:
return None
try:
return bool(provider.is_available())
except Exception as exc: # noqa: BLE001 — a broken provider is "unavailable"
logger.debug("web provider %r.is_available() raised: %s", backend, exc)
return False
def _list_registered_web_providers():
"""Return all plugin-registered web providers (empty list on failure)."""
try:
from agent.web_search_registry import list_providers
return list_providers()
except Exception as exc: # noqa: BLE001 — registry optional; never fatal
logger.debug("web provider registry list failed: %s", exc)
return []
def _get_backend() -> str:
"""Determine which web backend to use (shared fallback).
@ -144,7 +203,7 @@ def _get_backend() -> str:
keys manually without running setup.
"""
configured = (_load_web_config().get("backend") or "").lower().strip()
if configured in {"parallel", "firecrawl", "tavily", "exa", "searxng", "brave-free", "ddgs", "xai"}:
if configured in _LEGACY_WEB_BACKENDS or _registered_web_provider(configured) is not None:
return configured
# Fallback for manual / legacy config — pick the highest-priority
@ -168,6 +227,14 @@ def _get_backend() -> str:
if available:
return backend
# Final fallback: walk plugin-registered providers so a custom backend
# (with no built-in creds present) still resolves. Built-in names are
# already covered above, so this only surfaces plugin-contributed
# providers via their own is_available() gate.
for provider in _list_registered_web_providers():
if provider.name not in _LEGACY_WEB_BACKENDS and _is_backend_available(provider.name):
return provider.name
return "firecrawl" # default (backward compat)
@ -210,7 +277,22 @@ def _get_capability_backend(capability: str) -> str:
def _is_backend_available(backend: str) -> bool:
"""Return True when the selected backend is currently usable."""
"""Return True when the selected backend is currently usable.
For plugin-registered backends (any name outside
:data:`_LEGACY_WEB_BACKENDS`), availability is delegated to the
provider's ``is_available()`` via the web_search_registry. This is the
single chokepoint through which ``_get_backend``,
``_get_capability_backend``, and ``check_web_api_key`` all resolve
availability fixing custom-provider discovery for every caller at once
(issues #28651, #31873, #32698). Built-in backends keep their cheap
hardcoded probes below.
"""
backend = (backend or "").lower().strip()
if backend not in _LEGACY_WEB_BACKENDS:
registered = _registered_web_provider_available(backend)
if registered is not None:
return registered
if backend == "exa":
return _has_env("EXA_API_KEY")
if backend == "parallel":
@ -861,13 +943,26 @@ async def web_extract_tool(
# Convenience function to check Firecrawl credentials
def check_web_api_key() -> bool:
"""Check whether the configured web backend is available."""
"""Check whether the configured web backend is available.
Used as the ``check_fn`` gate for the ``web_search`` and ``web_extract``
tool registry entries so a plugin-registered provider that reports
``is_available()`` must light the tools up even when no built-in backend
has credentials (issues #28651, #31873). Resolution funnels through
:func:`_is_backend_available`, which delegates non-legacy names to the
registry.
"""
configured = _load_web_config().get("backend", "").lower().strip()
if configured in {"exa", "parallel", "firecrawl", "tavily", "searxng", "brave-free", "ddgs", "xai"}:
return _is_backend_available(configured)
if configured and _is_backend_available(configured):
return True
# Any built-in backend with credentials present.
if any(_is_backend_available(backend) for backend in _LEGACY_WEB_BACKENDS):
return True
# Any plugin-registered provider that reports itself available.
return any(
_is_backend_available(backend)
for backend in ("exa", "parallel", "firecrawl", "tavily", "searxng", "brave-free", "ddgs", "xai")
_is_backend_available(provider.name)
for provider in _list_registered_web_providers()
if provider.name not in _LEGACY_WEB_BACKENDS
)