From 0a9d42ce402cc1a4e12dee18a313c1db2e0a02e3 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Fri, 3 Jul 2026 19:54:09 +0530
Subject: [PATCH] fix(web_tools): delegate backend availability to provider registry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Plugin-registered web providers (registered via agent.web_search_registry)
were invisible to the tool-availability gate: _is_backend_available() was a
hardcoded env-var if-chain that returned False for any name outside the
eight built-in backends. Because check_web_api_key() is the check_fn for
both web_search and web_extract, a working custom provider with no built-in
creds left both tools filtered out of the toolset entirely.

Fix at the single chokepoint: _is_backend_available() now delegates
non-legacy backend names to the registered provider's is_available(),
falling back to the legacy built-in probes for known names and unregistered
providers. Because _get_backend(), _get_capability_backend(), and
check_web_api_key() all resolve availability through this one function, the
fix cascades to every caller — including the per-capability extract
selection that produced a dead-end 'search-only' error (#32698).

The two remaining hardcoded whitelist early-returns (_get_backend,
check_web_api_key) now also accept registered names, and both walk
registered providers as a final fallback so a custom backend still resolves
when no built-in has credentials.

Built-in backend priority is preserved unchanged: the registry is consulted
only for names outside _LEGACY_WEB_BACKENDS.
Fixes #28651
Fixes #31873
Fixes #32698
---
Reviewer notes (text in this section is ignored by git-am):
- check_web_api_key(): an explicitly configured backend (built-in or
  registered) stays authoritative, as it was before this patch and as it
  is in _get_backend(). The first revision fell through to "any other
  backend with credentials", which enabled the tools while _get_backend()
  still returned the unusable configured name.
- check_web_api_key(): the config is now read with `.get("backend") or ""`
  like _get_backend(), so a null `backend:` value no longer raises
  AttributeError.
- Diffstat and hunk headers are updated for this revision; the post-image
  blob id on the index line is carried over unchanged.

 tools/web_tools.py | 114 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 107 insertions(+), 7 deletions(-)

diff --git a/tools/web_tools.py b/tools/web_tools.py
index e66d0ee0f..be558d5ad 100644
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -136,6 +136,65 @@ def _load_web_config() -> dict:
     except (ImportError, Exception):
         return {}
 
+
+# The built-in web backends whose availability is driven by hardcoded
+# env-var / package / OAuth probes below. Any name NOT in this set is a
+# candidate plugin-registered provider and must be resolved through the
+# web_search_registry (``is_available()``) instead. Kept as a single named
+# constant so the whitelist early-returns and the availability chokepoint
+# stay in sync.
+_LEGACY_WEB_BACKENDS = frozenset(
+    {"parallel", "firecrawl", "tavily", "exa", "searxng", "brave-free", "ddgs", "xai"}
+)
+
+
+def _registered_web_provider(backend: str):
+    """Return a plugin-registered web provider by name, or ``None``.
+
+    Consults ``agent.web_search_registry`` so backends contributed by the
+    plugin system (which are absent from :data:`_LEGACY_WEB_BACKENDS`) are
+    discoverable during availability/selection resolution. Returns ``None``
+    on any lookup failure so callers can fall through to legacy checks.
+    """
+    if not backend:
+        return None
+    try:
+        from agent.web_search_registry import get_provider
+
+        return get_provider(backend)
+    except Exception as exc:  # noqa: BLE001 — registry optional; never fatal
+        logger.debug("web provider registry lookup failed for %r: %s", backend, exc)
+        return None
+
+
+def _registered_web_provider_available(backend: str):
+    """Availability of a *registered* web provider, or ``None`` if unregistered.
+
+    Returns ``True``/``False`` when *backend* names a registered provider
+    (calling its ``is_available()``), or ``None`` when it isn't registered —
+    letting the caller fall through to the legacy built-in probes.
+    """
+    provider = _registered_web_provider(backend)
+    if provider is None:
+        return None
+    try:
+        return bool(provider.is_available())
+    except Exception as exc:  # noqa: BLE001 — a broken provider is "unavailable"
+        logger.debug("web provider %r.is_available() raised: %s", backend, exc)
+        return False
+
+
+def _list_registered_web_providers():
+    """Return all plugin-registered web providers (empty list on failure)."""
+    try:
+        from agent.web_search_registry import list_providers
+
+        return list_providers()
+    except Exception as exc:  # noqa: BLE001 — registry optional; never fatal
+        logger.debug("web provider registry list failed: %s", exc)
+        return []
+
+
 
 def _get_backend() -> str:
     """Determine which web backend to use (shared fallback).
@@ -144,7 +203,7 @@ def _get_backend() -> str:
     keys manually without running setup.
     """
     configured = (_load_web_config().get("backend") or "").lower().strip()
-    if configured in {"parallel", "firecrawl", "tavily", "exa", "searxng", "brave-free", "ddgs", "xai"}:
+    if configured in _LEGACY_WEB_BACKENDS or _registered_web_provider(configured) is not None:
         return configured
 
     # Fallback for manual / legacy config — pick the highest-priority
@@ -168,6 +227,14 @@ def _get_backend() -> str:
         if available:
             return backend
 
+    # Final fallback: walk plugin-registered providers so a custom backend
+    # (with no built-in creds present) still resolves. Built-in names are
+    # already covered above, so this only surfaces plugin-contributed
+    # providers via their own is_available() gate.
+    for provider in _list_registered_web_providers():
+        if provider.name not in _LEGACY_WEB_BACKENDS and _is_backend_available(provider.name):
+            return provider.name
+
     return "firecrawl"  # default (backward compat)
 
 
@@ -210,7 +277,22 @@ def _get_capability_backend(capability: str) -> str:
 
 
 def _is_backend_available(backend: str) -> bool:
-    """Return True when the selected backend is currently usable."""
+    """Return True when the selected backend is currently usable.
+
+    For plugin-registered backends (any name outside
+    :data:`_LEGACY_WEB_BACKENDS`), availability is delegated to the
+    provider's ``is_available()`` via the web_search_registry. This is the
+    single chokepoint through which ``_get_backend``,
+    ``_get_capability_backend``, and ``check_web_api_key`` all resolve
+    availability — fixing custom-provider discovery for every caller at once
+    (issues #28651, #31873, #32698). Built-in backends keep their cheap
+    hardcoded probes below.
+    """
+    backend = (backend or "").lower().strip()
+    if backend not in _LEGACY_WEB_BACKENDS:
+        registered = _registered_web_provider_available(backend)
+        if registered is not None:
+            return registered
     if backend == "exa":
         return _has_env("EXA_API_KEY")
     if backend == "parallel":
@@ -861,13 +943,31 @@ async def web_extract_tool(
 
 # Convenience function to check Firecrawl credentials
 def check_web_api_key() -> bool:
-    """Check whether the configured web backend is available."""
-    configured = _load_web_config().get("backend", "").lower().strip()
-    if configured in {"exa", "parallel", "firecrawl", "tavily", "searxng", "brave-free", "ddgs", "xai"}:
+    """Check whether the configured web backend is available.
+
+    Used as the ``check_fn`` gate for the ``web_search`` and ``web_extract``
+    tool registry entries — so a plugin-registered provider that reports
+    ``is_available()`` must light the tools up even when no built-in backend
+    has credentials (issues #28651, #31873). Resolution funnels through
+    :func:`_is_backend_available`, which delegates non-legacy names to the
+    registry.
+
+    An explicitly configured backend (built-in or registered) is
+    authoritative: its own availability is returned without falling back
+    to other backends, mirroring :func:`_get_backend`, which returns the
+    configured name unconditionally.
+    """
+    configured = (_load_web_config().get("backend") or "").lower().strip()
+    if configured in _LEGACY_WEB_BACKENDS or _registered_web_provider(configured) is not None:
         return _is_backend_available(configured)
+    # Any built-in backend with credentials present.
+    if any(_is_backend_available(backend) for backend in _LEGACY_WEB_BACKENDS):
+        return True
+    # Any plugin-registered provider that reports itself available.
     return any(
-        _is_backend_available(backend)
-        for backend in ("exa", "parallel", "firecrawl", "tavily", "searxng", "brave-free", "ddgs", "xai")
+        _is_backend_available(provider.name)
+        for provider in _list_registered_web_providers()
+        if provider.name not in _LEGACY_WEB_BACKENDS
     )
 
 