diff --git a/tools/web_tools.py b/tools/web_tools.py index e66d0ee0f..be558d5ad 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -136,6 +136,65 @@ def _load_web_config() -> dict: except (ImportError, Exception): return {} + +# The built-in web backends whose availability is driven by hardcoded +# env-var / package / OAuth probes below. Any name NOT in this set is a +# candidate plugin-registered provider and must be resolved through the +# web_search_registry (``is_available()``) instead. Kept as a single named +# constant so the whitelist early-returns and the availability chokepoint +# stay in sync. +_LEGACY_WEB_BACKENDS = frozenset( + {"parallel", "firecrawl", "tavily", "exa", "searxng", "brave-free", "ddgs", "xai"} +) + + +def _registered_web_provider(backend: str): + """Return a plugin-registered web provider by name, or ``None``. + + Consults ``agent.web_search_registry`` so backends contributed by the + plugin system (which are absent from :data:`_LEGACY_WEB_BACKENDS`) are + discoverable during availability/selection resolution. Returns ``None`` + on any lookup failure so callers can fall through to legacy checks. + """ + if not backend: + return None + try: + from agent.web_search_registry import get_provider + + return get_provider(backend) + except Exception as exc: # noqa: BLE001 — registry optional; never fatal + logger.debug("web provider registry lookup failed for %r: %s", backend, exc) + return None + + +def _registered_web_provider_available(backend: str): + """Availability of a *registered* web provider, or ``None`` if unregistered. + + Returns ``True``/``False`` when *backend* names a registered provider + (calling its ``is_available()``), or ``None`` when it isn't registered — + letting the caller fall through to the legacy built-in probes. + """ + provider = _registered_web_provider(backend) + if provider is None: + return None + try: + return bool(provider.is_available()) + except Exception as exc: # noqa: BLE001 — a broken provider is "unavailable" + logger.debug("web provider %r.is_available() raised: %s", backend, exc) + return False + + +def _list_registered_web_providers(): + """Return all plugin-registered web providers (empty list on failure).""" + try: + from agent.web_search_registry import list_providers + + return list_providers() + except Exception as exc: # noqa: BLE001 — registry optional; never fatal + logger.debug("web provider registry list failed: %s", exc) + return [] + + def _get_backend() -> str: """Determine which web backend to use (shared fallback). @@ -144,7 +203,7 @@ def _get_backend() -> str: keys manually without running setup. """ configured = (_load_web_config().get("backend") or "").lower().strip() - if configured in {"parallel", "firecrawl", "tavily", "exa", "searxng", "brave-free", "ddgs", "xai"}: + if configured in _LEGACY_WEB_BACKENDS or _registered_web_provider(configured) is not None: return configured # Fallback for manual / legacy config — pick the highest-priority @@ -168,6 +227,14 @@ def _get_backend() -> str: if available: return backend + # Final fallback: walk plugin-registered providers so a custom backend + # (with no built-in creds present) still resolves. Built-in names are + # already covered above, so this only surfaces plugin-contributed + # providers via their own is_available() gate. + for provider in _list_registered_web_providers(): + if provider.name not in _LEGACY_WEB_BACKENDS and _is_backend_available(provider.name): + return provider.name + return "firecrawl" # default (backward compat) @@ -210,7 +277,22 @@ def _get_capability_backend(capability: str) -> str: def _is_backend_available(backend: str) -> bool: - """Return True when the selected backend is currently usable.""" + """Return True when the selected backend is currently usable. + + For plugin-registered backends (any name outside + :data:`_LEGACY_WEB_BACKENDS`), availability is delegated to the + provider's ``is_available()`` via the web_search_registry. This is the + single chokepoint through which ``_get_backend``, + ``_get_capability_backend``, and ``check_web_api_key`` all resolve + availability — fixing custom-provider discovery for every caller at once + (issues #28651, #31873, #32698). Built-in backends keep their cheap + hardcoded probes below. + """ + backend = (backend or "").lower().strip() + if backend not in _LEGACY_WEB_BACKENDS: + registered = _registered_web_provider_available(backend) + if registered is not None: + return registered if backend == "exa": return _has_env("EXA_API_KEY") if backend == "parallel": @@ -861,13 +943,26 @@ async def web_extract_tool( # Convenience function to check Firecrawl credentials def check_web_api_key() -> bool: - """Check whether the configured web backend is available.""" + """Check whether the configured web backend is available. + + Used as the ``check_fn`` gate for the ``web_search`` and ``web_extract`` + tool registry entries — so a plugin-registered provider that reports + ``is_available()`` must light the tools up even when no built-in backend + has credentials (issues #28651, #31873). Resolution funnels through + :func:`_is_backend_available`, which delegates non-legacy names to the + registry. + """ configured = _load_web_config().get("backend", "").lower().strip() - if configured in {"exa", "parallel", "firecrawl", "tavily", "searxng", "brave-free", "ddgs", "xai"}: - return _is_backend_available(configured) + if configured and _is_backend_available(configured): + return True + # Any built-in backend with credentials present. + if any(_is_backend_available(backend) for backend in _LEGACY_WEB_BACKENDS): + return True + # Any plugin-registered provider that reports itself available. return any( - _is_backend_available(backend) - for backend in ("exa", "parallel", "firecrawl", "tavily", "searxng", "brave-free", "ddgs", "xai") + _is_backend_available(provider.name) + for provider in _list_registered_web_providers() + if provider.name not in _LEGACY_WEB_BACKENDS )