fix(plugins): don't cache a failed discovery sweep as discovered

Root-cause hardening for the stranded-empty-registry failure behind
'No web search/extract provider configured': discover_and_load() set
_discovered=True before scanning, so a sweep that raised partway was
swallowed by callers as a warning and every later call early-returned
against an empty registry for the process lifetime. The flag now acts
only as a re-entrancy guard and is reset when the sweep raises, so the
next call retries discovery.
This commit is contained in:
teknium1 2026-06-11 11:52:23 -07:00 committed by Teknium
parent 32a73010bb
commit 114e265737
2 changed files with 47 additions and 0 deletions

View file

@ -1069,8 +1069,21 @@ class PluginManager:
self._plugin_skills.clear()
self._aux_tasks.clear()
self._context_engine = None
# Set the flag up front as a re-entrancy guard (a plugin's register()
# can transitively trigger discovery again), but reset it if the sweep
# raises so a failed scan is NOT cached as "discovered with an empty
# registry" — callers swallow the exception and would otherwise be
# permanently stranded on the early-return above (the "No web provider
# configured" class of failures).
self._discovered = True
try:
self._discover_and_load_inner()
except BaseException:
self._discovered = False
raise
def _discover_and_load_inner(self) -> None:
"""The actual discovery sweep — see :meth:`discover_and_load`."""
manifests: List[PluginManifest] = []
# 1. Bundled plugins (<repo>/plugins/<name>/)

View file

@ -365,6 +365,40 @@ class TestPluginDiscovery:
}
assert len(non_bundled) == 1
def test_failed_discovery_is_not_cached(self, tmp_path, monkeypatch):
    """A sweep that raises must not cache 'discovered' with no plugins.

    Regression for the stranded-empty-registry class of failures: callers
    (e.g. tools.web_tools._ensure_web_plugins_loaded) swallow discovery
    exceptions as warnings, so if a failed sweep flipped ``_discovered``
    permanently, every later call would early-return against an empty
    registry ("No web provider configured") for the process lifetime.
    """
    hermes_home = tmp_path / "hermes_test"
    _make_plugin_dir(hermes_home / "plugins", "retry_plugin")
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    mgr = PluginManager()

    def _boom(self_inner):
        raise RuntimeError("sweep failed")

    # Inject the failure inside a scoped context rather than calling
    # monkeypatch.undo() afterwards: undo() reverts *every* patch made
    # through the shared fixture (including ones applied by other fixtures
    # and the HERMES_HOME override above), whereas the context only
    # reverts the patch made inside it.
    with monkeypatch.context() as scoped:
        scoped.setattr(PluginManager, "_discover_and_load_inner", _boom)
        with pytest.raises(RuntimeError, match="sweep failed"):
            mgr.discover_and_load()

    assert mgr._discovered is False, "failed sweep was cached as discovered"

    # A later call (with discovery healthy again) must do the real scan.
    mgr.discover_and_load()
    assert mgr._discovered is True

    non_bundled = {
        n: p for n, p in mgr._plugins.items()
        if p.manifest.source != "bundled"
    }
    assert len(non_bundled) == 1
def test_discover_skips_dir_without_manifest(self, tmp_path, monkeypatch):
"""Directories without plugin.yaml are silently skipped."""
plugins_dir = tmp_path / "hermes_test" / "plugins"