feat(tools): progressive tool disclosure for MCP and plugin tools
Adds Tool Search, a structured-tools progressive-disclosure layer that
replaces MCP and non-core plugin tools in the model-visible tools array
with three bridge tools (tool_search / tool_describe / tool_call) when
the deferrable surface would consume more than a configurable percentage
of the active model's context window. Core Hermes tools are never deferred.
Default mode is 'auto' with a 10% context threshold, so small toolsets
pay no overhead. Set tools.tool_search.enabled to 'on' to force Tool
Search on, or to 'off' to disable it.
The design deliberately addresses the OpenClaw production failure modes
documented in the openclaw-tool-search-report:
- Core tools never defer (toolsets._HERMES_CORE_TOOLS). Addresses the
'tools silently missing from isolated cron turns' regression class
(openclaw#84141) by construction: there is no code path that can
drop a core tool.
- Catalog is stateless across turns — rebuilt from the live tool-defs
list on every assembly. No session-keyed Map that can drift out of
sync with the registry.
- tool_call unwraps the bridge call before any hook fires, so plugin
pre/post hooks, guardrails, approval flows, and the activity feed
all see the underlying tool name, not the bridge (addresses
openclaw#85588 and the verbose-mode complaint on openclaw#79823).
- The unwrap happens in both the parallel and sequential paths of
agent/tool_executor.py and also in handle_function_call, so direct
callers (sandboxed code, eval harnesses) are covered too.
- Bridge tools cannot invoke each other (recursion guard) and cannot
invoke core tools (those must be called directly).
- Tools mode only — no JS-sandbox code-mode. Keeps the surface small.
- Token estimation via cheap char/4 heuristic; precision isn't needed
for the threshold decision.
Files:
- tools/tool_search.py — new module (BM25 retrieval, classification,
threshold gate, bridge dispatch, unwrap helper).
- tests/tools/test_tool_search.py — 35 tests including the OpenClaw
#84141 regression guard.
- model_tools.py — wires assembly into _compute_tool_definitions as the
final step, adds skip_tool_search_assembly kwarg so the bridge can
see the real catalog, dispatches the three bridge tools.
- agent/tool_executor.py — unwraps tool_call in both parallel and
sequential parsing loops so checkpointing, guardrails, plugin hooks,
and tool-progress callbacks all observe the underlying tool name.
- hermes_cli/config.py — DEFAULT_CONFIG['tools']['tool_search'] block.
- website/docs/user-guide/features/tool-search.md — user docs.
Validation:
- 35/35 new tests pass.
- Existing tool/registry/model_tools/config/coercion/executor tests
(82 + 74 + small adjacents) green.
- Live E2E: 20 fake MCP tools registered, get_tool_definitions returns
3 bridges, tool_search returns top 3 hits, tool_describe returns
full schema, tool_call dispatches to the real underlying handler
and the underlying result is what the model sees.
- Reserved-name recursion guard verified live.
- Core-tool refusal via tool_call verified live.
This commit is contained in:
parent
73d73f1f0d
commit
369075dc95
6 changed files with 1453 additions and 1 deletions
108
model_tools.py
108
model_tools.py
|
|
@ -265,6 +265,7 @@ def get_tool_definitions(
|
|||
enabled_toolsets: List[str] = None,
|
||||
disabled_toolsets: List[str] = None,
|
||||
quiet_mode: bool = False,
|
||||
skip_tool_search_assembly: bool = False,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Get tool definitions for model API calls with toolset-based filtering.
|
||||
|
|
@ -275,6 +276,11 @@ def get_tool_definitions(
|
|||
enabled_toolsets: Only include tools from these toolsets.
|
||||
disabled_toolsets: Exclude tools from these toolsets (if enabled_toolsets is None).
|
||||
quiet_mode: Suppress status prints.
|
||||
skip_tool_search_assembly: When True, return the pre-assembly tool list
|
||||
(raw schemas for every enabled tool). Used internally by the
|
||||
tool_search / tool_describe bridge handlers so they can read the
|
||||
real catalog, not the already-collapsed one. Public callers should
|
||||
leave this False.
|
||||
|
||||
Returns:
|
||||
Filtered list of OpenAI-format tool definitions.
|
||||
|
|
@ -301,6 +307,7 @@ def get_tool_definitions(
|
|||
registry._generation,
|
||||
cfg_fp,
|
||||
bool(os.environ.get("HERMES_KANBAN_TASK")),
|
||||
bool(skip_tool_search_assembly),
|
||||
)
|
||||
cached = _tool_defs_cache.get(cache_key)
|
||||
if cached is not None:
|
||||
|
|
@ -312,7 +319,8 @@ def get_tool_definitions(
|
|||
# schemas are treated as read-only by all known callers.
|
||||
return list(cached)
|
||||
|
||||
result = _compute_tool_definitions(enabled_toolsets, disabled_toolsets, quiet_mode)
|
||||
result = _compute_tool_definitions(enabled_toolsets, disabled_toolsets, quiet_mode,
|
||||
skip_tool_search_assembly=skip_tool_search_assembly)
|
||||
if quiet_mode:
|
||||
# Cache the freshly-computed list, but hand callers a shallow copy so
|
||||
# downstream mutations (e.g. run_agent appending memory/LCM tool
|
||||
|
|
@ -330,6 +338,7 @@ def _compute_tool_definitions(
|
|||
enabled_toolsets: List[str] = None,
|
||||
disabled_toolsets: List[str] = None,
|
||||
quiet_mode: bool = False,
|
||||
skip_tool_search_assembly: bool = False,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Uncached implementation of :func:`get_tool_definitions`."""
|
||||
# Determine which tool names the caller wants
|
||||
|
|
@ -481,9 +490,61 @@ def _compute_tool_definitions(
|
|||
except Exception as e: # pragma: no cover — defensive
|
||||
logger.warning("Schema sanitization skipped: %s", e)
|
||||
|
||||
# ── Tool Search (progressive disclosure) ────────────────────────────
|
||||
# Conditionally replace MCP + plugin (non-core) tools with three bridge
|
||||
# tools (tool_search / tool_describe / tool_call) when the deferrable
|
||||
# surface exceeds the configured threshold (default 10% of context
|
||||
# window). Core Hermes tools (toolsets._HERMES_CORE_TOOLS) are NEVER
|
||||
# deferred. See tools/tool_search.py for full design notes.
|
||||
#
|
||||
# This is deliberately the last step before returning — sanitization
|
||||
# has already normalized schemas, and the assembly is idempotent in
|
||||
# case some caller invokes get_tool_definitions twice.
|
||||
try:
|
||||
from tools.tool_search import assemble_tool_defs, load_config as _load_ts_config
|
||||
ts_cfg = _load_ts_config()
|
||||
if not skip_tool_search_assembly and ts_cfg.enabled != "off":
|
||||
context_length = _resolve_active_context_length()
|
||||
assembly = assemble_tool_defs(
|
||||
filtered_tools,
|
||||
context_length=context_length,
|
||||
config=ts_cfg,
|
||||
)
|
||||
if assembly.activated and not quiet_mode:
|
||||
print(
|
||||
f"🔎 Tool Search: {assembly.deferred_count} MCP/plugin tools deferred "
|
||||
f"(~{assembly.deferred_tokens} tokens) behind tool_search/describe/call. "
|
||||
f"Threshold ~{assembly.threshold_tokens} tokens."
|
||||
)
|
||||
filtered_tools = assembly.tool_defs
|
||||
except Exception as e: # pragma: no cover — never break tool loading
|
||||
logger.warning("Tool search assembly skipped: %s", e)
|
||||
|
||||
return filtered_tools
|
||||
|
||||
|
||||
def _resolve_active_context_length() -> int:
    """Look up the active model's context length for the tool-search gate.

    Reads the ``model`` section of the Hermes config, uses its ``model``
    entry (or, failing that, its ``default`` entry) as the model id, and
    asks ``agent.model_metadata.get_model_context_length`` for the length.

    Returns:
        The context length as an int, or 0 when the model can't be
        resolved (no model id configured, or any error during lookup) —
        ``should_activate`` falls back to a fixed token cutoff in that case.
    """
    try:
        # Imported inside the try so an import failure degrades to 0
        # instead of breaking tool loading.
        from hermes_cli.config import load_config as _load

        cfg = _load() or {}
        # Single lookup + single guard: anything other than a dict section
        # (missing key, bare string, None, ...) is treated as "no model
        # config".
        model_cfg = cfg.get("model")
        if not isinstance(model_cfg, dict):
            model_cfg = {}
        model_id = (model_cfg.get("model") or model_cfg.get("default") or "").strip()
        if not model_id:
            return 0

        # Deferred until a model id is actually known.
        from agent.model_metadata import get_model_context_length

        return int(get_model_context_length(model_id) or 0)
    except Exception as e:
        # Best-effort lookup: never let a resolution failure propagate.
        logger.debug("Could not resolve active context length: %s", e)
        return 0
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# handle_function_call (the main dispatcher)
|
||||
# =============================================================================
|
||||
|
|
@ -767,6 +828,51 @@ def handle_function_call(
|
|||
# Coerce string arguments to their schema-declared types (e.g. "42"→42)
|
||||
function_args = coerce_tool_args(function_name, function_args)
|
||||
|
||||
# ── Tool Search bridge dispatch ──────────────────────────────────
|
||||
# tool_search and tool_describe are pure catalog reads — handle them
|
||||
# inline. tool_call is unwrapped to the underlying tool so that every
|
||||
# downstream hook (pre/post, edit approval, guardrails) sees the real
|
||||
# tool name, not the bridge.
|
||||
_ts_mod = None
|
||||
try:
|
||||
from tools import tool_search as _ts_mod # noqa: F401
|
||||
except Exception:
|
||||
_ts_mod = None
|
||||
|
||||
if _ts_mod is not None and _ts_mod.is_bridge_tool(function_name):
|
||||
try:
|
||||
# Use skip_tool_search_assembly=True so we see the real catalog,
|
||||
# not the already-collapsed bridge-only list (the bridge would
|
||||
# otherwise be searching only itself).
|
||||
current_defs = get_tool_definitions(
|
||||
quiet_mode=True, skip_tool_search_assembly=True,
|
||||
) or []
|
||||
except Exception:
|
||||
current_defs = []
|
||||
if function_name == _ts_mod.TOOL_SEARCH_NAME:
|
||||
return _ts_mod.dispatch_tool_search(function_args or {},
|
||||
current_tool_defs=current_defs)
|
||||
if function_name == _ts_mod.TOOL_DESCRIBE_NAME:
|
||||
return _ts_mod.dispatch_tool_describe(function_args or {},
|
||||
current_tool_defs=current_defs)
|
||||
if function_name == _ts_mod.TOOL_CALL_NAME:
|
||||
underlying_name, underlying_args, err = _ts_mod.resolve_underlying_call(function_args or {})
|
||||
if err or not underlying_name:
|
||||
return json.dumps({"error": err or "tool_call could not be resolved"},
|
||||
ensure_ascii=False)
|
||||
# Recurse with the underlying tool. All hooks fire against the
|
||||
# real tool name. The bridge is invisible to hooks by design.
|
||||
return handle_function_call(
|
||||
function_name=underlying_name,
|
||||
function_args=underlying_args,
|
||||
task_id=task_id,
|
||||
tool_call_id=tool_call_id,
|
||||
session_id=session_id,
|
||||
user_task=user_task,
|
||||
enabled_tools=enabled_tools,
|
||||
skip_pre_tool_call_hook=skip_pre_tool_call_hook,
|
||||
)
|
||||
|
||||
try:
|
||||
if function_name in _AGENT_LOOP_TOOLS:
|
||||
return json.dumps({"error": f"{function_name} must be handled by the agent loop"})
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue