feat(agent): one-shot LLM helper + llm.oneshot gateway RPC (#51261)
A "one-shot" is a single stateless model call that runs OUTSIDE any conversation: it never touches session history, never breaks prompt caching, and returns plain text. UI surfaces need this for small generative chores — a commit message from a diff, a rename suggestion, a summary — where an agent turn would pollute the thread and hand-rolling an LLM call at every call site would be worse. - `agent/oneshot.py`: `run_oneshot(...)` over the existing auxiliary-client plumbing (same path as title generation). Two call shapes: explicit instructions/input, or a registered `template` + `variables` (templates own the prompt engineering so it stays consistent across CLI/TUI/desktop). Ships a `commit_message` template. Model selection inherits the live session via `main_runtime`, else the configured aux `task` backend. - `tui_gateway/server.py`: `llm.oneshot` RPC (long-handler) inheriting the session's model when `session_id` resolves. Stateless by construction — no session mutation, cache untouched.
This commit is contained in:
parent
af7b7f6322
commit
211ba9c7d3
3 changed files with 347 additions and 0 deletions
158
agent/oneshot.py
Normal file
158
agent/oneshot.py
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
"""Shared one-off LLM requests for non-conversational helpers.
|
||||
|
||||
A "one-shot" is a single, stateless model call that runs *outside* any
|
||||
conversation: it never touches a session's history, never breaks prompt
|
||||
caching, and returns plain text. UI surfaces use it for small generative
|
||||
chores — a commit message from a diff, a rename suggestion, a summary —
|
||||
where spinning up an agent turn would be wrong (it would pollute the thread)
|
||||
and hand-rolling an LLM call at every call site would be worse.
|
||||
|
||||
Two ways to call it:
|
||||
|
||||
* ``run_oneshot(instructions=..., user_input=...)`` — caller supplies the
|
||||
full prompt.
|
||||
* ``run_oneshot(template="commit_message", variables={...})`` — caller
|
||||
names a registered template and passes its variables; the template owns
|
||||
the prompt engineering so it stays consistent across CLI/TUI/desktop.
|
||||
|
||||
Model selection rides the same auxiliary plumbing as title generation
|
||||
(:func:`agent.auxiliary_client.call_llm`): pass ``main_runtime`` to inherit
|
||||
the live session's provider/model, otherwise the configured ``task`` (default
|
||||
``title_generation``) resolves a cheap/fast backend.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any, Callable, Dict, Optional, Tuple
|
||||
|
||||
from agent.auxiliary_client import call_llm, extract_content_or_reasoning
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# A template is a callable that turns a variables dict into an
# ``(instructions, user_input)`` pair of strings.
# Templates are plain callables (not ``str.format`` strings) so diff/code
# payloads containing literal "{" / "}" pass through untouched.
PromptTemplate = Callable[[Dict[str, Any]], Tuple[str, str]]
|
||||
|
||||
|
||||
def _truncate(text: str, limit: int) -> str:
|
||||
text = text or ""
|
||||
if len(text) <= limit:
|
||||
return text
|
||||
return text[:limit].rstrip() + "\n…(truncated)"
|
||||
|
||||
|
||||
_COMMIT_INSTRUCTIONS = (
|
||||
"You write git commit messages. Given a diff of staged changes, write ONE "
|
||||
"concise Conventional Commits message describing what the change does and why.\n"
|
||||
"Rules:\n"
|
||||
"- Subject line: type(scope): summary — imperative mood, lower-case, no "
|
||||
"trailing period, ≤ 72 characters. Types: feat, fix, refactor, perf, docs, "
|
||||
"test, build, chore, style, ci.\n"
|
||||
"- Omit the scope if it isn't obvious.\n"
|
||||
"- Add a short body (wrapped at ~72 cols) ONLY when the change needs "
|
||||
"explanation; skip it for small/obvious changes.\n"
|
||||
"- Describe the actual change, never restate the diff line-by-line.\n"
|
||||
"- Return ONLY the commit message text — no quotes, no markdown fences, no "
|
||||
"preamble."
|
||||
)
|
||||
|
||||
|
||||
def _commit_message_template(variables: Dict[str, Any]) -> Tuple[str, str]:
    """Build the ``commit_message`` prompt from ``variables``.

    Recognised keys (all optional, each coerced with ``str``):

    * ``diff`` — the change to describe, capped at 12000 characters; a
      placeholder is used when it is empty.
    * ``recent_commits`` — recent subjects to imitate, capped at 1500
      characters; the section is omitted when blank.
    * ``avoid`` — a previously proposed message, stripped and capped at 1000
      characters; when present the model is told to write a different one.

    Returns ``(_COMMIT_INSTRUCTIONS, user_input)`` where ``user_input`` is the
    sections joined by blank lines.
    """
    diff_text = _truncate(str(variables.get("diff") or ""), 12000)
    history = _truncate(str(variables.get("recent_commits") or ""), 1500)

    sections = []
    if history.strip():
        sections.append(
            "Recent commit subjects from this repo (match their style/conventions):\n"
            + history
        )

    diff_body = diff_text if diff_text else "(no textual diff available)"
    sections.append(f"Diff to describe:\n{diff_body}")

    # "Regenerate" has to produce something new even when the model decodes
    # greedily or the server pins temperature, so the previous message is
    # handed back with an explicit request for a different one.
    previous = _truncate(str(variables.get("avoid") or "").strip(), 1000)
    if previous:
        sections.append(
            "You already proposed the message below and the user wants a "
            "different one. Write a NEW message with different wording (and, if "
            "reasonable, a different emphasis or scope framing) — do not repeat "
            "it:\n" + previous
        )

    return _COMMIT_INSTRUCTIONS, "\n\n".join(sections)
|
||||
|
||||
|
||||
# Name -> template registry. Registering a template here gives every surface
# the same reusable prompt without each caller carrying the prompt text.
PROMPT_TEMPLATES: Dict[str, PromptTemplate] = dict(
    commit_message=_commit_message_template,
)
|
||||
|
||||
|
||||
def render_template(name: str, variables: Optional[Dict[str, Any]] = None) -> Tuple[str, str]:
    """Render the registered template ``name`` into (instructions, user_input).

    ``variables`` defaults to an empty dict when omitted or empty.

    Raises:
        KeyError: if ``name`` is not in ``PROMPT_TEMPLATES``, so callers fail
            loudly instead of silently sending an empty prompt.
    """
    builder = PROMPT_TEMPLATES.get(name, None)
    if builder is not None:
        return builder(variables if variables else {})
    raise KeyError(f"unknown one-shot template: {name}")
|
||||
|
||||
|
||||
def run_oneshot(
    *,
    instructions: str = "",
    user_input: str = "",
    template: Optional[str] = None,
    variables: Optional[Dict[str, Any]] = None,
    task: str = "title_generation",
    max_tokens: int = 1024,
    temperature: Optional[float] = 0.3,
    timeout: float = 60.0,
    main_runtime: Optional[Dict[str, Any]] = None,
) -> str:
    """Issue one stateless LLM request and return the answer text.

    Supply either a registered ``template`` (with optional ``variables``) or
    an explicit ``instructions`` / ``user_input`` pair. When ``template`` is
    given it replaces both ``instructions`` and ``user_input``. A system
    message is sent only when the instructions are non-blank; a user message
    is always sent.

    ``task``, ``max_tokens``, ``temperature``, ``timeout`` and
    ``main_runtime`` are forwarded unchanged to ``call_llm``.

    Returns:
        The extracted response text, stripped of surrounding whitespace and
        of a single wrapping code fence.

    Raises:
        KeyError: for an unknown template name.
        ValueError: when both the instructions and the user input are blank.
        Any exception raised by ``call_llm`` propagates unchanged (per the
        original documentation, RuntimeError when no provider is configured).
    """
    if template:
        instructions, user_input = render_template(template, variables)

    system_text = instructions or ""
    user_text = user_input or ""
    has_system = bool(system_text.strip())

    if not has_system and not user_text.strip():
        raise ValueError("run_oneshot requires a template or instructions/user_input")

    # The user turn is always present; the system turn is prepended only when
    # there is something in it.
    messages = [{"role": "user", "content": user_text}]
    if has_system:
        messages.insert(0, {"role": "system", "content": system_text})

    response = call_llm(
        task=task,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        timeout=timeout,
        main_runtime=main_runtime,
    )

    answer = extract_content_or_reasoning(response) or ""
    return _strip_code_fence(answer.strip())
|
||||
|
||||
|
||||
def _strip_code_fence(text: str) -> str:
|
||||
"""Drop a single wrapping ``` fence the model may have added."""
|
||||
if not text.startswith("```"):
|
||||
return text
|
||||
lines = text.splitlines()
|
||||
if len(lines) >= 2 and lines[0].startswith("```") and lines[-1].strip() == "```":
|
||||
return "\n".join(lines[1:-1]).strip()
|
||||
return text
|
||||
110
tests/agent/test_oneshot.py
Normal file
110
tests/agent/test_oneshot.py
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
"""Tests for agent.oneshot — shared one-off (stateless) LLM requests."""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.oneshot import (
|
||||
PROMPT_TEMPLATES,
|
||||
render_template,
|
||||
run_oneshot,
|
||||
_strip_code_fence,
|
||||
_truncate,
|
||||
)
|
||||
|
||||
|
||||
class TestRenderTemplate:
    """Rendering of registered one-shot prompt templates."""

    def test_unknown_template_raises(self):
        # An unregistered name must raise rather than produce an empty prompt.
        with pytest.raises(KeyError):
            render_template("does-not-exist", {})

    def test_commit_message_template_is_registered(self):
        assert "commit_message" in PROMPT_TEMPLATES

    def test_commit_message_includes_diff_and_recent(self):
        payload = {
            "diff": "diff --git a/x b/x\n+new",
            "recent_commits": "feat: a\nfix: b",
        }
        system_prompt, user_prompt = render_template("commit_message", payload)
        # The instructions state the contract (conventional commits); they are
        # not checked as a verbatim snapshot.
        assert "Conventional Commits" in system_prompt
        assert "diff --git a/x b/x" in user_prompt
        assert "feat: a" in user_prompt

    def test_commit_message_diff_with_braces_passes_through(self):
        # Templates must not go through str.format: code payloads carry
        # literal braces.
        rendered = render_template("commit_message", {"diff": "x = {a: 1}"})
        user_prompt = rendered[1]
        assert "x = {a: 1}" in user_prompt

    def test_commit_message_handles_missing_variables(self):
        system_prompt, user_prompt = render_template("commit_message", {})
        assert system_prompt
        assert "no textual diff available" in user_prompt

    def test_commit_message_avoid_forces_new_message(self):
        # Supplying the previous message must tell the model not to repeat it,
        # so "regenerate" differs even on greedy models.
        previous = "feat: prior"
        first_try = render_template("commit_message", {"diff": "d"})[1]
        retry = render_template("commit_message", {"diff": "d", "avoid": previous})[1]
        assert previous in retry
        assert "do not repeat" in retry
        assert previous not in first_try
|
||||
|
||||
|
||||
class TestRunOneshot:
    """Behaviour of ``run_oneshot`` with ``agent.oneshot.call_llm`` patched."""

    def _mock_response(self, content):
        # Stub response: a single choice whose message carries ``content`` and
        # has every reasoning field set to None.
        choice = MagicMock()
        choice.message.content = content
        choice.message.reasoning = None
        choice.message.reasoning_content = None
        choice.message.reasoning_details = None

        resp = MagicMock()
        resp.choices = [choice]
        return resp

    def test_template_path_calls_llm_with_rendered_prompt(self):
        canned = self._mock_response("feat: add thing")
        with patch("agent.oneshot.call_llm", return_value=canned) as llm:
            result = run_oneshot(template="commit_message", variables={"diff": "d"})

        assert result == "feat: add thing"
        sent = llm.call_args.kwargs["messages"]
        assert sent[0]["role"] == "system"
        assert sent[1]["role"] == "user"

    def test_explicit_instructions_path(self):
        canned = self._mock_response("hello")
        with patch("agent.oneshot.call_llm", return_value=canned) as llm:
            result = run_oneshot(instructions="be brief", user_input="say hi")

        assert result == "hello"
        sent = llm.call_args.kwargs["messages"]
        assert sent[0]["content"] == "be brief"
        assert sent[1]["content"] == "say hi"

    def test_requires_template_or_prompt(self):
        # No template and no prompt text is a caller error.
        with pytest.raises(ValueError):
            run_oneshot()

    def test_strips_wrapping_code_fence(self):
        fenced = self._mock_response("```\nfix: bug\n```")
        with patch("agent.oneshot.call_llm", return_value=fenced):
            result = run_oneshot(instructions="x", user_input="y")
            assert result == "fix: bug"
|
||||
|
||||
|
||||
class TestHelpers:
    """Unit checks for the private text helpers."""

    def test_truncate_under_limit_unchanged(self):
        original = "short"
        assert _truncate(original, 100) == original

    def test_truncate_over_limit_marks_truncation(self):
        shortened = _truncate("x" * 200, 50)
        assert shortened.endswith("…(truncated)")
        assert len(shortened) < 200

    def test_strip_code_fence_without_fence_is_noop(self):
        unfenced = "plain text"
        assert _strip_code_fence(unfenced) == unfenced
|
||||
|
|
@ -177,6 +177,7 @@ _LONG_HANDLERS = frozenset(
|
|||
"billing.step_up",
|
||||
"browser.manage",
|
||||
"cli.exec",
|
||||
"llm.oneshot",
|
||||
"plugins.manage",
|
||||
"session.branch",
|
||||
"session.compress",
|
||||
|
|
@ -5200,6 +5201,84 @@ def _(rid, params: dict) -> dict:
|
|||
return _err(rid, 5007, str(e))
|
||||
|
||||
|
||||
def _main_runtime_from_agent(agent) -> dict | None:
|
||||
"""Build an aux-client main_runtime override from a live agent.
|
||||
|
||||
Lets a one-shot inherit the session's provider/model/credentials so its
|
||||
output matches the model the user is actually coding with, instead of
|
||||
falling back to the cheapest auto-detected backend.
|
||||
"""
|
||||
if agent is None:
|
||||
return None
|
||||
runtime: dict = {}
|
||||
for field in ("provider", "model", "base_url", "api_key", "api_mode", "auth_mode"):
|
||||
value = getattr(agent, field, None)
|
||||
if isinstance(value, str) and value.strip():
|
||||
runtime[field] = value.strip()
|
||||
elif field == "api_key" and callable(value):
|
||||
runtime[field] = value
|
||||
return runtime or None
|
||||
|
||||
|
||||
@method("llm.oneshot")
def _(rid, params: dict) -> dict:
    """Run a single stateless LLM request outside any conversation.

    Generic helper for small generative chores (e.g. a commit message from a
    diff). Request parameters, all optional:

    * ``template`` + ``variables`` — a registered template name and its
      variables dict (a non-dict ``variables`` is treated as empty), or
    * ``instructions`` / ``input`` — an explicit prompt pair (coerced to
      ``str``).
    * ``task`` — auxiliary task name; defaults to ``title_generation`` when
      missing, blank, or not a string.
    * ``max_tokens`` — defaults to 1024 when missing, unparsable, or < 1.
    * ``temperature`` — defaults to 0.3 when missing, unparsable, or not
      finite.
    * ``session_id`` — when it resolves to a live session, the call inherits
      that session agent's runtime; an unknown id is not an error.

    The handler only reads the session's ``agent`` entry; it does not write to
    the session.

    Errors:
        4030 — no template/instructions/input supplied, or ``template`` is
        not a string.
        4031 — ``KeyError`` from the one-shot call (unknown template name).
        4032 — ``ValueError`` from the one-shot call.
        5030 — any other failure during generation.
    """
    import math

    raw_template = params.get("template") or ""
    if not isinstance(raw_template, str):
        # Previously this crashed on ``.strip()``; report it as a bad request.
        return _err(rid, 4030, "llm.oneshot template must be a string")
    template = raw_template.strip() or None

    # Coerce to str so the values forwarded to run_oneshot match what the
    # blank-check below validates.
    instructions = str(params.get("instructions") or "")
    user_input = str(params.get("input") or "")

    raw_variables = params.get("variables")
    variables = raw_variables if isinstance(raw_variables, dict) else {}

    raw_task = params.get("task")
    task = (raw_task.strip() if isinstance(raw_task, str) else "") or "title_generation"

    try:
        max_tokens = int(params.get("max_tokens") or 1024)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")).
        max_tokens = 1024
    if max_tokens < 1:
        max_tokens = 1024

    temperature = params.get("temperature")
    if temperature is not None:
        try:
            temperature = float(temperature)
        except (TypeError, ValueError, OverflowError):
            temperature = None
        else:
            if not math.isfinite(temperature):
                temperature = None
    if temperature is None:
        temperature = 0.3

    if not template and not instructions.strip() and not user_input.strip():
        return _err(rid, 4030, "llm.oneshot requires a template or instructions/input")

    # Optional: inherit the live session's model (no error if absent).
    try:
        session = _sessions.get(params.get("session_id") or "")
    except TypeError:
        # Unhashable session_id (e.g. a list) cannot name a session.
        session = None
    main_runtime = _main_runtime_from_agent(session.get("agent")) if session else None

    try:
        from agent.oneshot import run_oneshot

        text = run_oneshot(
            instructions=instructions,
            user_input=user_input,
            template=template,
            variables=variables,
            task=task,
            max_tokens=max_tokens,
            temperature=temperature,
            main_runtime=main_runtime,
        )
    except KeyError as e:
        # str(KeyError("msg")) is the repr "'msg'"; send the bare message.
        message = str(e.args[0]) if e.args else str(e)
        return _err(rid, 4031, message)
    except ValueError as e:
        return _err(rid, 4032, str(e))
    except Exception as e:
        logger.warning("llm.oneshot failed: %s", e)
        return _err(rid, 5030, f"one-shot generation failed: {e}")

    return _ok(rid, {"text": text})
|
||||
|
||||
|
||||
@method("handoff.request")
|
||||
def _(rid, params: dict) -> dict:
|
||||
"""Queue a handoff of this session to a messaging platform.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue