feat(agent): one-shot LLM helper + llm.oneshot gateway RPC (#51261)

A "one-shot" is a single stateless model call that runs OUTSIDE any conversation:
it never touches session history, never breaks prompt caching, and returns plain
text. UI surfaces need this for small generative chores — a commit message from a
diff, a rename suggestion, a summary — where an agent turn would pollute the
thread and hand-rolling an LLM call at every call site would be worse.

- `agent/oneshot.py`: `run_oneshot(...)` over the existing auxiliary-client
  plumbing (same path as title generation). Two call shapes: explicit
  instructions/input, or a registered `template` + `variables` (templates own the
  prompt engineering so it stays consistent across CLI/TUI/desktop). Ships a
  `commit_message` template. Model selection inherits the live session via
  `main_runtime`, else the configured aux `task` backend.
- `tui_gateway/server.py`: `llm.oneshot` RPC (long-handler) inheriting the
  session's model when `session_id` resolves.

Stateless by construction — no session mutation, cache untouched.
This commit is contained in:
brooklyn! 2026-06-23 03:01:50 -05:00 committed by GitHub
parent af7b7f6322
commit 211ba9c7d3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 347 additions and 0 deletions

158
agent/oneshot.py Normal file
View file

@ -0,0 +1,158 @@
"""Shared one-off LLM requests for non-conversational helpers.
A "one-shot" is a single, stateless model call that runs *outside* any
conversation: it never touches a session's history, never breaks prompt
caching, and returns plain text. UI surfaces use it for small generative
chores — a commit message from a diff, a rename suggestion, a summary —
where spinning up an agent turn would be wrong (it would pollute the thread)
and hand-rolling an LLM call at every call site would be worse.
Two ways to call it:

* ``run_oneshot(instructions=..., user_input=...)`` — caller supplies the
  full prompt.
* ``run_oneshot(template="commit_message", variables={...})`` — caller
  names a registered template and passes its variables; the template owns
  the prompt engineering so it stays consistent across CLI/TUI/desktop.
Model selection rides the same auxiliary plumbing as title generation
(:func:`agent.auxiliary_client.call_llm`): pass ``main_runtime`` to inherit
the live session's provider/model, otherwise the configured ``task`` (default
``title_generation``) resolves a cheap/fast backend.
"""
import logging
from typing import Any, Callable, Dict, Optional, Tuple
from agent.auxiliary_client import call_llm, extract_content_or_reasoning
logger = logging.getLogger(__name__)
# A template turns a variables dict into a (instructions, user_input) pair:
# the first element becomes the system prompt and the second the user message
# when the pair is handed to ``run_oneshot``.
# Templates are plain callables (not str.format) so diff/code payloads with
# literal "{" / "}" pass through untouched.
PromptTemplate = Callable[[Dict[str, Any]], Tuple[str, str]]
def _truncate(text: str, limit: int) -> str:
text = text or ""
if len(text) <= limit:
return text
return text[:limit].rstrip() + "\n…(truncated)"
# System prompt for the ``commit_message`` template. It asks the model for a
# single Conventional Commits message and spells out the formatting rules
# (subject-line shape, optional body, no fences/preamble). The text is sent to
# the model verbatim, so any wording change here changes model behavior.
_COMMIT_INSTRUCTIONS = (
    "You write git commit messages. Given a diff of staged changes, write ONE "
    "concise Conventional Commits message describing what the change does and why.\n"
    "Rules:\n"
    "- Subject line: type(scope): summary — imperative mood, lower-case, no "
    "trailing period, ≤ 72 characters. Types: feat, fix, refactor, perf, docs, "
    "test, build, chore, style, ci.\n"
    "- Omit the scope if it isn't obvious.\n"
    "- Add a short body (wrapped at ~72 cols) ONLY when the change needs "
    "explanation; skip it for small/obvious changes.\n"
    "- Describe the actual change, never restate the diff line-by-line.\n"
    "- Return ONLY the commit message text — no quotes, no markdown fences, no "
    "preamble."
)
def _commit_message_template(variables: Dict[str, Any]) -> Tuple[str, str]:
    """Build the ``commit_message`` prompt pair from *variables*.

    Recognized keys (all optional, each coerced with ``str``):

    * ``diff`` — the diff to describe, capped at 12000 characters.
    * ``recent_commits`` — recent commit subjects used as a style
      reference, capped at 1500 characters.
    * ``avoid`` — a previously proposed message that must not be repeated,
      capped at 1000 characters.

    Returns ``(instructions, user_input)`` where the instructions are the
    fixed commit-message system prompt and the user input is the non-empty
    sections joined by blank lines.
    """

    def _text(key: str) -> str:
        # Missing / falsy values collapse to the empty string.
        return str(variables.get(key) or "")

    diff_text = _truncate(_text("diff"), 12000)
    history = _truncate(_text("recent_commits"), 1500)

    sections = []
    if history.strip():
        sections.append(
            "Recent commit subjects from this repo (match their style/conventions):\n"
            + history
        )

    diff_section = diff_text if diff_text else "(no textual diff available)"
    sections.append("Diff to describe:\n" + diff_section)

    # "Regenerate" has to produce something new even when the model decodes
    # greedily or the server pins temperature. A trailing nonce isn't enough,
    # so the earlier message is handed back with an explicit request for a
    # genuinely different one.
    previous = _truncate(_text("avoid").strip(), 1000)
    if previous:
        sections.append(
            "You already proposed the message below and the user wants a "
            "different one. Write a NEW message with different wording (and, if "
            "reasonable, a different emphasis or scope framing) — do not repeat "
            "it:\n" + previous
        )

    return _COMMIT_INSTRUCTIONS, "\n\n".join(sections)
# Registry of named templates. Add an entry here to give a new surface a
# consistent, reusable prompt without teaching every caller the prompt text.
# ``render_template`` looks names up in this mapping and raises ``KeyError``
# for any name that is not a key here.
PROMPT_TEMPLATES: Dict[str, PromptTemplate] = {
    "commit_message": _commit_message_template,
}
def render_template(name: str, variables: Optional[Dict[str, Any]] = None) -> Tuple[str, str]:
    """Render the registered template *name* into ``(instructions, user_input)``.

    *variables* is passed to the template callable; ``None`` (or any falsy
    value) is replaced by an empty dict.

    Raises:
        KeyError: *name* is not registered. Failing loudly here keeps callers
            from silently sending an empty prompt.
    """
    builder = PROMPT_TEMPLATES.get(name)
    if builder is not None:
        return builder(variables if variables else {})
    raise KeyError(f"unknown one-shot template: {name}")
def run_oneshot(
    *,
    instructions: str = "",
    user_input: str = "",
    template: Optional[str] = None,
    variables: Optional[Dict[str, Any]] = None,
    task: str = "title_generation",
    max_tokens: int = 1024,
    temperature: Optional[float] = 0.3,
    timeout: float = 60.0,
    main_runtime: Optional[Dict[str, Any]] = None,
) -> str:
    """Issue one stateless LLM request and return the text it produced.

    Supply either a registered ``template`` (with optional ``variables``) or
    an explicit ``instructions`` / ``user_input`` pair. When ``template`` is
    given, the rendered pair replaces whatever ``instructions`` and
    ``user_input`` were passed.

    The request is a system message (only when the instructions are not
    blank) followed by a user message. ``task``, ``max_tokens``,
    ``temperature``, ``timeout`` and ``main_runtime`` are forwarded to
    :func:`call_llm` unchanged.

    Returns:
        The model's answer with surrounding whitespace and a single wrapping
        code fence removed.

    Raises:
        ValueError: neither a template nor any non-blank prompt text was given.
        KeyError: ``template`` names an unregistered template.
        RuntimeError: no LLM provider is configured (surfaced from
            :func:`call_llm`).
    """
    if template:
        instructions, user_input = render_template(template, variables)

    user_text = user_input or ""
    has_system = bool((instructions or "").strip())
    if not has_system and not user_text.strip():
        raise ValueError("run_oneshot requires a template or instructions/user_input")

    # The system message is optional; the user message is always sent.
    messages = [{"role": "system", "content": instructions}] if has_system else []
    messages.append({"role": "user", "content": user_text})

    response = call_llm(
        task=task,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        timeout=timeout,
        main_runtime=main_runtime,
    )

    answer = extract_content_or_reasoning(response) or ""
    return _strip_code_fence(answer.strip())
def _strip_code_fence(text: str) -> str:
"""Drop a single wrapping ``` fence the model may have added."""
if not text.startswith("```"):
return text
lines = text.splitlines()
if len(lines) >= 2 and lines[0].startswith("```") and lines[-1].strip() == "```":
return "\n".join(lines[1:-1]).strip()
return text

110
tests/agent/test_oneshot.py Normal file
View file

@ -0,0 +1,110 @@
"""Tests for agent.oneshot — shared one-off (stateless) LLM requests."""
from unittest.mock import MagicMock, patch
import pytest
from agent.oneshot import (
PROMPT_TEMPLATES,
render_template,
run_oneshot,
_strip_code_fence,
_truncate,
)
class TestRenderTemplate:
    """Template lookup and rendering of the ``commit_message`` prompt."""

    def test_unknown_template_raises(self):
        """An unregistered name must fail loudly with ``KeyError``."""
        with pytest.raises(KeyError):
            render_template("does-not-exist", {})

    def test_commit_message_template_is_registered(self):
        """The shipped ``commit_message`` template is present in the registry."""
        assert "commit_message" in PROMPT_TEMPLATES

    def test_commit_message_includes_diff_and_recent(self):
        """Both the diff and the recent-commit subjects reach the user prompt."""
        payload = {
            "diff": "diff --git a/x b/x\n+new",
            "recent_commits": "feat: a\nfix: b",
        }
        system_prompt, user_prompt = render_template("commit_message", payload)
        # Instructions describe the contract (conventional commits), not a snapshot.
        assert "Conventional Commits" in system_prompt
        for fragment in ("diff --git a/x b/x", "feat: a"):
            assert fragment in user_prompt

    def test_commit_message_diff_with_braces_passes_through(self):
        """Literal braces in a payload survive rendering."""
        # Templates must not use str.format — code payloads carry literal { }.
        rendered = render_template("commit_message", {"diff": "x = {a: 1}"})
        assert "x = {a: 1}" in rendered[1]

    def test_commit_message_handles_missing_variables(self):
        """An empty variables dict still yields instructions and a placeholder."""
        system_prompt, user_prompt = render_template("commit_message", {})
        assert system_prompt
        assert "no textual diff available" in user_prompt

    def test_commit_message_avoid_forces_new_message(self):
        """Supplying ``avoid`` adds a do-not-repeat section to the prompt."""
        # Passing the previous message must instruct the model not to repeat it,
        # so "regenerate" yields a different result even on greedy models.
        prior = "feat: prior"
        first_prompt = render_template("commit_message", {"diff": "d"})[1]
        retry_prompt = render_template("commit_message", {"diff": "d", "avoid": prior})[1]
        assert prior in retry_prompt
        assert "do not repeat" in retry_prompt
        assert prior not in first_prompt
class TestRunOneshot:
    """``run_oneshot`` behavior with the auxiliary LLM call patched out."""

    def _mock_response(self, content):
        """Return a chat-completion-shaped mock whose only payload is *content*."""
        choice = MagicMock()
        choice.message.content = content
        # Clear every reasoning field so only ``content`` can supply the text.
        for attr in ("reasoning", "reasoning_content", "reasoning_details"):
            setattr(choice.message, attr, None)
        response = MagicMock()
        response.choices = [choice]
        return response

    def test_template_path_calls_llm_with_rendered_prompt(self):
        """A template call sends a system message followed by a user message."""
        canned = self._mock_response("feat: add thing")
        with patch("agent.oneshot.call_llm", return_value=canned) as fake_llm:
            result = run_oneshot(template="commit_message", variables={"diff": "d"})
        assert result == "feat: add thing"
        sent = fake_llm.call_args.kwargs["messages"]
        assert sent[0]["role"] == "system"
        assert sent[1]["role"] == "user"

    def test_explicit_instructions_path(self):
        """Explicit instructions/input are forwarded verbatim."""
        canned = self._mock_response("hello")
        with patch("agent.oneshot.call_llm", return_value=canned) as fake_llm:
            result = run_oneshot(instructions="be brief", user_input="say hi")
        assert result == "hello"
        sent = fake_llm.call_args.kwargs["messages"]
        assert sent[0]["content"] == "be brief"
        assert sent[1]["content"] == "say hi"

    def test_requires_template_or_prompt(self):
        """Calling with nothing to send is rejected with ``ValueError``."""
        with pytest.raises(ValueError):
            run_oneshot()

    def test_strips_wrapping_code_fence(self):
        """A fence wrapped around the answer is removed from the result."""
        canned = self._mock_response("```\nfix: bug\n```")
        with patch("agent.oneshot.call_llm", return_value=canned):
            result = run_oneshot(instructions="x", user_input="y")
            assert result == "fix: bug"
class TestHelpers:
    """Direct checks of the private text helpers."""

    def test_truncate_under_limit_unchanged(self):
        """Text shorter than the limit is returned as-is."""
        original = "short"
        assert _truncate(original, 100) == original

    def test_truncate_over_limit_marks_truncation(self):
        """Over-long text is cut and ends with the truncation marker."""
        shortened = _truncate("x" * 200, 50)
        assert shortened.endswith("…(truncated)")
        assert len(shortened) < 200

    def test_strip_code_fence_without_fence_is_noop(self):
        """Unfenced text passes through untouched."""
        sample = "plain text"
        assert _strip_code_fence(sample) == sample

View file

@ -177,6 +177,7 @@ _LONG_HANDLERS = frozenset(
"billing.step_up",
"browser.manage",
"cli.exec",
"llm.oneshot",
"plugins.manage",
"session.branch",
"session.compress",
@ -5200,6 +5201,84 @@ def _(rid, params: dict) -> dict:
return _err(rid, 5007, str(e))
def _main_runtime_from_agent(agent) -> dict | None:
"""Build an aux-client main_runtime override from a live agent.
Lets a one-shot inherit the session's provider/model/credentials so its
output matches the model the user is actually coding with, instead of
falling back to the cheapest auto-detected backend.
"""
if agent is None:
return None
runtime: dict = {}
for field in ("provider", "model", "base_url", "api_key", "api_mode", "auth_mode"):
value = getattr(agent, field, None)
if isinstance(value, str) and value.strip():
runtime[field] = value.strip()
elif field == "api_key" and callable(value):
runtime[field] = value
return runtime or None
@method("llm.oneshot")
def _(rid, params: dict) -> dict:
    """Run a single stateless LLM request outside any conversation.

    Generic helper for small generative chores (e.g. a commit message from a
    diff). Accepts either a named ``template`` + ``variables`` or an explicit
    ``instructions`` / ``input`` pair. When ``session_id`` resolves to a live
    session the call inherits that agent's model; otherwise it uses the
    configured auxiliary ``task`` backend. Never mutates session history, so
    prompt caching is untouched.

    Params (all optional, but at least one of template/instructions/input
    must be non-blank):
        template: name of a registered one-shot template.
        variables: dict handed to the template; non-dict values are ignored.
        instructions / input: explicit prompt pair (coerced to ``str``).
        task: auxiliary task name; defaults to ``title_generation``.
        max_tokens: defaults to 1024 when missing or not convertible to int.
        temperature: defaults to 0.3 when missing or not convertible to float.
        session_id: session whose agent runtime should be inherited.

    Returns ``{"text": ...}`` on success, otherwise an error:
        4030 — nothing to send (no template, instructions, or input).
        4031 — ``KeyError`` from the one-shot call (unknown template name).
        4032 — ``ValueError`` from the one-shot call.
        5030 — any other failure.
    """
    # Params arrive from an RPC client, so nothing guarantees their types.
    # Coerce the text fields with str() up front: calling .strip() on a
    # non-string would raise AttributeError here (outside the try below), and
    # a non-string prompt would pass validation only to crash in run_oneshot.
    template = str(params.get("template") or "").strip() or None
    instructions = str(params.get("instructions") or "")
    user_input = str(params.get("input") or "")
    raw_variables = params.get("variables")
    variables = raw_variables if isinstance(raw_variables, dict) else {}
    task = str(params.get("task") or "").strip() or "title_generation"

    # OverflowError covers non-finite / oversized numbers (e.g. JSON Infinity).
    try:
        max_tokens = int(params.get("max_tokens") or 1024)
    except (TypeError, ValueError, OverflowError):
        max_tokens = 1024

    temperature = params.get("temperature")
    if temperature is not None:
        try:
            temperature = float(temperature)
        except (TypeError, ValueError, OverflowError):
            temperature = None

    if not template and not instructions.strip() and not user_input.strip():
        return _err(rid, 4030, "llm.oneshot requires a template or instructions/input")

    # Optional: inherit the live session's model (no error if absent).
    try:
        session = _sessions.get(params.get("session_id") or "")
    except TypeError:
        # Unhashable session_id (e.g. a JSON list/object) cannot match a session.
        session = None
    main_runtime = _main_runtime_from_agent(session.get("agent")) if session else None

    try:
        from agent.oneshot import run_oneshot

        text = run_oneshot(
            instructions=instructions,
            user_input=user_input,
            template=template,
            variables=variables,
            task=task,
            max_tokens=max_tokens,
            temperature=temperature if temperature is not None else 0.3,
            main_runtime=main_runtime,
        )
    except KeyError as e:
        # str(KeyError("msg")) is the repr "'msg'"; report the raw argument so
        # the client does not see a message wrapped in stray quotes.
        # NOTE(review): any KeyError raised deeper in the call is also reported
        # as 4031 — confirm that is the intended classification.
        detail = e.args[0] if e.args else e
        return _err(rid, 4031, str(detail))
    except ValueError as e:
        return _err(rid, 4032, str(e))
    except Exception as e:
        logger.warning("llm.oneshot failed: %s", e)
        return _err(rid, 5030, f"one-shot generation failed: {e}")

    return _ok(rid, {"text": text})
@method("handoff.request")
def _(rid, params: dict) -> dict:
"""Queue a handoff of this session to a messaging platform.