feat(agent): one-shot LLM helper + llm.oneshot gateway RPC (#51261)

A "one-shot" is a single stateless model call that runs OUTSIDE any conversation: it never touches session history, never breaks prompt caching, and returns plain text. UI surfaces need this for small generative chores — a commit message from a diff, a rename suggestion, a summary — where an agent turn would pollute the thread and hand-rolling an LLM call at every call site would be worse.

- `agent/oneshot.py`: `run_oneshot(...)` over the existing auxiliary-client plumbing (same path as title generation). Two call shapes: explicit instructions/input, or a registered `template` + `variables` (templates own the prompt engineering so it stays consistent across CLI/TUI/desktop). Ships a `commit_message` template. Model selection inherits the live session via `main_runtime`, else the configured aux `task` backend.
- `tui_gateway/server.py`: `llm.oneshot` RPC (long-handler) inheriting the session's model when `session_id` resolves. Stateless by construction — no session mutation, cache untouched.
2026-06-23 03:01:50 -05:00 · 2026-06-23 03:01:50 -05:00 · 211ba9c7d3
commit 211ba9c7d3
parent af7b7f6322
3 changed files with 347 additions and 0 deletions
--- a/agent/oneshot.py
+++ b/agent/oneshot.py
@ -0,0 +1,158 @@
+"""Shared one-off LLM requests for non-conversational helpers.
+
+A "one-shot" is a single, stateless model call that runs *outside* any
+conversation: it never touches a session's history, never breaks prompt
+caching, and returns plain text. UI surfaces use it for small generative
+chores — a commit message from a diff, a rename suggestion, a summary —
+where spinning up an agent turn would be wrong (it would pollute the thread)
+and hand-rolling an LLM call at every call site would be worse.
+
+Two ways to call it:
+
+  * ``run_oneshot(instructions=..., user_input=...)`` — caller supplies the
+    full prompt.
+  * ``run_oneshot(template="commit_message", variables={...})`` — caller
+    names a registered template and passes its variables; the template owns
+    the prompt engineering so it stays consistent across CLI/TUI/desktop.
+
+Model selection rides the same auxiliary plumbing as title generation
+(:func:`agent.auxiliary_client.call_llm`): pass ``main_runtime`` to inherit
+the live session's provider/model, otherwise the configured ``task`` (default
+``title_generation``) resolves a cheap/fast backend.
+"""
+
+import logging
+from typing import Any, Callable, Dict, Optional, Tuple
+
+from agent.auxiliary_client import call_llm, extract_content_or_reasoning
+
+logger = logging.getLogger(__name__)
+
+# A template turns a variables dict into a (instructions, user_input) pair.
+# Templates are plain callables (not str.format) so diff/code payloads with
+# literal "{" / "}" pass through untouched.
+PromptTemplate = Callable[[Dict[str, Any]], Tuple[str, str]]
+
+
+def _truncate(text: str, limit: int) -> str:
+    """Clip ``text`` to at most ``limit`` characters, flagging any cut.
+
+    A falsy ``text`` is treated as the empty string. Text that fits is
+    returned unchanged; longer text is cut at ``limit``, right-stripped, and
+    suffixed with a newline plus the ``…(truncated)`` marker, so the result
+    can be slightly longer than ``limit``.
+    """
+    body = text if text else ""
+    if len(body) > limit:
+        return body[:limit].rstrip() + "\n…(truncated)"
+    return body
+
+
+# Instructions half of the ``commit_message`` template: the template function
+# returns this string verbatim as the first element of its
+# (instructions, user_input) pair. It is assembled from adjacent string
+# literals, so the only line breaks are the explicit "\n" escapes.
+_COMMIT_INSTRUCTIONS = (
+    "You write git commit messages. Given a diff of staged changes, write ONE "
+    "concise Conventional Commits message describing what the change does and why.\n"
+    "Rules:\n"
+    "- Subject line: type(scope): summary — imperative mood, lower-case, no "
+    "trailing period, ≤ 72 characters. Types: feat, fix, refactor, perf, docs, "
+    "test, build, chore, style, ci.\n"
+    "- Omit the scope if it isn't obvious.\n"
+    "- Add a short body (wrapped at ~72 cols) ONLY when the change needs "
+    "explanation; skip it for small/obvious changes.\n"
+    "- Describe the actual change, never restate the diff line-by-line.\n"
+    "- Return ONLY the commit message text — no quotes, no markdown fences, no "
+    "preamble."
+)
+
+
+def _commit_message_template(variables: Dict[str, Any]) -> Tuple[str, str]:
+    """Build the ``commit_message`` prompt pair from ``variables``.
+
+    Recognised keys (all optional; each value is coerced with ``str`` and
+    truncated):
+
+      * ``diff`` — the change to describe, capped at 12000 characters. A
+        placeholder line is sent when it is empty.
+      * ``recent_commits`` — earlier subjects to imitate, capped at 1500.
+      * ``avoid`` — a previously proposed message the model must not repeat,
+        stripped and capped at 1000.
+
+    Returns ``(_COMMIT_INSTRUCTIONS, user_input)`` where the user-input
+    sections are joined by blank lines.
+    """
+    diff_text = _truncate(str(variables.get("diff") or ""), 12000)
+    history = _truncate(str(variables.get("recent_commits") or ""), 1500)
+
+    sections = []
+    if history.strip():
+        sections.append(
+            "Recent commit subjects from this repo (match their style/conventions):\n"
+            + history
+        )
+
+    diff_section = diff_text if diff_text else "(no textual diff available)"
+    sections.append("Diff to describe:\n" + diff_section)
+
+    # A "regenerate" request has to produce something new even when the model
+    # decodes greedily or the server pins temperature. A trailing nonce is not
+    # enough, so the earlier message is handed back with an explicit demand
+    # for a different one.
+    previous = _truncate(str(variables.get("avoid") or "").strip(), 1000)
+    if previous:
+        sections.append(
+            "You already proposed the message below and the user wants a "
+            "different one. Write a NEW message with different wording (and, if "
+            "reasonable, a different emphasis or scope framing) — do not repeat "
+            "it:\n" + previous
+        )
+
+    user_prompt = "\n\n".join(sections)
+    return _COMMIT_INSTRUCTIONS, user_prompt
+
+
+# Registry of named templates. Add an entry here to give a new surface a
+# consistent, reusable prompt without teaching every caller the prompt text.
+PROMPT_TEMPLATES: Dict[str, PromptTemplate] = {
+    "commit_message": _commit_message_template,
+}
+
+
+def render_template(name: str, variables: Optional[Dict[str, Any]] = None) -> Tuple[str, str]:
+    """Look up the template registered as ``name`` and render it.
+
+    ``variables`` defaults to an empty dict when omitted or falsy. Returns the
+    template's ``(instructions, user_input)`` pair.
+
+    Raises KeyError for an unregistered name, so a typo surfaces as an error
+    rather than as an empty prompt sent to the model.
+    """
+    renderer = PROMPT_TEMPLATES.get(name)
+    if renderer is not None:
+        return renderer(variables if variables else {})
+    raise KeyError(f"unknown one-shot template: {name}")
+
+
+def run_oneshot(
+    *,
+    instructions: str = "",
+    user_input: str = "",
+    template: Optional[str] = None,
+    variables: Optional[Dict[str, Any]] = None,
+    task: str = "title_generation",
+    max_tokens: int = 1024,
+    temperature: Optional[float] = 0.3,
+    timeout: float = 60.0,
+    main_runtime: Optional[Dict[str, Any]] = None,
+) -> str:
+    """Issue one stateless LLM request and return the text it produced.
+
+    Callers pass either a registered ``template`` name (with optional
+    ``variables``) or an explicit ``instructions`` / ``user_input`` pair. When
+    ``template`` is given, its rendered pair replaces whatever
+    ``instructions`` / ``user_input`` were passed. The remaining keyword
+    arguments are forwarded unchanged to :func:`call_llm`.
+
+    Returns the model's answer with surrounding whitespace removed and a
+    single wrapping code fence, if any, stripped.
+
+    Raises KeyError for an unknown template name and ValueError when both
+    the instructions and the user input are blank. Per the original contract,
+    :func:`call_llm` surfaces RuntimeError when no LLM provider is configured.
+    """
+    if template:
+        instructions, user_input = render_template(template, variables)
+
+    system_text = instructions or ""
+    user_text = user_input or ""
+    has_system = bool(system_text.strip())
+    if not has_system and not user_text.strip():
+        raise ValueError("run_oneshot requires a template or instructions/user_input")
+
+    # The user message is always sent; the system message only when it has
+    # non-whitespace content.
+    messages = [{"role": "user", "content": user_text}]
+    if has_system:
+        messages.insert(0, {"role": "system", "content": instructions})
+
+    response = call_llm(
+        task=task,
+        messages=messages,
+        max_tokens=max_tokens,
+        temperature=temperature,
+        timeout=timeout,
+        main_runtime=main_runtime,
+    )
+
+    answer = extract_content_or_reasoning(response) or ""
+    return _strip_code_fence(answer.strip())
+
+
+def _strip_code_fence(text: str) -> str:
+    """Remove one wrapping ``` fence if the model added it.
+
+    The fence is dropped only when ``text`` opens with ``` and its final line
+    is a bare ```; the opening line (including any language tag) and the
+    closing line are discarded and the remainder is stripped. Anything else
+    is returned unchanged.
+    """
+    if text.startswith("```"):
+        rows = text.splitlines()
+        # The first row necessarily starts with ``` here, so only the row
+        # count and the closing row need checking.
+        if len(rows) >= 2 and rows[-1].strip() == "```":
+            inner = rows[1:-1]
+            return "\n".join(inner).strip()
+    return text
--- a/tests/agent/test_oneshot.py
+++ b/tests/agent/test_oneshot.py
@ -0,0 +1,110 @@
+"""Tests for agent.oneshot — shared one-off (stateless) LLM requests."""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from agent.oneshot import (
+    PROMPT_TEMPLATES,
+    render_template,
+    run_oneshot,
+    _strip_code_fence,
+    _truncate,
+)
+
+
+class TestRenderTemplate:
+    """Rendering of registered templates into (instructions, user_input)."""
+
+    def test_unknown_template_raises(self):
+        # An unregistered name must fail loudly, not yield an empty prompt.
+        with pytest.raises(KeyError):
+            render_template("does-not-exist", {})
+
+    def test_commit_message_template_is_registered(self):
+        assert "commit_message" in PROMPT_TEMPLATES
+
+    def test_commit_message_includes_diff_and_recent(self):
+        payload = {
+            "diff": "diff --git a/x b/x\n+new",
+            "recent_commits": "feat: a\nfix: b",
+        }
+        system_text, user_text = render_template("commit_message", payload)
+        # The instructions state the contract (conventional commits), not a snapshot.
+        assert "Conventional Commits" in system_text
+        for expected in ("diff --git a/x b/x", "feat: a"):
+            assert expected in user_text
+
+    def test_commit_message_diff_with_braces_passes_through(self):
+        # Code payloads carry literal { }, so templates must not use str.format.
+        braced = "x = {a: 1}"
+        rendered = render_template("commit_message", {"diff": braced})
+        assert braced in rendered[1]
+
+    def test_commit_message_handles_missing_variables(self):
+        system_text, user_text = render_template("commit_message", {})
+        assert system_text
+        assert "no textual diff available" in user_text
+
+    def test_commit_message_avoid_forces_new_message(self):
+        # Supplying the previous message must tell the model not to repeat it,
+        # so "regenerate" differs even on greedy models.
+        prior = "feat: prior"
+        first_try = render_template("commit_message", {"diff": "d"})[1]
+        retry = render_template("commit_message", {"diff": "d", "avoid": prior})[1]
+        assert prior in retry
+        assert "do not repeat" in retry
+        assert prior not in first_try
+
+
+class TestRunOneshot:
+    """End-to-end behaviour of run_oneshot with call_llm patched out."""
+
+    def _mock_response(self, content):
+        # Shape the mock like a chat-completion response: choices[0].message
+        # carries the content, with every reasoning field explicitly None.
+        message = MagicMock()
+        message.content = content
+        message.reasoning = None
+        message.reasoning_content = None
+        message.reasoning_details = None
+        choice = MagicMock()
+        choice.message = message
+        reply = MagicMock()
+        reply.choices = [choice]
+        return reply
+
+    def test_template_path_calls_llm_with_rendered_prompt(self):
+        fake = self._mock_response("feat: add thing")
+        with patch("agent.oneshot.call_llm", return_value=fake) as llm:
+            result = run_oneshot(template="commit_message", variables={"diff": "d"})
+
+        assert result == "feat: add thing"
+        sent = llm.call_args.kwargs["messages"]
+        assert [m["role"] for m in sent[:2]] == ["system", "user"]
+
+    def test_explicit_instructions_path(self):
+        fake = self._mock_response("hello")
+        with patch("agent.oneshot.call_llm", return_value=fake) as llm:
+            result = run_oneshot(instructions="be brief", user_input="say hi")
+
+        assert result == "hello"
+        sent = llm.call_args.kwargs["messages"]
+        assert sent[0]["content"] == "be brief"
+        assert sent[1]["content"] == "say hi"
+
+    def test_requires_template_or_prompt(self):
+        # With neither a template nor any prompt text there is nothing to send.
+        with pytest.raises(ValueError):
+            run_oneshot()
+
+    def test_strips_wrapping_code_fence(self):
+        fake = self._mock_response("```\nfix: bug\n```")
+        with patch("agent.oneshot.call_llm", return_value=fake):
+            result = run_oneshot(instructions="x", user_input="y")
+        assert result == "fix: bug"
+
+
+class TestHelpers:
+    """Direct checks of the private text helpers."""
+
+    def test_truncate_under_limit_unchanged(self):
+        sample = "short"
+        assert _truncate(sample, 100) == sample
+
+    def test_truncate_over_limit_marks_truncation(self):
+        original = "x" * 200
+        clipped = _truncate(original, 50)
+        assert clipped.endswith("…(truncated)")
+        assert len(clipped) < len(original)
+
+    def test_strip_code_fence_without_fence_is_noop(self):
+        sample = "plain text"
+        assert _strip_code_fence(sample) == sample
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@ -177,6 +177,7 @@ _LONG_HANDLERS = frozenset(
        "billing.step_up",
        "browser.manage",
        "cli.exec",
+        "llm.oneshot",
        "plugins.manage",
        "session.branch",
        "session.compress",
@ -5200,6 +5201,84 @@ def _(rid, params: dict) -> dict:
        return _err(rid, 5007, str(e))


+def _main_runtime_from_agent(agent) -> dict | None:
+    """Collect a ``main_runtime`` override from a live agent's attributes.
+
+    Reads ``provider``, ``model``, ``base_url``, ``api_key``, ``api_mode`` and
+    ``auth_mode`` off ``agent``. Non-blank string values are kept (stripped);
+    a callable ``api_key`` is passed through as-is; everything else is
+    skipped. This lets a one-shot run on the session's own provider/model
+    instead of the auxiliary fallback backend.
+
+    Returns None when ``agent`` is None or no usable field was found.
+    """
+    if agent is None:
+        return None
+
+    inherited: dict = {}
+    for attr in ("provider", "model", "base_url", "api_key", "api_mode", "auth_mode"):
+        candidate = getattr(agent, attr, None)
+        cleaned = candidate.strip() if isinstance(candidate, str) else ""
+        if cleaned:
+            inherited[attr] = cleaned
+        elif attr == "api_key" and callable(candidate):
+            # Only api_key may be a callable; it is forwarded untouched.
+            inherited[attr] = candidate
+
+    return inherited if inherited else None
+
+
+@method("llm.oneshot")
+def _(rid, params: dict) -> dict:
+    """Run a single stateless LLM request outside any conversation.
+
+    Generic helper for small generative chores (e.g. a commit message from a
+    diff). Accepts either a named ``template`` + ``variables`` or an explicit
+    ``instructions`` / ``input`` pair. When ``session_id`` resolves to a live
+    session the call inherits that agent's model; otherwise it uses the
+    configured auxiliary ``task`` backend. Never mutates session history, so
+    prompt caching is untouched.
+
+    Params (all optional unless noted):
+      * ``template`` / ``variables`` — registered template name and its dict.
+      * ``instructions`` / ``input`` — explicit prompt; coerced to ``str``.
+      * ``task`` — auxiliary task name, default ``title_generation``.
+      * ``max_tokens`` — positive int, default 1024 on bad/missing values.
+      * ``temperature`` — finite float, default 0.3 on bad/missing values.
+      * ``session_id`` — session whose agent runtime should be inherited.
+
+    Errors: 4030 when no template/prompt is given, 4031 for an unknown
+    template, 4032 for invalid arguments, 5030 for any other failure.
+    """
+    import math
+
+    raw_template = params.get("template") or ""
+    raw_task = params.get("task") or "title_generation"
+    if not isinstance(raw_template, str) or not isinstance(raw_task, str):
+        # A non-string here would raise AttributeError on .strip() before any
+        # error handling runs; reject it as an invalid argument instead.
+        return _err(rid, 4032, "llm.oneshot template/task must be strings")
+    template = raw_template.strip() or None
+    task = raw_task.strip() or "title_generation"
+
+    # Coerce once so the emptiness check below and run_oneshot see the same
+    # values (run_oneshot calls .strip() on them directly).
+    instructions = str(params.get("instructions") or "")
+    user_input = str(params.get("input") or "")
+    variables = params.get("variables") if isinstance(params.get("variables"), dict) else {}
+
+    try:
+        max_tokens = int(params.get("max_tokens") or 1024)
+    except (TypeError, ValueError, OverflowError):
+        # OverflowError covers int(float("inf")).
+        max_tokens = 1024
+    if max_tokens <= 0:
+        max_tokens = 1024
+
+    temperature = params.get("temperature")
+    if temperature is not None:
+        try:
+            temperature = float(temperature)
+        except (TypeError, ValueError, OverflowError):
+            temperature = None
+        else:
+            # NaN / infinity are not usable sampling temperatures.
+            if not math.isfinite(temperature):
+                temperature = None
+
+    if not template and not instructions.strip() and not user_input.strip():
+        return _err(rid, 4030, "llm.oneshot requires a template or instructions/input")
+
+    # Optional: inherit the live session's model (no error if absent).
+    try:
+        session = _sessions.get(params.get("session_id") or "")
+    except TypeError:
+        # Unhashable session_id (e.g. a list or dict): treat as "no session".
+        session = None
+    main_runtime = _main_runtime_from_agent(session.get("agent")) if session else None
+
+    try:
+        from agent.oneshot import run_oneshot
+
+        text = run_oneshot(
+            instructions=instructions,
+            user_input=user_input,
+            template=template,
+            variables=variables,
+            task=task,
+            max_tokens=max_tokens,
+            temperature=temperature if temperature is not None else 0.3,
+            main_runtime=main_runtime,
+        )
+    except KeyError as e:
+        # str(KeyError) is the repr of its argument (it adds quotes); send the
+        # bare message instead.
+        return _err(rid, 4031, str(e.args[0]) if e.args else str(e))
+    except ValueError as e:
+        return _err(rid, 4032, str(e))
+    except Exception as e:
+        logger.warning("llm.oneshot failed: %s", e)
+        return _err(rid, 5030, f"one-shot generation failed: {e}")
+
+    return _ok(rid, {"text": text})
+
+
@method("handoff.request")
 def _(rid, params: dict) -> dict:
    """Queue a handoff of this session to a messaging platform.