From d431dfc4487dabe66860e71fdc9ad8ed745a6281 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 30 Jun 2026 16:56:01 -0700 Subject: [PATCH] fix(learn): honor requirements mixed with sources in /learn requests (#55956) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A /learn request can mix the source(s) to gather (paths, URLs, "what we just did") with requirements that shape the skill (focus, scope, what to omit). When a request led with a path or link, the agent fetched it and treated the trailing prose as incidental, dropping the user's stated focus — the symptom @GrenFX reported. The input layer was never the cause: both CLI (split(None, 1)) and gateway (get_command_args()) capture the full free-text argument. The gap was in build_learn_prompt, which dumped the request as one undifferentiated source blob. build_learn_prompt now tells the agent the request may mix sources and requirements in any order, that prose after a path/link is authoring guidance to honor (not noise), and to never fetch the first source and ignore the rest. Adds step 1b: apply every requirement to what the SKILL.md covers, not just which sources get read. Both surfaces inherit it; no parser change, zero tool footprint. 
--- agent/learn_prompt.py | 30 ++++++++++++++++++++++-------- tests/agent/test_learn_prompt.py | 18 +++++++++++++++++- 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/agent/learn_prompt.py b/agent/learn_prompt.py index 64ad543f8..b633ed0f5 100644 --- a/agent/learn_prompt.py +++ b/agent/learn_prompt.py @@ -117,15 +117,29 @@ def build_learn_prompt(user_request: str) -> str: return ( "[/learn] The user wants you to learn a reusable skill from the " - "source(s) they described below, and save it.\n\n" - f"WHAT TO LEARN FROM:\n{req}\n\n" + "request below, and save it.\n\n" + f"THE REQUEST:\n{req}\n\n" + "The request is open-ended and may mix two kinds of content, in any " + "order: SOURCES to gather (directories, file paths, URLs, \"what we " + "just did\", pasted notes) AND REQUIREMENTS that shape the skill " + "(what to focus on, what to leave out, scope, naming, the angle to " + "take). Treat EVERY part of the request as load-bearing. In " + "particular, prose that comes after a path or link is NOT incidental " + "— it is the user telling you what they want from that source. A " + "request like `<url> focus on the auth flow, skip the deprecated " + "endpoints` means: gather the URL AND honor \"focus on auth, skip " + "deprecated\" as authoring requirements. Never fetch the first source " + "and ignore the rest.\n\n" "Do this:\n" - "1. Gather the material. Resolve whatever the user named using the " - "tools you already have — `read_file`/`search_files` for local files " - "or directories, `web_extract` for URLs, the current conversation " - "history if they referred to something you just did, and the text " - "they pasted as-is. If the request is ambiguous about scope, make a " - "reasonable choice and note it; do not stall.\n" + "1. 
Gather every source the user named, using the tools you already " + "have — `read_file`/`search_files` for local files or directories, " + "`web_extract` for URLs, the current conversation history if they " + "referred to something you just did, and the text they pasted as-is. " + "If the request is ambiguous about scope, make a reasonable choice " + "and note it; do not stall.\n" + "1b. Apply every requirement, focus, and constraint in the request to " + "the skill you author — these govern what the SKILL.md covers and " + "emphasizes, not just which sources you read.\n" "2. Author ONE SKILL.md and save it with the `skill_manage` tool " "(action=\"create\"). Pick a sensible category. If the procedure needs " "a non-trivial script, add it under the skill's `scripts/` with " diff --git a/tests/agent/test_learn_prompt.py b/tests/agent/test_learn_prompt.py index 392833d12..2e1c2f388 100644 --- a/tests/agent/test_learn_prompt.py +++ b/tests/agent/test_learn_prompt.py @@ -32,6 +32,23 @@ class TestBuildLearnPrompt: for tool in ("read_file", "search_files", "web_extract"): assert tool in prompt + def test_separates_sources_from_requirements(self): + # The reported bug (@GrenFX, Jun 2026): when a request leads with a + # path/URL, the agent fetched it and ignored the trailing prose. The + # prompt must tell the agent the request can MIX sources and + # requirements, and that prose after a source is authoring guidance to + # honor — not noise to drop. + prompt = build_learn_prompt( + "https://api.example.com/docs focus on the auth flow, skip deprecated bits" + ) + low = prompt.lower() + # Carries the whole request verbatim (no truncation at the URL). + assert "focus on the auth flow, skip deprecated bits" in prompt + # Explicitly distinguishes sources from requirements. + assert "requirement" in low + # Names the failure mode it's guarding against. 
+ assert "never fetch the first source" in low + def test_empty_request_falls_back_to_the_conversation(self): # Bare /learn should distill "what we just did", not error. prompt = build_learn_prompt("") @@ -47,7 +64,6 @@ class TestBuildLearnPrompt: assert "60" in _AUTHORING_STANDARDS def test_teaches_the_full_hardline_standards(self): - # /learn must teach ALL the CONTRIBUTING.md skill rules, not just the # description length — otherwise distilled skills miss platform gating, # author credit, and the tool-framing table. Lock the coverage in. std = _AUTHORING_STANDARDS.lower()