From d431dfc4487dabe66860e71fdc9ad8ed745a6281 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 30 Jun 2026 16:56:01 -0700
Subject: [PATCH] fix(learn): honor requirements mixed with sources in /learn
 requests (#55956)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A /learn request can mix the source(s) to gather (paths, URLs, "what we
just did") with requirements that shape the skill (focus, scope, what to
omit). When a request led with a path or link, the agent fetched it and
treated the trailing prose as incidental, dropping the user's stated
focus — the symptom @GrenFX reported.

The input layer was never the cause: both CLI (split(None, 1)) and
gateway (get_command_args()) capture the full free-text argument. The
gap was in build_learn_prompt, which dumped the request as one
undifferentiated source blob.

build_learn_prompt now tells the agent the request may mix sources and
requirements in any order, that prose after a path/link is authoring
guidance to honor (not noise), and to never fetch the first source and
ignore the rest. Adds step 1b: apply every requirement to what the
SKILL.md covers, not just which sources get read. Both surfaces inherit
it; no parser change, zero tool footprint.
---
 agent/learn_prompt.py            | 30 ++++++++++++++++++++++--------
 tests/agent/test_learn_prompt.py | 18 +++++++++++++++++-
 2 files changed, 39 insertions(+), 9 deletions(-)
diff --git a/agent/learn_prompt.py b/agent/learn_prompt.py
index 64ad543f8..b633ed0f5 100644
--- a/agent/learn_prompt.py
+++ b/agent/learn_prompt.py
@@ -117,15 +117,29 @@ def build_learn_prompt(user_request: str) -> str:
 
     return (
         "[/learn] The user wants you to learn a reusable skill from the "
-        "source(s) they described below, and save it.\n\n"
-        f"WHAT TO LEARN FROM:\n{req}\n\n"
+        "request below, and save it.\n\n"
+        f"THE REQUEST:\n{req}\n\n"
+        "The request is open-ended and may mix two kinds of content, in any "
+        "order: SOURCES to gather (directories, file paths, URLs, \"what we "
+        "just did\", pasted notes) AND REQUIREMENTS that shape the skill "
+        "(what to focus on, what to leave out, scope, naming, the angle to "
+        "take). Treat EVERY part of the request as load-bearing. In "
+        "particular, prose that comes after a path or link is NOT incidental "
+        "— it is the user telling you what they want from that source. A "
+        "request like `<url> focus on the auth flow, skip the deprecated "
+        "endpoints` means: gather the URL AND honor \"focus on auth, skip "
+        "deprecated\" as authoring requirements. Never fetch the first source "
+        "and ignore the rest.\n\n"
         "Do this:\n"
-        "1. Gather the material. Resolve whatever the user named using the "
-        "tools you already have — `read_file`/`search_files` for local files "
-        "or directories, `web_extract` for URLs, the current conversation "
-        "history if they referred to something you just did, and the text "
-        "they pasted as-is. If the request is ambiguous about scope, make a "
-        "reasonable choice and note it; do not stall.\n"
+        "1. Gather every source the user named, using the tools you already "
+        "have — `read_file`/`search_files` for local files or directories, "
+        "`web_extract` for URLs, the current conversation history if they "
+        "referred to something you just did, and the text they pasted as-is. "
+        "If the request is ambiguous about scope, make a reasonable choice "
+        "and note it; do not stall.\n"
+        "1b. Apply every requirement, focus, and constraint in the request to "
+        "the skill you author — these govern what the SKILL.md covers and "
+        "emphasizes, not just which sources you read.\n"
         "2. Author ONE SKILL.md and save it with the `skill_manage` tool "
         "(action=\"create\"). Pick a sensible category. If the procedure needs "
         "a non-trivial script, add it under the skill's `scripts/` with "
diff --git a/tests/agent/test_learn_prompt.py b/tests/agent/test_learn_prompt.py
index 392833d12..2e1c2f388 100644
--- a/tests/agent/test_learn_prompt.py
+++ b/tests/agent/test_learn_prompt.py
@@ -32,6 +32,23 @@ class TestBuildLearnPrompt:
         for tool in ("read_file", "search_files", "web_extract"):
             assert tool in prompt
 
+    def test_separates_sources_from_requirements(self):
+        # The reported bug (@GrenFX, Jun 2026): when a request leads with a
+        # path/URL, the agent fetched it and ignored the trailing prose. The
+        # prompt must tell the agent the request can MIX sources and
+        # requirements, and that prose after a source is authoring guidance to
+        # honor — not noise to drop.
+        prompt = build_learn_prompt(
+            "https://api.example.com/docs focus on the auth flow, skip deprecated bits"
+        )
+        low = prompt.lower()
+        # Carries the whole request verbatim (no truncation at the URL).
+        assert "focus on the auth flow, skip deprecated bits" in prompt
+        # Explicitly distinguishes sources from requirements.
+        assert "requirement" in low
+        # Names the failure mode it's guarding against.
+        assert "never fetch the first source" in low
+
     def test_empty_request_falls_back_to_the_conversation(self):
         # Bare /learn should distill "what we just did", not error.
         prompt = build_learn_prompt("")
@@ -47,7 +64,7 @@ class TestBuildLearnPrompt:
         assert "60" in _AUTHORING_STANDARDS
 
     def test_teaches_the_full_hardline_standards(self):
-        # /learn must teach ALL the CONTRIBUTING.md skill rules, not just the
+        # /learn must teach ALL the CONTRIBUTING.md skill rules, not just the
         # description length — otherwise distilled skills miss platform gating,
         # author credit, and the tool-framing table. Lock the coverage in.
         std = _AUTHORING_STANDARDS.lower()