feat(learning): profile-scoped memory + learned-skill graph API

Assemble a per-profile graph of memories and learned skills over time (agent/learning_graph.py) and serve it at GET /api/learning/graph (hermes_cli/web_server.py), with tests. The radial time axis the desktop renders is derived from this payload; the REST path stays under /learning for backend compatibility.
2026-06-30 00:54:14 -05:00 · 2026-06-30 00:54:14 -05:00 · 96552c31e3
commit 96552c31e3
parent f171842f0d
3 changed files with 414 additions and 0 deletions
--- a/agent/learning_graph.py
+++ b/agent/learning_graph.py
@ -0,0 +1,312 @@
+"""Assemble the "learning made visible" graph for desktop.
+
+This graph is intentionally scoped to what a user actually learns over time:
+- non-base, learned/profile skills (agent-created or used),
+- memory chunks from ``MEMORY.md`` / ``USER.md`` as first-class nodes.
+
+Skill links come from declared ``related_skills``. Memory-to-skill links are
+derived from lexical overlap so the graph can answer "which learned skills are
+connected to the things I remember?".
+
+Run as a module to print edge-density stats against real data:
+
+    python -m agent.learning_graph
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import re
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Optional
+
+
+@dataclass
+class SkillNode:
+    """One skill found on disk, merged with its usage record (if any)."""
+
+    # Frontmatter ``name``; build_skill_nodes falls back to the skill directory name.
+    name: str
+    # Declared category, else derived from the SKILL.md path (see _category).
+    category: str
+    # Label of the root the file was found under ("base" or "profile" in _skill_roots).
+    source: str = "profile"
+    # Usage ``last_activity_at`` when usable, otherwise the SKILL.md mtime.
+    timestamp: Optional[int] = None
+    # The remaining scalar fields are copied from the usage record.
+    use_count: int = 0
+    state: str = "active"
+    created_by: Optional[str] = None
+    pinned: bool = False
+    # Skill names declared under ``related_skills``; may name skills that do not exist.
+    related: list[str] = field(default_factory=list)
+
+
+def _frontmatter(text: str) -> dict[str, Any]:
+    """Parse skill frontmatter out of *text*, best-effort.
+
+    Delegates to ``agent.skill_utils.parse_frontmatter`` and keeps only the
+    first element of the pair it returns. Any failure (the parser cannot be
+    imported, or it rejects the input) yields an empty mapping instead of
+    propagating, as does an empty parse result.
+    """
+    parsed: dict[str, Any] = {}
+    try:
+        from agent.skill_utils import parse_frontmatter
+
+        meta, _body = parse_frontmatter(text)
+        if meta:
+            parsed = meta
+    except Exception:
+        parsed = {}
+    return parsed
+
+
+def _related(fm: dict[str, Any]) -> list[str]:
+    """Return the skill names declared under ``related_skills``.
+
+    The top-level ``related_skills`` key wins; when it is missing or empty the
+    nested ``metadata.hermes.related_skills`` is used instead. The value may be
+    a list or a comma-separated string (optionally wrapped in ``[...]``).
+    Blank entries are dropped. Anything else yields an empty list.
+    """
+    raw = fm.get("related_skills")
+    if not raw:
+        # Frontmatter is user-authored: ``metadata`` / ``hermes`` can be null
+        # or a scalar, so only descend through real mappings.
+        metadata = fm.get("metadata")
+        hermes = metadata.get("hermes") if isinstance(metadata, dict) else None
+        raw = hermes.get("related_skills") if isinstance(hermes, dict) else None
+
+    if isinstance(raw, list):
+        return [str(r).strip() for r in raw if str(r).strip()]
+    if isinstance(raw, str):
+        # An inline list kept as text (``["a", 'b']``) would otherwise leave
+        # the quotes attached and never match a skill name.
+        names = (part.strip().strip("'\"").strip() for part in raw.strip("[]").split(","))
+        return [name for name in names if name]
+    return []
+
+
+def _category(fm: dict[str, Any], skill_md: Path) -> str:
+    """Return the category a skill belongs to.
+
+    Resolution order: top-level ``category``, then ``metadata.hermes.category``,
+    then the directory two levels above the file, and finally ``"general"``
+    when the path is too short to have one.
+    """
+    cat = fm.get("category")
+    if not cat:
+        # ``metadata`` / ``hermes`` may be null or a scalar in hand-written
+        # frontmatter; only descend through real mappings.
+        metadata = fm.get("metadata")
+        hermes = metadata.get("hermes") if isinstance(metadata, dict) else None
+        cat = hermes.get("category") if isinstance(hermes, dict) else None
+    if cat:
+        return str(cat)
+    # …/skills/<category>/<skill>/SKILL.md
+    parts = skill_md.parts
+    return parts[-3] if len(parts) >= 3 else "general"
+
+
+def _iter_skill_files(roots: list[tuple[str, Path]]):
+    """Yield ``(source, path)`` for every ``SKILL.md`` below each root.
+
+    Roots are visited in the order given; a root that is not an existing
+    directory is skipped. Within a root, paths are yielded in sorted order:
+    ``rglob`` makes no ordering promise, and build_skill_nodes keeps the first
+    file it sees for a given skill name, so an unsorted walk would let the
+    filesystem decide which duplicate wins.
+    """
+    for source, root in roots:
+        if not root.is_dir():
+            continue
+        for path in sorted(root.rglob("SKILL.md")):
+            yield source, path
+
+
+def _load_usage() -> dict[str, dict[str, Any]]:
+    """Load per-skill usage records, keyed by skill name.
+
+    Prefers ``tools.skill_usage.load_usage``. If that is unavailable, fails, or
+    returns something other than a mapping, falls back to reading
+    ``skills/.usage.json`` directly. The fallback resolves the Hermes home the
+    same way the skill roots and memory files do, so it follows the active
+    home instead of always reading ``~/.hermes``. Never raises; returns ``{}``
+    when nothing usable is found.
+    """
+    try:
+        from tools.skill_usage import load_usage
+
+        loaded = load_usage()
+        if isinstance(loaded, dict):
+            return loaded
+    except Exception:
+        pass
+
+    try:
+        from hermes_constants import get_hermes_home
+
+        skills_dir = get_hermes_home() / "skills"
+    except Exception:
+        skills_dir = Path(os.path.expanduser("~/.hermes/skills"))
+
+    try:
+        data = json.loads((skills_dir / ".usage.json").read_text(encoding="utf-8"))
+    except Exception:
+        return {}
+    # Callers index the result with ``.get``; valid JSON that is not an object
+    # (e.g. a list) must not leak through.
+    return data if isinstance(data, dict) else {}
+
+
+def _to_int_ts(value: Any) -> Optional[int]:
+    """Coerce *value* to an integer timestamp, or ``None`` if that fails.
+
+    Numbers are truncated with ``int()``. Anything else is stringified,
+    stripped and parsed as a float first, so ``"12.9"`` becomes ``12``.
+    ``None``, blank strings and unparseable input (including NaN and
+    infinities) all give ``None``.
+    """
+    if value is None:
+        return None
+    try:
+        if isinstance(value, (int, float)):
+            return int(value)
+        text = str(value).strip()
+        if not text:
+            return None
+        return int(float(text))
+    except Exception:
+        return None
+
+
+def build_skill_nodes(skill_roots: list[tuple[str, Path]]) -> dict[str, SkillNode]:
+    """Scan *skill_roots* for ``SKILL.md`` files and build one node per skill name.
+
+    Args:
+        skill_roots: ``(source_label, directory)`` pairs, searched in order.
+
+    Returns:
+        Mapping of skill name to ``SkillNode``. The first file seen for a name
+        wins; later duplicates are ignored. Files under ``.archive``, ``.hub``,
+        ``node_modules`` or ``.git`` are skipped, as are files that cannot be
+        read, stat'ed or decoded as UTF-8.
+
+    Usage data is best-effort: a missing or malformed record leaves the node
+    with default counters rather than failing the scan.
+    """
+    usage = _load_usage()
+    if not isinstance(usage, dict):
+        usage = {}
+    nodes: dict[str, SkillNode] = {}
+    skipped_dirs = {".archive", ".hub", "node_modules", ".git"}
+
+    for source, skill_md in _iter_skill_files(skill_roots):
+        if not skipped_dirs.isdisjoint(skill_md.parts):
+            continue
+        try:
+            # Only the head of the file is handed to the frontmatter parser.
+            head = skill_md.read_text(encoding="utf-8")[:4000]
+            file_ts = _to_int_ts(skill_md.stat().st_mtime)
+        except (OSError, UnicodeDecodeError):
+            # UnicodeDecodeError is a ValueError, not an OSError: one badly
+            # encoded skill file must not take the whole graph down. The file
+            # can also vanish between the walk and the stat.
+            continue
+        fm = _frontmatter(head)
+        name = str(fm.get("name") or skill_md.parent.name).strip()
+        if not name or name in nodes:
+            continue
+        rec = usage.get(name)
+        if not isinstance(rec, dict):
+            rec = {}
+        last_activity = _to_int_ts(rec.get("last_activity_at"))
+        nodes[name] = SkillNode(
+            name=name,
+            category=_category(fm, skill_md),
+            source=source,
+            timestamp=last_activity or file_ts,
+            # Tolerant integer coercion: a garbled counter counts as zero.
+            use_count=_to_int_ts(rec.get("use_count")) or 0,
+            state=str(rec.get("state", "active") or "active"),
+            created_by=rec.get("created_by"),
+            pinned=bool(rec.get("pinned", False)),
+            related=_related(fm),
+        )
+    return nodes
+
+
+def build_edges(nodes: dict[str, SkillNode]) -> list[tuple[str, str]]:
+    """Collect undirected ``related_skills`` links between known skills.
+
+    A link is kept only when its target is itself a key of *nodes* and is not
+    the declaring skill. Each pair is stored with its two names sorted, so a
+    link declared from both sides appears once. Pairs come back in the order
+    they were first encountered.
+    """
+    # A dict doubles as an insertion-ordered set of pairs.
+    ordered: dict[tuple[str, str], None] = {}
+    for node in nodes.values():
+        for target in node.related:
+            if target == node.name or target not in nodes:
+                continue
+            pair = tuple(sorted((node.name, target)))
+            ordered.setdefault(pair, None)
+    return list(ordered)
+
+
+def density_stats(nodes: dict[str, SkillNode], edges: list[tuple[str, str]]) -> dict[str, Any]:
+    """Summarise how densely *nodes* are connected by *edges*.
+
+    Returns a dict with: ``nodes`` and ``related_edges`` (counts),
+    ``edges_per_node`` (3 decimals), ``linked_nodes`` (nodes touching at least
+    one edge), ``isolated_pct`` (share of nodes with no edge, 1 decimal),
+    ``categories`` (distinct count), ``agent_created``, ``used`` and
+    ``top_categories`` (up to eight ``(category, count)`` pairs, largest first).
+
+    An empty graph reports ``0.0`` for both ratios: there are no isolated
+    nodes when there are no nodes at all.
+    """
+    total = len(nodes)
+    # Only endpoints that are actual nodes count as linked, so a stray edge
+    # can never push the isolated share below zero.
+    linked = {name for edge in edges for name in edge if name in nodes}
+
+    cats: dict[str, int] = {}
+    for node in nodes.values():
+        cats[node.category] = cats.get(node.category, 0) + 1
+
+    if total:
+        edges_per_node = round(len(edges) / total, 3)
+        isolated_pct = round(100 * (total - len(linked)) / total, 1)
+    else:
+        edges_per_node = 0.0
+        isolated_pct = 0.0
+
+    return {
+        "nodes": total,
+        "related_edges": len(edges),
+        "edges_per_node": edges_per_node,
+        "linked_nodes": len(linked),
+        "isolated_pct": isolated_pct,
+        "categories": len(cats),
+        "agent_created": sum(1 for x in nodes.values() if x.created_by == "agent"),
+        "used": sum(1 for x in nodes.values() if x.use_count > 0),
+        "top_categories": sorted(cats.items(), key=lambda kv: -kv[1])[:8],
+    }
+
+
+def _memory_cards() -> list[dict[str, Any]]:
+    """Freeform memory as readable cards.
+
+    ``MEMORY.md`` / ``USER.md`` are prose split on bare ``§`` separator lines;
+    each non-empty chunk becomes one card with ``source``, ``timestamp``,
+    ``title`` and ``body`` keys. Every chunk is surfaced — the graph shows
+    everything. A file that is missing, unreadable or not valid UTF-8 is
+    skipped, so one bad file cannot fail the whole graph.
+    """
+    try:
+        from hermes_constants import get_hermes_home
+
+        base = get_hermes_home() / "memories"
+    except Exception:
+        base = Path(os.path.expanduser("~/.hermes/memories"))
+
+    cards: list[dict[str, Any]] = []
+    for fname, source in (("MEMORY.md", "memory"), ("USER.md", "profile")):
+        path = base / fname
+        try:
+            text = path.read_text(encoding="utf-8").strip()
+            file_ts = _to_int_ts(path.stat().st_mtime)
+        except (OSError, UnicodeDecodeError):
+            # UnicodeDecodeError is a ValueError, so ``except OSError`` alone
+            # would let a badly encoded memory file escape as an exception.
+            continue
+        for chunk_idx, chunk in enumerate(c.strip() for c in text.split("\n§\n")):
+            if not chunk:
+                continue
+            # Title is the first line with any leading markdown heading marks removed.
+            first = chunk.splitlines()[0].strip().lstrip("# ").strip()
+            cards.append(
+                {
+                    "source": source,
+                    # All chunks share the file's mtime; the chunk index is
+                    # added so cards from one file get distinct, ordered stamps.
+                    "timestamp": file_ts + chunk_idx if file_ts is not None else None,
+                    "title": (first[:80] + "…") if len(first) > 80 else first,
+                    "body": chunk[:1200],
+                }
+            )
+    return cards
+
+
+def _tokenize(text: str) -> set[str]:
+    """Return the distinct lowercase ``[a-z0-9]`` runs in *text* of length 3 or more."""
+    pieces = re.split(r"[^a-z0-9]+", text.lower())
+    return {piece for piece in pieces if len(piece) >= 3}
+
+
+def _memory_skill_edges(memory_cards: list[dict[str, str]], skills: list[SkillNode]) -> list[tuple[str, str]]:
+    """Link each memory card to the skills whose names it overlaps with.
+
+    A skill scores one point per name token that also occurs in the card's
+    title/body, plus six when the whole lowercased name appears verbatim in
+    the text. Per card, the four best-scoring skills (ties broken by name) are
+    linked. Edges are ``(memory_id, skill_name)`` where ``memory_id`` is
+    ``memory:<source>:<position in memory_cards>``.
+    """
+    # Tokenise and lowercase each skill name once, not once per card.
+    indexed_skills = [(skill.name, skill.name.lower(), _tokenize(skill.name)) for skill in skills]
+
+    links: list[tuple[str, str]] = []
+    for idx, card in enumerate(memory_cards):
+        mem_id = f"memory:{card['source']}:{idx}"
+        haystack = f"{card.get('title', '')}\n{card.get('body', '')}".lower()
+        words = _tokenize(haystack)
+
+        ranked: list[tuple[int, str]] = []
+        for skill_name, lowered, name_tokens in indexed_skills:
+            overlap = len(name_tokens & words)
+            points = overlap + 6 if lowered in haystack else overlap
+            if points > 0:
+                ranked.append((points, skill_name))
+
+        # Highest score first, then alphabetical.
+        ranked.sort(key=lambda item: (-item[0], item[1]))
+        links.extend((mem_id, skill_name) for _, skill_name in ranked[:4])
+    return links
+
+
+def _skill_roots() -> list[tuple[str, Path]]:
+    """Return the two skill roots to scan, as ``(label, directory)`` pairs.
+
+    ``"base"`` is the ``skills`` directory one level above this file's package;
+    ``"profile"`` is ``skills`` under the Hermes home, or under ``~/.hermes``
+    when the home cannot be resolved.
+    """
+    try:
+        from hermes_constants import get_hermes_home
+
+        profile_root = get_hermes_home() / "skills"
+    except Exception:
+        profile_root = Path(os.path.expanduser("~/.hermes/skills"))
+
+    bundled_root = Path(__file__).resolve().parent.parent / "skills"
+    return [("base", bundled_root), ("profile", profile_root)]
+
+
+def build_learning_graph() -> dict[str, Any]:
+    """Assemble the full payload for the desktop learning panel.
+
+    Only skills that are not from the ``base`` root and that are either
+    agent-created or have been used at least once are included. Memory chunks
+    are added as nodes of their own and linked to those skills.
+
+    Returns a dict with ``nodes``, ``edges``, ``clusters`` (category counts,
+    largest first), ``memory`` (the raw cards) and ``stats``.
+    """
+    learned: dict[str, SkillNode] = {}
+    for name, node in build_skill_nodes(_skill_roots()).items():
+        if node.source == "base":
+            continue
+        if node.created_by == "agent" or node.use_count > 0:
+            learned[name] = node
+
+    skill_edges = build_edges(learned)
+    memory_cards = _memory_cards()
+    memory_edges = _memory_skill_edges(memory_cards, list(learned.values()))
+
+    cluster_sizes: dict[str, int] = {}
+    for node in learned.values():
+        cluster_sizes[node.category] = cluster_sizes.get(node.category, 0) + 1
+    if memory_cards:
+        # NOTE: replaces (does not add to) any count from skills whose
+        # category is literally "memory".
+        cluster_sizes["memory"] = len(memory_cards)
+
+    graph_nodes: list[dict[str, Any]] = []
+    for skill in learned.values():
+        graph_nodes.append(
+            {
+                "id": skill.name,
+                "label": skill.name,
+                "kind": "skill",
+                "timestamp": skill.timestamp,
+                "category": skill.category,
+                "useCount": skill.use_count,
+                "state": skill.state,
+                "createdBy": skill.created_by,
+                "pinned": skill.pinned,
+            }
+        )
+    # Memory ids use the card's position in the list, matching the ids that
+    # _memory_skill_edges puts on its edges.
+    graph_nodes.extend(
+        {
+            "id": f"memory:{card['source']}:{i}",
+            "label": card["title"],
+            "kind": "memory",
+            "memorySource": card["source"],
+            "timestamp": card.get("timestamp"),
+            "category": "memory",
+            "useCount": 0,
+            "state": "active",
+            "createdBy": "memory",
+            "pinned": False,
+        }
+        for i, card in enumerate(memory_cards)
+    )
+
+    # Density figures describe the skill-to-skill graph only; memory counts
+    # are reported alongside them.
+    stats = dict(density_stats(learned, skill_edges))
+    stats["memory_nodes"] = len(memory_cards)
+    stats["memory_skill_edges"] = len(memory_edges)
+    stats["learned_skills"] = len(learned)
+
+    ranked_clusters = sorted(cluster_sizes.items(), key=lambda kv: -kv[1])
+    return {
+        "nodes": graph_nodes,
+        "edges": [{"source": a, "target": b} for a, b in [*skill_edges, *memory_edges]],
+        "clusters": [{"category": c, "count": n} for c, n in ranked_clusters],
+        "memory": memory_cards,
+        "stats": stats,
+    }
+
+
+if __name__ == "__main__":
+    # Script entry point: print density stats as JSON. This runs over every
+    # discovered skill from both roots (no learned-only filter) and counts
+    # declared related_skills edges only; memory is not included.
+    nodes = build_skill_nodes(_skill_roots())
+    print(json.dumps(density_stats(nodes, build_edges(nodes)), indent=2))
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@ -2452,6 +2452,23 @@ async def run_curator():
    return {"ok": True, "pid": proc.pid, "name": "curator-run"}


+@app.get("/api/learning/graph")
+async def get_learning_graph(profile: Optional[str] = None):
+    """Learning graph payload for the desktop panel.
+
+    Profile-scoped view of learned, non-base skills plus memory chunks, with
+    graph links derived from skill relations and memory-skill overlap.
+
+    Args:
+        profile: Optional profile name, passed to ``_profile_scope``.
+
+    Raises:
+        HTTPException: 500 when the graph cannot be built. An HTTPException
+            raised while entering the profile scope or building the graph is
+            passed through unchanged.
+    """
+    try:
+        from agent.learning_graph import build_learning_graph
+
+        with _profile_scope(profile):
+            return build_learning_graph()
+    except HTTPException:
+        # HTTPException is an Exception subclass; without this clause a
+        # deliberate 4xx would be logged as a failure and masked as a 500.
+        # NOTE(review): presumably _profile_scope raises one for an unknown
+        # profile — confirm against its definition.
+        raise
+    except Exception:
+        _log.exception("GET /api/learning/graph failed")
+        raise HTTPException(status_code=500, detail="Failed to build learning graph")
+
+
 def _safe_call(mod, fn_name: str, default):
    try:
        fn = getattr(mod, fn_name, None)
--- a/tests/agent/test_learning_graph.py
+++ b/tests/agent/test_learning_graph.py
@ -0,0 +1,85 @@
+"""Behavior contracts for the learning-graph assembler.
+
+Asserts invariants (edges resolve to real nodes, clusters cover every node,
+memory cards are represented consistently), never a snapshot of the live skill
+catalog — that catalog grows every release and a count assertion would be a
+change-detector.
+"""
+
+from __future__ import annotations
+
+from agent import learning_graph
+from hermes_constants import reset_hermes_home_override, set_hermes_home_override
+
+
+def _node(name: str, category: str, related=None):
+    """Build a ``SkillNode`` with default usage fields and the given ``related`` names."""
+    return learning_graph.SkillNode(
+        name=name,
+        category=category,
+        related=list(related or []),
+    )
+
+
+def test_edges_only_connect_existing_nodes():
+    """Links to unknown skills are dropped; a mutual link yields one edge."""
+    catalog = {
+        node.name: node
+        for node in (
+            _node("a", "x", related=["b", "ghost"]),
+            _node("b", "x", related=["a"]),
+            _node("c", "y"),
+        )
+    }
+
+    # a<->b is declared from both sides but must appear once; "ghost" is not
+    # a node, so a->ghost disappears.
+    assert learning_graph.build_edges(catalog) == [("a", "b")]
+
+
+def test_density_stats_count_isolated_nodes():
+    """Two linked skills plus one loner: a third of the graph is isolated."""
+    catalog = {
+        node.name: node
+        for node in (
+            _node("a", "x", related=["b"]),
+            _node("b", "x", related=["a"]),
+            _node("c", "y"),
+        )
+    }
+    edges = learning_graph.build_edges(catalog)
+    stats = learning_graph.density_stats(catalog, edges)
+
+    assert stats["nodes"] == 3
+    assert stats["linked_nodes"] == 2
+    assert stats["isolated_pct"] == round(100 / 3, 1)
+
+
+def test_memory_is_cards_split_on_separator(tmp_path):
+    """Each ``§``-separated chunk of MEMORY.md becomes a card and a memory node."""
+    home = tmp_path / ".hermes"
+    memories = home / "memories"
+    memories.mkdir(parents=True)
+    (memories / "MEMORY.md").write_text(
+        "Project uses pytest with xdist\n§\nUser prefers concise responses",
+        encoding="utf-8",
+    )
+
+    token = set_hermes_home_override(home)
+    try:
+        graph = learning_graph.build_learning_graph()
+    finally:
+        reset_hermes_home_override(token)
+
+    cards = graph["memory"]
+    titles = {card["title"] for card in cards}
+    assert "Project uses pytest with xdist" in titles
+    assert "User prefers concise responses" in titles
+    # Memory cards remain typed cards and also appear as memory-kind nodes.
+    for card in cards:
+        assert card["source"] in {"memory", "profile"}
+        assert "timestamp" in card
+    assert "memory" in {node["kind"] for node in graph["nodes"]}
+
+
+def test_full_payload_shape_and_edge_integrity(tmp_path):
+    """Structural invariants of the payload hold for an empty Hermes home."""
+    home = tmp_path / ".hermes"
+    home.mkdir()
+
+    token = set_hermes_home_override(home)
+    try:
+        graph = learning_graph.build_learning_graph()
+    finally:
+        reset_hermes_home_override(token)
+
+    graph_nodes = graph["nodes"]
+    node_ids = {node["id"] for node in graph_nodes}
+    for edge in graph["edges"]:
+        assert edge["source"] in node_ids
+        assert edge["target"] in node_ids
+
+    # Every node's category appears in the cluster list.
+    cluster_cats = {cluster["category"] for cluster in graph["clusters"]}
+    for node in graph_nodes:
+        assert node["category"] in cluster_cats
+
+    skill_count = sum(1 for node in graph_nodes if node["kind"] == "skill")
+    assert graph["stats"]["nodes"] == skill_count
+    assert graph["stats"]["memory_nodes"] == len(graph["memory"])
+    for node in graph_nodes:
+        assert "timestamp" in node