From 96552c31e3e9a6f69ce015febac77456675b494c Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson
Date: Tue, 30 Jun 2026 00:54:14 -0500
Subject: [PATCH] feat(learning): profile-scoped memory + learned-skill graph API

Assemble a per-profile graph of memories and learned skills over time
(agent/learning_graph.py) and serve it at GET /api/learning/graph
(hermes_cli/web_server.py), with tests. The radial time axis the desktop
renders is derived from this payload; the REST path stays under /learning
for backend compatibility.
---
 agent/learning_graph.py            | 408 +++++++++++++++++++++++++++++
 hermes_cli/web_server.py           |  20 ++
 tests/agent/test_learning_graph.py | 100 ++++++++
 3 files changed, 528 insertions(+)
 create mode 100644 agent/learning_graph.py
 create mode 100644 tests/agent/test_learning_graph.py

diff --git a/agent/learning_graph.py b/agent/learning_graph.py
new file mode 100644
index 000000000..34c4f0af0
--- /dev/null
+++ b/agent/learning_graph.py
@@ -0,0 +1,408 @@
+"""Assemble the "learning made visible" graph for desktop.
+
+This graph is intentionally scoped to what a user actually learns over time:
+- non-base, learned/profile skills (agent-created or used),
+- memory chunks from ``MEMORY.md`` / ``USER.md`` as first-class nodes.
+
+Skill links come from declared ``related_skills``. Memory-to-skill links are
+derived from lexical overlap so the graph can answer "which learned skills are
+connected to the things I remember?".
+
+Run as a module to print edge-density stats against real data:
+
+    python -m agent.learning_graph
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import re
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Optional
+
+# Directory names whose contents are never treated as live skills.
+_SKIP_DIRS = frozenset({".archive", ".hub", "node_modules", ".git"})
+# Only this many leading characters of a SKILL.md reach the frontmatter parser.
+_FRONTMATTER_HEAD_CHARS = 4000
+# Tokens are the runs between non-alphanumeric characters of lowercased text.
+_TOKEN_SPLIT = re.compile(r"[^a-z0-9]+")
+
+
+@dataclass
+class SkillNode:
+    """A skill discovered on disk, merged with its usage record."""
+
+    name: str
+    category: str
+    source: str = "profile"
+    timestamp: Optional[int] = None
+    use_count: int = 0
+    state: str = "active"
+    created_by: Optional[str] = None
+    pinned: bool = False
+    related: list[str] = field(default_factory=list)
+
+
+def _hermes_home() -> Path:
+    """Return the active Hermes home, falling back to ``~/.hermes``.
+
+    Resolved on every call rather than cached, so a home override that is
+    installed later (the tests use one) is still picked up.
+    """
+    try:
+        from hermes_constants import get_hermes_home
+
+        return Path(get_hermes_home())
+    except Exception:
+        return Path(os.path.expanduser("~/.hermes"))
+
+
+def _frontmatter(text: str) -> dict[str, Any]:
+    """Parse SKILL.md frontmatter from *text*; return ``{}`` on any failure."""
+    try:
+        from agent.skill_utils import parse_frontmatter
+
+        fm, _ = parse_frontmatter(text)
+    except Exception:
+        # Best-effort: a skill whose frontmatter cannot be parsed still becomes
+        # a node, named after its directory.
+        return {}
+    return fm if isinstance(fm, dict) else {}
+
+
+def _hermes_meta(fm: dict[str, Any]) -> dict[str, Any]:
+    """Return the ``metadata.hermes`` mapping of *fm*, or ``{}`` if unusable.
+
+    ``metadata`` or ``hermes`` may be missing, ``None`` (an empty YAML key) or a
+    scalar, so each level is type-checked instead of chaining ``.get`` calls.
+    """
+    meta = fm.get("metadata")
+    hermes = meta.get("hermes") if isinstance(meta, dict) else None
+    return hermes if isinstance(hermes, dict) else {}
+
+
+def _related(fm: dict[str, Any]) -> list[str]:
+    """Return declared related-skill names from top-level or hermes metadata.
+
+    Accepts either a list or an inline ``[a, b]`` / ``a, b`` string.
+    """
+    raw = fm.get("related_skills") or _hermes_meta(fm).get("related_skills")
+    if isinstance(raw, list):
+        return [name for name in (str(r).strip() for r in raw) if name]
+    if isinstance(raw, str):
+        return [r.strip() for r in raw.strip("[]").split(",") if r.strip()]
+    return []
+
+
+def _category(fm: dict[str, Any], skill_md: Path) -> str:
+    """Return the skill category, preferring frontmatter over directory layout."""
+    cat = fm.get("category") or _hermes_meta(fm).get("category")
+    if cat:
+        return str(cat)
+    # Expected layout: …/skills/CATEGORY/SKILL_NAME/SKILL.md, so the third path
+    # component from the end is taken as the category directory.
+    parts = skill_md.parts
+    return parts[-3] if len(parts) >= 3 else "general"
+
+
+def _iter_skill_files(roots: list[tuple[str, Path]]):
+    """Yield ``(source, path)`` for every ``SKILL.md`` under each existing root."""
+    for source, root in roots:
+        if root.exists():
+            for path in root.rglob("SKILL.md"):
+                yield source, path
+
+
+def _load_usage() -> dict[str, dict[str, Any]]:
+    """Return per-skill usage records keyed by skill name (``{}`` if unavailable).
+
+    Prefers ``tools.skill_usage.load_usage``; if that import or call fails, reads
+    ``skills/.usage.json`` under the active Hermes home so the fallback stays in
+    the same profile as the skills and memories being graphed.
+    """
+    try:
+        from tools.skill_usage import load_usage
+
+        usage = load_usage()
+    except Exception:
+        path = _hermes_home() / "skills" / ".usage.json"
+        try:
+            usage = json.loads(path.read_text(encoding="utf-8"))
+        except Exception:
+            return {}
+    # A corrupt file may decode to a list or scalar; callers need a mapping.
+    return usage if isinstance(usage, dict) else {}
+
+
+def _to_int_ts(value: Any) -> Optional[int]:
+    """Coerce *value* to an int (truncating floats); ``None`` if blank or invalid."""
+    try:
+        if value is None:
+            return None
+        if isinstance(value, (int, float)):
+            return int(value)
+        s = str(value).strip()
+        return int(float(s)) if s else None
+    except Exception:
+        return None
+
+
+def build_skill_nodes(skill_roots: list[tuple[str, Path]]) -> dict[str, SkillNode]:
+    """Scan *skill_roots* and return one ``SkillNode`` per unique skill name.
+
+    Args:
+        skill_roots: ``(source, directory)`` pairs. Roots are scanned in order
+            and the first skill seen under a given name wins.
+
+    Returns:
+        Nodes keyed by skill name. Unreadable or undecodable files are skipped
+        rather than aborting the scan.
+    """
+    usage = _load_usage()
+    nodes: dict[str, SkillNode] = {}
+
+    for source, skill_md in _iter_skill_files(skill_roots):
+        if not _SKIP_DIRS.isdisjoint(skill_md.parts):
+            continue
+        try:
+            with skill_md.open(encoding="utf-8") as fh:
+                head = fh.read(_FRONTMATTER_HEAD_CHARS)
+            file_ts = _to_int_ts(skill_md.stat().st_mtime)
+        except (OSError, UnicodeDecodeError):
+            # Unreadable, non-UTF-8, or removed mid-scan: skip this one skill.
+            continue
+        fm = _frontmatter(head)
+        name = str(fm.get("name") or skill_md.parent.name).strip()
+        if not name or name in nodes:
+            continue
+        rec = usage.get(name)
+        if not isinstance(rec, dict):
+            rec = {}
+        last_activity = _to_int_ts(rec.get("last_activity_at"))
+        nodes[name] = SkillNode(
+            name=name,
+            category=_category(fm, skill_md),
+            source=source,
+            timestamp=last_activity or file_ts,
+            # Tolerate a malformed counter in the usage file.
+            use_count=_to_int_ts(rec.get("use_count")) or 0,
+            state=str(rec.get("state", "active") or "active"),
+            created_by=rec.get("created_by"),
+            pinned=bool(rec.get("pinned", False)),
+            related=_related(fm),
+        )
+    return nodes
+
+
+def build_edges(nodes: dict[str, SkillNode]) -> list[tuple[str, str]]:
+    """Undirected related_skills edges where BOTH endpoints exist (deduped)."""
+    seen: set[tuple[str, str]] = set()
+    edges: list[tuple[str, str]] = []
+    for node in nodes.values():
+        for target in node.related:
+            if target in nodes and target != node.name:
+                # Order the pair so a→b and b→a collapse to one key.
+                key = (node.name, target) if node.name <= target else (target, node.name)
+                if key not in seen:
+                    seen.add(key)
+                    edges.append(key)
+    return edges
+
+
+def density_stats(nodes: dict[str, SkillNode], edges: list[tuple[str, str]]) -> dict[str, Any]:
+    """Summarize how connected the skill graph is.
+
+    Args:
+        nodes: Skill nodes keyed by name.
+        edges: Undirected ``(a, b)`` name pairs, e.g. from ``build_edges``.
+
+    Returns:
+        Counts, the ``edges_per_node`` and ``isolated_pct`` ratios
+        (``isolated_pct`` is ``0.0`` for an empty graph), and the eight largest categories.
+    """
+    linked = {name for edge in edges for name in edge}
+    cats: dict[str, int] = {}
+    for node in nodes.values():
+        cats[node.category] = cats.get(node.category, 0) + 1
+    total = len(nodes)
+    denom = total or 1  # avoid dividing by zero on an empty graph
+    return {
+        "nodes": total,
+        "related_edges": len(edges),
+        "edges_per_node": round(len(edges) / denom, 3),
+        "linked_nodes": len(linked),
+        # An empty graph has no isolated nodes; don't let the zero guard report 100%.
+        "isolated_pct": round(100 * (total - len(linked)) / denom, 1) if total else 0.0,
+        "categories": len(cats),
+        "agent_created": sum(1 for x in nodes.values() if x.created_by == "agent"),
+        "used": sum(1 for x in nodes.values() if x.use_count > 0),
+        "top_categories": sorted(cats.items(), key=lambda kv: -kv[1])[:8],
+    }
+
+
+def _memory_id(card: dict[str, Any], idx: int) -> str:
+    """Return the graph node id for the memory card at position *idx*."""
+    return f"memory:{card['source']}:{idx}"
+
+
+def _memory_cards() -> list[dict[str, Any]]:
+    """Freeform memory as readable cards.
+
+    ``MEMORY.md`` / ``USER.md`` are prose split on bare ``§`` separators; each
+    chunk becomes one card. Every chunk is surfaced — the graph shows everything.
+
+    Returns:
+        Cards with ``source`` (``"memory"`` or ``"profile"``), ``timestamp``,
+        ``title`` (first line, at most 80 characters plus an ellipsis) and
+        ``body`` (at most 1200 characters). Unreadable files contribute nothing.
+    """
+    base = _hermes_home() / "memories"
+
+    cards: list[dict[str, Any]] = []
+    for fname, source in (("MEMORY.md", "memory"), ("USER.md", "profile")):
+        path = base / fname
+        try:
+            text = path.read_text(encoding="utf-8").strip()
+            file_ts = _to_int_ts(path.stat().st_mtime)
+        except (OSError, UnicodeDecodeError):
+            continue
+        for chunk_idx, chunk in enumerate(c.strip() for c in text.split("\n§\n")):
+            if not chunk:
+                continue
+            first = chunk.splitlines()[0].strip().lstrip("# ").strip()
+            cards.append(
+                {
+                    "source": source,
+                    # The file mtime is the only time signal available, so the
+                    # chunk index is added to give each card a distinct value.
+                    "timestamp": file_ts + chunk_idx if file_ts is not None else None,
+                    "title": (first[:80] + "…") if len(first) > 80 else first,
+                    "body": chunk[:1200],
+                }
+            )
+    return cards
+
+
+def _tokenize(text: str) -> set[str]:
+    """Return lowercase alphanumeric tokens of *text* at least 3 characters long."""
+    return {t for t in _TOKEN_SPLIT.split(text.lower()) if len(t) >= 3}
+
+
+def _memory_skill_edges(memory_cards: list[dict[str, Any]], skills: list[SkillNode]) -> list[tuple[str, str]]:
+    """Link each memory card to the (at most four) skills it overlaps with most.
+
+    A skill scores 6 when its full name occurs in the card text, plus 1 per
+    token shared between the name and the text. Ties are broken by name.
+
+    Returns:
+        ``(memory_id, skill_name)`` pairs; ids use each card's index in
+        *memory_cards*.
+    """
+    edges: list[tuple[str, str]] = []
+    skill_meta = [(s, _tokenize(s.name), s.name.lower()) for s in skills]
+    for idx, card in enumerate(memory_cards):
+        mem_id = _memory_id(card, idx)
+        text = f"{card.get('title', '')}\n{card.get('body', '')}".lower()
+        text_tokens = _tokenize(text)
+        scored: list[tuple[int, str]] = []
+        for skill, tokens, skill_name_lower in skill_meta:
+            score = 0
+            if skill_name_lower in text:
+                score += 6
+            score += len(tokens & text_tokens)
+            if score > 0:
+                scored.append((score, skill.name))
+        scored.sort(key=lambda x: (-x[0], x[1]))
+        for _, skill_name in scored[:4]:
+            edges.append((mem_id, skill_name))
+    return edges
+
+
+def _skill_roots() -> list[tuple[str, Path]]:
+    """Return ``(source, directory)`` roots: bundled repo skills, then profile skills."""
+    repo = Path(__file__).resolve().parent.parent
+    return [("base", repo / "skills"), ("profile", _hermes_home() / "skills")]
+
+
+def build_learning_graph() -> dict[str, Any]:
+    """Full payload for the desktop learning panel.
+
+    Focus on what is profile-learned and actionable:
+    - skills that are NOT base-installed and show real learning signal
+      (agent-created or used),
+    - memory chunks as first-class graph nodes connected to those learned skills.
+
+    Returns:
+        A dict with ``nodes``, ``edges``, ``clusters`` (largest first),
+        ``memory`` (the raw cards) and ``stats``.
+    """
+    all_skills = build_skill_nodes(_skill_roots())
+    learned_skills = {
+        name: node
+        for name, node in all_skills.items()
+        if node.source != "base" and (node.created_by == "agent" or node.use_count > 0)
+    }
+    skill_edges = build_edges(learned_skills)
+    memory_cards = _memory_cards()
+    memory_edges = _memory_skill_edges(memory_cards, list(learned_skills.values()))
+
+    edges = skill_edges + memory_edges
+    clusters: dict[str, int] = {}
+    for node in learned_skills.values():
+        clusters[node.category] = clusters.get(node.category, 0) + 1
+    if memory_cards:
+        # Add to, rather than overwrite, a skill category that is itself named "memory".
+        clusters["memory"] = clusters.get("memory", 0) + len(memory_cards)
+
+    graph_nodes = [
+        {
+            "id": n.name,
+            "label": n.name,
+            "kind": "skill",
+            "timestamp": n.timestamp,
+            "category": n.category,
+            "useCount": n.use_count,
+            "state": n.state,
+            "createdBy": n.created_by,
+            "pinned": n.pinned,
+        }
+        for n in learned_skills.values()
+    ]
+    for i, card in enumerate(memory_cards):
+        graph_nodes.append(
+            {
+                "id": _memory_id(card, i),
+                "label": card["title"],
+                "kind": "memory",
+                "memorySource": card["source"],
+                "timestamp": card.get("timestamp"),
+                "category": "memory",
+                "useCount": 0,
+                "state": "active",
+                "createdBy": "memory",
+                "pinned": False,
+            }
+        )
+
+    return {
+        "nodes": graph_nodes,
+        "edges": [{"source": a, "target": b} for a, b in edges],
+        "clusters": [
+            {"category": c, "count": n}
+            for c, n in sorted(clusters.items(), key=lambda kv: -kv[1])
+        ],
+        "memory": memory_cards,
+        "stats": {
+            **density_stats(learned_skills, skill_edges),
+            "memory_nodes": len(memory_cards),
+            "memory_skill_edges": len(memory_edges),
+            "learned_skills": len(learned_skills),
+        },
+    }
+
+
+if __name__ == "__main__":
+    nodes = build_skill_nodes(_skill_roots())
+    print(json.dumps(density_stats(nodes, build_edges(nodes)), indent=2))
diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py
index 09dacecb9..2dc5fc9b6 100644
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@@ -2452,6 +2452,26 @@ async def run_curator():
     return {"ok": True, "pid": proc.pid, "name": "curator-run"}
 
 
+@app.get("/api/learning/graph")
+async def get_learning_graph(profile: Optional[str] = None):
+    """Learning graph payload for the desktop panel.
+
+    Profile-scoped view of learned, non-base skills plus memory chunks, with
+    graph links derived from skill relations and memory-skill overlap.
+    """
+    try:
+        from agent.learning_graph import build_learning_graph
+
+        with _profile_scope(profile):
+            return build_learning_graph()
+    except HTTPException:
+        # Pass through HTTP errors raised on purpose instead of masking them as 500.
+        raise
+    except Exception:
+        _log.exception("GET /api/learning/graph failed")
+        raise HTTPException(status_code=500, detail="Failed to build learning graph")
+
+
 def _safe_call(mod, fn_name: str, default):
     try:
         fn = getattr(mod, fn_name, None)
diff --git a/tests/agent/test_learning_graph.py b/tests/agent/test_learning_graph.py
new file mode 100644
index 000000000..298f19920
--- /dev/null
+++ b/tests/agent/test_learning_graph.py
@@ -0,0 +1,100 @@
+"""Behavior contracts for the learning-graph assembler.
+
+Asserts invariants (edges resolve to real nodes, clusters cover every node,
+memory cards are represented consistently), never a snapshot of the live skill
+catalog — that catalog grows every release and a count assertion would be a
+change-detector.
+"""
+
+from __future__ import annotations
+
+from agent import learning_graph
+from hermes_constants import reset_hermes_home_override, set_hermes_home_override
+
+
+def _node(name: str, category: str, related=None):
+    n = learning_graph.SkillNode(name=name, category=category)
+    n.related = list(related or [])
+    return n
+
+
+def test_edges_only_connect_existing_nodes():
+    nodes = {
+        "a": _node("a", "x", related=["b", "ghost"]),
+        "b": _node("b", "x", related=["a"]),
+        "c": _node("c", "y"),
+    }
+    edges = learning_graph.build_edges(nodes)
+
+    # The a→b link is kept once (deduped, undirected); a→ghost is dropped.
+    assert edges == [("a", "b")]
+
+
+def test_density_stats_count_isolated_nodes():
+    nodes = {
+        "a": _node("a", "x", related=["b"]),
+        "b": _node("b", "x", related=["a"]),
+        "c": _node("c", "y"),
+    }
+    stats = learning_graph.density_stats(nodes, learning_graph.build_edges(nodes))
+
+    assert stats["nodes"] == 3
+    assert stats["linked_nodes"] == 2
+    assert stats["isolated_pct"] == round(100 / 3, 1)
+
+
+def test_memory_is_cards_split_on_separator(tmp_path):
+    home = tmp_path / ".hermes"
+    (home / "memories").mkdir(parents=True)
+    (home / "memories" / "MEMORY.md").write_text(
+        "Project uses pytest with xdist\n§\nUser prefers concise responses",
+        encoding="utf-8",
+    )
+    token = set_hermes_home_override(home)
+    try:
+        graph = learning_graph.build_learning_graph()
+    finally:
+        reset_hermes_home_override(token)
+
+    titles = [c["title"] for c in graph["memory"]]
+    assert "Project uses pytest with xdist" in titles
+    assert "User prefers concise responses" in titles
+    # Memory cards remain typed cards and also appear as memory-kind nodes.
+    assert all(c["source"] in {"memory", "profile"} for c in graph["memory"])
+    assert all("timestamp" in c for c in graph["memory"])
+    assert any(n["kind"] == "memory" for n in graph["nodes"])
+
+
+def test_full_payload_shape_and_edge_integrity(tmp_path):
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    token = set_hermes_home_override(home)
+    try:
+        graph = learning_graph.build_learning_graph()
+    finally:
+        reset_hermes_home_override(token)
+
+    ids = {n["id"] for n in graph["nodes"]}
+    assert all(e["source"] in ids and e["target"] in ids for e in graph["edges"])
+    # Every node's category appears in the cluster list.
+    cluster_cats = {c["category"] for c in graph["clusters"]}
+    assert all(n["category"] in cluster_cats for n in graph["nodes"])
+    skill_nodes = [n for n in graph["nodes"] if n["kind"] == "skill"]
+    assert graph["stats"]["nodes"] == len(skill_nodes)
+    assert graph["stats"]["memory_nodes"] == len(graph["memory"])
+    assert all("timestamp" in n for n in graph["nodes"])
+
+
+def test_density_stats_empty_graph_reports_zero_isolation():
+    stats = learning_graph.density_stats({}, [])
+
+    assert stats["nodes"] == 0
+    assert stats["isolated_pct"] == 0.0
+
+
+def test_frontmatter_helpers_tolerate_null_metadata(tmp_path):
+    skill_md = tmp_path / "skills" / "devops" / "deploy" / "SKILL.md"
+    fm = {"metadata": None}
+
+    assert learning_graph._related(fm) == []
+    assert learning_graph._category(fm, skill_md) == "devops"