From a35ac254374065da0c0426fe4b2a2356f87a37ae Mon Sep 17 00:00:00 2001 From: SHL0MS Date: Fri, 3 Jul 2026 13:16:06 -0400 Subject: [PATCH] add `cdp`: launch/detect operator chrome over CDP for phase-2 browser + webmail phase-2 work (sending webmail, clearing session-bound gates like peopleconnect guided-mode) needs the operator's own logged-in browser, not a cloud browser. new `pdd.py cdp`: - finds chrome/chromium/brave/edge (macos/linux/windows), launches it detached on a dedicated debug profile ($HERMES_HOME/chrome-debug) with --remote-debugging-port, waits for the port, prints the CDP endpoint (webSocketDebuggerUrl) - `--check`: report whether a debug browser is already live (never double-launches) - `--print`: emit the exact command for the operator to run themselves - doctor, SKILL.md, and methods.md all point at it - windows-safe detach (start_new_session on posix, DETACHED_PROCESS on windows); stdlib only tests: standalone 98, PR 96 (+6 cdp); ruff + windows-footguns clean. --- optional-skills/security/unbroker/SKILL.md | 5 +- .../security/unbroker/references/methods.md | 6 +- .../security/unbroker/scripts/cdp.py | 159 ++++++++++++++++++ .../security/unbroker/scripts/pdd.py | 75 ++++++++- tests/skills/test_unbroker_skill.py | 72 ++++++++ 5 files changed, 313 insertions(+), 4 deletions(-) create mode 100644 optional-skills/security/unbroker/scripts/cdp.py diff --git a/optional-skills/security/unbroker/SKILL.md b/optional-skills/security/unbroker/SKILL.md index 7f9401599..8cbb9c83e 100644 --- a/optional-skills/security/unbroker/SKILL.md +++ b/optional-skills/security/unbroker/SKILL.md @@ -82,7 +82,9 @@ verifying re-scan. gates (e.g. PeopleConnect guided-mode). Drive the operator's real Chrome over CDP - launch `chrome --remote-debugging-port=9222 --user-data-dir="$HOME/.hermes/chrome-debug"` (a dedicated debug profile signed into the webmail once, not the Default profile) and connect the browser - tools to `127.0.0.1:9222`. 
See `references/methods.md` -> "Browser backends: scan vs execute". + tools to `127.0.0.1:9222`. **`$PDD cdp` launches this for you** (finds Chrome/Chromium/Brave/Edge, + starts it detached on the dedicated profile, prints the CDP endpoint; `--check` to test, `--print` + for the command). See `references/methods.md` -> "Browser backends: scan vs execute". Falls back to drafts for an email if the inbox isn't reachable. - **SMTP/IMAP (stored creds): `EMAIL_ADDRESS` + `EMAIL_PASSWORD`** (+ `EMAIL_SMTP_HOST` / `EMAIL_IMAP_HOST` for non-mainstream providers; gmail/outlook/yahoo/icloud/fastmail inferred). @@ -109,6 +111,7 @@ breaks reading the dossier). |---|---| | `$PDD setup --auto` | **Autonomous setup**: detect capabilities, pick the most autonomous valid config (no questions) | | `$PDD doctor` | Readiness check: config, broker count, and which upgrades are on/available | +| `$PDD cdp [--check] [--print] [--port N]` | Launch/detect the operator's Chrome over CDP for Phase-2 browser + webmail (dedicated debug profile; the reliable way to send webmail and clear session-bound gates) | | `$PDD intake --full-name "..." [--alias ...] [--email ... --phone ...] [--city --state] [--prior-location "City,ST"] --consent` | Create a consenting subject; captures aliases + multiple emails/phones + prior locations; prints `subject_id` | | `$PDD next <subject_id>` | **The autonomous loop driver**: ordered agent actions right now + human digest + `next_wake_at` | | `$PDD brokers [--priority crucial]` | List the people-search broker database (curated + live) | diff --git a/optional-skills/security/unbroker/references/methods.md b/optional-skills/security/unbroker/references/methods.md index 14b4aafef..1eb167483 100644 --- a/optional-skills/security/unbroker/references/methods.md +++ b/optional-skills/security/unbroker/references/methods.md @@ -232,7 +232,11 @@ run stalling in Phase 2. broker accounts) in that profile once. 
That single browser then carries residential IP + real fingerprint + logged-in sessions, which is precisely what Phase-2 flows need. (This is a Hermes-side browser setup, not a `pdd` config value; `browser_backend` above only selects the Phase-1 scan - browser.) + browser.) **The skill launches this for you: `pdd.py cdp`** finds a Chrome/Chromium/Brave/Edge + binary, starts it detached on the dedicated profile, waits for the debug port, and prints the CDP + endpoint (`webSocketDebuggerUrl`). `pdd.py cdp --check` reports whether a debug browser is already + live (and never launches a second one); `pdd.py cdp --print` just emits the exact command for the + operator to run themselves. Point the browser tools at the `endpoint` it returns. - **Always-available fallback:** if no CDP browser is wired up, use the operator-in-the-loop path (scan ladder 3b) - hand over paste-ready URLs and field-by-field least-disclosure guidance, pausing before submit. It never fails; it just needs a human present. diff --git a/optional-skills/security/unbroker/scripts/cdp.py b/optional-skills/security/unbroker/scripts/cdp.py new file mode 100644 index 000000000..0d4beeaac --- /dev/null +++ b/optional-skills/security/unbroker/scripts/cdp.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 +"""Launch (or detect) the operator's local Chrome/Chromium over the DevTools Protocol (CDP). + +Phase-2 work -- sending opt-out/CCPA email through the operator's logged-in webmail, and driving +session-bound multi-step opt-out gates (e.g. PeopleConnect guided-mode) -- must run in the +operator's OWN browser: real fingerprint, residential IP, and the operator's signed-in sessions. +A headless cloud browser (Browserbase) is the wrong tool there (it has no webmail session and is +itself anti-bot-gated on those exact flows). This module launches the operator's real Chrome with +remote debugging on a DEDICATED profile so Hermes's browser tools can attach at 127.0.0.1:<port>. 
+ +Stdlib only; cross-platform (macOS / Linux / Windows). Nothing here touches a password or PII. +""" +from __future__ import annotations + +import json +import os +import shutil +import subprocess +import sys +import urllib.error +import urllib.request +from pathlib import Path + +import paths + +DEFAULT_PORT = 9222 + +# Chromium-family binaries we know how to drive, in preference order. Names first (works on any OS +# where one is on PATH), then per-OS absolute-path fallbacks below. +_PATH_NAMES = ( + "google-chrome", "google-chrome-stable", "chromium", "chromium-browser", + "brave-browser", "microsoft-edge", "microsoft-edge-stable", "chrome", +) + + +def default_profile() -> Path: + """Dedicated debug profile dir, NOT the operator's Default Chrome profile. + + Chrome refuses remote-debugging on a profile that is already open in another Chrome instance, + so we isolate the debug session in its own user-data-dir under HERMES_HOME. + """ + return paths.hermes_home() / "chrome-debug" + + +def _mac_candidates() -> list[str]: + return [ + "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", + "/Applications/Chromium.app/Contents/MacOS/Chromium", + "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser", + "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge", + "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary", + ] + + +def _windows_candidates() -> list[str]: + bases = [ + os.environ.get("ProgramFiles", r"C:\Program Files"), + os.environ.get("ProgramFiles(x86)", r"C:\Program Files (x86)"), + os.environ.get("LOCALAPPDATA", ""), + ] + rels = [ + r"Google\Chrome\Application\chrome.exe", + r"Chromium\Application\chrome.exe", + r"BraveSoftware\Brave-Browser\Application\brave.exe", + r"Microsoft\Edge\Application\msedge.exe", + ] + out: list[str] = [] + for base in bases: + if not base: + continue + for rel in rels: + out.append(str(Path(base) / rel)) + return out + + +def find_browser(override: str | None = None) -> 
str | None: + """Return the first usable Chromium-family browser path/command, or None. + + `override` (an explicit path, or a command on PATH) wins when it resolves. + """ + if override: + if Path(override).exists(): + return override + return shutil.which(override) # may be None -> caller reports "not found" + for name in _PATH_NAMES: + found = shutil.which(name) + if found: + return found + if sys.platform == "darwin": + candidates = _mac_candidates() + elif sys.platform == "win32": + candidates = _windows_candidates() + else: + candidates = [] + for cand in candidates: + if Path(cand).exists(): + return cand + return None + + +def launch_command(browser: str, port: int = DEFAULT_PORT, profile: Path | None = None) -> list[str]: + """The exact argv used to start the debug browser (also handy for `--print`).""" + profile = profile or default_profile() + return [ + browser, + f"--remote-debugging-port={int(port)}", + f"--user-data-dir={profile}", + "--no-first-run", + "--no-default-browser-check", + ] + + +def _http_get(url: str, timeout: float) -> bytes: + req = urllib.request.Request(url, headers={"User-Agent": "unbroker-cdp/1.0"}) + with urllib.request.urlopen(req, timeout=timeout) as resp: # noqa: S310 (localhost only) + return resp.read() + + +def endpoint_status(port: int = DEFAULT_PORT, host: str = "127.0.0.1", + timeout: float = 1.0) -> dict | None: + """Return the CDP `/json/version` dict if a debuggable browser is live at host:port, else None. + + (Chrome restricts this endpoint to localhost/IP Host headers, so we always hit 127.0.0.1.) 
+ """ + url = f"http://{host}:{int(port)}/json/version" + try: + raw = _http_get(url, timeout) + except (urllib.error.URLError, TimeoutError, ConnectionError, OSError, ValueError): + return None + try: + data = json.loads(raw.decode("utf-8", errors="replace")) + except (ValueError, AttributeError): + return None + return data if isinstance(data, dict) else None + + +def launch(browser: str, port: int = DEFAULT_PORT, profile: Path | None = None) -> int: + """Start the browser detached with remote debugging; return the child PID. + + Detach so the browser outlives this short-lived CLI call. POSIX uses start_new_session (which + avoids referencing os.setsid, so there is no Windows import-time footgun); Windows uses + DETACHED_PROCESS + a new process group. + """ + profile = profile or default_profile() + profile.mkdir(parents=True, exist_ok=True) + cmd = launch_command(browser, port, profile) + kwargs: dict = { + "stdin": subprocess.DEVNULL, + "stdout": subprocess.DEVNULL, + "stderr": subprocess.DEVNULL, + } + if sys.platform == "win32": + kwargs["creationflags"] = ( + subprocess.DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP # windows-footgun: ok + ) + else: + kwargs["start_new_session"] = True + proc = subprocess.Popen(cmd, **kwargs) + return proc.pid diff --git a/optional-skills/security/unbroker/scripts/pdd.py b/optional-skills/security/unbroker/scripts/pdd.py index 6d28a3a45..ab9f77b78 100644 --- a/optional-skills/security/unbroker/scripts/pdd.py +++ b/optional-skills/security/unbroker/scripts/pdd.py @@ -31,6 +31,7 @@ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) import autopilot # noqa: E402 import badbool # noqa: E402 +import cdp # noqa: E402 import brokers as brokers_mod # noqa: E402 import config as config_mod # noqa: E402 import crypto # noqa: E402 @@ -189,14 +190,15 @@ def cmd_doctor(args) -> None: "links via the operator's logged-in webmail. 
This needs Hermes pointed at the " "operator's OWN Chrome over CDP (launch with --remote-debugging-port=9222 " "--user-data-dir=~/.hermes/chrome-debug, signed into the webmail once); else it falls " - "back to drafts. See methods.md 'Browser backends'.") + "back to drafts. Run `pdd.py cdp` to launch it (or `pdd.py cdp --print` for the command). " + "See methods.md 'Browser backends'.") cloud_scan = cfg.get("browser_backend") == "browserbase" or ( cfg.get("browser_backend") == "auto" and caps.get("browserbase")) if cloud_scan: L.append(" NOTE: your scan backend is a cloud browser (Browserbase). It is great for " "Phase-1 scanning but CANNOT be the browser that sends webmail (no inbox session) " "and is itself Cloudflare/DataDome-gated on session-bound gates (e.g. PeopleConnect). " - "For Phase-2 email/verify, drive the operator's Chrome over CDP as above.") + "For Phase-2 email/verify, launch the operator's Chrome over CDP: `pdd.py cdp`.") if not crypto.is_engaged(): L.append(" Storage: dossiers are PLAINTEXT JSON (0600 under HERMES_HOME). " "Run `setup --encryption age` for at-rest encryption.") @@ -241,6 +243,63 @@ def cmd_doctor(args) -> None: print("\n".join(L)) +def cmd_cdp(args) -> None: + """Launch (or detect) the operator's Chrome over CDP for Phase-2 browser + webmail work. + + A cloud browser cannot send the operator's webmail or clear session-bound gates; this points + Hermes at the operator's real Chrome on a dedicated debug profile (see methods.md). 
+ """ + import shlex + import time + + port = args.port + profile = Path(args.profile).expanduser() if args.profile else cdp.default_profile() + + live = cdp.endpoint_status(port) + if live: + _out({"running": True, "endpoint": f"127.0.0.1:{port}", + "browser": live.get("Browser"), + "webSocketDebuggerUrl": live.get("webSocketDebuggerUrl"), + "note": "a debuggable browser is already listening; point Hermes's browser tools at " + f"127.0.0.1:{port} and make sure the operator's webmail is signed in in THAT browser."}) + return + + if getattr(args, "check", False): + _out({"running": False, "endpoint": f"127.0.0.1:{port}", + "note": f"no debuggable browser here yet; run `pdd.py cdp --port {port}` (no --check) to launch one."}) + return + + browser = cdp.find_browser(args.browser) + if not browser: + _out({"running": False, "error": "no Chrome/Chromium-family browser found", + "fix": "install Google Chrome, or pass --browser /path/to/chrome (or a command on PATH)"}) + return + + cmd = cdp.launch_command(browser, port, profile) + if getattr(args, "print_only", False): + _out({"running": False, "browser": browser, "profile": str(profile), "command": cmd, + "shell": " ".join(shlex.quote(c) for c in cmd), + "note": "run this yourself to launch the debug browser, then sign into your webmail once."}) + return + + pid = cdp.launch(browser, port, profile) + live = None + for _ in range(20): # give Chrome a few seconds to open the debug port + live = cdp.endpoint_status(port) + if live: + break + time.sleep(0.5) + _out({"running": bool(live), "launched_pid": pid, "browser": browser, + "profile": str(profile), "endpoint": f"127.0.0.1:{port}", + "webSocketDebuggerUrl": (live or {}).get("webSocketDebuggerUrl"), + "next": ([f"point Hermes's browser tools at 127.0.0.1:{port} (CDP)", + "in the launched browser, sign into the operator's webmail ONCE (dedicated debug profile)", + "then run email/verify flows in browser mode -- they use this logged-in session"] + if live else + 
["browser launched but the debug port has not answered yet; give it a few seconds, then " + f"re-run `pdd.py cdp --check --port {port}`"])}) + + def cmd_intake(args) -> None: if args.json: data = json.loads(Path(args.json).read_text(encoding="utf-8")) @@ -689,6 +748,18 @@ def build_parser() -> argparse.ArgumentParser: s = sub.add_parser("doctor", help="readiness check: config, brokers, available upgrades") s.set_defaults(func=cmd_doctor) + s = sub.add_parser("cdp", + help="launch/detect the operator's Chrome over CDP (Phase-2 browser + webmail)") + s.add_argument("--port", type=int, default=cdp.DEFAULT_PORT, help="remote debugging port (default 9222)") + s.add_argument("--profile", + help="user-data-dir (default: $HERMES_HOME/chrome-debug, a dedicated debug profile)") + s.add_argument("--browser", help="path to (or PATH name of) a Chrome/Chromium/Brave/Edge binary") + s.add_argument("--check", action="store_true", + help="only report whether a debug browser is live; do not launch") + s.add_argument("--print", dest="print_only", action="store_true", + help="print the launch command instead of launching it (run it yourself)") + s.set_defaults(func=cmd_cdp) + s = sub.add_parser("intake", help="create a subject dossier (records consent)") s.add_argument("--json", help="path to a dossier JSON file (overrides flags)") s.add_argument("--full-name") diff --git a/tests/skills/test_unbroker_skill.py b/tests/skills/test_unbroker_skill.py index 77599c934..ced394d93 100644 --- a/tests/skills/test_unbroker_skill.py +++ b/tests/skills/test_unbroker_skill.py @@ -36,6 +36,7 @@ import time as _time # noqa: E402 import badbool # noqa: E402 import brokers # noqa: E402 +import cdp # noqa: E402 import config # noqa: E402 import crypto # noqa: E402 import dossier # noqa: E402 @@ -480,6 +481,52 @@ def test_fanout_default_batch_size_is_five(): assert len(g["batches"]) == 3 # 5 + 5 + 2 +# --- cdp (operator browser over the DevTools protocol) -------------------------------------- + +def 
test_cdp_launch_command_has_debug_flags(): + cmd = cdp.launch_command("/usr/bin/chrome", port=9333, profile=Path("/tmp/prof")) + assert cmd[0] == "/usr/bin/chrome" + assert "--remote-debugging-port=9333" in cmd + assert "--user-data-dir=/tmp/prof" in cmd + assert "--no-first-run" in cmd + + +def test_cdp_default_profile_uses_hermes_home(): + prev = os.environ.get("HERMES_HOME") + with tempfile.TemporaryDirectory() as d: + os.environ["HERMES_HOME"] = d + try: + assert cdp.default_profile() == Path(d) / "chrome-debug" + finally: + if prev is None: + os.environ.pop("HERMES_HOME", None) + else: + os.environ["HERMES_HOME"] = prev + + +def test_cdp_endpoint_status_parses_live_and_handles_down(): + orig = cdp._http_get + cdp._http_get = lambda url, timeout: b'{"Browser":"Chrome/1.2","webSocketDebuggerUrl":"ws://x"}' + try: + st = cdp.endpoint_status(port=9222) + assert st and st["Browser"] == "Chrome/1.2" and st["webSocketDebuggerUrl"] == "ws://x" + finally: + cdp._http_get = orig + + def _boom(url, timeout): + raise ConnectionError("connection refused") + cdp._http_get = _boom + try: + assert cdp.endpoint_status(port=9222) is None # nothing listening -> None, never raises + finally: + cdp._http_get = orig + + +def test_cdp_find_browser_override(): + assert cdp.find_browser("/bin/sh") == "/bin/sh" # explicit path that exists + assert cdp.find_browser("definitely-not-a-real-browser-xyz") is None # bogus -> None (no crash) + + def test_plan_surfaces_antibot(): d = _consenting() broker = {"id": "tps", "optout": {"requires": {}}, "search": {"antibot": "datadome", "by": ["name"]}} @@ -1329,6 +1376,31 @@ def test_dotenv_env_fills_missing_creds_and_shell_wins(): os.environ[k] = v +def test_cdp_cli_check_reports_not_running(): + orig = cdp.endpoint_status + cdp.endpoint_status = lambda *a, **k: None + try: + out = _run(["cdp", "--check", "--port", "59981"]) + assert out["running"] is False and out["endpoint"].endswith(":59981") + finally: + cdp.endpoint_status = orig + + +def 
test_cdp_cli_detects_already_running_and_does_not_launch(): + # If a debug browser is already live, `cdp` must report it and NOT launch another. + orig_status, orig_launch = cdp.endpoint_status, cdp.launch + cdp.endpoint_status = lambda *a, **k: {"Browser": "Chrome/9", "webSocketDebuggerUrl": "ws://z"} + + def _no_launch(*a, **k): + raise AssertionError("launch() must not be called when a browser is already live") + cdp.launch = _no_launch + try: + out = _run(["cdp", "--port", "59982"]) + assert out["running"] is True and out["webSocketDebuggerUrl"] == "ws://z" + finally: + cdp.endpoint_status, cdp.launch = orig_status, orig_launch + + def test_registry_candidate_urls_newest_first_with_floor(): urls = registry.ca_candidate_urls(__import__("datetime").date(2027, 3, 1)) assert urls[0].endswith("registry2027.csv") and urls[-1].endswith("registry2025.csv")