fix(approval): redact secrets in user-facing approval prompts

The dangerous-command approval prompt renders the flagged command so the
user can decide whether to approve. If the agent constructed it with a
credential (curl -H 'Authorization: Bearer sk-...',
psql postgres://user:pw@host, an execute_code script with
api_key = 'sk-...'), that secret hit stdout and, via the gateway notify
payload, Discord/Slack messages — which are screenshottable and forwardable.

Apply the existing agent.redact.redact_sensitive_text() to every
user-facing approval surface. Redaction is display-only: the raw command
still executes after approval, and approval persistence keys off
pattern_key (not the command text), so the allowlist is unaffected.
Decision context (URL, flags, command structure) is preserved; only the
secret value is masked.

Covers all surfaces, including the execute_code path the original PR missed:

- prompt_dangerous_approval(): callback + stdout fallback
- check_all_command_guards(): gateway approval_data + cron/batch pending
  fallback
- check_execute_code_guard(): gateway approval_data + no-notifier pending
  fallback (script body can embed credentials)

Adds TestApprovalPromptRedaction covering callback redaction,
no-over-redaction of clean commands, and the execute_code pending fallback.

Salvaged from PR #13139 by @sgabel; extended to the execute_code surface.
2026-06-30 16:46:29 -07:00 · 2026-06-30 16:46:29 -07:00 · 4a7a6fd401
commit 4a7a6fd401
parent 508156fd42
3 changed files with 127 additions and 19 deletions
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -45,6 +45,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"

 # Auto-extracted from noreply emails + manual overrides
 AUTHOR_MAP = {
+    "5823452+sgabel@users.noreply.github.com": "sgabel",  # PR #13139 salvage (redact secrets in user-facing approval prompts)
    "cyb3rwr3n@users.noreply.github.com": "cyb3rwr3n",  # PR #11333 salvage (sanitize FTS5 queries for natural-language recall in holographic memory)
    "9350182+codexGW@users.noreply.github.com": "codexGW",  # PR #12302 salvage (Discord raw <@!ID> mention detection + drop bare mention-only pings)
    "186512915+lEWFkRAD@users.noreply.github.com": "lEWFkRAD",  # PR #53848 salvage (stream the MoA aggregator response to the user)
--- a/tests/tools/test_approval.py
+++ b/tests/tools/test_approval.py
@@ -1997,3 +1997,78 @@ class TestTirithImportErrorFailOpenPolicy:
                        result = check_all_command_guards("echo hello", "local")

        assert result.get("approved") is True
+
+
+class TestApprovalPromptRedaction:
+    """Secrets are masked in user-facing approval surfaces (#13139).
+
+    The flagged command/script is rendered so the user can decide whether to
+    approve. If it carries a credential (Bearer token, DB password, prefixed
+    key), that secret would land on stdout and -- via the gateway notify
+    payload -- in Discord/Slack messages, which are screenshottable. Redaction
+    is display-only: the raw command still executes after approval and the
+    allowlist keys off pattern_key, not the command text.
+    """
+
+    SECRET_CMD = (
+        'curl -H "Authorization: Bearer sk-proj-abc123xyz4567890abcdef" '
+        "https://api.openai.com/v1/models"
+    )
+
+    def test_callback_receives_redacted_command(self):
+        """prompt_dangerous_approval hands the callback a masked command."""
+        seen = {}
+
+        def cb(command, description, *, allow_permanent=True):
+            seen["command"] = command
+            seen["description"] = description
+            return "deny"
+
+        prompt_dangerous_approval(
+            self.SECRET_CMD,
+            "pipe remote content; token sk-proj-abc123xyz4567890abcdef",
+            approval_callback=cb,
+        )
+        # Secret value gone, decision context (scheme, URL, flag) preserved.
+        assert "sk-proj-abc123xyz4567890abcdef" not in seen["command"]
+        assert "Authorization: Bearer ***" in seen["command"]
+        assert "https://api.openai.com/v1/models" in seen["command"]
+        assert "sk-proj-abc123xyz4567890abcdef" not in seen["description"]
+
+    def test_clean_command_passes_through_unredacted(self):
+        """A command with no secret is shown verbatim -- no over-redaction."""
+        seen = {}
+
+        def cb(command, description, *, allow_permanent=True):
+            seen["command"] = command
+            return "deny"
+
+        prompt_dangerous_approval("rm -rf /var/data", "recursive delete",
+                                  approval_callback=cb)
+        assert seen["command"] == "rm -rf /var/data"
+
+    def test_execute_code_pending_fallback_redacts_script(self):
+        """check_execute_code_guard's no-notifier fallback masks an embedded
+        secret in both the pending record and the returned approval message."""
+        from unittest.mock import patch as _patch
+
+        from tools.approval import check_execute_code_guard
+
+        code = (
+            "import os\n"
+            'api_key = "sk-proj-abc123xyz4567890abcdef"\n'
+            "print(api_key)"
+        )
+        cfg = {"approvals": {"mode": "manual"}}
+        with _patch("hermes_cli.config.load_config", return_value=cfg):
+            with _patch("tools.approval._is_gateway_approval_context",
+                        return_value=True):
+                with _patch("tools.approval._get_approval_mode",
+                            return_value="manual"):
+                    # No gateway notify callback registered -> pending fallback.
+                    result = check_execute_code_guard(code, "local")
+
+        assert result.get("status") == "pending_approval"
+        # The script's credential must not appear in the user-facing message.
+        assert "sk-proj-abc123xyz4567890abcdef" not in result["message"]
+        assert "sk-proj-abc123xyz4567890abcdef" not in result["command"]
--- a/tools/approval.py
+++ b/tools/approval.py
@@ -1044,9 +1044,17 @@ def prompt_dangerous_approval(command: str, description: str,
    if timeout_seconds is None:
        timeout_seconds = _get_approval_timeout()

+    # Redact secrets before any user-visible rendering. The original
+    # `command` is still what executes after approval; only the displayed
+    # copy is scrubbed. Reuses the same redaction module used for memory
+    # and log sanitization so tokens mask consistently across surfaces.
+    from agent.redact import redact_sensitive_text
+    display_command = redact_sensitive_text(command)
+    display_description = redact_sensitive_text(description)
+
    if approval_callback is not None:
        try:
-            return approval_callback(command, description,
+            return approval_callback(display_command, display_description,
                                     allow_permanent=allow_permanent)
        except Exception as e:
            logger.error("Approval callback failed: %s", e, exc_info=True)
@@ -1086,8 +1094,8 @@ def prompt_dangerous_approval(command: str, description: str,
        from agent.i18n import t
        while True:
            print()
-            print(f"  {t('approval.dangerous_header', description=description)}")
-            print(f"      {command}")
+            print(f"  {t('approval.dangerous_header', description=display_description)}")
+            print(f"      {display_command}")
            print()
            if allow_permanent:
                print(t("approval.choose_long"))
@@ -1800,11 +1808,19 @@ def check_all_command_guards(command: str, env_type: str,
            # Block the agent thread until the user responds; the notify +
            # heartbeat wait loop is shared with check_execute_code_guard via
            # _await_gateway_decision().
+            #
+            # Redact secrets in the notified payload: the gateway renders this
+            # dict directly to Discord/Slack/etc. and those messages are
+            # screenshottable. The raw `command` still executes after approval
+            # via the closure below, so redaction is display-only. Approval
+            # persistence keys off pattern_key (not the command text), so the
+            # allowlist is unaffected.
+            from agent.redact import redact_sensitive_text
            approval_data = {
-                "command": command,
+                "command": redact_sensitive_text(command),
                "pattern_key": primary_key,
                "pattern_keys": all_keys,
-                "description": combined_desc,
+                "description": redact_sensitive_text(combined_desc),
                # Mirror the CLI's allow_permanent gate: a tirith warning downgrades
                # "always" to session scope below, so the UI must not offer it.
                "allow_permanent": not has_tirith,
@@ -1868,22 +1884,27 @@ def check_all_command_guards(command: str, env_type: str,
                    "user_approved": True, "description": combined_desc}

        # Fallback: no gateway callback registered (e.g. cron, batch).
-        # Return approval_required for backward compat.
+        # Return approval_required for backward compat. Redact secrets in the
+        # user-facing copy — the raw `command` is preserved for execution and
+        # the allowlist keys off pattern_key, so redaction is display-only.
+        from agent.redact import redact_sensitive_text
+        _disp_command = redact_sensitive_text(command)
+        _disp_combined_desc = redact_sensitive_text(combined_desc)
        submit_pending(session_key, {
-            "command": command,
+            "command": _disp_command,
            "pattern_key": primary_key,
            "pattern_keys": all_keys,
-            "description": combined_desc,
+            "description": _disp_combined_desc,
        })
        return {
            "approved": False,
            "pattern_key": primary_key,
            "status": "pending_approval",
            "approval_pending": True,
-            "command": command,
-            "description": combined_desc,
+            "command": _disp_command,
+            "description": _disp_combined_desc,
            "message": (
-                f"⚠️ {combined_desc}. Asking the user for approval.\n\n**Command:**\n```\n{command}\n```"
+                f"⚠️ {_disp_combined_desc}. Asking the user for approval.\n\n**Command:**\n```\n{_disp_command}\n```"
            ),
        }

@@ -2020,6 +2041,17 @@ def check_execute_code_guard(code: str, env_type: str,
    # paths don't pay to copy a potentially-large script into this string.
    command = f"execute_code <<'PY'\n{code}\nPY"

+    # Redacted copies for user-visible rendering only. An execute_code script
+    # can embed credentials (e.g. api_key = "sk-..."), and the gateway renders
+    # this payload directly to Discord/Slack — those messages are
+    # screenshottable. The raw `command`/`code` are still what get assessed by
+    # smart approval and executed; redaction is display-only. Approval
+    # persistence keys off pattern_key, so the allowlist is unaffected.
+    from agent.redact import redact_sensitive_text
+    display_command = redact_sensitive_text(command)
+    display_code = redact_sensitive_text(code)
+    display_description = redact_sensitive_text(description)
+
    # Check session/permanent approval — same gate as check_all_command_guards.
    # Without this, "Approve session" / "Always" choices are stored but never
    # consulted, so every execute_code call re-prompts the user (#39275).
@@ -2058,29 +2090,29 @@ def check_execute_code_guard(code: str, env_type: str,
        # No gateway callback registered (e.g. ask-mode without a notifier):
        # surface a pending approval for backward compatibility.
        submit_pending(session_key, {
-            "command": command,
+            "command": display_command,
            "pattern_key": pattern_key,
            "pattern_keys": [pattern_key],
-            "description": description,
+            "description": display_description,
        })
        return {
            "approved": False,
            "pattern_key": pattern_key,
            "status": "pending_approval",
            "approval_pending": True,
-            "command": command,
-            "description": description,
+            "command": display_command,
+            "description": display_description,
            "message": (
-                f"⚠️ {description}. Asking the user for approval.\n\n"
-                f"**Code:**\n```python\n{code}\n```"
+                f"⚠️ {display_description}. Asking the user for approval.\n\n"
+                f"**Code:**\n```python\n{display_code}\n```"
            ),
        }

    approval_data = {
-        "command": command,
+        "command": display_command,
        "pattern_key": pattern_key,
        "pattern_keys": [pattern_key],
-        "description": description,
+        "description": display_description,
    }
    decision = _await_gateway_decision(
        session_key, notify_cb, approval_data, surface="gateway"