From 4a7a6fd401bbccb8b9c8f99b1703fb38e39aacc0 Mon Sep 17 00:00:00 2001
From: Scott Gabel <5823452+sgabel@users.noreply.github.com>
Date: Tue, 30 Jun 2026 16:46:29 -0700
Subject: [PATCH] fix(approval): redact secrets in user-facing approval prompts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The dangerous-command approval prompt renders the flagged command so the
user can decide whether to approve. If the agent constructed it with a
credential (curl -H 'Authorization: Bearer sk-...', psql postgres://user:pw@host,
an execute_code script with api_key = 'sk-...'), that secret hit stdout and,
via the gateway notify payload, Discord/Slack messages — which are
screenshottable and forwardable.

Apply the existing agent.redact.redact_sensitive_text() to every user-facing
approval surface. Redaction is display-only: the raw command still executes
after approval, and approval persistence keys off pattern_key (not the command
text), so the allowlist is unaffected. Decision context (URL, flags, command
structure) is preserved; only the secret value masks.

Covers all surfaces, including the execute_code path the original PR missed:
- prompt_dangerous_approval(): callback + stdout fallback
- check_all_command_guards(): gateway approval_data + cron/batch pending fallback
- check_execute_code_guard(): gateway approval_data + no-notifier pending fallback
  (script body can embed credentials)

Adds TestApprovalPromptRedaction covering callback redaction, no-over-redaction
of clean commands, and the execute_code pending fallback.

Salvaged from PR #13139 by @sgabel; extended to the execute_code surface.
---
 scripts/release.py           |  1 +
 tests/tools/test_approval.py | 75 ++++++++++++++++++++++++++++++++++++
 tools/approval.py            | 70 ++++++++++++++++++++++++---------
 3 files changed, 127 insertions(+), 19 deletions(-)

diff --git a/scripts/release.py b/scripts/release.py
index 791558c32..57a3a82ec 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -45,6 +45,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
 
 # Auto-extracted from noreply emails + manual overrides
 AUTHOR_MAP = {
+    "5823452+sgabel@users.noreply.github.com": "sgabel",  # PR #13139 salvage (redact secrets in user-facing approval prompts)
     "cyb3rwr3n@users.noreply.github.com": "cyb3rwr3n",  # PR #11333 salvage (sanitize FTS5 queries for natural-language recall in holographic memory)
     "9350182+codexGW@users.noreply.github.com": "codexGW",  # PR #12302 salvage (Discord raw <@!ID> mention detection + drop bare mention-only pings)
     "186512915+lEWFkRAD@users.noreply.github.com": "lEWFkRAD",  # PR #53848 salvage (stream the MoA aggregator response to the user)
diff --git a/tests/tools/test_approval.py b/tests/tools/test_approval.py
index 876a3d148..134e7c870 100644
--- a/tests/tools/test_approval.py
+++ b/tests/tools/test_approval.py
@@ -1997,3 +1997,78 @@ class TestTirithImportErrorFailOpenPolicy:
                         result = check_all_command_guards("echo hello", "local")
 
         assert result.get("approved") is True
+
+
+class TestApprovalPromptRedaction:
+    """Secrets are masked in user-facing approval surfaces (#13139).
+
+    The flagged command/script is rendered so the user can decide whether to
+    approve. If it carries a credential (Bearer token, DB password, prefixed
+    key), that secret would land on stdout and -- via the gateway notify
+    payload -- in Discord/Slack messages, which are screenshottable. Redaction
+    is display-only: the raw command still executes after approval and the
+    allowlist keys off pattern_key, not the command text.
+    """
+
+    SECRET_CMD = (
+        'curl -H "Authorization: Bearer sk-proj-abc123xyz4567890abcdef" '
+        "https://api.openai.com/v1/models"
+    )
+
+    def test_callback_receives_redacted_command(self):
+        """prompt_dangerous_approval hands the callback a masked command."""
+        seen = {}
+
+        def cb(command, description, *, allow_permanent=True):
+            seen["command"] = command
+            seen["description"] = description
+            return "deny"
+
+        prompt_dangerous_approval(
+            self.SECRET_CMD,
+            "pipe remote content; token sk-proj-abc123xyz4567890abcdef",
+            approval_callback=cb,
+        )
+        # Secret value gone, decision context (scheme, URL, flag) preserved.
+        assert "sk-proj-abc123xyz4567890abcdef" not in seen["command"]
+        assert "Authorization: Bearer ***" in seen["command"]
+        assert "https://api.openai.com/v1/models" in seen["command"]
+        assert "sk-proj-abc123xyz4567890abcdef" not in seen["description"]
+
+    def test_clean_command_passes_through_unredacted(self):
+        """A command with no secret is shown verbatim -- no over-redaction."""
+        seen = {}
+
+        def cb(command, description, *, allow_permanent=True):
+            seen["command"] = command
+            return "deny"
+
+        prompt_dangerous_approval("rm -rf /var/data", "recursive delete",
+                                  approval_callback=cb)
+        assert seen["command"] == "rm -rf /var/data"
+
+    def test_execute_code_pending_fallback_redacts_script(self):
+        """check_execute_code_guard's no-notifier fallback masks an embedded
+        secret in both the pending record and the returned approval message."""
+        from unittest.mock import patch as _patch
+
+        from tools.approval import check_execute_code_guard
+
+        code = (
+            "import os\n"
+            'api_key = "sk-proj-abc123xyz4567890abcdef"\n'
+            "print(api_key)"
+        )
+        cfg = {"approvals": {"mode": "manual"}}
+        with _patch("hermes_cli.config.load_config", return_value=cfg):
+            with _patch("tools.approval._is_gateway_approval_context",
+                        return_value=True):
+                with _patch("tools.approval._get_approval_mode",
+                            return_value="manual"):
+                    # No gateway notify callback registered -> pending fallback.
+                    result = check_execute_code_guard(code, "local")
+
+        assert result.get("status") == "pending_approval"
+        # The script's credential must not appear in the user-facing message.
+        assert "sk-proj-abc123xyz4567890abcdef" not in result["message"]
+        assert "sk-proj-abc123xyz4567890abcdef" not in result["command"]
diff --git a/tools/approval.py b/tools/approval.py
index ae8c82e19..ae54daa5a 100644
--- a/tools/approval.py
+++ b/tools/approval.py
@@ -1044,9 +1044,17 @@ def prompt_dangerous_approval(command: str, description: str,
     if timeout_seconds is None:
         timeout_seconds = _get_approval_timeout()
 
+    # Redact secrets before any user-visible rendering. The original
+    # `command` is still what executes after approval; only the displayed
+    # copy is scrubbed. Reuses the same redaction module used for memory
+    # and log sanitization so tokens mask consistently across surfaces.
+    from agent.redact import redact_sensitive_text
+    display_command = redact_sensitive_text(command)
+    display_description = redact_sensitive_text(description)
+
     if approval_callback is not None:
         try:
-            return approval_callback(command, description,
+            return approval_callback(display_command, display_description,
                                      allow_permanent=allow_permanent)
         except Exception as e:
             logger.error("Approval callback failed: %s", e, exc_info=True)
@@ -1086,8 +1094,8 @@ def prompt_dangerous_approval(command: str, description: str,
         from agent.i18n import t
         while True:
             print()
-            print(f"  {t('approval.dangerous_header', description=description)}")
-            print(f"      {command}")
+            print(f"  {t('approval.dangerous_header', description=display_description)}")
+            print(f"      {display_command}")
             print()
             if allow_permanent:
                 print(t("approval.choose_long"))
@@ -1800,11 +1808,19 @@ def check_all_command_guards(command: str, env_type: str,
             # Block the agent thread until the user responds; the notify +
             # heartbeat wait loop is shared with check_execute_code_guard via
             # _await_gateway_decision().
+            #
+            # Redact secrets in the notified payload: the gateway renders this
+            # dict directly to Discord/Slack/etc. and those messages are
+            # screenshottable. The raw `command` still executes after approval
+            # via the closure below, so redaction is display-only. Approval
+            # persistence keys off pattern_key (not the command text), so the
+            # allowlist is unaffected.
+            from agent.redact import redact_sensitive_text
             approval_data = {
-                "command": command,
+                "command": redact_sensitive_text(command),
                 "pattern_key": primary_key,
                 "pattern_keys": all_keys,
-                "description": combined_desc,
+                "description": redact_sensitive_text(combined_desc),
                 # Mirror the CLI's allow_permanent gate: a tirith warning downgrades
                 # "always" to session scope below, so the UI must not offer it.
                 "allow_permanent": not has_tirith,
@@ -1868,22 +1884,27 @@ def check_all_command_guards(command: str, env_type: str,
                     "user_approved": True, "description": combined_desc}
 
         # Fallback: no gateway callback registered (e.g. cron, batch).
-        # Return approval_required for backward compat.
+        # Return approval_required for backward compat. Redact secrets in the
+        # user-facing copy — the raw `command` is preserved for execution and
+        # the allowlist keys off pattern_key, so redaction is display-only.
+        from agent.redact import redact_sensitive_text
+        _disp_command = redact_sensitive_text(command)
+        _disp_combined_desc = redact_sensitive_text(combined_desc)
         submit_pending(session_key, {
-            "command": command,
+            "command": _disp_command,
             "pattern_key": primary_key,
             "pattern_keys": all_keys,
-            "description": combined_desc,
+            "description": _disp_combined_desc,
         })
         return {
             "approved": False,
             "pattern_key": primary_key,
             "status": "pending_approval",
             "approval_pending": True,
-            "command": command,
-            "description": combined_desc,
+            "command": _disp_command,
+            "description": _disp_combined_desc,
             "message": (
-                f"⚠️ {combined_desc}. Asking the user for approval.\n\n**Command:**\n```\n{command}\n```"
+                f"⚠️ {_disp_combined_desc}. Asking the user for approval.\n\n**Command:**\n```\n{_disp_command}\n```"
             ),
         }
 
@@ -2020,6 +2041,17 @@ def check_execute_code_guard(code: str, env_type: str,
     # paths don't pay to copy a potentially-large script into this string.
     command = f"execute_code <<'PY'\n{code}\nPY"
 
+    # Redacted copies for user-visible rendering only. An execute_code script
+    # can embed credentials (e.g. api_key = "sk-..."), and the gateway renders
+    # this payload directly to Discord/Slack — those messages are
+    # screenshottable. The raw `command`/`code` are still what get assessed by
+    # smart approval and executed; redaction is display-only. Approval
+    # persistence keys off pattern_key, so the allowlist is unaffected.
+    from agent.redact import redact_sensitive_text
+    display_command = redact_sensitive_text(command)
+    display_code = redact_sensitive_text(code)
+    display_description = redact_sensitive_text(description)
+
     # Check session/permanent approval — same gate as check_all_command_guards.
     # Without this, "Approve session" / "Always" choices are stored but never
     # consulted, so every execute_code call re-prompts the user (#39275).
@@ -2058,29 +2090,29 @@ def check_execute_code_guard(code: str, env_type: str,
         # No gateway callback registered (e.g. ask-mode without a notifier):
         # surface a pending approval for backward compatibility.
         submit_pending(session_key, {
-            "command": command,
+            "command": display_command,
             "pattern_key": pattern_key,
             "pattern_keys": [pattern_key],
-            "description": description,
+            "description": display_description,
         })
         return {
             "approved": False,
             "pattern_key": pattern_key,
             "status": "pending_approval",
             "approval_pending": True,
-            "command": command,
-            "description": description,
+            "command": display_command,
+            "description": display_description,
             "message": (
-                f"⚠️ {description}. Asking the user for approval.\n\n"
-                f"**Code:**\n```python\n{code}\n```"
+                f"⚠️ {display_description}. Asking the user for approval.\n\n"
+                f"**Code:**\n```python\n{display_code}\n```"
             ),
         }
 
     approval_data = {
-        "command": command,
+        "command": display_command,
         "pattern_key": pattern_key,
         "pattern_keys": [pattern_key],
-        "description": description,
+        "description": display_description,
     }
     decision = _await_gateway_decision(
         session_key, notify_cb, approval_data, surface="gateway"