From 4a7a6fd401bbccb8b9c8f99b1703fb38e39aacc0 Mon Sep 17 00:00:00 2001 From: Scott Gabel <5823452+sgabel@users.noreply.github.com> Date: Tue, 30 Jun 2026 16:46:29 -0700 Subject: [PATCH] fix(approval): redact secrets in user-facing approval prompts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dangerous-command approval prompt renders the flagged command so the user can decide whether to approve. If the agent constructed it with a credential (curl -H 'Authorization: Bearer sk-...', psql postgres://user:pw@host, an execute_code script with api_key = 'sk-...'), that secret hit stdout and, via the gateway notify payload, Discord/Slack messages — which are screenshottable and forwardable. Apply the existing agent.redact.redact_sensitive_text() to every user-facing approval surface. Redaction is display-only: the raw command still executes after approval, and approval persistence keys off pattern_key (not the command text), so the allowlist is unaffected. Decision context (URL, flags, command structure) is preserved; only the secret value masks. Covers all surfaces, including the execute_code path the original PR missed: - prompt_dangerous_approval(): callback + stdout fallback - check_all_command_guards(): gateway approval_data + cron/batch pending fallback - check_execute_code_guard(): gateway approval_data + no-notifier pending fallback (script body can embed credentials) Adds TestApprovalPromptRedaction covering callback redaction, no-over-redaction of clean commands, and the execute_code pending fallback. Salvaged from PR #13139 by @sgabel; extended to the execute_code surface. --- scripts/release.py | 1 + tests/tools/test_approval.py | 75 ++++++++++++++++++++++++++++++++++++ tools/approval.py | 70 ++++++++++++++++++++++++--------- 3 files changed, 127 insertions(+), 19 deletions(-) diff --git a/scripts/release.py b/scripts/release.py index 791558c32..57a3a82ec 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -45,6 +45,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json" # Auto-extracted from noreply emails + manual overrides AUTHOR_MAP = { + "5823452+sgabel@users.noreply.github.com": "sgabel", # PR #13139 salvage (redact secrets in user-facing approval prompts) "cyb3rwr3n@users.noreply.github.com": "cyb3rwr3n", # PR #11333 salvage (sanitize FTS5 queries for natural-language recall in holographic memory) "9350182+codexGW@users.noreply.github.com": "codexGW", # PR #12302 salvage (Discord raw <@!ID> mention detection + drop bare mention-only pings) "186512915+lEWFkRAD@users.noreply.github.com": "lEWFkRAD", # PR #53848 salvage (stream the MoA aggregator response to the user) diff --git a/tests/tools/test_approval.py b/tests/tools/test_approval.py index 876a3d148..134e7c870 100644 --- a/tests/tools/test_approval.py +++ b/tests/tools/test_approval.py @@ -1997,3 +1997,78 @@ class TestTirithImportErrorFailOpenPolicy: result = check_all_command_guards("echo hello", "local") assert result.get("approved") is True + + +class TestApprovalPromptRedaction: + """Secrets are masked in user-facing approval surfaces (#13139). + + The flagged command/script is rendered so the user can decide whether to + approve. If it carries a credential (Bearer token, DB password, prefixed + key), that secret would land on stdout and -- via the gateway notify + payload -- in Discord/Slack messages, which are screenshottable. Redaction + is display-only: the raw command still executes after approval and the + allowlist keys off pattern_key, not the command text. + """ + + SECRET_CMD = ( + 'curl -H "Authorization: Bearer sk-proj-abc123xyz4567890abcdef" ' + "https://api.openai.com/v1/models" + ) + + def test_callback_receives_redacted_command(self): + """prompt_dangerous_approval hands the callback a masked command.""" + seen = {} + + def cb(command, description, *, allow_permanent=True): + seen["command"] = command + seen["description"] = description + return "deny" + + prompt_dangerous_approval( + self.SECRET_CMD, + "pipe remote content; token sk-proj-abc123xyz4567890abcdef", + approval_callback=cb, + ) + # Secret value gone, decision context (scheme, URL, flag) preserved. + assert "sk-proj-abc123xyz4567890abcdef" not in seen["command"] + assert "Authorization: Bearer ***" in seen["command"] + assert "https://api.openai.com/v1/models" in seen["command"] + assert "sk-proj-abc123xyz4567890abcdef" not in seen["description"] + + def test_clean_command_passes_through_unredacted(self): + """A command with no secret is shown verbatim -- no over-redaction.""" + seen = {} + + def cb(command, description, *, allow_permanent=True): + seen["command"] = command + return "deny" + + prompt_dangerous_approval("rm -rf /var/data", "recursive delete", + approval_callback=cb) + assert seen["command"] == "rm -rf /var/data" + + def test_execute_code_pending_fallback_redacts_script(self): + """check_execute_code_guard's no-notifier fallback masks an embedded + secret in both the pending record and the returned approval message.""" + from unittest.mock import patch as _patch + + from tools.approval import check_execute_code_guard + + code = ( + "import os\n" + 'api_key = "sk-proj-abc123xyz4567890abcdef"\n' + "print(api_key)" + ) + cfg = {"approvals": {"mode": "manual"}} + with _patch("hermes_cli.config.load_config", return_value=cfg): + with _patch("tools.approval._is_gateway_approval_context", + return_value=True): + with _patch("tools.approval._get_approval_mode", + return_value="manual"): + # No gateway notify callback registered -> pending fallback. + result = check_execute_code_guard(code, "local") + + assert result.get("status") == "pending_approval" + # The script's credential must not appear in the user-facing message. + assert "sk-proj-abc123xyz4567890abcdef" not in result["message"] + assert "sk-proj-abc123xyz4567890abcdef" not in result["command"] diff --git a/tools/approval.py b/tools/approval.py index ae8c82e19..ae54daa5a 100644 --- a/tools/approval.py +++ b/tools/approval.py @@ -1044,9 +1044,17 @@ def prompt_dangerous_approval(command: str, description: str, if timeout_seconds is None: timeout_seconds = _get_approval_timeout() + # Redact secrets before any user-visible rendering. The original + # `command` is still what executes after approval; only the displayed + # copy is scrubbed. Reuses the same redaction module used for memory + # and log sanitization so tokens mask consistently across surfaces. + from agent.redact import redact_sensitive_text + display_command = redact_sensitive_text(command) + display_description = redact_sensitive_text(description) + if approval_callback is not None: try: - return approval_callback(command, description, + return approval_callback(display_command, display_description, allow_permanent=allow_permanent) except Exception as e: logger.error("Approval callback failed: %s", e, exc_info=True) @@ -1086,8 +1094,8 @@ def prompt_dangerous_approval(command: str, description: str, from agent.i18n import t while True: print() - print(f" {t('approval.dangerous_header', description=description)}") - print(f" {command}") + print(f" {t('approval.dangerous_header', description=display_description)}") + print(f" {display_command}") print() if allow_permanent: print(t("approval.choose_long")) @@ -1800,11 +1808,19 @@ def check_all_command_guards(command: str, env_type: str, # Block the agent thread until the user responds; the notify + # heartbeat wait loop is shared with check_execute_code_guard via # _await_gateway_decision(). + # + # Redact secrets in the notified payload: the gateway renders this + # dict directly to Discord/Slack/etc. and those messages are + # screenshottable. The raw `command` still executes after approval + # via the closure below, so redaction is display-only. Approval + # persistence keys off pattern_key (not the command text), so the + # allowlist is unaffected. + from agent.redact import redact_sensitive_text approval_data = { - "command": command, + "command": redact_sensitive_text(command), "pattern_key": primary_key, "pattern_keys": all_keys, - "description": combined_desc, + "description": redact_sensitive_text(combined_desc), # Mirror the CLI's allow_permanent gate: a tirith warning downgrades # "always" to session scope below, so the UI must not offer it. "allow_permanent": not has_tirith, @@ -1868,22 +1884,27 @@ def check_all_command_guards(command: str, env_type: str, "user_approved": True, "description": combined_desc} # Fallback: no gateway callback registered (e.g. cron, batch). - # Return approval_required for backward compat. + # Return approval_required for backward compat. Redact secrets in the + # user-facing copy — the raw `command` is preserved for execution and + # the allowlist keys off pattern_key, so redaction is display-only. + from agent.redact import redact_sensitive_text + _disp_command = redact_sensitive_text(command) + _disp_combined_desc = redact_sensitive_text(combined_desc) submit_pending(session_key, { - "command": command, + "command": _disp_command, "pattern_key": primary_key, "pattern_keys": all_keys, - "description": combined_desc, + "description": _disp_combined_desc, }) return { "approved": False, "pattern_key": primary_key, "status": "pending_approval", "approval_pending": True, - "command": command, - "description": combined_desc, + "command": _disp_command, + "description": _disp_combined_desc, "message": ( - f"⚠️ {combined_desc}. Asking the user for approval.\n\n**Command:**\n```\n{command}\n```" + f"⚠️ {_disp_combined_desc}. Asking the user for approval.\n\n**Command:**\n```\n{_disp_command}\n```" ), } @@ -2020,6 +2041,17 @@ def check_execute_code_guard(code: str, env_type: str, # paths don't pay to copy a potentially-large script into this string. command = f"execute_code <<'PY'\n{code}\nPY" + # Redacted copies for user-visible rendering only. An execute_code script + # can embed credentials (e.g. api_key = "sk-..."), and the gateway renders + # this payload directly to Discord/Slack — those messages are + # screenshottable. The raw `command`/`code` are still what get assessed by + # smart approval and executed; redaction is display-only. Approval + # persistence keys off pattern_key, so the allowlist is unaffected. + from agent.redact import redact_sensitive_text + display_command = redact_sensitive_text(command) + display_code = redact_sensitive_text(code) + display_description = redact_sensitive_text(description) + # Check session/permanent approval — same gate as check_all_command_guards. # Without this, "Approve session" / "Always" choices are stored but never # consulted, so every execute_code call re-prompts the user (#39275). @@ -2058,29 +2090,29 @@ def check_execute_code_guard(code: str, env_type: str, # No gateway callback registered (e.g. ask-mode without a notifier): # surface a pending approval for backward compatibility. submit_pending(session_key, { - "command": command, + "command": display_command, "pattern_key": pattern_key, "pattern_keys": [pattern_key], - "description": description, + "description": display_description, }) return { "approved": False, "pattern_key": pattern_key, "status": "pending_approval", "approval_pending": True, - "command": command, - "description": description, + "command": display_command, + "description": display_description, "message": ( - f"⚠️ {description}. Asking the user for approval.\n\n" - f"**Code:**\n```python\n{code}\n```" + f"⚠️ {display_description}. Asking the user for approval.\n\n" + f"**Code:**\n```python\n{display_code}\n```" ), } approval_data = { - "command": command, + "command": display_command, "pattern_key": pattern_key, "pattern_keys": [pattern_key], - "description": description, + "description": display_description, } decision = _await_gateway_decision( session_key, notify_cb, approval_data, surface="gateway"