fix(approval): redact secrets in user-facing approval prompts

The dangerous-command approval prompt renders the flagged command so the
user can decide whether to approve. If the agent constructed it with a
credential (curl -H 'Authorization: Bearer sk-...', psql postgres://user:pw@host,
an execute_code script with api_key = 'sk-...'), that secret hit stdout and,
via the gateway notify payload, Discord/Slack messages — which are
screenshottable and forwardable.

Apply the existing agent.redact.redact_sensitive_text() to every user-facing
approval surface. Redaction is display-only: the raw command still executes
after approval, and approval persistence keys off pattern_key (not the command
text), so the allowlist is unaffected. Decision context (URL, flags, command
structure) is preserved; only the secret value masks.

Covers all surfaces, including the execute_code path the original PR missed:
- prompt_dangerous_approval(): callback + stdout fallback
- check_all_command_guards(): gateway approval_data + cron/batch pending fallback
- check_execute_code_guard(): gateway approval_data + no-notifier pending fallback
  (script body can embed credentials)

Adds TestApprovalPromptRedaction covering callback redaction, no-over-redaction
of clean commands, and the execute_code pending fallback.

Salvaged from PR #13139 by @sgabel; extended to the execute_code surface.
This commit is contained in:
Scott Gabel 2026-06-30 16:46:29 -07:00 committed by Teknium
parent 508156fd42
commit 4a7a6fd401
3 changed files with 127 additions and 19 deletions

View file

@ -45,6 +45,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
# Auto-extracted from noreply emails + manual overrides
AUTHOR_MAP = {
"5823452+sgabel@users.noreply.github.com": "sgabel", # PR #13139 salvage (redact secrets in user-facing approval prompts)
"cyb3rwr3n@users.noreply.github.com": "cyb3rwr3n", # PR #11333 salvage (sanitize FTS5 queries for natural-language recall in holographic memory)
"9350182+codexGW@users.noreply.github.com": "codexGW", # PR #12302 salvage (Discord raw <@!ID> mention detection + drop bare mention-only pings)
"186512915+lEWFkRAD@users.noreply.github.com": "lEWFkRAD", # PR #53848 salvage (stream the MoA aggregator response to the user)

View file

@ -1997,3 +1997,78 @@ class TestTirithImportErrorFailOpenPolicy:
result = check_all_command_guards("echo hello", "local")
assert result.get("approved") is True
class TestApprovalPromptRedaction:
"""Secrets are masked in user-facing approval surfaces (#13139).
The flagged command/script is rendered so the user can decide whether to
approve. If it carries a credential (Bearer token, DB password, prefixed
key), that secret would land on stdout and -- via the gateway notify
payload -- in Discord/Slack messages, which are screenshottable. Redaction
is display-only: the raw command still executes after approval and the
allowlist keys off pattern_key, not the command text.
"""
SECRET_CMD = (
'curl -H "Authorization: Bearer sk-proj-abc123xyz4567890abcdef" '
"https://api.openai.com/v1/models"
)
def test_callback_receives_redacted_command(self):
"""prompt_dangerous_approval hands the callback a masked command."""
seen = {}
def cb(command, description, *, allow_permanent=True):
seen["command"] = command
seen["description"] = description
return "deny"
prompt_dangerous_approval(
self.SECRET_CMD,
"pipe remote content; token sk-proj-abc123xyz4567890abcdef",
approval_callback=cb,
)
# Secret value gone, decision context (scheme, URL, flag) preserved.
assert "sk-proj-abc123xyz4567890abcdef" not in seen["command"]
assert "Authorization: Bearer ***" in seen["command"]
assert "https://api.openai.com/v1/models" in seen["command"]
assert "sk-proj-abc123xyz4567890abcdef" not in seen["description"]
def test_clean_command_passes_through_unredacted(self):
"""A command with no secret is shown verbatim -- no over-redaction."""
seen = {}
def cb(command, description, *, allow_permanent=True):
seen["command"] = command
return "deny"
prompt_dangerous_approval("rm -rf /var/data", "recursive delete",
approval_callback=cb)
assert seen["command"] == "rm -rf /var/data"
def test_execute_code_pending_fallback_redacts_script(self):
"""check_execute_code_guard's no-notifier fallback masks an embedded
secret in both the pending record and the returned approval message."""
from unittest.mock import patch as _patch
from tools.approval import check_execute_code_guard
code = (
"import os\n"
'api_key = "sk-proj-abc123xyz4567890abcdef"\n'
"print(api_key)"
)
cfg = {"approvals": {"mode": "manual"}}
with _patch("hermes_cli.config.load_config", return_value=cfg):
with _patch("tools.approval._is_gateway_approval_context",
return_value=True):
with _patch("tools.approval._get_approval_mode",
return_value="manual"):
# No gateway notify callback registered -> pending fallback.
result = check_execute_code_guard(code, "local")
assert result.get("status") == "pending_approval"
# The script's credential must not appear in the user-facing message.
assert "sk-proj-abc123xyz4567890abcdef" not in result["message"]
assert "sk-proj-abc123xyz4567890abcdef" not in result["command"]

View file

@ -1044,9 +1044,17 @@ def prompt_dangerous_approval(command: str, description: str,
if timeout_seconds is None:
timeout_seconds = _get_approval_timeout()
# Redact secrets before any user-visible rendering. The original
# `command` is still what executes after approval; only the displayed
# copy is scrubbed. Reuses the same redaction module used for memory
# and log sanitization so tokens mask consistently across surfaces.
from agent.redact import redact_sensitive_text
display_command = redact_sensitive_text(command)
display_description = redact_sensitive_text(description)
if approval_callback is not None:
try:
return approval_callback(command, description,
return approval_callback(display_command, display_description,
allow_permanent=allow_permanent)
except Exception as e:
logger.error("Approval callback failed: %s", e, exc_info=True)
@ -1086,8 +1094,8 @@ def prompt_dangerous_approval(command: str, description: str,
from agent.i18n import t
while True:
print()
print(f" {t('approval.dangerous_header', description=description)}")
print(f" {command}")
print(f" {t('approval.dangerous_header', description=display_description)}")
print(f" {display_command}")
print()
if allow_permanent:
print(t("approval.choose_long"))
@ -1800,11 +1808,19 @@ def check_all_command_guards(command: str, env_type: str,
# Block the agent thread until the user responds; the notify +
# heartbeat wait loop is shared with check_execute_code_guard via
# _await_gateway_decision().
#
# Redact secrets in the notified payload: the gateway renders this
# dict directly to Discord/Slack/etc. and those messages are
# screenshottable. The raw `command` still executes after approval
# via the closure below, so redaction is display-only. Approval
# persistence keys off pattern_key (not the command text), so the
# allowlist is unaffected.
from agent.redact import redact_sensitive_text
approval_data = {
"command": command,
"command": redact_sensitive_text(command),
"pattern_key": primary_key,
"pattern_keys": all_keys,
"description": combined_desc,
"description": redact_sensitive_text(combined_desc),
# Mirror the CLI's allow_permanent gate: a tirith warning downgrades
# "always" to session scope below, so the UI must not offer it.
"allow_permanent": not has_tirith,
@ -1868,22 +1884,27 @@ def check_all_command_guards(command: str, env_type: str,
"user_approved": True, "description": combined_desc}
# Fallback: no gateway callback registered (e.g. cron, batch).
# Return approval_required for backward compat.
# Return approval_required for backward compat. Redact secrets in the
# user-facing copy — the raw `command` is preserved for execution and
# the allowlist keys off pattern_key, so redaction is display-only.
from agent.redact import redact_sensitive_text
_disp_command = redact_sensitive_text(command)
_disp_combined_desc = redact_sensitive_text(combined_desc)
submit_pending(session_key, {
"command": command,
"command": _disp_command,
"pattern_key": primary_key,
"pattern_keys": all_keys,
"description": combined_desc,
"description": _disp_combined_desc,
})
return {
"approved": False,
"pattern_key": primary_key,
"status": "pending_approval",
"approval_pending": True,
"command": command,
"description": combined_desc,
"command": _disp_command,
"description": _disp_combined_desc,
"message": (
f"⚠️ {combined_desc}. Asking the user for approval.\n\n**Command:**\n```\n{command}\n```"
f"⚠️ {_disp_combined_desc}. Asking the user for approval.\n\n**Command:**\n```\n{_disp_command}\n```"
),
}
@ -2020,6 +2041,17 @@ def check_execute_code_guard(code: str, env_type: str,
# paths don't pay to copy a potentially-large script into this string.
command = f"execute_code <<'PY'\n{code}\nPY"
# Redacted copies for user-visible rendering only. An execute_code script
# can embed credentials (e.g. api_key = "sk-..."), and the gateway renders
# this payload directly to Discord/Slack — those messages are
# screenshottable. The raw `command`/`code` are still what get assessed by
# smart approval and executed; redaction is display-only. Approval
# persistence keys off pattern_key, so the allowlist is unaffected.
from agent.redact import redact_sensitive_text
display_command = redact_sensitive_text(command)
display_code = redact_sensitive_text(code)
display_description = redact_sensitive_text(description)
# Check session/permanent approval — same gate as check_all_command_guards.
# Without this, "Approve session" / "Always" choices are stored but never
# consulted, so every execute_code call re-prompts the user (#39275).
@ -2058,29 +2090,29 @@ def check_execute_code_guard(code: str, env_type: str,
# No gateway callback registered (e.g. ask-mode without a notifier):
# surface a pending approval for backward compatibility.
submit_pending(session_key, {
"command": command,
"command": display_command,
"pattern_key": pattern_key,
"pattern_keys": [pattern_key],
"description": description,
"description": display_description,
})
return {
"approved": False,
"pattern_key": pattern_key,
"status": "pending_approval",
"approval_pending": True,
"command": command,
"description": description,
"command": display_command,
"description": display_description,
"message": (
f"⚠️ {description}. Asking the user for approval.\n\n"
f"**Code:**\n```python\n{code}\n```"
f"⚠️ {display_description}. Asking the user for approval.\n\n"
f"**Code:**\n```python\n{display_code}\n```"
),
}
approval_data = {
"command": command,
"command": display_command,
"pattern_key": pattern_key,
"pattern_keys": [pattern_key],
"description": description,
"description": display_description,
}
decision = _await_gateway_decision(
session_key, notify_cb, approval_data, surface="gateway"