fix(browser): narrow credential-query denylist to unambiguous names
Follow-up on the salvaged #49830 hardening. The contributor's sensitive query-param set included bare English words (code, key, auth, session, sig) that double as ordinary page facets — ?code= on promo/challenge pages, ?key= as a search facet, ?session= on blogs — so web_extract and cloud browser_navigate would refuse a large slice of normal browsing. Narrow the set to unambiguously credential-named params (access_token, authorization, client_secret, password, token, x-amz-signature, ...). Prefix-based vendor-key redaction (is_safe_url) still catches recognizable key shapes; this set is the belt-and-suspenders for opaque secrets carried under an explicit credential-named parameter. Also fixes two intra-PR-staleness test breakages surfaced by salvaging onto current main: - web_extract_tool() no longer accepts use_llm_processing= (signature changed since the PR was authored) — dropped the invalid kwarg. - agent.redact now fully masks keyed 'token=<secret>' to 'token=***' instead of partial 'sk-...'; the console-redaction test now asserts the real invariant (secret body gone) rather than the exact mask format. Added a regression test that generic English-word query params are NOT blocked by the credential guard.
This commit is contained in:
parent
937e56be92
commit
cfbc7ed1f9
4 changed files with 39 additions and 10 deletions
|
|
@ -64,6 +64,7 @@ AUTHOR_MAP = {
|
|||
"214165399+kernel-t1@users.noreply.github.com": "kernel-t1", # PR #41349 salvage (.env sanitizer: only split when line starts with a known KEY= and preceding values are plain tokens; keep URL/query/whitespace secrets verbatim)
|
||||
"290858493+sasquatch9818@users.noreply.github.com": "sasquatch9818", # PR #41198 salvage (defang untrusted-tool-result delimiter against tag injection; drop forgeable startswith fast-path)
|
||||
"jnibarger01@gmail.com": "jnibarger01", # PR #35130 salvage (ReDoS-bound threat-pattern filler + FTS5 query cap + V4A Move-File approval/traversal targets)
|
||||
"yong2bba@gmail.com": "yong2bba", # PR #49830 salvage (harden browser tool safety boundaries: config-gated risky-eval blocklist, force-redact browser/CDP/supervisor output, session-ownership tracking, credential-query denylist)
|
||||
"info@djimit.nl": "djimit", # PR #48034 salvage (recover from truncated gateway responses: 4 continuation retries + exponential token headroom + normalize empty partials)
|
||||
"lubos@komfi.health": "lubosxyz", # PR #49225 salvage (persist codex app-server turns to session DB via agent_persisted=False so session_search/distill see gateway conversations)
|
||||
"290868363+petrichor-op@users.noreply.github.com": "petrichor-op", # PR #41281 salvage (never persist ephemeral empty-response recovery scaffolding to the SQLite session store / JSON log; filter by flag not position)
|
||||
|
|
|
|||
|
|
@ -112,9 +112,13 @@ class TestBrowserConsole:
|
|||
result = json.loads(browser_console(task_id="test"))
|
||||
|
||||
serialized = json.dumps(result)
|
||||
# The secret body must be gone. The exact mask format
|
||||
# (partial ``sk-…7890`` vs full ``***`` for keyed ``token=`` values)
|
||||
# is owned by agent.redact and intentionally not pinned here.
|
||||
assert "BROWSERCONSOLESECRET" not in serialized
|
||||
assert "sk-" in result["console_messages"][0]["text"]
|
||||
assert "..." in result["console_messages"][0]["text"]
|
||||
redacted_text = result["console_messages"][0]["text"]
|
||||
assert fake_key not in redacted_text
|
||||
assert "***" in redacted_text or "..." in redacted_text
|
||||
|
||||
def test_redacts_secrets_from_eval_result(self):
|
||||
from tools.browser_tool import _browser_eval
|
||||
|
|
|
|||
|
|
@ -108,14 +108,36 @@ class TestWebExtractSecretExfil:
|
|||
from tools.web_tools import web_extract_tool
|
||||
|
||||
result = await web_extract_tool(
|
||||
urls=["https://example.com/callback?code=opaque-oauth-code"],
|
||||
use_llm_processing=False,
|
||||
urls=["https://example.com/callback?access_token=opaque-oauth-value"],
|
||||
)
|
||||
|
||||
parsed = json.loads(result)
|
||||
assert parsed["success"] is False
|
||||
assert "credential-like query parameter" in parsed["error"]
|
||||
assert "code" in parsed["error"]
|
||||
assert "access_token" in parsed["error"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_allows_ambiguous_english_word_query_param(self):
|
||||
"""Generic query names that double as normal page facets must NOT block.
|
||||
|
||||
``?code=`` (promo/challenge pages), ``?key=`` (search facets),
|
||||
``?session=`` etc. are ordinary browsing params. Only unambiguously
|
||||
credential-named params are blocked, so web_extract stays usable.
|
||||
"""
|
||||
from tools.web_tools import web_extract_tool
|
||||
|
||||
for url in (
|
||||
"https://leetcode.com/problems/two-sum/?code=twosum",
|
||||
"https://github.com/search?q=hermes&code=1",
|
||||
"https://example.com/blog?session=summer",
|
||||
):
|
||||
result = await web_extract_tool(urls=[url])
|
||||
parsed = json.loads(result)
|
||||
# Not blocked by the credential-query guard (may fail for other
|
||||
# reasons like a missing backend, but never with this specific
|
||||
# error string).
|
||||
if parsed.get("success") is False:
|
||||
assert "credential-like query parameter" not in parsed.get("error", ""), url
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_allows_normal_url(self):
|
||||
|
|
|
|||
|
|
@ -77,26 +77,28 @@ def normalize_url_for_request(url: str) -> str:
|
|||
return urlunsplit((parsed.scheme, netloc, path, query, fragment))
|
||||
|
||||
|
||||
# Query parameter names that are unambiguously credential-bearing. Kept
|
||||
# deliberately narrow: bare English words that double as normal page facets
|
||||
# (``code`` on promo/challenge pages, ``key``/``auth``/``session``/``sig`` as
|
||||
# search or routing params) are intentionally EXCLUDED to avoid blocking
|
||||
# ordinary browsing. Prefix-based token redaction (``is_safe_url``) still
|
||||
# catches recognizable vendor key shapes; this set is the belt-and-suspenders
|
||||
# for opaque secrets that carry an explicit credential-named parameter.
|
||||
_SENSITIVE_QUERY_PARAM_NAMES = frozenset({
|
||||
"access_token",
|
||||
"api_key",
|
||||
"apikey",
|
||||
"auth",
|
||||
"auth_token",
|
||||
"authorization",
|
||||
"awsaccesskeyid",
|
||||
"client_secret",
|
||||
"code",
|
||||
"credential",
|
||||
"credentials",
|
||||
"key",
|
||||
"jwt",
|
||||
"password",
|
||||
"passwd",
|
||||
"secret",
|
||||
"session",
|
||||
"session_id",
|
||||
"sig",
|
||||
"signature",
|
||||
"token",
|
||||
"x_amz_security_token",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue