From 2aa983e2f270a5231a008ab8b02e64f61f27939b Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 21 Apr 2026 13:48:10 -0700 Subject: [PATCH] feat(gateway): recognize .pdf in MEDIA: tag extraction (#13683) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PDFs emitted by tools (report generators, document exporters, etc.) now deliver as native attachments when wrapped in MEDIA: — same as images, audio, and video. Bare .pdf paths are intentionally NOT added to extract_local_files(), so the agent can still reference PDFs in text without auto-sending them. --- gateway/platforms/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index afb876712..56bb3c5cb 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -1343,7 +1343,7 @@ class BasePlatformAdapter(ABC): # Extract MEDIA: tags, allowing optional whitespace after the colon # and quoted/backticked paths for LLM-formatted outputs. media_pattern = re.compile( - r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?''' + r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|pdf)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?''' ) for match in media_pattern.finditer(content): path = match.group("path").strip()