fix(matrix): normalize image-only filenames

This commit is contained in:
LeonSGP43 2026-04-23 00:42:09 +08:00 committed by Teknium
parent fbbcfa24c5
commit 74a4832b74
2 changed files with 117 additions and 0 deletions

View file

@ -122,6 +122,44 @@ _E2EE_INSTALL_HINT = (
"Install with: pip install 'mautrix[encryption]' (requires libolm C library)"
)
_MATRIX_IMAGE_FILENAME_EXTS = frozenset({
".jpg",
".jpeg",
".png",
".gif",
".webp",
".bmp",
".svg",
".heic",
".heif",
".avif",
})
def _looks_like_matrix_image_filename(text: str) -> bool:
"""Return True when Matrix image body text is probably just a transport filename.
Matrix ``m.image`` events commonly populate ``content.body`` with the uploaded
filename when the user did not add a caption. Treating that raw filename as
user-authored text confuses downstream vision enrichment.
"""
candidate = str(text or "").strip()
if not candidate or "\n" in candidate or candidate.endswith("/"):
return False
name = Path(candidate).name
if not name or name != candidate:
return False
suffix = Path(name).suffix.lower()
if not suffix:
return False
guessed_type, _ = mimetypes.guess_type(name)
if guessed_type and guessed_type.startswith("image/"):
return True
return suffix in _MATRIX_IMAGE_FILENAME_EXTS
def _check_e2ee_deps() -> bool:
"""Return True if mautrix E2EE dependencies (python-olm) are available."""
@ -1620,6 +1658,9 @@ class MatrixAdapter(BasePlatformAdapter):
return
body, is_dm, chat_type, thread_id, display_name, source = ctx
if msgtype == "m.image" and _looks_like_matrix_image_filename(body):
body = ""
allow_http_fallback = bool(http_url) and not is_encrypted_media
media_urls = (
[cached_path]

View file

@ -9,6 +9,7 @@ import pytest
from unittest.mock import MagicMock, patch, AsyncMock
from gateway.config import Platform, PlatformConfig
from gateway.platforms.base import MessageType
def _make_fake_mautrix():
@ -1896,6 +1897,81 @@ class TestMatrixReadReceipts:
assert result is False
# ---------------------------------------------------------------------------
# Media normalization
# ---------------------------------------------------------------------------
class TestMatrixImageOnlyMediaNormalization:
    """Check which ``m.image`` bodies are forwarded as message text."""

    # HTTP URL the stubbed ``_mxc_to_http`` returns for every mxc:// URI.
    _DOWNLOAD_URL = "https://matrix.example.org/_matrix/media/v3/download/example/30.png"

    def setup_method(self):
        download_url = self._DOWNLOAD_URL

        client = MagicMock()
        client.download_media = AsyncMock(return_value=None)

        adapter = _make_adapter()
        adapter._client = client
        adapter._is_dm_room = AsyncMock(return_value=True)
        adapter._get_display_name = AsyncMock(return_value="Alice")
        adapter._background_read_receipt = MagicMock()
        adapter._mxc_to_http = lambda url: download_url
        self.adapter = adapter

    async def _deliver_image(self, event_id, body):
        """Send one ``m.image`` event through the adapter.

        Replaces ``handle_message`` with a recorder and returns the last
        event it received, or None when nothing was forwarded.
        """
        forwarded = []

        async def record(msg_event):
            forwarded.append(msg_event)

        self.adapter.handle_message = record

        content = {
            "msgtype": "m.image",
            "body": body,
            "url": "mxc://example/30.png",
            "info": {"mimetype": "image/png"},
        }
        await self.adapter._handle_media_message(
            room_id="!room:example.org",
            sender="@alice:example.org",
            event_id=event_id,
            event_ts=0.0,
            source_content=content,
            relates_to={},
            msgtype="m.image",
        )
        return forwarded[-1] if forwarded else None

    @pytest.mark.asyncio
    async def test_image_only_filename_body_is_not_forwarded_as_text(self):
        event = await self._deliver_image("$image1", "30.png")

        assert event is not None
        # A body that is just the uploaded filename must not surface as text.
        assert event.text == ""
        assert event.media_urls == [self._DOWNLOAD_URL]
        assert event.message_type == MessageType.PHOTO

    @pytest.mark.asyncio
    async def test_image_caption_text_is_preserved(self):
        event = await self._deliver_image("$image2", "Please describe this chart")

        assert event is not None
        # A real caption is passed through unchanged.
        assert event.text == "Please describe this chart"
# ---------------------------------------------------------------------------
# Message redaction
# ---------------------------------------------------------------------------