This commit is contained in:
Mahmoud Mohsen 2026-03-24 17:44:27 +08:00 committed by GitHub
commit 85e8868dc5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 14 additions and 4 deletions

View File

@ -11,7 +11,7 @@ IMAGE_EXTENSIONS = convert_to_lower_and_upper_set({"jpg", "jpeg", "png", "webp",
VIDEO_EXTENSIONS = convert_to_lower_and_upper_set({"mp4", "mov", "mpeg", "webm"})
AUDIO_EXTENSIONS = convert_to_lower_and_upper_set({"mp3", "m4a", "wav", "amr", "mpga"})
AUDIO_EXTENSIONS = convert_to_lower_and_upper_set({"mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"})
_doc_extensions: set[str]
if dify_config.ETL_TYPE == "Unstructured":

View File

@ -175,7 +175,7 @@ class TestFileTypeValidation:
@pytest.mark.parametrize(
"extension",
["mp3", "m4a", "wav", "amr", "mpga", "MP3", "WAV"],
["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm", "MP4", "MPEG", "WEEM"]
)
def test_audio_extension_in_constants(self, extension):
"""Test that audio extensions are correctly defined in constants."""
@ -1311,5 +1311,15 @@ class TestFileConstants:
assert len(image_audio_overlap) == 0, f"Image/Audio overlap: {image_audio_overlap}"
# Assert - Video and audio shouldn't overlap
video_audio_overlap = videos_lower & audio_lower
assert len(video_audio_overlap) == 0, f"Video/Audio overlap: {video_audio_overlap}"
# These formats are technically video containers but are supported
# by the audio-to-text endpoint, so overlap is allowed.
allowed_overlap = {'mp4', 'mpeg', 'webm'}
# Calculate the actual overlap found in the code
overlap = AUDIO_EXTENSIONS.intersection(VIDEO_EXTENSIONS)
# Remove the ones we know are safe
unexpected_overlap = overlap - allowed_overlap
# Fail ONLY if we find weird overlaps (like if 'jpg' was in audio)
assert not unexpected_overlap, f"Unexpected overlap between audio and video: {unexpected_overlap}"