From bf6bf6cb8f622b1221dab6c8498a6e79ce3199ab Mon Sep 17 00:00:00 2001
From: Mahmoud-Othman
Date: Thu, 22 Jan 2026 05:43:14 +0200
Subject: [PATCH 1/3] fix: allow webm/mpeg formats in audio-to-text endpoint

- Added webm, mpeg, and mp4 to AUDIO_EXTENSIONS in constants; every
  previously accepted format (including amr) is kept.
- Updated unit tests to verify support for these formats.

Closes #29726
---
Review note: hand-edited during review. AUDIO_EXTENSIONS keeps "amr"
(the first revision removed it without saying so), so the post-image
blob id on the api/constants/__init__.py index line is not recomputed.

 api/constants/__init__.py                                | 2 +-
 api/tests/unit_tests/core/datasource/test_file_upload.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/api/constants/__init__.py b/api/constants/__init__.py
index e441395afc..1d11cecddb 100644
--- a/api/constants/__init__.py
+++ b/api/constants/__init__.py
@@ -11,7 +11,7 @@ IMAGE_EXTENSIONS = convert_to_lower_and_upper_set({"jpg", "jpeg", "png", "webp",
 
 VIDEO_EXTENSIONS = convert_to_lower_and_upper_set({"mp4", "mov", "mpeg", "webm"})
 
-AUDIO_EXTENSIONS = convert_to_lower_and_upper_set({"mp3", "m4a", "wav", "amr", "mpga"})
+AUDIO_EXTENSIONS = convert_to_lower_and_upper_set({"mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm", "amr"})
 
 _doc_extensions: set[str]
 if dify_config.ETL_TYPE == "Unstructured":
diff --git a/api/tests/unit_tests/core/datasource/test_file_upload.py b/api/tests/unit_tests/core/datasource/test_file_upload.py
index ad86190e00..f5be87022c 100644
--- a/api/tests/unit_tests/core/datasource/test_file_upload.py
+++ b/api/tests/unit_tests/core/datasource/test_file_upload.py
@@ -175,7 +175,7 @@ class TestFileTypeValidation:
 
     @pytest.mark.parametrize(
         "extension",
-        ["mp3", "m4a", "wav", "amr", "mpga", "MP3", "WAV"],
+        ["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"],
     )
     def test_audio_extension_in_constants(self, extension):
         """Test that audio extensions are correctly defined in constants."""

From b6c55c433ad04db387e659f5b32c4a4792a066de Mon Sep 17 00:00:00 2001
From: Mahmoud Mohsen <99143276+Mahmoud12344@users.noreply.github.com>
Date: Thu, 22 Jan 2026 06:26:08 +0200
Subject: [PATCH 2/3] Update api/tests/unit_tests/core/datasource/test_file_upload.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 api/tests/unit_tests/core/datasource/test_file_upload.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/tests/unit_tests/core/datasource/test_file_upload.py b/api/tests/unit_tests/core/datasource/test_file_upload.py
index f5be87022c..9057491812 100644
--- a/api/tests/unit_tests/core/datasource/test_file_upload.py
+++ b/api/tests/unit_tests/core/datasource/test_file_upload.py
@@ -175,7 +175,7 @@ class TestFileTypeValidation:
 
     @pytest.mark.parametrize(
         "extension",
-        ["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"],
+        ["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm", "MP4", "MPEG", "WEBM"],
     )
     def test_audio_extension_in_constants(self, extension):
         """Test that audio extensions are correctly defined in constants."""

From 025b1984a091da898cc4bbc2a1b2b00a6ce02ab3 Mon Sep 17 00:00:00 2001
From: Mahmoud-Othman
Date: Thu, 22 Jan 2026 06:40:43 +0200
Subject: [PATCH 3/3] test: allow mp4/mpeg/webm in both audio and video sets

These formats {'mp4', 'mpeg', 'webm'} are technically video containers
but are supported by the audio-to-text endpoint, so overlap is allowed.
---
Review note: hand-edited during review, so the blob ids on the index
line are not recomputed. The hunk that rewrote the parametrize list of
test_audio_extension_in_constants was dropped: it repeated [PATCH 2/3]
against the pre-2/3 blob (so it no longer applied) and misspelled
"WEBM" as "WEEM". The overlap check now intersects the lowercased sets.

 .../core/datasource/test_file_upload.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/api/tests/unit_tests/core/datasource/test_file_upload.py b/api/tests/unit_tests/core/datasource/test_file_upload.py
index 9057491812..c03698fb24 100644
--- a/api/tests/unit_tests/core/datasource/test_file_upload.py
+++ b/api/tests/unit_tests/core/datasource/test_file_upload.py
@@ -1308,5 +1308,11 @@ class TestFileConstants:
         assert len(image_audio_overlap) == 0, f"Image/Audio overlap: {image_audio_overlap}"
 
-        # Assert - Video and audio shouldn't overlap
-        video_audio_overlap = videos_lower & audio_lower
-        assert len(video_audio_overlap) == 0, f"Video/Audio overlap: {video_audio_overlap}"
+        # Assert - Video and audio may only overlap on the container formats
+        # that the audio-to-text endpoint also accepts.
+        allowed_overlap = {"mp4", "mpeg", "webm"}
+
+        # Intersect the lowercased sets: the constants are built with
+        # convert_to_lower_and_upper_set, so intersecting them directly would
+        # also yield "MP4"/"MPEG"/"WEBM", which are not in allowed_overlap.
+        unexpected_overlap = (videos_lower & audio_lower) - allowed_overlap
+        assert not unexpected_overlap, f"Unexpected overlap between audio and video: {unexpected_overlap}"