fix(api): allow punctuation in uploaded filenames (#33364)

Co-authored-by: Chen Yefan <cyefan2@gmail.com>
This commit is contained in:
yevanmore 2026-03-13 21:33:09 +08:00 committed by GitHub
parent 7e1dc3c122
commit 194c205ed3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 43 additions and 18 deletions

View File

@ -58,8 +58,9 @@ class FileService:
# get file extension
extension = os.path.splitext(filename)[1].lstrip(".").lower()
# check if filename contains invalid characters
if any(c in filename for c in ["/", "\\", ":", "*", "?", '"', "<", ">", "|"]):
# Only reject path separators here. The original filename is stored as metadata,
# while the storage key is UUID-based.
if any(c in filename for c in ["/", "\\"]):
raise ValueError("Filename contains invalid characters")
if len(filename) > 200:

View File

@ -263,6 +263,27 @@ class TestFileService:
user=account,
)
def test_upload_file_allows_regular_punctuation_in_filename(
self, db_session_with_containers: Session, engine, mock_external_service_dependencies
):
"""
Test file upload allows punctuation that is safe when stored as metadata.
"""
account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies)
filename = 'candidate?resume for "dify"<final>|v2:.txt'
content = b"test content"
mimetype = "text/plain"
upload_file = FileService(engine).upload_file(
filename=filename,
content=content,
mimetype=mimetype,
user=account,
)
assert upload_file.name == filename
def test_upload_file_filename_too_long(
self, db_session_with_containers: Session, engine, mock_external_service_dependencies
):

View File

@ -35,7 +35,7 @@ TEST COVERAGE OVERVIEW:
- Tests hash consistency and determinism
6. Invalid Filename Handling (TestInvalidFilenameHandling)
- Validates rejection of filenames with invalid characters (/, \\, :, *, ?, ", <, >, |)
- Validates rejection of filenames with path separators (/, \\)
- Tests filename length truncation (max 200 characters)
- Prevents path traversal attacks
- Handles edge cases like empty filenames
@ -535,30 +535,23 @@ class TestInvalidFilenameHandling:
@pytest.mark.parametrize(
"invalid_char",
["/", "\\", ":", "*", "?", '"', "<", ">", "|"],
["/", "\\"],
)
def test_filename_contains_invalid_characters(self, invalid_char):
"""Test detection of invalid characters in filename.
Security-critical test that validates rejection of dangerous filename characters.
Security-critical test that validates rejection of path separators.
These characters are blocked because they:
- / and \\ : Directory separators, could enable path traversal
- : : Drive letter separator on Windows, reserved character
- * and ? : Wildcards, could cause issues in file operations
- " : Quote character, could break command-line operations
- < and > : Redirection operators, command injection risk
- | : Pipe operator, command injection risk
Blocking these characters prevents:
- Path traversal attacks (../../etc/passwd)
- Command injection
- File system corruption
- Cross-platform compatibility issues
- ZIP entry traversal issues
- Ambiguous path handling
"""
# Arrange - Create filename with invalid character
filename = f"test{invalid_char}file.txt"
# Define complete list of invalid characters
invalid_chars = ["/", "\\", ":", "*", "?", '"', "<", ">", "|"]
invalid_chars = ["/", "\\"]
# Act - Check if filename contains any invalid character
has_invalid_char = any(c in filename for c in invalid_chars)
@ -570,7 +563,7 @@ class TestInvalidFilenameHandling:
"""Test that valid filenames pass validation."""
# Arrange
filename = "valid_file-name_123.txt"
invalid_chars = ["/", "\\", ":", "*", "?", '"', "<", ">", "|"]
invalid_chars = ["/", "\\"]
# Act
has_invalid_char = any(c in filename for c in invalid_chars)
@ -578,6 +571,16 @@ class TestInvalidFilenameHandling:
# Assert
assert has_invalid_char is False
@pytest.mark.parametrize("safe_char", [":", "*", "?", '"', "<", ">", "|"])
def test_filename_allows_safe_metadata_characters(self, safe_char):
"""Test that non-separator punctuation remains allowed in filenames."""
filename = f"candidate{safe_char}resume.txt"
invalid_chars = ["/", "\\"]
has_invalid_char = any(c in filename for c in invalid_chars)
assert has_invalid_char is False
def test_extremely_long_filename_truncation(self):
"""Test handling of extremely long filenames."""
# Arrange
@ -904,7 +907,7 @@ class TestFilenameValidation:
"""Test that filenames with spaces are handled correctly."""
# Arrange
filename = "my document with spaces.pdf"
invalid_chars = ["/", "\\", ":", "*", "?", '"', "<", ">", "|"]
invalid_chars = ["/", "\\"]
# Act - Check for invalid characters
has_invalid = any(c in filename for c in invalid_chars)
@ -921,7 +924,7 @@ class TestFilenameValidation:
"مستند.txt", # Arabic
"ファイル.jpg", # Japanese
]
invalid_chars = ["/", "\\", ":", "*", "?", '"', "<", ">", "|"]
invalid_chars = ["/", "\\"]
# Act & Assert - Unicode should be allowed
for filename in unicode_filenames: