From e4f1d3c63af681ed06a92c0e9f225b2fb7ae8438 Mon Sep 17 00:00:00 2001 From: tmimmanuel <14046872+tmimmanuel@users.noreply.github.com> Date: Thu, 19 Mar 2026 06:15:32 +0000 Subject: [PATCH] refactor: use EnumText(StorageType) for UploadFile.storage_type (#33728) --- api/core/datasource/datasource_file_manager.py | 3 ++- api/core/rag/extractor/pdf_extractor.py | 3 ++- api/core/rag/extractor/word_extractor.py | 5 +++-- api/models/model.py | 5 +++-- api/services/file_service.py | 5 +++-- .../factories/test_storage_key_loader.py | 5 +++-- .../services/test_workflow_draft_variable_service.py | 5 +++-- .../tasks/test_remove_app_and_related_data_task.py | 9 +++++---- .../factories/test_storage_key_loader.py | 5 +++-- .../services/document_service_status.py | 3 ++- .../services/test_document_service_rename_document.py | 3 ++- .../services/test_file_service.py | 3 ++- .../tasks/test_batch_clean_document_task.py | 3 ++- .../tasks/test_batch_create_segment_to_index_task.py | 3 ++- .../tasks/test_clean_dataset_task.py | 5 +++-- .../tasks/test_remove_app_and_related_data_task.py | 3 ++- .../controllers/console/datasets/test_datasets.py | 3 ++- .../core/datasource/test_datasource_file_manager.py | 1 + 18 files changed, 45 insertions(+), 27 deletions(-) diff --git a/api/core/datasource/datasource_file_manager.py b/api/core/datasource/datasource_file_manager.py index 5971c1e013..24243add17 100644 --- a/api/core/datasource/datasource_file_manager.py +++ b/api/core/datasource/datasource_file_manager.py @@ -15,6 +15,7 @@ from configs import dify_config from core.helper import ssrf_proxy from extensions.ext_database import db from extensions.ext_storage import storage +from extensions.storage.storage_type import StorageType from models.enums import CreatorUserRole from models.model import MessageFile, UploadFile from models.tools import ToolFile @@ -81,7 +82,7 @@ class DatasourceFileManager: upload_file = UploadFile( tenant_id=tenant_id, - storage_type=dify_config.STORAGE_TYPE, + storage_type=StorageType(dify_config.STORAGE_TYPE), key=filepath, name=present_filename, size=len(file_binary), diff --git a/api/core/rag/extractor/pdf_extractor.py b/api/core/rag/extractor/pdf_extractor.py index 6aabcac704..9abdb31325 100644 --- a/api/core/rag/extractor/pdf_extractor.py +++ b/api/core/rag/extractor/pdf_extractor.py @@ -15,6 +15,7 @@ from core.rag.extractor.extractor_base import BaseExtractor from core.rag.models.document import Document from extensions.ext_database import db from extensions.ext_storage import storage +from extensions.storage.storage_type import StorageType from libs.datetime_utils import naive_utc_now from models.enums import CreatorUserRole from models.model import UploadFile @@ -150,7 +151,7 @@ class PdfExtractor(BaseExtractor): # save file to db upload_file = UploadFile( tenant_id=self._tenant_id, - storage_type=dify_config.STORAGE_TYPE, + storage_type=StorageType(dify_config.STORAGE_TYPE), key=file_key, name=file_key, size=len(img_bytes), diff --git a/api/core/rag/extractor/word_extractor.py b/api/core/rag/extractor/word_extractor.py index d6b6ca35be..f44e7492cb 100644 --- a/api/core/rag/extractor/word_extractor.py +++ b/api/core/rag/extractor/word_extractor.py @@ -21,6 +21,7 @@ from core.rag.extractor.extractor_base import BaseExtractor from core.rag.models.document import Document from extensions.ext_database import db from extensions.ext_storage import storage +from extensions.storage.storage_type import StorageType from libs.datetime_utils import naive_utc_now from models.enums import CreatorUserRole from models.model import UploadFile @@ -112,7 +113,7 @@ class WordExtractor(BaseExtractor): # save file to db upload_file = UploadFile( tenant_id=self.tenant_id, - storage_type=dify_config.STORAGE_TYPE, + storage_type=StorageType(dify_config.STORAGE_TYPE), key=file_key, name=file_key, size=0, @@ -140,7 +141,7 @@ class WordExtractor(BaseExtractor): # save file to db upload_file = UploadFile( tenant_id=self.tenant_id, - storage_type=dify_config.STORAGE_TYPE, + storage_type=StorageType(dify_config.STORAGE_TYPE), key=file_key, name=file_key, size=0, diff --git a/api/models/model.py b/api/models/model.py index ff69d9d3a2..45d9c501ae 100644 --- a/api/models/model.py +++ b/api/models/model.py @@ -23,6 +23,7 @@ from core.tools.signature import sign_tool_file from dify_graph.enums import WorkflowExecutionStatus from dify_graph.file import FILE_MODEL_IDENTITY, File, FileTransferMethod from dify_graph.file import helpers as file_helpers +from extensions.storage.storage_type import StorageType from libs.helper import generate_string # type: ignore[import-not-found] from libs.uuid_utils import uuidv7 @@ -2108,7 +2109,7 @@ class UploadFile(Base): # The `server_default` serves as a fallback mechanism. id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4())) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) - storage_type: Mapped[str] = mapped_column(String(255), nullable=False) + storage_type: Mapped[StorageType] = mapped_column(EnumText(StorageType, length=255), nullable=False) key: Mapped[str] = mapped_column(String(255), nullable=False) name: Mapped[str] = mapped_column(String(255), nullable=False) size: Mapped[int] = mapped_column(sa.Integer, nullable=False) @@ -2152,7 +2153,7 @@ class UploadFile(Base): self, *, tenant_id: str, - storage_type: str, + storage_type: StorageType, key: str, name: str, size: int, diff --git a/api/services/file_service.py b/api/services/file_service.py index ecb30faaa8..a7060f3b92 100644 --- a/api/services/file_service.py +++ b/api/services/file_service.py @@ -23,6 +23,7 @@ from core.rag.extractor.extract_processor import ExtractProcessor from dify_graph.file import helpers as file_helpers from extensions.ext_database import db from extensions.ext_storage import storage +from extensions.storage.storage_type import StorageType from libs.datetime_utils import naive_utc_now from libs.helper import extract_tenant_id from models import Account @@ -93,7 +94,7 @@ class FileService: # save file to db upload_file = UploadFile( tenant_id=current_tenant_id or "", - storage_type=dify_config.STORAGE_TYPE, + storage_type=StorageType(dify_config.STORAGE_TYPE), key=file_key, name=filename, size=file_size, @@ -152,7 +153,7 @@ class FileService: # save file to db upload_file = UploadFile( tenant_id=tenant_id, - storage_type=dify_config.STORAGE_TYPE, + storage_type=StorageType(dify_config.STORAGE_TYPE), key=file_key, name=text_name, size=len(text), diff --git a/api/tests/integration_tests/factories/test_storage_key_loader.py b/api/tests/integration_tests/factories/test_storage_key_loader.py index b4e3a0e4de..db4bbc1ca1 100644 --- a/api/tests/integration_tests/factories/test_storage_key_loader.py +++ b/api/tests/integration_tests/factories/test_storage_key_loader.py @@ -8,6 +8,7 @@ from sqlalchemy.orm import Session from dify_graph.file import File, FileTransferMethod, FileType from extensions.ext_database import db +from extensions.storage.storage_type import StorageType from factories.file_factory import StorageKeyLoader from models import ToolFile, UploadFile from models.enums import CreatorUserRole @@ -53,7 +54,7 @@ class TestStorageKeyLoader(unittest.TestCase): upload_file = UploadFile( tenant_id=tenant_id, - storage_type="local", + storage_type=StorageType.LOCAL, key=storage_key, name="test_file.txt", size=1024, @@ -288,7 +289,7 @@ class TestStorageKeyLoader(unittest.TestCase): # Create upload file for other tenant (but don't add to cleanup list) upload_file_other = UploadFile( tenant_id=other_tenant_id, - storage_type="local", + storage_type=StorageType.LOCAL, key="other_tenant_key", name="other_file.txt", size=1024, diff --git a/api/tests/integration_tests/services/test_workflow_draft_variable_service.py b/api/tests/integration_tests/services/test_workflow_draft_variable_service.py index b6aeb54cca..9d3a869691 100644 --- a/api/tests/integration_tests/services/test_workflow_draft_variable_service.py +++ b/api/tests/integration_tests/services/test_workflow_draft_variable_service.py @@ -13,6 +13,7 @@ from dify_graph.variables.types import SegmentType from dify_graph.variables.variables import StringVariable from extensions.ext_database import db from extensions.ext_storage import storage +from extensions.storage.storage_type import StorageType from factories.variable_factory import build_segment from libs import datetime_utils from models.enums import CreatorUserRole @@ -347,7 +348,7 @@ class TestDraftVariableLoader(unittest.TestCase): # Create an upload file record upload_file = UploadFile( tenant_id=self._test_tenant_id, - storage_type="local", + storage_type=StorageType.LOCAL, key=f"test_offload_{uuid.uuid4()}.json", name="test_offload.json", size=len(content_bytes), @@ -450,7 +451,7 @@ class TestDraftVariableLoader(unittest.TestCase): # Create upload file record upload_file = UploadFile( tenant_id=self._test_tenant_id, - storage_type="local", + storage_type=StorageType.LOCAL, key=f"test_integration_{uuid.uuid4()}.txt", name="test_integration.txt", size=len(content_bytes), diff --git a/api/tests/integration_tests/tasks/test_remove_app_and_related_data_task.py b/api/tests/integration_tests/tasks/test_remove_app_and_related_data_task.py index 988313e68d..bc83c6cc12 100644 --- a/api/tests/integration_tests/tasks/test_remove_app_and_related_data_task.py +++ b/api/tests/integration_tests/tasks/test_remove_app_and_related_data_task.py @@ -6,6 +6,7 @@ from sqlalchemy import delete from core.db.session_factory import session_factory from dify_graph.variables.segments import StringSegment +from extensions.storage.storage_type import StorageType from models import Tenant from models.enums import CreatorUserRole from models.model import App, UploadFile @@ -197,7 +198,7 @@ class TestDeleteDraftVariablesWithOffloadIntegration: with session_factory.create_session() as session: upload_file1 = UploadFile( tenant_id=tenant.id, - storage_type="local", + storage_type=StorageType.LOCAL, key="test/file1.json", name="file1.json", size=1024, @@ -210,7 +211,7 @@ class TestDeleteDraftVariablesWithOffloadIntegration: ) upload_file2 = UploadFile( tenant_id=tenant.id, - storage_type="local", + storage_type=StorageType.LOCAL, key="test/file2.json", name="file2.json", size=2048, @@ -430,7 +431,7 @@ class TestDeleteDraftVariablesSessionCommit: with session_factory.create_session() as session: upload_file1 = UploadFile( tenant_id=tenant.id, - storage_type="local", + storage_type=StorageType.LOCAL, key="test/file1.json", name="file1.json", size=1024, @@ -443,7 +444,7 @@ class TestDeleteDraftVariablesSessionCommit: ) upload_file2 = UploadFile( tenant_id=tenant.id, - storage_type="local", + storage_type=StorageType.LOCAL, key="test/file2.json", name="file2.json", size=2048, diff --git a/api/tests/test_containers_integration_tests/factories/test_storage_key_loader.py b/api/tests/test_containers_integration_tests/factories/test_storage_key_loader.py index cb7cd37a3f..8e70fc0bb0 100644 --- a/api/tests/test_containers_integration_tests/factories/test_storage_key_loader.py +++ b/api/tests/test_containers_integration_tests/factories/test_storage_key_loader.py @@ -8,6 +8,7 @@ from sqlalchemy.orm import Session from dify_graph.file import File, FileTransferMethod, FileType from extensions.ext_database import db +from extensions.storage.storage_type import StorageType from factories.file_factory import StorageKeyLoader from models import ToolFile, UploadFile from models.enums import CreatorUserRole @@ -53,7 +54,7 @@ class TestStorageKeyLoader(unittest.TestCase): upload_file = UploadFile( tenant_id=tenant_id, - storage_type="local", + storage_type=StorageType.LOCAL, key=storage_key, name="test_file.txt", size=1024, @@ -289,7 +290,7 @@ class TestStorageKeyLoader(unittest.TestCase): # Create upload file for other tenant (but don't add to cleanup list) upload_file_other = UploadFile( tenant_id=other_tenant_id, - storage_type="local", + storage_type=StorageType.LOCAL, key="other_tenant_key", name="other_file.txt", size=1024, diff --git a/api/tests/test_containers_integration_tests/services/document_service_status.py b/api/tests/test_containers_integration_tests/services/document_service_status.py index 251f17dd03..f995ac7bef 100644 --- a/api/tests/test_containers_integration_tests/services/document_service_status.py +++ b/api/tests/test_containers_integration_tests/services/document_service_status.py @@ -13,6 +13,7 @@ from uuid import uuid4 import pytest +from extensions.storage.storage_type import StorageType from models import Account from models.dataset import Dataset, Document from models.enums import CreatorUserRole, DataSourceType, DocumentCreatedFrom, IndexingStatus @@ -198,7 +199,7 @@ class DocumentStatusTestDataFactory: """ upload_file = UploadFile( tenant_id=tenant_id, - storage_type="local", + storage_type=StorageType.LOCAL, key=f"uploads/{uuid4()}", name=name, size=128, diff --git a/api/tests/test_containers_integration_tests/services/test_document_service_rename_document.py b/api/tests/test_containers_integration_tests/services/test_document_service_rename_document.py index b159af0090..bffa520ce6 100644 --- a/api/tests/test_containers_integration_tests/services/test_document_service_rename_document.py +++ b/api/tests/test_containers_integration_tests/services/test_document_service_rename_document.py @@ -7,6 +7,7 @@ from uuid import uuid4 import pytest +from extensions.storage.storage_type import StorageType from models import Account from models.dataset import Dataset, Document from models.enums import CreatorUserRole, DataSourceType, DocumentCreatedFrom @@ -83,7 +84,7 @@ def make_upload_file(db_session_with_containers, tenant_id: str, file_id: str, n """Persist an upload file row referenced by document.data_source_info.""" upload_file = UploadFile( tenant_id=tenant_id, - storage_type="local", + storage_type=StorageType.LOCAL, key=f"uploads/{uuid4()}", name=name, size=128, diff --git a/api/tests/test_containers_integration_tests/services/test_file_service.py b/api/tests/test_containers_integration_tests/services/test_file_service.py index 50f5b7a8c0..42dbdef1c9 100644 --- a/api/tests/test_containers_integration_tests/services/test_file_service.py +++ b/api/tests/test_containers_integration_tests/services/test_file_service.py @@ -9,6 +9,7 @@ from sqlalchemy.orm import Session from werkzeug.exceptions import NotFound from configs import dify_config +from extensions.storage.storage_type import StorageType from models import Account, Tenant from models.enums import CreatorUserRole from models.model import EndUser, UploadFile @@ -140,7 +141,7 @@ class TestFileService: upload_file = UploadFile( tenant_id=account.current_tenant_id if hasattr(account, "current_tenant_id") else str(fake.uuid4()), - storage_type="local", + storage_type=StorageType.LOCAL, key=f"upload_files/test/{fake.uuid4()}.txt", name="test_file.txt", size=1024, diff --git a/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py b/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py index 6adefd59be..210d9eb39e 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py @@ -13,6 +13,7 @@ import pytest from faker import Faker from sqlalchemy.orm import Session +from extensions.storage.storage_type import StorageType from libs.datetime_utils import naive_utc_now from models import Account, Tenant, TenantAccountJoin, TenantAccountRole from models.dataset import Dataset, Document, DocumentSegment @@ -209,7 +210,7 @@ class TestBatchCleanDocumentTask: upload_file = UploadFile( tenant_id=account.current_tenant.id, - storage_type="local", + storage_type=StorageType.LOCAL, key=f"test_files/{fake.file_name()}", name=fake.file_name(), size=1024, diff --git a/api/tests/test_containers_integration_tests/tasks/test_batch_create_segment_to_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_batch_create_segment_to_index_task.py index ebe5ff1d96..202ccb0098 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_batch_create_segment_to_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_batch_create_segment_to_index_task.py @@ -19,6 +19,7 @@ import pytest from faker import Faker from sqlalchemy.orm import Session +from extensions.storage.storage_type import StorageType from models import Account, Tenant, TenantAccountJoin, TenantAccountRole from models.dataset import Dataset, Document, DocumentSegment from models.enums import CreatorUserRole, DataSourceType, DocumentCreatedFrom, IndexingStatus, SegmentStatus @@ -203,7 +204,7 @@ class TestBatchCreateSegmentToIndexTask: upload_file = UploadFile( tenant_id=tenant.id, - storage_type="local", + storage_type=StorageType.LOCAL, key=f"test_files/{fake.file_name()}", name=fake.file_name(), size=1024, diff --git a/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py b/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py index 638752cf8b..1cd698b870 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py @@ -18,6 +18,7 @@ import pytest from faker import Faker from sqlalchemy.orm import Session +from extensions.storage.storage_type import StorageType from models import Account, Tenant, TenantAccountJoin, TenantAccountRole from models.dataset import ( AppDatasetJoin, @@ -254,7 +255,7 @@ class TestCleanDatasetTask: upload_file = UploadFile( tenant_id=tenant.id, - storage_type="local", + storage_type=StorageType.LOCAL, key=f"test_files/{fake.file_name()}", name=fake.file_name(), size=1024, @@ -925,7 +926,7 @@ class TestCleanDatasetTask: special_filename = f"test_file_{special_content}.txt" upload_file = UploadFile( tenant_id=tenant.id, - storage_type="local", + storage_type=StorageType.LOCAL, key=f"test_files/{special_filename}", name=special_filename, size=1024, diff --git a/api/tests/test_containers_integration_tests/tasks/test_remove_app_and_related_data_task.py b/api/tests/test_containers_integration_tests/tasks/test_remove_app_and_related_data_task.py index 182c9ef882..5bded4d670 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_remove_app_and_related_data_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_remove_app_and_related_data_task.py @@ -6,6 +6,7 @@ import pytest from core.db.session_factory import session_factory from dify_graph.variables.segments import StringSegment from dify_graph.variables.types import SegmentType +from extensions.storage.storage_type import StorageType from libs.datetime_utils import naive_utc_now from models import Tenant from models.enums import CreatorUserRole @@ -78,7 +79,7 @@ def _create_offload_data(db_session_with_containers, *, tenant_id: str, app_id: for i in range(count): upload_file = UploadFile( tenant_id=tenant_id, - storage_type="local", + storage_type=StorageType.LOCAL, key=f"test/file-{uuid.uuid4()}-{i}.json", name=f"file-{i}.json", size=1024 + i, diff --git a/api/tests/unit_tests/controllers/console/datasets/test_datasets.py b/api/tests/unit_tests/controllers/console/datasets/test_datasets.py index f9fc2ac397..0ee76e504b 100644 --- a/api/tests/unit_tests/controllers/console/datasets/test_datasets.py +++ b/api/tests/unit_tests/controllers/console/datasets/test_datasets.py @@ -28,6 +28,7 @@ from controllers.console.datasets.datasets import ( from controllers.console.datasets.error import DatasetInUseError, DatasetNameDuplicateError, IndexingEstimateError from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError from core.provider_manager import ProviderManager +from extensions.storage.storage_type import StorageType from models.enums import CreatorUserRole from models.model import ApiToken, UploadFile from services.dataset_service import DatasetPermissionService, DatasetService @@ -1121,7 +1122,7 @@ class TestDatasetIndexingEstimateApi: def _upload_file(self, *, tenant_id: str = "tenant-1", file_id: str = "file-1") -> UploadFile: upload_file = UploadFile( tenant_id=tenant_id, - storage_type="local", + storage_type=StorageType.LOCAL, key="key", name="name.txt", size=1, diff --git a/api/tests/unit_tests/core/datasource/test_datasource_file_manager.py b/api/tests/unit_tests/core/datasource/test_datasource_file_manager.py index a7c93242cd..7cd1fdf06b 100644 --- a/api/tests/unit_tests/core/datasource/test_datasource_file_manager.py +++ b/api/tests/unit_tests/core/datasource/test_datasource_file_manager.py @@ -166,6 +166,7 @@ class TestDatasourceFileManager: # Setup mock_guess_ext.return_value = None # Cannot guess mock_uuid.return_value = MagicMock(hex="unique_hex") + mock_config.STORAGE_TYPE = "local" # Execute upload_file = DatasourceFileManager.create_file_by_raw(