From 806016244fd455b662ba3f444599115404e182fe Mon Sep 17 00:00:00 2001 From: Harry Date: Fri, 30 Jan 2026 00:41:20 +0800 Subject: [PATCH] refactor(storage): integrate SilentStorage for improved file handling - Replaced direct storage references with SilentStorage in various components to enhance fallback mechanisms. - Updated storage key formats for sandbox archives and files to improve clarity and consistency. - Refactored related classes and methods to utilize the new SandboxFilePath structure. - Adjusted unit tests to reflect changes in the StorageTicket model and its serialization methods. --- api/core/app_assets/storage.py | 8 ++--- api/core/sandbox/inspector/archive_source.py | 9 ++--- api/core/sandbox/inspector/runtime_source.py | 4 +-- api/core/sandbox/storage/__init__.py | 4 +-- api/core/sandbox/storage/archive_storage.py | 13 +++++--- .../sandbox/storage/sandbox_file_storage.py | 22 ++++++------- .../storage/cached_presign_storage.py | 5 ++- api/services/app_asset_service.py | 2 -- api/services/storage_ticket_service.py | 33 +++++-------------- .../core/app_assets/test_storage.py | 14 +++++--- .../storage/test_cached_presign_storage.py | 6 ++-- 11 files changed, 54 insertions(+), 66 deletions(-) diff --git a/api/core/app_assets/storage.py b/api/core/app_assets/storage.py index 17964cb9d5..e5265f0803 100644 --- a/api/core/app_assets/storage.py +++ b/api/core/app_assets/storage.py @@ -22,6 +22,7 @@ from uuid import UUID from extensions.storage.base_storage import BaseStorage from extensions.storage.cached_presign_storage import CachedPresignStorage from extensions.storage.file_presign_storage import FilePresignStorage +from extensions.storage.silent_storage import SilentStorage _ASSET_BASE = "app_assets" _SILENT_STORAGE_NOT_FOUND = b"File Not Found" @@ -200,13 +201,12 @@ class AppAssetStorage: _storage: CachedPresignStorage - def __init__(self, storage: BaseStorage, *, redis_client: Any, cache_key_prefix: str = "app_assets") -> None: + def __init__(self, storage: BaseStorage) -> None: # Wrap with FilePresignStorage for fallback support, then CachedPresignStorage for caching - presign_storage = FilePresignStorage(storage) + presign_storage = FilePresignStorage(SilentStorage(storage)) self._storage = CachedPresignStorage( storage=presign_storage, - redis_client=redis_client, - cache_key_prefix=cache_key_prefix, + cache_key_prefix="app_assets", ) @property diff --git a/api/core/sandbox/inspector/archive_source.py b/api/core/sandbox/inspector/archive_source.py index a9b0b360b4..102e997f6b 100644 --- a/api/core/sandbox/inspector/archive_source.py +++ b/api/core/sandbox/inspector/archive_source.py @@ -9,10 +9,11 @@ from core.sandbox.entities.files import SandboxFileDownloadTicket, SandboxFileNo from core.sandbox.inspector.base import SandboxFileSource from core.sandbox.storage import sandbox_file_storage from core.sandbox.storage.archive_storage import SandboxArchivePath -from core.sandbox.storage.sandbox_file_storage import SandboxFileDownloadPath +from core.sandbox.storage.sandbox_file_storage import SandboxFilePath from core.virtual_environment.__base.exec import CommandExecutionError from core.virtual_environment.__base.helpers import execute from extensions.ext_storage import storage +from extensions.storage.silent_storage import SilentStorage if TYPE_CHECKING: from core.zip_sandbox import ZipSandbox @@ -74,7 +75,7 @@ print(json.dumps(entries)) storage_key = archive_path.get_storage_key() if not storage.exists(storage_key): raise ValueError("Sandbox archive not found") - presign_storage = FilePresignStorage(storage.storage_runner) + presign_storage = FilePresignStorage(SilentStorage(storage.storage_runner)) return presign_storage.get_download_url(storage_key, self._EXPORT_EXPIRES_IN_SECONDS) def _create_zip_sandbox(self) -> ZipSandbox: @@ -190,7 +191,7 @@ raise SystemExit(2) if kind == "file": # Download file content from sandbox file_data = zs.read_file(target_path) - export_path = SandboxFileDownloadPath( + export_path = SandboxFilePath( tenant_id=UUID(self._tenant_id), sandbox_id=UUID(self._sandbox_id), export_id=export_id, @@ -201,7 +202,7 @@ raise SystemExit(2) # Create tar.gz archive of the directory tar_file = zs.tar(target_path, include_base=True, compress=True) tar_data = zs.read_file(tar_file.path) - export_path = SandboxFileDownloadPath( + export_path = SandboxFilePath( tenant_id=UUID(self._tenant_id), sandbox_id=UUID(self._sandbox_id), export_id=export_id, diff --git a/api/core/sandbox/inspector/runtime_source.py b/api/core/sandbox/inspector/runtime_source.py index 052092e00d..884d80f45c 100644 --- a/api/core/sandbox/inspector/runtime_source.py +++ b/api/core/sandbox/inspector/runtime_source.py @@ -8,7 +8,7 @@ from uuid import UUID, uuid4 from core.sandbox.entities.files import SandboxFileDownloadTicket, SandboxFileNode from core.sandbox.inspector.base import SandboxFileSource from core.sandbox.storage import sandbox_file_storage -from core.sandbox.storage.sandbox_file_storage import SandboxFileDownloadPath +from core.sandbox.storage.sandbox_file_storage import SandboxFilePath from core.virtual_environment.__base.exec import CommandExecutionError from core.virtual_environment.__base.helpers import execute from core.virtual_environment.__base.virtual_environment import VirtualEnvironment @@ -115,7 +115,7 @@ print(json.dumps(entries)) export_name = os.path.basename(path.rstrip("/")) or "workspace" filename = f"{export_name}.tar.gz" if kind == "dir" else (os.path.basename(path) or "file") export_id = uuid4().hex - export_path = SandboxFileDownloadPath( + export_path = SandboxFilePath( tenant_id=UUID(self._tenant_id), sandbox_id=UUID(self._sandbox_id), export_id=export_id, diff --git a/api/core/sandbox/storage/__init__.py b/api/core/sandbox/storage/__init__.py index b0eeb8940a..7e0338741d 100644 --- a/api/core/sandbox/storage/__init__.py +++ b/api/core/sandbox/storage/__init__.py @@ -1,13 +1,13 @@ from .archive_storage import ArchiveSandboxStorage, SandboxArchivePath from .noop_storage import NoopSandboxStorage -from .sandbox_file_storage import SandboxFileDownloadPath, SandboxFileStorage, sandbox_file_storage +from .sandbox_file_storage import SandboxFilePath, SandboxFileStorage, sandbox_file_storage from .sandbox_storage import SandboxStorage __all__ = [ "ArchiveSandboxStorage", "NoopSandboxStorage", "SandboxArchivePath", - "SandboxFileDownloadPath", + "SandboxFilePath", "SandboxFileStorage", "SandboxStorage", "sandbox_file_storage", diff --git a/api/core/sandbox/storage/archive_storage.py b/api/core/sandbox/storage/archive_storage.py index b67b1b8825..067fefa15e 100644 --- a/api/core/sandbox/storage/archive_storage.py +++ b/api/core/sandbox/storage/archive_storage.py @@ -3,7 +3,7 @@ This module provides storage operations for sandbox workspace archives (tar.gz), enabling state persistence across sandbox sessions. -Storage key format: sandbox/{tenant_id}/{sandbox_id}.tar.gz +Storage key format: sandbox_archives/{tenant_id}/{sandbox_id}.tar.gz All presign operations use the unified FilePresignStorage wrapper, which automatically falls back to Dify's file proxy when the underlying storage doesn't support presigned URLs. @@ -19,7 +19,9 @@ from core.virtual_environment.__base.exec import PipelineExecutionError from core.virtual_environment.__base.helpers import pipeline from core.virtual_environment.__base.virtual_environment import VirtualEnvironment from extensions.storage.base_storage import BaseStorage +from extensions.storage.cached_presign_storage import CachedPresignStorage from extensions.storage.file_presign_storage import FilePresignStorage +from extensions.storage.silent_storage import SilentStorage from .sandbox_storage import SandboxStorage @@ -42,7 +44,7 @@ class SandboxArchivePath: sandbox_id: UUID def get_storage_key(self) -> str: - return f"sandbox/{self.tenant_id}/{self.sandbox_id}.tar.gz" + return f"sandbox_archives/{self.tenant_id}/{self.sandbox_id}.tar.gz" class ArchiveSandboxStorage(SandboxStorage): @@ -55,7 +57,7 @@ class ArchiveSandboxStorage(SandboxStorage): _tenant_id: str _sandbox_id: str _exclude_patterns: list[str] - _storage: FilePresignStorage + _storage: BaseStorage def __init__( self, @@ -68,7 +70,10 @@ class ArchiveSandboxStorage(SandboxStorage): self._sandbox_id = sandbox_id self._exclude_patterns = exclude_patterns or [] # Wrap with FilePresignStorage for presign fallback support - self._storage = FilePresignStorage(storage) + self._storage = CachedPresignStorage( + storage=FilePresignStorage(SilentStorage(storage)), + cache_key_prefix="sandbox_archives", + ) @property def _archive_path(self) -> SandboxArchivePath: diff --git a/api/core/sandbox/storage/sandbox_file_storage.py b/api/core/sandbox/storage/sandbox_file_storage.py index 2d0b5482fc..379f789c75 100644 --- a/api/core/sandbox/storage/sandbox_file_storage.py +++ b/api/core/sandbox/storage/sandbox_file_storage.py @@ -18,10 +18,11 @@ from uuid import UUID from extensions.storage.base_storage import BaseStorage from extensions.storage.cached_presign_storage import CachedPresignStorage from extensions.storage.file_presign_storage import FilePresignStorage +from extensions.storage.silent_storage import SilentStorage @dataclass(frozen=True) -class SandboxFileDownloadPath: +class SandboxFilePath: """Path for sandbox file exports.""" tenant_id: UUID @@ -30,7 +31,7 @@ class SandboxFileDownloadPath: filename: str def get_storage_key(self) -> str: - return f"sandbox_file_downloads/{self.tenant_id}/{self.sandbox_id}/{self.export_id}/{self.filename}" + return f"sandbox_files/{self.tenant_id}/{self.sandbox_id}/{self.export_id}/{self.filename}" class SandboxFileStorage: @@ -50,21 +51,20 @@ class SandboxFileStorage: def __init__(self, storage: BaseStorage, *, redis_client: Any) -> None: # Wrap with FilePresignStorage for fallback support, then CachedPresignStorage for caching - presign_storage = FilePresignStorage(storage) + presign_storage = FilePresignStorage(SilentStorage(storage)) self._storage = CachedPresignStorage( storage=presign_storage, - redis_client=redis_client, - cache_key_prefix="sandbox_file_downloads", + cache_key_prefix="sandbox_files", ) - def save(self, download_path: SandboxFileDownloadPath, content: bytes) -> None: - self._storage.save(download_path.get_storage_key(), content) + def save(self, file_path: SandboxFilePath, content: bytes) -> None: + self._storage.save(file_path.get_storage_key(), content) - def get_download_url(self, download_path: SandboxFileDownloadPath, expires_in: int = 3600) -> str: - return self._storage.get_download_url(download_path.get_storage_key(), expires_in) + def get_download_url(self, file_path: SandboxFilePath, expires_in: int = 3600) -> str: + return self._storage.get_download_url(file_path.get_storage_key(), expires_in) - def get_upload_url(self, download_path: SandboxFileDownloadPath, expires_in: int = 3600) -> str: - return self._storage.get_upload_url(download_path.get_storage_key(), expires_in) + def get_upload_url(self, file_path: SandboxFilePath, expires_in: int = 3600) -> str: + return self._storage.get_upload_url(file_path.get_storage_key(), expires_in) class _LazySandboxFileStorage: diff --git a/api/extensions/storage/cached_presign_storage.py b/api/extensions/storage/cached_presign_storage.py index f9f2ba08c0..52b9fe6209 100644 --- a/api/extensions/storage/cached_presign_storage.py +++ b/api/extensions/storage/cached_presign_storage.py @@ -1,8 +1,8 @@ """Storage wrapper that caches presigned download URLs.""" import logging -from typing import Any +from extensions.ext_redis import redis_client from extensions.storage.base_storage import BaseStorage from extensions.storage.storage_wrapper import StorageWrapper @@ -17,7 +17,7 @@ class CachedPresignStorage(StorageWrapper): Example: cached_storage = CachedPresignStorage( - storage=FilePresignStorage(base_storage), + storage=FilePresignStorage(SilentStorage(base_storage)), redis_client=redis_client, cache_key_prefix="app_asset:draft_download", ) @@ -30,7 +30,6 @@ class CachedPresignStorage(StorageWrapper): def __init__( self, storage: BaseStorage, - redis_client: Any, cache_key_prefix: str = "presign_cache", ): super().__init__(storage) diff --git a/api/services/app_asset_service.py b/api/services/app_asset_service.py index 3efa985620..2c1d95c5d3 100644 --- a/api/services/app_asset_service.py +++ b/api/services/app_asset_service.py @@ -48,8 +48,6 @@ class AppAssetService: """ return AppAssetStorage( storage=storage.storage_runner, - redis_client=redis_client, - cache_key_prefix="app_assets", ) @staticmethod diff --git a/api/services/storage_ticket_service.py b/api/services/storage_ticket_service.py index e5242517fe..528c4107b7 100644 --- a/api/services/storage_ticket_service.py +++ b/api/services/storage_ticket_service.py @@ -14,7 +14,7 @@ Usage: url = StorageTicketService.create_upload_url("path/to/file.txt", expires_in=300, max_bytes=10*1024*1024) URL format: - {FILES_URL}/files/storage-tickets/{token} + {FILES_URL}/files/storage-files/{token} The token is validated by looking up the Redis key, which contains: - op: "download" or "upload" @@ -23,11 +23,12 @@ The token is validated by looking up the Redis key, which contains: - filename: suggested filename for Content-Disposition header """ -import json import logging -from dataclasses import dataclass +from typing import Literal from uuid import uuid4 +from pydantic import BaseModel + from configs import dify_config from extensions.ext_redis import redis_client @@ -39,32 +40,14 @@ DEFAULT_UPLOAD_TTL = 300 # 5 minutes DEFAULT_MAX_UPLOAD_BYTES = 100 * 1024 * 1024 # 100MB -@dataclass -class StorageTicket: +class StorageTicket(BaseModel): """Represents a storage access ticket.""" - op: str # "download" or "upload" + op: Literal["download", "upload"] storage_key: str max_bytes: int | None = None # upload only filename: str | None = None # suggested filename for download - def to_dict(self) -> dict: - data = {"op": self.op, "storage_key": self.storage_key} - if self.max_bytes is not None: - data["max_bytes"] = str(self.max_bytes) - if self.filename is not None: - data["filename"] = self.filename - return data - - @classmethod - def from_dict(cls, data: dict) -> "StorageTicket": - return cls( - op=data["op"], - storage_key=data["storage_key"], - max_bytes=data.get("max_bytes"), - filename=data.get("filename"), - ) - class StorageTicketService: """Service for creating and validating storage access tickets.""" @@ -133,7 +116,7 @@ class StorageTicketService: return None if isinstance(data, bytes): data = data.decode("utf-8") - return StorageTicket.from_dict(json.loads(data)) + return StorageTicket.model_validate_json(data) except Exception: logger.warning("Failed to retrieve storage ticket: %s", token, exc_info=True) return None @@ -143,7 +126,7 @@ class StorageTicketService: """Store a ticket in Redis and return the token.""" token = str(uuid4()) key = cls._ticket_key(token) - value = json.dumps(ticket.to_dict()) + value = ticket.model_dump_json() redis_client.setex(key, ttl, value) return token diff --git a/api/tests/unit_tests/core/app_assets/test_storage.py b/api/tests/unit_tests/core/app_assets/test_storage.py index 3181e6c1be..a25feaa9aa 100644 --- a/api/tests/unit_tests/core/app_assets/test_storage.py +++ b/api/tests/unit_tests/core/app_assets/test_storage.py @@ -167,7 +167,7 @@ def test_upload_ticket_url_generation(monkeypatch: pytest.MonkeyPatch): assert len(token) == 36 # UUID format -def test_storage_ticket_dataclass(): +def test_storage_ticket_pydantic(): """Test StorageTicket serialization and deserialization.""" ticket = StorageTicket( op="download", @@ -175,14 +175,17 @@ def test_storage_ticket_dataclass(): filename="file.txt", ) - data = ticket.to_dict() + data = ticket.model_dump() assert data == { "op": "download", "storage_key": "path/to/file.txt", "filename": "file.txt", + "max_bytes": None, } - restored = StorageTicket.from_dict(data) + # Test JSON serialization + json_str = ticket.model_dump_json() + restored = StorageTicket.model_validate_json(json_str) assert restored.op == ticket.op assert restored.storage_key == ticket.storage_key assert restored.filename == ticket.filename @@ -195,8 +198,9 @@ def test_storage_ticket_dataclass(): max_bytes=1024, ) - upload_data = upload_ticket.to_dict() + upload_data = upload_ticket.model_dump() assert upload_data["max_bytes"] == 1024 - restored_upload = StorageTicket.from_dict(upload_data) + upload_json = upload_ticket.model_dump_json() + restored_upload = StorageTicket.model_validate_json(upload_json) assert restored_upload.max_bytes == 1024 diff --git a/api/tests/unit_tests/extensions/storage/test_cached_presign_storage.py b/api/tests/unit_tests/extensions/storage/test_cached_presign_storage.py index d06b801209..3626f8e38c 100644 --- a/api/tests/unit_tests/extensions/storage/test_cached_presign_storage.py +++ b/api/tests/unit_tests/extensions/storage/test_cached_presign_storage.py @@ -19,11 +19,10 @@ class TestCachedPresignStorage: return Mock() @pytest.fixture - def cached_storage(self, mock_storage, mock_redis): + def cached_storage(self, mock_storage): """Create CachedPresignStorage with mocks.""" return CachedPresignStorage( storage=mock_storage, - redis_client=mock_redis, cache_key_prefix="test_prefix", ) @@ -196,11 +195,10 @@ class TestCachedPresignStorage: assert result == "https://cached-url.com" assert isinstance(result, str) - def test_default_cache_key_prefix(self, mock_storage, mock_redis): + def test_default_cache_key_prefix(self, mock_storage): """Test default cache key prefix is used when not specified.""" storage = CachedPresignStorage( storage=mock_storage, - redis_client=mock_redis, ) key = storage._cache_key("file.txt") assert key == "presign_cache:file.txt"