feat: implement file structured output

This commit is contained in:
Stream 2026-02-05 00:10:51 +08:00
parent 10fb482351
commit 15c0011897
No known key found for this signature in database
GPG Key ID: 9475891C9507B4F3
7 changed files with 492 additions and 680 deletions

View File

@ -1,188 +1,203 @@
"""
File reference detection and conversion for structured output.
File path detection and conversion for structured output.
This module provides utilities to:
1. Detect file reference fields in JSON Schema (format: "dify-file-ref")
2. Convert file ID strings to File objects after LLM returns
1. Detect sandbox file path fields in JSON Schema (format: "file-path")
2. Adapt schemas to add file-path descriptions before model invocation
3. Convert sandbox file path strings into File objects via a resolver
"""
import uuid
from collections.abc import Mapping
from typing import Any
from collections.abc import Callable, Mapping, Sequence
from typing import Any, cast
from core.file import File
from core.variables.segments import ArrayFileSegment, FileSegment
from factories.file_factory import build_from_mapping
FILE_REF_FORMAT = "dify-file-ref"
FILE_PATH_FORMAT = "file-path"
FILE_PATH_DESCRIPTION_SUFFIX = "this field contains a file path from the Dify sandbox"
def is_file_ref_property(schema: dict) -> bool:
"""Check if a schema property is a file reference."""
return schema.get("type") == "string" and schema.get("format") == FILE_REF_FORMAT
def is_file_path_property(schema: Mapping[str, Any]) -> bool:
    """Return True when *schema* describes a sandbox file-path string field.

    A property qualifies when its ``type`` is ``"string"`` and its ``format``
    is the file-path marker; the format check is case-insensitive and accepts
    both ``file-path`` and ``file_path`` spellings.
    """
    declared_format = schema.get("format")
    is_string_type = schema.get("type") == "string"
    if not (is_string_type and isinstance(declared_format, str)):
        return False
    return declared_format.lower().replace("_", "-") == FILE_PATH_FORMAT
def detect_file_ref_fields(schema: Mapping[str, Any], path: str = "") -> list[str]:
"""
Recursively detect file reference fields in schema.
Args:
schema: JSON Schema to analyze
path: Current path in the schema (used for recursion)
Returns:
List of JSON paths containing file refs, e.g., ["image_id", "files[*]"]
"""
file_ref_paths: list[str] = []
def detect_file_path_fields(schema: Mapping[str, Any], path: str = "") -> list[str]:
"""Recursively detect file path fields in a JSON schema."""
file_path_fields: list[str] = []
schema_type = schema.get("type")
if schema_type == "object":
for prop_name, prop_schema in schema.get("properties", {}).items():
current_path = f"{path}.{prop_name}" if path else prop_name
properties = schema.get("properties")
if isinstance(properties, Mapping):
properties_mapping = cast(Mapping[str, Any], properties)
for prop_name, prop_schema in properties_mapping.items():
if not isinstance(prop_schema, Mapping):
continue
prop_schema_mapping = cast(Mapping[str, Any], prop_schema)
current_path = f"{path}.{prop_name}" if path else prop_name
if is_file_ref_property(prop_schema):
file_ref_paths.append(current_path)
elif isinstance(prop_schema, dict):
file_ref_paths.extend(detect_file_ref_fields(prop_schema, current_path))
if is_file_path_property(prop_schema_mapping):
file_path_fields.append(current_path)
else:
file_path_fields.extend(detect_file_path_fields(prop_schema_mapping, current_path))
elif schema_type == "array":
items_schema = schema.get("items", {})
items_schema = schema.get("items")
if not isinstance(items_schema, Mapping):
return file_path_fields
items_schema_mapping = cast(Mapping[str, Any], items_schema)
array_path = f"{path}[*]" if path else "[*]"
if is_file_ref_property(items_schema):
file_ref_paths.append(array_path)
elif isinstance(items_schema, dict):
file_ref_paths.extend(detect_file_ref_fields(items_schema, array_path))
return file_ref_paths
def convert_file_refs_in_output(
output: Mapping[str, Any],
json_schema: Mapping[str, Any],
tenant_id: str,
) -> dict[str, Any]:
"""
Convert file ID strings to File objects based on schema.
Args:
output: The structured_output from LLM result
json_schema: The original JSON schema (to detect file ref fields)
tenant_id: Tenant ID for file lookup
Returns:
Output with file references converted to File objects
"""
file_ref_paths = detect_file_ref_fields(json_schema)
if not file_ref_paths:
return dict(output)
result = _deep_copy_dict(output)
for path in file_ref_paths:
_convert_path_in_place(result, path.split("."), tenant_id)
return result
def _deep_copy_dict(obj: Mapping[str, Any]) -> dict[str, Any]:
"""Deep copy a mapping to a mutable dict."""
result: dict[str, Any] = {}
for key, value in obj.items():
if isinstance(value, Mapping):
result[key] = _deep_copy_dict(value)
elif isinstance(value, list):
result[key] = [_deep_copy_dict(item) if isinstance(item, Mapping) else item for item in value]
if is_file_path_property(items_schema_mapping):
file_path_fields.append(array_path)
else:
result[key] = value
return result
file_path_fields.extend(detect_file_path_fields(items_schema_mapping, array_path))
return file_path_fields
def _convert_path_in_place(obj: dict, path_parts: list[str], tenant_id: str) -> None:
"""Convert file refs at the given path in place, wrapping in Segment types."""
def adapt_schema_for_sandbox_file_paths(schema: Mapping[str, Any]) -> tuple[dict[str, Any], list[str]]:
    """Deep-copy *schema*, normalize every sandbox file-path field in the copy,
    and return the adapted schema together with the JSON paths of those fields.

    Raises:
        ValueError: if the top-level schema is not a JSON object.
    """
    copied = _deep_copy_value(schema)
    if not isinstance(copied, dict):
        raise ValueError("structured_output_schema must be a JSON object")
    adapted_schema = cast(dict[str, Any], copied)
    collected_paths: list[str] = []
    _adapt_schema_in_place(adapted_schema, path="", file_path_fields=collected_paths)
    return adapted_schema, collected_paths
def convert_sandbox_file_paths_in_output(
    output: Mapping[str, Any],
    file_path_fields: Sequence[str],
    file_resolver: Callable[[str], File],
) -> tuple[dict[str, Any], list[File]]:
    """Replace sandbox file-path strings in *output* with resolved File objects.

    Args:
        output: structured output produced by the model.
        file_path_fields: JSON paths to convert, e.g. ``"files[*]"`` or ``"meta.doc"``.
        file_resolver: callback that turns a sandbox path into a ``File``.

    Returns:
        The converted output plus every ``File`` that was resolved.

    Raises:
        ValueError: if the structured output is not a JSON object.
    """
    if not file_path_fields:
        return dict(output), []
    copied = _deep_copy_value(output)
    if not isinstance(copied, dict):
        raise ValueError("Structured output must be a JSON object")
    converted = cast(dict[str, Any], copied)
    resolved_files: list[File] = []
    for field_path in file_path_fields:
        _convert_path_in_place(converted, field_path.split("."), file_resolver, resolved_files)
    return converted, resolved_files
def _adapt_schema_in_place(schema: dict[str, Any], path: str, file_path_fields: list[str]) -> None:
    """Walk *schema* in place, normalizing file-path properties and recording
    the JSON path of each one in *file_path_fields*."""
    node_type = schema.get("type")
    if node_type == "object":
        properties = schema.get("properties")
        if not isinstance(properties, Mapping):
            return
        for name, sub_schema in cast(Mapping[str, Any], properties).items():
            # Non-dict property values cannot be mutated in place; skip them.
            if not isinstance(sub_schema, dict):
                continue
            sub_schema_dict = cast(dict[str, Any], sub_schema)
            child_path = f"{path}.{name}" if path else name
            if is_file_path_property(sub_schema_dict):
                _normalize_file_path_schema(sub_schema_dict)
                file_path_fields.append(child_path)
            else:
                _adapt_schema_in_place(sub_schema_dict, child_path, file_path_fields)
    elif node_type == "array":
        items = schema.get("items")
        if not isinstance(items, dict):
            return
        items_dict = cast(dict[str, Any], items)
        child_path = f"{path}[*]" if path else "[*]"
        if is_file_path_property(items_dict):
            _normalize_file_path_schema(items_dict)
            file_path_fields.append(child_path)
        else:
            _adapt_schema_in_place(items_dict, child_path, file_path_fields)
def _normalize_file_path_schema(schema: dict[str, Any]) -> None:
    """Force the canonical file-path type/format and append the sandbox hint.

    The hint is added at most once: a description that already contains it is
    left untouched, and an absent description becomes the hint itself.
    """
    schema["type"] = "string"
    schema["format"] = FILE_PATH_FORMAT
    existing_description = schema.get("description", "")
    if not existing_description:
        schema["description"] = FILE_PATH_DESCRIPTION_SUFFIX
    elif FILE_PATH_DESCRIPTION_SUFFIX not in existing_description:
        schema["description"] = f"{existing_description}\n{FILE_PATH_DESCRIPTION_SUFFIX}"
def _deep_copy_value(value: Any) -> Any:
if isinstance(value, Mapping):
mapping = cast(Mapping[str, Any], value)
return {key: _deep_copy_value(item) for key, item in mapping.items()}
if isinstance(value, list):
list_value = cast(list[Any], value)
return [_deep_copy_value(item) for item in list_value]
return value
def _convert_path_in_place(
obj: dict[str, Any],
path_parts: list[str],
file_resolver: Callable[[str], File],
files: list[File],
) -> None:
if not path_parts:
return
current = path_parts[0]
remaining = path_parts[1:]
# Handle array notation like "files[*]"
if current.endswith("[*]"):
key = current[:-3] if current != "[*]" else None
target = obj.get(key) if key else obj
key = current[:-3] if current != "[*]" else ""
target_value = obj.get(key) if key else obj
if isinstance(target, list):
if isinstance(target_value, list):
target_list = cast(list[Any], target_value)
if remaining:
# Nested array with remaining path - recurse into each item
for item in target:
for item in target_list:
if isinstance(item, dict):
_convert_path_in_place(item, remaining, tenant_id)
item_dict = cast(dict[str, Any], item)
_convert_path_in_place(item_dict, remaining, file_resolver, files)
else:
# Array of file IDs - convert all and wrap in ArrayFileSegment
files: list[File] = []
for item in target:
file = _convert_file_id(item, tenant_id)
if file is not None:
files.append(file)
# Replace the array with ArrayFileSegment
resolved_files: list[File] = []
for item in target_list:
if not isinstance(item, str):
raise ValueError("File path must be a string")
file = file_resolver(item)
files.append(file)
resolved_files.append(file)
if key:
obj[key] = ArrayFileSegment(value=files)
obj[key] = ArrayFileSegment(value=resolved_files)
return
if not remaining:
# Leaf node - convert the value and wrap in FileSegment
if current in obj:
file = _convert_file_id(obj[current], tenant_id)
if file is not None:
obj[current] = FileSegment(value=file)
else:
obj[current] = None
else:
# Recurse into nested object
if current in obj and isinstance(obj[current], dict):
_convert_path_in_place(obj[current], remaining, tenant_id)
if current not in obj:
return
value = obj[current]
if value is None:
obj[current] = None
return
if not isinstance(value, str):
raise ValueError("File path must be a string")
file = file_resolver(value)
files.append(file)
obj[current] = FileSegment(value=file)
return
def _convert_file_id(file_id: Any, tenant_id: str) -> File | None:
"""
Convert a file ID string to a File object.
Tries multiple file sources in order:
1. ToolFile (files generated by tools/workflows)
2. UploadFile (files uploaded by users)
"""
if not isinstance(file_id, str):
return None
# Validate UUID format
try:
uuid.UUID(file_id)
except ValueError:
return None
# Try ToolFile first (files generated by tools/workflows)
try:
return build_from_mapping(
mapping={
"transfer_method": "tool_file",
"tool_file_id": file_id,
},
tenant_id=tenant_id,
)
except ValueError:
pass
# Try UploadFile (files uploaded by users)
try:
return build_from_mapping(
mapping={
"transfer_method": "local_file",
"upload_file_id": file_id,
},
tenant_id=tenant_id,
)
except ValueError:
pass
# File not found in any source
return None
if current in obj and isinstance(obj[current], dict):
_convert_path_in_place(obj[current], remaining, file_resolver, files)

View File

@ -8,7 +8,7 @@ import json_repair
from pydantic import BaseModel, TypeAdapter, ValidationError
from core.llm_generator.output_parser.errors import OutputParserError
from core.llm_generator.output_parser.file_ref import convert_file_refs_in_output
from core.llm_generator.output_parser.file_ref import detect_file_path_fields
from core.llm_generator.prompts import STRUCTURED_OUTPUT_PROMPT
from core.model_manager import ModelInstance
from core.model_runtime.callbacks.base_callback import Callback
@ -55,12 +55,12 @@ def invoke_llm_with_structured_output(
model_instance: ModelInstance,
prompt_messages: Sequence[PromptMessage],
json_schema: Mapping[str, Any],
model_parameters: Mapping | None = None,
model_parameters: Mapping[str, Any] | None = None,
tools: Sequence[PromptMessageTool] | None = None,
stop: list[str] | None = None,
user: str | None = None,
callbacks: list[Callback] | None = None,
tenant_id: str | None = None,
allow_file_path: bool = False,
) -> LLMResultWithStructuredOutput:
"""
Invoke large language model with structured output.
@ -78,14 +78,13 @@ def invoke_llm_with_structured_output(
:param stop: stop words
:param user: unique user id
:param callbacks: callbacks
:param tenant_id: tenant ID for file reference conversion. When provided and
json_schema contains file reference fields (format: "dify-file-ref"),
file IDs in the output will be automatically converted to File objects.
:return: full response or stream response chunk generator result
:param allow_file_path: allow schema fields formatted as file-path
:return: response with structured output
"""
model_parameters_with_json_schema: dict[str, Any] = {
**(model_parameters or {}),
}
model_parameters_with_json_schema: dict[str, Any] = dict(model_parameters or {})
if detect_file_path_fields(json_schema) and not allow_file_path:
raise OutputParserError("Structured output file paths are only supported in sandbox mode.")
# Determine structured output strategy
@ -122,14 +121,6 @@ def invoke_llm_with_structured_output(
# Fill missing fields with default values
structured_output = fill_defaults_from_schema(structured_output, json_schema)
# Convert file references if tenant_id is provided
if tenant_id is not None:
structured_output = convert_file_refs_in_output(
output=structured_output,
json_schema=json_schema,
tenant_id=tenant_id,
)
return LLMResultWithStructuredOutput(
structured_output=structured_output,
model=llm_result.model,
@ -147,12 +138,11 @@ def invoke_llm_with_pydantic_model(
model_instance: ModelInstance,
prompt_messages: Sequence[PromptMessage],
output_model: type[T],
model_parameters: Mapping | None = None,
model_parameters: Mapping[str, Any] | None = None,
tools: Sequence[PromptMessageTool] | None = None,
stop: list[str] | None = None,
user: str | None = None,
callbacks: list[Callback] | None = None,
tenant_id: str | None = None,
) -> T:
"""
Invoke large language model with a Pydantic output model.
@ -160,11 +150,8 @@ def invoke_llm_with_pydantic_model(
This helper generates a JSON schema from the Pydantic model, invokes the
structured-output LLM path, and validates the result.
The stream parameter controls the underlying LLM invocation mode:
- stream=True (default): Uses streaming LLM call, consumes the generator internally
- stream=False: Uses non-streaming LLM call
In both cases, the function returns the validated Pydantic model directly.
The helper performs a non-streaming invocation and returns the validated
Pydantic model directly.
"""
json_schema = _schema_from_pydantic(output_model)
@ -179,7 +166,6 @@ def invoke_llm_with_pydantic_model(
stop=stop,
user=user,
callbacks=callbacks,
tenant_id=tenant_id,
)
structured_output = result.structured_output
@ -189,6 +175,11 @@ def invoke_llm_with_pydantic_model(
return _validate_structured_output(output_model, structured_output)
def parse_structured_output_text(*, result_text: str, json_schema: Mapping[str, Any]) -> dict[str, Any]:
structured_output = _parse_structured_output(result_text)
return fill_defaults_from_schema(structured_output, json_schema)
def _schema_from_pydantic(output_model: type[BaseModel]) -> dict[str, Any]:
return output_model.model_json_schema()
@ -322,8 +313,8 @@ def fill_defaults_from_schema(
def _handle_native_json_schema(
provider: str,
model_schema: AIModelEntity,
structured_output_schema: Mapping,
model_parameters: dict,
structured_output_schema: Mapping[str, Any],
model_parameters: dict[str, Any],
rules: list[ParameterRule],
):
"""
@ -347,7 +338,7 @@ def _handle_native_json_schema(
return model_parameters
def _set_response_format(model_parameters: dict, rules: list):
def _set_response_format(model_parameters: dict[str, Any], rules: list[ParameterRule]):
"""
Set the appropriate response format parameter based on model rules.
@ -363,7 +354,7 @@ def _set_response_format(model_parameters: dict, rules: list):
def _handle_prompt_based_schema(
prompt_messages: Sequence[PromptMessage], structured_output_schema: Mapping
prompt_messages: Sequence[PromptMessage], structured_output_schema: Mapping[str, Any]
) -> list[PromptMessage]:
"""
Handle structured output for models without native JSON schema support.

View File

@ -4,10 +4,13 @@ import base64
import io
import json
import logging
import mimetypes
import os
import re
import time
from collections.abc import Generator, Mapping, Sequence
from functools import reduce
from pathlib import PurePosixPath
from typing import TYPE_CHECKING, Any, Literal, cast
from sqlalchemy import select
@ -20,7 +23,15 @@ from core.app_assets.constants import AppAssetsAttrs
from core.file import File, FileTransferMethod, FileType, file_manager
from core.helper.code_executor import CodeExecutor, CodeLanguage
from core.llm_generator.output_parser.errors import OutputParserError
from core.llm_generator.output_parser.structured_output import invoke_llm_with_structured_output
from core.llm_generator.output_parser.file_ref import (
adapt_schema_for_sandbox_file_paths,
convert_sandbox_file_paths_in_output,
detect_file_path_fields,
)
from core.llm_generator.output_parser.structured_output import (
invoke_llm_with_structured_output,
parse_structured_output_text,
)
from core.memory.base import BaseMemory
from core.model_manager import ModelInstance, ModelManager
from core.model_runtime.entities import (
@ -54,7 +65,7 @@ from core.prompt.entities.advanced_prompt_entities import CompletionModelPromptT
from core.prompt.utils.prompt_message_util import PromptMessageUtil
from core.rag.entities.citation_metadata import RetrievalSourceMetadata
from core.sandbox import Sandbox
from core.sandbox.bash.session import SandboxBashSession
from core.sandbox.bash.session import MAX_OUTPUT_FILE_SIZE, MAX_OUTPUT_FILES, SandboxBashSession
from core.sandbox.entities.config import AppAssets
from core.skill.constants import SkillAttrs
from core.skill.entities.skill_bundle import SkillBundle
@ -62,7 +73,8 @@ from core.skill.entities.skill_document import SkillDocument
from core.skill.entities.tool_dependencies import ToolDependencies, ToolDependency
from core.skill.skill_compiler import SkillCompiler
from core.tools.__base.tool import Tool
from core.tools.signature import sign_upload_file
from core.tools.signature import sign_tool_file, sign_upload_file
from core.tools.tool_file_manager import ToolFileManager
from core.tools.tool_manager import ToolManager
from core.variables import (
ArrayFileSegment,
@ -296,6 +308,27 @@ class LLMNode(Node[LLMNodeData]):
# Variables for outputs
generation_data: LLMGenerationData | None = None
structured_output: LLMStructuredOutput | None = None
structured_output_schema: Mapping[str, Any] | None
structured_output_file_paths: list[str] = []
if self.node_data.structured_output_enabled:
if not self.node_data.structured_output:
raise LLMNodeError("structured_output_enabled is True but structured_output is not set")
raw_schema = LLMNode.fetch_structured_output_schema(structured_output=self.node_data.structured_output)
if self.node_data.computer_use:
raise LLMNodeError("Structured output is not supported in computer use mode.")
else:
if detect_file_path_fields(raw_schema):
sandbox = self.graph_runtime_state.sandbox
if not sandbox:
raise LLMNodeError("Structured output file paths are only supported in sandbox mode.")
structured_output_schema, structured_output_file_paths = adapt_schema_for_sandbox_file_paths(
raw_schema
)
else:
structured_output_schema = raw_schema
else:
structured_output_schema = None
if self.node_data.computer_use:
sandbox = self.graph_runtime_state.sandbox
@ -309,6 +342,8 @@ class LLMNode(Node[LLMNodeData]):
stop=stop,
variable_pool=variable_pool,
tool_dependencies=tool_dependencies,
structured_output_schema=structured_output_schema,
structured_output_file_paths=structured_output_file_paths,
)
elif self.tool_call_enabled:
generator = self._invoke_llm_with_tools(
@ -328,14 +363,13 @@ class LLMNode(Node[LLMNodeData]):
prompt_messages=prompt_messages,
stop=stop,
user_id=self.user_id,
structured_output_enabled=self._node_data.structured_output_enabled,
structured_output=self._node_data.structured_output,
structured_output_schema=structured_output_schema,
allow_file_path=bool(structured_output_file_paths),
file_saver=self._llm_file_saver,
file_outputs=self._file_outputs,
node_id=self._node_id,
node_type=self.node_type,
reasoning_format=self._node_data.reasoning_format,
tenant_id=self.tenant_id,
)
(
@ -349,6 +383,33 @@ class LLMNode(Node[LLMNodeData]):
generation_data,
) = yield from self._stream_llm_events(generator, model_instance=model_instance)
if structured_output and structured_output_file_paths:
sandbox = self.graph_runtime_state.sandbox
if not sandbox:
raise LLMNodeError("Structured output file paths are only supported in sandbox mode.")
structured_output_value = structured_output.structured_output
if structured_output_value is None:
raise LLMNodeError("Structured output is empty")
resolved_count = 0
def resolve_file(path: str) -> File:
nonlocal resolved_count
if resolved_count >= MAX_OUTPUT_FILES:
raise LLMNodeError("Structured output files exceed the sandbox output limit")
resolved_count += 1
return self._resolve_sandbox_file_path(sandbox=sandbox, path=path)
converted_output, structured_output_files = convert_sandbox_file_paths_in_output(
output=structured_output_value,
file_path_fields=structured_output_file_paths,
file_resolver=resolve_file,
)
structured_output = LLMStructuredOutput(structured_output=converted_output)
if structured_output_files:
self._file_outputs.extend(structured_output_files)
# Extract variables from generation_data if available
if generation_data:
clean_text = generation_data.text
@ -493,14 +554,13 @@ class LLMNode(Node[LLMNodeData]):
prompt_messages: Sequence[PromptMessage],
stop: Sequence[str] | None = None,
user_id: str,
structured_output_enabled: bool,
structured_output: Mapping[str, Any] | None = None,
structured_output_schema: Mapping[str, Any] | None,
allow_file_path: bool = False,
file_saver: LLMFileSaver,
file_outputs: list[File],
node_id: str,
node_type: NodeType,
reasoning_format: Literal["separated", "tagged"] = "tagged",
tenant_id: str | None = None,
) -> Generator[NodeEventBase | LLMStructuredOutput, None, None]:
model_schema = model_instance.model_type_instance.get_model_schema(
node_data_model.name, model_instance.credentials
@ -508,10 +568,7 @@ class LLMNode(Node[LLMNodeData]):
if not model_schema:
raise ValueError(f"Model schema not found for {node_data_model.name}")
if structured_output_enabled:
output_schema = LLMNode.fetch_structured_output_schema(
structured_output=structured_output or {},
)
if structured_output_schema:
request_start_time = time.perf_counter()
invoke_result = invoke_llm_with_structured_output(
@ -519,11 +576,11 @@ class LLMNode(Node[LLMNodeData]):
model_schema=model_schema,
model_instance=model_instance,
prompt_messages=prompt_messages,
json_schema=output_schema,
json_schema=structured_output_schema,
model_parameters=node_data_model.completion_params,
stop=list(stop or []),
user=user_id,
tenant_id=tenant_id,
allow_file_path=allow_file_path,
)
else:
request_start_time = time.perf_counter()
@ -1651,6 +1708,93 @@ class LLMNode(Node[LLMNodeData]):
)
return saved_file
def _parse_structured_output_from_text(
    self,
    *,
    result_text: str,
    structured_output_schema: Mapping[str, Any],
) -> dict[str, Any]:
    """Parse structured output from tool-run text using the provided schema.

    Delegates to ``parse_structured_output_text`` (which parses the text and
    fills schema defaults) and re-raises parser failures as ``LLMNodeError``
    so callers only need to handle node-level errors.
    """
    try:
        return parse_structured_output_text(result_text=result_text, json_schema=structured_output_schema)
    except OutputParserError as exc:
        # Wrap the parser error so the original cause is preserved via chaining.
        raise LLMNodeError(f"Failed to parse structured output: {exc}") from exc
@staticmethod
def _normalize_sandbox_file_path(path: str) -> str:
raw = path.strip()
if not raw:
raise LLMNodeError("Sandbox file path must not be empty")
sandbox_path = PurePosixPath(raw)
if any(part == ".." for part in sandbox_path.parts):
raise LLMNodeError("Sandbox file path must not contain '..'")
normalized = str(sandbox_path)
if normalized in {".", ""}:
raise LLMNodeError("Sandbox file path is invalid")
return normalized
def _resolve_sandbox_file_path(self, *, sandbox: Sandbox, path: str) -> File:
    """Download *path* from the sandbox VM and persist it as a Dify tool file.

    The path is validated/normalized first, the content is downloaded and
    size-checked, stored through ``ToolFileManager``, and wrapped in a
    ``File`` referencing the newly created tool file.

    Raises:
        LLMNodeError: if the path is invalid, does not point to a file, the
            download fails, or the file exceeds the sandbox size limit.
    """
    normalized_path = self._normalize_sandbox_file_path(path)
    filename = os.path.basename(normalized_path)
    if not filename:
        # A trailing slash (e.g. "dir/") yields an empty basename.
        raise LLMNodeError("Sandbox file path must point to a file")
    try:
        file_content = sandbox.vm.download_file(normalized_path)
    except Exception as exc:
        # Any VM failure (missing file, transport error) is reported uniformly
        # as "not found"; the original cause is preserved via chaining.
        raise LLMNodeError(f"Sandbox file not found: {normalized_path}") from exc
    file_binary = file_content.getvalue()
    if len(file_binary) > MAX_OUTPUT_FILE_SIZE:
        raise LLMNodeError(f"Sandbox file exceeds size limit: {normalized_path}")
    mime_type, _ = mimetypes.guess_type(filename)
    if not mime_type:
        # Unknown extensions fall back to a generic binary MIME type.
        mime_type = "application/octet-stream"
    tool_file_manager = ToolFileManager()
    tool_file = tool_file_manager.create_file_by_raw(
        user_id=self.user_id,
        tenant_id=self.tenant_id,
        conversation_id=None,
        file_binary=file_binary,
        mimetype=mime_type,
        filename=filename,
    )
    # NOTE(review): for dotfiles such as ".env", splitext() returns an empty
    # extension even though "." is present — confirm sign_tool_file accepts "".
    extension = os.path.splitext(filename)[1] if "." in filename else ".bin"
    url = sign_tool_file(tool_file.id, extension)
    file_type = self._get_file_type_from_mime(mime_type)
    return File(
        id=tool_file.id,
        tenant_id=self.tenant_id,
        type=file_type,
        transfer_method=FileTransferMethod.TOOL_FILE,
        filename=filename,
        extension=extension,
        mime_type=mime_type,
        size=len(file_binary),
        related_id=tool_file.id,
        url=url,
        storage_key=tool_file.file_key,
    )
@staticmethod
def _get_file_type_from_mime(mime_type: str) -> FileType:
    """Map a MIME type onto the coarse Dify ``FileType`` categories.

    ``image/*``, ``video/*`` and ``audio/*`` map to their media types;
    anything mentioning "text" or "pdf" is a DOCUMENT; all else is CUSTOM.
    """
    prefix_map = (
        ("image/", FileType.IMAGE),
        ("video/", FileType.VIDEO),
        ("audio/", FileType.AUDIO),
    )
    for prefix, mapped_type in prefix_map:
        if mime_type.startswith(prefix):
            return mapped_type
    if "text" in mime_type or "pdf" in mime_type:
        return FileType.DOCUMENT
    return FileType.CUSTOM
@staticmethod
def fetch_structured_output_schema(
*,
@ -1914,7 +2058,9 @@ class LLMNode(Node[LLMNodeData]):
stop: Sequence[str] | None,
variable_pool: VariablePool,
tool_dependencies: ToolDependencies | None,
) -> Generator[NodeEventBase, None, LLMGenerationData]:
structured_output_schema: Mapping[str, Any] | None,
structured_output_file_paths: Sequence[str] | None,
) -> Generator[NodeEventBase | LLMStructuredOutput, None, LLMGenerationData]:
result: LLMGenerationData | None = None
# FIXME(Mairuis): Async processing for bash session.
@ -1941,6 +2087,36 @@ class LLMNode(Node[LLMNodeData]):
result = yield from self._process_tool_outputs(outputs)
if result is not None and structured_output_schema:
structured_output = self._parse_structured_output_from_text(
result_text=result.text,
structured_output_schema=structured_output_schema,
)
file_paths = list(structured_output_file_paths or [])
if file_paths:
resolved_count = 0
def resolve_file(path: str) -> File:
nonlocal resolved_count
if resolved_count >= MAX_OUTPUT_FILES:
raise LLMNodeError("Structured output files exceed the sandbox output limit")
resolved_count += 1
return self._resolve_sandbox_file_path(sandbox=sandbox, path=path)
structured_output, structured_output_files = convert_sandbox_file_paths_in_output(
output=structured_output,
file_path_fields=file_paths,
file_resolver=resolve_file,
)
else:
structured_output_files = []
if structured_output_files:
result.files.extend(structured_output_files)
yield LLMStructuredOutput(structured_output=structured_output)
if result is None:
raise LLMNodeError("SandboxSession exited unexpectedly")

View File

@ -126,8 +126,8 @@ workflow:
additionalProperties: false
properties:
image:
description: File ID (UUID) of the selected image
format: dify-file-ref
description: Sandbox file path of the selected image
format: file-path
type: string
required:
- image

View File

@ -1,269 +1,120 @@
"""
Unit tests for file reference detection and conversion.
Unit tests for sandbox file path detection and conversion.
"""
import uuid
from unittest.mock import MagicMock, patch
import pytest
from core.file import File, FileTransferMethod, FileType
from core.llm_generator.output_parser.file_ref import (
FILE_REF_FORMAT,
convert_file_refs_in_output,
detect_file_ref_fields,
is_file_ref_property,
FILE_PATH_DESCRIPTION_SUFFIX,
FILE_PATH_FORMAT,
adapt_schema_for_sandbox_file_paths,
convert_sandbox_file_paths_in_output,
detect_file_path_fields,
is_file_path_property,
)
from core.variables.segments import ArrayFileSegment, FileSegment
class TestIsFileRefProperty:
"""Tests for is_file_ref_property function."""
def _build_file(file_id: str) -> File:
return File(
id=file_id,
tenant_id="tenant_123",
type=FileType.IMAGE,
transfer_method=FileTransferMethod.TOOL_FILE,
filename="test.png",
extension=".png",
mime_type="image/png",
size=128,
related_id=file_id,
storage_key="sandbox/path",
)
def test_valid_file_ref(self):
schema = {"type": "string", "format": FILE_REF_FORMAT}
assert is_file_ref_property(schema) is True
class TestIsFilePathProperty:
def test_valid_file_path_format(self):
schema = {"type": "string", "format": FILE_PATH_FORMAT}
assert is_file_path_property(schema) is True
def test_accepts_snake_case_format(self):
schema = {"type": "string", "format": "file_path"}
assert is_file_path_property(schema) is True
def test_invalid_type(self):
schema = {"type": "number", "format": FILE_REF_FORMAT}
assert is_file_ref_property(schema) is False
schema = {"type": "number", "format": FILE_PATH_FORMAT}
assert is_file_path_property(schema) is False
def test_missing_format(self):
schema = {"type": "string"}
assert is_file_ref_property(schema) is False
assert is_file_path_property(schema) is False
def test_wrong_format(self):
schema = {"type": "string", "format": "uuid"}
assert is_file_ref_property(schema) is False
assert is_file_path_property(schema) is False
class TestDetectFileRefFields:
"""Tests for detect_file_ref_fields function."""
def test_simple_file_ref(self):
class TestDetectFilePathFields:
def test_detects_nested_file_paths(self):
schema = {
"type": "object",
"properties": {
"image": {"type": "string", "format": FILE_REF_FORMAT},
"image": {"type": "string", "format": FILE_PATH_FORMAT},
"files": {"type": "array", "items": {"type": "string", "format": FILE_PATH_FORMAT}},
"meta": {"type": "object", "properties": {"doc": {"type": "string", "format": FILE_PATH_FORMAT}}},
},
}
paths = detect_file_ref_fields(schema)
assert paths == ["image"]
def test_multiple_file_refs(self):
schema = {
"type": "object",
"properties": {
"image": {"type": "string", "format": FILE_REF_FORMAT},
"document": {"type": "string", "format": FILE_REF_FORMAT},
"name": {"type": "string"},
},
}
paths = detect_file_ref_fields(schema)
assert set(paths) == {"image", "document"}
def test_array_of_file_refs(self):
schema = {
"type": "object",
"properties": {
"files": {
"type": "array",
"items": {"type": "string", "format": FILE_REF_FORMAT},
},
},
}
paths = detect_file_ref_fields(schema)
assert paths == ["files[*]"]
def test_nested_file_ref(self):
schema = {
"type": "object",
"properties": {
"data": {
"type": "object",
"properties": {
"image": {"type": "string", "format": FILE_REF_FORMAT},
},
},
},
}
paths = detect_file_ref_fields(schema)
assert paths == ["data.image"]
def test_no_file_refs(self):
schema = {
"type": "object",
"properties": {
"name": {"type": "string"},
"count": {"type": "number"},
},
}
paths = detect_file_ref_fields(schema)
assert paths == []
assert set(detect_file_path_fields(schema)) == {"image", "files[*]", "meta.doc"}
def test_empty_schema(self):
schema = {}
paths = detect_file_ref_fields(schema)
assert paths == []
assert detect_file_path_fields({}) == []
def test_mixed_schema(self):
class TestAdaptSchemaForSandboxFilePaths:
def test_appends_description(self):
schema = {
"type": "object",
"properties": {
"query": {"type": "string"},
"image": {"type": "string", "format": FILE_REF_FORMAT},
"documents": {
"type": "array",
"items": {"type": "string", "format": FILE_REF_FORMAT},
},
},
}
paths = detect_file_ref_fields(schema)
assert set(paths) == {"image", "documents[*]"}
class TestConvertFileRefsInOutput:
"""Tests for convert_file_refs_in_output function."""
@pytest.fixture
def mock_file(self):
"""Create a mock File object with all required attributes."""
file = MagicMock(spec=File)
file.type = FileType.IMAGE
file.transfer_method = FileTransferMethod.TOOL_FILE
file.related_id = "test-related-id"
file.remote_url = None
file.tenant_id = "tenant_123"
file.id = None
file.filename = "test.png"
file.extension = ".png"
file.mime_type = "image/png"
file.size = 1024
file.dify_model_identity = "__dify__file__"
return file
@pytest.fixture
def mock_build_from_mapping(self, mock_file):
"""Mock the build_from_mapping function."""
with patch("core.llm_generator.output_parser.file_ref.build_from_mapping") as mock:
mock.return_value = mock_file
yield mock
def test_convert_simple_file_ref(self, mock_build_from_mapping, mock_file):
file_id = str(uuid.uuid4())
output = {"image": file_id}
schema = {
"type": "object",
"properties": {
"image": {"type": "string", "format": FILE_REF_FORMAT},
"image": {"type": "string", "format": FILE_PATH_FORMAT, "description": "Pick a file"},
},
}
result = convert_file_refs_in_output(output, schema, "tenant_123")
adapted, fields = adapt_schema_for_sandbox_file_paths(schema)
# Result should be wrapped in FileSegment
assert isinstance(result["image"], FileSegment)
assert result["image"].value == mock_file
mock_build_from_mapping.assert_called_once_with(
mapping={"transfer_method": "tool_file", "tool_file_id": file_id},
tenant_id="tenant_123",
)
assert set(fields) == {"image"}
adapted_image = adapted["properties"]["image"]
assert adapted_image["type"] == "string"
assert adapted_image["format"] == FILE_PATH_FORMAT
assert FILE_PATH_DESCRIPTION_SUFFIX in adapted_image["description"]
def test_convert_array_of_file_refs(self, mock_build_from_mapping, mock_file):
file_id1 = str(uuid.uuid4())
file_id2 = str(uuid.uuid4())
output = {"files": [file_id1, file_id2]}
schema = {
"type": "object",
"properties": {
"files": {
"type": "array",
"items": {"type": "string", "format": FILE_REF_FORMAT},
},
},
class TestConvertSandboxFilePaths:
def test_convert_sandbox_file_paths(self):
output = {
"image": "a.png",
"files": ["b.png", "c.png"],
"meta": {"doc": "d.pdf"},
"name": "demo",
}
result = convert_file_refs_in_output(output, schema, "tenant_123")
def resolver(path: str) -> File:
return _build_file(path)
# Result should be wrapped in ArrayFileSegment
assert isinstance(result["files"], ArrayFileSegment)
assert list(result["files"].value) == [mock_file, mock_file]
assert mock_build_from_mapping.call_count == 2
converted, files = convert_sandbox_file_paths_in_output(output, ["image", "files[*]", "meta.doc"], resolver)
def test_no_conversion_without_file_refs(self):
output = {"name": "test", "count": 5}
schema = {
"type": "object",
"properties": {
"name": {"type": "string"},
"count": {"type": "number"},
},
}
assert isinstance(converted["image"], FileSegment)
assert isinstance(converted["files"], ArrayFileSegment)
assert isinstance(converted["meta"]["doc"], FileSegment)
assert converted["name"] == "demo"
assert [file.id for file in files] == ["a.png", "b.png", "c.png", "d.pdf"]
result = convert_file_refs_in_output(output, schema, "tenant_123")
def test_invalid_path_value_raises(self):
with pytest.raises(ValueError):
convert_sandbox_file_paths_in_output({"image": 123}, ["image"], _build_file)
assert result == {"name": "test", "count": 5}
def test_no_file_paths_returns_output(self):
output = {"name": "demo"}
converted, files = convert_sandbox_file_paths_in_output(output, [], _build_file)
def test_invalid_uuid_returns_none(self):
output = {"image": "not-a-valid-uuid"}
schema = {
"type": "object",
"properties": {
"image": {"type": "string", "format": FILE_REF_FORMAT},
},
}
result = convert_file_refs_in_output(output, schema, "tenant_123")
assert result["image"] is None
def test_file_not_found_returns_none(self):
file_id = str(uuid.uuid4())
output = {"image": file_id}
schema = {
"type": "object",
"properties": {
"image": {"type": "string", "format": FILE_REF_FORMAT},
},
}
with patch("core.llm_generator.output_parser.file_ref.build_from_mapping") as mock:
mock.side_effect = ValueError("File not found")
result = convert_file_refs_in_output(output, schema, "tenant_123")
assert result["image"] is None
def test_preserves_non_file_fields(self, mock_build_from_mapping, mock_file):
file_id = str(uuid.uuid4())
output = {"query": "search term", "image": file_id, "count": 10}
schema = {
"type": "object",
"properties": {
"query": {"type": "string"},
"image": {"type": "string", "format": FILE_REF_FORMAT},
"count": {"type": "number"},
},
}
result = convert_file_refs_in_output(output, schema, "tenant_123")
assert result["query"] == "search term"
assert isinstance(result["image"], FileSegment)
assert result["image"].value == mock_file
assert result["count"] == 10
def test_does_not_modify_original_output(self, mock_build_from_mapping, mock_file):
file_id = str(uuid.uuid4())
original = {"image": file_id}
output = dict(original)
schema = {
"type": "object",
"properties": {
"image": {"type": "string", "format": FILE_REF_FORMAT},
},
}
convert_file_refs_in_output(output, schema, "tenant_123")
# Original should still contain the string ID
assert original["image"] == file_id
assert converted == output
assert files == []

View File

@ -1,4 +1,5 @@
from decimal import Decimal
from typing import Any, cast
from unittest.mock import MagicMock, patch
import pytest
@ -6,23 +7,18 @@ from pydantic import BaseModel, ConfigDict
from core.llm_generator.output_parser.errors import OutputParserError
from core.llm_generator.output_parser.structured_output import (
_get_default_value_for_type,
fill_defaults_from_schema,
invoke_llm_with_pydantic_model,
invoke_llm_with_structured_output,
)
from core.model_runtime.entities.llm_entities import (
LLMResult,
LLMResultChunk,
LLMResultChunkDelta,
LLMResultChunkWithStructuredOutput,
LLMResultWithStructuredOutput,
LLMUsage,
)
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
SystemPromptMessage,
TextPromptMessageContent,
UserPromptMessage,
)
from core.model_runtime.entities.model_entities import AIModelEntity, ModelType
@ -57,7 +53,7 @@ def get_model_entity(provider: str, model_name: str, support_structure_output: b
model_schema.support_structure_output = support_structure_output
model_schema.parameter_rules = []
return model_schema
return cast(AIModelEntity, model_schema)
def get_model_instance() -> MagicMock:
@ -71,7 +67,7 @@ def get_model_instance() -> MagicMock:
def test_structured_output_parser():
"""Test cases for invoke_llm_with_structured_output function"""
testcases = [
testcases: list[dict[str, Any]] = [
# Test case 1: Model with native structured output support, non-streaming
{
"name": "native_structured_output_non_streaming",
@ -88,39 +84,6 @@ def test_structured_output_parser():
"expected_result_type": LLMResultWithStructuredOutput,
"should_raise": False,
},
# Test case 2: Model with native structured output support, streaming
{
"name": "native_structured_output_streaming",
"provider": "openai",
"model_name": "gpt-4o",
"support_structure_output": True,
"stream": True,
"json_schema": {"type": "object", "properties": {"name": {"type": "string"}}},
"expected_llm_response": [
LLMResultChunk(
model="gpt-4o",
prompt_messages=[UserPromptMessage(content="test")],
system_fingerprint="test",
delta=LLMResultChunkDelta(
index=0,
message=AssistantPromptMessage(content='{"name":'),
usage=create_mock_usage(prompt_tokens=10, completion_tokens=2),
),
),
LLMResultChunk(
model="gpt-4o",
prompt_messages=[UserPromptMessage(content="test")],
system_fingerprint="test",
delta=LLMResultChunkDelta(
index=0,
message=AssistantPromptMessage(content=' "test"}'),
usage=create_mock_usage(prompt_tokens=10, completion_tokens=3),
),
),
],
"expected_result_type": "generator",
"should_raise": False,
},
# Test case 3: Model without native structured output support, non-streaming
{
"name": "prompt_based_structured_output_non_streaming",
@ -137,80 +100,6 @@ def test_structured_output_parser():
"expected_result_type": LLMResultWithStructuredOutput,
"should_raise": False,
},
# Test case 4: Model without native structured output support, streaming
{
"name": "prompt_based_structured_output_streaming",
"provider": "anthropic",
"model_name": "claude-3-sonnet",
"support_structure_output": False,
"stream": True,
"json_schema": {"type": "object", "properties": {"answer": {"type": "string"}}},
"expected_llm_response": [
LLMResultChunk(
model="claude-3-sonnet",
prompt_messages=[UserPromptMessage(content="test")],
system_fingerprint="test",
delta=LLMResultChunkDelta(
index=0,
message=AssistantPromptMessage(content='{"answer": "test'),
usage=create_mock_usage(prompt_tokens=15, completion_tokens=3),
),
),
LLMResultChunk(
model="claude-3-sonnet",
prompt_messages=[UserPromptMessage(content="test")],
system_fingerprint="test",
delta=LLMResultChunkDelta(
index=0,
message=AssistantPromptMessage(content=' response"}'),
usage=create_mock_usage(prompt_tokens=15, completion_tokens=5),
),
),
],
"expected_result_type": "generator",
"should_raise": False,
},
# Test case 5: Streaming with list content
{
"name": "streaming_with_list_content",
"provider": "openai",
"model_name": "gpt-4o",
"support_structure_output": True,
"stream": True,
"json_schema": {"type": "object", "properties": {"data": {"type": "string"}}},
"expected_llm_response": [
LLMResultChunk(
model="gpt-4o",
prompt_messages=[UserPromptMessage(content="test")],
system_fingerprint="test",
delta=LLMResultChunkDelta(
index=0,
message=AssistantPromptMessage(
content=[
TextPromptMessageContent(data='{"data":'),
]
),
usage=create_mock_usage(prompt_tokens=10, completion_tokens=2),
),
),
LLMResultChunk(
model="gpt-4o",
prompt_messages=[UserPromptMessage(content="test")],
system_fingerprint="test",
delta=LLMResultChunkDelta(
index=0,
message=AssistantPromptMessage(
content=[
TextPromptMessageContent(data=' "value"}'),
]
),
usage=create_mock_usage(prompt_tokens=10, completion_tokens=3),
),
),
],
"expected_result_type": "generator",
"should_raise": False,
},
# Test case 6: Error case - non-string LLM response content (non-streaming)
{
"name": "error_non_string_content_non_streaming",
@ -290,7 +179,7 @@ def test_structured_output_parser():
# Add parameter rules if specified
if "parameter_rules" in case:
model_schema.parameter_rules = case["parameter_rules"]
cast(Any, model_schema).parameter_rules = case["parameter_rules"]
# Setup model instance
model_instance = get_model_instance()
@ -304,25 +193,14 @@ def test_structured_output_parser():
if case["should_raise"]:
# Test error cases
with pytest.raises(case["expected_error"]): # noqa: PT012
if case["stream"]:
result_generator = invoke_llm_with_structured_output(
provider=case["provider"],
model_schema=model_schema,
model_instance=model_instance,
prompt_messages=prompt_messages,
json_schema=case["json_schema"],
)
# Consume the generator to trigger the error
list(result_generator)
else:
invoke_llm_with_structured_output(
provider=case["provider"],
model_schema=model_schema,
model_instance=model_instance,
prompt_messages=prompt_messages,
json_schema=case["json_schema"],
)
with pytest.raises(case["expected_error"]):
invoke_llm_with_structured_output(
provider=case["provider"],
model_schema=model_schema,
model_instance=model_instance,
prompt_messages=prompt_messages,
json_schema=case["json_schema"],
)
else:
# Test successful cases
with patch("core.llm_generator.output_parser.structured_output.json_repair.loads") as mock_json_repair:
@ -340,34 +218,18 @@ def test_structured_output_parser():
user="test_user",
)
if case["expected_result_type"] == "generator":
# Test streaming results
assert hasattr(result, "__iter__")
chunks = list(result)
assert len(chunks) > 0
# Verify all chunks are LLMResultChunkWithStructuredOutput
for chunk in chunks[:-1]: # All except last
assert isinstance(chunk, LLMResultChunkWithStructuredOutput)
assert chunk.model == case["model_name"]
# Last chunk should have structured output
last_chunk = chunks[-1]
assert isinstance(last_chunk, LLMResultChunkWithStructuredOutput)
assert last_chunk.structured_output is not None
assert isinstance(last_chunk.structured_output, dict)
else:
# Test non-streaming results
assert isinstance(result, case["expected_result_type"])
assert result.model == case["model_name"]
assert result.structured_output is not None
assert isinstance(result.structured_output, dict)
# Test non-streaming results
expected_type = cast(type, case["expected_result_type"])
assert isinstance(result, expected_type)
assert result.model == case["model_name"]
assert result.structured_output is not None
assert isinstance(result.structured_output, dict)
# Verify model_instance.invoke_llm was called with correct parameters
model_instance.invoke_llm.assert_called_once()
call_args = model_instance.invoke_llm.call_args
assert call_args.kwargs["stream"] == case["stream"]
assert call_args.kwargs["stream"] is False
assert call_args.kwargs["user"] == "test_user"
assert "temperature" in call_args.kwargs["model_parameters"]
assert "max_tokens" in call_args.kwargs["model_parameters"]
@ -377,7 +239,7 @@ def test_parse_structured_output_edge_cases():
"""Test edge cases for structured output parsing"""
# Test case with list that contains dict (reasoning model scenario)
testcase_list_with_dict = {
testcase_list_with_dict: dict[str, Any] = {
"name": "list_with_dict_parsing",
"provider": "deepseek",
"model_name": "deepseek-r1",
@ -425,7 +287,7 @@ def test_model_specific_schema_preparation():
"""Test schema preparation for different model types"""
# Test Gemini model
gemini_case = {
gemini_case: dict[str, Any] = {
"provider": "google",
"model_name": "gemini-pro",
"support_structure_output": True,
@ -493,46 +355,6 @@ def test_structured_output_with_pydantic_model_non_streaming():
assert result.name == "test"
def test_structured_output_with_pydantic_model_streaming():
model_schema = get_model_entity("openai", "gpt-4o", support_structure_output=True)
model_instance = get_model_instance()
def mock_streaming_response():
yield LLMResultChunk(
model="gpt-4o",
prompt_messages=[UserPromptMessage(content="test")],
system_fingerprint="test",
delta=LLMResultChunkDelta(
index=0,
message=AssistantPromptMessage(content='{"name":'),
usage=create_mock_usage(prompt_tokens=8, completion_tokens=2),
),
)
yield LLMResultChunk(
model="gpt-4o",
prompt_messages=[UserPromptMessage(content="test")],
system_fingerprint="test",
delta=LLMResultChunkDelta(
index=0,
message=AssistantPromptMessage(content=' "test"}'),
usage=create_mock_usage(prompt_tokens=8, completion_tokens=4),
),
)
model_instance.invoke_llm.return_value = mock_streaming_response()
result = invoke_llm_with_pydantic_model(
provider="openai",
model_schema=model_schema,
model_instance=model_instance,
prompt_messages=[UserPromptMessage(content="Return a JSON object with name.")],
output_model=ExampleOutput
)
assert isinstance(result, ExampleOutput)
assert result.name == "test"
def test_structured_output_with_pydantic_model_validation_error():
model_schema = get_model_entity("openai", "gpt-4o", support_structure_output=True)
model_instance = get_model_instance()
@ -552,55 +374,12 @@ def test_structured_output_with_pydantic_model_validation_error():
)
class TestGetDefaultValueForType:
"""Test cases for _get_default_value_for_type function"""
def test_string_type(self):
assert _get_default_value_for_type("string") == ""
def test_object_type(self):
assert _get_default_value_for_type("object") == {}
def test_array_type(self):
assert _get_default_value_for_type("array") == []
def test_number_type(self):
assert _get_default_value_for_type("number") == 0
def test_integer_type(self):
assert _get_default_value_for_type("integer") == 0
def test_boolean_type(self):
assert _get_default_value_for_type("boolean") is False
def test_null_type(self):
assert _get_default_value_for_type("null") is None
def test_none_type(self):
assert _get_default_value_for_type(None) is None
def test_unknown_type(self):
assert _get_default_value_for_type("unknown") is None
def test_union_type_string_null(self):
# ["string", "null"] should return "" (first non-null type)
assert _get_default_value_for_type(["string", "null"]) == ""
def test_union_type_null_first(self):
# ["null", "integer"] should return 0 (first non-null type)
assert _get_default_value_for_type(["null", "integer"]) == 0
def test_union_type_only_null(self):
# ["null"] should return None
assert _get_default_value_for_type(["null"]) is None
class TestFillDefaultsFromSchema:
"""Test cases for fill_defaults_from_schema function"""
def test_simple_required_fields(self):
"""Test filling simple required fields"""
schema = {
schema: dict[str, Any] = {
"type": "object",
"properties": {
"name": {"type": "string"},
@ -609,7 +388,7 @@ class TestFillDefaultsFromSchema:
},
"required": ["name", "age"],
}
output = {"name": "Alice"}
output: dict[str, Any] = {"name": "Alice"}
result = fill_defaults_from_schema(output, schema)
@ -619,7 +398,7 @@ class TestFillDefaultsFromSchema:
def test_non_required_fields_not_filled(self):
"""Test that non-required fields are not filled"""
schema = {
schema: dict[str, Any] = {
"type": "object",
"properties": {
"required_field": {"type": "string"},
@ -627,7 +406,7 @@ class TestFillDefaultsFromSchema:
},
"required": ["required_field"],
}
output = {}
output: dict[str, Any] = {}
result = fill_defaults_from_schema(output, schema)
@ -636,7 +415,7 @@ class TestFillDefaultsFromSchema:
def test_nested_object_required_fields(self):
"""Test filling nested object required fields"""
schema = {
schema: dict[str, Any] = {
"type": "object",
"properties": {
"user": {
@ -659,7 +438,7 @@ class TestFillDefaultsFromSchema:
},
"required": ["user"],
}
output = {
output: dict[str, Any] = {
"user": {
"name": "Alice",
"address": {
@ -684,7 +463,7 @@ class TestFillDefaultsFromSchema:
def test_missing_nested_object_created(self):
"""Test that missing required nested objects are created"""
schema = {
schema: dict[str, Any] = {
"type": "object",
"properties": {
"metadata": {
@ -698,7 +477,7 @@ class TestFillDefaultsFromSchema:
},
"required": ["metadata"],
}
output = {}
output: dict[str, Any] = {}
result = fill_defaults_from_schema(output, schema)
@ -710,7 +489,7 @@ class TestFillDefaultsFromSchema:
def test_all_types_default_values(self):
"""Test default values for all types"""
schema = {
schema: dict[str, Any] = {
"type": "object",
"properties": {
"str_field": {"type": "string"},
@ -722,7 +501,7 @@ class TestFillDefaultsFromSchema:
},
"required": ["str_field", "int_field", "num_field", "bool_field", "arr_field", "obj_field"],
}
output = {}
output: dict[str, Any] = {}
result = fill_defaults_from_schema(output, schema)
@ -737,7 +516,7 @@ class TestFillDefaultsFromSchema:
def test_existing_values_preserved(self):
"""Test that existing values are not overwritten"""
schema = {
schema: dict[str, Any] = {
"type": "object",
"properties": {
"name": {"type": "string"},
@ -745,7 +524,7 @@ class TestFillDefaultsFromSchema:
},
"required": ["name", "count"],
}
output = {"name": "Bob", "count": 42}
output: dict[str, Any] = {"name": "Bob", "count": 42}
result = fill_defaults_from_schema(output, schema)
@ -753,7 +532,7 @@ class TestFillDefaultsFromSchema:
def test_complex_nested_structure(self):
"""Test complex nested structure with multiple levels"""
schema = {
schema: dict[str, Any] = {
"type": "object",
"properties": {
"user": {
@ -789,7 +568,7 @@ class TestFillDefaultsFromSchema:
},
"required": ["user", "tags", "metadata", "is_active"],
}
output = {
output: dict[str, Any] = {
"user": {
"name": "Alice",
"age": 25,
@ -829,8 +608,8 @@ class TestFillDefaultsFromSchema:
def test_empty_schema(self):
"""Test with empty schema"""
schema = {}
output = {"any": "value"}
schema: dict[str, Any] = {}
output: dict[str, Any] = {"any": "value"}
result = fill_defaults_from_schema(output, schema)
@ -838,14 +617,14 @@ class TestFillDefaultsFromSchema:
def test_schema_without_required(self):
"""Test schema without required field"""
schema = {
schema: dict[str, Any] = {
"type": "object",
"properties": {
"optional1": {"type": "string"},
"optional2": {"type": "integer"},
},
}
output = {}
output: dict[str, Any] = {}
result = fill_defaults_from_schema(output, schema)

View File

@ -44,7 +44,7 @@ export type LLMNodeType = CommonNodeType & {
max_iterations?: number
}
export const FILE_REF_FORMAT = 'dify-file-ref'
export const FILE_REF_FORMAT = 'file-path'
export enum Type {
string = 'string',