Commit 25e331f559 by GuanMu, 2026-03-24 12:43:03 +08:00 (committed via GitHub).
Signature: no known key found in database; GPG key ID B5690EEEBB952194.
Scope: 43 changed files, 1,977 additions and 69 deletions.

View File

@ -2,7 +2,7 @@ from typing import Literal
from flask_restx import Resource, marshal_with
from pydantic import BaseModel
from werkzeug.exceptions import NotFound
from werkzeug.exceptions import BadRequest, NotFound
from controllers.common.schema import register_schema_models
from controllers.console import console_ns
@ -16,6 +16,7 @@ from services.entities.knowledge_entities.knowledge_entities import (
MetadataDetail,
MetadataOperationData,
)
from services.errors.metadata_service import MetadataInUseError
from services.metadata_service import MetadataService
@ -97,7 +98,10 @@ class DatasetMetadataApi(Resource):
raise NotFound("Dataset not found.")
DatasetService.check_dataset_permission(dataset, current_user)
MetadataService.delete_metadata(dataset_id_str, metadata_id_str)
try:
MetadataService.delete_metadata(dataset_id_str, metadata_id_str)
except MetadataInUseError as exc:
raise BadRequest(str(exc))
return {"result": "success"}, 204

View File

@ -3,7 +3,7 @@ from typing import Literal
from flask_login import current_user
from flask_restx import marshal
from pydantic import BaseModel
from werkzeug.exceptions import NotFound
from werkzeug.exceptions import BadRequest, NotFound
from controllers.common.schema import register_schema_model, register_schema_models
from controllers.service_api import service_api_ns
@ -16,6 +16,7 @@ from services.entities.knowledge_entities.knowledge_entities import (
MetadataDetail,
MetadataOperationData,
)
from services.errors.metadata_service import MetadataInUseError
from services.metadata_service import MetadataService
@ -127,7 +128,10 @@ class DatasetMetadataServiceApi(DatasetApiResource):
raise NotFound("Dataset not found.")
DatasetService.check_dataset_permission(dataset, current_user)
MetadataService.delete_metadata(dataset_id_str, metadata_id_str)
try:
MetadataService.delete_metadata(dataset_id_str, metadata_id_str)
except MetadataInUseError as exc:
raise BadRequest(str(exc))
return "", 204

View File

@ -7,12 +7,13 @@ from typing import Any
from flask import current_app
from sqlalchemy import delete, func, select
from sqlalchemy.orm import attributes
from core.db.session_factory import session_factory
from core.rag.index_processor.index_processor_base import SummaryIndexSettingDict
from core.workflow.nodes.knowledge_index.exc import KnowledgeIndexNodeError
from core.workflow.nodes.knowledge_index.protocols import Preview, PreviewItem, QaPreview
from models.dataset import Dataset, Document, DocumentSegment
from models.dataset import Dataset, DatasetMetadataBinding, Document, DocumentSegment
from .index_processor_factory import IndexProcessorFactory
from .processor.paragraph_index_processor import ParagraphIndexProcessor
@ -53,6 +54,9 @@ class IndexProcessor:
chunks: Mapping[str, Any],
batch: Any,
summary_index_setting: SummaryIndexSettingDict | None = None,
doc_metadata: Mapping[str, Any] | None = None,
metadata_binding_ids: list[str] | None = None,
user_id: str | None = None,
):
with session_factory.create_session() as session:
document = session.query(Document).filter_by(id=document_id).first()
@ -64,6 +68,7 @@ class IndexProcessor:
raise KnowledgeIndexNodeError(f"Dataset {dataset_id} not found.")
dataset_name_value = dataset.name
tenant_id_value = dataset.tenant_id
document_name_value = document.name
created_at_value = document.created_at
if summary_index_setting is None:
@ -108,6 +113,19 @@ class IndexProcessor:
document.need_summary = True
else:
document.need_summary = False
# Reconcile doc_metadata and bindings when the caller explicitly provides pipeline metadata.
if doc_metadata is not None or metadata_binding_ids is not None:
self._save_doc_metadata_and_bindings(
session=session,
dataset_id=dataset_id,
tenant_id=tenant_id_value,
document=document,
doc_metadata=doc_metadata or {},
metadata_binding_ids=metadata_binding_ids or [],
user_id=user_id,
)
session.add(document)
# update document segment status
session.query(DocumentSegment).where(
@ -131,6 +149,110 @@ class IndexProcessor:
"display_status": "completed",
}
def _save_doc_metadata_and_bindings(
    self,
    *,
    session: Any,
    dataset_id: str,
    tenant_id: str,
    document: Document,
    doc_metadata: Mapping[str, Any],
    metadata_binding_ids: list[str],
    user_id: str | None,
) -> None:
    """
    Persist resolved metadata values and ensure metadata bindings exist for the document.

    Reconciles pipeline-managed metadata: values written under now-removed
    bindings are popped from document.doc_metadata, obsolete binding rows are
    deleted, and missing binding rows are created. All work happens in the
    caller's session; no commit is issued here.

    Args:
        doc_metadata: dict of {metadata_id: resolved_value}
        metadata_binding_ids: list of metadata IDs to bind
        user_id: author recorded on newly created bindings; when None,
            binding creation is skipped with a warning.
    """
    from models.dataset import DatasetMetadata

    # Look up metadata names by ID (covers both value-write and binding-creation paths)
    metadata_name_map: dict[str, str] = {}
    all_ids_to_check = list({*doc_metadata.keys(), *metadata_binding_ids})
    if all_ids_to_check:
        dataset_metadatas = session.scalars(
            select(DatasetMetadata).where(
                DatasetMetadata.dataset_id == dataset_id,
                DatasetMetadata.id.in_(all_ids_to_check),
            )
        ).all()
        for metadata in dataset_metadatas:
            metadata_name_map[metadata.id] = metadata.name

    # Build name -> value dict for document.doc_metadata; ids with no matching
    # definition in this dataset are skipped (with a warning), not written blindly.
    named_metadata: dict[str, Any] = {}
    for metadata_id, value in doc_metadata.items():
        metadata_name = metadata_name_map.get(metadata_id)
        if not metadata_name:
            logger.warning("[IndexProcessor] metadata_id %s not found, skipping", metadata_id)
            continue
        named_metadata[metadata_name] = value

    # Existing bindings identify which fields this pipeline previously managed.
    existing_binding_rows = session.scalars(
        select(DatasetMetadataBinding).where(
            DatasetMetadataBinding.dataset_id == dataset_id,
            DatasetMetadataBinding.document_id == document.id,
        )
    ).all()
    existing_binding_ids = {binding.metadata_id for binding in existing_binding_rows}
    unique_metadata_ids = list(dict.fromkeys(metadata_binding_ids))

    # Second lookup adds names for previously-bound ids absent from the incoming
    # payload, so their stale values can be popped below.
    metadata_ids_to_load = list(existing_binding_ids | set(unique_metadata_ids))
    if metadata_ids_to_load:
        existing_metadata_defs = session.scalars(
            select(DatasetMetadata).where(
                DatasetMetadata.dataset_id == dataset_id,
                DatasetMetadata.id.in_(metadata_ids_to_load),
            )
        ).all()
        for metadata in existing_metadata_defs:
            metadata_name_map[metadata.id] = metadata.name

    # Drop every previously-bound field's value, then re-apply the current ones;
    # values not managed by bindings are left untouched.
    document_doc_metadata = dict(document.doc_metadata or {})
    for metadata_id in existing_binding_ids:
        metadata_name = metadata_name_map.get(metadata_id)
        if metadata_name:
            document_doc_metadata.pop(metadata_name, None)
    document_doc_metadata.update(named_metadata)
    document.doc_metadata = document_doc_metadata
    # JSON column: mark it dirty explicitly so SQLAlchemy persists the change.
    attributes.flag_modified(document, "doc_metadata")

    # Delete binding rows whose metadata id is no longer requested.
    obsolete_metadata_ids = existing_binding_ids - set(unique_metadata_ids)
    if obsolete_metadata_ids:
        session.query(DatasetMetadataBinding).where(
            DatasetMetadataBinding.dataset_id == dataset_id,
            DatasetMetadataBinding.document_id == document.id,
            DatasetMetadataBinding.metadata_id.in_(obsolete_metadata_ids),
        ).delete(synchronize_session=False)

    # Create bindings that are requested but do not exist yet.
    for metadata_id in unique_metadata_ids:
        if metadata_id not in metadata_name_map:
            logger.warning(
                "[IndexProcessor] metadata_id %s not found in dataset, skipping binding creation",
                metadata_id,
            )
            continue
        if metadata_id in existing_binding_ids:
            continue
        if user_id is None:
            logger.warning(
                "[IndexProcessor] user_id is None, cannot create binding for metadata_id=%s, skipping",
                metadata_id,
            )
            continue
        binding = DatasetMetadataBinding(
            tenant_id=tenant_id,
            dataset_id=dataset_id,
            metadata_id=metadata_id,
            document_id=document.id,
            created_by=user_id,
        )
        session.add(binding)
def get_preview_output(
self,
chunks: Any,

View File

@ -153,6 +153,15 @@ class ParentChildStructureChunk(BaseModel):
data_source_info: Union[FileInfo, OnlineDocumentInfo, WebsiteInfo]
class DocMetadata(BaseModel):
    """
    Per-document metadata entry configured on a knowledge index node.
    """

    # ID of the dataset metadata definition this value targets.
    metadata_id: str
    # Literal scalar value, or a variable selector path (list[str]) that is
    # resolved from the variable pool at run time, or None for "no value".
    value: str | int | float | list[str] | None
class KnowledgeIndexNodeData(BaseNodeData):
"""
Knowledge index Node Data.
@ -161,5 +170,6 @@ class KnowledgeIndexNodeData(BaseNodeData):
type: NodeType = KNOWLEDGE_INDEX_NODE_TYPE
chunk_structure: str
index_chunk_variable_selector: list[str]
doc_metadata: list[DocMetadata] | None = None
indexing_technique: str | None = None
summary_index_setting: SummaryIndexSettingDict | None = None

View File

@ -1,5 +1,5 @@
import logging
from collections.abc import Mapping
from collections.abc import Mapping, Sequence
from typing import TYPE_CHECKING, Any
from core.rag.index_processor.index_processor import IndexProcessor
@ -13,11 +13,14 @@ from dify_graph.node_events import NodeRunResult
from dify_graph.nodes.base.node import Node
from dify_graph.nodes.base.template import Template
from .entities import KnowledgeIndexNodeData
from .entities import DocMetadata, KnowledgeIndexNodeData
from .exc import (
KnowledgeIndexNodeError,
)
# Constant for built-in metadata identifier
BUILT_IN_METADATA_ID = "built-in"
if TYPE_CHECKING:
from dify_graph.entities import GraphInitParams
from dify_graph.runtime import GraphRuntimeState
@ -92,6 +95,15 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
if not batch:
raise KnowledgeIndexNodeError("Batch is required.")
# Resolve metadata before indexing
resolved_doc_metadata: dict[str, Any] = {}
metadata_binding_ids: list[str] = []
if node_data.doc_metadata:
resolved_doc_metadata, metadata_binding_ids = self._resolve_doc_metadata_values(
dataset_id=dataset_id,
doc_metadata_items=node_data.doc_metadata,
)
results = self._invoke_knowledge_index(
dataset_id=dataset_id,
document_id=document_id,
@ -100,6 +112,8 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
batch=batch.value,
chunks=chunks,
summary_index_setting=summary_index_setting,
doc_metadata=resolved_doc_metadata,
metadata_binding_ids=metadata_binding_ids,
)
return NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED, inputs=variables, outputs=results)
@ -129,17 +143,86 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
batch: Any,
chunks: Mapping[str, Any],
summary_index_setting: SummaryIndexSettingDict | None = None,
doc_metadata: Mapping[str, Any] | None = None,
metadata_binding_ids: list[str] | None = None,
):
if not document_id:
raise KnowledgeIndexNodeError("document_id is required.")
rst = self.index_processor.index_and_clean(
dataset_id, document_id, original_document_id, chunks, batch, summary_index_setting
dataset_id,
document_id,
original_document_id,
chunks,
batch,
summary_index_setting,
doc_metadata=doc_metadata,
metadata_binding_ids=metadata_binding_ids,
user_id=self.require_dify_context().user_id,
)
self.summary_index_service.generate_and_vectorize_summary(
dataset_id, document_id, is_preview, summary_index_setting
)
return rst
def _resolve_doc_metadata_values(
    self,
    *,
    dataset_id: str,
    doc_metadata_items: Sequence[DocMetadata],
) -> tuple[dict[str, Any], list[str]]:
    """
    Resolve metadata values for indexing, reading variable selectors from the pool.

    Returns a ``{metadata_id: resolved_value}`` dict plus the list of metadata
    ids that should be bound to the document. Name lookup against the DB is
    left to the IndexProcessor.
    """
    pool = self.graph_runtime_state.variable_pool
    resolved: dict[str, Any] = {}
    binding_ids: list[str] = []

    for entry in doc_metadata_items:
        # Built-in fields are system-managed and never resolved here.
        if entry.metadata_id == BUILT_IN_METADATA_ID:
            continue

        current = entry.value
        if isinstance(current, list):
            # A list value encodes a variable selector path, not a literal.
            variable = pool.get(current)
            if not variable:
                variable_path = ".".join(current)
                raise KnowledgeIndexNodeError(
                    f"Variable '{variable_path}' not found for metadata '{entry.metadata_id}'. "
                    f"Please check your variable configuration."
                )
            current = variable.to_object()

        # Skip None values entirely: no value is written and no binding created.
        if current is None:
            continue
        resolved[entry.metadata_id] = current
        binding_ids.append(entry.metadata_id)

    return resolved, binding_ids
@classmethod
def _extract_variable_selector_to_variable_mapping(
    cls,
    *,
    graph_config: Mapping[str, Any],
    node_id: str,
    node_data: KnowledgeIndexNodeData,
) -> Mapping[str, Sequence[str]]:
    """Map node-scoped variable keys to their selector paths.

    Always includes the chunk-input selector; additionally includes one entry
    per doc_metadata item whose value is a selector path (a list).
    """
    # Chunk input selector is mandatory for this node type.
    mapping: dict[str, Sequence[str]] = {
        f"{node_id}.index_chunk_variable_selector": node_data.index_chunk_variable_selector,
    }
    # Only list-valued metadata entries reference variables; literals need no mapping.
    for entry in node_data.doc_metadata or []:
        if isinstance(entry.value, list):
            mapping[f"{node_id}.{entry.metadata_id}"] = entry.value
    return mapping
@classmethod
def version(cls) -> str:
    """Return this node implementation's version string."""
    return "1"

View File

@ -34,6 +34,9 @@ class IndexProcessorProtocol(Protocol):
chunks: Mapping[str, Any],
batch: Any,
summary_index_setting: dict | None = None,
doc_metadata: Mapping[str, Any] | None = None,
metadata_binding_ids: list[str] | None = None,
user_id: str | None = None,
) -> dict[str, Any]: ...
def get_preview_output(

View File

@ -41,6 +41,8 @@ from models.dataset import (
Dataset,
DatasetAutoDisableLog,
DatasetCollectionBinding,
DatasetMetadata,
DatasetMetadataBinding,
DatasetPermission,
DatasetPermissionEnum,
DatasetProcessRule,
@ -1940,11 +1942,95 @@ class DocumentService:
else default_retrieval_model
)
# Handle custom metadata configuration
custom_metadata: dict[str, str | int | float | None] = {}
metadata_bindings_to_create: list[str] = []
if knowledge_config.doc_metadata:
# Batch fetch all metadata definitions to avoid N+1 query
metadata_ids = [item.metadata_id for item in knowledge_config.doc_metadata]
metadata_defs = (
db.session.query(DatasetMetadata)
.filter(
DatasetMetadata.id.in_(metadata_ids),
DatasetMetadata.dataset_id == dataset.id,
)
.all()
)
metadata_map = {md.id: md for md in metadata_defs}
for item in knowledge_config.doc_metadata:
# Validate metadata_id belongs to this dataset
metadata_def = metadata_map.get(item.metadata_id)
if not metadata_def:
raise ValueError(f"Metadata with id '{item.metadata_id}' not found in this dataset")
custom_metadata[metadata_def.name] = item.value
metadata_bindings_to_create.append(item.metadata_id)
documents = []
if knowledge_config.original_document_id:
document = DocumentService.update_document_with_dataset_id(dataset, knowledge_config, account)
documents.append(document)
batch = document.batch
# Reconcile pipeline-managed metadata on re-index so removed fields do not linger.
if knowledge_config.doc_metadata is not None:
from sqlalchemy.orm import attributes
metadata_ids_deduped = list(dict.fromkeys(metadata_bindings_to_create))
existing_bindings = (
db.session.query(DatasetMetadataBinding)
.filter_by(dataset_id=dataset.id, document_id=document.id)
.all()
)
existing_binding_ids = {binding.metadata_id for binding in existing_bindings}
metadata_ids_to_load = list(existing_binding_ids | set(metadata_ids_deduped))
metadata_name_map: dict[str, str] = {}
if metadata_ids_to_load:
metadata_defs = (
db.session.query(DatasetMetadata)
.filter(
DatasetMetadata.dataset_id == dataset.id,
DatasetMetadata.id.in_(metadata_ids_to_load),
)
.all()
)
metadata_name_map = {metadata_def.id: metadata_def.name for metadata_def in metadata_defs}
doc_metadata_field = copy.deepcopy(document.doc_metadata) if document.doc_metadata else {}
for metadata_id in existing_binding_ids:
metadata_name = metadata_name_map.get(metadata_id)
if metadata_name:
doc_metadata_field.pop(metadata_name, None)
doc_metadata_field.update(custom_metadata)
document.doc_metadata = doc_metadata_field
attributes.flag_modified(document, "doc_metadata")
db.session.add(document)
obsolete_metadata_ids = existing_binding_ids - set(metadata_ids_deduped)
if obsolete_metadata_ids:
(
db.session.query(DatasetMetadataBinding)
.filter(
DatasetMetadataBinding.dataset_id == dataset.id,
DatasetMetadataBinding.document_id == document.id,
DatasetMetadataBinding.metadata_id.in_(obsolete_metadata_ids),
)
.delete(synchronize_session=False)
)
existing_current_binding_ids = existing_binding_ids & set(metadata_ids_deduped)
for metadata_id in metadata_ids_deduped:
if metadata_id in existing_current_binding_ids:
continue
binding = DatasetMetadataBinding(
tenant_id=dataset.tenant_id,
dataset_id=dataset.id,
document_id=document.id,
metadata_id=metadata_id,
created_by=account.id,
)
db.session.add(binding)
db.session.commit()
else:
# When creating new documents, data_source must be provided
if not knowledge_config.data_source:
@ -2045,6 +2131,10 @@ class DocumentService:
document.data_source_info = json.dumps(data_source_info)
document.batch = batch
document.indexing_status = IndexingStatus.WAITING
if custom_metadata:
doc_metadata = copy.deepcopy(document.doc_metadata) if document.doc_metadata else {}
doc_metadata.update(custom_metadata)
document.doc_metadata = doc_metadata
db.session.add(document)
documents.append(document)
duplicate_document_ids.append(document.id)
@ -2062,6 +2152,7 @@ class DocumentService:
account,
file.name,
batch,
custom_metadata=custom_metadata or None,
)
db.session.add(document)
db.session.flush()
@ -2114,6 +2205,7 @@ class DocumentService:
account,
truncated_page_name,
batch,
custom_metadata=custom_metadata or None,
)
db.session.add(document)
db.session.flush()
@ -2154,12 +2246,46 @@ class DocumentService:
account,
document_name,
batch,
custom_metadata=custom_metadata or None,
)
db.session.add(document)
db.session.flush()
document_ids.append(document.id)
documents.append(document)
position += 1
# Create DatasetMetadataBinding records for custom metadata
# before commit so documents and bindings are in a single transaction.
if metadata_bindings_to_create:
target_document_ids = list(set(document_ids + duplicate_document_ids))
metadata_ids = list(dict.fromkeys(metadata_bindings_to_create))
if target_document_ids and metadata_ids:
existing_binding_pairs = {
(document_id, metadata_id)
for document_id, metadata_id in db.session.query(
DatasetMetadataBinding.document_id,
DatasetMetadataBinding.metadata_id,
)
.filter(
DatasetMetadataBinding.dataset_id == dataset.id,
DatasetMetadataBinding.document_id.in_(target_document_ids),
DatasetMetadataBinding.metadata_id.in_(metadata_ids),
)
.all()
}
for doc_id in target_document_ids:
for metadata_id in metadata_ids:
if (doc_id, metadata_id) in existing_binding_pairs:
continue
binding = DatasetMetadataBinding(
tenant_id=dataset.tenant_id,
dataset_id=dataset.id,
document_id=doc_id,
metadata_id=metadata_id,
created_by=account.id,
)
db.session.add(binding)
db.session.commit()
# trigger async task
@ -2474,6 +2600,7 @@ class DocumentService:
account: Account,
name: str,
batch: str,
custom_metadata: dict | None = None,
):
# Set need_summary based on dataset's summary_index_setting
need_summary = False
@ -2504,6 +2631,9 @@ class DocumentService:
BuiltInField.last_update_date: datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%d %H:%M:%S"),
BuiltInField.source: data_source_type,
}
# Merge custom metadata if provided
if custom_metadata:
doc_metadata.update(custom_metadata)
if doc_metadata:
document.doc_metadata = doc_metadata
return document

View File

@ -113,6 +113,11 @@ class MetaDataConfig(BaseModel):
doc_metadata: dict
class DocumentMetadataInput(BaseModel):
    """A metadata value supplied at document-creation time, keyed by metadata definition ID."""

    # Must reference an existing DatasetMetadata definition in the target dataset.
    metadata_id: str
    # Scalar value to store for the document; None leaves the field without a value.
    value: str | int | float | None = None
class KnowledgeConfig(BaseModel):
original_document_id: str | None = None
duplicate: bool = True
@ -127,6 +132,7 @@ class KnowledgeConfig(BaseModel):
embedding_model_provider: str | None = None
name: str | None = None
is_multimodal: bool = False
doc_metadata: list[DocumentMetadataInput] | None = None
@field_validator("doc_form")
@classmethod

View File

@ -0,0 +1,2 @@
class MetadataInUseError(ValueError):
    """Raised when metadata is still referenced by a pipeline configuration.

    Inherits from ValueError, so existing callers that catch ValueError
    continue to handle this error.
    """

View File

@ -1,5 +1,9 @@
import copy
import json
import logging
from collections.abc import Mapping
from werkzeug.exceptions import NotFound
from core.rag.index_processor.constant.built_in_field import BuiltInField, MetadataDataSource
from extensions.ext_database import db
@ -8,16 +12,108 @@ from libs.datetime_utils import naive_utc_now
from libs.login import current_account_with_tenant
from models.dataset import Dataset, DatasetMetadata, DatasetMetadataBinding
from models.enums import DatasetMetadataType
from models.model import App, AppModelConfig
from models.workflow import Workflow
from services.dataset_service import DocumentService
from services.entities.knowledge_entities.knowledge_entities import (
MetadataArgs,
MetadataOperationData,
)
from services.errors.metadata_service import MetadataInUseError
logger = logging.getLogger(__name__)
_PIPELINE_REF_CACHE_TTL = 60 # seconds
class MetadataService:
@staticmethod
def _collect_referenced_metadata_ids(payload: object, referenced_ids: set[str]) -> None:
"""Collect all metadata IDs referenced by persisted pipeline JSON payloads."""
if isinstance(payload, Mapping):
metadata_id = payload.get("metadata_id")
if isinstance(metadata_id, str):
referenced_ids.add(metadata_id)
for value in payload.values():
MetadataService._collect_referenced_metadata_ids(value, referenced_ids)
return
if isinstance(payload, list):
for item in payload:
MetadataService._collect_referenced_metadata_ids(item, referenced_ids)
@staticmethod
def _load_reference_payload(raw_payload: str | None, source_name: str) -> object | None:
if not raw_payload:
return None
try:
return json.loads(raw_payload)
except json.JSONDecodeError:
logger.warning("Failed to decode metadata reference payload from %s", source_name)
return None
@staticmethod
def _scan_all_referenced_metadata_ids(tenant_id: str) -> set[str]:
    """Scan app configs and workflow graphs to collect all referenced metadata IDs.

    Uses a coarse SQL-side prefilter (raw JSON text containing '"metadata_id"')
    before decoding each candidate payload in Python.
    """
    referenced: set[str] = set()

    config_rows = (
        db.session.query(AppModelConfig.dataset_configs)
        .join(App, App.id == AppModelConfig.app_id)
        .filter(
            App.tenant_id == tenant_id,
            AppModelConfig.dataset_configs.isnot(None),
            AppModelConfig.dataset_configs.contains('"metadata_id"'),
        )
        .all()
    )
    graph_rows = (
        db.session.query(Workflow.graph)
        .filter(
            Workflow.tenant_id == tenant_id,
            Workflow.graph.contains('"metadata_id"'),
        )
        .all()
    )

    # Decode each candidate payload and harvest ids; both sources share one loop.
    for rows, source_name in (
        (config_rows, "app_model_configs.dataset_configs"),
        (graph_rows, "workflows.graph"),
    ):
        for (raw,) in rows:
            decoded = MetadataService._load_reference_payload(raw, source_name)
            if decoded is not None:
                MetadataService._collect_referenced_metadata_ids(decoded, referenced)
    return referenced
@staticmethod
def _get_referenced_metadata_ids(tenant_id: str, metadata_ids: set[str], *, bypass_cache: bool = False) -> set[str]:
    """Return metadata IDs (from the given set) that are referenced by pipeline configurations.

    Results are cached per-tenant for _PIPELINE_REF_CACHE_TTL seconds.
    Pass bypass_cache=True for write paths (e.g. delete) that require fresh data.
    """
    if not metadata_ids:
        return set()

    key = f"metadata:pipeline_refs:{tenant_id}"
    if not bypass_cache:
        cached = redis_client.get(key)
        if cached:
            # Cache hit: restrict the cached tenant-wide set to the ids asked about.
            return set(json.loads(cached)) & metadata_ids

    # Cache miss or forced refresh: rescan persisted configs and repopulate.
    fresh = MetadataService._scan_all_referenced_metadata_ids(tenant_id)
    redis_client.setex(key, _PIPELINE_REF_CACHE_TTL, json.dumps(list(fresh)))
    return fresh & metadata_ids
@staticmethod
def create_metadata(dataset_id: str, metadata_args: MetadataArgs) -> DatasetMetadata:
# check if metadata name is too long
@ -103,7 +199,13 @@ class MetadataService:
MetadataService.knowledge_base_metadata_lock_check(dataset_id, None)
metadata = db.session.query(DatasetMetadata).filter_by(id=metadata_id).first()
if metadata is None:
raise ValueError("Metadata not found.")
raise NotFound("Metadata not found.")
_, current_tenant_id = current_account_with_tenant()
referenced_metadata_ids = MetadataService._get_referenced_metadata_ids(
current_tenant_id, {metadata_id}, bypass_cache=True
)
if metadata_id in referenced_metadata_ids:
raise MetadataInUseError("This metadata is referenced by a pipeline and cannot be deleted.")
db.session.delete(metadata)
# deal related documents
@ -123,8 +225,13 @@ class MetadataService:
db.session.add(document)
db.session.commit()
return metadata
except MetadataInUseError:
raise
except NotFound:
raise
except Exception:
logger.exception("Delete metadata failed")
raise
finally:
redis_client.delete(lock_key)
@ -269,6 +376,9 @@ class MetadataService:
@staticmethod
def get_dataset_metadatas(dataset: Dataset):
metadata_items = [item for item in dataset.doc_metadata or [] if item.get("id") != "built-in"]
metadata_ids: set[str] = {mid for item in metadata_items if (mid := item.get("id")) is not None}
referenced_metadata_ids = MetadataService._get_referenced_metadata_ids(dataset.tenant_id, metadata_ids)
return {
"doc_metadata": [
{
@ -278,9 +388,9 @@ class MetadataService:
"count": db.session.query(DatasetMetadataBinding)
.filter_by(metadata_id=item.get("id"), dataset_id=dataset.id)
.count(),
"is_referenced_by_pipeline": item.get("id") in referenced_metadata_ids,
}
for item in dataset.doc_metadata or []
if item.get("id") != "built-in"
for item in metadata_items
],
"built_in_field_enabled": dataset.built_in_field_enabled,
}

View File

@ -268,7 +268,7 @@ class TestAccountService:
fake = Faker()
email = fake.email()
name = fake.name()
password = generate_valid_password(fake)
password = fake.password(length=12)
# Setup mocks
mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True
mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False

View File

@ -3,6 +3,7 @@ from unittest.mock import create_autospec, patch
import pytest
from faker import Faker
from sqlalchemy.orm import Session
from werkzeug.exceptions import NotFound
from core.rag.index_processor.constant.built_in_field import BuiltInField
from core.rag.index_processor.constant.index_type import IndexStructureType
@ -470,11 +471,9 @@ class TestMetadataService:
fake_metadata_id = str(uuid.uuid4()) # Use valid UUID format
# Act: Execute the method under test
result = MetadataService.delete_metadata(dataset.id, fake_metadata_id)
# Assert: Verify the method returns None when metadata is not found
assert result is None
# Act / Assert: deleting a missing metadata now surfaces a 404-compatible error
with pytest.raises(NotFound, match="Metadata not found"):
MetadataService.delete_metadata(dataset.id, fake_metadata_id)
def test_delete_metadata_with_document_bindings(
self, db_session_with_containers: Session, mock_external_service_dependencies

View File

@ -3,7 +3,7 @@ from unittest.mock import MagicMock, PropertyMock, patch
import pytest
from flask import Flask
from werkzeug.exceptions import NotFound
from werkzeug.exceptions import BadRequest, NotFound
from controllers.console import console_ns
from controllers.console.datasets.metadata import (
@ -18,6 +18,7 @@ from services.entities.knowledge_entities.knowledge_entities import (
MetadataArgs,
MetadataOperationData,
)
from services.errors.metadata_service import MetadataInUseError
from services.metadata_service import MetadataService
@ -267,6 +268,62 @@ class TestDatasetMetadataApi:
assert status == 204
assert result["result"] == "success"
def test_delete_metadata_referenced_by_pipeline(self, app, current_user, dataset, dataset_id, metadata_id):
    """A MetadataInUseError raised by the service must surface as a 400 BadRequest."""
    api = DatasetMetadataApi()
    # unwrap presumably strips the view decorators so the raw handler can be
    # invoked directly — confirm against the test helper's definition.
    method = unwrap(api.delete)
    with (
        app.test_request_context("/"),
        patch(
            "controllers.console.datasets.metadata.current_account_with_tenant",
            return_value=(current_user, "tenant-1"),
        ),
        patch.object(
            DatasetService,
            "get_dataset",
            return_value=dataset,
        ),
        patch.object(
            DatasetService,
            "check_dataset_permission",
        ),
        # Force the in-use error out of the service layer.
        patch.object(
            MetadataService,
            "delete_metadata",
            side_effect=MetadataInUseError("This metadata is referenced by a pipeline and cannot be deleted."),
        ),
    ):
        with pytest.raises(BadRequest, match="referenced by a pipeline"):
            method(api, dataset_id, metadata_id)
def test_delete_metadata_not_found(self, app, current_user, dataset, dataset_id, metadata_id):
    """A NotFound raised by the service must propagate unchanged (HTTP 404)."""
    api = DatasetMetadataApi()
    # unwrap presumably strips the view decorators so the raw handler can be
    # invoked directly — confirm against the test helper's definition.
    method = unwrap(api.delete)
    with (
        app.test_request_context("/"),
        patch(
            "controllers.console.datasets.metadata.current_account_with_tenant",
            return_value=(current_user, "tenant-1"),
        ),
        patch.object(
            DatasetService,
            "get_dataset",
            return_value=dataset,
        ),
        patch.object(
            DatasetService,
            "check_dataset_permission",
        ),
        # Service-level 404 should not be translated by the controller.
        patch.object(
            MetadataService,
            "delete_metadata",
            side_effect=NotFound("Metadata not found."),
        ),
    ):
        with pytest.raises(NotFound, match="Metadata not found"):
            method(api, dataset_id, metadata_id)
class TestDatasetMetadataBuiltInFieldApi:
def test_get_built_in_fields(self, app):

View File

@ -19,7 +19,7 @@ import uuid
from unittest.mock import Mock, patch
import pytest
from werkzeug.exceptions import NotFound
from werkzeug.exceptions import BadRequest, NotFound
from controllers.service_api.dataset.metadata import (
DatasetMetadataBuiltInFieldActionServiceApi,
@ -28,6 +28,7 @@ from controllers.service_api.dataset.metadata import (
DatasetMetadataServiceApi,
DocumentMetadataEditServiceApi,
)
from services.errors.metadata_service import MetadataInUseError
from tests.unit_tests.controllers.service_api.conftest import _unwrap
@ -323,6 +324,70 @@ class TestDatasetMetadataServiceApiDelete:
metadata_id=metadata_id,
)
@patch("controllers.service_api.dataset.metadata.MetadataService")
@patch("controllers.service_api.dataset.metadata.DatasetService")
@patch("controllers.service_api.dataset.metadata.current_user")
def test_delete_metadata_referenced_by_pipeline(
self,
mock_current_user,
mock_dataset_svc,
mock_meta_svc,
app,
mock_tenant,
mock_dataset,
):
"""Test 400 when metadata is still referenced by a pipeline."""
metadata_id = str(uuid.uuid4())
mock_dataset_svc.get_dataset.return_value = mock_dataset
mock_dataset_svc.check_dataset_permission.return_value = None
mock_meta_svc.delete_metadata.side_effect = MetadataInUseError(
"This metadata is referenced by a pipeline and cannot be deleted."
)
with app.test_request_context(
f"/datasets/{mock_dataset.id}/metadata/{metadata_id}",
method="DELETE",
):
api = DatasetMetadataServiceApi()
with pytest.raises(BadRequest, match="referenced by a pipeline"):
self._call_delete(
api,
tenant_id=mock_tenant.id,
dataset_id=mock_dataset.id,
metadata_id=metadata_id,
)
@patch("controllers.service_api.dataset.metadata.MetadataService")
@patch("controllers.service_api.dataset.metadata.DatasetService")
@patch("controllers.service_api.dataset.metadata.current_user")
def test_delete_metadata_not_found(
self,
mock_current_user,
mock_dataset_svc,
mock_meta_svc,
app,
mock_tenant,
mock_dataset,
):
"""Test 404 when metadata does not exist."""
metadata_id = str(uuid.uuid4())
mock_dataset_svc.get_dataset.return_value = mock_dataset
mock_dataset_svc.check_dataset_permission.return_value = None
mock_meta_svc.delete_metadata.side_effect = NotFound("Metadata not found.")
with app.test_request_context(
f"/datasets/{mock_dataset.id}/metadata/{metadata_id}",
method="DELETE",
):
api = DatasetMetadataServiceApi()
with pytest.raises(NotFound, match="Metadata not found"):
self._call_delete(
api,
tenant_id=mock_tenant.id,
dataset_id=mock_dataset.id,
metadata_id=metadata_id,
)
# ---------------------------------------------------------------------------
# DatasetMetadataBuiltInFieldServiceApi

View File

@ -0,0 +1,67 @@
from unittest.mock import Mock, patch
from core.rag.index_processor.index_processor import IndexProcessor
from models.dataset import DatasetMetadataBinding
def test_save_doc_metadata_and_bindings_replaces_removed_metadata_and_bindings():
    """_save_doc_metadata_and_bindings drops stale metadata values/bindings and adds new ones.

    The document starts with ``old_field`` (whose metadata is no longer bound) and
    ``keep_field``; after the call, ``old_field`` is removed, ``keep_field`` survives,
    ``new_field`` is written, old bindings are bulk-deleted, and a fresh
    DatasetMetadataBinding row is added for the new metadata id.
    """
    processor = IndexProcessor()
    session = Mock()
    # Metadata definition that the caller is binding to the document.
    new_metadata = Mock()
    new_metadata.id = "meta-new"
    new_metadata.name = "new_field"
    # Metadata definition that was previously bound but is being removed.
    old_metadata = Mock()
    old_metadata.id = "meta-old"
    old_metadata.name = "old_field"
    old_binding = Mock(spec=DatasetMetadataBinding)
    old_binding.metadata_id = "meta-old"
    # session.scalars is consumed three times by the implementation, in order:
    # 1) metadata rows for the requested ids, 2) existing bindings for the
    # document, 3) all metadata rows for the dataset (assumption inferred from
    # the ordered side_effect below — confirm against the implementation).
    first_scalars_result = Mock()
    first_scalars_result.all.return_value = [new_metadata]
    second_scalars_result = Mock()
    second_scalars_result.all.return_value = [old_binding]
    third_scalars_result = Mock()
    third_scalars_result.all.return_value = [old_metadata, new_metadata]
    session.scalars.side_effect = [
        first_scalars_result,
        second_scalars_result,
        third_scalars_result,
    ]
    # session.query(...).where(...).delete(...) path used for the bulk binding delete.
    delete_query = Mock()
    delete_where = Mock()
    delete_where.delete.return_value = 1
    delete_query.where.return_value = delete_where
    session.query.return_value = delete_query
    document = Mock()
    document.id = "doc-1"
    document.doc_metadata = {
        "old_field": "stale",
        "keep_field": "keep",
    }
    # flag_modified is patched out because doc_metadata here is a plain dict on a Mock.
    with patch("core.rag.index_processor.index_processor.attributes.flag_modified"):
        processor._save_doc_metadata_and_bindings(
            session=session,
            dataset_id="dataset-1",
            tenant_id="tenant-1",
            document=document,
            doc_metadata={"meta-new": "new_value"},
            metadata_binding_ids=["meta-new"],
            user_id="user-1",
        )
    # Stale key removed, untouched key preserved, new key written by name.
    assert document.doc_metadata == {
        "keep_field": "keep",
        "new_field": "new_value",
    }
    # Old bindings are removed with a bulk delete (no session sync).
    delete_where.delete.assert_called_once_with(synchronize_session=False)
    # A new binding row is created for the new metadata id.
    binding_instance = session.add.call_args_list[0].args[0]
    assert isinstance(binding_instance, DatasetMetadataBinding)
    assert binding_instance.document_id == "doc-1"
    assert binding_instance.metadata_id == "meta-new"

View File

@ -5,7 +5,7 @@ from unittest.mock import Mock
import pytest
from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom
from core.workflow.nodes.knowledge_index.entities import KnowledgeIndexNodeData
from core.workflow.nodes.knowledge_index.entities import DocMetadata, KnowledgeIndexNodeData
from core.workflow.nodes.knowledge_index.exc import KnowledgeIndexNodeError
from core.workflow.nodes.knowledge_index.knowledge_index_node import KnowledgeIndexNode
from core.workflow.nodes.knowledge_index.protocols import (
@ -512,6 +512,130 @@ class TestKnowledgeIndexNode:
assert "Unexpected error" in result.error
assert result.error_type == "Exception"
    def test_run_with_doc_metadata(
        self,
        mock_graph_init_params,
        mock_graph_runtime_state,
        mock_index_processor,
        mock_summary_index_service,
        sample_chunks,
    ):
        """Test _run resolves doc_metadata from variable pool and passes to index_processor."""
        # Arrange
        dataset_id = str(uuid.uuid4())
        document_id = str(uuid.uuid4())
        batch = "batch_123"
        meta_id = "meta_uuid_1"
        chunks_selector = ["start", "chunks"]
        # System variables the node reads during _run.
        mock_graph_runtime_state.variable_pool.add(
            ["sys", SystemVariableKey.DATASET_ID],
            StringSegment(value=dataset_id),
        )
        mock_graph_runtime_state.variable_pool.add(
            ["sys", SystemVariableKey.DOCUMENT_ID],
            StringSegment(value=document_id),
        )
        mock_graph_runtime_state.variable_pool.add(
            ["sys", SystemVariableKey.BATCH],
            StringSegment(value=batch),
        )
        mock_graph_runtime_state.variable_pool.add(
            ["sys", SystemVariableKey.INVOKE_FROM],
            StringSegment(value=InvokeFrom.SERVICE_API),
        )
        mock_graph_runtime_state.variable_pool.add(chunks_selector, sample_chunks)
        # The metadata value the node should resolve via the ["start", "category"] selector.
        mock_graph_runtime_state.variable_pool.add(["start", "category"], StringSegment(value="Financial"))
        mock_index_processor.index_and_clean.return_value = {"status": "indexed"}
        node_data = KnowledgeIndexNodeData(
            title="Knowledge Index",
            type="knowledge-index",
            chunk_structure="general_structure",
            index_chunk_variable_selector=chunks_selector,
            # Maps the metadata id to the variable selector holding its value.
            doc_metadata=[DocMetadata(metadata_id=meta_id, value=["start", "category"])],
        )
        node_id = str(uuid.uuid4())
        config = {"id": node_id, "data": node_data.model_dump()}
        node = KnowledgeIndexNode(
            id=node_id,
            config=config,
            graph_init_params=mock_graph_init_params,
            graph_runtime_state=mock_graph_runtime_state,
        )
        # Act
        result = node._run()
        # Assert
        assert result.status == WorkflowNodeExecutionStatus.SUCCEEDED
        # The resolved value must be forwarded keyed by metadata id, and the id
        # must be included in the binding list passed to the index processor.
        call_kwargs = mock_index_processor.index_and_clean.call_args.kwargs
        assert call_kwargs["doc_metadata"] == {meta_id: "Financial"}
        assert meta_id in call_kwargs["metadata_binding_ids"]
    def test_run_with_missing_metadata_variable_fails(
        self,
        mock_graph_init_params,
        mock_graph_runtime_state,
        mock_index_processor,
        mock_summary_index_service,
        sample_chunks,
    ):
        """Test _run fails when a metadata variable selector is not in the pool."""
        # Arrange
        dataset_id = str(uuid.uuid4())
        document_id = str(uuid.uuid4())
        batch = "batch_123"
        chunks_selector = ["start", "chunks"]
        # Populate only the system variables and chunks; the metadata selector
        # referenced by node_data is deliberately left unset.
        mock_graph_runtime_state.variable_pool.add(
            ["sys", SystemVariableKey.DATASET_ID],
            StringSegment(value=dataset_id),
        )
        mock_graph_runtime_state.variable_pool.add(
            ["sys", SystemVariableKey.DOCUMENT_ID],
            StringSegment(value=document_id),
        )
        mock_graph_runtime_state.variable_pool.add(
            ["sys", SystemVariableKey.BATCH],
            StringSegment(value=batch),
        )
        mock_graph_runtime_state.variable_pool.add(
            ["sys", SystemVariableKey.INVOKE_FROM],
            StringSegment(value=InvokeFrom.SERVICE_API),
        )
        mock_graph_runtime_state.variable_pool.add(chunks_selector, sample_chunks)
        # NOTE: "start.missing" is NOT added to the pool
        node_data = KnowledgeIndexNodeData(
            title="Knowledge Index",
            type="knowledge-index",
            chunk_structure="general_structure",
            index_chunk_variable_selector=chunks_selector,
            doc_metadata=[DocMetadata(metadata_id="meta_uuid_1", value=["start", "missing"])],
        )
        node_id = str(uuid.uuid4())
        config = {"id": node_id, "data": node_data.model_dump()}
        node = KnowledgeIndexNode(
            id=node_id,
            config=config,
            graph_init_params=mock_graph_init_params,
            graph_runtime_state=mock_graph_runtime_state,
        )
        # Act
        result = node._run()
        # Assert
        # The node must fail fast, name the missing selector in the error, and
        # never reach the indexing step.
        assert result.status == WorkflowNodeExecutionStatus.FAILED
        assert "start.missing" in result.error
        mock_index_processor.index_and_clean.assert_not_called()
def test_invoke_knowledge_index(
self,
mock_graph_init_params,
@ -645,6 +769,14 @@ class TestInvokeKnowledgeIndex:
dataset_id, document_id, False, summary_setting
)
mock_index_processor.index_and_clean.assert_called_once_with(
dataset_id, document_id, original_document_id, chunks, batch, summary_setting
dataset_id,
document_id,
original_document_id,
chunks,
batch,
summary_setting,
doc_metadata=None,
metadata_binding_ids=None,
user_id=mock_graph_init_params.run_context["_dify"].user_id,
)
assert result == {"status": "indexed"}

View File

@ -87,12 +87,12 @@ This test suite follows a comprehensive testing strategy that covers:
from unittest.mock import Mock, patch
import pytest
from werkzeug.exceptions import NotFound
from core.rag.index_processor.constant.built_in_field import BuiltInField
from models.dataset import Dataset, DatasetMetadata, DatasetMetadataBinding
from services.entities.knowledge_entities.knowledge_entities import (
MetadataArgs,
MetadataValue,
)
from services.metadata_service import MetadataService
@ -308,7 +308,7 @@ class MetadataTestDataFactory:
value: str = "test",
) -> Mock:
"""
Create a mock MetadataValue entity.
Create a mock metadata value entity.
Args:
metadata_id: ID of the metadata field
@ -316,9 +316,9 @@ class MetadataTestDataFactory:
value: Value of the metadata
Returns:
Mock object configured as a MetadataValue instance
Mock object configured with metadata value fields
"""
metadata_value = Mock(spec=MetadataValue)
metadata_value = Mock()
metadata_value.id = metadata_id
metadata_value.name = name
metadata_value.value = value
@ -775,7 +775,7 @@ class TestMetadataServiceDeleteMetadata:
"""
Test error handling when metadata is not found.
Verifies that when the metadata ID doesn't exist, a ValueError
Verifies that when the metadata ID doesn't exist, a NotFound
is raised and the lock is properly released.
This test ensures:
@ -794,7 +794,7 @@ class TestMetadataServiceDeleteMetadata:
mock_db_session.query.return_value = mock_query
# Act & Assert
with pytest.raises(ValueError, match="Metadata not found"):
with pytest.raises(NotFound, match="Metadata not found"):
MetadataService.delete_metadata(dataset_id, metadata_id)
# Verify lock was released

View File

@ -87,6 +87,7 @@ def test_save_document_with_dataset_id_ignores_lock_not_owned(
process_rule=None,
duplicate=False,
doc_language="en",
doc_metadata=None,
)
account = fake_current_user

View File

@ -0,0 +1,310 @@
from unittest.mock import Mock, patch
from uuid import uuid4
import pytest
from models.account import Account
from models.dataset import Dataset, DatasetMetadata, DatasetMetadataBinding, Document
from models.model import UploadFile
from services.dataset_service import DocumentService
from services.entities.knowledge_entities.knowledge_entities import (
DataSource,
DocumentMetadataInput,
FileInfo,
InfoList,
KnowledgeConfig,
)
class TestDocumentServiceMetadata:
    """Tests for DocumentService.save_document_with_dataset_id metadata handling.

    Covers three flows: creating a new document with custom metadata, attaching
    metadata to a duplicate (existing) document, and re-indexing a document where
    removed metadata values and bindings must be replaced.
    """

    @pytest.fixture
    def mock_dependencies(self):
        """Patch the service's collaborators and yield them keyed by short name."""
        with (
            patch("services.dataset_service.db.session") as mock_db,
            patch("services.dataset_service.DatasetService.get_dataset") as mock_get_dataset,
            patch("services.dataset_service.redis_client") as mock_redis,
            patch("services.dataset_service.DocumentService.build_document") as mock_build_document,
            patch("services.dataset_service.current_user") as mock_current_user,
            patch("services.dataset_service.DocumentIndexingTaskProxy") as mock_indexing_task,
            patch("services.dataset_service.DuplicateDocumentIndexingTaskProxy") as mock_duplicate_indexing_task,
            # We don't patch DocumentService.save_document_with_dataset_id as that's what we are testing
        ):
            # Hack to pass isinstance check
            mock_current_user.__class__ = Account
            mock_current_user.current_tenant_id = "tenant-123"
            yield {
                "db": mock_db,
                "get_dataset": mock_get_dataset,
                "redis": mock_redis,
                "build_document": mock_build_document,
                "current_user": mock_current_user,
            }

    def test_save_document_with_metadata(self, mock_dependencies):
        """New document: custom metadata reaches build_document and a binding row is added."""
        # Arrange
        dataset_id = str(uuid4())
        tenant_id = str(uuid4())
        account = Mock(spec=Account)
        account.id = "account-1"
        account.current_tenant_id = tenant_id
        dataset = Mock(spec=Dataset)
        dataset.id = dataset_id
        dataset.tenant_id = tenant_id
        dataset.built_in_field_enabled = False
        dataset.doc_form = "text_model"
        mock_dependencies["get_dataset"].return_value = dataset
        # Define metadata inputs
        metadata_id = str(uuid4())
        doc_metadata_inputs = [DocumentMetadataInput(metadata_id=metadata_id, value="custom_value")]
        # Knowledge config
        knowledge_config = KnowledgeConfig(
            data_source_type="upload_file",
            data_source=DataSource(
                info_list=InfoList(data_source_type="upload_file", file_info_list=FileInfo(file_ids=["file-1"]))
            ),
            doc_form="text_model",
            doc_language="en",
            indexing_technique="high_quality",
            doc_metadata=doc_metadata_inputs,
        )
        # Mock local file for upload_file type
        with patch("services.dataset_service.db.session.query") as mock_query:
            # Mock DatasetMetadata lookup
            mock_metadata_def = Mock(spec=DatasetMetadata)
            mock_metadata_def.id = metadata_id
            mock_metadata_def.name = "custom_field"
            mock_metadata_def.field_type = "text"

            # Create a side effect for query(Model)
            def query_side_effect(*models):
                # Dispatch on the model(s) queried so each ORM chain used by the
                # implementation gets a matching canned result.
                m = Mock()
                if len(models) == 1 and models[0] == DatasetMetadata:
                    m.filter.return_value.filter.return_value.first.return_value = mock_metadata_def
                    # handle the specific chain in code
                    m.filter_by.return_value.first.return_value = mock_metadata_def
                    m.filter.return_value.all.return_value = [mock_metadata_def]
                    return m
                if len(models) == 1 and models[0] == Document:
                    doc_mock = Mock()
                    doc_mock.position = 1
                    # For get_documents_position
                    m.filter_by.return_value.order_by.return_value.first.return_value = doc_mock
                    # For duplicate check
                    m.where.return_value.all.return_value = []
                    return m
                if len(models) == 1 and models[0] == UploadFile:
                    m.where.return_value.all.return_value = [Mock(id="file-1", tenant_id=tenant_id)]
                    return m
                if len(models) == 2:
                    m.filter.return_value.all.return_value = []
                    return m
                return m

            mock_query.side_effect = query_side_effect
            # Mock build_document to return a document
            mock_document = Mock(spec=Document)
            mock_document.id = "doc-123"
            mock_document.doc_metadata = {}
            mock_dependencies["build_document"].return_value = mock_document
            # Act
            DocumentService.save_document_with_dataset_id(
                dataset=dataset, knowledge_config=knowledge_config, account=account
            )
            # Assert
            # 1. Check custom metadata passed to build_document
            call_args = mock_dependencies["build_document"].call_args
            assert call_args is not None
            _, kwargs = call_args
            assert "custom_metadata" in kwargs
            assert kwargs["custom_metadata"] == {"custom_field": "custom_value"}
            # 2. Check DatasetMetadataBinding creation
            binding_instances = [
                call.args[0]
                for call in mock_dependencies["db"].add.call_args_list
                if isinstance(call.args[0], DatasetMetadataBinding)
            ]
            assert len(binding_instances) == 1
            assert binding_instances[0].document_id == "doc-123"
            assert binding_instances[0].metadata_id == metadata_id

    def test_save_duplicate_document_with_metadata_creates_binding(self, mock_dependencies):
        """Duplicate flow: the existing document is updated in place and gets a binding."""
        # Arrange
        dataset_id = str(uuid4())
        tenant_id = str(uuid4())
        account = Mock(spec=Account)
        account.id = "account-1"
        account.current_tenant_id = tenant_id
        dataset = Mock(spec=Dataset)
        dataset.id = dataset_id
        dataset.tenant_id = tenant_id
        dataset.built_in_field_enabled = False
        dataset.doc_form = "text_model"
        mock_dependencies["get_dataset"].return_value = dataset
        metadata_id = str(uuid4())
        knowledge_config = KnowledgeConfig(
            data_source_type="upload_file",
            data_source=DataSource(
                info_list=InfoList(data_source_type="upload_file", file_info_list=FileInfo(file_ids=["file-1"]))
            ),
            doc_form="text_model",
            doc_language="en",
            indexing_technique="high_quality",
            duplicate=True,
            doc_metadata=[DocumentMetadataInput(metadata_id=metadata_id, value="custom_value")],
        )
        # Pre-existing document with the same name, so the duplicate path is taken.
        existing_document = Mock(spec=Document)
        existing_document.id = "dup-doc-1"
        existing_document.name = "dup.txt"
        existing_document.doc_metadata = {"existing_field": "existing_value"}
        with patch("services.dataset_service.db.session.query") as mock_query:
            mock_metadata_def = Mock(spec=DatasetMetadata)
            mock_metadata_def.id = metadata_id
            mock_metadata_def.name = "custom_field"
            mock_metadata_def.field_type = "text"

            def query_side_effect(*models):
                # Same dispatch idea as above; the Document query now returns the
                # existing document so the service treats the upload as a duplicate.
                m = Mock()
                if len(models) == 1 and models[0] == DatasetMetadata:
                    m.filter.return_value.all.return_value = [mock_metadata_def]
                    return m
                if len(models) == 1 and models[0] == Document:
                    doc_mock = Mock()
                    doc_mock.position = 1
                    m.filter_by.return_value.order_by.return_value.first.return_value = doc_mock
                    m.where.return_value.all.return_value = [existing_document]
                    return m
                if len(models) == 1 and models[0] == UploadFile:
                    # `name` can't be set via Mock(...) kwargs, so assign it afterwards.
                    file_mock = Mock(id="file-1", tenant_id=tenant_id)
                    file_mock.name = "dup.txt"
                    m.where.return_value.all.return_value = [file_mock]
                    return m
                if len(models) == 2:
                    m.filter.return_value.all.return_value = []
                    return m
                return m

            mock_query.side_effect = query_side_effect
            # Act
            DocumentService.save_document_with_dataset_id(
                dataset=dataset, knowledge_config=knowledge_config, account=account
            )
            # Assert
            # No new document is built; metadata is merged onto the existing one.
            mock_dependencies["build_document"].assert_not_called()
            assert existing_document.doc_metadata["custom_field"] == "custom_value"
            binding_instances = [
                call.args[0]
                for call in mock_dependencies["db"].add.call_args_list
                if isinstance(call.args[0], DatasetMetadataBinding)
            ]
            assert any(
                binding.document_id == existing_document.id and binding.metadata_id == metadata_id
                for binding in binding_instances
            )

    def test_reindex_document_replaces_removed_metadata_and_bindings(self, mock_dependencies):
        """Re-index flow: stale metadata values/bindings are replaced, others preserved."""
        dataset_id = str(uuid4())
        tenant_id = str(uuid4())
        account = Mock(spec=Account)
        account.id = "account-1"
        account.current_tenant_id = tenant_id
        dataset = Mock(spec=Dataset)
        dataset.id = dataset_id
        dataset.tenant_id = tenant_id
        dataset.built_in_field_enabled = False
        dataset.doc_form = "text_model"
        dataset.indexing_technique = "high_quality"
        old_metadata_id = str(uuid4())
        new_metadata_id = str(uuid4())
        existing_document = Mock(spec=Document)
        existing_document.id = "doc-1"
        existing_document.batch = "batch-1"
        # "old_field" should be dropped; "unchanged_field" is not managed metadata
        # and must survive untouched.
        existing_document.doc_metadata = {
            "old_field": "stale",
            "unchanged_field": "keep",
        }
        # original_document_id triggers the update/re-index path.
        knowledge_config = KnowledgeConfig(
            original_document_id=existing_document.id,
            doc_form="text_model",
            doc_language="en",
            indexing_technique="high_quality",
            doc_metadata=[DocumentMetadataInput(metadata_id=new_metadata_id, value="new_value")],
        )
        old_binding = Mock(spec=DatasetMetadataBinding)
        old_binding.metadata_id = old_metadata_id
        old_metadata_def = Mock(spec=DatasetMetadata)
        old_metadata_def.id = old_metadata_id
        old_metadata_def.name = "old_field"
        new_metadata_def = Mock(spec=DatasetMetadata)
        new_metadata_def.id = new_metadata_id
        new_metadata_def.name = "new_field"
        with (
            patch("services.dataset_service.DatasetService.check_doc_form"),
            patch("services.dataset_service.FeatureService.get_features") as mock_get_features,
            patch(
                "services.dataset_service.DocumentService.update_document_with_dataset_id",
                return_value=existing_document,
            ),
            patch("services.dataset_service.db.session.query") as mock_query,
            patch("sqlalchemy.orm.attributes.flag_modified"),
        ):
            # Billing disabled so no quota checks interfere.
            mock_get_features.return_value = Mock(billing=Mock(enabled=False))

            def query_side_effect(*models):
                m = Mock()
                if len(models) == 1 and models[0] == DatasetMetadata:
                    m.filter.return_value.all.return_value = [old_metadata_def, new_metadata_def]
                    return m
                if len(models) == 1 and models[0] == DatasetMetadataBinding:
                    m.filter_by.return_value.all.return_value = [old_binding]
                    m.filter.return_value.delete.return_value = 1
                    return m
                return m

            mock_query.side_effect = query_side_effect
            documents, batch = DocumentService.save_document_with_dataset_id(
                dataset=dataset,
                knowledge_config=knowledge_config,
                account=account,
            )
            assert documents == [existing_document]
            assert batch == "batch-1"
            # Stale field removed, unmanaged field kept, new field written by name.
            assert existing_document.doc_metadata == {
                "unchanged_field": "keep",
                "new_field": "new_value",
            }
            # Exactly one fresh binding for the new metadata id.
            binding_instances = [
                call.args[0]
                for call in mock_dependencies["db"].add.call_args_list
                if isinstance(call.args[0], DatasetMetadataBinding)
            ]
            assert len(binding_instances) == 1
            assert binding_instances[0].document_id == existing_document.id
            assert binding_instances[0].metadata_id == new_metadata_id

View File

@ -0,0 +1,128 @@
from unittest.mock import MagicMock, Mock, patch
from services.metadata_service import MetadataService
class TestMetadataServicePipelineGuard:
    """Tests for MetadataService's pipeline-reference guard helpers.

    Covers recursive collection of metadata_id references from arbitrary payloads,
    candidate-set filtering, the Redis read-through cache (hit / miss / bypass),
    and the ``is_referenced_by_pipeline`` flag in get_dataset_metadatas output.
    """

    def test_collect_referenced_metadata_ids_from_nested_payload(self):
        """Every metadata_id in a nested dict payload is collected recursively."""
        payload = {
            "metadata_filtering_conditions": {
                "conditions": [
                    {"id": "cond-1", "metadata_id": "meta-1"},
                    {"id": "cond-2", "group": {"metadata_id": "meta-2"}},
                ],
            },
            "node": {
                "data": {
                    "doc_metadata": [
                        {"metadata_id": "meta-3"},
                    ],
                },
            },
        }
        referenced_ids: set[str] = set()
        MetadataService._collect_referenced_metadata_ids(payload, referenced_ids)
        # All metadata_ids found in the payload are collected (no candidate filter)
        assert referenced_ids == {"meta-1", "meta-2", "meta-3"}

    def test_collect_referenced_metadata_ids_list_root(self):
        """Collection also works when the payload root is a list, skipping items without ids."""
        payload = [
            {"metadata_id": "meta-a"},
            {"nested": {"metadata_id": "meta-b"}},
            {"no_metadata": True},
        ]
        referenced_ids: set[str] = set()
        MetadataService._collect_referenced_metadata_ids(payload, referenced_ids)
        assert referenced_ids == {"meta-a", "meta-b"}

    def test_get_referenced_metadata_ids_filters_by_candidate_set(self):
        """Only IDs that are both referenced AND in metadata_ids are returned."""
        with (
            patch.object(
                MetadataService,
                "_scan_all_referenced_metadata_ids",
                return_value={"meta-1", "meta-2", "meta-3"},
            ),
            patch("services.metadata_service.redis_client") as mock_redis,
        ):
            mock_redis.get.return_value = None  # cache miss
            result = MetadataService._get_referenced_metadata_ids("tenant-1", {"meta-2", "meta-9"})
            # Intersection of referenced ids and the candidate set.
            assert result == {"meta-2"}

    def test_get_referenced_metadata_ids_uses_cache(self):
        """Cache hit returns the intersection without calling the DB scanner."""
        import json

        cached_ids = ["meta-1", "meta-2"]
        with (
            patch("services.metadata_service.redis_client") as mock_redis,
            patch.object(MetadataService, "_scan_all_referenced_metadata_ids") as mock_scan,
        ):
            # Redis returns the cached id list as JSON-encoded bytes.
            mock_redis.get.return_value = json.dumps(cached_ids).encode()
            result = MetadataService._get_referenced_metadata_ids("tenant-1", {"meta-2", "meta-9"})
            mock_scan.assert_not_called()
            assert result == {"meta-2"}

    def test_get_referenced_metadata_ids_bypass_cache_skips_redis(self):
        """bypass_cache=True skips the Redis read and always scans the DB."""
        with (
            patch("services.metadata_service.redis_client") as mock_redis,
            patch.object(
                MetadataService,
                "_scan_all_referenced_metadata_ids",
                return_value={"meta-1"},
            ) as mock_scan,
        ):
            result = MetadataService._get_referenced_metadata_ids("tenant-1", {"meta-1"}, bypass_cache=True)
            mock_redis.get.assert_not_called()
            mock_scan.assert_called_once_with("tenant-1")
            assert result == {"meta-1"}

    def test_get_dataset_metadatas_marks_pipeline_references(self):
        """Each metadata entry carries its usage count and a pipeline-reference flag."""
        dataset = Mock()
        dataset.id = "dataset-1"
        dataset.tenant_id = "tenant-1"
        dataset.built_in_field_enabled = False
        dataset.doc_metadata = [
            {"id": "meta-1", "name": "author", "type": "string"},
            {"id": "meta-2", "name": "category", "type": "string"},
        ]
        # One count() per metadata entry, in order: meta-1 -> 2, meta-2 -> 5.
        query_mock = MagicMock()
        query_mock.filter_by.return_value = query_mock
        query_mock.count.side_effect = [2, 5]
        with (
            patch.object(MetadataService, "_get_referenced_metadata_ids", return_value={"meta-2"}),
            patch("services.metadata_service.db.session.query", return_value=query_mock),
        ):
            result = MetadataService.get_dataset_metadatas(dataset)
        # meta-2 is flagged because it appears in the referenced-id set.
        assert result == {
            "doc_metadata": [
                {
                    "id": "meta-1",
                    "name": "author",
                    "type": "string",
                    "count": 2,
                    "is_referenced_by_pipeline": False,
                },
                {
                    "id": "meta-2",
                    "name": "category",
                    "type": "string",
                    "count": 5,
                    "is_referenced_by_pipeline": True,
                },
            ],
            "built_in_field_enabled": False,
        }

View File

@ -0,0 +1,118 @@
import type { DataSet } from '@/models/datasets'
import { render, screen } from '@testing-library/react'
import * as React from 'react'
import { IndexingType } from '@/app/components/datasets/create/step-two'
import DatasetDetailLayout from './layout-main'
// Mutable module-level state so individual tests can swap the mocked
// pathname/dataset before rendering; reset in beforeEach below.
let mockPathname = '/datasets/dataset-1/documents'
let mockDataset: Partial<DataSet> | undefined = {
  id: 'dataset-1',
  name: 'Pipeline Dataset',
  provider: 'vendor',
  runtime_mode: 'rag_pipeline',
  is_published: false,
  indexing_technique: IndexingType.QUALIFIED,
  document_count: 2,
}

const mockSetAppSidebarExpand = vi.fn()

vi.mock('next/navigation', () => ({
  usePathname: () => mockPathname,
}))

// Replace the sidebar with plain divs exposing each nav item's disabled flag,
// so assertions can read `data-disabled` per menu entry.
vi.mock('@/app/components/app-sidebar', () => ({
  default: ({ navigation }: { navigation: Array<{ name: string, disabled?: boolean }> }) => (
    <div data-testid="app-sidebar">
      {navigation.map(item => (
        <div
          key={item.name}
          data-testid={`nav-${item.name}`}
          data-disabled={String(Boolean(item.disabled))}
        >
          {item.name}
        </div>
      ))}
    </div>
  ),
}))

vi.mock('@/app/components/base/loading', () => ({
  default: () => <div data-testid="loading" />,
}))

vi.mock('@/app/components/datasets/extra-info', () => ({
  default: () => <div data-testid="extra-info" />,
}))

vi.mock('@/app/components/app/store', () => ({
  useStore: (selector: (state: { setAppSidebarExpand: typeof mockSetAppSidebarExpand }) => unknown) => selector({
    setAppSidebarExpand: mockSetAppSidebarExpand,
  }),
}))

vi.mock('@/context/app-context', () => ({
  useAppContext: () => ({
    isCurrentWorkspaceDatasetOperator: true,
  }),
}))

vi.mock('@/context/event-emitter', () => ({
  useEventEmitterContextContext: () => ({
    eventEmitter: {
      useSubscription: vi.fn(),
    },
  }),
}))

// Force desktop layout.
vi.mock('@/hooks/use-breakpoints', () => ({
  default: () => 'pc',
  MediaType: {
    mobile: 'mobile',
    tablet: 'tablet',
    pc: 'pc',
  },
}))

vi.mock('@/hooks/use-document-title', () => ({
  default: vi.fn(),
}))

// Dataset queries resolve to the mutable mockDataset above.
vi.mock('@/service/knowledge/use-dataset', () => ({
  useDatasetDetail: () => ({
    data: mockDataset,
    error: undefined,
    refetch: vi.fn(),
  }),
  useDatasetRelatedApps: () => ({
    data: [],
  }),
}))

describe('DatasetDetailLayout', () => {
  beforeEach(() => {
    vi.clearAllMocks()
    localStorage.clear()
    // Reset shared mock state mutated by individual tests.
    mockPathname = '/datasets/dataset-1/documents'
    mockDataset = {
      id: 'dataset-1',
      name: 'Pipeline Dataset',
      provider: 'vendor',
      runtime_mode: 'rag_pipeline',
      is_published: false,
      indexing_technique: IndexingType.QUALIFIED,
      document_count: 2,
    }
  })

  it('should keep documents navigation enabled when rag pipeline is unpublished', () => {
    render(
      <DatasetDetailLayout datasetId="dataset-1">
        <div>content</div>
      </DatasetDetailLayout>,
    )

    // Documents stays accessible on an unpublished pipeline; hit testing does not.
    expect(screen.getByTestId('nav-common.datasetMenus.documents')).toHaveAttribute('data-disabled', 'false')
    expect(screen.getByTestId('nav-common.datasetMenus.hitTesting')).toHaveAttribute('data-disabled', 'true')
  })
})

View File

@ -57,7 +57,7 @@ const DatasetDetailLayout: FC<IAppDetailLayoutProps> = (props) => {
const { data: relatedApps } = useDatasetRelatedApps(datasetId)
const isButtonDisabledWithPipeline = useMemo(() => {
const isHitTestingDisabled = useMemo(() => {
if (!datasetRes)
return true
if (datasetRes.provider === 'external')
@ -74,7 +74,7 @@ const DatasetDetailLayout: FC<IAppDetailLayoutProps> = (props) => {
href: `/datasets/${datasetId}/hitTesting`,
icon: RiFocus2Line,
selectedIcon: RiFocus2Fill,
disabled: isButtonDisabledWithPipeline,
disabled: isHitTestingDisabled,
},
{
name: t('datasetMenus.settings', { ns: 'common' }),
@ -98,12 +98,12 @@ const DatasetDetailLayout: FC<IAppDetailLayoutProps> = (props) => {
href: `/datasets/${datasetId}/documents`,
icon: RiFileTextLine,
selectedIcon: RiFileTextFill,
disabled: isButtonDisabledWithPipeline,
disabled: false,
})
}
return baseNavigation
}, [t, datasetId, isButtonDisabledWithPipeline, datasetRes?.provider])
}, [t, datasetId, isHitTestingDisabled, datasetRes?.provider])
useDocumentTitle(datasetRes?.name || t('menus.datasets', { ns: 'common' }))

View File

@ -0,0 +1,69 @@
'use client'
import type { ChangeEventHandler, CSSProperties } from 'react'
import * as React from 'react'
import { cn } from '@/utils/classnames'
export type InputNumberProps = {
  /** Controlled value; when provided the component is fully controlled. */
  value?: number
  /** Initial value for uncontrolled usage; ignored when `value` is provided. */
  defaultValue?: number
  min?: number
  max?: number
  step?: number
  /** Called with the parsed number, or null when the field is cleared. */
  onChange?: (value: number | null) => void
  readOnly?: boolean
  disabled?: boolean
  className?: string
  style?: CSSProperties
  size?: 'regular' | 'large'
}

/**
 * Minimal unstyled numeric input.
 *
 * Parses the raw input on every change: an empty field reports `null`,
 * any parseable number is forwarded as-is, and non-numeric input is ignored.
 */
const InputNumber = React.forwardRef<HTMLInputElement, InputNumberProps>(({
  value,
  defaultValue,
  min,
  max,
  step = 1,
  onChange,
  readOnly,
  disabled,
  className,
  style,
  size = 'regular',
}, ref) => {
  const handleChange: ChangeEventHandler<HTMLInputElement> = (e) => {
    const val = e.target.value
    // Cleared field -> report null rather than NaN.
    if (val === '') {
      onChange?.(null)
      return
    }
    const num = Number.parseFloat(val)
    if (!Number.isNaN(num))
      onChange?.(num)
  }

  // Only pass `value` when the caller actually provides one; otherwise fall
  // back to `defaultValue` (uncontrolled). Previously `value ?? ''` was always
  // passed alongside `defaultValue`, which pinned the input to an empty
  // controlled value — `defaultValue` never rendered and React warned about
  // mixing controlled and uncontrolled props.
  const valueProps = value !== undefined ? { value } : { defaultValue }

  return (
    <input
      ref={ref}
      type="number"
      {...valueProps}
      min={min}
      max={max}
      step={step}
      onChange={handleChange}
      readOnly={readOnly}
      disabled={disabled}
      style={style}
      className={cn(
        'w-full appearance-none bg-transparent text-components-input-text-filled outline-none placeholder:text-components-input-text-placeholder',
        size === 'regular' && 'text-[13px]',
        size === 'large' && 'text-[14px]',
        className,
      )}
    />
  )
})
InputNumber.displayName = 'InputNumber'

export default InputNumber

View File

@ -33,6 +33,7 @@ vi.mock('@/context/dataset-detail', () => ({
embedding_available: true,
data_source_type: DataSourceType.FILE,
runtime_mode: 'rag',
is_published: true,
},
}
return selector(mockState as MockState)
@ -144,6 +145,7 @@ vi.mock('../components/documents-header', () => ({
default: ({
datasetId,
embeddingAvailable,
canAddDocument,
onInputChange,
onAddDocument,
onStatusFilterChange,
@ -153,6 +155,7 @@ vi.mock('../components/documents-header', () => ({
datasetId: string
dataSourceType?: string
embeddingAvailable: boolean
canAddDocument: boolean
isFreePlan: boolean
statusFilterValue: string
sortValue: string
@ -176,6 +179,7 @@ vi.mock('../components/documents-header', () => ({
<div data-testid="documents-header">
<span data-testid="header-dataset-id">{datasetId}</span>
<span data-testid="header-embedding-available">{String(embeddingAvailable)}</span>
<span data-testid="header-can-add-document">{String(canAddDocument)}</span>
<input
data-testid="search-input"
onChange={e => onInputChange(e.target.value)}
@ -278,6 +282,7 @@ describe('Documents', () => {
embedding_available: true,
data_source_type: DataSourceType.FILE,
runtime_mode: 'rag',
is_published: true,
},
}
return selector(mockState as MockState)
@ -294,6 +299,7 @@ describe('Documents', () => {
render(<Documents {...defaultProps} />)
expect(screen.getByTestId('header-dataset-id')).toHaveTextContent('test-dataset-id')
expect(screen.getByTestId('header-embedding-available')).toHaveTextContent('true')
expect(screen.getByTestId('header-can-add-document')).toHaveTextContent('true')
})
it('should render document list when documents exist', () => {
@ -362,6 +368,7 @@ describe('Documents', () => {
embedding_available: true,
data_source_type: DataSourceType.NOTION,
runtime_mode: 'rag',
is_published: true,
},
}
return selector(mockState as MockState)
@ -466,6 +473,7 @@ describe('Documents', () => {
embedding_available: true,
data_source_type: DataSourceType.FILE,
runtime_mode: 'rag_pipeline',
is_published: true,
},
}
return selector(mockState as MockState)
@ -478,6 +486,30 @@ describe('Documents', () => {
expect(mockPush).toHaveBeenCalledWith('/datasets/test-dataset-id/documents/create-from-pipeline')
})
it('should not navigate to create page when rag pipeline is unpublished', () => {
vi.mocked(useDatasetDetailContextWithSelector).mockImplementation((selector: MockSelector) => {
const mockState = {
dataset: {
id: 'test-dataset-id',
name: 'Test Dataset',
embedding_available: true,
data_source_type: DataSourceType.FILE,
runtime_mode: 'rag_pipeline',
is_published: false,
},
}
return selector(mockState as MockState)
})
render(<Documents {...defaultProps} />)
expect(screen.getByTestId('header-can-add-document')).toHaveTextContent('false')
screen.getByTestId('add-document-btn').click()
expect(mockPush).not.toHaveBeenCalled()
})
it('should navigate from empty element add button', () => {
vi.mocked(useDatasetDetailContextWithSelector).mockImplementation((selector: MockSelector) => {
const mockState = {
@ -487,6 +519,7 @@ describe('Documents', () => {
embedding_available: true,
data_source_type: DataSourceType.FILE,
runtime_mode: 'rag',
is_published: true,
},
}
return selector(mockState as MockState)
@ -649,6 +682,7 @@ describe('Documents', () => {
embedding_available: false,
data_source_type: DataSourceType.FILE,
runtime_mode: 'rag',
is_published: true,
},
}
return selector(mockState as MockState)

View File

@ -4,6 +4,15 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'
import { DataSourceType } from '@/models/datasets'
import DocumentsHeader from '../documents-header'
const LIST_TITLE_RE = /list\.title/i
const LIST_DESC_RE = /list\.desc/i
const LIST_LEARN_MORE_RE = /list\.learnMore/i
const METADATA_RE = /metadata\.metadata/i
const ADD_FILE_RE = /list\.addFile/i
const ADD_PAGES_RE = /list\.addPages/i
const ADD_URL_RE = /list\.addUrl/i
const CURRENT_DRAFT_UNPUBLISHED_RE = /workflow\.common\.currentDraftUnpublished/i
// Mock the context hooks
vi.mock('@/context/i18n', () => ({
useDocLink: () => (path: string) => `https://docs.example.com${path}`,
@ -32,6 +41,7 @@ describe('DocumentsHeader', () => {
datasetId: 'dataset-123',
dataSourceType: DataSourceType.FILE,
embeddingAvailable: true,
canAddDocument: true,
isFreePlan: false,
statusFilterValue: 'all',
sortValue: 'created_at' as SortType,
@ -60,23 +70,23 @@ describe('DocumentsHeader', () => {
describe('Rendering', () => {
it('should render without crashing', () => {
render(<DocumentsHeader {...defaultProps} />)
expect(screen.getByText(/list\.title/i)).toBeInTheDocument()
expect(screen.getByText(LIST_TITLE_RE)).toBeInTheDocument()
})
it('should render title', () => {
render(<DocumentsHeader {...defaultProps} />)
expect(screen.getByRole('heading', { level: 1 })).toHaveTextContent(/list\.title/i)
expect(screen.getByRole('heading', { level: 1 })).toHaveTextContent(LIST_TITLE_RE)
})
it('should render description text', () => {
render(<DocumentsHeader {...defaultProps} />)
expect(screen.getByText(/list\.desc/i)).toBeInTheDocument()
expect(screen.getByText(LIST_DESC_RE)).toBeInTheDocument()
})
it('should render learn more link', () => {
render(<DocumentsHeader {...defaultProps} />)
const link = screen.getByRole('link')
expect(link).toHaveTextContent(/list\.learnMore/i)
expect(link).toHaveTextContent(LIST_LEARN_MORE_RE)
expect(link).toHaveAttribute('href', expect.stringContaining('use-dify/knowledge'))
expect(link).toHaveAttribute('target', '_blank')
expect(link).toHaveAttribute('rel', 'noopener noreferrer')
@ -110,35 +120,45 @@ describe('DocumentsHeader', () => {
describe('Embedding Availability', () => {
it('should show metadata button when embedding is available', () => {
render(<DocumentsHeader {...defaultProps} embeddingAvailable={true} />)
expect(screen.getByText(/metadata\.metadata/i)).toBeInTheDocument()
expect(screen.getByText(METADATA_RE)).toBeInTheDocument()
})
it('should show add document button when embedding is available', () => {
render(<DocumentsHeader {...defaultProps} embeddingAvailable={true} />)
expect(screen.getByText(/list\.addFile/i)).toBeInTheDocument()
expect(screen.getByText(ADD_FILE_RE)).toBeInTheDocument()
})
it('should show warning when embedding is not available', () => {
render(<DocumentsHeader {...defaultProps} embeddingAvailable={false} />)
expect(screen.queryByText(/metadata\.metadata/i)).not.toBeInTheDocument()
expect(screen.queryByText(/list\.addFile/i)).not.toBeInTheDocument()
expect(screen.queryByText(METADATA_RE)).not.toBeInTheDocument()
expect(screen.queryByText(ADD_FILE_RE)).not.toBeInTheDocument()
})
it('should disable add document button when document upload is unavailable', () => {
render(<DocumentsHeader {...defaultProps} canAddDocument={false} />)
expect(screen.getByRole('button', { name: ADD_FILE_RE })).toBeDisabled()
})
it('should show unpublished warning when document upload is unavailable', () => {
render(<DocumentsHeader {...defaultProps} canAddDocument={false} />)
expect(screen.getByText(CURRENT_DRAFT_UNPUBLISHED_RE)).toBeInTheDocument()
})
})
describe('Add Button Text', () => {
it('should show "Add File" for FILE data source', () => {
render(<DocumentsHeader {...defaultProps} dataSourceType={DataSourceType.FILE} />)
expect(screen.getByText(/list\.addFile/i)).toBeInTheDocument()
expect(screen.getByText(ADD_FILE_RE)).toBeInTheDocument()
})
it('should show "Add Pages" for NOTION data source', () => {
render(<DocumentsHeader {...defaultProps} dataSourceType={DataSourceType.NOTION} />)
expect(screen.getByText(/list\.addPages/i)).toBeInTheDocument()
expect(screen.getByText(ADD_PAGES_RE)).toBeInTheDocument()
})
it('should show "Add Url" for WEB data source', () => {
render(<DocumentsHeader {...defaultProps} dataSourceType={DataSourceType.WEB} />)
expect(screen.getByText(/list\.addUrl/i)).toBeInTheDocument()
expect(screen.getByText(ADD_URL_RE)).toBeInTheDocument()
})
})
@ -159,7 +179,7 @@ describe('DocumentsHeader', () => {
const showEditMetadataModal = vi.fn()
render(<DocumentsHeader {...defaultProps} showEditMetadataModal={showEditMetadataModal} />)
const metadataButton = screen.getByText(/metadata\.metadata/i)
const metadataButton = screen.getByText(METADATA_RE)
fireEvent.click(metadataButton)
expect(showEditMetadataModal).toHaveBeenCalledTimes(1)
@ -169,7 +189,7 @@ describe('DocumentsHeader', () => {
const onAddDocument = vi.fn()
render(<DocumentsHeader {...defaultProps} onAddDocument={onAddDocument} />)
const addButton = screen.getByText(/list\.addFile/i)
const addButton = screen.getByText(ADD_FILE_RE)
fireEvent.click(addButton)
expect(onAddDocument).toHaveBeenCalledTimes(1)
@ -190,7 +210,7 @@ describe('DocumentsHeader', () => {
it('should handle undefined dataSourceType', () => {
render(<DocumentsHeader {...defaultProps} dataSourceType={undefined} />)
// Should default to "Add File" text
expect(screen.getByText(/list\.addFile/i)).toBeInTheDocument()
expect(screen.getByText(ADD_FILE_RE)).toBeInTheDocument()
})
it('should handle empty metadata arrays', () => {
@ -208,7 +228,7 @@ describe('DocumentsHeader', () => {
it('should render with descending sort order', () => {
render(<DocumentsHeader {...defaultProps} sortValue="-created_at" />)
// Component should still render correctly
expect(screen.getByText(/list\.title/i)).toBeInTheDocument()
expect(screen.getByText(LIST_TITLE_RE)).toBeInTheDocument()
})
})
})

View File

@ -24,6 +24,7 @@ type DocumentsHeaderProps = {
datasetId: string
dataSourceType?: DataSourceType
embeddingAvailable: boolean
canAddDocument: boolean
isFreePlan: boolean
// Filter & sort
@ -55,6 +56,7 @@ const DocumentsHeader: FC<DocumentsHeaderProps> = ({
datasetId,
dataSourceType,
embeddingAvailable,
canAddDocument,
isFreePlan,
statusFilterValue,
sortValue,
@ -81,6 +83,7 @@ const DocumentsHeader: FC<DocumentsHeaderProps> = ({
const isDataSourceNotion = dataSourceType === DataSourceType.NOTION
const isDataSourceWeb = dataSourceType === DataSourceType.WEB
const showUnpublishedWarning = embeddingAvailable && !canAddDocument
const statusFilterItems: Item[] = useMemo(() => [
{ value: 'all', name: t('list.index.all', { ns: 'datasetDocuments' }) as string },
@ -168,6 +171,12 @@ const DocumentsHeader: FC<DocumentsHeaderProps> = ({
description={t('embeddingModelNotAvailable', { ns: 'dataset' })}
/>
)}
{showUnpublishedWarning && (
<StatusWithAction
type="warning"
description={t('common.currentDraftUnpublished', { ns: 'workflow' })}
/>
)}
{embeddingAvailable && (
<Button variant="secondary" className="shrink-0" onClick={showEditMetadataModal}>
<RiDraftLine className="mr-1 size-4" />
@ -187,7 +196,7 @@ const DocumentsHeader: FC<DocumentsHeaderProps> = ({
/>
)}
{embeddingAvailable && (
<Button variant="primary" onClick={onAddDocument} className="shrink-0">
<Button variant="primary" onClick={onAddDocument} className="shrink-0" disabled={!canAddDocument}>
<PlusIcon className="mr-2 h-4 w-4 stroke-current" />
{addButtonText}
</Button>

View File

@ -30,6 +30,8 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
const dataset = useDatasetDetailContextWithSelector(s => s.dataset)
const embeddingAvailable = !!dataset?.embedding_available
const canAddDocument = embeddingAvailable
&& !(dataset?.runtime_mode === 'rag_pipeline' && !dataset?.is_published)
// Use custom hook for page state management
const {
@ -106,12 +108,14 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
// Route to document creation page
const routeToDocCreate = useCallback(() => {
if (!canAddDocument)
return
if (dataset?.runtime_mode === 'rag_pipeline') {
router.push(`/datasets/${datasetId}/documents/create-from-pipeline`)
return
}
router.push(`/datasets/${datasetId}/documents/create`)
}, [dataset?.runtime_mode, datasetId, router])
}, [canAddDocument, dataset?.runtime_mode, datasetId, router])
const total = documentsRes?.total || 0
const documentsList = documentsRes?.data
@ -147,7 +151,7 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
const isDataSourceNotion = dataset?.data_source_type === DataSourceType.NOTION
return (
<EmptyElement
canAdd={embeddingAvailable}
canAdd={canAddDocument}
onClick={routeToDocCreate}
type={isDataSourceNotion ? 'sync' : 'upload'}
/>
@ -160,6 +164,7 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
datasetId={datasetId}
dataSourceType={dataset?.data_source_type}
embeddingAvailable={embeddingAvailable}
canAddDocument={canAddDocument}
isFreePlan={isFreePlan}
statusFilterValue={statusFilterValue}
sortValue={sortValue}

View File

@ -182,6 +182,23 @@ describe('WrappedDatePicker', () => {
expect(screen.getByTestId('date-picker-wrapper')).toBeInTheDocument()
})
it('should not allow changing or clearing the date when readonly', () => {
const handleChange = vi.fn()
const timestamp = Math.floor(Date.now() / 1000)
const { container } = render(
<WrappedDatePicker value={timestamp} onChange={handleChange} readonly />,
)
fireEvent.click(screen.getByTestId('select-date'))
fireEvent.click(screen.getByTestId('clear-date'))
const closeIcon = container.querySelector('.cursor-pointer.hover\\:text-components-input-text-filled')
if (closeIcon)
fireEvent.click(closeIcon)
expect(handleChange).not.toHaveBeenCalled()
})
})
describe('Styling', () => {

View File

@ -15,28 +15,39 @@ type Props = {
className?: string
value?: number
onChange: (date: number | null) => void
readonly?: boolean
}
const WrappedDatePicker = ({
className,
value,
onChange,
readonly,
}: Props) => {
const { t } = useTranslation()
const { userProfile: { timezone } } = useAppContext()
const { formatTime: formatTimestamp } = useTimestamp()
const handleDateChange = useCallback((date?: dayjs.Dayjs) => {
if (readonly)
return
if (date)
onChange(date.unix())
else
onChange(null)
}, [onChange])
}, [onChange, readonly])
const renderTrigger = useCallback(({
handleClickTrigger,
}: TriggerProps) => {
return (
<div onClick={handleClickTrigger} className={cn('group flex items-center rounded-md bg-components-input-bg-normal', className)}>
<div
onClick={readonly ? undefined : handleClickTrigger}
className={cn(
'group flex items-center rounded-md bg-components-input-bg-normal',
readonly && 'cursor-not-allowed opacity-50',
className,
)}
>
<div
className={cn(
'grow',
@ -49,6 +60,7 @@ const WrappedDatePicker = ({
className={cn(
'hidden h-4 w-4 cursor-pointer hover:text-components-input-text-filled group-hover:block',
value && 'text-text-quaternary',
readonly && 'pointer-events-none',
)}
onClick={() => handleDateChange()}
/>
@ -60,7 +72,7 @@ const WrappedDatePicker = ({
/>
</div>
)
}, [className, value, formatTimestamp, t, handleDateChange])
}, [className, value, formatTimestamp, t, handleDateChange, readonly])
return (
<DatePicker

View File

@ -488,6 +488,34 @@ describe('DatasetMetadataDrawer', () => {
if (cancelBtn)
fireEvent.click(cancelBtn)
})
it('should show error toast when deleting metadata referenced by a pipeline', async () => {
const onRemove = vi.fn().mockResolvedValue({})
render(
<DatasetMetadataDrawer
{...defaultProps}
onRemove={onRemove}
userMetadata={[
{ id: '1', name: 'field_one', type: DataType.string, count: 5, isReferencedByPipeline: true },
]}
/>,
)
await waitFor(() => {
expect(screen.getByRole('dialog')).toBeInTheDocument()
})
const deleteContainer = screen.getByTestId('metadata-delete-1')
const deleteIcon = deleteContainer.querySelector('svg')
if (deleteIcon)
fireEvent.click(deleteIcon)
expect(mockToastNotify).toHaveBeenCalledWith(
expect.objectContaining({ type: 'error' }),
)
expect(onRemove).not.toHaveBeenCalled()
expect(screen.queryByText('dataset.metadata.datasetMetadata.deleteTitle')).not.toBeInTheDocument()
})
})
describe('Props', () => {

View File

@ -63,6 +63,13 @@ const Item: FC<ItemProps> = ({
hideDeleteConfirm()
onDelete?.()
}, [hideDeleteConfirm, onDelete])
const handleDeleteClick = useCallback(() => {
if (payload.isReferencedByPipeline) {
Toast.notify({ type: 'error', message: t(`${i18nPrefix}.deleteDisabledByPipeline`, { ns: 'dataset' }) })
return
}
showDeleteConfirm()
}, [payload.isReferencedByPipeline, showDeleteConfirm, t])
return (
<div
@ -75,24 +82,28 @@ const Item: FC<ItemProps> = ({
>
<div
className={cn(
'flex h-8 items-center justify-between px-2',
'flex h-8 items-center justify-between px-2',
disabled && 'opacity-30', // not include border and bg
)}
>
<div className="flex h-full items-center space-x-1 text-text-tertiary">
<Icon className="size-4 shrink-0" />
<div className="system-sm-medium max-w-[250px] truncate text-text-primary">{payload.name}</div>
<div className="system-xs-regular shrink-0">{payload.type}</div>
<div className="max-w-[250px] truncate text-text-primary system-sm-medium">{payload.name}</div>
<div className="shrink-0 system-xs-regular">{payload.type}</div>
</div>
{(!readonly || disabled) && (
<div className="system-xs-regular ml-2 shrink-0 text-text-tertiary group-hover/item:hidden">
<div className="ml-2 shrink-0 text-text-tertiary system-xs-regular group-hover/item:hidden">
{disabled ? t(`${i18nPrefix}.disabled`, { ns: 'dataset' }) : t(`${i18nPrefix}.values`, { ns: 'dataset', num: payload.count || 0 })}
</div>
)}
<div className="ml-2 hidden items-center space-x-1 text-text-tertiary group-hover/item:flex">
<RiEditLine className="size-4 cursor-pointer" onClick={handleRename} />
<div ref={deleteBtnRef} className="hover:text-text-destructive">
<RiDeleteBinLine className="size-4 cursor-pointer" onClick={showDeleteConfirm} />
<div
ref={deleteBtnRef}
data-testid={`metadata-delete-${payload.id}`}
className="hover:text-text-destructive"
>
<RiDeleteBinLine className="size-4 cursor-pointer" onClick={handleDeleteClick} />
</div>
</div>
{isShowDeleteConfirm && (
@ -177,7 +188,7 @@ const DatasetMetadataDrawer: FC<Props> = ({
panelClassName="px-4 block !max-w-[420px] my-2 rounded-l-2xl"
>
<div className="h-full overflow-y-auto">
<div className="system-sm-regular text-text-tertiary">{t(`${i18nPrefix}.description`, { ns: 'dataset' })}</div>
<div className="text-text-tertiary system-sm-regular">{t(`${i18nPrefix}.description`, { ns: 'dataset' })}</div>
<CreateModal
open={open}
setOpen={setOpen}
@ -207,7 +218,7 @@ const DatasetMetadataDrawer: FC<Props> = ({
value={isBuiltInEnabled}
onChange={onIsBuiltInEnabledChange}
/>
<div className="system-sm-semibold ml-2 mr-0.5 text-text-secondary">{t(`${i18nPrefix}.builtIn`, { ns: 'dataset' })}</div>
<div className="ml-2 mr-0.5 text-text-secondary system-sm-semibold">{t(`${i18nPrefix}.builtIn`, { ns: 'dataset' })}</div>
<Tooltip popupContent={<div className="max-w-[100px]">{t(`${i18nPrefix}.builtInDescription`, { ns: 'dataset' })}</div>} />
</div>

View File

@ -19,6 +19,7 @@ export type MetadataItemWithValue = MetadataItem & {
export type MetadataItemWithValueLength = MetadataItem & {
count: number
isReferencedByPipeline?: boolean
}
export type MetadataItemInBatchEdit = MetadataItemWithValue & {

View File

@ -0,0 +1,292 @@
'use client'
import type { FC } from 'react'
import type { DocMetadataItem } from '../types'
import type { MetadataItemWithValueLength } from '@/app/components/datasets/metadata/types'
import type { ValueSelector, Var } from '@/app/components/workflow/types'
import { useCallback } from 'react'
import { useTranslation } from 'react-i18next'
import InputNumber from '@/app/components/base/input-number'
import { Tooltip, TooltipContent, TooltipTrigger } from '@/app/components/base/ui/tooltip'
import Datepicker from '@/app/components/datasets/metadata/base/date-picker'
import { DataType } from '@/app/components/datasets/metadata/types'
import VarReferencePicker from '@/app/components/workflow/nodes/_base/components/variable/var-reference-picker'
import { VarType } from '@/app/components/workflow/types'
import { cn } from '@/utils/classnames'
// Props for the type-aware constant value editor.
type ConstantValueInputProps = {
  // Data type of the currently selected metadata field; undefined until a field is chosen.
  metadataType: DataType | undefined
  // Current value; string[] means a variable selector is active, so the constant editors render empty.
  value: string | number | string[] | null
  // Emits the new constant value (null when cleared).
  onChange: (value: string | number | null) => void
  readonly?: boolean
  // Placeholder shown by the plain text input (string / fallback case only).
  placeholder: string
}
// Renders the constant-value editor that matches the metadata field's data
// type: a date picker for time, a numeric input for number, and a plain text
// input for everything else (including an as-yet-unselected field).
const ConstantValueInput: FC<ConstantValueInputProps> = ({
  metadataType,
  value,
  onChange,
  readonly,
  placeholder,
}) => {
  // Both the time and number editors only accept a numeric current value;
  // anything else (string, string[], null) renders as "empty".
  const numericValue = typeof value === 'number' ? value : undefined

  switch (metadataType) {
    case DataType.time:
      return (
        <Datepicker
          className="h-full w-full"
          value={numericValue}
          onChange={v => onChange(v)}
          readonly={readonly}
        />
      )
    case DataType.number:
      return (
        <InputNumber
          className="h-full w-full border-none bg-transparent p-0"
          value={numericValue}
          onChange={(v: number | null) => onChange(v)}
          readOnly={readonly}
          size="regular"
        />
      )
    default:
      return (
        <input
          type="text"
          value={typeof value === 'string' ? value : ''}
          onChange={e => onChange(e.target.value)}
          placeholder={placeholder}
          disabled={readonly}
          className="h-full w-full bg-transparent text-[13px] text-text-primary outline-none placeholder:text-text-placeholder disabled:opacity-50"
        />
      )
  }
}
// Props for the knowledge-base node's metadata configuration section.
type MetadataSectionProps = {
  // Workflow node id; forwarded to the variable reference picker for scoping.
  nodeId: string
  // Metadata fields defined on the dataset — the options offered by each row's select.
  userMetadata?: MetadataItemWithValueLength[]
  // Per-document metadata assignments currently configured on the node.
  docMetadata?: DocMetadataItem[]
  // Called with the full updated list on every add / remove / change.
  onDocMetadataChange?: (metadata: DocMetadataItem[]) => void
  readonly?: boolean
  className?: string
}
// Editable list of document-metadata assignments for the knowledge-base node.
// Each row pairs a metadata field (select) with either a constant value or a
// workflow variable, toggled via a small segmented control.
const MetadataSection: FC<MetadataSectionProps> = ({
  nodeId,
  userMetadata = [],
  docMetadata = [],
  onDocMetadataChange,
  readonly,
  className,
}) => {
  const { t } = useTranslation()

  // Document metadata value handlers
  // Appends an empty row; metadata_id '' keeps the select on its placeholder.
  const handleAddDocMetadata = useCallback(() => {
    if (onDocMetadataChange) {
      onDocMetadataChange([...docMetadata, { metadata_id: '', value: null }])
    }
  }, [docMetadata, onDocMetadataChange])

  const handleRemoveDocMetadata = useCallback((index: number) => {
    if (onDocMetadataChange) {
      const newMetadata = [...docMetadata]
      newMetadata.splice(index, 1)
      onDocMetadataChange(newMetadata)
    }
  }, [docMetadata, onDocMetadataChange])

  // Switching the target field resets the value, since the old value's type
  // may not match the newly selected field's data type.
  const handleDocMetadataIdChange = useCallback((index: number, metadataId: string) => {
    if (onDocMetadataChange) {
      const newMetadata = [...docMetadata]
      newMetadata[index] = { metadata_id: metadataId, value: null }
      onDocMetadataChange(newMetadata)
    }
  }, [docMetadata, onDocMetadataChange])

  // Value may be a constant (string | number), a ValueSelector (string[]), or null.
  const handleDocMetadataValueChange = useCallback((index: number, value: string | number | ValueSelector | null) => {
    if (onDocMetadataChange) {
      const newMetadata = [...docMetadata]
      newMetadata[index] = { ...newMetadata[index], value }
      onDocMetadataChange(newMetadata)
    }
  }, [docMetadata, onDocMetadataChange])

  // Fields already used by other rows are hidden; the current row's own field
  // stays available so its select keeps a valid selection.
  const getAvailableMetadataOptions = useCallback((currentId: string) => {
    const usedIds = docMetadata.map(m => m.metadata_id).filter(id => id !== currentId)
    return userMetadata.filter(m => !usedIds.includes(m.id))
  }, [userMetadata, docMetadata])

  const getMetadataType = useCallback((metadataId: string): DataType | undefined => {
    return userMetadata.find(m => m.id === metadataId)?.type
  }, [userMetadata])

  // Filter variables based on metadata type
  // Returns a predicate restricting the variable picker to workflow variables
  // compatible with the selected field's data type; no field selected → nothing matches.
  const createVarFilter = useCallback((metadataId: string) => {
    return (variable: Var): boolean => {
      const metadataType = getMetadataType(metadataId)
      if (!metadataType)
        return false
      // Type mapping: Metadata DataType -> Workflow VarType
      switch (metadataType) {
        case DataType.string:
          return variable.type === VarType.string
        case DataType.number:
          return variable.type === VarType.number || variable.type === VarType.integer
        case DataType.time:
          // time is stored as a unix timestamp, so numeric variables qualify
          return variable.type === VarType.number || variable.type === VarType.integer
        default:
          return false
      }
    }
  }, [getMetadataType])

  return (
    <div className={cn('space-y-3', className)}>
      <div className="flex items-center justify-between">
        <div className="text-text-tertiary system-xs-semibold-uppercase">
          {t('metadata.metadata', { ns: 'dataset' })}
        </div>
      </div>
      {/* Document Metadata Values Section */}
      {userMetadata.length > 0 && (
        <div className="space-y-2 rounded-lg border border-components-panel-border bg-components-panel-bg p-3">
          <div className="flex items-center justify-end">
            {!readonly && (
              <button
                type="button"
                onClick={handleAddDocMetadata}
                className="flex items-center gap-1 text-text-accent-secondary system-xs-medium hover:text-text-accent disabled:opacity-50"
                disabled={docMetadata.length >= userMetadata.length}
              >
                <div className="i-ri-add-line size-3.5" />
                {t('operation.add', { ns: 'common' })}
              </button>
            )}
          </div>
          {docMetadata.length > 0
            ? (
              <div className="space-y-2">
                {docMetadata.map((item, index) => {
                  {/* string[] value means "bound to a workflow variable" */}
                  const isVariable = Array.isArray(item.value)
                  const itemKey = item.metadata_id ? `metadata-${item.metadata_id}` : `new-${index}`
                  return (
                    <div key={itemKey} className="flex items-center gap-2">
                      <div className="flex w-0 grow items-center gap-2">
                        <div className="flex w-1/3 items-center gap-1 rounded-lg border border-components-panel-border bg-components-input-bg-normal px-2">
                          <select
                            value={item.metadata_id}
                            onChange={e => handleDocMetadataIdChange(index, e.target.value)}
                            disabled={readonly}
                            className="h-8 w-full appearance-none bg-transparent text-[13px] text-text-primary outline-none disabled:opacity-50"
                          >
                            <option value="" disabled>{t('placeholder.select', { ns: 'common' })}</option>
                            {getAvailableMetadataOptions(item.metadata_id).map(opt => (
                              <option key={opt.id} value={opt.id}>{opt.name}</option>
                            ))}
                            {item.metadata_id && !getAvailableMetadataOptions(item.metadata_id).some(o => o.id === item.metadata_id) && (
                              <option value={item.metadata_id}>{userMetadata.find(m => m.id === item.metadata_id)?.name}</option>
                            )}
                          </select>
                        </div>
                        <div className="flex h-8 grow items-center gap-1 rounded-lg border border-components-panel-border bg-components-input-bg-normal">
                          <div className="ml-1 inline-flex shrink-0 gap-px rounded-[10px] bg-components-segmented-control-bg-normal p-0.5">
                            <Tooltip>
                              <TooltipTrigger render={(
                                <div
                                  className={cn('cursor-pointer rounded-lg px-2.5 py-1.5 text-text-tertiary hover:bg-state-base-hover', isVariable && 'bg-components-segmented-control-item-active-bg text-text-secondary shadow-xs hover:bg-components-segmented-control-item-active-bg', readonly && 'cursor-not-allowed opacity-50')}
                                  onClick={() => !readonly && handleDocMetadataValueChange(index, [])}
                                >
                                  <div className="i-custom-vender-solid-development-variable-02 h-4 w-4" />
                                </div>
                              )}
                              />
                              {!isVariable && (
                                <TooltipContent>
                                  {t('nodes.common.valueType.variable', { ns: 'workflow' })}
                                </TooltipContent>
                              )}
                            </Tooltip>
                            <Tooltip>
                              <TooltipTrigger render={(
                                <div
                                  className={cn('cursor-pointer rounded-lg px-2.5 py-1.5 text-text-tertiary hover:bg-state-base-hover', !isVariable && 'bg-components-segmented-control-item-active-bg text-text-secondary shadow-xs hover:bg-components-segmented-control-item-active-bg', readonly && 'cursor-not-allowed opacity-50')}
                                  onClick={() => !readonly && handleDocMetadataValueChange(index, '')}
                                >
                                  <div className="i-ri-edit-line h-4 w-4" />
                                </div>
                              )}
                              />
                              {isVariable && (
                                <TooltipContent>
                                  {t('nodes.common.valueType.constant', { ns: 'workflow' })}
                                </TooltipContent>
                              )}
                            </Tooltip>
                          </div>
                          <div className="h-full w-px bg-divider-regular" />
                          <div className="w-0 grow overflow-hidden">
                            {isVariable
                              ? (
                                <VarReferencePicker
                                  nodeId={nodeId}
                                  readonly={readonly || false}
                                  value={item.value as ValueSelector}
                                  onChange={value => handleDocMetadataValueChange(index, value)}
                                  isSupportConstantValue={false}
                                  isSupportFileVar={false}
                                  placeholder={t('placeholder.input', { ns: 'common' }) || ''}
                                  className="h-full border-none !bg-transparent p-0"
                                  zIndex={1000}
                                  isShowNodeName
                                  minWidth={360}
                                  filterVar={createVarFilter(item.metadata_id)}
                                />
                              )
                              : (
                                <div className="flex h-full w-full items-center px-2">
                                  <ConstantValueInput
                                    metadataType={getMetadataType(item.metadata_id)}
                                    value={item.value}
                                    onChange={value => handleDocMetadataValueChange(index, value)}
                                    readonly={readonly}
                                    placeholder={t('placeholder.input', { ns: 'common' }) || ''}
                                  />
                                </div>
                              )}
                          </div>
                        </div>
                      </div>
                      {!readonly && (
                        <button
                          type="button"
                          onClick={() => handleRemoveDocMetadata(index)}
                          className="flex size-8 shrink-0 items-center justify-center rounded-lg text-text-tertiary hover:bg-state-destructive-hover hover:text-text-destructive"
                        >
                          <div className="i-ri-delete-bin-line size-4" />
                        </button>
                      )}
                    </div>
                  )
                })}
              </div>
            )
            : (
              <div className="py-2 text-center text-text-quaternary system-2xs-regular">
                {t('stepTwo.metadata.noValues', { ns: 'datasetCreation' })}
              </div>
            )}
        </div>
      )}
    </div>
  )
}

export default MetadataSection

View File

@ -1,4 +1,5 @@
import type {
DocMetadataItem,
KnowledgeBaseNodeType,
RerankingModel,
SummaryIndexSetting,
@ -247,6 +248,12 @@ export const useConfig = (id: string) => {
})
}, [handleNodeDataUpdate])
const handleDocMetadataChange = useCallback((docMetadata: DocMetadataItem[]) => {
handleNodeDataUpdate({
doc_metadata: docMetadata,
})
}, [handleNodeDataUpdate])
const handleSummaryIndexSettingChange = useCallback((summaryIndexSetting: SummaryIndexSetting) => {
const nodeData = getNodeData()
handleNodeDataUpdate({
@ -271,6 +278,7 @@ export const useConfig = (id: string) => {
handleScoreThresholdChange,
handleScoreThresholdEnabledChange,
handleInputVariableChange,
handleDocMetadataChange,
handleSummaryIndexSettingChange,
}
}

View File

@ -20,11 +20,14 @@ import {
} from '@/app/components/workflow/nodes/_base/components/layout'
import VarReferencePicker from '@/app/components/workflow/nodes/_base/components/variable/var-reference-picker'
import { IS_CE_EDITION } from '@/config'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { consoleQuery } from '@/service/client'
import { useDatasetMetaData } from '@/service/knowledge/use-metadata'
import Split from '../_base/components/split'
import ChunkStructure from './components/chunk-structure'
import EmbeddingModel from './components/embedding-model'
import IndexMethod from './components/index-method'
import MetadataSection from './components/metadata-section'
import RetrievalSetting from './components/retrieval-setting'
import { useConfig } from './hooks/use-config'
import { useEmbeddingModelStatus } from './hooks/use-embedding-model-status'
@ -62,6 +65,10 @@ const Panel: FC<NodePanelProps<KnowledgeBaseNodeType>> = ({
}),
)
// Get datasetId from context and fetch metadata
const datasetId = useDatasetDetailContextWithSelector(s => s.dataset?.id)
const { data: metadataList } = useDatasetMetaData(datasetId || '')
const {
handleChunkStructureChange,
handleIndexMethodChange,
@ -76,6 +83,7 @@ const Panel: FC<NodePanelProps<KnowledgeBaseNodeType>> = ({
handleScoreThresholdChange,
handleScoreThresholdEnabledChange,
handleInputVariableChange,
handleDocMetadataChange,
handleSummaryIndexSettingChange,
} = useConfig(id)
@ -278,6 +286,17 @@ const Panel: FC<NodePanelProps<KnowledgeBaseNodeType>> = ({
/>
</div>
</BoxGroup>
{(metadataList?.doc_metadata?.length ?? 0) > 0 && (
<BoxGroup>
<MetadataSection
nodeId={id}
userMetadata={metadataList?.doc_metadata || []}
docMetadata={data.doc_metadata}
onDocMetadataChange={handleDocMetadataChange}
readonly={nodesReadOnly}
/>
</BoxGroup>
)}
</>
)
}

View File

@ -42,12 +42,19 @@ export type RetrievalSetting = {
score_threshold: number
reranking_mode?: RerankingModeEnum
}
export type DocMetadataItem = {
metadata_id: string
value: string | number | string[] | null // string[] for ValueSelector
}
export type SummaryIndexSetting = {
enable?: boolean
model_name?: string
model_provider_name?: string
summary_prompt?: string
}
export type KnowledgeBaseNodeType = CommonNodeType & {
index_chunk_variable_selector: string[]
chunk_structure?: ChunkStructureEnum
@ -56,6 +63,7 @@ export type KnowledgeBaseNodeType = CommonNodeType & {
embedding_model_provider?: string
keyword_number: number
retrieval_model: RetrievalSetting
doc_metadata?: DocMetadataItem[]
_embeddingModelList?: Model[]
_embeddingProviderModelList?: ModelItem[]
_rerankModelList?: Model[]

View File

@ -4329,12 +4329,6 @@
"app/components/datasets/metadata/metadata-dataset/dataset-metadata-drawer.tsx": {
"no-restricted-imports": {
"count": 4
},
"tailwindcss/enforce-consistent-class-order": {
"count": 5
},
"tailwindcss/no-unnecessary-whitespace": {
"count": 1
}
},
"app/components/datasets/metadata/metadata-dataset/field.tsx": {

View File

@ -121,6 +121,8 @@
"stepTwo.indexSettingTip": "To change the index method & embedding model, please go to the ",
"stepTwo.maxLength": "Maximum chunk length",
"stepTwo.maxLengthCheck": "Maximum chunk length should be less than {{limit}}",
"stepTwo.metadata.customValues": "Custom Values",
"stepTwo.metadata.noValues": "No values configured",
"stepTwo.nextStep": "Save & Process",
"stepTwo.notAvailableForParentChild": "Not available for Parent-child Index",
"stepTwo.notAvailableForQA": "Not available for Q&A Index",

View File

@ -124,6 +124,7 @@
"metadata.datasetMetadata.builtIn": "Built-in",
"metadata.datasetMetadata.builtInDescription": "Built-in metadata is automatically extracted and generated. It must be enabled before use and cannot be edited.",
"metadata.datasetMetadata.deleteContent": "Are you sure you want to delete the metadata \"{{name}}\"",
"metadata.datasetMetadata.deleteDisabledByPipeline": "This metadata is used by a pipeline and cannot be deleted.",
"metadata.datasetMetadata.deleteTitle": "Confirm to delete",
"metadata.datasetMetadata.description": "You can manage all metadata in this knowledge here. Modifications will be synchronized to every document.",
"metadata.datasetMetadata.disabled": "Disabled",

View File

@ -455,6 +455,8 @@
"nodes.common.retry.times": "times",
"nodes.common.typeSwitch.input": "Input value",
"nodes.common.typeSwitch.variable": "Use variable",
"nodes.common.valueType.constant": "Constant",
"nodes.common.valueType.variable": "Variable",
"nodes.dataSource.add": "Add data source",
"nodes.dataSource.supportedFileFormats": "Supported file formats",
"nodes.dataSource.supportedFileFormatsPlaceholder": "File extension, e.g. doc",

View File

@ -121,6 +121,8 @@
"stepTwo.indexSettingTip": "要更改索引方法和 embedding 模型,请转到",
"stepTwo.maxLength": "分段最大长度",
"stepTwo.maxLengthCheck": "分段最大长度不能大于 {{limit}}",
"stepTwo.metadata.customValues": "自定义值",
"stepTwo.metadata.noValues": "未配置任何值",
"stepTwo.nextStep": "保存并处理",
"stepTwo.notAvailableForParentChild": "不支持父子索引",
"stepTwo.notAvailableForQA": "不支持 Q&A 索引",

View File

@ -455,6 +455,8 @@
"nodes.common.retry.times": "次",
"nodes.common.typeSwitch.input": "输入值",
"nodes.common.typeSwitch.variable": "使用变量",
"nodes.common.valueType.constant": "常量",
"nodes.common.valueType.variable": "变量",
"nodes.dataSource.add": "添加数据源",
"nodes.dataSource.supportedFileFormats": "支持的文件格式",
"nodes.dataSource.supportedFileFormatsPlaceholder": "文件格式例如doc",

View File

@ -7,11 +7,32 @@ import { useDocumentListKey, useInvalidDocumentList } from './use-document'
const NAME_SPACE = 'dataset-metadata'
type DatasetMetadataResponse = {
doc_metadata: MetadataItemWithValueLength[]
built_in_field_enabled: boolean
}
type DatasetMetadataApiItem = MetadataItemWithValueLength & {
is_referenced_by_pipeline?: boolean
}
type DatasetMetadataApiResponse = {
doc_metadata: DatasetMetadataApiItem[]
built_in_field_enabled: boolean
}
export const useDatasetMetaData = (datasetId: string) => {
return useQuery<{ doc_metadata: MetadataItemWithValueLength[], built_in_field_enabled: boolean }>({
return useQuery<DatasetMetadataResponse>({
queryKey: [NAME_SPACE, 'dataset', datasetId],
queryFn: () => {
return get<{ doc_metadata: MetadataItemWithValueLength[], built_in_field_enabled: boolean }>(`/datasets/${datasetId}/metadata`)
queryFn: async () => {
const response = await get<DatasetMetadataApiResponse>(`/datasets/${datasetId}/metadata`)
return {
...response,
doc_metadata: response.doc_metadata.map(item => ({
...item,
isReferencedByPipeline: item.is_referenced_by_pipeline,
})),
}
},
})
}