mirror of https://github.com/langgenius/dify.git
Merge 67b394cdd8 into 8b634a9bee
This commit is contained in:
commit
25e331f559
|
|
@ -2,7 +2,7 @@ from typing import Literal
|
|||
|
||||
from flask_restx import Resource, marshal_with
|
||||
from pydantic import BaseModel
|
||||
from werkzeug.exceptions import NotFound
|
||||
from werkzeug.exceptions import BadRequest, NotFound
|
||||
|
||||
from controllers.common.schema import register_schema_models
|
||||
from controllers.console import console_ns
|
||||
|
|
@ -16,6 +16,7 @@ from services.entities.knowledge_entities.knowledge_entities import (
|
|||
MetadataDetail,
|
||||
MetadataOperationData,
|
||||
)
|
||||
from services.errors.metadata_service import MetadataInUseError
|
||||
from services.metadata_service import MetadataService
|
||||
|
||||
|
||||
|
|
@ -97,7 +98,10 @@ class DatasetMetadataApi(Resource):
|
|||
raise NotFound("Dataset not found.")
|
||||
DatasetService.check_dataset_permission(dataset, current_user)
|
||||
|
||||
MetadataService.delete_metadata(dataset_id_str, metadata_id_str)
|
||||
try:
|
||||
MetadataService.delete_metadata(dataset_id_str, metadata_id_str)
|
||||
except MetadataInUseError as exc:
|
||||
raise BadRequest(str(exc))
|
||||
return {"result": "success"}, 204
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ from typing import Literal
|
|||
from flask_login import current_user
|
||||
from flask_restx import marshal
|
||||
from pydantic import BaseModel
|
||||
from werkzeug.exceptions import NotFound
|
||||
from werkzeug.exceptions import BadRequest, NotFound
|
||||
|
||||
from controllers.common.schema import register_schema_model, register_schema_models
|
||||
from controllers.service_api import service_api_ns
|
||||
|
|
@ -16,6 +16,7 @@ from services.entities.knowledge_entities.knowledge_entities import (
|
|||
MetadataDetail,
|
||||
MetadataOperationData,
|
||||
)
|
||||
from services.errors.metadata_service import MetadataInUseError
|
||||
from services.metadata_service import MetadataService
|
||||
|
||||
|
||||
|
|
@ -127,7 +128,10 @@ class DatasetMetadataServiceApi(DatasetApiResource):
|
|||
raise NotFound("Dataset not found.")
|
||||
DatasetService.check_dataset_permission(dataset, current_user)
|
||||
|
||||
MetadataService.delete_metadata(dataset_id_str, metadata_id_str)
|
||||
try:
|
||||
MetadataService.delete_metadata(dataset_id_str, metadata_id_str)
|
||||
except MetadataInUseError as exc:
|
||||
raise BadRequest(str(exc))
|
||||
return "", 204
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -7,12 +7,13 @@ from typing import Any
|
|||
|
||||
from flask import current_app
|
||||
from sqlalchemy import delete, func, select
|
||||
from sqlalchemy.orm import attributes
|
||||
|
||||
from core.db.session_factory import session_factory
|
||||
from core.rag.index_processor.index_processor_base import SummaryIndexSettingDict
|
||||
from core.workflow.nodes.knowledge_index.exc import KnowledgeIndexNodeError
|
||||
from core.workflow.nodes.knowledge_index.protocols import Preview, PreviewItem, QaPreview
|
||||
from models.dataset import Dataset, Document, DocumentSegment
|
||||
from models.dataset import Dataset, DatasetMetadataBinding, Document, DocumentSegment
|
||||
|
||||
from .index_processor_factory import IndexProcessorFactory
|
||||
from .processor.paragraph_index_processor import ParagraphIndexProcessor
|
||||
|
|
@ -53,6 +54,9 @@ class IndexProcessor:
|
|||
chunks: Mapping[str, Any],
|
||||
batch: Any,
|
||||
summary_index_setting: SummaryIndexSettingDict | None = None,
|
||||
doc_metadata: Mapping[str, Any] | None = None,
|
||||
metadata_binding_ids: list[str] | None = None,
|
||||
user_id: str | None = None,
|
||||
):
|
||||
with session_factory.create_session() as session:
|
||||
document = session.query(Document).filter_by(id=document_id).first()
|
||||
|
|
@ -64,6 +68,7 @@ class IndexProcessor:
|
|||
raise KnowledgeIndexNodeError(f"Dataset {dataset_id} not found.")
|
||||
|
||||
dataset_name_value = dataset.name
|
||||
tenant_id_value = dataset.tenant_id
|
||||
document_name_value = document.name
|
||||
created_at_value = document.created_at
|
||||
if summary_index_setting is None:
|
||||
|
|
@ -108,6 +113,19 @@ class IndexProcessor:
|
|||
document.need_summary = True
|
||||
else:
|
||||
document.need_summary = False
|
||||
|
||||
# Reconcile doc_metadata and bindings when the caller explicitly provides pipeline metadata.
|
||||
if doc_metadata is not None or metadata_binding_ids is not None:
|
||||
self._save_doc_metadata_and_bindings(
|
||||
session=session,
|
||||
dataset_id=dataset_id,
|
||||
tenant_id=tenant_id_value,
|
||||
document=document,
|
||||
doc_metadata=doc_metadata or {},
|
||||
metadata_binding_ids=metadata_binding_ids or [],
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
# update document segment status
|
||||
session.query(DocumentSegment).where(
|
||||
|
|
@ -131,6 +149,110 @@ class IndexProcessor:
|
|||
"display_status": "completed",
|
||||
}
|
||||
|
||||
def _save_doc_metadata_and_bindings(
    self,
    *,
    session: Any,
    dataset_id: str,
    tenant_id: str,
    document: Document,
    doc_metadata: Mapping[str, Any],
    metadata_binding_ids: list[str],
    user_id: str | None,
) -> None:
    """
    Reconcile a document's custom metadata values and its metadata bindings.

    ``document.doc_metadata`` is keyed by metadata *name*, while the caller
    supplies metadata *IDs*; names are looked up from the dataset's
    ``DatasetMetadata`` rows. Values that belong to metadata currently bound to
    the document are dropped first, then the newly resolved values are merged
    in. Bindings whose ID is not listed in ``metadata_binding_ids`` are
    deleted, and missing bindings are added to the session. This method does
    not commit.

    Args:
        session: Database session used for every read and write.
        dataset_id: ID of the dataset that owns the metadata definitions.
        tenant_id: Tenant ID stored on newly created bindings.
        document: Document whose ``doc_metadata`` and bindings are updated.
        doc_metadata: dict of {metadata_id: resolved_value}. IDs without a
            definition in the dataset are skipped with a warning.
        metadata_binding_ids: list of metadata IDs to bind. Duplicates are
            ignored; IDs without a definition in the dataset are skipped with
            a warning.
        user_id: Stored as ``created_by`` on new bindings. When ``None`` no
            new binding is created and a warning is logged instead.
    """
    from models.dataset import DatasetMetadata

    # De-duplicate the requested IDs while keeping the caller's order.
    requested_ids = list(dict.fromkeys(metadata_binding_ids))
    requested_id_set = set(requested_ids)

    existing_binding_rows = session.scalars(
        select(DatasetMetadataBinding).where(
            DatasetMetadataBinding.dataset_id == dataset_id,
            DatasetMetadataBinding.document_id == document.id,
        )
    ).all()
    existing_binding_ids = {binding.metadata_id for binding in existing_binding_rows}

    # Resolve every ID that may need a name in a single query: the values to
    # write, the bindings to create, and the bindings that already exist
    # (their names are needed to clear stale values below).
    ids_to_resolve = requested_id_set | existing_binding_ids | set(doc_metadata)
    metadata_name_map: dict[str, str] = {}
    if ids_to_resolve:
        metadata_defs = session.scalars(
            select(DatasetMetadata).where(
                DatasetMetadata.dataset_id == dataset_id,
                DatasetMetadata.id.in_(list(ids_to_resolve)),
            )
        ).all()
        metadata_name_map = {metadata.id: metadata.name for metadata in metadata_defs}

    # Build name -> value dict for document.doc_metadata
    named_metadata: dict[str, Any] = {}
    for metadata_id, value in doc_metadata.items():
        metadata_name = metadata_name_map.get(metadata_id)
        if not metadata_name:
            logger.warning("[IndexProcessor] metadata_id %s not found, skipping", metadata_id)
            continue
        named_metadata[metadata_name] = value

    # Drop the values of everything that was bound before, then apply the new
    # values, so a field removed from the configuration does not linger.
    document_doc_metadata = dict(document.doc_metadata or {})
    for metadata_id in existing_binding_ids:
        metadata_name = metadata_name_map.get(metadata_id)
        if metadata_name:
            document_doc_metadata.pop(metadata_name, None)
    document_doc_metadata.update(named_metadata)
    document.doc_metadata = document_doc_metadata
    attributes.flag_modified(document, "doc_metadata")

    # Remove bindings that are no longer requested.
    obsolete_metadata_ids = existing_binding_ids - requested_id_set
    if obsolete_metadata_ids:
        session.query(DatasetMetadataBinding).where(
            DatasetMetadataBinding.dataset_id == dataset_id,
            DatasetMetadataBinding.document_id == document.id,
            DatasetMetadataBinding.metadata_id.in_(list(obsolete_metadata_ids)),
        ).delete(synchronize_session=False)

    # Create the bindings that are requested but do not exist yet.
    for metadata_id in requested_ids:
        if metadata_id not in metadata_name_map:
            logger.warning(
                "[IndexProcessor] metadata_id %s not found in dataset, skipping binding creation",
                metadata_id,
            )
            continue
        if metadata_id in existing_binding_ids:
            continue
        if user_id is None:
            logger.warning(
                "[IndexProcessor] user_id is None, cannot create binding for metadata_id=%s, skipping",
                metadata_id,
            )
            continue

        binding = DatasetMetadataBinding(
            tenant_id=tenant_id,
            dataset_id=dataset_id,
            metadata_id=metadata_id,
            document_id=document.id,
            created_by=user_id,
        )
        session.add(binding)
|
||||
|
||||
def get_preview_output(
|
||||
self,
|
||||
chunks: Any,
|
||||
|
|
|
|||
|
|
@ -153,6 +153,15 @@ class ParentChildStructureChunk(BaseModel):
|
|||
data_source_info: Union[FileInfo, OnlineDocumentInfo, WebsiteInfo]
|
||||
|
||||
|
||||
class DocMetadata(BaseModel):
    """
    One metadata entry configured on a knowledge index node.
    """

    # ID of the metadata definition this entry targets.
    metadata_id: str
    # Value to store for the metadata. Required, but may be None.
    # NOTE(review): a list value appears to be used as a variable selector by
    # the knowledge index node rather than as a literal - confirm with the node code.
    value: str | int | float | list[str] | None
|
||||
|
||||
|
||||
class KnowledgeIndexNodeData(BaseNodeData):
|
||||
"""
|
||||
Knowledge index Node Data.
|
||||
|
|
@ -161,5 +170,6 @@ class KnowledgeIndexNodeData(BaseNodeData):
|
|||
type: NodeType = KNOWLEDGE_INDEX_NODE_TYPE
|
||||
chunk_structure: str
|
||||
index_chunk_variable_selector: list[str]
|
||||
doc_metadata: list[DocMetadata] | None = None
|
||||
indexing_technique: str | None = None
|
||||
summary_index_setting: SummaryIndexSettingDict | None = None
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import logging
|
||||
from collections.abc import Mapping
|
||||
from collections.abc import Mapping, Sequence
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from core.rag.index_processor.index_processor import IndexProcessor
|
||||
|
|
@ -13,11 +13,14 @@ from dify_graph.node_events import NodeRunResult
|
|||
from dify_graph.nodes.base.node import Node
|
||||
from dify_graph.nodes.base.template import Template
|
||||
|
||||
from .entities import KnowledgeIndexNodeData
|
||||
from .entities import DocMetadata, KnowledgeIndexNodeData
|
||||
from .exc import (
|
||||
KnowledgeIndexNodeError,
|
||||
)
|
||||
|
||||
# Constant for built-in metadata identifier
|
||||
BUILT_IN_METADATA_ID = "built-in"
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from dify_graph.entities import GraphInitParams
|
||||
from dify_graph.runtime import GraphRuntimeState
|
||||
|
|
@ -92,6 +95,15 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
|
|||
if not batch:
|
||||
raise KnowledgeIndexNodeError("Batch is required.")
|
||||
|
||||
# Resolve metadata before indexing
|
||||
resolved_doc_metadata: dict[str, Any] = {}
|
||||
metadata_binding_ids: list[str] = []
|
||||
if node_data.doc_metadata:
|
||||
resolved_doc_metadata, metadata_binding_ids = self._resolve_doc_metadata_values(
|
||||
dataset_id=dataset_id,
|
||||
doc_metadata_items=node_data.doc_metadata,
|
||||
)
|
||||
|
||||
results = self._invoke_knowledge_index(
|
||||
dataset_id=dataset_id,
|
||||
document_id=document_id,
|
||||
|
|
@ -100,6 +112,8 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
|
|||
batch=batch.value,
|
||||
chunks=chunks,
|
||||
summary_index_setting=summary_index_setting,
|
||||
doc_metadata=resolved_doc_metadata,
|
||||
metadata_binding_ids=metadata_binding_ids,
|
||||
)
|
||||
return NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED, inputs=variables, outputs=results)
|
||||
|
||||
|
|
@ -129,17 +143,86 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
|
|||
batch: Any,
|
||||
chunks: Mapping[str, Any],
|
||||
summary_index_setting: SummaryIndexSettingDict | None = None,
|
||||
doc_metadata: Mapping[str, Any] | None = None,
|
||||
metadata_binding_ids: list[str] | None = None,
|
||||
):
|
||||
if not document_id:
|
||||
raise KnowledgeIndexNodeError("document_id is required.")
|
||||
rst = self.index_processor.index_and_clean(
|
||||
dataset_id, document_id, original_document_id, chunks, batch, summary_index_setting
|
||||
dataset_id,
|
||||
document_id,
|
||||
original_document_id,
|
||||
chunks,
|
||||
batch,
|
||||
summary_index_setting,
|
||||
doc_metadata=doc_metadata,
|
||||
metadata_binding_ids=metadata_binding_ids,
|
||||
user_id=self.require_dify_context().user_id,
|
||||
)
|
||||
self.summary_index_service.generate_and_vectorize_summary(
|
||||
dataset_id, document_id, is_preview, summary_index_setting
|
||||
)
|
||||
return rst
|
||||
|
||||
def _resolve_doc_metadata_values(
    self,
    *,
    dataset_id: str,
    doc_metadata_items: Sequence[DocMetadata],
) -> tuple[dict[str, Any], list[str]]:
    """
    Resolve metadata variable values from the variable pool.

    Entries whose ``metadata_id`` equals ``BUILT_IN_METADATA_ID`` are ignored.
    A list value is treated as a variable selector and replaced by the selected
    variable's object; any other value is used as-is.

    Args:
        dataset_id: Not used by this method; kept in the signature for callers.
        doc_metadata_items: Metadata entries configured on the node.

    Returns:
        A dict of {metadata_id: resolved_value} holding only the non-None
        values, and the list of metadata IDs to bind. An entry whose value is
        None is still listed for binding but gets no value.
        The IndexProcessor will handle looking up metadata names from DB.

    Raises:
        KnowledgeIndexNodeError: If a variable selector does not resolve to a
            variable in the pool.
    """
    variable_pool = self.graph_runtime_state.variable_pool
    resolved_metadata: dict[str, Any] = {}
    metadata_binding_ids: list[str] = []

    for item in doc_metadata_items:
        if item.metadata_id == BUILT_IN_METADATA_ID:
            continue

        value = item.value
        if isinstance(value, list):
            variable = variable_pool.get(value)
            # Compare against None explicitly so that a variable which exists
            # but evaluates as falsy is not reported as missing.
            # NOTE(review): assumes the pool returns None for an unknown selector - confirm.
            if variable is None:
                variable_path = ".".join(value)
                raise KnowledgeIndexNodeError(
                    f"Variable '{variable_path}' not found for metadata '{item.metadata_id}'. "
                    f"Please check your variable configuration."
                )
            value = variable.to_object()

        if value is not None:
            resolved_metadata[item.metadata_id] = value

        metadata_binding_ids.append(item.metadata_id)

    return resolved_metadata, metadata_binding_ids
|
||||
|
||||
@classmethod
def _extract_variable_selector_to_variable_mapping(
    cls,
    *,
    graph_config: Mapping[str, Any],
    node_id: str,
    node_data: KnowledgeIndexNodeData,
) -> Mapping[str, Sequence[str]]:
    """
    Map this node's variable references to their selectors.

    The result always contains the chunk selector under
    ``"<node_id>.index_chunk_variable_selector"``. Every ``doc_metadata`` entry
    whose value is a list contributes ``"<node_id>.<metadata_id>"`` mapped to
    that list; entries with any other value are left out.
    """
    selectors: dict[str, Sequence[str]] = {
        node_id + ".index_chunk_variable_selector": node_data.index_chunk_variable_selector,
    }

    # Only list values are recorded as selectors.
    for entry in node_data.doc_metadata or ():
        if isinstance(entry.value, list):
            selectors[node_id + "." + entry.metadata_id] = entry.value

    return selectors
|
||||
|
||||
@classmethod
def version(cls) -> str:
    """Return the version string of this node, ``"1"``."""
    return "1"
|
||||
|
|
|
|||
|
|
@ -34,6 +34,9 @@ class IndexProcessorProtocol(Protocol):
|
|||
chunks: Mapping[str, Any],
|
||||
batch: Any,
|
||||
summary_index_setting: dict | None = None,
|
||||
doc_metadata: Mapping[str, Any] | None = None,
|
||||
metadata_binding_ids: list[str] | None = None,
|
||||
user_id: str | None = None,
|
||||
) -> dict[str, Any]: ...
|
||||
|
||||
def get_preview_output(
|
||||
|
|
|
|||
|
|
@ -41,6 +41,8 @@ from models.dataset import (
|
|||
Dataset,
|
||||
DatasetAutoDisableLog,
|
||||
DatasetCollectionBinding,
|
||||
DatasetMetadata,
|
||||
DatasetMetadataBinding,
|
||||
DatasetPermission,
|
||||
DatasetPermissionEnum,
|
||||
DatasetProcessRule,
|
||||
|
|
@ -1940,11 +1942,95 @@ class DocumentService:
|
|||
else default_retrieval_model
|
||||
)
|
||||
|
||||
# Handle custom metadata configuration
|
||||
custom_metadata: dict[str, str | int | float | None] = {}
|
||||
metadata_bindings_to_create: list[str] = []
|
||||
if knowledge_config.doc_metadata:
|
||||
# Batch fetch all metadata definitions to avoid N+1 query
|
||||
metadata_ids = [item.metadata_id for item in knowledge_config.doc_metadata]
|
||||
metadata_defs = (
|
||||
db.session.query(DatasetMetadata)
|
||||
.filter(
|
||||
DatasetMetadata.id.in_(metadata_ids),
|
||||
DatasetMetadata.dataset_id == dataset.id,
|
||||
)
|
||||
.all()
|
||||
)
|
||||
metadata_map = {md.id: md for md in metadata_defs}
|
||||
|
||||
for item in knowledge_config.doc_metadata:
|
||||
# Validate metadata_id belongs to this dataset
|
||||
metadata_def = metadata_map.get(item.metadata_id)
|
||||
if not metadata_def:
|
||||
raise ValueError(f"Metadata with id '{item.metadata_id}' not found in this dataset")
|
||||
custom_metadata[metadata_def.name] = item.value
|
||||
metadata_bindings_to_create.append(item.metadata_id)
|
||||
|
||||
documents = []
|
||||
if knowledge_config.original_document_id:
|
||||
document = DocumentService.update_document_with_dataset_id(dataset, knowledge_config, account)
|
||||
documents.append(document)
|
||||
batch = document.batch
|
||||
# Reconcile pipeline-managed metadata on re-index so removed fields do not linger.
|
||||
if knowledge_config.doc_metadata is not None:
|
||||
from sqlalchemy.orm import attributes
|
||||
|
||||
metadata_ids_deduped = list(dict.fromkeys(metadata_bindings_to_create))
|
||||
existing_bindings = (
|
||||
db.session.query(DatasetMetadataBinding)
|
||||
.filter_by(dataset_id=dataset.id, document_id=document.id)
|
||||
.all()
|
||||
)
|
||||
existing_binding_ids = {binding.metadata_id for binding in existing_bindings}
|
||||
|
||||
metadata_ids_to_load = list(existing_binding_ids | set(metadata_ids_deduped))
|
||||
metadata_name_map: dict[str, str] = {}
|
||||
if metadata_ids_to_load:
|
||||
metadata_defs = (
|
||||
db.session.query(DatasetMetadata)
|
||||
.filter(
|
||||
DatasetMetadata.dataset_id == dataset.id,
|
||||
DatasetMetadata.id.in_(metadata_ids_to_load),
|
||||
)
|
||||
.all()
|
||||
)
|
||||
metadata_name_map = {metadata_def.id: metadata_def.name for metadata_def in metadata_defs}
|
||||
|
||||
doc_metadata_field = copy.deepcopy(document.doc_metadata) if document.doc_metadata else {}
|
||||
for metadata_id in existing_binding_ids:
|
||||
metadata_name = metadata_name_map.get(metadata_id)
|
||||
if metadata_name:
|
||||
doc_metadata_field.pop(metadata_name, None)
|
||||
doc_metadata_field.update(custom_metadata)
|
||||
document.doc_metadata = doc_metadata_field
|
||||
attributes.flag_modified(document, "doc_metadata")
|
||||
db.session.add(document)
|
||||
|
||||
obsolete_metadata_ids = existing_binding_ids - set(metadata_ids_deduped)
|
||||
if obsolete_metadata_ids:
|
||||
(
|
||||
db.session.query(DatasetMetadataBinding)
|
||||
.filter(
|
||||
DatasetMetadataBinding.dataset_id == dataset.id,
|
||||
DatasetMetadataBinding.document_id == document.id,
|
||||
DatasetMetadataBinding.metadata_id.in_(obsolete_metadata_ids),
|
||||
)
|
||||
.delete(synchronize_session=False)
|
||||
)
|
||||
|
||||
existing_current_binding_ids = existing_binding_ids & set(metadata_ids_deduped)
|
||||
for metadata_id in metadata_ids_deduped:
|
||||
if metadata_id in existing_current_binding_ids:
|
||||
continue
|
||||
binding = DatasetMetadataBinding(
|
||||
tenant_id=dataset.tenant_id,
|
||||
dataset_id=dataset.id,
|
||||
document_id=document.id,
|
||||
metadata_id=metadata_id,
|
||||
created_by=account.id,
|
||||
)
|
||||
db.session.add(binding)
|
||||
db.session.commit()
|
||||
else:
|
||||
# When creating new documents, data_source must be provided
|
||||
if not knowledge_config.data_source:
|
||||
|
|
@ -2045,6 +2131,10 @@ class DocumentService:
|
|||
document.data_source_info = json.dumps(data_source_info)
|
||||
document.batch = batch
|
||||
document.indexing_status = IndexingStatus.WAITING
|
||||
if custom_metadata:
|
||||
doc_metadata = copy.deepcopy(document.doc_metadata) if document.doc_metadata else {}
|
||||
doc_metadata.update(custom_metadata)
|
||||
document.doc_metadata = doc_metadata
|
||||
db.session.add(document)
|
||||
documents.append(document)
|
||||
duplicate_document_ids.append(document.id)
|
||||
|
|
@ -2062,6 +2152,7 @@ class DocumentService:
|
|||
account,
|
||||
file.name,
|
||||
batch,
|
||||
custom_metadata=custom_metadata or None,
|
||||
)
|
||||
db.session.add(document)
|
||||
db.session.flush()
|
||||
|
|
@ -2114,6 +2205,7 @@ class DocumentService:
|
|||
account,
|
||||
truncated_page_name,
|
||||
batch,
|
||||
custom_metadata=custom_metadata or None,
|
||||
)
|
||||
db.session.add(document)
|
||||
db.session.flush()
|
||||
|
|
@ -2154,12 +2246,46 @@ class DocumentService:
|
|||
account,
|
||||
document_name,
|
||||
batch,
|
||||
custom_metadata=custom_metadata or None,
|
||||
)
|
||||
db.session.add(document)
|
||||
db.session.flush()
|
||||
document_ids.append(document.id)
|
||||
documents.append(document)
|
||||
position += 1
|
||||
# Create DatasetMetadataBinding records for custom metadata
|
||||
# before commit so documents and bindings are in a single transaction.
|
||||
if metadata_bindings_to_create:
|
||||
target_document_ids = list(set(document_ids + duplicate_document_ids))
|
||||
metadata_ids = list(dict.fromkeys(metadata_bindings_to_create))
|
||||
if target_document_ids and metadata_ids:
|
||||
existing_binding_pairs = {
|
||||
(document_id, metadata_id)
|
||||
for document_id, metadata_id in db.session.query(
|
||||
DatasetMetadataBinding.document_id,
|
||||
DatasetMetadataBinding.metadata_id,
|
||||
)
|
||||
.filter(
|
||||
DatasetMetadataBinding.dataset_id == dataset.id,
|
||||
DatasetMetadataBinding.document_id.in_(target_document_ids),
|
||||
DatasetMetadataBinding.metadata_id.in_(metadata_ids),
|
||||
)
|
||||
.all()
|
||||
}
|
||||
|
||||
for doc_id in target_document_ids:
|
||||
for metadata_id in metadata_ids:
|
||||
if (doc_id, metadata_id) in existing_binding_pairs:
|
||||
continue
|
||||
binding = DatasetMetadataBinding(
|
||||
tenant_id=dataset.tenant_id,
|
||||
dataset_id=dataset.id,
|
||||
document_id=doc_id,
|
||||
metadata_id=metadata_id,
|
||||
created_by=account.id,
|
||||
)
|
||||
db.session.add(binding)
|
||||
|
||||
db.session.commit()
|
||||
|
||||
# trigger async task
|
||||
|
|
@ -2474,6 +2600,7 @@ class DocumentService:
|
|||
account: Account,
|
||||
name: str,
|
||||
batch: str,
|
||||
custom_metadata: dict | None = None,
|
||||
):
|
||||
# Set need_summary based on dataset's summary_index_setting
|
||||
need_summary = False
|
||||
|
|
@ -2504,6 +2631,9 @@ class DocumentService:
|
|||
BuiltInField.last_update_date: datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%d %H:%M:%S"),
|
||||
BuiltInField.source: data_source_type,
|
||||
}
|
||||
# Merge custom metadata if provided
|
||||
if custom_metadata:
|
||||
doc_metadata.update(custom_metadata)
|
||||
if doc_metadata:
|
||||
document.doc_metadata = doc_metadata
|
||||
return document
|
||||
|
|
|
|||
|
|
@ -113,6 +113,11 @@ class MetaDataConfig(BaseModel):
|
|||
doc_metadata: dict
|
||||
|
||||
|
||||
class DocumentMetadataInput(BaseModel):
    """A metadata value supplied for a document, addressed by metadata ID."""

    # ID of the metadata definition the value belongs to.
    metadata_id: str
    # Value to store; optional, defaults to None.
    value: str | int | float | None = None
|
||||
|
||||
|
||||
class KnowledgeConfig(BaseModel):
|
||||
original_document_id: str | None = None
|
||||
duplicate: bool = True
|
||||
|
|
@ -127,6 +132,7 @@ class KnowledgeConfig(BaseModel):
|
|||
embedding_model_provider: str | None = None
|
||||
name: str | None = None
|
||||
is_multimodal: bool = False
|
||||
doc_metadata: list[DocumentMetadataInput] | None = None
|
||||
|
||||
@field_validator("doc_form")
|
||||
@classmethod
|
||||
|
|
|
|||
|
|
@ -0,0 +1,2 @@
|
|||
class MetadataInUseError(ValueError):
    """Raised when metadata is still referenced by a pipeline configuration.

    Subclasses ``ValueError``, so handlers that catch ``ValueError`` also
    catch this error.
    """
|
||||
|
|
@ -1,5 +1,9 @@
|
|||
import copy
|
||||
import json
|
||||
import logging
|
||||
from collections.abc import Mapping
|
||||
|
||||
from werkzeug.exceptions import NotFound
|
||||
|
||||
from core.rag.index_processor.constant.built_in_field import BuiltInField, MetadataDataSource
|
||||
from extensions.ext_database import db
|
||||
|
|
@ -8,16 +12,108 @@ from libs.datetime_utils import naive_utc_now
|
|||
from libs.login import current_account_with_tenant
|
||||
from models.dataset import Dataset, DatasetMetadata, DatasetMetadataBinding
|
||||
from models.enums import DatasetMetadataType
|
||||
from models.model import App, AppModelConfig
|
||||
from models.workflow import Workflow
|
||||
from services.dataset_service import DocumentService
|
||||
from services.entities.knowledge_entities.knowledge_entities import (
|
||||
MetadataArgs,
|
||||
MetadataOperationData,
|
||||
)
|
||||
from services.errors.metadata_service import MetadataInUseError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Lifetime, in seconds, of the cached pipeline-referenced metadata IDs.
_PIPELINE_REF_CACHE_TTL = 60  # seconds
|
||||
|
||||
|
||||
class MetadataService:
|
||||
@staticmethod
|
||||
def _collect_referenced_metadata_ids(payload: object, referenced_ids: set[str]) -> None:
|
||||
"""Collect all metadata IDs referenced by persisted pipeline JSON payloads."""
|
||||
if isinstance(payload, Mapping):
|
||||
metadata_id = payload.get("metadata_id")
|
||||
if isinstance(metadata_id, str):
|
||||
referenced_ids.add(metadata_id)
|
||||
|
||||
for value in payload.values():
|
||||
MetadataService._collect_referenced_metadata_ids(value, referenced_ids)
|
||||
return
|
||||
|
||||
if isinstance(payload, list):
|
||||
for item in payload:
|
||||
MetadataService._collect_referenced_metadata_ids(item, referenced_ids)
|
||||
|
||||
@staticmethod
|
||||
def _load_reference_payload(raw_payload: str | None, source_name: str) -> object | None:
|
||||
if not raw_payload:
|
||||
return None
|
||||
|
||||
try:
|
||||
return json.loads(raw_payload)
|
||||
except json.JSONDecodeError:
|
||||
logger.warning("Failed to decode metadata reference payload from %s", source_name)
|
||||
return None
|
||||
|
||||
@staticmethod
def _scan_all_referenced_metadata_ids(tenant_id: str) -> set[str]:
    """
    Collect the metadata IDs referenced by a tenant's app configs and workflow graphs.

    Both queries pre-filter rows to those whose stored text contains
    ``"metadata_id"``; each matching payload is then decoded and walked.
    Payloads that cannot be decoded are skipped.
    """
    needle = '"metadata_id"'

    dataset_config_rows = (
        db.session.query(AppModelConfig.dataset_configs)
        .join(App, App.id == AppModelConfig.app_id)
        .filter(
            App.tenant_id == tenant_id,
            AppModelConfig.dataset_configs.isnot(None),
            AppModelConfig.dataset_configs.contains(needle),
        )
        .all()
    )

    graph_rows = (
        db.session.query(Workflow.graph)
        .filter(
            Workflow.tenant_id == tenant_id,
            Workflow.graph.contains(needle),
        )
        .all()
    )

    collected: set[str] = set()
    # Each result row is a one-element tuple holding the raw JSON text.
    for rows, source_name in (
        (dataset_config_rows, "app_model_configs.dataset_configs"),
        (graph_rows, "workflows.graph"),
    ):
        for (raw_payload,) in rows:
            payload = MetadataService._load_reference_payload(raw_payload, source_name)
            if payload is None:
                continue
            MetadataService._collect_referenced_metadata_ids(payload, collected)

    return collected
|
||||
|
||||
@staticmethod
|
||||
def _get_referenced_metadata_ids(tenant_id: str, metadata_ids: set[str], *, bypass_cache: bool = False) -> set[str]:
|
||||
"""Return metadata IDs (from the given set) that are referenced by pipeline configurations.
|
||||
|
||||
Results are cached per-tenant for _PIPELINE_REF_CACHE_TTL seconds.
|
||||
Pass bypass_cache=True for write paths (e.g. delete) that require fresh data.
|
||||
"""
|
||||
if not metadata_ids:
|
||||
return set()
|
||||
|
||||
cache_key = f"metadata:pipeline_refs:{tenant_id}"
|
||||
|
||||
if not bypass_cache:
|
||||
raw = redis_client.get(cache_key)
|
||||
if raw:
|
||||
all_referenced = set(json.loads(raw))
|
||||
return all_referenced & metadata_ids
|
||||
|
||||
all_referenced = MetadataService._scan_all_referenced_metadata_ids(tenant_id)
|
||||
redis_client.setex(cache_key, _PIPELINE_REF_CACHE_TTL, json.dumps(list(all_referenced)))
|
||||
return all_referenced & metadata_ids
|
||||
|
||||
@staticmethod
|
||||
def create_metadata(dataset_id: str, metadata_args: MetadataArgs) -> DatasetMetadata:
|
||||
# check if metadata name is too long
|
||||
|
|
@ -103,7 +199,13 @@ class MetadataService:
|
|||
MetadataService.knowledge_base_metadata_lock_check(dataset_id, None)
|
||||
metadata = db.session.query(DatasetMetadata).filter_by(id=metadata_id).first()
|
||||
if metadata is None:
|
||||
raise ValueError("Metadata not found.")
|
||||
raise NotFound("Metadata not found.")
|
||||
_, current_tenant_id = current_account_with_tenant()
|
||||
referenced_metadata_ids = MetadataService._get_referenced_metadata_ids(
|
||||
current_tenant_id, {metadata_id}, bypass_cache=True
|
||||
)
|
||||
if metadata_id in referenced_metadata_ids:
|
||||
raise MetadataInUseError("This metadata is referenced by a pipeline and cannot be deleted.")
|
||||
db.session.delete(metadata)
|
||||
|
||||
# deal related documents
|
||||
|
|
@ -123,8 +225,13 @@ class MetadataService:
|
|||
db.session.add(document)
|
||||
db.session.commit()
|
||||
return metadata
|
||||
except MetadataInUseError:
|
||||
raise
|
||||
except NotFound:
|
||||
raise
|
||||
except Exception:
|
||||
logger.exception("Delete metadata failed")
|
||||
raise
|
||||
finally:
|
||||
redis_client.delete(lock_key)
|
||||
|
||||
|
|
@ -269,6 +376,9 @@ class MetadataService:
|
|||
|
||||
@staticmethod
|
||||
def get_dataset_metadatas(dataset: Dataset):
|
||||
metadata_items = [item for item in dataset.doc_metadata or [] if item.get("id") != "built-in"]
|
||||
metadata_ids: set[str] = {mid for item in metadata_items if (mid := item.get("id")) is not None}
|
||||
referenced_metadata_ids = MetadataService._get_referenced_metadata_ids(dataset.tenant_id, metadata_ids)
|
||||
return {
|
||||
"doc_metadata": [
|
||||
{
|
||||
|
|
@ -278,9 +388,9 @@ class MetadataService:
|
|||
"count": db.session.query(DatasetMetadataBinding)
|
||||
.filter_by(metadata_id=item.get("id"), dataset_id=dataset.id)
|
||||
.count(),
|
||||
"is_referenced_by_pipeline": item.get("id") in referenced_metadata_ids,
|
||||
}
|
||||
for item in dataset.doc_metadata or []
|
||||
if item.get("id") != "built-in"
|
||||
for item in metadata_items
|
||||
],
|
||||
"built_in_field_enabled": dataset.built_in_field_enabled,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -268,7 +268,7 @@ class TestAccountService:
|
|||
fake = Faker()
|
||||
email = fake.email()
|
||||
name = fake.name()
|
||||
password = generate_valid_password(fake)
|
||||
password = fake.password(length=12)
|
||||
# Setup mocks
|
||||
mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True
|
||||
mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ from unittest.mock import create_autospec, patch
|
|||
import pytest
|
||||
from faker import Faker
|
||||
from sqlalchemy.orm import Session
|
||||
from werkzeug.exceptions import NotFound
|
||||
|
||||
from core.rag.index_processor.constant.built_in_field import BuiltInField
|
||||
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||
|
|
@ -470,11 +471,9 @@ class TestMetadataService:
|
|||
|
||||
fake_metadata_id = str(uuid.uuid4()) # Use valid UUID format
|
||||
|
||||
# Act: Execute the method under test
|
||||
result = MetadataService.delete_metadata(dataset.id, fake_metadata_id)
|
||||
|
||||
# Assert: Verify the method returns None when metadata is not found
|
||||
assert result is None
|
||||
# Act / Assert: deleting a missing metadata now surfaces a 404-compatible error
|
||||
with pytest.raises(NotFound, match="Metadata not found"):
|
||||
MetadataService.delete_metadata(dataset.id, fake_metadata_id)
|
||||
|
||||
def test_delete_metadata_with_document_bindings(
|
||||
self, db_session_with_containers: Session, mock_external_service_dependencies
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ from unittest.mock import MagicMock, PropertyMock, patch
|
|||
|
||||
import pytest
|
||||
from flask import Flask
|
||||
from werkzeug.exceptions import NotFound
|
||||
from werkzeug.exceptions import BadRequest, NotFound
|
||||
|
||||
from controllers.console import console_ns
|
||||
from controllers.console.datasets.metadata import (
|
||||
|
|
@ -18,6 +18,7 @@ from services.entities.knowledge_entities.knowledge_entities import (
|
|||
MetadataArgs,
|
||||
MetadataOperationData,
|
||||
)
|
||||
from services.errors.metadata_service import MetadataInUseError
|
||||
from services.metadata_service import MetadataService
|
||||
|
||||
|
||||
|
|
@ -267,6 +268,62 @@ class TestDatasetMetadataApi:
|
|||
assert status == 204
|
||||
assert result["result"] == "success"
|
||||
|
||||
def test_delete_metadata_referenced_by_pipeline(self, app, current_user, dataset, dataset_id, metadata_id):
    """The console delete endpoint turns MetadataInUseError into BadRequest.

    All collaborators of the controller are stubbed; the metadata service is
    made to raise MetadataInUseError and the test expects a BadRequest whose
    message mentions the pipeline reference.
    """
    resource = DatasetMetadataApi()
    delete_handler = unwrap(resource.delete)
    in_use_error = MetadataInUseError("This metadata is referenced by a pipeline and cannot be deleted.")

    with (
        app.test_request_context("/"),
        patch(
            "controllers.console.datasets.metadata.current_account_with_tenant",
            return_value=(current_user, "tenant-1"),
        ),
        patch.object(DatasetService, "get_dataset", return_value=dataset),
        patch.object(DatasetService, "check_dataset_permission"),
        patch.object(MetadataService, "delete_metadata", side_effect=in_use_error),
        # Entered last so it only wraps the handler call below.
        pytest.raises(BadRequest, match="referenced by a pipeline"),
    ):
        delete_handler(resource, dataset_id, metadata_id)
|
||||
|
||||
def test_delete_metadata_not_found(self, app, current_user, dataset, dataset_id, metadata_id):
    """A NotFound raised by the metadata service propagates out of the console delete endpoint."""
    resource = DatasetMetadataApi()
    delete_handler = unwrap(resource.delete)
    missing_error = NotFound("Metadata not found.")

    with (
        app.test_request_context("/"),
        patch(
            "controllers.console.datasets.metadata.current_account_with_tenant",
            return_value=(current_user, "tenant-1"),
        ),
        patch.object(DatasetService, "get_dataset", return_value=dataset),
        patch.object(DatasetService, "check_dataset_permission"),
        patch.object(MetadataService, "delete_metadata", side_effect=missing_error),
        # Entered last so it only wraps the handler call below.
        pytest.raises(NotFound, match="Metadata not found"),
    ):
        delete_handler(resource, dataset_id, metadata_id)
|
||||
|
||||
|
||||
class TestDatasetMetadataBuiltInFieldApi:
|
||||
def test_get_built_in_fields(self, app):
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ import uuid
|
|||
from unittest.mock import Mock, patch
|
||||
|
||||
import pytest
|
||||
from werkzeug.exceptions import NotFound
|
||||
from werkzeug.exceptions import BadRequest, NotFound
|
||||
|
||||
from controllers.service_api.dataset.metadata import (
|
||||
DatasetMetadataBuiltInFieldActionServiceApi,
|
||||
|
|
@ -28,6 +28,7 @@ from controllers.service_api.dataset.metadata import (
|
|||
DatasetMetadataServiceApi,
|
||||
DocumentMetadataEditServiceApi,
|
||||
)
|
||||
from services.errors.metadata_service import MetadataInUseError
|
||||
from tests.unit_tests.controllers.service_api.conftest import _unwrap
|
||||
|
||||
|
||||
|
|
@ -323,6 +324,70 @@ class TestDatasetMetadataServiceApiDelete:
|
|||
metadata_id=metadata_id,
|
||||
)
|
||||
|
||||
@patch("controllers.service_api.dataset.metadata.MetadataService")
@patch("controllers.service_api.dataset.metadata.DatasetService")
@patch("controllers.service_api.dataset.metadata.current_user")
def test_delete_metadata_referenced_by_pipeline(
    self,
    mock_current_user,
    mock_dataset_svc,
    mock_meta_svc,
    app,
    mock_tenant,
    mock_dataset,
):
    """Test 400 when metadata is still referenced by a pipeline."""
    metadata_id = str(uuid.uuid4())

    # Dataset lookup and permission check succeed; only the delete itself fails.
    mock_dataset_svc.get_dataset.return_value = mock_dataset
    mock_dataset_svc.check_dataset_permission.return_value = None
    in_use_error = MetadataInUseError("This metadata is referenced by a pipeline and cannot be deleted.")
    mock_meta_svc.delete_metadata.side_effect = in_use_error

    request_path = f"/datasets/{mock_dataset.id}/metadata/{metadata_id}"
    with app.test_request_context(request_path, method="DELETE"):
        resource = DatasetMetadataServiceApi()
        call_kwargs = {
            "tenant_id": mock_tenant.id,
            "dataset_id": mock_dataset.id,
            "metadata_id": metadata_id,
        }
        with pytest.raises(BadRequest, match="referenced by a pipeline"):
            self._call_delete(resource, **call_kwargs)
|
||||
|
||||
@patch("controllers.service_api.dataset.metadata.MetadataService")
@patch("controllers.service_api.dataset.metadata.DatasetService")
@patch("controllers.service_api.dataset.metadata.current_user")
def test_delete_metadata_not_found(
    self,
    mock_current_user,
    mock_dataset_svc,
    mock_meta_svc,
    app,
    mock_tenant,
    mock_dataset,
):
    """Test 404 when metadata does not exist."""
    metadata_id = str(uuid.uuid4())

    # Dataset lookup and permission check succeed; the delete reports a missing record.
    mock_dataset_svc.get_dataset.return_value = mock_dataset
    mock_dataset_svc.check_dataset_permission.return_value = None
    mock_meta_svc.delete_metadata.side_effect = NotFound("Metadata not found.")

    request_path = f"/datasets/{mock_dataset.id}/metadata/{metadata_id}"
    with app.test_request_context(request_path, method="DELETE"):
        resource = DatasetMetadataServiceApi()
        call_kwargs = {
            "tenant_id": mock_tenant.id,
            "dataset_id": mock_dataset.id,
            "metadata_id": metadata_id,
        }
        with pytest.raises(NotFound, match="Metadata not found"):
            self._call_delete(resource, **call_kwargs)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DatasetMetadataBuiltInFieldServiceApi
|
||||
|
|
|
|||
|
|
@ -0,0 +1,67 @@
|
|||
from unittest.mock import Mock, patch
|
||||
|
||||
from core.rag.index_processor.index_processor import IndexProcessor
|
||||
from models.dataset import DatasetMetadataBinding
|
||||
|
||||
|
||||
def test_save_doc_metadata_and_bindings_replaces_removed_metadata_and_bindings():
    """Saving a new metadata set drops the stale field and binds the new one.

    The document starts with ``old_field`` and ``keep_field``; after saving
    only ``meta-new`` the test expects ``old_field`` to be gone, ``keep_field``
    to be untouched, ``new_field`` to be added, one bulk delete to have been
    issued and a binding for ``meta-new`` to have been added to the session.
    """

    def metadata_stub(metadata_id, field_name):
        # ``name`` is a reserved Mock constructor argument, so attributes are set afterwards.
        stub = Mock()
        stub.id = metadata_id
        stub.name = field_name
        return stub

    def scalars_result(rows):
        result = Mock()
        result.all.return_value = rows
        return result

    processor = IndexProcessor()

    fresh_meta = metadata_stub("meta-new", "new_field")
    stale_meta = metadata_stub("meta-old", "old_field")

    stale_binding = Mock(spec=DatasetMetadataBinding)
    stale_binding.metadata_id = "meta-old"

    db_session = Mock()
    # side_effect hands these out one per session.scalars() call, in this order.
    db_session.scalars.side_effect = [
        scalars_result([fresh_meta]),
        scalars_result([stale_binding]),
        scalars_result([stale_meta, fresh_meta]),
    ]

    # Stub for the session.query(...).where(...).delete(...) chain.
    filtered_delete = Mock()
    filtered_delete.delete.return_value = 1
    db_session.query.return_value.where.return_value = filtered_delete

    doc = Mock()
    doc.id = "doc-1"
    doc.doc_metadata = {
        "old_field": "stale",
        "keep_field": "keep",
    }

    with patch("core.rag.index_processor.index_processor.attributes.flag_modified"):
        processor._save_doc_metadata_and_bindings(
            session=db_session,
            dataset_id="dataset-1",
            tenant_id="tenant-1",
            document=doc,
            doc_metadata={"meta-new": "new_value"},
            metadata_binding_ids=["meta-new"],
            user_id="user-1",
        )

    assert doc.doc_metadata == {
        "keep_field": "keep",
        "new_field": "new_value",
    }
    filtered_delete.delete.assert_called_once_with(synchronize_session=False)

    first_added = db_session.add.call_args_list[0].args[0]
    assert isinstance(first_added, DatasetMetadataBinding)
    assert first_added.document_id == "doc-1"
    assert first_added.metadata_id == "meta-new"
|
||||
|
|
@ -5,7 +5,7 @@ from unittest.mock import Mock
|
|||
import pytest
|
||||
|
||||
from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom
|
||||
from core.workflow.nodes.knowledge_index.entities import KnowledgeIndexNodeData
|
||||
from core.workflow.nodes.knowledge_index.entities import DocMetadata, KnowledgeIndexNodeData
|
||||
from core.workflow.nodes.knowledge_index.exc import KnowledgeIndexNodeError
|
||||
from core.workflow.nodes.knowledge_index.knowledge_index_node import KnowledgeIndexNode
|
||||
from core.workflow.nodes.knowledge_index.protocols import (
|
||||
|
|
@ -512,6 +512,130 @@ class TestKnowledgeIndexNode:
|
|||
assert "Unexpected error" in result.error
|
||||
assert result.error_type == "Exception"
|
||||
|
||||
def test_run_with_doc_metadata(
    self,
    mock_graph_init_params,
    mock_graph_runtime_state,
    mock_index_processor,
    mock_summary_index_service,
    sample_chunks,
):
    """Test _run resolves doc_metadata from variable pool and passes to index_processor."""
    # Arrange
    dataset_id = str(uuid.uuid4())
    document_id = str(uuid.uuid4())
    meta_id = "meta_uuid_1"
    chunks_selector = ["start", "chunks"]

    pool = mock_graph_runtime_state.variable_pool
    system_values = (
        (SystemVariableKey.DATASET_ID, dataset_id),
        (SystemVariableKey.DOCUMENT_ID, document_id),
        (SystemVariableKey.BATCH, "batch_123"),
        (SystemVariableKey.INVOKE_FROM, InvokeFrom.SERVICE_API),
    )
    for system_key, system_value in system_values:
        pool.add(["sys", system_key], StringSegment(value=system_value))
    pool.add(chunks_selector, sample_chunks)
    # The variable that the metadata selector below points at.
    pool.add(["start", "category"], StringSegment(value="Financial"))

    mock_index_processor.index_and_clean.return_value = {"status": "indexed"}

    node_data = KnowledgeIndexNodeData(
        title="Knowledge Index",
        type="knowledge-index",
        chunk_structure="general_structure",
        index_chunk_variable_selector=chunks_selector,
        doc_metadata=[DocMetadata(metadata_id=meta_id, value=["start", "category"])],
    )
    node_id = str(uuid.uuid4())
    node = KnowledgeIndexNode(
        id=node_id,
        config={"id": node_id, "data": node_data.model_dump()},
        graph_init_params=mock_graph_init_params,
        graph_runtime_state=mock_graph_runtime_state,
    )

    # Act
    result = node._run()

    # Assert
    assert result.status == WorkflowNodeExecutionStatus.SUCCEEDED
    indexed_kwargs = mock_index_processor.index_and_clean.call_args.kwargs
    assert indexed_kwargs["doc_metadata"] == {meta_id: "Financial"}
    assert meta_id in indexed_kwargs["metadata_binding_ids"]
|
||||
|
||||
def test_run_with_missing_metadata_variable_fails(
    self,
    mock_graph_init_params,
    mock_graph_runtime_state,
    mock_index_processor,
    mock_summary_index_service,
    sample_chunks,
):
    """Test _run fails when a metadata variable selector is not in the pool."""
    # Arrange
    dataset_id = str(uuid.uuid4())
    document_id = str(uuid.uuid4())
    chunks_selector = ["start", "chunks"]

    pool = mock_graph_runtime_state.variable_pool
    system_values = (
        (SystemVariableKey.DATASET_ID, dataset_id),
        (SystemVariableKey.DOCUMENT_ID, document_id),
        (SystemVariableKey.BATCH, "batch_123"),
        (SystemVariableKey.INVOKE_FROM, InvokeFrom.SERVICE_API),
    )
    for system_key, system_value in system_values:
        pool.add(["sys", system_key], StringSegment(value=system_value))
    pool.add(chunks_selector, sample_chunks)
    # NOTE: "start.missing" is NOT added to the pool

    node_data = KnowledgeIndexNodeData(
        title="Knowledge Index",
        type="knowledge-index",
        chunk_structure="general_structure",
        index_chunk_variable_selector=chunks_selector,
        doc_metadata=[DocMetadata(metadata_id="meta_uuid_1", value=["start", "missing"])],
    )
    node_id = str(uuid.uuid4())
    node = KnowledgeIndexNode(
        id=node_id,
        config={"id": node_id, "data": node_data.model_dump()},
        graph_init_params=mock_graph_init_params,
        graph_runtime_state=mock_graph_runtime_state,
    )

    # Act
    result = node._run()

    # Assert
    assert result.status == WorkflowNodeExecutionStatus.FAILED
    assert "start.missing" in result.error
    mock_index_processor.index_and_clean.assert_not_called()
|
||||
|
||||
def test_invoke_knowledge_index(
|
||||
self,
|
||||
mock_graph_init_params,
|
||||
|
|
@ -645,6 +769,14 @@ class TestInvokeKnowledgeIndex:
|
|||
dataset_id, document_id, False, summary_setting
|
||||
)
|
||||
mock_index_processor.index_and_clean.assert_called_once_with(
|
||||
dataset_id, document_id, original_document_id, chunks, batch, summary_setting
|
||||
dataset_id,
|
||||
document_id,
|
||||
original_document_id,
|
||||
chunks,
|
||||
batch,
|
||||
summary_setting,
|
||||
doc_metadata=None,
|
||||
metadata_binding_ids=None,
|
||||
user_id=mock_graph_init_params.run_context["_dify"].user_id,
|
||||
)
|
||||
assert result == {"status": "indexed"}
|
||||
|
|
|
|||
|
|
@ -87,12 +87,12 @@ This test suite follows a comprehensive testing strategy that covers:
|
|||
from unittest.mock import Mock, patch
|
||||
|
||||
import pytest
|
||||
from werkzeug.exceptions import NotFound
|
||||
|
||||
from core.rag.index_processor.constant.built_in_field import BuiltInField
|
||||
from models.dataset import Dataset, DatasetMetadata, DatasetMetadataBinding
|
||||
from services.entities.knowledge_entities.knowledge_entities import (
|
||||
MetadataArgs,
|
||||
MetadataValue,
|
||||
)
|
||||
from services.metadata_service import MetadataService
|
||||
|
||||
|
|
@ -308,7 +308,7 @@ class MetadataTestDataFactory:
|
|||
value: str = "test",
|
||||
) -> Mock:
|
||||
"""
|
||||
Create a mock MetadataValue entity.
|
||||
Create a mock metadata value entity.
|
||||
|
||||
Args:
|
||||
metadata_id: ID of the metadata field
|
||||
|
|
@ -316,9 +316,9 @@ class MetadataTestDataFactory:
|
|||
value: Value of the metadata
|
||||
|
||||
Returns:
|
||||
Mock object configured as a MetadataValue instance
|
||||
Mock object configured with metadata value fields
|
||||
"""
|
||||
metadata_value = Mock(spec=MetadataValue)
|
||||
metadata_value = Mock()
|
||||
metadata_value.id = metadata_id
|
||||
metadata_value.name = name
|
||||
metadata_value.value = value
|
||||
|
|
@ -775,7 +775,7 @@ class TestMetadataServiceDeleteMetadata:
|
|||
"""
|
||||
Test error handling when metadata is not found.
|
||||
|
||||
Verifies that when the metadata ID doesn't exist, a ValueError
|
||||
Verifies that when the metadata ID doesn't exist, a NotFound
|
||||
is raised and the lock is properly released.
|
||||
|
||||
This test ensures:
|
||||
|
|
@ -794,7 +794,7 @@ class TestMetadataServiceDeleteMetadata:
|
|||
mock_db_session.query.return_value = mock_query
|
||||
|
||||
# Act & Assert
|
||||
with pytest.raises(ValueError, match="Metadata not found"):
|
||||
with pytest.raises(NotFound, match="Metadata not found"):
|
||||
MetadataService.delete_metadata(dataset_id, metadata_id)
|
||||
|
||||
# Verify lock was released
|
||||
|
|
|
|||
|
|
@ -87,6 +87,7 @@ def test_save_document_with_dataset_id_ignores_lock_not_owned(
|
|||
process_rule=None,
|
||||
duplicate=False,
|
||||
doc_language="en",
|
||||
doc_metadata=None,
|
||||
)
|
||||
|
||||
account = fake_current_user
|
||||
|
|
|
|||
|
|
@ -0,0 +1,310 @@
|
|||
from unittest.mock import Mock, patch
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from models.account import Account
|
||||
from models.dataset import Dataset, DatasetMetadata, DatasetMetadataBinding, Document
|
||||
from models.model import UploadFile
|
||||
from services.dataset_service import DocumentService
|
||||
from services.entities.knowledge_entities.knowledge_entities import (
|
||||
DataSource,
|
||||
DocumentMetadataInput,
|
||||
FileInfo,
|
||||
InfoList,
|
||||
KnowledgeConfig,
|
||||
)
|
||||
|
||||
|
||||
class TestDocumentServiceMetadata:
    """Tests for how DocumentService.save_document_with_dataset_id handles ``doc_metadata`` input."""

    @pytest.fixture
    def mock_dependencies(self):
        """Patch the collaborators in services.dataset_service and yield the mocks the tests inspect."""
        with (
            patch("services.dataset_service.db.session") as db_session,
            patch("services.dataset_service.DatasetService.get_dataset") as get_dataset,
            patch("services.dataset_service.redis_client") as redis,
            patch("services.dataset_service.DocumentService.build_document") as build_document,
            patch("services.dataset_service.current_user") as user,
            # Patched but never inspected by the tests.
            patch("services.dataset_service.DocumentIndexingTaskProxy"),
            patch("services.dataset_service.DuplicateDocumentIndexingTaskProxy"),
            # DocumentService.save_document_with_dataset_id stays unpatched: it is the code under test.
        ):
            # Hack to pass isinstance check
            user.__class__ = Account
            user.current_tenant_id = "tenant-123"

            yield {
                "db": db_session,
                "get_dataset": get_dataset,
                "redis": redis,
                "build_document": build_document,
                "current_user": user,
            }

    @staticmethod
    def _make_account(tenant_id):
        """Return an Account-spec'd mock bound to ``tenant_id``."""
        account = Mock(spec=Account)
        account.id = "account-1"
        account.current_tenant_id = tenant_id
        return account

    @staticmethod
    def _make_dataset(dataset_id, tenant_id):
        """Return a Dataset-spec'd mock for a text_model dataset with built-in fields disabled."""
        dataset = Mock(spec=Dataset)
        dataset.id = dataset_id
        dataset.tenant_id = tenant_id
        dataset.built_in_field_enabled = False
        dataset.doc_form = "text_model"
        return dataset

    @staticmethod
    def _make_upload_config(**extra):
        """Build a KnowledgeConfig for a single uploaded file (``file-1``) plus any extra fields."""
        return KnowledgeConfig(
            data_source_type="upload_file",
            data_source=DataSource(
                info_list=InfoList(data_source_type="upload_file", file_info_list=FileInfo(file_ids=["file-1"]))
            ),
            doc_form="text_model",
            doc_language="en",
            indexing_technique="high_quality",
            **extra,
        )

    @staticmethod
    def _added_bindings(db_mock):
        """Return every DatasetMetadataBinding instance that was passed to ``db_mock.add``."""
        return [
            recorded.args[0]
            for recorded in db_mock.add.call_args_list
            if isinstance(recorded.args[0], DatasetMetadataBinding)
        ]

    def test_save_document_with_metadata(self, mock_dependencies):
        """A new document gets its custom metadata passed to build_document and one binding row added."""
        # Arrange
        dataset_id = str(uuid4())
        tenant_id = str(uuid4())
        account = self._make_account(tenant_id)
        dataset = self._make_dataset(dataset_id, tenant_id)
        mock_dependencies["get_dataset"].return_value = dataset

        # Define metadata inputs
        metadata_id = str(uuid4())
        knowledge_config = self._make_upload_config(
            doc_metadata=[DocumentMetadataInput(metadata_id=metadata_id, value="custom_value")],
        )

        with patch("services.dataset_service.db.session.query") as mock_query:
            # Metadata definition returned by the DatasetMetadata lookups.
            metadata_def = Mock(spec=DatasetMetadata)
            metadata_def.id = metadata_id
            metadata_def.name = "custom_field"
            metadata_def.field_type = "text"

            def query_side_effect(*models):
                """Return a query mock shaped for the model(s) being queried."""
                query = Mock()
                if len(models) == 2:
                    query.filter.return_value.all.return_value = []
                elif len(models) == 1:
                    model = models[0]
                    if model == DatasetMetadata:
                        # Several chain shapes are stubbed so whichever one the service uses resolves.
                        query.filter.return_value.filter.return_value.first.return_value = metadata_def
                        query.filter_by.return_value.first.return_value = metadata_def
                        query.filter.return_value.all.return_value = [metadata_def]
                    elif model == Document:
                        last_document = Mock()
                        last_document.position = 1
                        # For get_documents_position
                        query.filter_by.return_value.order_by.return_value.first.return_value = last_document
                        # For duplicate check
                        query.where.return_value.all.return_value = []
                    elif model == UploadFile:
                        query.where.return_value.all.return_value = [Mock(id="file-1", tenant_id=tenant_id)]
                return query

            mock_query.side_effect = query_side_effect

            # build_document is mocked to hand back this document.
            built_document = Mock(spec=Document)
            built_document.id = "doc-123"
            built_document.doc_metadata = {}
            mock_dependencies["build_document"].return_value = built_document

            # Act
            DocumentService.save_document_with_dataset_id(
                dataset=dataset, knowledge_config=knowledge_config, account=account
            )

            # Assert
            # 1. Check custom metadata passed to build_document
            build_call = mock_dependencies["build_document"].call_args
            assert build_call is not None
            build_kwargs = build_call.kwargs
            assert "custom_metadata" in build_kwargs
            assert build_kwargs["custom_metadata"] == {"custom_field": "custom_value"}

            # 2. Check DatasetMetadataBinding creation
            bindings = self._added_bindings(mock_dependencies["db"])
            assert len(bindings) == 1
            assert bindings[0].document_id == "doc-123"
            assert bindings[0].metadata_id == metadata_id

    def test_save_duplicate_document_with_metadata_creates_binding(self, mock_dependencies):
        """With duplicate=True an existing same-named document is reused, updated and bound to the metadata."""
        # Arrange
        dataset_id = str(uuid4())
        tenant_id = str(uuid4())
        account = self._make_account(tenant_id)
        dataset = self._make_dataset(dataset_id, tenant_id)
        mock_dependencies["get_dataset"].return_value = dataset

        metadata_id = str(uuid4())
        knowledge_config = self._make_upload_config(
            duplicate=True,
            doc_metadata=[DocumentMetadataInput(metadata_id=metadata_id, value="custom_value")],
        )

        existing_document = Mock(spec=Document)
        existing_document.id = "dup-doc-1"
        existing_document.name = "dup.txt"
        existing_document.doc_metadata = {"existing_field": "existing_value"}

        with patch("services.dataset_service.db.session.query") as mock_query:
            metadata_def = Mock(spec=DatasetMetadata)
            metadata_def.id = metadata_id
            metadata_def.name = "custom_field"
            metadata_def.field_type = "text"

            def query_side_effect(*models):
                """Return a query mock shaped for the model(s) being queried."""
                query = Mock()
                if len(models) == 2:
                    query.filter.return_value.all.return_value = []
                elif len(models) == 1:
                    model = models[0]
                    if model == DatasetMetadata:
                        query.filter.return_value.all.return_value = [metadata_def]
                    elif model == Document:
                        last_document = Mock()
                        last_document.position = 1
                        query.filter_by.return_value.order_by.return_value.first.return_value = last_document
                        query.where.return_value.all.return_value = [existing_document]
                    elif model == UploadFile:
                        # The upload shares its name with existing_document.
                        upload = Mock(id="file-1", tenant_id=tenant_id)
                        upload.name = "dup.txt"
                        query.where.return_value.all.return_value = [upload]
                return query

            mock_query.side_effect = query_side_effect

            # Act
            DocumentService.save_document_with_dataset_id(
                dataset=dataset, knowledge_config=knowledge_config, account=account
            )

            # Assert
            mock_dependencies["build_document"].assert_not_called()
            assert existing_document.doc_metadata["custom_field"] == "custom_value"

            bindings = self._added_bindings(mock_dependencies["db"])
            assert any(
                binding.document_id == existing_document.id and binding.metadata_id == metadata_id
                for binding in bindings
            )

    def test_reindex_document_replaces_removed_metadata_and_bindings(self, mock_dependencies):
        """Re-indexing with a new metadata set drops the stale field and binds only the new metadata."""
        dataset_id = str(uuid4())
        tenant_id = str(uuid4())
        account = self._make_account(tenant_id)
        dataset = self._make_dataset(dataset_id, tenant_id)
        dataset.indexing_technique = "high_quality"

        old_metadata_id = str(uuid4())
        new_metadata_id = str(uuid4())

        existing_document = Mock(spec=Document)
        existing_document.id = "doc-1"
        existing_document.batch = "batch-1"
        existing_document.doc_metadata = {
            "old_field": "stale",
            "unchanged_field": "keep",
        }

        knowledge_config = KnowledgeConfig(
            original_document_id=existing_document.id,
            doc_form="text_model",
            doc_language="en",
            indexing_technique="high_quality",
            doc_metadata=[DocumentMetadataInput(metadata_id=new_metadata_id, value="new_value")],
        )

        stale_binding = Mock(spec=DatasetMetadataBinding)
        stale_binding.metadata_id = old_metadata_id

        stale_metadata_def = Mock(spec=DatasetMetadata)
        stale_metadata_def.id = old_metadata_id
        stale_metadata_def.name = "old_field"

        fresh_metadata_def = Mock(spec=DatasetMetadata)
        fresh_metadata_def.id = new_metadata_id
        fresh_metadata_def.name = "new_field"

        def query_side_effect(*models):
            """Return a query mock shaped for the model being queried."""
            query = Mock()
            if len(models) == 1:
                model = models[0]
                if model == DatasetMetadata:
                    query.filter.return_value.all.return_value = [stale_metadata_def, fresh_metadata_def]
                elif model == DatasetMetadataBinding:
                    query.filter_by.return_value.all.return_value = [stale_binding]
                    query.filter.return_value.delete.return_value = 1
            return query

        with (
            patch("services.dataset_service.DatasetService.check_doc_form"),
            patch(
                "services.dataset_service.FeatureService.get_features",
                return_value=Mock(billing=Mock(enabled=False)),
            ),
            patch(
                "services.dataset_service.DocumentService.update_document_with_dataset_id",
                return_value=existing_document,
            ),
            patch("services.dataset_service.db.session.query", side_effect=query_side_effect),
            patch("sqlalchemy.orm.attributes.flag_modified"),
        ):
            documents, batch = DocumentService.save_document_with_dataset_id(
                dataset=dataset,
                knowledge_config=knowledge_config,
                account=account,
            )

        assert documents == [existing_document]
        assert batch == "batch-1"
        assert existing_document.doc_metadata == {
            "unchanged_field": "keep",
            "new_field": "new_value",
        }

        bindings = self._added_bindings(mock_dependencies["db"])
        assert len(bindings) == 1
        assert bindings[0].document_id == existing_document.id
        assert bindings[0].metadata_id == new_metadata_id
|
||||
|
|
@ -0,0 +1,128 @@
|
|||
from unittest.mock import MagicMock, Mock, patch
|
||||
|
||||
from services.metadata_service import MetadataService
|
||||
|
||||
|
||||
class TestMetadataServicePipelineGuard:
|
||||
def test_collect_referenced_metadata_ids_from_nested_payload(self):
|
||||
payload = {
|
||||
"metadata_filtering_conditions": {
|
||||
"conditions": [
|
||||
{"id": "cond-1", "metadata_id": "meta-1"},
|
||||
{"id": "cond-2", "group": {"metadata_id": "meta-2"}},
|
||||
],
|
||||
},
|
||||
"node": {
|
||||
"data": {
|
||||
"doc_metadata": [
|
||||
{"metadata_id": "meta-3"},
|
||||
],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
referenced_ids: set[str] = set()
|
||||
MetadataService._collect_referenced_metadata_ids(payload, referenced_ids)
|
||||
|
||||
# All metadata_ids found in the payload are collected (no candidate filter)
|
||||
assert referenced_ids == {"meta-1", "meta-2", "meta-3"}
|
||||
|
||||
def test_collect_referenced_metadata_ids_list_root(self):
|
||||
payload = [
|
||||
{"metadata_id": "meta-a"},
|
||||
{"nested": {"metadata_id": "meta-b"}},
|
||||
{"no_metadata": True},
|
||||
]
|
||||
|
||||
referenced_ids: set[str] = set()
|
||||
MetadataService._collect_referenced_metadata_ids(payload, referenced_ids)
|
||||
|
||||
assert referenced_ids == {"meta-a", "meta-b"}
|
||||
|
||||
def test_get_referenced_metadata_ids_filters_by_candidate_set(self):
|
||||
"""Only IDs that are both referenced AND in metadata_ids are returned."""
|
||||
with (
|
||||
patch.object(
|
||||
MetadataService,
|
||||
"_scan_all_referenced_metadata_ids",
|
||||
return_value={"meta-1", "meta-2", "meta-3"},
|
||||
),
|
||||
patch("services.metadata_service.redis_client") as mock_redis,
|
||||
):
|
||||
mock_redis.get.return_value = None # cache miss
|
||||
|
||||
result = MetadataService._get_referenced_metadata_ids("tenant-1", {"meta-2", "meta-9"})
|
||||
|
||||
assert result == {"meta-2"}
|
||||
|
||||
def test_get_referenced_metadata_ids_uses_cache(self):
|
||||
"""Cache hit returns the intersection without calling the DB scanner."""
|
||||
import json
|
||||
|
||||
cached_ids = ["meta-1", "meta-2"]
|
||||
with (
|
||||
patch("services.metadata_service.redis_client") as mock_redis,
|
||||
patch.object(MetadataService, "_scan_all_referenced_metadata_ids") as mock_scan,
|
||||
):
|
||||
mock_redis.get.return_value = json.dumps(cached_ids).encode()
|
||||
|
||||
result = MetadataService._get_referenced_metadata_ids("tenant-1", {"meta-2", "meta-9"})
|
||||
|
||||
mock_scan.assert_not_called()
|
||||
assert result == {"meta-2"}
|
||||
|
||||
def test_get_referenced_metadata_ids_bypass_cache_skips_redis(self):
|
||||
"""bypass_cache=True skips the Redis read and always scans the DB."""
|
||||
with (
|
||||
patch("services.metadata_service.redis_client") as mock_redis,
|
||||
patch.object(
|
||||
MetadataService,
|
||||
"_scan_all_referenced_metadata_ids",
|
||||
return_value={"meta-1"},
|
||||
) as mock_scan,
|
||||
):
|
||||
result = MetadataService._get_referenced_metadata_ids("tenant-1", {"meta-1"}, bypass_cache=True)
|
||||
|
||||
mock_redis.get.assert_not_called()
|
||||
mock_scan.assert_called_once_with("tenant-1")
|
||||
assert result == {"meta-1"}
|
||||
|
||||
def test_get_dataset_metadatas_marks_pipeline_references(self):
    """Each returned metadata entry carries its count and an is_referenced_by_pipeline flag.

    The referenced-ID lookup is stubbed to report only "meta-2", and the
    count query is stubbed to yield 2 and then 5 for successive calls.
    """
    dataset = Mock(
        id="dataset-1",
        tenant_id="tenant-1",
        built_in_field_enabled=False,
        doc_metadata=[
            {"id": "meta-1", "name": "author", "type": "string"},
            {"id": "meta-2", "name": "category", "type": "string"},
        ],
    )

    # Chainable query stub: filter_by() returns the same mock, count() yields 2 then 5.
    query_mock = MagicMock()
    query_mock.filter_by.return_value = query_mock
    query_mock.count.side_effect = [2, 5]

    expected = {
        "doc_metadata": [
            {
                "id": "meta-1",
                "name": "author",
                "type": "string",
                "count": 2,
                "is_referenced_by_pipeline": False,
            },
            {
                "id": "meta-2",
                "name": "category",
                "type": "string",
                "count": 5,
                "is_referenced_by_pipeline": True,
            },
        ],
        "built_in_field_enabled": False,
    }

    referenced_patch = patch.object(MetadataService, "_get_referenced_metadata_ids", return_value={"meta-2"})
    query_patch = patch("services.metadata_service.db.session.query", return_value=query_mock)

    with referenced_patch, query_patch:
        actual = MetadataService.get_dataset_metadatas(dataset)

    assert actual == expected
|
||||
|
|
@ -0,0 +1,118 @@
|
|||
import type { DataSet } from '@/models/datasets'
|
||||
import { render, screen } from '@testing-library/react'
|
||||
import * as React from 'react'
|
||||
import { IndexingType } from '@/app/components/datasets/create/step-two'
|
||||
import DatasetDetailLayout from './layout-main'
|
||||
|
||||
// Mutable fixtures: the mock factories below read these lazily (inside the
// mocked hooks), so a test can reassign them before rendering.
let mockPathname = '/datasets/dataset-1/documents'
let mockDataset: Partial<DataSet> | undefined = {
  id: 'dataset-1',
  name: 'Pipeline Dataset',
  provider: 'vendor',
  runtime_mode: 'rag_pipeline',
  is_published: false,
  indexing_technique: IndexingType.QUALIFIED,
  document_count: 2,
}

const mockSetAppSidebarExpand = vi.fn()

vi.mock('next/navigation', () => {
  return {
    usePathname: () => mockPathname,
  }
})

// Sidebar stub: renders one element per navigation entry and exposes the
// entry's disabled flag as a data attribute so tests can assert on it.
// The stub is declared inside the factory because vi.mock calls are hoisted.
vi.mock('@/app/components/app-sidebar', () => {
  type NavEntry = { name: string, disabled?: boolean }

  const SidebarStub = ({ navigation }: { navigation: Array<NavEntry> }) => (
    <div data-testid="app-sidebar">
      {navigation.map(entry => (
        <div
          key={entry.name}
          data-testid={`nav-${entry.name}`}
          data-disabled={String(Boolean(entry.disabled))}
        >
          {entry.name}
        </div>
      ))}
    </div>
  )

  return { default: SidebarStub }
})

vi.mock('@/app/components/base/loading', () => {
  return {
    default: () => <div data-testid="loading" />,
  }
})

vi.mock('@/app/components/datasets/extra-info', () => {
  return {
    default: () => <div data-testid="extra-info" />,
  }
})

// Store stub: runs the selector against a state that only holds the sidebar setter.
vi.mock('@/app/components/app/store', () => {
  type StoreState = { setAppSidebarExpand: typeof mockSetAppSidebarExpand }

  return {
    useStore: (selector: (state: StoreState) => unknown) => selector({
      setAppSidebarExpand: mockSetAppSidebarExpand,
    }),
  }
})

vi.mock('@/context/app-context', () => {
  return {
    useAppContext: () => ({
      isCurrentWorkspaceDatasetOperator: true,
    }),
  }
})

vi.mock('@/context/event-emitter', () => {
  return {
    useEventEmitterContextContext: () => ({
      eventEmitter: {
        useSubscription: vi.fn(),
      },
    }),
  }
})

// The breakpoint hook always reports the 'pc' media type.
vi.mock('@/hooks/use-breakpoints', () => {
  return {
    default: () => 'pc',
    MediaType: {
      mobile: 'mobile',
      tablet: 'tablet',
      pc: 'pc',
    },
  }
})

vi.mock('@/hooks/use-document-title', () => {
  return {
    default: vi.fn(),
  }
})

// Dataset queries resolve synchronously from the mutable `mockDataset` fixture.
vi.mock('@/service/knowledge/use-dataset', () => {
  return {
    useDatasetDetail: () => ({
      data: mockDataset,
      error: undefined,
      refetch: vi.fn(),
    }),
    useDatasetRelatedApps: () => ({
      data: [],
    }),
  }
})
|
||||
|
||||
describe('DatasetDetailLayout', () => {
  // Reset spies/storage and restore the default fixture before every test:
  // an unpublished rag_pipeline dataset viewed on its documents page.
  beforeEach(() => {
    vi.clearAllMocks()
    localStorage.clear()

    mockPathname = '/datasets/dataset-1/documents'
    mockDataset = {
      id: 'dataset-1',
      name: 'Pipeline Dataset',
      provider: 'vendor',
      runtime_mode: 'rag_pipeline',
      is_published: false,
      indexing_technique: IndexingType.QUALIFIED,
      document_count: 2,
    }
  })

  it('should keep documents navigation enabled when rag pipeline is unpublished', () => {
    render(
      <DatasetDetailLayout datasetId="dataset-1">
        <div>content</div>
      </DatasetDetailLayout>,
    )

    const documentsNav = screen.getByTestId('nav-common.datasetMenus.documents')
    const hitTestingNav = screen.getByTestId('nav-common.datasetMenus.hitTesting')

    // Documents stays reachable while hit testing is still disabled.
    expect(documentsNav).toHaveAttribute('data-disabled', 'false')
    expect(hitTestingNav).toHaveAttribute('data-disabled', 'true')
  })
})
|
||||
|
|
@ -57,7 +57,7 @@ const DatasetDetailLayout: FC<IAppDetailLayoutProps> = (props) => {
|
|||
|
||||
const { data: relatedApps } = useDatasetRelatedApps(datasetId)
|
||||
|
||||
const isButtonDisabledWithPipeline = useMemo(() => {
|
||||
const isHitTestingDisabled = useMemo(() => {
|
||||
if (!datasetRes)
|
||||
return true
|
||||
if (datasetRes.provider === 'external')
|
||||
|
|
@ -74,7 +74,7 @@ const DatasetDetailLayout: FC<IAppDetailLayoutProps> = (props) => {
|
|||
href: `/datasets/${datasetId}/hitTesting`,
|
||||
icon: RiFocus2Line,
|
||||
selectedIcon: RiFocus2Fill,
|
||||
disabled: isButtonDisabledWithPipeline,
|
||||
disabled: isHitTestingDisabled,
|
||||
},
|
||||
{
|
||||
name: t('datasetMenus.settings', { ns: 'common' }),
|
||||
|
|
@ -98,12 +98,12 @@ const DatasetDetailLayout: FC<IAppDetailLayoutProps> = (props) => {
|
|||
href: `/datasets/${datasetId}/documents`,
|
||||
icon: RiFileTextLine,
|
||||
selectedIcon: RiFileTextFill,
|
||||
disabled: isButtonDisabledWithPipeline,
|
||||
disabled: false,
|
||||
})
|
||||
}
|
||||
|
||||
return baseNavigation
|
||||
}, [t, datasetId, isButtonDisabledWithPipeline, datasetRes?.provider])
|
||||
}, [t, datasetId, isHitTestingDisabled, datasetRes?.provider])
|
||||
|
||||
useDocumentTitle(datasetRes?.name || t('menus.datasets', { ns: 'common' }))
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,69 @@
|
|||
'use client'
|
||||
import type { ChangeEventHandler, CSSProperties } from 'react'
|
||||
import * as React from 'react'
|
||||
import { cn } from '@/utils/classnames'
|
||||
|
||||
/**
 * Props accepted by {@link InputNumber}.
 */
export type InputNumberProps = {
  /** Controlled value. When omitted and no `defaultValue` is given, the field renders empty. */
  value?: number
  /** Initial value for uncontrolled usage; only applied while `value` is undefined. */
  defaultValue?: number
  min?: number
  max?: number
  /** Native step of the number input (defaults to 1). */
  step?: number
  /** Called with the parsed number, or with `null` when the field is cleared. */
  onChange?: (value: number | null) => void
  readOnly?: boolean
  disabled?: boolean
  className?: string
  style?: CSSProperties
  /** Text size preset: 'regular' renders 13px text, 'large' renders 14px text. */
  size?: 'regular' | 'large'
}

/**
 * Minimal numeric text field built on a native `<input type="number">`.
 *
 * The forwarded ref points at the underlying input element. Text that does
 * not parse to a number is ignored (no `onChange` call).
 */
const InputNumber = React.forwardRef<HTMLInputElement, InputNumberProps>(({
  value,
  defaultValue,
  min,
  max,
  step = 1,
  onChange,
  readOnly,
  disabled,
  className,
  style,
  size = 'regular',
}, ref) => {
  const handleChange: ChangeEventHandler<HTMLInputElement> = (e) => {
    const val = e.target.value
    // An emptied field is reported as null rather than NaN.
    if (val === '') {
      onChange?.(null)
      return
    }
    const num = Number.parseFloat(val)
    if (!Number.isNaN(num))
      onChange?.(num)
  }

  // An input must be either controlled (`value`) or uncontrolled
  // (`defaultValue`), never both. Previously `value ?? ''` was always passed
  // next to `defaultValue`, so the default was never displayed and React
  // warned about mixing the two props. Only fall back to uncontrolled mode
  // when a default is supplied without a value; every other case keeps the
  // original controlled behavior (empty string for a missing value).
  const valueProps = value === undefined && defaultValue !== undefined
    ? { defaultValue }
    : { value: value ?? '' }

  return (
    <input
      ref={ref}
      type="number"
      {...valueProps}
      min={min}
      max={max}
      step={step}
      onChange={handleChange}
      readOnly={readOnly}
      disabled={disabled}
      style={style}
      className={cn(
        'w-full appearance-none bg-transparent text-components-input-text-filled outline-none placeholder:text-components-input-text-placeholder',
        size === 'regular' && 'text-[13px]',
        size === 'large' && 'text-[14px]',
        className,
      )}
    />
  )
})

InputNumber.displayName = 'InputNumber'

export default InputNumber
|
||||
|
|
@ -33,6 +33,7 @@ vi.mock('@/context/dataset-detail', () => ({
|
|||
embedding_available: true,
|
||||
data_source_type: DataSourceType.FILE,
|
||||
runtime_mode: 'rag',
|
||||
is_published: true,
|
||||
},
|
||||
}
|
||||
return selector(mockState as MockState)
|
||||
|
|
@ -144,6 +145,7 @@ vi.mock('../components/documents-header', () => ({
|
|||
default: ({
|
||||
datasetId,
|
||||
embeddingAvailable,
|
||||
canAddDocument,
|
||||
onInputChange,
|
||||
onAddDocument,
|
||||
onStatusFilterChange,
|
||||
|
|
@ -153,6 +155,7 @@ vi.mock('../components/documents-header', () => ({
|
|||
datasetId: string
|
||||
dataSourceType?: string
|
||||
embeddingAvailable: boolean
|
||||
canAddDocument: boolean
|
||||
isFreePlan: boolean
|
||||
statusFilterValue: string
|
||||
sortValue: string
|
||||
|
|
@ -176,6 +179,7 @@ vi.mock('../components/documents-header', () => ({
|
|||
<div data-testid="documents-header">
|
||||
<span data-testid="header-dataset-id">{datasetId}</span>
|
||||
<span data-testid="header-embedding-available">{String(embeddingAvailable)}</span>
|
||||
<span data-testid="header-can-add-document">{String(canAddDocument)}</span>
|
||||
<input
|
||||
data-testid="search-input"
|
||||
onChange={e => onInputChange(e.target.value)}
|
||||
|
|
@ -278,6 +282,7 @@ describe('Documents', () => {
|
|||
embedding_available: true,
|
||||
data_source_type: DataSourceType.FILE,
|
||||
runtime_mode: 'rag',
|
||||
is_published: true,
|
||||
},
|
||||
}
|
||||
return selector(mockState as MockState)
|
||||
|
|
@ -294,6 +299,7 @@ describe('Documents', () => {
|
|||
render(<Documents {...defaultProps} />)
|
||||
expect(screen.getByTestId('header-dataset-id')).toHaveTextContent('test-dataset-id')
|
||||
expect(screen.getByTestId('header-embedding-available')).toHaveTextContent('true')
|
||||
expect(screen.getByTestId('header-can-add-document')).toHaveTextContent('true')
|
||||
})
|
||||
|
||||
it('should render document list when documents exist', () => {
|
||||
|
|
@ -362,6 +368,7 @@ describe('Documents', () => {
|
|||
embedding_available: true,
|
||||
data_source_type: DataSourceType.NOTION,
|
||||
runtime_mode: 'rag',
|
||||
is_published: true,
|
||||
},
|
||||
}
|
||||
return selector(mockState as MockState)
|
||||
|
|
@ -466,6 +473,7 @@ describe('Documents', () => {
|
|||
embedding_available: true,
|
||||
data_source_type: DataSourceType.FILE,
|
||||
runtime_mode: 'rag_pipeline',
|
||||
is_published: true,
|
||||
},
|
||||
}
|
||||
return selector(mockState as MockState)
|
||||
|
|
@ -478,6 +486,30 @@ describe('Documents', () => {
|
|||
expect(mockPush).toHaveBeenCalledWith('/datasets/test-dataset-id/documents/create-from-pipeline')
|
||||
})
|
||||
|
||||
it('should not navigate to create page when rag pipeline is unpublished', () => {
  // Unpublished rag_pipeline dataset: embedding is available, yet adding
  // documents is expected to stay blocked.
  const selectFromUnpublishedPipeline = (selector: MockSelector) => {
    const unpublishedState = {
      dataset: {
        id: 'test-dataset-id',
        name: 'Test Dataset',
        embedding_available: true,
        data_source_type: DataSourceType.FILE,
        runtime_mode: 'rag_pipeline',
        is_published: false,
      },
    }
    return selector(unpublishedState as MockState)
  }
  vi.mocked(useDatasetDetailContextWithSelector).mockImplementation(selectFromUnpublishedPipeline)

  render(<Documents {...defaultProps} />)

  const canAddFlag = screen.getByTestId('header-can-add-document')
  expect(canAddFlag).toHaveTextContent('false')

  // Even if the add button is clicked, no route change may happen.
  const addButton = screen.getByTestId('add-document-btn')
  addButton.click()

  expect(mockPush).not.toHaveBeenCalled()
})
|
||||
|
||||
it('should navigate from empty element add button', () => {
|
||||
vi.mocked(useDatasetDetailContextWithSelector).mockImplementation((selector: MockSelector) => {
|
||||
const mockState = {
|
||||
|
|
@ -487,6 +519,7 @@ describe('Documents', () => {
|
|||
embedding_available: true,
|
||||
data_source_type: DataSourceType.FILE,
|
||||
runtime_mode: 'rag',
|
||||
is_published: true,
|
||||
},
|
||||
}
|
||||
return selector(mockState as MockState)
|
||||
|
|
@ -649,6 +682,7 @@ describe('Documents', () => {
|
|||
embedding_available: false,
|
||||
data_source_type: DataSourceType.FILE,
|
||||
runtime_mode: 'rag',
|
||||
is_published: true,
|
||||
},
|
||||
}
|
||||
return selector(mockState as MockState)
|
||||
|
|
|
|||
|
|
@ -4,6 +4,15 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'
|
|||
import { DataSourceType } from '@/models/datasets'
|
||||
import DocumentsHeader from '../documents-header'
|
||||
|
||||
const LIST_TITLE_RE = /list\.title/i
|
||||
const LIST_DESC_RE = /list\.desc/i
|
||||
const LIST_LEARN_MORE_RE = /list\.learnMore/i
|
||||
const METADATA_RE = /metadata\.metadata/i
|
||||
const ADD_FILE_RE = /list\.addFile/i
|
||||
const ADD_PAGES_RE = /list\.addPages/i
|
||||
const ADD_URL_RE = /list\.addUrl/i
|
||||
const CURRENT_DRAFT_UNPUBLISHED_RE = /workflow\.common\.currentDraftUnpublished/i
|
||||
|
||||
// Mock the context hooks
|
||||
vi.mock('@/context/i18n', () => ({
|
||||
useDocLink: () => (path: string) => `https://docs.example.com${path}`,
|
||||
|
|
@ -32,6 +41,7 @@ describe('DocumentsHeader', () => {
|
|||
datasetId: 'dataset-123',
|
||||
dataSourceType: DataSourceType.FILE,
|
||||
embeddingAvailable: true,
|
||||
canAddDocument: true,
|
||||
isFreePlan: false,
|
||||
statusFilterValue: 'all',
|
||||
sortValue: 'created_at' as SortType,
|
||||
|
|
@ -60,23 +70,23 @@ describe('DocumentsHeader', () => {
|
|||
describe('Rendering', () => {
|
||||
it('should render without crashing', () => {
|
||||
render(<DocumentsHeader {...defaultProps} />)
|
||||
expect(screen.getByText(/list\.title/i)).toBeInTheDocument()
|
||||
expect(screen.getByText(LIST_TITLE_RE)).toBeInTheDocument()
|
||||
})
|
||||
|
||||
it('should render title', () => {
|
||||
render(<DocumentsHeader {...defaultProps} />)
|
||||
expect(screen.getByRole('heading', { level: 1 })).toHaveTextContent(/list\.title/i)
|
||||
expect(screen.getByRole('heading', { level: 1 })).toHaveTextContent(LIST_TITLE_RE)
|
||||
})
|
||||
|
||||
it('should render description text', () => {
|
||||
render(<DocumentsHeader {...defaultProps} />)
|
||||
expect(screen.getByText(/list\.desc/i)).toBeInTheDocument()
|
||||
expect(screen.getByText(LIST_DESC_RE)).toBeInTheDocument()
|
||||
})
|
||||
|
||||
it('should render learn more link', () => {
|
||||
render(<DocumentsHeader {...defaultProps} />)
|
||||
const link = screen.getByRole('link')
|
||||
expect(link).toHaveTextContent(/list\.learnMore/i)
|
||||
expect(link).toHaveTextContent(LIST_LEARN_MORE_RE)
|
||||
expect(link).toHaveAttribute('href', expect.stringContaining('use-dify/knowledge'))
|
||||
expect(link).toHaveAttribute('target', '_blank')
|
||||
expect(link).toHaveAttribute('rel', 'noopener noreferrer')
|
||||
|
|
@ -110,35 +120,45 @@ describe('DocumentsHeader', () => {
|
|||
describe('Embedding Availability', () => {
|
||||
it('should show metadata button when embedding is available', () => {
|
||||
render(<DocumentsHeader {...defaultProps} embeddingAvailable={true} />)
|
||||
expect(screen.getByText(/metadata\.metadata/i)).toBeInTheDocument()
|
||||
expect(screen.getByText(METADATA_RE)).toBeInTheDocument()
|
||||
})
|
||||
|
||||
it('should show add document button when embedding is available', () => {
|
||||
render(<DocumentsHeader {...defaultProps} embeddingAvailable={true} />)
|
||||
expect(screen.getByText(/list\.addFile/i)).toBeInTheDocument()
|
||||
expect(screen.getByText(ADD_FILE_RE)).toBeInTheDocument()
|
||||
})
|
||||
|
||||
it('should show warning when embedding is not available', () => {
|
||||
render(<DocumentsHeader {...defaultProps} embeddingAvailable={false} />)
|
||||
expect(screen.queryByText(/metadata\.metadata/i)).not.toBeInTheDocument()
|
||||
expect(screen.queryByText(/list\.addFile/i)).not.toBeInTheDocument()
|
||||
expect(screen.queryByText(METADATA_RE)).not.toBeInTheDocument()
|
||||
expect(screen.queryByText(ADD_FILE_RE)).not.toBeInTheDocument()
|
||||
})
|
||||
|
||||
it('should disable add document button when document upload is unavailable', () => {
  render(<DocumentsHeader {...defaultProps} canAddDocument={false} />)

  // The button is looked up by its accessible name (the "add file" label).
  const addButton = screen.getByRole('button', { name: ADD_FILE_RE })
  expect(addButton).toBeDisabled()
})
|
||||
|
||||
it('should show unpublished warning when document upload is unavailable', () => {
  render(<DocumentsHeader {...defaultProps} canAddDocument={false} />)

  // Embedding is available in defaultProps, so the unpublished-draft warning is expected.
  const warning = screen.getByText(CURRENT_DRAFT_UNPUBLISHED_RE)
  expect(warning).toBeInTheDocument()
})
|
||||
})
|
||||
|
||||
describe('Add Button Text', () => {
|
||||
it('should show "Add File" for FILE data source', () => {
|
||||
render(<DocumentsHeader {...defaultProps} dataSourceType={DataSourceType.FILE} />)
|
||||
expect(screen.getByText(/list\.addFile/i)).toBeInTheDocument()
|
||||
expect(screen.getByText(ADD_FILE_RE)).toBeInTheDocument()
|
||||
})
|
||||
|
||||
it('should show "Add Pages" for NOTION data source', () => {
|
||||
render(<DocumentsHeader {...defaultProps} dataSourceType={DataSourceType.NOTION} />)
|
||||
expect(screen.getByText(/list\.addPages/i)).toBeInTheDocument()
|
||||
expect(screen.getByText(ADD_PAGES_RE)).toBeInTheDocument()
|
||||
})
|
||||
|
||||
it('should show "Add Url" for WEB data source', () => {
|
||||
render(<DocumentsHeader {...defaultProps} dataSourceType={DataSourceType.WEB} />)
|
||||
expect(screen.getByText(/list\.addUrl/i)).toBeInTheDocument()
|
||||
expect(screen.getByText(ADD_URL_RE)).toBeInTheDocument()
|
||||
})
|
||||
})
|
||||
|
||||
|
|
@ -159,7 +179,7 @@ describe('DocumentsHeader', () => {
|
|||
const showEditMetadataModal = vi.fn()
|
||||
render(<DocumentsHeader {...defaultProps} showEditMetadataModal={showEditMetadataModal} />)
|
||||
|
||||
const metadataButton = screen.getByText(/metadata\.metadata/i)
|
||||
const metadataButton = screen.getByText(METADATA_RE)
|
||||
fireEvent.click(metadataButton)
|
||||
|
||||
expect(showEditMetadataModal).toHaveBeenCalledTimes(1)
|
||||
|
|
@ -169,7 +189,7 @@ describe('DocumentsHeader', () => {
|
|||
const onAddDocument = vi.fn()
|
||||
render(<DocumentsHeader {...defaultProps} onAddDocument={onAddDocument} />)
|
||||
|
||||
const addButton = screen.getByText(/list\.addFile/i)
|
||||
const addButton = screen.getByText(ADD_FILE_RE)
|
||||
fireEvent.click(addButton)
|
||||
|
||||
expect(onAddDocument).toHaveBeenCalledTimes(1)
|
||||
|
|
@ -190,7 +210,7 @@ describe('DocumentsHeader', () => {
|
|||
it('should handle undefined dataSourceType', () => {
|
||||
render(<DocumentsHeader {...defaultProps} dataSourceType={undefined} />)
|
||||
// Should default to "Add File" text
|
||||
expect(screen.getByText(/list\.addFile/i)).toBeInTheDocument()
|
||||
expect(screen.getByText(ADD_FILE_RE)).toBeInTheDocument()
|
||||
})
|
||||
|
||||
it('should handle empty metadata arrays', () => {
|
||||
|
|
@ -208,7 +228,7 @@ describe('DocumentsHeader', () => {
|
|||
it('should render with descending sort order', () => {
|
||||
render(<DocumentsHeader {...defaultProps} sortValue="-created_at" />)
|
||||
// Component should still render correctly
|
||||
expect(screen.getByText(/list\.title/i)).toBeInTheDocument()
|
||||
expect(screen.getByText(LIST_TITLE_RE)).toBeInTheDocument()
|
||||
})
|
||||
})
|
||||
})
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ type DocumentsHeaderProps = {
|
|||
datasetId: string
|
||||
dataSourceType?: DataSourceType
|
||||
embeddingAvailable: boolean
|
||||
canAddDocument: boolean
|
||||
isFreePlan: boolean
|
||||
|
||||
// Filter & sort
|
||||
|
|
@ -55,6 +56,7 @@ const DocumentsHeader: FC<DocumentsHeaderProps> = ({
|
|||
datasetId,
|
||||
dataSourceType,
|
||||
embeddingAvailable,
|
||||
canAddDocument,
|
||||
isFreePlan,
|
||||
statusFilterValue,
|
||||
sortValue,
|
||||
|
|
@ -81,6 +83,7 @@ const DocumentsHeader: FC<DocumentsHeaderProps> = ({
|
|||
|
||||
const isDataSourceNotion = dataSourceType === DataSourceType.NOTION
|
||||
const isDataSourceWeb = dataSourceType === DataSourceType.WEB
|
||||
const showUnpublishedWarning = embeddingAvailable && !canAddDocument
|
||||
|
||||
const statusFilterItems: Item[] = useMemo(() => [
|
||||
{ value: 'all', name: t('list.index.all', { ns: 'datasetDocuments' }) as string },
|
||||
|
|
@ -168,6 +171,12 @@ const DocumentsHeader: FC<DocumentsHeaderProps> = ({
|
|||
description={t('embeddingModelNotAvailable', { ns: 'dataset' })}
|
||||
/>
|
||||
)}
|
||||
{showUnpublishedWarning && (
|
||||
<StatusWithAction
|
||||
type="warning"
|
||||
description={t('common.currentDraftUnpublished', { ns: 'workflow' })}
|
||||
/>
|
||||
)}
|
||||
{embeddingAvailable && (
|
||||
<Button variant="secondary" className="shrink-0" onClick={showEditMetadataModal}>
|
||||
<RiDraftLine className="mr-1 size-4" />
|
||||
|
|
@ -187,7 +196,7 @@ const DocumentsHeader: FC<DocumentsHeaderProps> = ({
|
|||
/>
|
||||
)}
|
||||
{embeddingAvailable && (
|
||||
<Button variant="primary" onClick={onAddDocument} className="shrink-0">
|
||||
<Button variant="primary" onClick={onAddDocument} className="shrink-0" disabled={!canAddDocument}>
|
||||
<PlusIcon className="mr-2 h-4 w-4 stroke-current" />
|
||||
{addButtonText}
|
||||
</Button>
|
||||
|
|
|
|||
|
|
@ -30,6 +30,8 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
|
|||
|
||||
const dataset = useDatasetDetailContextWithSelector(s => s.dataset)
|
||||
const embeddingAvailable = !!dataset?.embedding_available
|
||||
const canAddDocument = embeddingAvailable
|
||||
&& !(dataset?.runtime_mode === 'rag_pipeline' && !dataset?.is_published)
|
||||
|
||||
// Use custom hook for page state management
|
||||
const {
|
||||
|
|
@ -106,12 +108,14 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
|
|||
|
||||
// Route to document creation page
|
||||
const routeToDocCreate = useCallback(() => {
|
||||
if (!canAddDocument)
|
||||
return
|
||||
if (dataset?.runtime_mode === 'rag_pipeline') {
|
||||
router.push(`/datasets/${datasetId}/documents/create-from-pipeline`)
|
||||
return
|
||||
}
|
||||
router.push(`/datasets/${datasetId}/documents/create`)
|
||||
}, [dataset?.runtime_mode, datasetId, router])
|
||||
}, [canAddDocument, dataset?.runtime_mode, datasetId, router])
|
||||
|
||||
const total = documentsRes?.total || 0
|
||||
const documentsList = documentsRes?.data
|
||||
|
|
@ -147,7 +151,7 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
|
|||
const isDataSourceNotion = dataset?.data_source_type === DataSourceType.NOTION
|
||||
return (
|
||||
<EmptyElement
|
||||
canAdd={embeddingAvailable}
|
||||
canAdd={canAddDocument}
|
||||
onClick={routeToDocCreate}
|
||||
type={isDataSourceNotion ? 'sync' : 'upload'}
|
||||
/>
|
||||
|
|
@ -160,6 +164,7 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
|
|||
datasetId={datasetId}
|
||||
dataSourceType={dataset?.data_source_type}
|
||||
embeddingAvailable={embeddingAvailable}
|
||||
canAddDocument={canAddDocument}
|
||||
isFreePlan={isFreePlan}
|
||||
statusFilterValue={statusFilterValue}
|
||||
sortValue={sortValue}
|
||||
|
|
|
|||
|
|
@ -182,6 +182,23 @@ describe('WrappedDatePicker', () => {
|
|||
|
||||
expect(screen.getByTestId('date-picker-wrapper')).toBeInTheDocument()
|
||||
})
|
||||
|
||||
it('should not allow changing or clearing the date when readonly', () => {
  const onChangeSpy = vi.fn()
  const nowInSeconds = Math.floor(Date.now() / 1000)

  const { container } = render(
    <WrappedDatePicker value={nowInSeconds} onChange={onChangeSpy} readonly />,
  )

  // Exercise both picker controls, in order: select a date, then clear it.
  for (const testId of ['select-date', 'clear-date'])
    fireEvent.click(screen.getByTestId(testId))

  // The trigger's close icon may not be rendered; click it only when present.
  const closeIcon = container.querySelector('.cursor-pointer.hover\\:text-components-input-text-filled')
  if (closeIcon)
    fireEvent.click(closeIcon)

  expect(onChangeSpy).not.toHaveBeenCalled()
})
|
||||
})
|
||||
|
||||
describe('Styling', () => {
|
||||
|
|
|
|||
|
|
@ -15,28 +15,39 @@ type Props = {
|
|||
className?: string
|
||||
value?: number
|
||||
onChange: (date: number | null) => void
|
||||
readonly?: boolean
|
||||
}
|
||||
const WrappedDatePicker = ({
|
||||
className,
|
||||
value,
|
||||
onChange,
|
||||
readonly,
|
||||
}: Props) => {
|
||||
const { t } = useTranslation()
|
||||
const { userProfile: { timezone } } = useAppContext()
|
||||
const { formatTime: formatTimestamp } = useTimestamp()
|
||||
|
||||
const handleDateChange = useCallback((date?: dayjs.Dayjs) => {
|
||||
if (readonly)
|
||||
return
|
||||
if (date)
|
||||
onChange(date.unix())
|
||||
else
|
||||
onChange(null)
|
||||
}, [onChange])
|
||||
}, [onChange, readonly])
|
||||
|
||||
const renderTrigger = useCallback(({
|
||||
handleClickTrigger,
|
||||
}: TriggerProps) => {
|
||||
return (
|
||||
<div onClick={handleClickTrigger} className={cn('group flex items-center rounded-md bg-components-input-bg-normal', className)}>
|
||||
<div
|
||||
onClick={readonly ? undefined : handleClickTrigger}
|
||||
className={cn(
|
||||
'group flex items-center rounded-md bg-components-input-bg-normal',
|
||||
readonly && 'cursor-not-allowed opacity-50',
|
||||
className,
|
||||
)}
|
||||
>
|
||||
<div
|
||||
className={cn(
|
||||
'grow',
|
||||
|
|
@ -49,6 +60,7 @@ const WrappedDatePicker = ({
|
|||
className={cn(
|
||||
'hidden h-4 w-4 cursor-pointer hover:text-components-input-text-filled group-hover:block',
|
||||
value && 'text-text-quaternary',
|
||||
readonly && 'pointer-events-none',
|
||||
)}
|
||||
onClick={() => handleDateChange()}
|
||||
/>
|
||||
|
|
@ -60,7 +72,7 @@ const WrappedDatePicker = ({
|
|||
/>
|
||||
</div>
|
||||
)
|
||||
}, [className, value, formatTimestamp, t, handleDateChange])
|
||||
}, [className, value, formatTimestamp, t, handleDateChange, readonly])
|
||||
|
||||
return (
|
||||
<DatePicker
|
||||
|
|
|
|||
|
|
@ -488,6 +488,34 @@ describe('DatasetMetadataDrawer', () => {
|
|||
if (cancelBtn)
|
||||
fireEvent.click(cancelBtn)
|
||||
})
|
||||
|
||||
it('should show error toast when deleting metadata referenced by a pipeline', async () => {
  const onRemove = vi.fn().mockResolvedValue({})
  // A single user field flagged as referenced by a pipeline.
  const referencedField = { id: '1', name: 'field_one', type: DataType.string, count: 5, isReferencedByPipeline: true }

  render(
    <DatasetMetadataDrawer
      {...defaultProps}
      onRemove={onRemove}
      userMetadata={[referencedField]}
    />,
  )

  await waitFor(() => {
    expect(screen.getByRole('dialog')).toBeInTheDocument()
  })

  // Click the delete icon inside the field's delete container, if it rendered.
  const deleteIcon = screen.getByTestId('metadata-delete-1').querySelector('svg')
  if (deleteIcon)
    fireEvent.click(deleteIcon)

  // An error toast is shown; neither the removal nor the confirm dialog is triggered.
  const errorToast = expect.objectContaining({ type: 'error' })
  expect(mockToastNotify).toHaveBeenCalledWith(errorToast)
  expect(onRemove).not.toHaveBeenCalled()
  expect(screen.queryByText('dataset.metadata.datasetMetadata.deleteTitle')).not.toBeInTheDocument()
})
|
||||
})
|
||||
|
||||
describe('Props', () => {
|
||||
|
|
|
|||
|
|
@ -63,6 +63,13 @@ const Item: FC<ItemProps> = ({
|
|||
hideDeleteConfirm()
|
||||
onDelete?.()
|
||||
}, [hideDeleteConfirm, onDelete])
|
||||
// Delete-icon handler: metadata still referenced by a pipeline cannot be
// deleted, so show an error toast instead of opening the confirm dialog.
const handleDeleteClick = useCallback(() => {
  if (!payload.isReferencedByPipeline) {
    showDeleteConfirm()
    return
  }

  const message = t(`${i18nPrefix}.deleteDisabledByPipeline`, { ns: 'dataset' })
  Toast.notify({ type: 'error', message })
}, [payload.isReferencedByPipeline, showDeleteConfirm, t])
|
||||
|
||||
return (
|
||||
<div
|
||||
|
|
@ -75,24 +82,28 @@ const Item: FC<ItemProps> = ({
|
|||
>
|
||||
<div
|
||||
className={cn(
|
||||
'flex h-8 items-center justify-between px-2',
|
||||
'flex h-8 items-center justify-between px-2',
|
||||
disabled && 'opacity-30', // not include border and bg
|
||||
)}
|
||||
>
|
||||
<div className="flex h-full items-center space-x-1 text-text-tertiary">
|
||||
<Icon className="size-4 shrink-0" />
|
||||
<div className="system-sm-medium max-w-[250px] truncate text-text-primary">{payload.name}</div>
|
||||
<div className="system-xs-regular shrink-0">{payload.type}</div>
|
||||
<div className="max-w-[250px] truncate text-text-primary system-sm-medium">{payload.name}</div>
|
||||
<div className="shrink-0 system-xs-regular">{payload.type}</div>
|
||||
</div>
|
||||
{(!readonly || disabled) && (
|
||||
<div className="system-xs-regular ml-2 shrink-0 text-text-tertiary group-hover/item:hidden">
|
||||
<div className="ml-2 shrink-0 text-text-tertiary system-xs-regular group-hover/item:hidden">
|
||||
{disabled ? t(`${i18nPrefix}.disabled`, { ns: 'dataset' }) : t(`${i18nPrefix}.values`, { ns: 'dataset', num: payload.count || 0 })}
|
||||
</div>
|
||||
)}
|
||||
<div className="ml-2 hidden items-center space-x-1 text-text-tertiary group-hover/item:flex">
|
||||
<RiEditLine className="size-4 cursor-pointer" onClick={handleRename} />
|
||||
<div ref={deleteBtnRef} className="hover:text-text-destructive">
|
||||
<RiDeleteBinLine className="size-4 cursor-pointer" onClick={showDeleteConfirm} />
|
||||
<div
|
||||
ref={deleteBtnRef}
|
||||
data-testid={`metadata-delete-${payload.id}`}
|
||||
className="hover:text-text-destructive"
|
||||
>
|
||||
<RiDeleteBinLine className="size-4 cursor-pointer" onClick={handleDeleteClick} />
|
||||
</div>
|
||||
</div>
|
||||
{isShowDeleteConfirm && (
|
||||
|
|
@ -177,7 +188,7 @@ const DatasetMetadataDrawer: FC<Props> = ({
|
|||
panelClassName="px-4 block !max-w-[420px] my-2 rounded-l-2xl"
|
||||
>
|
||||
<div className="h-full overflow-y-auto">
|
||||
<div className="system-sm-regular text-text-tertiary">{t(`${i18nPrefix}.description`, { ns: 'dataset' })}</div>
|
||||
<div className="text-text-tertiary system-sm-regular">{t(`${i18nPrefix}.description`, { ns: 'dataset' })}</div>
|
||||
<CreateModal
|
||||
open={open}
|
||||
setOpen={setOpen}
|
||||
|
|
@ -207,7 +218,7 @@ const DatasetMetadataDrawer: FC<Props> = ({
|
|||
value={isBuiltInEnabled}
|
||||
onChange={onIsBuiltInEnabledChange}
|
||||
/>
|
||||
<div className="system-sm-semibold ml-2 mr-0.5 text-text-secondary">{t(`${i18nPrefix}.builtIn`, { ns: 'dataset' })}</div>
|
||||
<div className="ml-2 mr-0.5 text-text-secondary system-sm-semibold">{t(`${i18nPrefix}.builtIn`, { ns: 'dataset' })}</div>
|
||||
<Tooltip popupContent={<div className="max-w-[100px]">{t(`${i18nPrefix}.builtInDescription`, { ns: 'dataset' })}</div>} />
|
||||
</div>
|
||||
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ export type MetadataItemWithValue = MetadataItem & {
|
|||
|
||||
export type MetadataItemWithValueLength = MetadataItem & {
|
||||
count: number
|
||||
isReferencedByPipeline?: boolean
|
||||
}
|
||||
|
||||
export type MetadataItemInBatchEdit = MetadataItemWithValue & {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,292 @@
|
|||
'use client'
|
||||
import type { FC } from 'react'
|
||||
import type { DocMetadataItem } from '../types'
|
||||
import type { MetadataItemWithValueLength } from '@/app/components/datasets/metadata/types'
|
||||
import type { ValueSelector, Var } from '@/app/components/workflow/types'
|
||||
import { useCallback } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import InputNumber from '@/app/components/base/input-number'
|
||||
import { Tooltip, TooltipContent, TooltipTrigger } from '@/app/components/base/ui/tooltip'
|
||||
import Datepicker from '@/app/components/datasets/metadata/base/date-picker'
|
||||
import { DataType } from '@/app/components/datasets/metadata/types'
|
||||
import VarReferencePicker from '@/app/components/workflow/nodes/_base/components/variable/var-reference-picker'
|
||||
import { VarType } from '@/app/components/workflow/types'
|
||||
import { cn } from '@/utils/classnames'
|
||||
|
||||
type ConstantValueInputProps = {
  // Metadata field type; selects which editor is rendered.
  metadataType: DataType | undefined
  // Current constant; values of the wrong type for the editor are shown as empty.
  value: string | number | string[] | null
  onChange: (value: string | number | null) => void
  readonly?: boolean
  // Placeholder for the plain-text editor only.
  placeholder: string
}

/**
 * Editor for a constant metadata value: a date picker for time fields, a
 * numeric input for number fields, and a plain text input otherwise.
 */
const ConstantValueInput: FC<ConstantValueInputProps> = ({
  metadataType,
  value,
  onChange,
  readonly,
  placeholder,
}) => {
  switch (metadataType) {
    case DataType.time: {
      // Only numeric values are forwarded to the picker.
      const pickerValue = typeof value === 'number' ? value : undefined
      return (
        <Datepicker
          className="h-full w-full"
          value={pickerValue}
          onChange={v => onChange(v)}
          readonly={readonly}
        />
      )
    }

    case DataType.number: {
      const numericValue = typeof value === 'number' ? value : undefined
      return (
        <InputNumber
          className="h-full w-full border-none bg-transparent p-0"
          value={numericValue}
          onChange={(v: number | null) => onChange(v)}
          readOnly={readonly}
          size="regular"
        />
      )
    }

    default: {
      // Any other (or undefined) type is edited as free text.
      const textValue = typeof value === 'string' ? value : ''
      return (
        <input
          type="text"
          value={textValue}
          onChange={e => onChange(e.target.value)}
          placeholder={placeholder}
          disabled={readonly}
          className="h-full w-full bg-transparent text-[13px] text-text-primary outline-none placeholder:text-text-placeholder disabled:opacity-50"
        />
      )
    }
  }
}
|
||||
|
||||
/**
 * Props of the metadata section shown in the knowledge-base node panel.
 */
type MetadataSectionProps = {
  /** Id of the workflow node; forwarded to the variable picker. */
  nodeId: string
  /** Metadata fields that can be assigned; the section body is hidden when empty. */
  userMetadata?: MetadataItemWithValueLength[]
  /** Current field/value assignments (controlled). */
  docMetadata?: DocMetadataItem[]
  /** Receives the full, updated assignment list after every edit. */
  onDocMetadataChange?: (metadata: DocMetadataItem[]) => void
  /** Disables all editing controls when true. */
  readonly?: boolean
  /** Extra class names merged onto the root element. */
  className?: string
}
|
||||
|
||||
/**
 * Editor for the document-metadata assignments of a knowledge-base node.
 *
 * Each row pairs one metadata field (picked from `userMetadata`) with either a
 * constant or a workflow variable selector (stored as an array). The component is
 * controlled: every edit builds a new array and hands it to `onDocMetadataChange`;
 * when that callback is missing the edit handlers are no-ops.
 */
const MetadataSection: FC<MetadataSectionProps> = ({
  nodeId,
  userMetadata = [],
  docMetadata = [],
  onDocMetadataChange,
  readonly,
  className,
}) => {
  const { t } = useTranslation()

  // Appends an empty row; the field and value are chosen afterwards.
  const handleAddDocMetadata = useCallback(() => {
    if (!onDocMetadataChange)
      return
    onDocMetadataChange([...docMetadata, { metadata_id: '', value: null }])
  }, [docMetadata, onDocMetadataChange])

  // Drops the row at `index`.
  const handleRemoveDocMetadata = useCallback((index: number) => {
    if (!onDocMetadataChange)
      return
    const newMetadata = [...docMetadata]
    newMetadata.splice(index, 1)
    onDocMetadataChange(newMetadata)
  }, [docMetadata, onDocMetadataChange])

  // Changing the field resets the value: the previous value may not fit the new field's type.
  const handleDocMetadataIdChange = useCallback((index: number, metadataId: string) => {
    if (!onDocMetadataChange)
      return
    const newMetadata = [...docMetadata]
    newMetadata[index] = { metadata_id: metadataId, value: null }
    onDocMetadataChange(newMetadata)
  }, [docMetadata, onDocMetadataChange])

  // Replaces only the value of the row at `index`; an array value means "variable selector".
  const handleDocMetadataValueChange = useCallback((index: number, value: string | number | ValueSelector | null) => {
    if (!onDocMetadataChange)
      return
    const newMetadata = [...docMetadata]
    newMetadata[index] = { ...newMetadata[index], value }
    onDocMetadataChange(newMetadata)
  }, [docMetadata, onDocMetadataChange])

  // Fields selectable for a row: everything not already taken by another row.
  const getAvailableMetadataOptions = useCallback((currentId: string) => {
    const usedIds = new Set(docMetadata.map(m => m.metadata_id).filter(id => id !== currentId))
    return userMetadata.filter(m => !usedIds.has(m.id))
  }, [userMetadata, docMetadata])

  const getMetadataType = useCallback((metadataId: string): DataType | undefined => {
    return userMetadata.find(m => m.id === metadataId)?.type
  }, [userMetadata])

  // Builds the variable filter for a row. Type mapping: Metadata DataType -> Workflow VarType.
  const createVarFilter = useCallback((metadataId: string) => {
    // Resolved once per filter instead of once per candidate variable.
    const metadataType = getMetadataType(metadataId)

    return (variable: Var): boolean => {
      switch (metadataType) {
        case DataType.string:
          return variable.type === VarType.string
        // Time fields take numeric variables, matching the numeric value used by the constant date picker.
        case DataType.number:
        case DataType.time:
          return variable.type === VarType.number || variable.type === VarType.integer
        // Unknown or unresolved field type: offer no variables.
        default:
          return false
      }
    }
  }, [getMetadataType])

  return (
    <div className={cn('space-y-3', className)}>
      <div className="flex items-center justify-between">
        <div className="text-text-tertiary system-xs-semibold-uppercase">
          {t('metadata.metadata', { ns: 'dataset' })}
        </div>
      </div>

      {/* Document Metadata Values Section */}
      {userMetadata.length > 0 && (
        <div className="space-y-2 rounded-lg border border-components-panel-border bg-components-panel-bg p-3">
          <div className="flex items-center justify-end">
            {!readonly && (
              <button
                type="button"
                onClick={handleAddDocMetadata}
                className="flex items-center gap-1 text-text-accent-secondary system-xs-medium hover:text-text-accent disabled:opacity-50"
                disabled={docMetadata.length >= userMetadata.length}
              >
                <div className="i-ri-add-line size-3.5" />
                {t('operation.add', { ns: 'common' })}
              </button>
            )}
          </div>

          {docMetadata.length > 0
            ? (
                <div className="space-y-2">
                  {docMetadata.map((item, index) => {
                    const isVariable = Array.isArray(item.value)
                    const itemKey = item.metadata_id ? `metadata-${item.metadata_id}` : `new-${index}`
                    const availableOptions = getAvailableMetadataOptions(item.metadata_id)
                    // The stored id is not among the known fields (e.g. the field no longer exists).
                    const isUnknownField = !!item.metadata_id && !availableOptions.some(o => o.id === item.metadata_id)
                    return (
                      <div key={itemKey} className="flex items-center gap-2">
                        <div className="flex w-0 grow items-center gap-2">
                          <div className="flex w-1/3 items-center gap-1 rounded-lg border border-components-panel-border bg-components-input-bg-normal px-2">
                            <select
                              value={item.metadata_id}
                              onChange={e => handleDocMetadataIdChange(index, e.target.value)}
                              disabled={readonly}
                              className="h-8 w-full appearance-none bg-transparent text-[13px] text-text-primary outline-none disabled:opacity-50"
                            >
                              <option value="" disabled>{t('placeholder.select', { ns: 'common' })}</option>
                              {availableOptions.map(opt => (
                                <option key={opt.id} value={opt.id}>{opt.name}</option>
                              ))}
                              {/* No name can be looked up for an unknown field, so show its id rather than a blank option. */}
                              {isUnknownField && (
                                <option value={item.metadata_id}>{item.metadata_id}</option>
                              )}
                            </select>
                          </div>
                          <div className="flex h-8 grow items-center gap-1 rounded-lg border border-components-panel-border bg-components-input-bg-normal">
                            <div className="ml-1 inline-flex shrink-0 gap-px rounded-[10px] bg-components-segmented-control-bg-normal p-0.5">
                              <Tooltip>
                                <TooltipTrigger render={(
                                  <div
                                    className={cn('cursor-pointer rounded-lg px-2.5 py-1.5 text-text-tertiary hover:bg-state-base-hover', isVariable && 'bg-components-segmented-control-item-active-bg text-text-secondary shadow-xs hover:bg-components-segmented-control-item-active-bg', readonly && 'cursor-not-allowed opacity-50')}
                                    // Only switch when not already in variable mode, otherwise the chosen selector would be wiped.
                                    onClick={() => !readonly && !isVariable && handleDocMetadataValueChange(index, [])}
                                  >
                                    <div className="i-custom-vender-solid-development-variable-02 h-4 w-4" />
                                  </div>
                                )}
                                />
                                {!isVariable && (
                                  <TooltipContent>
                                    {t('nodes.common.valueType.variable', { ns: 'workflow' })}
                                  </TooltipContent>
                                )}
                              </Tooltip>
                              <Tooltip>
                                <TooltipTrigger render={(
                                  <div
                                    className={cn('cursor-pointer rounded-lg px-2.5 py-1.5 text-text-tertiary hover:bg-state-base-hover', !isVariable && 'bg-components-segmented-control-item-active-bg text-text-secondary shadow-xs hover:bg-components-segmented-control-item-active-bg', readonly && 'cursor-not-allowed opacity-50')}
                                    // Only switch when coming from variable mode, otherwise the typed constant would be wiped.
                                    onClick={() => !readonly && isVariable && handleDocMetadataValueChange(index, '')}
                                  >
                                    <div className="i-ri-edit-line h-4 w-4" />
                                  </div>
                                )}
                                />
                                {isVariable && (
                                  <TooltipContent>
                                    {t('nodes.common.valueType.constant', { ns: 'workflow' })}
                                  </TooltipContent>
                                )}
                              </Tooltip>
                            </div>
                            <div className="h-full w-px bg-divider-regular" />
                            <div className="w-0 grow overflow-hidden">
                              {isVariable
                                ? (
                                    <VarReferencePicker
                                      nodeId={nodeId}
                                      readonly={readonly || false}
                                      value={item.value as ValueSelector}
                                      onChange={value => handleDocMetadataValueChange(index, value)}
                                      isSupportConstantValue={false}
                                      isSupportFileVar={false}
                                      placeholder={t('placeholder.input', { ns: 'common' }) || ''}
                                      className="h-full border-none !bg-transparent p-0"
                                      zIndex={1000}
                                      isShowNodeName
                                      minWidth={360}
                                      filterVar={createVarFilter(item.metadata_id)}
                                    />
                                  )
                                : (
                                    <div className="flex h-full w-full items-center px-2">
                                      <ConstantValueInput
                                        metadataType={getMetadataType(item.metadata_id)}
                                        value={item.value}
                                        onChange={value => handleDocMetadataValueChange(index, value)}
                                        readonly={readonly}
                                        placeholder={t('placeholder.input', { ns: 'common' }) || ''}
                                      />
                                    </div>
                                  )}
                            </div>
                          </div>
                        </div>
                        {!readonly && (
                          <button
                            type="button"
                            onClick={() => handleRemoveDocMetadata(index)}
                            className="flex size-8 shrink-0 items-center justify-center rounded-lg text-text-tertiary hover:bg-state-destructive-hover hover:text-text-destructive"
                          >
                            <div className="i-ri-delete-bin-line size-4" />
                          </button>
                        )}
                      </div>
                    )
                  })}
                </div>
              )
            : (
                <div className="py-2 text-center text-text-quaternary system-2xs-regular">
                  {t('stepTwo.metadata.noValues', { ns: 'datasetCreation' })}
                </div>
              )}
        </div>
      )}

    </div>
  )
}
|
||||
|
||||
export default MetadataSection
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
import type {
|
||||
DocMetadataItem,
|
||||
KnowledgeBaseNodeType,
|
||||
RerankingModel,
|
||||
SummaryIndexSetting,
|
||||
|
|
@ -247,6 +248,12 @@ export const useConfig = (id: string) => {
|
|||
})
|
||||
}, [handleNodeDataUpdate])
|
||||
|
||||
// Forwards a new document-metadata list to the node data as `doc_metadata`.
const handleDocMetadataChange = useCallback((nextDocMetadata: DocMetadataItem[]) => {
  const patch = { doc_metadata: nextDocMetadata }
  handleNodeDataUpdate(patch)
}, [handleNodeDataUpdate])
|
||||
|
||||
const handleSummaryIndexSettingChange = useCallback((summaryIndexSetting: SummaryIndexSetting) => {
|
||||
const nodeData = getNodeData()
|
||||
handleNodeDataUpdate({
|
||||
|
|
@ -271,6 +278,7 @@ export const useConfig = (id: string) => {
|
|||
handleScoreThresholdChange,
|
||||
handleScoreThresholdEnabledChange,
|
||||
handleInputVariableChange,
|
||||
handleDocMetadataChange,
|
||||
handleSummaryIndexSettingChange,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,11 +20,14 @@ import {
|
|||
} from '@/app/components/workflow/nodes/_base/components/layout'
|
||||
import VarReferencePicker from '@/app/components/workflow/nodes/_base/components/variable/var-reference-picker'
|
||||
import { IS_CE_EDITION } from '@/config'
|
||||
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
|
||||
import { consoleQuery } from '@/service/client'
|
||||
import { useDatasetMetaData } from '@/service/knowledge/use-metadata'
|
||||
import Split from '../_base/components/split'
|
||||
import ChunkStructure from './components/chunk-structure'
|
||||
import EmbeddingModel from './components/embedding-model'
|
||||
import IndexMethod from './components/index-method'
|
||||
import MetadataSection from './components/metadata-section'
|
||||
import RetrievalSetting from './components/retrieval-setting'
|
||||
import { useConfig } from './hooks/use-config'
|
||||
import { useEmbeddingModelStatus } from './hooks/use-embedding-model-status'
|
||||
|
|
@ -62,6 +65,10 @@ const Panel: FC<NodePanelProps<KnowledgeBaseNodeType>> = ({
|
|||
}),
|
||||
)
|
||||
|
||||
// Get datasetId from context and fetch metadata
|
||||
const datasetId = useDatasetDetailContextWithSelector(s => s.dataset?.id)
|
||||
const { data: metadataList } = useDatasetMetaData(datasetId || '')
|
||||
|
||||
const {
|
||||
handleChunkStructureChange,
|
||||
handleIndexMethodChange,
|
||||
|
|
@ -76,6 +83,7 @@ const Panel: FC<NodePanelProps<KnowledgeBaseNodeType>> = ({
|
|||
handleScoreThresholdChange,
|
||||
handleScoreThresholdEnabledChange,
|
||||
handleInputVariableChange,
|
||||
handleDocMetadataChange,
|
||||
handleSummaryIndexSettingChange,
|
||||
} = useConfig(id)
|
||||
|
||||
|
|
@ -278,6 +286,17 @@ const Panel: FC<NodePanelProps<KnowledgeBaseNodeType>> = ({
|
|||
/>
|
||||
</div>
|
||||
</BoxGroup>
|
||||
{(metadataList?.doc_metadata?.length ?? 0) > 0 && (
|
||||
<BoxGroup>
|
||||
<MetadataSection
|
||||
nodeId={id}
|
||||
userMetadata={metadataList?.doc_metadata || []}
|
||||
docMetadata={data.doc_metadata}
|
||||
onDocMetadataChange={handleDocMetadataChange}
|
||||
readonly={nodesReadOnly}
|
||||
/>
|
||||
</BoxGroup>
|
||||
)}
|
||||
</>
|
||||
)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -42,12 +42,19 @@ export type RetrievalSetting = {
|
|||
score_threshold: number
|
||||
reranking_mode?: RerankingModeEnum
|
||||
}
|
||||
|
||||
/**
 * One metadata assignment configured on a knowledge-base node.
 */
export type DocMetadataItem = {
  /** Id of the metadata field being assigned. */
  metadata_id: string
  /** A constant (`string` / `number`), a variable reference (`string[]`, i.e. a ValueSelector) or `null`. */
  value: string | number | string[] | null
}
|
||||
|
||||
/**
 * Summary-index configuration of a knowledge-base node; every field is optional.
 */
export type SummaryIndexSetting = {
  /** Whether the summary index is turned on. */
  enable?: boolean
  // NOTE(review): presumably the model (and its provider) used to generate summaries — confirm.
  model_name?: string
  model_provider_name?: string
  /** Prompt text for summary generation. */
  summary_prompt?: string
}
|
||||
|
||||
export type KnowledgeBaseNodeType = CommonNodeType & {
|
||||
index_chunk_variable_selector: string[]
|
||||
chunk_structure?: ChunkStructureEnum
|
||||
|
|
@ -56,6 +63,7 @@ export type KnowledgeBaseNodeType = CommonNodeType & {
|
|||
embedding_model_provider?: string
|
||||
keyword_number: number
|
||||
retrieval_model: RetrievalSetting
|
||||
doc_metadata?: DocMetadataItem[]
|
||||
_embeddingModelList?: Model[]
|
||||
_embeddingProviderModelList?: ModelItem[]
|
||||
_rerankModelList?: Model[]
|
||||
|
|
|
|||
|
|
@ -4329,12 +4329,6 @@
|
|||
"app/components/datasets/metadata/metadata-dataset/dataset-metadata-drawer.tsx": {
|
||||
"no-restricted-imports": {
|
||||
"count": 4
|
||||
},
|
||||
"tailwindcss/enforce-consistent-class-order": {
|
||||
"count": 5
|
||||
},
|
||||
"tailwindcss/no-unnecessary-whitespace": {
|
||||
"count": 1
|
||||
}
|
||||
},
|
||||
"app/components/datasets/metadata/metadata-dataset/field.tsx": {
|
||||
|
|
|
|||
|
|
@ -121,6 +121,8 @@
|
|||
"stepTwo.indexSettingTip": "To change the index method & embedding model, please go to the ",
|
||||
"stepTwo.maxLength": "Maximum chunk length",
|
||||
"stepTwo.maxLengthCheck": "Maximum chunk length should be less than {{limit}}",
|
||||
"stepTwo.metadata.customValues": "Custom Values",
|
||||
"stepTwo.metadata.noValues": "No values configured",
|
||||
"stepTwo.nextStep": "Save & Process",
|
||||
"stepTwo.notAvailableForParentChild": "Not available for Parent-child Index",
|
||||
"stepTwo.notAvailableForQA": "Not available for Q&A Index",
|
||||
|
|
|
|||
|
|
@ -124,6 +124,7 @@
|
|||
"metadata.datasetMetadata.builtIn": "Built-in",
|
||||
"metadata.datasetMetadata.builtInDescription": "Built-in metadata is automatically extracted and generated. It must be enabled before use and cannot be edited.",
|
||||
"metadata.datasetMetadata.deleteContent": "Are you sure you want to delete the metadata \"{{name}}\"",
|
||||
"metadata.datasetMetadata.deleteDisabledByPipeline": "This metadata is used by a pipeline and cannot be deleted.",
|
||||
"metadata.datasetMetadata.deleteTitle": "Confirm to delete",
|
||||
"metadata.datasetMetadata.description": "You can manage all metadata in this knowledge here. Modifications will be synchronized to every document.",
|
||||
"metadata.datasetMetadata.disabled": "Disabled",
|
||||
|
|
|
|||
|
|
@ -455,6 +455,8 @@
|
|||
"nodes.common.retry.times": "times",
|
||||
"nodes.common.typeSwitch.input": "Input value",
|
||||
"nodes.common.typeSwitch.variable": "Use variable",
|
||||
"nodes.common.valueType.constant": "Constant",
|
||||
"nodes.common.valueType.variable": "Variable",
|
||||
"nodes.dataSource.add": "Add data source",
|
||||
"nodes.dataSource.supportedFileFormats": "Supported file formats",
|
||||
"nodes.dataSource.supportedFileFormatsPlaceholder": "File extension, e.g. doc",
|
||||
|
|
|
|||
|
|
@ -121,6 +121,8 @@
|
|||
"stepTwo.indexSettingTip": "要更改索引方法和 embedding 模型,请转到",
|
||||
"stepTwo.maxLength": "分段最大长度",
|
||||
"stepTwo.maxLengthCheck": "分段最大长度不能大于 {{limit}}",
|
||||
"stepTwo.metadata.customValues": "自定义值",
|
||||
"stepTwo.metadata.noValues": "未配置任何值",
|
||||
"stepTwo.nextStep": "保存并处理",
|
||||
"stepTwo.notAvailableForParentChild": "不支持父子索引",
|
||||
"stepTwo.notAvailableForQA": "不支持 Q&A 索引",
|
||||
|
|
|
|||
|
|
@ -455,6 +455,8 @@
|
|||
"nodes.common.retry.times": "次",
|
||||
"nodes.common.typeSwitch.input": "输入值",
|
||||
"nodes.common.typeSwitch.variable": "使用变量",
|
||||
"nodes.common.valueType.constant": "常量",
|
||||
"nodes.common.valueType.variable": "变量",
|
||||
"nodes.dataSource.add": "添加数据源",
|
||||
"nodes.dataSource.supportedFileFormats": "支持的文件格式",
|
||||
"nodes.dataSource.supportedFileFormatsPlaceholder": "文件格式,例如:doc",
|
||||
|
|
|
|||
|
|
@ -7,11 +7,32 @@ import { useDocumentListKey, useInvalidDocumentList } from './use-document'
|
|||
|
||||
const NAME_SPACE = 'dataset-metadata'
|
||||
|
||||
/**
 * Shape returned to consumers of the dataset-metadata query.
 */
type DatasetMetadataResponse = {
  /** Metadata field definitions of the dataset. */
  doc_metadata: MetadataItemWithValueLength[]
  /** Whether built-in metadata fields are enabled for the dataset. */
  built_in_field_enabled: boolean
}
|
||||
|
||||
/**
 * A metadata item as typed for the raw API response, which carries the
 * pipeline-reference flag in snake_case.
 */
type DatasetMetadataApiItem = MetadataItemWithValueLength & {
  is_referenced_by_pipeline?: boolean
}
|
||||
|
||||
/**
 * Raw response type used for the `/datasets/{id}/metadata` request.
 */
type DatasetMetadataApiResponse = {
  /** Metadata items exactly as typed for the API response. */
  doc_metadata: DatasetMetadataApiItem[]
  /** Whether built-in metadata fields are enabled for the dataset. */
  built_in_field_enabled: boolean
}
|
||||
|
||||
/**
 * Queries the metadata field definitions of a dataset.
 *
 * The API's snake_case `is_referenced_by_pipeline` flag is additionally exposed on
 * each item as camelCase `isReferencedByPipeline`; all other response fields are
 * passed through unchanged.
 *
 * @param datasetId Id of the dataset. Callers may pass an empty string while the id
 *   is not known yet; the query then stays idle instead of requesting `/datasets//metadata`.
 */
export const useDatasetMetaData = (datasetId: string) => {
  return useQuery<DatasetMetadataResponse>({
    queryKey: [NAME_SPACE, 'dataset', datasetId],
    queryFn: async () => {
      const response = await get<DatasetMetadataApiResponse>(`/datasets/${datasetId}/metadata`)
      return {
        ...response,
        doc_metadata: response.doc_metadata.map(item => ({
          ...item,
          isReferencedByPipeline: item.is_referenced_by_pipeline,
        })),
      }
    },
    // Do not fire the request until a real dataset id is available.
    enabled: !!datasetId,
  })
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue