mirror of https://github.com/langgenius/dify.git
refactor: use EnumText for ApiToolProvider.schema_type_str and Document.doc_form (#33983)
This commit is contained in:
parent
ecd3a964c1
commit
8b634a9bee
|
|
@ -10,6 +10,7 @@ from configs import dify_config
|
||||||
from core.rag.datasource.vdb.vector_factory import Vector
|
from core.rag.datasource.vdb.vector_factory import Vector
|
||||||
from core.rag.datasource.vdb.vector_type import VectorType
|
from core.rag.datasource.vdb.vector_type import VectorType
|
||||||
from core.rag.index_processor.constant.built_in_field import BuiltInField
|
from core.rag.index_processor.constant.built_in_field import BuiltInField
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from core.rag.models.document import ChildDocument, Document
|
from core.rag.models.document import ChildDocument, Document
|
||||||
from extensions.ext_database import db
|
from extensions.ext_database import db
|
||||||
from models.dataset import Dataset, DatasetCollectionBinding, DatasetMetadata, DatasetMetadataBinding, DocumentSegment
|
from models.dataset import Dataset, DatasetCollectionBinding, DatasetMetadata, DatasetMetadataBinding, DocumentSegment
|
||||||
|
|
@ -269,7 +270,7 @@ def migrate_knowledge_vector_database():
|
||||||
"dataset_id": segment.dataset_id,
|
"dataset_id": segment.dataset_id,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
if dataset_document.doc_form == "hierarchical_model":
|
if dataset_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX:
|
||||||
child_chunks = segment.get_child_chunks()
|
child_chunks = segment.get_child_chunks()
|
||||||
if child_chunks:
|
if child_chunks:
|
||||||
child_documents = []
|
child_documents = []
|
||||||
|
|
|
||||||
|
|
@ -496,7 +496,9 @@ class Document(Base):
|
||||||
)
|
)
|
||||||
doc_type = mapped_column(EnumText(DocumentDocType, length=40), nullable=True)
|
doc_type = mapped_column(EnumText(DocumentDocType, length=40), nullable=True)
|
||||||
doc_metadata = mapped_column(AdjustedJSON, nullable=True)
|
doc_metadata = mapped_column(AdjustedJSON, nullable=True)
|
||||||
doc_form = mapped_column(String(255), nullable=False, server_default=sa.text("'text_model'"))
|
doc_form: Mapped[IndexStructureType] = mapped_column(
|
||||||
|
EnumText(IndexStructureType, length=255), nullable=False, server_default=sa.text("'text_model'")
|
||||||
|
)
|
||||||
doc_language = mapped_column(String(255), nullable=True)
|
doc_language = mapped_column(String(255), nullable=True)
|
||||||
need_summary: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false"))
|
need_summary: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false"))
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -145,7 +145,9 @@ class ApiToolProvider(TypeBase):
|
||||||
icon: Mapped[str] = mapped_column(String(255), nullable=False)
|
icon: Mapped[str] = mapped_column(String(255), nullable=False)
|
||||||
# original schema
|
# original schema
|
||||||
schema: Mapped[str] = mapped_column(LongText, nullable=False)
|
schema: Mapped[str] = mapped_column(LongText, nullable=False)
|
||||||
schema_type_str: Mapped[str] = mapped_column(String(40), nullable=False)
|
schema_type_str: Mapped[ApiProviderSchemaType] = mapped_column(
|
||||||
|
EnumText(ApiProviderSchemaType, length=40), nullable=False
|
||||||
|
)
|
||||||
# who created this tool
|
# who created this tool
|
||||||
user_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
|
user_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
|
||||||
# tenant id
|
# tenant id
|
||||||
|
|
|
||||||
|
|
@ -1440,7 +1440,7 @@ class DocumentService:
|
||||||
.filter(
|
.filter(
|
||||||
Document.id.in_(document_id_list),
|
Document.id.in_(document_id_list),
|
||||||
Document.dataset_id == dataset_id,
|
Document.dataset_id == dataset_id,
|
||||||
Document.doc_form != "qa_model", # Skip qa_model documents
|
Document.doc_form != IndexStructureType.QA_INDEX, # Skip qa_model documents
|
||||||
)
|
)
|
||||||
.update({Document.need_summary: need_summary}, synchronize_session=False)
|
.update({Document.need_summary: need_summary}, synchronize_session=False)
|
||||||
)
|
)
|
||||||
|
|
@ -2040,7 +2040,7 @@ class DocumentService:
|
||||||
document.dataset_process_rule_id = dataset_process_rule.id
|
document.dataset_process_rule_id = dataset_process_rule.id
|
||||||
document.updated_at = naive_utc_now()
|
document.updated_at = naive_utc_now()
|
||||||
document.created_from = created_from
|
document.created_from = created_from
|
||||||
document.doc_form = knowledge_config.doc_form
|
document.doc_form = IndexStructureType(knowledge_config.doc_form)
|
||||||
document.doc_language = knowledge_config.doc_language
|
document.doc_language = knowledge_config.doc_language
|
||||||
document.data_source_info = json.dumps(data_source_info)
|
document.data_source_info = json.dumps(data_source_info)
|
||||||
document.batch = batch
|
document.batch = batch
|
||||||
|
|
@ -2640,7 +2640,7 @@ class DocumentService:
|
||||||
document.splitting_completed_at = None
|
document.splitting_completed_at = None
|
||||||
document.updated_at = naive_utc_now()
|
document.updated_at = naive_utc_now()
|
||||||
document.created_from = created_from
|
document.created_from = created_from
|
||||||
document.doc_form = document_data.doc_form
|
document.doc_form = IndexStructureType(document_data.doc_form)
|
||||||
db.session.add(document)
|
db.session.add(document)
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
# update document segment
|
# update document segment
|
||||||
|
|
@ -3101,7 +3101,7 @@ class DocumentService:
|
||||||
class SegmentService:
|
class SegmentService:
|
||||||
@classmethod
|
@classmethod
|
||||||
def segment_create_args_validate(cls, args: dict, document: Document):
|
def segment_create_args_validate(cls, args: dict, document: Document):
|
||||||
if document.doc_form == "qa_model":
|
if document.doc_form == IndexStructureType.QA_INDEX:
|
||||||
if "answer" not in args or not args["answer"]:
|
if "answer" not in args or not args["answer"]:
|
||||||
raise ValueError("Answer is required")
|
raise ValueError("Answer is required")
|
||||||
if not args["answer"].strip():
|
if not args["answer"].strip():
|
||||||
|
|
@ -3158,7 +3158,7 @@ class SegmentService:
|
||||||
completed_at=naive_utc_now(),
|
completed_at=naive_utc_now(),
|
||||||
created_by=current_user.id,
|
created_by=current_user.id,
|
||||||
)
|
)
|
||||||
if document.doc_form == "qa_model":
|
if document.doc_form == IndexStructureType.QA_INDEX:
|
||||||
segment_document.word_count += len(args["answer"])
|
segment_document.word_count += len(args["answer"])
|
||||||
segment_document.answer = args["answer"]
|
segment_document.answer = args["answer"]
|
||||||
|
|
||||||
|
|
@ -3232,7 +3232,7 @@ class SegmentService:
|
||||||
tokens = 0
|
tokens = 0
|
||||||
if dataset.indexing_technique == "high_quality" and embedding_model:
|
if dataset.indexing_technique == "high_quality" and embedding_model:
|
||||||
# calc embedding use tokens
|
# calc embedding use tokens
|
||||||
if document.doc_form == "qa_model":
|
if document.doc_form == IndexStructureType.QA_INDEX:
|
||||||
tokens = embedding_model.get_text_embedding_num_tokens(
|
tokens = embedding_model.get_text_embedding_num_tokens(
|
||||||
texts=[content + segment_item["answer"]]
|
texts=[content + segment_item["answer"]]
|
||||||
)[0]
|
)[0]
|
||||||
|
|
@ -3255,7 +3255,7 @@ class SegmentService:
|
||||||
completed_at=naive_utc_now(),
|
completed_at=naive_utc_now(),
|
||||||
created_by=current_user.id,
|
created_by=current_user.id,
|
||||||
)
|
)
|
||||||
if document.doc_form == "qa_model":
|
if document.doc_form == IndexStructureType.QA_INDEX:
|
||||||
segment_document.answer = segment_item["answer"]
|
segment_document.answer = segment_item["answer"]
|
||||||
segment_document.word_count += len(segment_item["answer"])
|
segment_document.word_count += len(segment_item["answer"])
|
||||||
increment_word_count += segment_document.word_count
|
increment_word_count += segment_document.word_count
|
||||||
|
|
@ -3322,7 +3322,7 @@ class SegmentService:
|
||||||
content = args.content or segment.content
|
content = args.content or segment.content
|
||||||
if segment.content == content:
|
if segment.content == content:
|
||||||
segment.word_count = len(content)
|
segment.word_count = len(content)
|
||||||
if document.doc_form == "qa_model":
|
if document.doc_form == IndexStructureType.QA_INDEX:
|
||||||
segment.answer = args.answer
|
segment.answer = args.answer
|
||||||
segment.word_count += len(args.answer) if args.answer else 0
|
segment.word_count += len(args.answer) if args.answer else 0
|
||||||
word_count_change = segment.word_count - word_count_change
|
word_count_change = segment.word_count - word_count_change
|
||||||
|
|
@ -3419,7 +3419,7 @@ class SegmentService:
|
||||||
)
|
)
|
||||||
|
|
||||||
# calc embedding use tokens
|
# calc embedding use tokens
|
||||||
if document.doc_form == "qa_model":
|
if document.doc_form == IndexStructureType.QA_INDEX:
|
||||||
segment.answer = args.answer
|
segment.answer = args.answer
|
||||||
tokens = embedding_model.get_text_embedding_num_tokens(texts=[content + segment.answer])[0] # type: ignore
|
tokens = embedding_model.get_text_embedding_num_tokens(texts=[content + segment.answer])[0] # type: ignore
|
||||||
else:
|
else:
|
||||||
|
|
@ -3436,7 +3436,7 @@ class SegmentService:
|
||||||
segment.enabled = True
|
segment.enabled = True
|
||||||
segment.disabled_at = None
|
segment.disabled_at = None
|
||||||
segment.disabled_by = None
|
segment.disabled_by = None
|
||||||
if document.doc_form == "qa_model":
|
if document.doc_form == IndexStructureType.QA_INDEX:
|
||||||
segment.answer = args.answer
|
segment.answer = args.answer
|
||||||
segment.word_count += len(args.answer) if args.answer else 0
|
segment.word_count += len(args.answer) if args.answer else 0
|
||||||
word_count_change = segment.word_count - word_count_change
|
word_count_change = segment.word_count - word_count_change
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ from flask_login import current_user
|
||||||
|
|
||||||
from constants import DOCUMENT_EXTENSIONS
|
from constants import DOCUMENT_EXTENSIONS
|
||||||
from core.plugin.impl.plugin import PluginInstaller
|
from core.plugin.impl.plugin import PluginInstaller
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
||||||
from extensions.ext_database import db
|
from extensions.ext_database import db
|
||||||
from factories import variable_factory
|
from factories import variable_factory
|
||||||
|
|
@ -79,9 +80,9 @@ class RagPipelineTransformService:
|
||||||
pipeline = self._create_pipeline(pipeline_yaml)
|
pipeline = self._create_pipeline(pipeline_yaml)
|
||||||
|
|
||||||
# save chunk structure to dataset
|
# save chunk structure to dataset
|
||||||
if doc_form == "hierarchical_model":
|
if doc_form == IndexStructureType.PARENT_CHILD_INDEX:
|
||||||
dataset.chunk_structure = "hierarchical_model"
|
dataset.chunk_structure = "hierarchical_model"
|
||||||
elif doc_form == "text_model":
|
elif doc_form == IndexStructureType.PARAGRAPH_INDEX:
|
||||||
dataset.chunk_structure = "text_model"
|
dataset.chunk_structure = "text_model"
|
||||||
else:
|
else:
|
||||||
raise ValueError("Unsupported doc form")
|
raise ValueError("Unsupported doc form")
|
||||||
|
|
@ -101,7 +102,7 @@ class RagPipelineTransformService:
|
||||||
|
|
||||||
def _get_transform_yaml(self, doc_form: str, datasource_type: str, indexing_technique: str | None):
|
def _get_transform_yaml(self, doc_form: str, datasource_type: str, indexing_technique: str | None):
|
||||||
pipeline_yaml = {}
|
pipeline_yaml = {}
|
||||||
if doc_form == "text_model":
|
if doc_form == IndexStructureType.PARAGRAPH_INDEX:
|
||||||
match datasource_type:
|
match datasource_type:
|
||||||
case DataSourceType.UPLOAD_FILE:
|
case DataSourceType.UPLOAD_FILE:
|
||||||
if indexing_technique == "high_quality":
|
if indexing_technique == "high_quality":
|
||||||
|
|
@ -132,7 +133,7 @@ class RagPipelineTransformService:
|
||||||
pipeline_yaml = yaml.safe_load(f)
|
pipeline_yaml = yaml.safe_load(f)
|
||||||
case _:
|
case _:
|
||||||
raise ValueError("Unsupported datasource type")
|
raise ValueError("Unsupported datasource type")
|
||||||
elif doc_form == "hierarchical_model":
|
elif doc_form == IndexStructureType.PARENT_CHILD_INDEX:
|
||||||
match datasource_type:
|
match datasource_type:
|
||||||
case DataSourceType.UPLOAD_FILE:
|
case DataSourceType.UPLOAD_FILE:
|
||||||
# get graph from transform.file-parentchild.yml
|
# get graph from transform.file-parentchild.yml
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ from sqlalchemy import func
|
||||||
|
|
||||||
from core.db.session_factory import session_factory
|
from core.db.session_factory import session_factory
|
||||||
from core.model_manager import ModelManager
|
from core.model_manager import ModelManager
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from dify_graph.model_runtime.entities.model_entities import ModelType
|
from dify_graph.model_runtime.entities.model_entities import ModelType
|
||||||
from extensions.ext_redis import redis_client
|
from extensions.ext_redis import redis_client
|
||||||
from extensions.ext_storage import storage
|
from extensions.ext_storage import storage
|
||||||
|
|
@ -109,7 +110,7 @@ def batch_create_segment_to_index_task(
|
||||||
df = pd.read_csv(file_path)
|
df = pd.read_csv(file_path)
|
||||||
content = []
|
content = []
|
||||||
for _, row in df.iterrows():
|
for _, row in df.iterrows():
|
||||||
if document_config["doc_form"] == "qa_model":
|
if document_config["doc_form"] == IndexStructureType.QA_INDEX:
|
||||||
data = {"content": row.iloc[0], "answer": row.iloc[1]}
|
data = {"content": row.iloc[0], "answer": row.iloc[1]}
|
||||||
else:
|
else:
|
||||||
data = {"content": row.iloc[0]}
|
data = {"content": row.iloc[0]}
|
||||||
|
|
@ -159,7 +160,7 @@ def batch_create_segment_to_index_task(
|
||||||
status="completed",
|
status="completed",
|
||||||
completed_at=naive_utc_now(),
|
completed_at=naive_utc_now(),
|
||||||
)
|
)
|
||||||
if document_config["doc_form"] == "qa_model":
|
if document_config["doc_form"] == IndexStructureType.QA_INDEX:
|
||||||
segment_document.answer = segment["answer"]
|
segment_document.answer = segment["answer"]
|
||||||
segment_document.word_count += len(segment["answer"])
|
segment_document.word_count += len(segment["answer"])
|
||||||
word_count_change += segment_document.word_count
|
word_count_change += segment_document.word_count
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ from configs import dify_config
|
||||||
from core.db.session_factory import session_factory
|
from core.db.session_factory import session_factory
|
||||||
from core.entities.document_task import DocumentTask
|
from core.entities.document_task import DocumentTask
|
||||||
from core.indexing_runner import DocumentIsPausedError, IndexingRunner
|
from core.indexing_runner import DocumentIsPausedError, IndexingRunner
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from core.rag.pipeline.queue import TenantIsolatedTaskQueue
|
from core.rag.pipeline.queue import TenantIsolatedTaskQueue
|
||||||
from enums.cloud_plan import CloudPlan
|
from enums.cloud_plan import CloudPlan
|
||||||
from libs.datetime_utils import naive_utc_now
|
from libs.datetime_utils import naive_utc_now
|
||||||
|
|
@ -150,7 +151,7 @@ def _document_indexing(dataset_id: str, document_ids: Sequence[str]):
|
||||||
)
|
)
|
||||||
if (
|
if (
|
||||||
document.indexing_status == IndexingStatus.COMPLETED
|
document.indexing_status == IndexingStatus.COMPLETED
|
||||||
and document.doc_form != "qa_model"
|
and document.doc_form != IndexStructureType.QA_INDEX
|
||||||
and document.need_summary is True
|
and document.need_summary is True
|
||||||
):
|
):
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ from celery import shared_task
|
||||||
from sqlalchemy import or_, select
|
from sqlalchemy import or_, select
|
||||||
|
|
||||||
from core.db.session_factory import session_factory
|
from core.db.session_factory import session_factory
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from models.dataset import Dataset, DocumentSegment, DocumentSegmentSummary
|
from models.dataset import Dataset, DocumentSegment, DocumentSegmentSummary
|
||||||
from models.dataset import Document as DatasetDocument
|
from models.dataset import Document as DatasetDocument
|
||||||
from services.summary_index_service import SummaryIndexService
|
from services.summary_index_service import SummaryIndexService
|
||||||
|
|
@ -106,7 +107,7 @@ def regenerate_summary_index_task(
|
||||||
),
|
),
|
||||||
DatasetDocument.enabled == True, # Document must be enabled
|
DatasetDocument.enabled == True, # Document must be enabled
|
||||||
DatasetDocument.archived == False, # Document must not be archived
|
DatasetDocument.archived == False, # Document must not be archived
|
||||||
DatasetDocument.doc_form != "qa_model", # Skip qa_model documents
|
DatasetDocument.doc_form != IndexStructureType.QA_INDEX, # Skip qa_model documents
|
||||||
)
|
)
|
||||||
.order_by(DocumentSegment.document_id.asc(), DocumentSegment.position.asc())
|
.order_by(DocumentSegment.document_id.asc(), DocumentSegment.position.asc())
|
||||||
.all()
|
.all()
|
||||||
|
|
@ -209,7 +210,7 @@ def regenerate_summary_index_task(
|
||||||
|
|
||||||
for dataset_document in dataset_documents:
|
for dataset_document in dataset_documents:
|
||||||
# Skip qa_model documents
|
# Skip qa_model documents
|
||||||
if dataset_document.doc_form == "qa_model":
|
if dataset_document.doc_form == IndexStructureType.QA_INDEX:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ from unittest.mock import patch
|
||||||
import pytest
|
import pytest
|
||||||
from faker import Faker
|
from faker import Faker
|
||||||
|
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from core.rag.retrieval.dataset_retrieval import DatasetRetrieval
|
from core.rag.retrieval.dataset_retrieval import DatasetRetrieval
|
||||||
from core.workflow.nodes.knowledge_retrieval.retrieval import KnowledgeRetrievalRequest
|
from core.workflow.nodes.knowledge_retrieval.retrieval import KnowledgeRetrievalRequest
|
||||||
from models.dataset import Dataset, Document
|
from models.dataset import Dataset, Document
|
||||||
|
|
@ -55,7 +56,7 @@ class TestGetAvailableDatasetsIntegration:
|
||||||
name=f"Document {i}",
|
name=f"Document {i}",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -112,7 +113,7 @@ class TestGetAvailableDatasetsIntegration:
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
name=f"Archived Document {i}",
|
name=f"Archived Document {i}",
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
archived=True, # Archived
|
archived=True, # Archived
|
||||||
|
|
@ -165,7 +166,7 @@ class TestGetAvailableDatasetsIntegration:
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
name=f"Disabled Document {i}",
|
name=f"Disabled Document {i}",
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=False, # Disabled
|
enabled=False, # Disabled
|
||||||
archived=False,
|
archived=False,
|
||||||
|
|
@ -218,7 +219,7 @@ class TestGetAvailableDatasetsIntegration:
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
name=f"Document {status}",
|
name=f"Document {status}",
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
indexing_status=status, # Not completed
|
indexing_status=status, # Not completed
|
||||||
enabled=True,
|
enabled=True,
|
||||||
archived=False,
|
archived=False,
|
||||||
|
|
@ -336,7 +337,7 @@ class TestGetAvailableDatasetsIntegration:
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
name=f"Document for {dataset.name}",
|
name=f"Document for {dataset.name}",
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
archived=False,
|
archived=False,
|
||||||
|
|
@ -416,7 +417,7 @@ class TestGetAvailableDatasetsIntegration:
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
name=f"Document {i}",
|
name=f"Document {i}",
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
archived=False,
|
archived=False,
|
||||||
|
|
@ -476,7 +477,7 @@ class TestKnowledgeRetrievalIntegration:
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
archived=False,
|
archived=False,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
)
|
)
|
||||||
db_session_with_containers.add(document)
|
db_session_with_containers.add(document)
|
||||||
db_session_with_containers.commit()
|
db_session_with_containers.commit()
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@ from uuid import uuid4
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from extensions.storage.storage_type import StorageType
|
from extensions.storage.storage_type import StorageType
|
||||||
from models import Account
|
from models import Account
|
||||||
from models.dataset import Dataset, Document
|
from models.dataset import Dataset, Document
|
||||||
|
|
@ -91,7 +92,7 @@ class DocumentStatusTestDataFactory:
|
||||||
name=name,
|
name=name,
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=created_by,
|
created_by=created_by,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
)
|
)
|
||||||
document.id = document_id
|
document.id = document_id
|
||||||
document.indexing_status = indexing_status
|
document.indexing_status = indexing_status
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ from uuid import uuid4
|
||||||
import pytest
|
import pytest
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
||||||
from dify_graph.model_runtime.entities.model_entities import ModelType
|
from dify_graph.model_runtime.entities.model_entities import ModelType
|
||||||
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
||||||
|
|
@ -106,7 +107,7 @@ class DatasetServiceIntegrationDataFactory:
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=created_by,
|
created_by=created_by,
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
)
|
)
|
||||||
db_session_with_containers.add(document)
|
db_session_with_containers.add(document)
|
||||||
db_session_with_containers.flush()
|
db_session_with_containers.flush()
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@ from uuid import uuid4
|
||||||
import pytest
|
import pytest
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from models.dataset import Dataset, Document
|
from models.dataset import Dataset, Document
|
||||||
from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus
|
from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus
|
||||||
from services.dataset_service import DocumentService
|
from services.dataset_service import DocumentService
|
||||||
|
|
@ -79,7 +80,7 @@ class DocumentBatchUpdateIntegrationDataFactory:
|
||||||
name=name,
|
name=name,
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=created_by or str(uuid4()),
|
created_by=created_by or str(uuid4()),
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
)
|
)
|
||||||
document.id = document_id or str(uuid4())
|
document.id = document_id or str(uuid4())
|
||||||
document.enabled = enabled
|
document.enabled = enabled
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
||||||
from models.dataset import Dataset, Document
|
from models.dataset import Dataset, Document
|
||||||
from models.enums import DataSourceType, DocumentCreatedFrom
|
from models.enums import DataSourceType, DocumentCreatedFrom
|
||||||
|
|
@ -78,7 +79,7 @@ class DatasetDeleteIntegrationDataFactory:
|
||||||
tenant_id: str,
|
tenant_id: str,
|
||||||
dataset_id: str,
|
dataset_id: str,
|
||||||
created_by: str,
|
created_by: str,
|
||||||
doc_form: str = "text_model",
|
doc_form: str = IndexStructureType.PARAGRAPH_INDEX,
|
||||||
) -> Document:
|
) -> Document:
|
||||||
"""Persist a document so dataset.doc_form resolves through the real document path."""
|
"""Persist a document so dataset.doc_form resolves through the real document path."""
|
||||||
document = Document(
|
document = Document(
|
||||||
|
|
@ -119,7 +120,7 @@ class TestDatasetServiceDeleteDataset:
|
||||||
tenant_id=tenant.id,
|
tenant_id=tenant.id,
|
||||||
dataset_id=dataset.id,
|
dataset_id=dataset.id,
|
||||||
created_by=owner.id,
|
created_by=owner.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ from uuid import uuid4
|
||||||
|
|
||||||
from sqlalchemy import select
|
from sqlalchemy import select
|
||||||
|
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from models.dataset import Dataset, Document
|
from models.dataset import Dataset, Document
|
||||||
from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus
|
from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus
|
||||||
from services.dataset_service import DocumentService
|
from services.dataset_service import DocumentService
|
||||||
|
|
@ -42,7 +43,7 @@ def _create_document(
|
||||||
name=f"doc-{uuid4()}",
|
name=f"doc-{uuid4()}",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=str(uuid4()),
|
created_by=str(uuid4()),
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
)
|
)
|
||||||
document.id = str(uuid4())
|
document.id = str(uuid4())
|
||||||
document.indexing_status = indexing_status
|
document.indexing_status = indexing_status
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ from uuid import uuid4
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from extensions.storage.storage_type import StorageType
|
from extensions.storage.storage_type import StorageType
|
||||||
from models import Account
|
from models import Account
|
||||||
from models.dataset import Dataset, Document
|
from models.dataset import Dataset, Document
|
||||||
|
|
@ -69,7 +70,7 @@ def make_document(
|
||||||
name=name,
|
name=name,
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=str(uuid4()),
|
created_by=str(uuid4()),
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
)
|
)
|
||||||
doc.id = document_id
|
doc.id = document_id
|
||||||
doc.indexing_status = "completed"
|
doc.indexing_status = "completed"
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ from faker import Faker
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from core.rag.index_processor.constant.built_in_field import BuiltInField
|
from core.rag.index_processor.constant.built_in_field import BuiltInField
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
||||||
from models.dataset import Dataset, DatasetMetadata, DatasetMetadataBinding, Document
|
from models.dataset import Dataset, DatasetMetadata, DatasetMetadataBinding, Document
|
||||||
from models.enums import DatasetMetadataType, DataSourceType, DocumentCreatedFrom
|
from models.enums import DatasetMetadataType, DataSourceType, DocumentCreatedFrom
|
||||||
|
|
@ -139,7 +140,7 @@ class TestMetadataService:
|
||||||
name=fake.file_name(),
|
name=fake.file_name(),
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from core.tools.entities.api_entities import ToolProviderApiEntity
|
from core.tools.entities.api_entities import ToolProviderApiEntity
|
||||||
from core.tools.entities.common_entities import I18nObject
|
from core.tools.entities.common_entities import I18nObject
|
||||||
from core.tools.entities.tool_entities import ToolProviderType
|
from core.tools.entities.tool_entities import ApiProviderSchemaType, ToolProviderType
|
||||||
from models.tools import ApiToolProvider, BuiltinToolProvider, MCPToolProvider, WorkflowToolProvider
|
from models.tools import ApiToolProvider, BuiltinToolProvider, MCPToolProvider, WorkflowToolProvider
|
||||||
from services.plugin.plugin_service import PluginService
|
from services.plugin.plugin_service import PluginService
|
||||||
from services.tools.tools_transform_service import ToolTransformService
|
from services.tools.tools_transform_service import ToolTransformService
|
||||||
|
|
@ -52,7 +52,7 @@ class TestToolTransformService:
|
||||||
user_id="test_user_id",
|
user_id="test_user_id",
|
||||||
credentials_str='{"auth_type": "api_key_header", "api_key": "test_key"}',
|
credentials_str='{"auth_type": "api_key_header", "api_key": "test_key"}',
|
||||||
schema="{}",
|
schema="{}",
|
||||||
schema_type_str="openapi",
|
schema_type_str=ApiProviderSchemaType.OPENAPI,
|
||||||
tools_str="[]",
|
tools_str="[]",
|
||||||
)
|
)
|
||||||
elif provider_type == "builtin":
|
elif provider_type == "builtin":
|
||||||
|
|
@ -659,7 +659,7 @@ class TestToolTransformService:
|
||||||
user_id=fake.uuid4(),
|
user_id=fake.uuid4(),
|
||||||
credentials_str='{"auth_type": "api_key_header", "api_key": "test_key"}',
|
credentials_str='{"auth_type": "api_key_header", "api_key": "test_key"}',
|
||||||
schema="{}",
|
schema="{}",
|
||||||
schema_type_str="openapi",
|
schema_type_str=ApiProviderSchemaType.OPENAPI,
|
||||||
tools_str="[]",
|
tools_str="[]",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -695,7 +695,7 @@ class TestToolTransformService:
|
||||||
user_id=fake.uuid4(),
|
user_id=fake.uuid4(),
|
||||||
credentials_str='{"auth_type": "api_key_query", "api_key": "test_key"}',
|
credentials_str='{"auth_type": "api_key_query", "api_key": "test_key"}',
|
||||||
schema="{}",
|
schema="{}",
|
||||||
schema_type_str="openapi",
|
schema_type_str=ApiProviderSchemaType.OPENAPI,
|
||||||
tools_str="[]",
|
tools_str="[]",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -731,7 +731,7 @@ class TestToolTransformService:
|
||||||
user_id=fake.uuid4(),
|
user_id=fake.uuid4(),
|
||||||
credentials_str='{"auth_type": "api_key", "api_key": "test_key"}',
|
credentials_str='{"auth_type": "api_key", "api_key": "test_key"}',
|
||||||
schema="{}",
|
schema="{}",
|
||||||
schema_type_str="openapi",
|
schema_type_str=ApiProviderSchemaType.OPENAPI,
|
||||||
tools_str="[]",
|
tools_str="[]",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@ import pytest
|
||||||
from faker import Faker
|
from faker import Faker
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from extensions.storage.storage_type import StorageType
|
from extensions.storage.storage_type import StorageType
|
||||||
from libs.datetime_utils import naive_utc_now
|
from libs.datetime_utils import naive_utc_now
|
||||||
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
||||||
|
|
@ -152,7 +153,7 @@ class TestBatchCleanDocumentTask:
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
)
|
)
|
||||||
|
|
||||||
db_session_with_containers.add(document)
|
db_session_with_containers.add(document)
|
||||||
|
|
@ -392,7 +393,12 @@ class TestBatchCleanDocumentTask:
|
||||||
db_session_with_containers.commit()
|
db_session_with_containers.commit()
|
||||||
|
|
||||||
# Execute the task with non-existent dataset
|
# Execute the task with non-existent dataset
|
||||||
batch_clean_document_task(document_ids=[document_id], dataset_id=dataset_id, doc_form="text_model", file_ids=[])
|
batch_clean_document_task(
|
||||||
|
document_ids=[document_id],
|
||||||
|
dataset_id=dataset_id,
|
||||||
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
|
file_ids=[],
|
||||||
|
)
|
||||||
|
|
||||||
# Verify that no index processing occurred
|
# Verify that no index processing occurred
|
||||||
mock_external_service_dependencies["index_processor"].clean.assert_not_called()
|
mock_external_service_dependencies["index_processor"].clean.assert_not_called()
|
||||||
|
|
@ -525,7 +531,11 @@ class TestBatchCleanDocumentTask:
|
||||||
account = self._create_test_account(db_session_with_containers)
|
account = self._create_test_account(db_session_with_containers)
|
||||||
|
|
||||||
# Test different doc_form types
|
# Test different doc_form types
|
||||||
doc_forms = ["text_model", "qa_model", "hierarchical_model"]
|
doc_forms = [
|
||||||
|
IndexStructureType.PARAGRAPH_INDEX,
|
||||||
|
IndexStructureType.QA_INDEX,
|
||||||
|
IndexStructureType.PARENT_CHILD_INDEX,
|
||||||
|
]
|
||||||
|
|
||||||
for doc_form in doc_forms:
|
for doc_form in doc_forms:
|
||||||
dataset = self._create_test_dataset(db_session_with_containers, account)
|
dataset = self._create_test_dataset(db_session_with_containers, account)
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,7 @@ import pytest
|
||||||
from faker import Faker
|
from faker import Faker
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from extensions.storage.storage_type import StorageType
|
from extensions.storage.storage_type import StorageType
|
||||||
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
||||||
from models.dataset import Dataset, Document, DocumentSegment
|
from models.dataset import Dataset, Document, DocumentSegment
|
||||||
|
|
@ -179,7 +180,7 @@ class TestBatchCreateSegmentToIndexTask:
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
archived=False,
|
archived=False,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
word_count=0,
|
word_count=0,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -221,17 +222,17 @@ class TestBatchCreateSegmentToIndexTask:
|
||||||
|
|
||||||
return upload_file
|
return upload_file
|
||||||
|
|
||||||
def _create_test_csv_content(self, content_type="text_model"):
|
def _create_test_csv_content(self, content_type=IndexStructureType.PARAGRAPH_INDEX):
|
||||||
"""
|
"""
|
||||||
Helper method to create test CSV content.
|
Helper method to create test CSV content.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
content_type: Type of content to create ("text_model" or "qa_model")
|
content_type: Type of content to create (IndexStructureType.PARAGRAPH_INDEX or IndexStructureType.QA_INDEX)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: CSV content as string
|
str: CSV content as string
|
||||||
"""
|
"""
|
||||||
if content_type == "qa_model":
|
if content_type == IndexStructureType.QA_INDEX:
|
||||||
csv_content = "content,answer\n"
|
csv_content = "content,answer\n"
|
||||||
csv_content += "This is the first segment content,This is the first answer\n"
|
csv_content += "This is the first segment content,This is the first answer\n"
|
||||||
csv_content += "This is the second segment content,This is the second answer\n"
|
csv_content += "This is the second segment content,This is the second answer\n"
|
||||||
|
|
@ -264,7 +265,7 @@ class TestBatchCreateSegmentToIndexTask:
|
||||||
upload_file = self._create_test_upload_file(db_session_with_containers, account, tenant)
|
upload_file = self._create_test_upload_file(db_session_with_containers, account, tenant)
|
||||||
|
|
||||||
# Create CSV content
|
# Create CSV content
|
||||||
csv_content = self._create_test_csv_content("text_model")
|
csv_content = self._create_test_csv_content(IndexStructureType.PARAGRAPH_INDEX)
|
||||||
|
|
||||||
# Mock storage to return our CSV content
|
# Mock storage to return our CSV content
|
||||||
mock_storage = mock_external_service_dependencies["storage"]
|
mock_storage = mock_external_service_dependencies["storage"]
|
||||||
|
|
@ -451,7 +452,7 @@ class TestBatchCreateSegmentToIndexTask:
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=False, # Document is disabled
|
enabled=False, # Document is disabled
|
||||||
archived=False,
|
archived=False,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
word_count=0,
|
word_count=0,
|
||||||
),
|
),
|
||||||
# Archived document
|
# Archived document
|
||||||
|
|
@ -467,7 +468,7 @@ class TestBatchCreateSegmentToIndexTask:
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
archived=True, # Document is archived
|
archived=True, # Document is archived
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
word_count=0,
|
word_count=0,
|
||||||
),
|
),
|
||||||
# Document with incomplete indexing
|
# Document with incomplete indexing
|
||||||
|
|
@ -483,7 +484,7 @@ class TestBatchCreateSegmentToIndexTask:
|
||||||
indexing_status=IndexingStatus.INDEXING, # Not completed
|
indexing_status=IndexingStatus.INDEXING, # Not completed
|
||||||
enabled=True,
|
enabled=True,
|
||||||
archived=False,
|
archived=False,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
word_count=0,
|
word_count=0,
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
@ -655,7 +656,7 @@ class TestBatchCreateSegmentToIndexTask:
|
||||||
db_session_with_containers.commit()
|
db_session_with_containers.commit()
|
||||||
|
|
||||||
# Create CSV content
|
# Create CSV content
|
||||||
csv_content = self._create_test_csv_content("text_model")
|
csv_content = self._create_test_csv_content(IndexStructureType.PARAGRAPH_INDEX)
|
||||||
|
|
||||||
# Mock storage to return our CSV content
|
# Mock storage to return our CSV content
|
||||||
mock_storage = mock_external_service_dependencies["storage"]
|
mock_storage = mock_external_service_dependencies["storage"]
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,7 @@ import pytest
|
||||||
from faker import Faker
|
from faker import Faker
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from extensions.storage.storage_type import StorageType
|
from extensions.storage.storage_type import StorageType
|
||||||
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
||||||
from models.dataset import (
|
from models.dataset import (
|
||||||
|
|
@ -192,7 +193,7 @@ class TestCleanDatasetTask:
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
archived=False,
|
archived=False,
|
||||||
doc_form="paragraph_index",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
word_count=100,
|
word_count=100,
|
||||||
created_at=datetime.now(),
|
created_at=datetime.now(),
|
||||||
updated_at=datetime.now(),
|
updated_at=datetime.now(),
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ from unittest.mock import Mock, patch
|
||||||
import pytest
|
import pytest
|
||||||
from faker import Faker
|
from faker import Faker
|
||||||
|
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from models.dataset import Dataset, Document, DocumentSegment
|
from models.dataset import Dataset, Document, DocumentSegment
|
||||||
from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus, SegmentStatus
|
from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus, SegmentStatus
|
||||||
from services.account_service import AccountService, TenantService
|
from services.account_service import AccountService, TenantService
|
||||||
|
|
@ -114,7 +115,7 @@ class TestCleanNotionDocumentTask:
|
||||||
name=f"Notion Page {i}",
|
name=f"Notion Page {i}",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model", # Set doc_form to ensure dataset.doc_form works
|
doc_form=IndexStructureType.PARAGRAPH_INDEX, # Set doc_form to ensure dataset.doc_form works
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
)
|
)
|
||||||
|
|
@ -261,7 +262,7 @@ class TestCleanNotionDocumentTask:
|
||||||
|
|
||||||
# Test different index types
|
# Test different index types
|
||||||
# Note: Only testing text_model to avoid dependency on external services
|
# Note: Only testing text_model to avoid dependency on external services
|
||||||
index_types = ["text_model"]
|
index_types = [IndexStructureType.PARAGRAPH_INDEX]
|
||||||
|
|
||||||
for index_type in index_types:
|
for index_type in index_types:
|
||||||
# Create dataset (doc_form will be set via document creation)
|
# Create dataset (doc_form will be set via document creation)
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ from uuid import uuid4
|
||||||
import pytest
|
import pytest
|
||||||
from faker import Faker
|
from faker import Faker
|
||||||
|
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from extensions.ext_redis import redis_client
|
from extensions.ext_redis import redis_client
|
||||||
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
||||||
from models.dataset import Dataset, Document, DocumentSegment
|
from models.dataset import Dataset, Document, DocumentSegment
|
||||||
|
|
@ -141,7 +142,7 @@ class TestCreateSegmentToIndexTask:
|
||||||
enabled=True,
|
enabled=True,
|
||||||
archived=False,
|
archived=False,
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
doc_form="qa_model",
|
doc_form=IndexStructureType.QA_INDEX,
|
||||||
)
|
)
|
||||||
db_session_with_containers.add(document)
|
db_session_with_containers.add(document)
|
||||||
db_session_with_containers.commit()
|
db_session_with_containers.commit()
|
||||||
|
|
@ -301,7 +302,7 @@ class TestCreateSegmentToIndexTask:
|
||||||
enabled=True,
|
enabled=True,
|
||||||
archived=False,
|
archived=False,
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
)
|
)
|
||||||
db_session_with_containers.add(document)
|
db_session_with_containers.add(document)
|
||||||
db_session_with_containers.commit()
|
db_session_with_containers.commit()
|
||||||
|
|
@ -552,7 +553,11 @@ class TestCreateSegmentToIndexTask:
|
||||||
- Processing completes successfully for different forms
|
- Processing completes successfully for different forms
|
||||||
"""
|
"""
|
||||||
# Arrange: Test different doc_forms
|
# Arrange: Test different doc_forms
|
||||||
doc_forms = ["qa_model", "text_model", "web_model"]
|
doc_forms = [
|
||||||
|
IndexStructureType.QA_INDEX,
|
||||||
|
IndexStructureType.PARAGRAPH_INDEX,
|
||||||
|
IndexStructureType.PARAGRAPH_INDEX,
|
||||||
|
]
|
||||||
|
|
||||||
for doc_form in doc_forms:
|
for doc_form in doc_forms:
|
||||||
# Create fresh test data for each form
|
# Create fresh test data for each form
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ from unittest.mock import ANY, Mock, patch
|
||||||
import pytest
|
import pytest
|
||||||
from faker import Faker
|
from faker import Faker
|
||||||
|
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from models.dataset import Dataset, Document, DocumentSegment
|
from models.dataset import Dataset, Document, DocumentSegment
|
||||||
from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus, SegmentStatus
|
from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus, SegmentStatus
|
||||||
from services.account_service import AccountService, TenantService
|
from services.account_service import AccountService, TenantService
|
||||||
|
|
@ -107,7 +108,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Document for doc_form",
|
name="Document for doc_form",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -167,7 +168,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Document for doc_form",
|
name="Document for doc_form",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -187,7 +188,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Test Document",
|
name="Test Document",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -268,7 +269,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Document for doc_form",
|
name="Document for doc_form",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="parent_child_index",
|
doc_form=IndexStructureType.PARENT_CHILD_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -288,7 +289,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Test Document",
|
name="Test Document",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="parent_child_index",
|
doc_form=IndexStructureType.PARENT_CHILD_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -416,7 +417,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Test Document",
|
name="Test Document",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -505,7 +506,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Document for doc_form",
|
name="Document for doc_form",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -525,7 +526,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Test Document",
|
name="Test Document",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -601,7 +602,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Test Document",
|
name="Test Document",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="qa_index",
|
doc_form=IndexStructureType.QA_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -638,7 +639,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
assert updated_document.indexing_status == IndexingStatus.COMPLETED
|
assert updated_document.indexing_status == IndexingStatus.COMPLETED
|
||||||
|
|
||||||
# Verify index processor was initialized with custom index type
|
# Verify index processor was initialized with custom index type
|
||||||
mock_index_processor_factory.assert_called_once_with("qa_index")
|
mock_index_processor_factory.assert_called_once_with(IndexStructureType.QA_INDEX)
|
||||||
mock_factory = mock_index_processor_factory.return_value
|
mock_factory = mock_index_processor_factory.return_value
|
||||||
mock_processor = mock_factory.init_index_processor.return_value
|
mock_processor = mock_factory.init_index_processor.return_value
|
||||||
mock_processor.load.assert_called_once()
|
mock_processor.load.assert_called_once()
|
||||||
|
|
@ -677,7 +678,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Test Document",
|
name="Test Document",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -714,7 +715,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
assert updated_document.indexing_status == IndexingStatus.COMPLETED
|
assert updated_document.indexing_status == IndexingStatus.COMPLETED
|
||||||
|
|
||||||
# Verify index processor was initialized with the document's index type
|
# Verify index processor was initialized with the document's index type
|
||||||
mock_index_processor_factory.assert_called_once_with("text_model")
|
mock_index_processor_factory.assert_called_once_with(IndexStructureType.PARAGRAPH_INDEX)
|
||||||
mock_factory = mock_index_processor_factory.return_value
|
mock_factory = mock_index_processor_factory.return_value
|
||||||
mock_processor = mock_factory.init_index_processor.return_value
|
mock_processor = mock_factory.init_index_processor.return_value
|
||||||
mock_processor.load.assert_called_once()
|
mock_processor.load.assert_called_once()
|
||||||
|
|
@ -753,7 +754,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Document for doc_form",
|
name="Document for doc_form",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -775,7 +776,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name=f"Test Document {i}",
|
name=f"Test Document {i}",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -856,7 +857,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Document for doc_form",
|
name="Document for doc_form",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -876,7 +877,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Test Document",
|
name="Test Document",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -953,7 +954,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Document for doc_form",
|
name="Document for doc_form",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -973,7 +974,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Enabled Document",
|
name="Enabled Document",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -992,7 +993,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Disabled Document",
|
name="Disabled Document",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=False, # This document should be skipped
|
enabled=False, # This document should be skipped
|
||||||
|
|
@ -1074,7 +1075,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Document for doc_form",
|
name="Document for doc_form",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -1094,7 +1095,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Active Document",
|
name="Active Document",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -1113,7 +1114,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Archived Document",
|
name="Archived Document",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -1195,7 +1196,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Document for doc_form",
|
name="Document for doc_form",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -1215,7 +1216,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Completed Document",
|
name="Completed Document",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.COMPLETED,
|
indexing_status=IndexingStatus.COMPLETED,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
@ -1234,7 +1235,7 @@ class TestDealDatasetVectorIndexTask:
|
||||||
name="Incomplete Document",
|
name="Incomplete Document",
|
||||||
created_from=DocumentCreatedFrom.WEB,
|
created_from=DocumentCreatedFrom.WEB,
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
indexing_status=IndexingStatus.INDEXING, # This document should be skipped
|
indexing_status=IndexingStatus.INDEXING, # This document should be skipped
|
||||||
enabled=True,
|
enabled=True,
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ import pytest
|
||||||
from faker import Faker
|
from faker import Faker
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from extensions.ext_redis import redis_client
|
from extensions.ext_redis import redis_client
|
||||||
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
||||||
from models.dataset import Dataset, Document, DocumentSegment
|
from models.dataset import Dataset, Document, DocumentSegment
|
||||||
|
|
@ -113,7 +114,7 @@ class TestDisableSegmentFromIndexTask:
|
||||||
dataset: Dataset,
|
dataset: Dataset,
|
||||||
tenant: Tenant,
|
tenant: Tenant,
|
||||||
account: Account,
|
account: Account,
|
||||||
doc_form: str = "text_model",
|
doc_form: str = IndexStructureType.PARAGRAPH_INDEX,
|
||||||
) -> Document:
|
) -> Document:
|
||||||
"""
|
"""
|
||||||
Helper method to create a test document.
|
Helper method to create a test document.
|
||||||
|
|
@ -476,7 +477,11 @@ class TestDisableSegmentFromIndexTask:
|
||||||
- Index processor clean method is called correctly
|
- Index processor clean method is called correctly
|
||||||
"""
|
"""
|
||||||
# Test different document forms
|
# Test different document forms
|
||||||
doc_forms = ["text_model", "qa_model", "table_model"]
|
doc_forms = [
|
||||||
|
IndexStructureType.PARAGRAPH_INDEX,
|
||||||
|
IndexStructureType.QA_INDEX,
|
||||||
|
IndexStructureType.PARENT_CHILD_INDEX,
|
||||||
|
]
|
||||||
|
|
||||||
for doc_form in doc_forms:
|
for doc_form in doc_forms:
|
||||||
# Arrange: Create test data for each form
|
# Arrange: Create test data for each form
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ from unittest.mock import MagicMock, patch
|
||||||
from faker import Faker
|
from faker import Faker
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from models import Account, Dataset, DocumentSegment
|
from models import Account, Dataset, DocumentSegment
|
||||||
from models import Document as DatasetDocument
|
from models import Document as DatasetDocument
|
||||||
from models.dataset import DatasetProcessRule
|
from models.dataset import DatasetProcessRule
|
||||||
|
|
@ -153,7 +154,7 @@ class TestDisableSegmentsFromIndexTask:
|
||||||
document.indexing_status = "completed"
|
document.indexing_status = "completed"
|
||||||
document.enabled = True
|
document.enabled = True
|
||||||
document.archived = False
|
document.archived = False
|
||||||
document.doc_form = "text_model" # Use text_model form for testing
|
document.doc_form = IndexStructureType.PARAGRAPH_INDEX # Use text_model form for testing
|
||||||
document.doc_language = "en"
|
document.doc_language = "en"
|
||||||
db_session_with_containers.add(document)
|
db_session_with_containers.add(document)
|
||||||
db_session_with_containers.commit()
|
db_session_with_containers.commit()
|
||||||
|
|
@ -500,7 +501,11 @@ class TestDisableSegmentsFromIndexTask:
|
||||||
segment_ids = [segment.id for segment in segments]
|
segment_ids = [segment.id for segment in segments]
|
||||||
|
|
||||||
# Test different document forms
|
# Test different document forms
|
||||||
doc_forms = ["text_model", "qa_model", "hierarchical_model"]
|
doc_forms = [
|
||||||
|
IndexStructureType.PARAGRAPH_INDEX,
|
||||||
|
IndexStructureType.QA_INDEX,
|
||||||
|
IndexStructureType.PARENT_CHILD_INDEX,
|
||||||
|
]
|
||||||
|
|
||||||
for doc_form in doc_forms:
|
for doc_form in doc_forms:
|
||||||
# Update document form
|
# Update document form
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,7 @@ from uuid import uuid4
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from core.indexing_runner import DocumentIsPausedError, IndexingRunner
|
from core.indexing_runner import DocumentIsPausedError, IndexingRunner
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
||||||
from models.dataset import Dataset, Document, DocumentSegment
|
from models.dataset import Dataset, Document, DocumentSegment
|
||||||
from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus, SegmentStatus
|
from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus, SegmentStatus
|
||||||
|
|
@ -85,7 +86,7 @@ class DocumentIndexingSyncTaskTestDataFactory:
|
||||||
created_by=created_by,
|
created_by=created_by,
|
||||||
indexing_status=indexing_status,
|
indexing_status=indexing_status,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
doc_language="en",
|
doc_language="en",
|
||||||
)
|
)
|
||||||
db_session_with_containers.add(document)
|
db_session_with_containers.add(document)
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ from unittest.mock import MagicMock, patch
|
||||||
import pytest
|
import pytest
|
||||||
from faker import Faker
|
from faker import Faker
|
||||||
|
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
||||||
from models.dataset import Dataset, Document, DocumentSegment
|
from models.dataset import Dataset, Document, DocumentSegment
|
||||||
from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus, SegmentStatus
|
from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus, SegmentStatus
|
||||||
|
|
@ -80,7 +81,7 @@ class TestDocumentIndexingUpdateTask:
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
indexing_status=IndexingStatus.WAITING,
|
indexing_status=IndexingStatus.WAITING,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
)
|
)
|
||||||
db_session_with_containers.add(document)
|
db_session_with_containers.add(document)
|
||||||
db_session_with_containers.commit()
|
db_session_with_containers.commit()
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ import pytest
|
||||||
from faker import Faker
|
from faker import Faker
|
||||||
|
|
||||||
from core.indexing_runner import DocumentIsPausedError
|
from core.indexing_runner import DocumentIsPausedError
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from enums.cloud_plan import CloudPlan
|
from enums.cloud_plan import CloudPlan
|
||||||
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
||||||
from models.dataset import Dataset, Document, DocumentSegment
|
from models.dataset import Dataset, Document, DocumentSegment
|
||||||
|
|
@ -130,7 +131,7 @@ class TestDuplicateDocumentIndexingTasks:
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
indexing_status=IndexingStatus.WAITING,
|
indexing_status=IndexingStatus.WAITING,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
)
|
)
|
||||||
db_session_with_containers.add(document)
|
db_session_with_containers.add(document)
|
||||||
documents.append(document)
|
documents.append(document)
|
||||||
|
|
@ -265,7 +266,7 @@ class TestDuplicateDocumentIndexingTasks:
|
||||||
created_by=account.id,
|
created_by=account.id,
|
||||||
indexing_status=IndexingStatus.WAITING,
|
indexing_status=IndexingStatus.WAITING,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
)
|
)
|
||||||
db_session_with_containers.add(document)
|
db_session_with_containers.add(document)
|
||||||
documents.append(document)
|
documents.append(document)
|
||||||
|
|
@ -524,7 +525,7 @@ class TestDuplicateDocumentIndexingTasks:
|
||||||
created_by=dataset.created_by,
|
created_by=dataset.created_by,
|
||||||
indexing_status=IndexingStatus.WAITING,
|
indexing_status=IndexingStatus.WAITING,
|
||||||
enabled=True,
|
enabled=True,
|
||||||
doc_form="text_model",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
)
|
)
|
||||||
db_session_with_containers.add(document)
|
db_session_with_containers.add(document)
|
||||||
extra_documents.append(document)
|
extra_documents.append(document)
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ from controllers.console.datasets.data_source import (
|
||||||
DataSourceNotionDocumentSyncApi,
|
DataSourceNotionDocumentSyncApi,
|
||||||
DataSourceNotionListApi,
|
DataSourceNotionListApi,
|
||||||
)
|
)
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
|
|
||||||
|
|
||||||
def unwrap(func):
|
def unwrap(func):
|
||||||
|
|
@ -343,7 +344,7 @@ class TestDataSourceNotionApi:
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"process_rule": {"rules": {}},
|
"process_rule": {"rules": {}},
|
||||||
"doc_form": "text_model",
|
"doc_form": IndexStructureType.PARAGRAPH_INDEX,
|
||||||
"doc_language": "English",
|
"doc_language": "English",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ from controllers.console.datasets.datasets import (
|
||||||
from controllers.console.datasets.error import DatasetInUseError, DatasetNameDuplicateError, IndexingEstimateError
|
from controllers.console.datasets.error import DatasetInUseError, DatasetNameDuplicateError, IndexingEstimateError
|
||||||
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
|
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
|
||||||
from core.provider_manager import ProviderManager
|
from core.provider_manager import ProviderManager
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from extensions.storage.storage_type import StorageType
|
from extensions.storage.storage_type import StorageType
|
||||||
from models.enums import CreatorUserRole
|
from models.enums import CreatorUserRole
|
||||||
from models.model import ApiToken, UploadFile
|
from models.model import ApiToken, UploadFile
|
||||||
|
|
@ -1146,7 +1147,7 @@ class TestDatasetIndexingEstimateApi:
|
||||||
},
|
},
|
||||||
"process_rule": {"chunk_size": 100},
|
"process_rule": {"chunk_size": 100},
|
||||||
"indexing_technique": "high_quality",
|
"indexing_technique": "high_quality",
|
||||||
"doc_form": "text_model",
|
"doc_form": IndexStructureType.PARAGRAPH_INDEX,
|
||||||
"doc_language": "English",
|
"doc_language": "English",
|
||||||
"dataset_id": None,
|
"dataset_id": None,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ from controllers.console.datasets.error import (
|
||||||
InvalidActionError,
|
InvalidActionError,
|
||||||
InvalidMetadataError,
|
InvalidMetadataError,
|
||||||
)
|
)
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from models.enums import DataSourceType, IndexingStatus
|
from models.enums import DataSourceType, IndexingStatus
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -66,7 +67,7 @@ def document():
|
||||||
indexing_status=IndexingStatus.INDEXING,
|
indexing_status=IndexingStatus.INDEXING,
|
||||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||||
data_source_info_dict={"upload_file_id": "file-1"},
|
data_source_info_dict={"upload_file_id": "file-1"},
|
||||||
doc_form="text",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
archived=False,
|
archived=False,
|
||||||
is_paused=False,
|
is_paused=False,
|
||||||
dataset_process_rule=None,
|
dataset_process_rule=None,
|
||||||
|
|
@ -765,8 +766,8 @@ class TestDocumentGenerateSummaryApi:
|
||||||
summary_index_setting={"enable": True},
|
summary_index_setting={"enable": True},
|
||||||
)
|
)
|
||||||
|
|
||||||
doc1 = MagicMock(id="doc-1", doc_form="qa_model")
|
doc1 = MagicMock(id="doc-1", doc_form=IndexStructureType.QA_INDEX)
|
||||||
doc2 = MagicMock(id="doc-2", doc_form="text")
|
doc2 = MagicMock(id="doc-2", doc_form=IndexStructureType.PARAGRAPH_INDEX)
|
||||||
|
|
||||||
payload = {"document_list": ["doc-1", "doc-2"]}
|
payload = {"document_list": ["doc-1", "doc-2"]}
|
||||||
|
|
||||||
|
|
@ -822,7 +823,7 @@ class TestDocumentIndexingEstimateApi:
|
||||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||||
data_source_info_dict={"upload_file_id": "file-1"},
|
data_source_info_dict={"upload_file_id": "file-1"},
|
||||||
tenant_id="tenant-1",
|
tenant_id="tenant-1",
|
||||||
doc_form="text",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
dataset_process_rule=None,
|
dataset_process_rule=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -849,7 +850,7 @@ class TestDocumentIndexingEstimateApi:
|
||||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||||
data_source_info_dict={"upload_file_id": "file-1"},
|
data_source_info_dict={"upload_file_id": "file-1"},
|
||||||
tenant_id="tenant-1",
|
tenant_id="tenant-1",
|
||||||
doc_form="text",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
dataset_process_rule=None,
|
dataset_process_rule=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -973,7 +974,7 @@ class TestDocumentBatchIndexingEstimateApi:
|
||||||
"mode": "single",
|
"mode": "single",
|
||||||
"only_main_content": True,
|
"only_main_content": True,
|
||||||
},
|
},
|
||||||
doc_form="text",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
)
|
)
|
||||||
|
|
||||||
with (
|
with (
|
||||||
|
|
@ -1001,7 +1002,7 @@ class TestDocumentBatchIndexingEstimateApi:
|
||||||
"notion_page_id": "p1",
|
"notion_page_id": "p1",
|
||||||
"type": "page",
|
"type": "page",
|
||||||
},
|
},
|
||||||
doc_form="text",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
)
|
)
|
||||||
|
|
||||||
with (
|
with (
|
||||||
|
|
@ -1024,7 +1025,7 @@ class TestDocumentBatchIndexingEstimateApi:
|
||||||
indexing_status=IndexingStatus.INDEXING,
|
indexing_status=IndexingStatus.INDEXING,
|
||||||
data_source_type="unknown",
|
data_source_type="unknown",
|
||||||
data_source_info_dict={},
|
data_source_info_dict={},
|
||||||
doc_form="text",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
)
|
)
|
||||||
|
|
||||||
with app.test_request_context("/"), patch.object(api, "get_batch_documents", return_value=[document]):
|
with app.test_request_context("/"), patch.object(api, "get_batch_documents", return_value=[document]):
|
||||||
|
|
@ -1353,7 +1354,7 @@ class TestDocumentIndexingEdgeCases:
|
||||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||||
data_source_info_dict={"upload_file_id": "file-1"},
|
data_source_info_dict={"upload_file_id": "file-1"},
|
||||||
tenant_id="tenant-1",
|
tenant_id="tenant-1",
|
||||||
doc_form="text",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
dataset_process_rule=None,
|
dataset_process_rule=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@ from controllers.console.datasets.error import (
|
||||||
InvalidActionError,
|
InvalidActionError,
|
||||||
)
|
)
|
||||||
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
|
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from models.dataset import ChildChunk, DocumentSegment
|
from models.dataset import ChildChunk, DocumentSegment
|
||||||
from models.model import UploadFile
|
from models.model import UploadFile
|
||||||
|
|
||||||
|
|
@ -366,7 +367,7 @@ class TestDatasetDocumentSegmentAddApi:
|
||||||
dataset.indexing_technique = "economy"
|
dataset.indexing_technique = "economy"
|
||||||
|
|
||||||
document = MagicMock()
|
document = MagicMock()
|
||||||
document.doc_form = "text"
|
document.doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||||
|
|
||||||
segment = MagicMock()
|
segment = MagicMock()
|
||||||
segment.id = "seg-1"
|
segment.id = "seg-1"
|
||||||
|
|
@ -505,7 +506,7 @@ class TestDatasetDocumentSegmentUpdateApi:
|
||||||
dataset.indexing_technique = "economy"
|
dataset.indexing_technique = "economy"
|
||||||
|
|
||||||
document = MagicMock()
|
document = MagicMock()
|
||||||
document.doc_form = "text"
|
document.doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||||
|
|
||||||
segment = MagicMock()
|
segment = MagicMock()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ from unittest.mock import Mock
|
||||||
import pytest
|
import pytest
|
||||||
from flask import Flask
|
from flask import Flask
|
||||||
|
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from models.account import TenantStatus
|
from models.account import TenantStatus
|
||||||
from models.model import App, AppMode, EndUser
|
from models.model import App, AppMode, EndUser
|
||||||
from tests.unit_tests.conftest import setup_mock_tenant_account_query
|
from tests.unit_tests.conftest import setup_mock_tenant_account_query
|
||||||
|
|
@ -175,7 +176,7 @@ def mock_document():
|
||||||
document.name = "test_document.txt"
|
document.name = "test_document.txt"
|
||||||
document.indexing_status = "completed"
|
document.indexing_status = "completed"
|
||||||
document.enabled = True
|
document.enabled = True
|
||||||
document.doc_form = "text_model"
|
document.doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||||
return document
|
return document
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,7 @@ from controllers.service_api.dataset.segment import (
|
||||||
SegmentCreatePayload,
|
SegmentCreatePayload,
|
||||||
SegmentListQuery,
|
SegmentListQuery,
|
||||||
)
|
)
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from models.dataset import ChildChunk, Dataset, Document, DocumentSegment
|
from models.dataset import ChildChunk, Dataset, Document, DocumentSegment
|
||||||
from models.enums import IndexingStatus
|
from models.enums import IndexingStatus
|
||||||
from services.dataset_service import DocumentService, SegmentService
|
from services.dataset_service import DocumentService, SegmentService
|
||||||
|
|
@ -788,7 +789,7 @@ class TestSegmentApiGet:
|
||||||
# Arrange
|
# Arrange
|
||||||
mock_account_fn.return_value = (Mock(), mock_tenant.id)
|
mock_account_fn.return_value = (Mock(), mock_tenant.id)
|
||||||
mock_db.session.query.return_value.where.return_value.first.return_value = mock_dataset
|
mock_db.session.query.return_value.where.return_value.first.return_value = mock_dataset
|
||||||
mock_doc_svc.get_document.return_value = Mock(doc_form="text_model")
|
mock_doc_svc.get_document.return_value = Mock(doc_form=IndexStructureType.PARAGRAPH_INDEX)
|
||||||
mock_seg_svc.get_segments.return_value = ([mock_segment], 1)
|
mock_seg_svc.get_segments.return_value = ([mock_segment], 1)
|
||||||
mock_marshal.return_value = [{"id": mock_segment.id}]
|
mock_marshal.return_value = [{"id": mock_segment.id}]
|
||||||
|
|
||||||
|
|
@ -903,7 +904,7 @@ class TestSegmentApiPost:
|
||||||
mock_doc = Mock()
|
mock_doc = Mock()
|
||||||
mock_doc.indexing_status = "completed"
|
mock_doc.indexing_status = "completed"
|
||||||
mock_doc.enabled = True
|
mock_doc.enabled = True
|
||||||
mock_doc.doc_form = "text_model"
|
mock_doc.doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||||
mock_doc_svc.get_document.return_value = mock_doc
|
mock_doc_svc.get_document.return_value = mock_doc
|
||||||
|
|
||||||
mock_seg_svc.segment_create_args_validate.return_value = None
|
mock_seg_svc.segment_create_args_validate.return_value = None
|
||||||
|
|
@ -1091,7 +1092,7 @@ class TestDatasetSegmentApiDelete:
|
||||||
mock_doc = Mock()
|
mock_doc = Mock()
|
||||||
mock_doc.indexing_status = "completed"
|
mock_doc.indexing_status = "completed"
|
||||||
mock_doc.enabled = True
|
mock_doc.enabled = True
|
||||||
mock_doc.doc_form = "text_model"
|
mock_doc.doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||||
mock_doc_svc.get_document.return_value = mock_doc
|
mock_doc_svc.get_document.return_value = mock_doc
|
||||||
|
|
||||||
mock_seg_svc.get_segment_by_id.return_value = None # Segment not found
|
mock_seg_svc.get_segment_by_id.return_value = None # Segment not found
|
||||||
|
|
@ -1371,7 +1372,7 @@ class TestDatasetSegmentApiGetSingle:
|
||||||
mock_account_fn.return_value = (Mock(), mock_tenant.id)
|
mock_account_fn.return_value = (Mock(), mock_tenant.id)
|
||||||
mock_db.session.query.return_value.where.return_value.first.return_value = mock_dataset
|
mock_db.session.query.return_value.where.return_value.first.return_value = mock_dataset
|
||||||
mock_dataset_svc.check_dataset_model_setting.return_value = None
|
mock_dataset_svc.check_dataset_model_setting.return_value = None
|
||||||
mock_doc = Mock(doc_form="text_model")
|
mock_doc = Mock(doc_form=IndexStructureType.PARAGRAPH_INDEX)
|
||||||
mock_doc_svc.get_document.return_value = mock_doc
|
mock_doc_svc.get_document.return_value = mock_doc
|
||||||
mock_seg_svc.get_segment_by_id.return_value = mock_segment
|
mock_seg_svc.get_segment_by_id.return_value = mock_segment
|
||||||
mock_marshal.return_value = {"id": mock_segment.id}
|
mock_marshal.return_value = {"id": mock_segment.id}
|
||||||
|
|
@ -1390,7 +1391,7 @@ class TestDatasetSegmentApiGetSingle:
|
||||||
|
|
||||||
assert status == 200
|
assert status == 200
|
||||||
assert "data" in response
|
assert "data" in response
|
||||||
assert response["doc_form"] == "text_model"
|
assert response["doc_form"] == IndexStructureType.PARAGRAPH_INDEX
|
||||||
|
|
||||||
@patch("controllers.service_api.dataset.segment.current_account_with_tenant")
|
@patch("controllers.service_api.dataset.segment.current_account_with_tenant")
|
||||||
@patch("controllers.service_api.dataset.segment.db")
|
@patch("controllers.service_api.dataset.segment.db")
|
||||||
|
|
|
||||||
|
|
@ -35,6 +35,7 @@ from controllers.service_api.dataset.document import (
|
||||||
InvalidMetadataError,
|
InvalidMetadataError,
|
||||||
)
|
)
|
||||||
from controllers.service_api.dataset.error import ArchivedDocumentImmutableError
|
from controllers.service_api.dataset.error import ArchivedDocumentImmutableError
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from models.enums import IndexingStatus
|
from models.enums import IndexingStatus
|
||||||
from services.dataset_service import DocumentService
|
from services.dataset_service import DocumentService
|
||||||
from services.entities.knowledge_entities.knowledge_entities import ProcessRule, RetrievalModel
|
from services.entities.knowledge_entities.knowledge_entities import ProcessRule, RetrievalModel
|
||||||
|
|
@ -52,7 +53,7 @@ class TestDocumentTextCreatePayload:
|
||||||
def test_payload_with_defaults(self):
|
def test_payload_with_defaults(self):
|
||||||
"""Test payload default values."""
|
"""Test payload default values."""
|
||||||
payload = DocumentTextCreatePayload(name="Doc", text="Content")
|
payload = DocumentTextCreatePayload(name="Doc", text="Content")
|
||||||
assert payload.doc_form == "text_model"
|
assert payload.doc_form == IndexStructureType.PARAGRAPH_INDEX
|
||||||
assert payload.doc_language == "English"
|
assert payload.doc_language == "English"
|
||||||
assert payload.process_rule is None
|
assert payload.process_rule is None
|
||||||
assert payload.indexing_technique is None
|
assert payload.indexing_technique is None
|
||||||
|
|
@ -62,14 +63,14 @@ class TestDocumentTextCreatePayload:
|
||||||
payload = DocumentTextCreatePayload(
|
payload = DocumentTextCreatePayload(
|
||||||
name="Full Document",
|
name="Full Document",
|
||||||
text="Complete document content here",
|
text="Complete document content here",
|
||||||
doc_form="qa_model",
|
doc_form=IndexStructureType.QA_INDEX,
|
||||||
doc_language="Chinese",
|
doc_language="Chinese",
|
||||||
indexing_technique="high_quality",
|
indexing_technique="high_quality",
|
||||||
embedding_model="text-embedding-ada-002",
|
embedding_model="text-embedding-ada-002",
|
||||||
embedding_model_provider="openai",
|
embedding_model_provider="openai",
|
||||||
)
|
)
|
||||||
assert payload.name == "Full Document"
|
assert payload.name == "Full Document"
|
||||||
assert payload.doc_form == "qa_model"
|
assert payload.doc_form == IndexStructureType.QA_INDEX
|
||||||
assert payload.doc_language == "Chinese"
|
assert payload.doc_language == "Chinese"
|
||||||
assert payload.indexing_technique == "high_quality"
|
assert payload.indexing_technique == "high_quality"
|
||||||
assert payload.embedding_model == "text-embedding-ada-002"
|
assert payload.embedding_model == "text-embedding-ada-002"
|
||||||
|
|
@ -147,8 +148,8 @@ class TestDocumentTextUpdate:
|
||||||
|
|
||||||
def test_payload_with_doc_form_update(self):
|
def test_payload_with_doc_form_update(self):
|
||||||
"""Test payload with doc_form update."""
|
"""Test payload with doc_form update."""
|
||||||
payload = DocumentTextUpdate(doc_form="qa_model")
|
payload = DocumentTextUpdate(doc_form=IndexStructureType.QA_INDEX)
|
||||||
assert payload.doc_form == "qa_model"
|
assert payload.doc_form == IndexStructureType.QA_INDEX
|
||||||
|
|
||||||
def test_payload_with_language_update(self):
|
def test_payload_with_language_update(self):
|
||||||
"""Test payload with doc_language update."""
|
"""Test payload with doc_language update."""
|
||||||
|
|
@ -158,7 +159,7 @@ class TestDocumentTextUpdate:
|
||||||
def test_payload_default_values(self):
|
def test_payload_default_values(self):
|
||||||
"""Test payload default values."""
|
"""Test payload default values."""
|
||||||
payload = DocumentTextUpdate()
|
payload = DocumentTextUpdate()
|
||||||
assert payload.doc_form == "text_model"
|
assert payload.doc_form == IndexStructureType.PARAGRAPH_INDEX
|
||||||
assert payload.doc_language == "English"
|
assert payload.doc_language == "English"
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -272,14 +273,24 @@ class TestDocumentDocForm:
|
||||||
|
|
||||||
def test_text_model_form(self):
|
def test_text_model_form(self):
|
||||||
"""Test text_model form."""
|
"""Test text_model form."""
|
||||||
doc_form = "text_model"
|
doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||||
valid_forms = ["text_model", "qa_model", "hierarchical_model", "parent_child_model"]
|
valid_forms = [
|
||||||
|
IndexStructureType.PARAGRAPH_INDEX,
|
||||||
|
IndexStructureType.QA_INDEX,
|
||||||
|
IndexStructureType.PARENT_CHILD_INDEX,
|
||||||
|
"parent_child_model",
|
||||||
|
]
|
||||||
assert doc_form in valid_forms
|
assert doc_form in valid_forms
|
||||||
|
|
||||||
def test_qa_model_form(self):
|
def test_qa_model_form(self):
|
||||||
"""Test qa_model form."""
|
"""Test qa_model form."""
|
||||||
doc_form = "qa_model"
|
doc_form = IndexStructureType.QA_INDEX
|
||||||
valid_forms = ["text_model", "qa_model", "hierarchical_model", "parent_child_model"]
|
valid_forms = [
|
||||||
|
IndexStructureType.PARAGRAPH_INDEX,
|
||||||
|
IndexStructureType.QA_INDEX,
|
||||||
|
IndexStructureType.PARENT_CHILD_INDEX,
|
||||||
|
"parent_child_model",
|
||||||
|
]
|
||||||
assert doc_form in valid_forms
|
assert doc_form in valid_forms
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -504,7 +515,7 @@ class TestDocumentApiGet:
|
||||||
doc.name = "test_document.txt"
|
doc.name = "test_document.txt"
|
||||||
doc.indexing_status = "completed"
|
doc.indexing_status = "completed"
|
||||||
doc.enabled = True
|
doc.enabled = True
|
||||||
doc.doc_form = "text_model"
|
doc.doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||||
doc.doc_language = "English"
|
doc.doc_language = "English"
|
||||||
doc.doc_type = "book"
|
doc.doc_type = "book"
|
||||||
doc.doc_metadata_details = {"source": "upload"}
|
doc.doc_metadata_details = {"source": "upload"}
|
||||||
|
|
|
||||||
|
|
@ -4800,8 +4800,8 @@ class TestInternalHooksCoverage:
|
||||||
dataset_docs = [
|
dataset_docs = [
|
||||||
SimpleNamespace(id="doc-a", doc_form=IndexStructureType.PARENT_CHILD_INDEX),
|
SimpleNamespace(id="doc-a", doc_form=IndexStructureType.PARENT_CHILD_INDEX),
|
||||||
SimpleNamespace(id="doc-b", doc_form=IndexStructureType.PARENT_CHILD_INDEX),
|
SimpleNamespace(id="doc-b", doc_form=IndexStructureType.PARENT_CHILD_INDEX),
|
||||||
SimpleNamespace(id="doc-c", doc_form="qa_model"),
|
SimpleNamespace(id="doc-c", doc_form=IndexStructureType.QA_INDEX),
|
||||||
SimpleNamespace(id="doc-d", doc_form="qa_model"),
|
SimpleNamespace(id="doc-d", doc_form=IndexStructureType.QA_INDEX),
|
||||||
]
|
]
|
||||||
child_chunks = [SimpleNamespace(index_node_id="idx-a", segment_id="seg-a")]
|
child_chunks = [SimpleNamespace(index_node_id="idx-a", segment_id="seg-a")]
|
||||||
segments = [SimpleNamespace(index_node_id="idx-c", id="seg-c")]
|
segments = [SimpleNamespace(index_node_id="idx-c", id="seg-c")]
|
||||||
|
|
|
||||||
|
|
@ -238,7 +238,7 @@ class TestApiToolProviderValidation:
|
||||||
name=provider_name,
|
name=provider_name,
|
||||||
icon='{"type": "emoji", "value": "🔧"}',
|
icon='{"type": "emoji", "value": "🔧"}',
|
||||||
schema=schema,
|
schema=schema,
|
||||||
schema_type_str="openapi",
|
schema_type_str=ApiProviderSchemaType.OPENAPI,
|
||||||
description="Custom API for testing",
|
description="Custom API for testing",
|
||||||
tools_str=json.dumps(tools),
|
tools_str=json.dumps(tools),
|
||||||
credentials_str=json.dumps(credentials),
|
credentials_str=json.dumps(credentials),
|
||||||
|
|
@ -249,7 +249,7 @@ class TestApiToolProviderValidation:
|
||||||
assert api_provider.user_id == user_id
|
assert api_provider.user_id == user_id
|
||||||
assert api_provider.name == provider_name
|
assert api_provider.name == provider_name
|
||||||
assert api_provider.schema == schema
|
assert api_provider.schema == schema
|
||||||
assert api_provider.schema_type_str == "openapi"
|
assert api_provider.schema_type_str == ApiProviderSchemaType.OPENAPI
|
||||||
assert api_provider.description == "Custom API for testing"
|
assert api_provider.description == "Custom API for testing"
|
||||||
|
|
||||||
def test_api_tool_provider_schema_type_property(self):
|
def test_api_tool_provider_schema_type_property(self):
|
||||||
|
|
@ -261,7 +261,7 @@ class TestApiToolProviderValidation:
|
||||||
name="Test API",
|
name="Test API",
|
||||||
icon="{}",
|
icon="{}",
|
||||||
schema="{}",
|
schema="{}",
|
||||||
schema_type_str="openapi",
|
schema_type_str=ApiProviderSchemaType.OPENAPI,
|
||||||
description="Test",
|
description="Test",
|
||||||
tools_str="[]",
|
tools_str="[]",
|
||||||
credentials_str="{}",
|
credentials_str="{}",
|
||||||
|
|
@ -314,7 +314,7 @@ class TestApiToolProviderValidation:
|
||||||
name="Weather API",
|
name="Weather API",
|
||||||
icon="{}",
|
icon="{}",
|
||||||
schema="{}",
|
schema="{}",
|
||||||
schema_type_str="openapi",
|
schema_type_str=ApiProviderSchemaType.OPENAPI,
|
||||||
description="Weather API",
|
description="Weather API",
|
||||||
tools_str=json.dumps(tools_data),
|
tools_str=json.dumps(tools_data),
|
||||||
credentials_str="{}",
|
credentials_str="{}",
|
||||||
|
|
@ -343,7 +343,7 @@ class TestApiToolProviderValidation:
|
||||||
name="Secure API",
|
name="Secure API",
|
||||||
icon="{}",
|
icon="{}",
|
||||||
schema="{}",
|
schema="{}",
|
||||||
schema_type_str="openapi",
|
schema_type_str=ApiProviderSchemaType.OPENAPI,
|
||||||
description="Secure API",
|
description="Secure API",
|
||||||
tools_str="[]",
|
tools_str="[]",
|
||||||
credentials_str=json.dumps(credentials_data),
|
credentials_str=json.dumps(credentials_data),
|
||||||
|
|
@ -369,7 +369,7 @@ class TestApiToolProviderValidation:
|
||||||
name="Privacy API",
|
name="Privacy API",
|
||||||
icon="{}",
|
icon="{}",
|
||||||
schema="{}",
|
schema="{}",
|
||||||
schema_type_str="openapi",
|
schema_type_str=ApiProviderSchemaType.OPENAPI,
|
||||||
description="API with privacy policy",
|
description="API with privacy policy",
|
||||||
tools_str="[]",
|
tools_str="[]",
|
||||||
credentials_str="{}",
|
credentials_str="{}",
|
||||||
|
|
@ -391,7 +391,7 @@ class TestApiToolProviderValidation:
|
||||||
name="Disclaimer API",
|
name="Disclaimer API",
|
||||||
icon="{}",
|
icon="{}",
|
||||||
schema="{}",
|
schema="{}",
|
||||||
schema_type_str="openapi",
|
schema_type_str=ApiProviderSchemaType.OPENAPI,
|
||||||
description="API with disclaimer",
|
description="API with disclaimer",
|
||||||
tools_str="[]",
|
tools_str="[]",
|
||||||
credentials_str="{}",
|
credentials_str="{}",
|
||||||
|
|
@ -410,7 +410,7 @@ class TestApiToolProviderValidation:
|
||||||
name="Default API",
|
name="Default API",
|
||||||
icon="{}",
|
icon="{}",
|
||||||
schema="{}",
|
schema="{}",
|
||||||
schema_type_str="openapi",
|
schema_type_str=ApiProviderSchemaType.OPENAPI,
|
||||||
description="API",
|
description="API",
|
||||||
tools_str="[]",
|
tools_str="[]",
|
||||||
credentials_str="{}",
|
credentials_str="{}",
|
||||||
|
|
@ -432,7 +432,7 @@ class TestApiToolProviderValidation:
|
||||||
name=provider_name,
|
name=provider_name,
|
||||||
icon="{}",
|
icon="{}",
|
||||||
schema="{}",
|
schema="{}",
|
||||||
schema_type_str="openapi",
|
schema_type_str=ApiProviderSchemaType.OPENAPI,
|
||||||
description="Unique API",
|
description="Unique API",
|
||||||
tools_str="[]",
|
tools_str="[]",
|
||||||
credentials_str="{}",
|
credentials_str="{}",
|
||||||
|
|
@ -454,7 +454,7 @@ class TestApiToolProviderValidation:
|
||||||
name="Public API",
|
name="Public API",
|
||||||
icon="{}",
|
icon="{}",
|
||||||
schema="{}",
|
schema="{}",
|
||||||
schema_type_str="openapi",
|
schema_type_str=ApiProviderSchemaType.OPENAPI,
|
||||||
description="Public API with no auth",
|
description="Public API with no auth",
|
||||||
tools_str="[]",
|
tools_str="[]",
|
||||||
credentials_str=json.dumps(credentials),
|
credentials_str=json.dumps(credentials),
|
||||||
|
|
@ -479,7 +479,7 @@ class TestApiToolProviderValidation:
|
||||||
name="Query Auth API",
|
name="Query Auth API",
|
||||||
icon="{}",
|
icon="{}",
|
||||||
schema="{}",
|
schema="{}",
|
||||||
schema_type_str="openapi",
|
schema_type_str=ApiProviderSchemaType.OPENAPI,
|
||||||
description="API with query auth",
|
description="API with query auth",
|
||||||
tools_str="[]",
|
tools_str="[]",
|
||||||
credentials_str=json.dumps(credentials),
|
credentials_str=json.dumps(credentials),
|
||||||
|
|
@ -741,7 +741,7 @@ class TestCredentialStorage:
|
||||||
name="Test API",
|
name="Test API",
|
||||||
icon="{}",
|
icon="{}",
|
||||||
schema="{}",
|
schema="{}",
|
||||||
schema_type_str="openapi",
|
schema_type_str=ApiProviderSchemaType.OPENAPI,
|
||||||
description="Test",
|
description="Test",
|
||||||
tools_str="[]",
|
tools_str="[]",
|
||||||
credentials_str=json.dumps(credentials),
|
credentials_str=json.dumps(credentials),
|
||||||
|
|
@ -788,7 +788,7 @@ class TestCredentialStorage:
|
||||||
name="Update Test",
|
name="Update Test",
|
||||||
icon="{}",
|
icon="{}",
|
||||||
schema="{}",
|
schema="{}",
|
||||||
schema_type_str="openapi",
|
schema_type_str=ApiProviderSchemaType.OPENAPI,
|
||||||
description="Test",
|
description="Test",
|
||||||
tools_str="[]",
|
tools_str="[]",
|
||||||
credentials_str=json.dumps(original_credentials),
|
credentials_str=json.dumps(original_credentials),
|
||||||
|
|
@ -897,7 +897,7 @@ class TestToolProviderRelationships:
|
||||||
name="User API",
|
name="User API",
|
||||||
icon="{}",
|
icon="{}",
|
||||||
schema="{}",
|
schema="{}",
|
||||||
schema_type_str="openapi",
|
schema_type_str=ApiProviderSchemaType.OPENAPI,
|
||||||
description="Test",
|
description="Test",
|
||||||
tools_str="[]",
|
tools_str="[]",
|
||||||
credentials_str="{}",
|
credentials_str="{}",
|
||||||
|
|
@ -931,7 +931,7 @@ class TestToolProviderRelationships:
|
||||||
name="Custom API 1",
|
name="Custom API 1",
|
||||||
icon="{}",
|
icon="{}",
|
||||||
schema="{}",
|
schema="{}",
|
||||||
schema_type_str="openapi",
|
schema_type_str=ApiProviderSchemaType.OPENAPI,
|
||||||
description="Test",
|
description="Test",
|
||||||
tools_str="[]",
|
tools_str="[]",
|
||||||
credentials_str="{}",
|
credentials_str="{}",
|
||||||
|
|
|
||||||
|
|
@ -111,6 +111,7 @@ from unittest.mock import Mock, patch
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
|
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from dify_graph.model_runtime.entities.model_entities import ModelType
|
from dify_graph.model_runtime.entities.model_entities import ModelType
|
||||||
from models.dataset import Dataset, DatasetProcessRule, Document
|
from models.dataset import Dataset, DatasetProcessRule, Document
|
||||||
from services.dataset_service import DatasetService, DocumentService
|
from services.dataset_service import DatasetService, DocumentService
|
||||||
|
|
@ -188,7 +189,7 @@ class DocumentValidationTestDataFactory:
|
||||||
def create_knowledge_config_mock(
|
def create_knowledge_config_mock(
|
||||||
data_source: DataSource | None = None,
|
data_source: DataSource | None = None,
|
||||||
process_rule: ProcessRule | None = None,
|
process_rule: ProcessRule | None = None,
|
||||||
doc_form: str = "text_model",
|
doc_form: str = IndexStructureType.PARAGRAPH_INDEX,
|
||||||
indexing_technique: str = "high_quality",
|
indexing_technique: str = "high_quality",
|
||||||
**kwargs,
|
**kwargs,
|
||||||
) -> Mock:
|
) -> Mock:
|
||||||
|
|
@ -326,8 +327,8 @@ class TestDatasetServiceCheckDocForm:
|
||||||
- Validation logic works correctly
|
- Validation logic works correctly
|
||||||
"""
|
"""
|
||||||
# Arrange
|
# Arrange
|
||||||
dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form="text_model")
|
dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form=IndexStructureType.PARAGRAPH_INDEX)
|
||||||
doc_form = "text_model"
|
doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||||
|
|
||||||
# Act (should not raise)
|
# Act (should not raise)
|
||||||
DatasetService.check_doc_form(dataset, doc_form)
|
DatasetService.check_doc_form(dataset, doc_form)
|
||||||
|
|
@ -349,7 +350,7 @@ class TestDatasetServiceCheckDocForm:
|
||||||
"""
|
"""
|
||||||
# Arrange
|
# Arrange
|
||||||
dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form=None)
|
dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form=None)
|
||||||
doc_form = "text_model"
|
doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||||
|
|
||||||
# Act (should not raise)
|
# Act (should not raise)
|
||||||
DatasetService.check_doc_form(dataset, doc_form)
|
DatasetService.check_doc_form(dataset, doc_form)
|
||||||
|
|
@ -370,8 +371,8 @@ class TestDatasetServiceCheckDocForm:
|
||||||
- Error type is correct
|
- Error type is correct
|
||||||
"""
|
"""
|
||||||
# Arrange
|
# Arrange
|
||||||
dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form="text_model")
|
dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form=IndexStructureType.PARAGRAPH_INDEX)
|
||||||
doc_form = "table_model" # Different form
|
doc_form = IndexStructureType.PARENT_CHILD_INDEX # Different form
|
||||||
|
|
||||||
# Act & Assert
|
# Act & Assert
|
||||||
with pytest.raises(ValueError, match="doc_form is different from the dataset doc_form"):
|
with pytest.raises(ValueError, match="doc_form is different from the dataset doc_form"):
|
||||||
|
|
@ -390,7 +391,7 @@ class TestDatasetServiceCheckDocForm:
|
||||||
"""
|
"""
|
||||||
# Arrange
|
# Arrange
|
||||||
dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form="knowledge_card")
|
dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form="knowledge_card")
|
||||||
doc_form = "text_model" # Different form
|
doc_form = IndexStructureType.PARAGRAPH_INDEX # Different form
|
||||||
|
|
||||||
# Act & Assert
|
# Act & Assert
|
||||||
with pytest.raises(ValueError, match="doc_form is different from the dataset doc_form"):
|
with pytest.raises(ValueError, match="doc_form is different from the dataset doc_form"):
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ from unittest.mock import MagicMock, Mock, patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from models.account import Account
|
from models.account import Account
|
||||||
from models.dataset import ChildChunk, Dataset, Document, DocumentSegment
|
from models.dataset import ChildChunk, Dataset, Document, DocumentSegment
|
||||||
from models.enums import SegmentType
|
from models.enums import SegmentType
|
||||||
|
|
@ -91,7 +92,7 @@ class SegmentTestDataFactory:
|
||||||
document_id: str = "doc-123",
|
document_id: str = "doc-123",
|
||||||
dataset_id: str = "dataset-123",
|
dataset_id: str = "dataset-123",
|
||||||
tenant_id: str = "tenant-123",
|
tenant_id: str = "tenant-123",
|
||||||
doc_form: str = "text_model",
|
doc_form: str = IndexStructureType.PARAGRAPH_INDEX,
|
||||||
word_count: int = 100,
|
word_count: int = 100,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
) -> Mock:
|
) -> Mock:
|
||||||
|
|
@ -210,7 +211,7 @@ class TestSegmentServiceCreateSegment:
|
||||||
def test_create_segment_with_qa_model(self, mock_db_session, mock_current_user):
|
def test_create_segment_with_qa_model(self, mock_db_session, mock_current_user):
|
||||||
"""Test creation of segment with QA model (requires answer)."""
|
"""Test creation of segment with QA model (requires answer)."""
|
||||||
# Arrange
|
# Arrange
|
||||||
document = SegmentTestDataFactory.create_document_mock(doc_form="qa_model", word_count=100)
|
document = SegmentTestDataFactory.create_document_mock(doc_form=IndexStructureType.QA_INDEX, word_count=100)
|
||||||
dataset = SegmentTestDataFactory.create_dataset_mock(indexing_technique="economy")
|
dataset = SegmentTestDataFactory.create_dataset_mock(indexing_technique="economy")
|
||||||
args = {"content": "What is AI?", "answer": "AI is Artificial Intelligence", "keywords": ["ai"]}
|
args = {"content": "What is AI?", "answer": "AI is Artificial Intelligence", "keywords": ["ai"]}
|
||||||
|
|
||||||
|
|
@ -429,7 +430,7 @@ class TestSegmentServiceUpdateSegment:
|
||||||
"""Test update segment with QA model (includes answer)."""
|
"""Test update segment with QA model (includes answer)."""
|
||||||
# Arrange
|
# Arrange
|
||||||
segment = SegmentTestDataFactory.create_segment_mock(enabled=True, word_count=10)
|
segment = SegmentTestDataFactory.create_segment_mock(enabled=True, word_count=10)
|
||||||
document = SegmentTestDataFactory.create_document_mock(doc_form="qa_model", word_count=100)
|
document = SegmentTestDataFactory.create_document_mock(doc_form=IndexStructureType.QA_INDEX, word_count=100)
|
||||||
dataset = SegmentTestDataFactory.create_dataset_mock(indexing_technique="economy")
|
dataset = SegmentTestDataFactory.create_dataset_mock(indexing_technique="economy")
|
||||||
args = SegmentUpdateArgs(content="Updated question", answer="Updated answer", keywords=["qa"])
|
args = SegmentUpdateArgs(content="Updated question", answer="Updated answer", keywords=["qa"])
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ from unittest.mock import Mock, create_autospec
|
||||||
import pytest
|
import pytest
|
||||||
from redis.exceptions import LockNotOwnedError
|
from redis.exceptions import LockNotOwnedError
|
||||||
|
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from models.account import Account
|
from models.account import Account
|
||||||
from models.dataset import Dataset, Document
|
from models.dataset import Dataset, Document
|
||||||
from services.dataset_service import DocumentService, SegmentService
|
from services.dataset_service import DocumentService, SegmentService
|
||||||
|
|
@ -76,7 +77,7 @@ def test_save_document_with_dataset_id_ignores_lock_not_owned(
|
||||||
info_list = types.SimpleNamespace(data_source_type="upload_file")
|
info_list = types.SimpleNamespace(data_source_type="upload_file")
|
||||||
data_source = types.SimpleNamespace(info_list=info_list)
|
data_source = types.SimpleNamespace(info_list=info_list)
|
||||||
knowledge_config = types.SimpleNamespace(
|
knowledge_config = types.SimpleNamespace(
|
||||||
doc_form="qa_model",
|
doc_form=IndexStructureType.QA_INDEX,
|
||||||
original_document_id=None, # go into "new document" branch
|
original_document_id=None, # go into "new document" branch
|
||||||
data_source=data_source,
|
data_source=data_source,
|
||||||
indexing_technique="high_quality",
|
indexing_technique="high_quality",
|
||||||
|
|
@ -131,7 +132,7 @@ def test_add_segment_ignores_lock_not_owned(
|
||||||
document.id = "doc-1"
|
document.id = "doc-1"
|
||||||
document.dataset_id = dataset.id
|
document.dataset_id = dataset.id
|
||||||
document.word_count = 0
|
document.word_count = 0
|
||||||
document.doc_form = "qa_model"
|
document.doc_form = IndexStructureType.QA_INDEX
|
||||||
|
|
||||||
# Minimal args required by add_segment
|
# Minimal args required by add_segment
|
||||||
args = {
|
args = {
|
||||||
|
|
@ -174,4 +175,4 @@ def test_multi_create_segment_ignores_lock_not_owned(
|
||||||
document.id = "doc-1"
|
document.id = "doc-1"
|
||||||
document.dataset_id = dataset.id
|
document.dataset_id = dataset.id
|
||||||
document.word_count = 0
|
document.word_count = 0
|
||||||
document.doc_form = "qa_model"
|
document.doc_form = IndexStructureType.QA_INDEX
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ from unittest.mock import MagicMock
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
import services.summary_index_service as summary_module
|
import services.summary_index_service as summary_module
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from models.enums import SegmentStatus, SummaryStatus
|
from models.enums import SegmentStatus, SummaryStatus
|
||||||
from services.summary_index_service import SummaryIndexService
|
from services.summary_index_service import SummaryIndexService
|
||||||
|
|
||||||
|
|
@ -48,7 +49,7 @@ def _segment(*, has_document: bool = True) -> MagicMock:
|
||||||
if has_document:
|
if has_document:
|
||||||
doc = MagicMock(name="document")
|
doc = MagicMock(name="document")
|
||||||
doc.doc_language = "en"
|
doc.doc_language = "en"
|
||||||
doc.doc_form = "text_model"
|
doc.doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||||
segment.document = doc
|
segment.document = doc
|
||||||
else:
|
else:
|
||||||
segment.document = None
|
segment.document = None
|
||||||
|
|
@ -623,13 +624,13 @@ def test_generate_summaries_for_document_skip_conditions(monkeypatch: pytest.Mon
|
||||||
dataset = _dataset(indexing_technique="economy")
|
dataset = _dataset(indexing_technique="economy")
|
||||||
document = MagicMock(spec=summary_module.DatasetDocument)
|
document = MagicMock(spec=summary_module.DatasetDocument)
|
||||||
document.id = "doc-1"
|
document.id = "doc-1"
|
||||||
document.doc_form = "text_model"
|
document.doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||||
assert SummaryIndexService.generate_summaries_for_document(dataset, document, {"enable": True}) == []
|
assert SummaryIndexService.generate_summaries_for_document(dataset, document, {"enable": True}) == []
|
||||||
|
|
||||||
dataset = _dataset()
|
dataset = _dataset()
|
||||||
assert SummaryIndexService.generate_summaries_for_document(dataset, document, {"enable": False}) == []
|
assert SummaryIndexService.generate_summaries_for_document(dataset, document, {"enable": False}) == []
|
||||||
|
|
||||||
document.doc_form = "qa_model"
|
document.doc_form = IndexStructureType.QA_INDEX
|
||||||
assert SummaryIndexService.generate_summaries_for_document(dataset, document, {"enable": True}) == []
|
assert SummaryIndexService.generate_summaries_for_document(dataset, document, {"enable": True}) == []
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -637,7 +638,7 @@ def test_generate_summaries_for_document_runs_and_handles_errors(monkeypatch: py
|
||||||
dataset = _dataset()
|
dataset = _dataset()
|
||||||
document = MagicMock(spec=summary_module.DatasetDocument)
|
document = MagicMock(spec=summary_module.DatasetDocument)
|
||||||
document.id = "doc-1"
|
document.id = "doc-1"
|
||||||
document.doc_form = "text_model"
|
document.doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||||
|
|
||||||
seg1 = _segment()
|
seg1 = _segment()
|
||||||
seg2 = _segment()
|
seg2 = _segment()
|
||||||
|
|
@ -673,7 +674,7 @@ def test_generate_summaries_for_document_no_segments_returns_empty(monkeypatch:
|
||||||
dataset = _dataset()
|
dataset = _dataset()
|
||||||
document = MagicMock(spec=summary_module.DatasetDocument)
|
document = MagicMock(spec=summary_module.DatasetDocument)
|
||||||
document.id = "doc-1"
|
document.id = "doc-1"
|
||||||
document.doc_form = "text_model"
|
document.doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||||
|
|
||||||
session = MagicMock()
|
session = MagicMock()
|
||||||
query = MagicMock()
|
query = MagicMock()
|
||||||
|
|
@ -696,7 +697,7 @@ def test_generate_summaries_for_document_applies_segment_ids_and_only_parent_chu
|
||||||
dataset = _dataset()
|
dataset = _dataset()
|
||||||
document = MagicMock(spec=summary_module.DatasetDocument)
|
document = MagicMock(spec=summary_module.DatasetDocument)
|
||||||
document.id = "doc-1"
|
document.id = "doc-1"
|
||||||
document.doc_form = "text_model"
|
document.doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||||
seg = _segment()
|
seg = _segment()
|
||||||
|
|
||||||
session = MagicMock()
|
session = MagicMock()
|
||||||
|
|
@ -935,7 +936,7 @@ def test_update_summary_for_segment_skip_conditions() -> None:
|
||||||
SummaryIndexService.update_summary_for_segment(_segment(), _dataset(indexing_technique="economy"), "x") is None
|
SummaryIndexService.update_summary_for_segment(_segment(), _dataset(indexing_technique="economy"), "x") is None
|
||||||
)
|
)
|
||||||
seg = _segment(has_document=True)
|
seg = _segment(has_document=True)
|
||||||
seg.document.doc_form = "qa_model"
|
seg.document.doc_form = IndexStructureType.QA_INDEX
|
||||||
assert SummaryIndexService.update_summary_for_segment(seg, _dataset(), "x") is None
|
assert SummaryIndexService.update_summary_for_segment(seg, _dataset(), "x") is None
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ from unittest.mock import MagicMock
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
import services.vector_service as vector_service_module
|
import services.vector_service as vector_service_module
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from services.vector_service import VectorService
|
from services.vector_service import VectorService
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -32,7 +33,7 @@ class _ParentDocStub:
|
||||||
def _make_dataset(
|
def _make_dataset(
|
||||||
*,
|
*,
|
||||||
indexing_technique: str = "high_quality",
|
indexing_technique: str = "high_quality",
|
||||||
doc_form: str = "text_model",
|
doc_form: str = IndexStructureType.PARAGRAPH_INDEX,
|
||||||
tenant_id: str = "tenant-1",
|
tenant_id: str = "tenant-1",
|
||||||
dataset_id: str = "dataset-1",
|
dataset_id: str = "dataset-1",
|
||||||
is_multimodal: bool = False,
|
is_multimodal: bool = False,
|
||||||
|
|
@ -106,7 +107,7 @@ def test_create_segments_vector_regular_indexing_loads_documents_and_keywords(mo
|
||||||
factory_instance.init_index_processor.return_value = index_processor
|
factory_instance.init_index_processor.return_value = index_processor
|
||||||
monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance))
|
monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance))
|
||||||
|
|
||||||
VectorService.create_segments_vector([["k1"]], [segment], dataset, "text_model")
|
VectorService.create_segments_vector([["k1"]], [segment], dataset, IndexStructureType.PARAGRAPH_INDEX)
|
||||||
|
|
||||||
index_processor.load.assert_called_once()
|
index_processor.load.assert_called_once()
|
||||||
args, kwargs = index_processor.load.call_args
|
args, kwargs = index_processor.load.call_args
|
||||||
|
|
@ -131,7 +132,7 @@ def test_create_segments_vector_regular_indexing_loads_multimodal_documents(monk
|
||||||
factory_instance.init_index_processor.return_value = index_processor
|
factory_instance.init_index_processor.return_value = index_processor
|
||||||
monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance))
|
monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance))
|
||||||
|
|
||||||
VectorService.create_segments_vector([["k1"]], [segment], dataset, "text_model")
|
VectorService.create_segments_vector([["k1"]], [segment], dataset, IndexStructureType.PARAGRAPH_INDEX)
|
||||||
|
|
||||||
assert index_processor.load.call_count == 2
|
assert index_processor.load.call_count == 2
|
||||||
first_args, first_kwargs = index_processor.load.call_args_list[0]
|
first_args, first_kwargs = index_processor.load.call_args_list[0]
|
||||||
|
|
@ -153,7 +154,7 @@ def test_create_segments_vector_with_no_segments_does_not_load(monkeypatch: pyte
|
||||||
factory_instance.init_index_processor.return_value = index_processor
|
factory_instance.init_index_processor.return_value = index_processor
|
||||||
monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance))
|
monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance))
|
||||||
|
|
||||||
VectorService.create_segments_vector(None, [], dataset, "text_model")
|
VectorService.create_segments_vector(None, [], dataset, IndexStructureType.PARAGRAPH_INDEX)
|
||||||
index_processor.load.assert_not_called()
|
index_processor.load.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -392,7 +393,7 @@ def test_update_segment_vector_economy_uses_keyword_without_keywords_list(monkey
|
||||||
|
|
||||||
|
|
||||||
def test_generate_child_chunks_regenerate_cleans_then_saves_children(monkeypatch: pytest.MonkeyPatch) -> None:
|
def test_generate_child_chunks_regenerate_cleans_then_saves_children(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||||
dataset = _make_dataset(doc_form="text_model", tenant_id="tenant-1", dataset_id="dataset-1")
|
dataset = _make_dataset(doc_form=IndexStructureType.PARAGRAPH_INDEX, tenant_id="tenant-1", dataset_id="dataset-1")
|
||||||
segment = _make_segment(segment_id="seg-1")
|
segment = _make_segment(segment_id="seg-1")
|
||||||
|
|
||||||
dataset_document = MagicMock()
|
dataset_document = MagicMock()
|
||||||
|
|
@ -439,7 +440,7 @@ def test_generate_child_chunks_regenerate_cleans_then_saves_children(monkeypatch
|
||||||
|
|
||||||
|
|
||||||
def test_generate_child_chunks_commits_even_when_no_children(monkeypatch: pytest.MonkeyPatch) -> None:
|
def test_generate_child_chunks_commits_even_when_no_children(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||||
dataset = _make_dataset(doc_form="text_model")
|
dataset = _make_dataset(doc_form=IndexStructureType.PARAGRAPH_INDEX)
|
||||||
segment = _make_segment()
|
segment = _make_segment()
|
||||||
dataset_document = MagicMock()
|
dataset_document = MagicMock()
|
||||||
dataset_document.doc_language = "en"
|
dataset_document.doc_language = "en"
|
||||||
|
|
|
||||||
|
|
@ -121,6 +121,7 @@ import pytest
|
||||||
from core.rag.datasource.vdb.vector_base import BaseVector
|
from core.rag.datasource.vdb.vector_base import BaseVector
|
||||||
from core.rag.datasource.vdb.vector_factory import Vector
|
from core.rag.datasource.vdb.vector_factory import Vector
|
||||||
from core.rag.datasource.vdb.vector_type import VectorType
|
from core.rag.datasource.vdb.vector_type import VectorType
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from core.rag.models.document import Document
|
from core.rag.models.document import Document
|
||||||
from models.dataset import ChildChunk, Dataset, DatasetDocument, DatasetProcessRule, DocumentSegment
|
from models.dataset import ChildChunk, Dataset, DatasetDocument, DatasetProcessRule, DocumentSegment
|
||||||
from services.vector_service import VectorService
|
from services.vector_service import VectorService
|
||||||
|
|
@ -151,7 +152,7 @@ class VectorServiceTestDataFactory:
|
||||||
def create_dataset_mock(
|
def create_dataset_mock(
|
||||||
dataset_id: str = "dataset-123",
|
dataset_id: str = "dataset-123",
|
||||||
tenant_id: str = "tenant-123",
|
tenant_id: str = "tenant-123",
|
||||||
doc_form: str = "text_model",
|
doc_form: str = IndexStructureType.PARAGRAPH_INDEX,
|
||||||
indexing_technique: str = "high_quality",
|
indexing_technique: str = "high_quality",
|
||||||
embedding_model_provider: str = "openai",
|
embedding_model_provider: str = "openai",
|
||||||
embedding_model: str = "text-embedding-ada-002",
|
embedding_model: str = "text-embedding-ada-002",
|
||||||
|
|
@ -493,7 +494,7 @@ class TestVectorService:
|
||||||
"""
|
"""
|
||||||
# Arrange
|
# Arrange
|
||||||
dataset = VectorServiceTestDataFactory.create_dataset_mock(
|
dataset = VectorServiceTestDataFactory.create_dataset_mock(
|
||||||
doc_form="text_model", indexing_technique="high_quality"
|
doc_form=IndexStructureType.PARAGRAPH_INDEX, indexing_technique="high_quality"
|
||||||
)
|
)
|
||||||
|
|
||||||
segment = VectorServiceTestDataFactory.create_document_segment_mock()
|
segment = VectorServiceTestDataFactory.create_document_segment_mock()
|
||||||
|
|
@ -505,7 +506,7 @@ class TestVectorService:
|
||||||
mock_index_processor_factory.return_value.init_index_processor.return_value = mock_index_processor
|
mock_index_processor_factory.return_value.init_index_processor.return_value = mock_index_processor
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
VectorService.create_segments_vector(keywords_list, [segment], dataset, "text_model")
|
VectorService.create_segments_vector(keywords_list, [segment], dataset, IndexStructureType.PARAGRAPH_INDEX)
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
mock_index_processor.load.assert_called_once()
|
mock_index_processor.load.assert_called_once()
|
||||||
|
|
@ -649,7 +650,7 @@ class TestVectorService:
|
||||||
mock_index_processor_factory.return_value.init_index_processor.return_value = mock_index_processor
|
mock_index_processor_factory.return_value.init_index_processor.return_value = mock_index_processor
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
VectorService.create_segments_vector(None, [], dataset, "text_model")
|
VectorService.create_segments_vector(None, [], dataset, IndexStructureType.PARAGRAPH_INDEX)
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
mock_index_processor.load.assert_not_called()
|
mock_index_processor.load.assert_not_called()
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from models.enums import DataSourceType
|
from models.enums import DataSourceType
|
||||||
from tasks.clean_dataset_task import clean_dataset_task
|
from tasks.clean_dataset_task import clean_dataset_task
|
||||||
|
|
||||||
|
|
@ -186,7 +187,7 @@ class TestErrorHandling:
|
||||||
indexing_technique="high_quality",
|
indexing_technique="high_quality",
|
||||||
index_struct='{"type": "paragraph"}',
|
index_struct='{"type": "paragraph"}',
|
||||||
collection_binding_id=collection_binding_id,
|
collection_binding_id=collection_binding_id,
|
||||||
doc_form="paragraph_index",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
|
|
@ -231,7 +232,7 @@ class TestPipelineAndWorkflowDeletion:
|
||||||
indexing_technique="high_quality",
|
indexing_technique="high_quality",
|
||||||
index_struct='{"type": "paragraph"}',
|
index_struct='{"type": "paragraph"}',
|
||||||
collection_binding_id=collection_binding_id,
|
collection_binding_id=collection_binding_id,
|
||||||
doc_form="paragraph_index",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
pipeline_id=pipeline_id,
|
pipeline_id=pipeline_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -267,7 +268,7 @@ class TestPipelineAndWorkflowDeletion:
|
||||||
indexing_technique="high_quality",
|
indexing_technique="high_quality",
|
||||||
index_struct='{"type": "paragraph"}',
|
index_struct='{"type": "paragraph"}',
|
||||||
collection_binding_id=collection_binding_id,
|
collection_binding_id=collection_binding_id,
|
||||||
doc_form="paragraph_index",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
pipeline_id=None,
|
pipeline_id=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -323,7 +324,7 @@ class TestSegmentAttachmentCleanup:
|
||||||
indexing_technique="high_quality",
|
indexing_technique="high_quality",
|
||||||
index_struct='{"type": "paragraph"}',
|
index_struct='{"type": "paragraph"}',
|
||||||
collection_binding_id=collection_binding_id,
|
collection_binding_id=collection_binding_id,
|
||||||
doc_form="paragraph_index",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
|
|
@ -368,7 +369,7 @@ class TestSegmentAttachmentCleanup:
|
||||||
indexing_technique="high_quality",
|
indexing_technique="high_quality",
|
||||||
index_struct='{"type": "paragraph"}',
|
index_struct='{"type": "paragraph"}',
|
||||||
collection_binding_id=collection_binding_id,
|
collection_binding_id=collection_binding_id,
|
||||||
doc_form="paragraph_index",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Assert - storage delete was attempted
|
# Assert - storage delete was attempted
|
||||||
|
|
@ -410,7 +411,7 @@ class TestEdgeCases:
|
||||||
indexing_technique="high_quality",
|
indexing_technique="high_quality",
|
||||||
index_struct='{"type": "paragraph"}',
|
index_struct='{"type": "paragraph"}',
|
||||||
collection_binding_id=collection_binding_id,
|
collection_binding_id=collection_binding_id,
|
||||||
doc_form="paragraph_index",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
|
|
@ -454,7 +455,7 @@ class TestIndexProcessorParameters:
|
||||||
indexing_technique=indexing_technique,
|
indexing_technique=indexing_technique,
|
||||||
index_struct=index_struct,
|
index_struct=index_struct,
|
||||||
collection_binding_id=collection_binding_id,
|
collection_binding_id=collection_binding_id,
|
||||||
doc_form="paragraph_index",
|
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ from unittest.mock import MagicMock, Mock, patch
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from core.indexing_runner import DocumentIsPausedError
|
from core.indexing_runner import DocumentIsPausedError
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from core.rag.pipeline.queue import TenantIsolatedTaskQueue
|
from core.rag.pipeline.queue import TenantIsolatedTaskQueue
|
||||||
from enums.cloud_plan import CloudPlan
|
from enums.cloud_plan import CloudPlan
|
||||||
from extensions.ext_redis import redis_client
|
from extensions.ext_redis import redis_client
|
||||||
|
|
@ -222,7 +223,7 @@ def mock_documents(document_ids, dataset_id):
|
||||||
doc.stopped_at = None
|
doc.stopped_at = None
|
||||||
doc.processing_started_at = None
|
doc.processing_started_at = None
|
||||||
# optional attribute used in some code paths
|
# optional attribute used in some code paths
|
||||||
doc.doc_form = "text_model"
|
doc.doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||||
documents.append(doc)
|
documents.append(doc)
|
||||||
return documents
|
return documents
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ from unittest.mock import MagicMock, Mock, patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||||
from models.dataset import Dataset, Document
|
from models.dataset import Dataset, Document
|
||||||
from tasks.document_indexing_sync_task import document_indexing_sync_task
|
from tasks.document_indexing_sync_task import document_indexing_sync_task
|
||||||
|
|
||||||
|
|
@ -62,7 +63,7 @@ def mock_document(document_id, dataset_id, notion_workspace_id, notion_page_id,
|
||||||
document.tenant_id = str(uuid.uuid4())
|
document.tenant_id = str(uuid.uuid4())
|
||||||
document.data_source_type = "notion_import"
|
document.data_source_type = "notion_import"
|
||||||
document.indexing_status = "completed"
|
document.indexing_status = "completed"
|
||||||
document.doc_form = "text_model"
|
document.doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||||
document.data_source_info_dict = {
|
document.data_source_info_dict = {
|
||||||
"notion_workspace_id": notion_workspace_id,
|
"notion_workspace_id": notion_workspace_id,
|
||||||
"notion_page_id": notion_page_id,
|
"notion_page_id": notion_page_id,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue