refactor: use EnumText for ApiToolProvider.schema_type_str and Document.doc_form (#33983)

This commit is contained in:
tmimmanuel 2026-03-24 05:27:50 +01:00 committed by GitHub
parent ecd3a964c1
commit 8b634a9bee
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
46 changed files with 255 additions and 180 deletions

View File

@ -10,6 +10,7 @@ from configs import dify_config
from core.rag.datasource.vdb.vector_factory import Vector from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.datasource.vdb.vector_type import VectorType from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.index_processor.constant.built_in_field import BuiltInField from core.rag.index_processor.constant.built_in_field import BuiltInField
from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.models.document import ChildDocument, Document from core.rag.models.document import ChildDocument, Document
from extensions.ext_database import db from extensions.ext_database import db
from models.dataset import Dataset, DatasetCollectionBinding, DatasetMetadata, DatasetMetadataBinding, DocumentSegment from models.dataset import Dataset, DatasetCollectionBinding, DatasetMetadata, DatasetMetadataBinding, DocumentSegment
@ -269,7 +270,7 @@ def migrate_knowledge_vector_database():
"dataset_id": segment.dataset_id, "dataset_id": segment.dataset_id,
}, },
) )
if dataset_document.doc_form == "hierarchical_model": if dataset_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX:
child_chunks = segment.get_child_chunks() child_chunks = segment.get_child_chunks()
if child_chunks: if child_chunks:
child_documents = [] child_documents = []

View File

@ -496,7 +496,9 @@ class Document(Base):
) )
doc_type = mapped_column(EnumText(DocumentDocType, length=40), nullable=True) doc_type = mapped_column(EnumText(DocumentDocType, length=40), nullable=True)
doc_metadata = mapped_column(AdjustedJSON, nullable=True) doc_metadata = mapped_column(AdjustedJSON, nullable=True)
doc_form = mapped_column(String(255), nullable=False, server_default=sa.text("'text_model'")) doc_form: Mapped[IndexStructureType] = mapped_column(
EnumText(IndexStructureType, length=255), nullable=False, server_default=sa.text("'text_model'")
)
doc_language = mapped_column(String(255), nullable=True) doc_language = mapped_column(String(255), nullable=True)
need_summary: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false")) need_summary: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false"))

View File

@ -145,7 +145,9 @@ class ApiToolProvider(TypeBase):
icon: Mapped[str] = mapped_column(String(255), nullable=False) icon: Mapped[str] = mapped_column(String(255), nullable=False)
# original schema # original schema
schema: Mapped[str] = mapped_column(LongText, nullable=False) schema: Mapped[str] = mapped_column(LongText, nullable=False)
schema_type_str: Mapped[str] = mapped_column(String(40), nullable=False) schema_type_str: Mapped[ApiProviderSchemaType] = mapped_column(
EnumText(ApiProviderSchemaType, length=40), nullable=False
)
# who created this tool # who created this tool
user_id: Mapped[str] = mapped_column(StringUUID, nullable=False) user_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
# tenant id # tenant id

View File

@ -1440,7 +1440,7 @@ class DocumentService:
.filter( .filter(
Document.id.in_(document_id_list), Document.id.in_(document_id_list),
Document.dataset_id == dataset_id, Document.dataset_id == dataset_id,
Document.doc_form != "qa_model", # Skip qa_model documents Document.doc_form != IndexStructureType.QA_INDEX, # Skip qa_model documents
) )
.update({Document.need_summary: need_summary}, synchronize_session=False) .update({Document.need_summary: need_summary}, synchronize_session=False)
) )
@ -2040,7 +2040,7 @@ class DocumentService:
document.dataset_process_rule_id = dataset_process_rule.id document.dataset_process_rule_id = dataset_process_rule.id
document.updated_at = naive_utc_now() document.updated_at = naive_utc_now()
document.created_from = created_from document.created_from = created_from
document.doc_form = knowledge_config.doc_form document.doc_form = IndexStructureType(knowledge_config.doc_form)
document.doc_language = knowledge_config.doc_language document.doc_language = knowledge_config.doc_language
document.data_source_info = json.dumps(data_source_info) document.data_source_info = json.dumps(data_source_info)
document.batch = batch document.batch = batch
@ -2640,7 +2640,7 @@ class DocumentService:
document.splitting_completed_at = None document.splitting_completed_at = None
document.updated_at = naive_utc_now() document.updated_at = naive_utc_now()
document.created_from = created_from document.created_from = created_from
document.doc_form = document_data.doc_form document.doc_form = IndexStructureType(document_data.doc_form)
db.session.add(document) db.session.add(document)
db.session.commit() db.session.commit()
# update document segment # update document segment
@ -3101,7 +3101,7 @@ class DocumentService:
class SegmentService: class SegmentService:
@classmethod @classmethod
def segment_create_args_validate(cls, args: dict, document: Document): def segment_create_args_validate(cls, args: dict, document: Document):
if document.doc_form == "qa_model": if document.doc_form == IndexStructureType.QA_INDEX:
if "answer" not in args or not args["answer"]: if "answer" not in args or not args["answer"]:
raise ValueError("Answer is required") raise ValueError("Answer is required")
if not args["answer"].strip(): if not args["answer"].strip():
@ -3158,7 +3158,7 @@ class SegmentService:
completed_at=naive_utc_now(), completed_at=naive_utc_now(),
created_by=current_user.id, created_by=current_user.id,
) )
if document.doc_form == "qa_model": if document.doc_form == IndexStructureType.QA_INDEX:
segment_document.word_count += len(args["answer"]) segment_document.word_count += len(args["answer"])
segment_document.answer = args["answer"] segment_document.answer = args["answer"]
@ -3232,7 +3232,7 @@ class SegmentService:
tokens = 0 tokens = 0
if dataset.indexing_technique == "high_quality" and embedding_model: if dataset.indexing_technique == "high_quality" and embedding_model:
# calc embedding use tokens # calc embedding use tokens
if document.doc_form == "qa_model": if document.doc_form == IndexStructureType.QA_INDEX:
tokens = embedding_model.get_text_embedding_num_tokens( tokens = embedding_model.get_text_embedding_num_tokens(
texts=[content + segment_item["answer"]] texts=[content + segment_item["answer"]]
)[0] )[0]
@ -3255,7 +3255,7 @@ class SegmentService:
completed_at=naive_utc_now(), completed_at=naive_utc_now(),
created_by=current_user.id, created_by=current_user.id,
) )
if document.doc_form == "qa_model": if document.doc_form == IndexStructureType.QA_INDEX:
segment_document.answer = segment_item["answer"] segment_document.answer = segment_item["answer"]
segment_document.word_count += len(segment_item["answer"]) segment_document.word_count += len(segment_item["answer"])
increment_word_count += segment_document.word_count increment_word_count += segment_document.word_count
@ -3322,7 +3322,7 @@ class SegmentService:
content = args.content or segment.content content = args.content or segment.content
if segment.content == content: if segment.content == content:
segment.word_count = len(content) segment.word_count = len(content)
if document.doc_form == "qa_model": if document.doc_form == IndexStructureType.QA_INDEX:
segment.answer = args.answer segment.answer = args.answer
segment.word_count += len(args.answer) if args.answer else 0 segment.word_count += len(args.answer) if args.answer else 0
word_count_change = segment.word_count - word_count_change word_count_change = segment.word_count - word_count_change
@ -3419,7 +3419,7 @@ class SegmentService:
) )
# calc embedding use tokens # calc embedding use tokens
if document.doc_form == "qa_model": if document.doc_form == IndexStructureType.QA_INDEX:
segment.answer = args.answer segment.answer = args.answer
tokens = embedding_model.get_text_embedding_num_tokens(texts=[content + segment.answer])[0] # type: ignore tokens = embedding_model.get_text_embedding_num_tokens(texts=[content + segment.answer])[0] # type: ignore
else: else:
@ -3436,7 +3436,7 @@ class SegmentService:
segment.enabled = True segment.enabled = True
segment.disabled_at = None segment.disabled_at = None
segment.disabled_by = None segment.disabled_by = None
if document.doc_form == "qa_model": if document.doc_form == IndexStructureType.QA_INDEX:
segment.answer = args.answer segment.answer = args.answer
segment.word_count += len(args.answer) if args.answer else 0 segment.word_count += len(args.answer) if args.answer else 0
word_count_change = segment.word_count - word_count_change word_count_change = segment.word_count - word_count_change

View File

@ -9,6 +9,7 @@ from flask_login import current_user
from constants import DOCUMENT_EXTENSIONS from constants import DOCUMENT_EXTENSIONS
from core.plugin.impl.plugin import PluginInstaller from core.plugin.impl.plugin import PluginInstaller
from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.retrieval.retrieval_methods import RetrievalMethod from core.rag.retrieval.retrieval_methods import RetrievalMethod
from extensions.ext_database import db from extensions.ext_database import db
from factories import variable_factory from factories import variable_factory
@ -79,9 +80,9 @@ class RagPipelineTransformService:
pipeline = self._create_pipeline(pipeline_yaml) pipeline = self._create_pipeline(pipeline_yaml)
# save chunk structure to dataset # save chunk structure to dataset
if doc_form == "hierarchical_model": if doc_form == IndexStructureType.PARENT_CHILD_INDEX:
dataset.chunk_structure = "hierarchical_model" dataset.chunk_structure = "hierarchical_model"
elif doc_form == "text_model": elif doc_form == IndexStructureType.PARAGRAPH_INDEX:
dataset.chunk_structure = "text_model" dataset.chunk_structure = "text_model"
else: else:
raise ValueError("Unsupported doc form") raise ValueError("Unsupported doc form")
@ -101,7 +102,7 @@ class RagPipelineTransformService:
def _get_transform_yaml(self, doc_form: str, datasource_type: str, indexing_technique: str | None): def _get_transform_yaml(self, doc_form: str, datasource_type: str, indexing_technique: str | None):
pipeline_yaml = {} pipeline_yaml = {}
if doc_form == "text_model": if doc_form == IndexStructureType.PARAGRAPH_INDEX:
match datasource_type: match datasource_type:
case DataSourceType.UPLOAD_FILE: case DataSourceType.UPLOAD_FILE:
if indexing_technique == "high_quality": if indexing_technique == "high_quality":
@ -132,7 +133,7 @@ class RagPipelineTransformService:
pipeline_yaml = yaml.safe_load(f) pipeline_yaml = yaml.safe_load(f)
case _: case _:
raise ValueError("Unsupported datasource type") raise ValueError("Unsupported datasource type")
elif doc_form == "hierarchical_model": elif doc_form == IndexStructureType.PARENT_CHILD_INDEX:
match datasource_type: match datasource_type:
case DataSourceType.UPLOAD_FILE: case DataSourceType.UPLOAD_FILE:
# get graph from transform.file-parentchild.yml # get graph from transform.file-parentchild.yml

View File

@ -11,6 +11,7 @@ from sqlalchemy import func
from core.db.session_factory import session_factory from core.db.session_factory import session_factory
from core.model_manager import ModelManager from core.model_manager import ModelManager
from core.rag.index_processor.constant.index_type import IndexStructureType
from dify_graph.model_runtime.entities.model_entities import ModelType from dify_graph.model_runtime.entities.model_entities import ModelType
from extensions.ext_redis import redis_client from extensions.ext_redis import redis_client
from extensions.ext_storage import storage from extensions.ext_storage import storage
@ -109,7 +110,7 @@ def batch_create_segment_to_index_task(
df = pd.read_csv(file_path) df = pd.read_csv(file_path)
content = [] content = []
for _, row in df.iterrows(): for _, row in df.iterrows():
if document_config["doc_form"] == "qa_model": if document_config["doc_form"] == IndexStructureType.QA_INDEX:
data = {"content": row.iloc[0], "answer": row.iloc[1]} data = {"content": row.iloc[0], "answer": row.iloc[1]}
else: else:
data = {"content": row.iloc[0]} data = {"content": row.iloc[0]}
@ -159,7 +160,7 @@ def batch_create_segment_to_index_task(
status="completed", status="completed",
completed_at=naive_utc_now(), completed_at=naive_utc_now(),
) )
if document_config["doc_form"] == "qa_model": if document_config["doc_form"] == IndexStructureType.QA_INDEX:
segment_document.answer = segment["answer"] segment_document.answer = segment["answer"]
segment_document.word_count += len(segment["answer"]) segment_document.word_count += len(segment["answer"])
word_count_change += segment_document.word_count word_count_change += segment_document.word_count

View File

@ -10,6 +10,7 @@ from configs import dify_config
from core.db.session_factory import session_factory from core.db.session_factory import session_factory
from core.entities.document_task import DocumentTask from core.entities.document_task import DocumentTask
from core.indexing_runner import DocumentIsPausedError, IndexingRunner from core.indexing_runner import DocumentIsPausedError, IndexingRunner
from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.pipeline.queue import TenantIsolatedTaskQueue from core.rag.pipeline.queue import TenantIsolatedTaskQueue
from enums.cloud_plan import CloudPlan from enums.cloud_plan import CloudPlan
from libs.datetime_utils import naive_utc_now from libs.datetime_utils import naive_utc_now
@ -150,7 +151,7 @@ def _document_indexing(dataset_id: str, document_ids: Sequence[str]):
) )
if ( if (
document.indexing_status == IndexingStatus.COMPLETED document.indexing_status == IndexingStatus.COMPLETED
and document.doc_form != "qa_model" and document.doc_form != IndexStructureType.QA_INDEX
and document.need_summary is True and document.need_summary is True
): ):
try: try:

View File

@ -9,6 +9,7 @@ from celery import shared_task
from sqlalchemy import or_, select from sqlalchemy import or_, select
from core.db.session_factory import session_factory from core.db.session_factory import session_factory
from core.rag.index_processor.constant.index_type import IndexStructureType
from models.dataset import Dataset, DocumentSegment, DocumentSegmentSummary from models.dataset import Dataset, DocumentSegment, DocumentSegmentSummary
from models.dataset import Document as DatasetDocument from models.dataset import Document as DatasetDocument
from services.summary_index_service import SummaryIndexService from services.summary_index_service import SummaryIndexService
@ -106,7 +107,7 @@ def regenerate_summary_index_task(
), ),
DatasetDocument.enabled == True, # Document must be enabled DatasetDocument.enabled == True, # Document must be enabled
DatasetDocument.archived == False, # Document must not be archived DatasetDocument.archived == False, # Document must not be archived
DatasetDocument.doc_form != "qa_model", # Skip qa_model documents DatasetDocument.doc_form != IndexStructureType.QA_INDEX, # Skip qa_model documents
) )
.order_by(DocumentSegment.document_id.asc(), DocumentSegment.position.asc()) .order_by(DocumentSegment.document_id.asc(), DocumentSegment.position.asc())
.all() .all()
@ -209,7 +210,7 @@ def regenerate_summary_index_task(
for dataset_document in dataset_documents: for dataset_document in dataset_documents:
# Skip qa_model documents # Skip qa_model documents
if dataset_document.doc_form == "qa_model": if dataset_document.doc_form == IndexStructureType.QA_INDEX:
continue continue
try: try:

View File

@ -4,6 +4,7 @@ from unittest.mock import patch
import pytest import pytest
from faker import Faker from faker import Faker
from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.retrieval.dataset_retrieval import DatasetRetrieval from core.rag.retrieval.dataset_retrieval import DatasetRetrieval
from core.workflow.nodes.knowledge_retrieval.retrieval import KnowledgeRetrievalRequest from core.workflow.nodes.knowledge_retrieval.retrieval import KnowledgeRetrievalRequest
from models.dataset import Dataset, Document from models.dataset import Dataset, Document
@ -55,7 +56,7 @@ class TestGetAvailableDatasetsIntegration:
name=f"Document {i}", name=f"Document {i}",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -112,7 +113,7 @@ class TestGetAvailableDatasetsIntegration:
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
name=f"Archived Document {i}", name=f"Archived Document {i}",
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
archived=True, # Archived archived=True, # Archived
@ -165,7 +166,7 @@ class TestGetAvailableDatasetsIntegration:
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
name=f"Disabled Document {i}", name=f"Disabled Document {i}",
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=False, # Disabled enabled=False, # Disabled
archived=False, archived=False,
@ -218,7 +219,7 @@ class TestGetAvailableDatasetsIntegration:
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
name=f"Document {status}", name=f"Document {status}",
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
indexing_status=status, # Not completed indexing_status=status, # Not completed
enabled=True, enabled=True,
archived=False, archived=False,
@ -336,7 +337,7 @@ class TestGetAvailableDatasetsIntegration:
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
name=f"Document for {dataset.name}", name=f"Document for {dataset.name}",
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
archived=False, archived=False,
@ -416,7 +417,7 @@ class TestGetAvailableDatasetsIntegration:
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
name=f"Document {i}", name=f"Document {i}",
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
archived=False, archived=False,
@ -476,7 +477,7 @@ class TestKnowledgeRetrievalIntegration:
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
archived=False, archived=False,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
) )
db_session_with_containers.add(document) db_session_with_containers.add(document)
db_session_with_containers.commit() db_session_with_containers.commit()

View File

@ -13,6 +13,7 @@ from uuid import uuid4
import pytest import pytest
from core.rag.index_processor.constant.index_type import IndexStructureType
from extensions.storage.storage_type import StorageType from extensions.storage.storage_type import StorageType
from models import Account from models import Account
from models.dataset import Dataset, Document from models.dataset import Dataset, Document
@ -91,7 +92,7 @@ class DocumentStatusTestDataFactory:
name=name, name=name,
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=created_by, created_by=created_by,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
) )
document.id = document_id document.id = document_id
document.indexing_status = indexing_status document.indexing_status = indexing_status

View File

@ -11,6 +11,7 @@ from uuid import uuid4
import pytest import pytest
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.retrieval.retrieval_methods import RetrievalMethod from core.rag.retrieval.retrieval_methods import RetrievalMethod
from dify_graph.model_runtime.entities.model_entities import ModelType from dify_graph.model_runtime.entities.model_entities import ModelType
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
@ -106,7 +107,7 @@ class DatasetServiceIntegrationDataFactory:
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=created_by, created_by=created_by,
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
) )
db_session_with_containers.add(document) db_session_with_containers.add(document)
db_session_with_containers.flush() db_session_with_containers.flush()

View File

@ -13,6 +13,7 @@ from uuid import uuid4
import pytest import pytest
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from core.rag.index_processor.constant.index_type import IndexStructureType
from models.dataset import Dataset, Document from models.dataset import Dataset, Document
from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus
from services.dataset_service import DocumentService from services.dataset_service import DocumentService
@ -79,7 +80,7 @@ class DocumentBatchUpdateIntegrationDataFactory:
name=name, name=name,
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=created_by or str(uuid4()), created_by=created_by or str(uuid4()),
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
) )
document.id = document_id or str(uuid4()) document.id = document_id or str(uuid4())
document.enabled = enabled document.enabled = enabled

View File

@ -3,6 +3,7 @@
from unittest.mock import patch from unittest.mock import patch
from uuid import uuid4 from uuid import uuid4
from core.rag.index_processor.constant.index_type import IndexStructureType
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, Document from models.dataset import Dataset, Document
from models.enums import DataSourceType, DocumentCreatedFrom from models.enums import DataSourceType, DocumentCreatedFrom
@ -78,7 +79,7 @@ class DatasetDeleteIntegrationDataFactory:
tenant_id: str, tenant_id: str,
dataset_id: str, dataset_id: str,
created_by: str, created_by: str,
doc_form: str = "text_model", doc_form: str = IndexStructureType.PARAGRAPH_INDEX,
) -> Document: ) -> Document:
"""Persist a document so dataset.doc_form resolves through the real document path.""" """Persist a document so dataset.doc_form resolves through the real document path."""
document = Document( document = Document(
@ -119,7 +120,7 @@ class TestDatasetServiceDeleteDataset:
tenant_id=tenant.id, tenant_id=tenant.id,
dataset_id=dataset.id, dataset_id=dataset.id,
created_by=owner.id, created_by=owner.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
) )
# Act # Act

View File

@ -3,6 +3,7 @@ from uuid import uuid4
from sqlalchemy import select from sqlalchemy import select
from core.rag.index_processor.constant.index_type import IndexStructureType
from models.dataset import Dataset, Document from models.dataset import Dataset, Document
from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus
from services.dataset_service import DocumentService from services.dataset_service import DocumentService
@ -42,7 +43,7 @@ def _create_document(
name=f"doc-{uuid4()}", name=f"doc-{uuid4()}",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=str(uuid4()), created_by=str(uuid4()),
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
) )
document.id = str(uuid4()) document.id = str(uuid4())
document.indexing_status = indexing_status document.indexing_status = indexing_status

View File

@ -7,6 +7,7 @@ from uuid import uuid4
import pytest import pytest
from core.rag.index_processor.constant.index_type import IndexStructureType
from extensions.storage.storage_type import StorageType from extensions.storage.storage_type import StorageType
from models import Account from models import Account
from models.dataset import Dataset, Document from models.dataset import Dataset, Document
@ -69,7 +70,7 @@ def make_document(
name=name, name=name,
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=str(uuid4()), created_by=str(uuid4()),
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
) )
doc.id = document_id doc.id = document_id
doc.indexing_status = "completed" doc.indexing_status = "completed"

View File

@ -5,6 +5,7 @@ from faker import Faker
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from core.rag.index_processor.constant.built_in_field import BuiltInField from core.rag.index_processor.constant.built_in_field import BuiltInField
from core.rag.index_processor.constant.index_type import IndexStructureType
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, DatasetMetadata, DatasetMetadataBinding, Document from models.dataset import Dataset, DatasetMetadata, DatasetMetadataBinding, Document
from models.enums import DatasetMetadataType, DataSourceType, DocumentCreatedFrom from models.enums import DatasetMetadataType, DataSourceType, DocumentCreatedFrom
@ -139,7 +140,7 @@ class TestMetadataService:
name=fake.file_name(), name=fake.file_name(),
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
) )

View File

@ -6,7 +6,7 @@ from sqlalchemy.orm import Session
from core.tools.entities.api_entities import ToolProviderApiEntity from core.tools.entities.api_entities import ToolProviderApiEntity
from core.tools.entities.common_entities import I18nObject from core.tools.entities.common_entities import I18nObject
from core.tools.entities.tool_entities import ToolProviderType from core.tools.entities.tool_entities import ApiProviderSchemaType, ToolProviderType
from models.tools import ApiToolProvider, BuiltinToolProvider, MCPToolProvider, WorkflowToolProvider from models.tools import ApiToolProvider, BuiltinToolProvider, MCPToolProvider, WorkflowToolProvider
from services.plugin.plugin_service import PluginService from services.plugin.plugin_service import PluginService
from services.tools.tools_transform_service import ToolTransformService from services.tools.tools_transform_service import ToolTransformService
@ -52,7 +52,7 @@ class TestToolTransformService:
user_id="test_user_id", user_id="test_user_id",
credentials_str='{"auth_type": "api_key_header", "api_key": "test_key"}', credentials_str='{"auth_type": "api_key_header", "api_key": "test_key"}',
schema="{}", schema="{}",
schema_type_str="openapi", schema_type_str=ApiProviderSchemaType.OPENAPI,
tools_str="[]", tools_str="[]",
) )
elif provider_type == "builtin": elif provider_type == "builtin":
@ -659,7 +659,7 @@ class TestToolTransformService:
user_id=fake.uuid4(), user_id=fake.uuid4(),
credentials_str='{"auth_type": "api_key_header", "api_key": "test_key"}', credentials_str='{"auth_type": "api_key_header", "api_key": "test_key"}',
schema="{}", schema="{}",
schema_type_str="openapi", schema_type_str=ApiProviderSchemaType.OPENAPI,
tools_str="[]", tools_str="[]",
) )
@ -695,7 +695,7 @@ class TestToolTransformService:
user_id=fake.uuid4(), user_id=fake.uuid4(),
credentials_str='{"auth_type": "api_key_query", "api_key": "test_key"}', credentials_str='{"auth_type": "api_key_query", "api_key": "test_key"}',
schema="{}", schema="{}",
schema_type_str="openapi", schema_type_str=ApiProviderSchemaType.OPENAPI,
tools_str="[]", tools_str="[]",
) )
@ -731,7 +731,7 @@ class TestToolTransformService:
user_id=fake.uuid4(), user_id=fake.uuid4(),
credentials_str='{"auth_type": "api_key", "api_key": "test_key"}', credentials_str='{"auth_type": "api_key", "api_key": "test_key"}',
schema="{}", schema="{}",
schema_type_str="openapi", schema_type_str=ApiProviderSchemaType.OPENAPI,
tools_str="[]", tools_str="[]",
) )

View File

@ -13,6 +13,7 @@ import pytest
from faker import Faker from faker import Faker
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from core.rag.index_processor.constant.index_type import IndexStructureType
from extensions.storage.storage_type import StorageType from extensions.storage.storage_type import StorageType
from libs.datetime_utils import naive_utc_now from libs.datetime_utils import naive_utc_now
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
@ -152,7 +153,7 @@ class TestBatchCleanDocumentTask:
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
) )
db_session_with_containers.add(document) db_session_with_containers.add(document)
@ -392,7 +393,12 @@ class TestBatchCleanDocumentTask:
db_session_with_containers.commit() db_session_with_containers.commit()
# Execute the task with non-existent dataset # Execute the task with non-existent dataset
batch_clean_document_task(document_ids=[document_id], dataset_id=dataset_id, doc_form="text_model", file_ids=[]) batch_clean_document_task(
document_ids=[document_id],
dataset_id=dataset_id,
doc_form=IndexStructureType.PARAGRAPH_INDEX,
file_ids=[],
)
# Verify that no index processing occurred # Verify that no index processing occurred
mock_external_service_dependencies["index_processor"].clean.assert_not_called() mock_external_service_dependencies["index_processor"].clean.assert_not_called()
@ -525,7 +531,11 @@ class TestBatchCleanDocumentTask:
account = self._create_test_account(db_session_with_containers) account = self._create_test_account(db_session_with_containers)
# Test different doc_form types # Test different doc_form types
doc_forms = ["text_model", "qa_model", "hierarchical_model"] doc_forms = [
IndexStructureType.PARAGRAPH_INDEX,
IndexStructureType.QA_INDEX,
IndexStructureType.PARENT_CHILD_INDEX,
]
for doc_form in doc_forms: for doc_form in doc_forms:
dataset = self._create_test_dataset(db_session_with_containers, account) dataset = self._create_test_dataset(db_session_with_containers, account)

View File

@ -19,6 +19,7 @@ import pytest
from faker import Faker from faker import Faker
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from core.rag.index_processor.constant.index_type import IndexStructureType
from extensions.storage.storage_type import StorageType from extensions.storage.storage_type import StorageType
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, Document, DocumentSegment from models.dataset import Dataset, Document, DocumentSegment
@ -179,7 +180,7 @@ class TestBatchCreateSegmentToIndexTask:
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
archived=False, archived=False,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
word_count=0, word_count=0,
) )
@ -221,17 +222,17 @@ class TestBatchCreateSegmentToIndexTask:
return upload_file return upload_file
def _create_test_csv_content(self, content_type="text_model"): def _create_test_csv_content(self, content_type=IndexStructureType.PARAGRAPH_INDEX):
""" """
Helper method to create test CSV content. Helper method to create test CSV content.
Args: Args:
content_type: Type of content to create ("text_model" or "qa_model") content_type: Type of content to create (IndexStructureType.PARAGRAPH_INDEX or IndexStructureType.QA_INDEX)
Returns: Returns:
str: CSV content as string str: CSV content as string
""" """
if content_type == "qa_model": if content_type == IndexStructureType.QA_INDEX:
csv_content = "content,answer\n" csv_content = "content,answer\n"
csv_content += "This is the first segment content,This is the first answer\n" csv_content += "This is the first segment content,This is the first answer\n"
csv_content += "This is the second segment content,This is the second answer\n" csv_content += "This is the second segment content,This is the second answer\n"
@ -264,7 +265,7 @@ class TestBatchCreateSegmentToIndexTask:
upload_file = self._create_test_upload_file(db_session_with_containers, account, tenant) upload_file = self._create_test_upload_file(db_session_with_containers, account, tenant)
# Create CSV content # Create CSV content
csv_content = self._create_test_csv_content("text_model") csv_content = self._create_test_csv_content(IndexStructureType.PARAGRAPH_INDEX)
# Mock storage to return our CSV content # Mock storage to return our CSV content
mock_storage = mock_external_service_dependencies["storage"] mock_storage = mock_external_service_dependencies["storage"]
@ -451,7 +452,7 @@ class TestBatchCreateSegmentToIndexTask:
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=False, # Document is disabled enabled=False, # Document is disabled
archived=False, archived=False,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
word_count=0, word_count=0,
), ),
# Archived document # Archived document
@ -467,7 +468,7 @@ class TestBatchCreateSegmentToIndexTask:
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
archived=True, # Document is archived archived=True, # Document is archived
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
word_count=0, word_count=0,
), ),
# Document with incomplete indexing # Document with incomplete indexing
@ -483,7 +484,7 @@ class TestBatchCreateSegmentToIndexTask:
indexing_status=IndexingStatus.INDEXING, # Not completed indexing_status=IndexingStatus.INDEXING, # Not completed
enabled=True, enabled=True,
archived=False, archived=False,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
word_count=0, word_count=0,
), ),
] ]
@ -655,7 +656,7 @@ class TestBatchCreateSegmentToIndexTask:
db_session_with_containers.commit() db_session_with_containers.commit()
# Create CSV content # Create CSV content
csv_content = self._create_test_csv_content("text_model") csv_content = self._create_test_csv_content(IndexStructureType.PARAGRAPH_INDEX)
# Mock storage to return our CSV content # Mock storage to return our CSV content
mock_storage = mock_external_service_dependencies["storage"] mock_storage = mock_external_service_dependencies["storage"]

View File

@ -18,6 +18,7 @@ import pytest
from faker import Faker from faker import Faker
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from core.rag.index_processor.constant.index_type import IndexStructureType
from extensions.storage.storage_type import StorageType from extensions.storage.storage_type import StorageType
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import ( from models.dataset import (
@ -192,7 +193,7 @@ class TestCleanDatasetTask:
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
archived=False, archived=False,
doc_form="paragraph_index", doc_form=IndexStructureType.PARAGRAPH_INDEX,
word_count=100, word_count=100,
created_at=datetime.now(), created_at=datetime.now(),
updated_at=datetime.now(), updated_at=datetime.now(),

View File

@ -12,6 +12,7 @@ from unittest.mock import Mock, patch
import pytest import pytest
from faker import Faker from faker import Faker
from core.rag.index_processor.constant.index_type import IndexStructureType
from models.dataset import Dataset, Document, DocumentSegment from models.dataset import Dataset, Document, DocumentSegment
from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus, SegmentStatus from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus, SegmentStatus
from services.account_service import AccountService, TenantService from services.account_service import AccountService, TenantService
@ -114,7 +115,7 @@ class TestCleanNotionDocumentTask:
name=f"Notion Page {i}", name=f"Notion Page {i}",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", # Set doc_form to ensure dataset.doc_form works doc_form=IndexStructureType.PARAGRAPH_INDEX, # Set doc_form to ensure dataset.doc_form works
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
) )
@ -261,7 +262,7 @@ class TestCleanNotionDocumentTask:
# Test different index types # Test different index types
# Note: Only testing text_model to avoid dependency on external services # Note: Only testing text_model to avoid dependency on external services
index_types = ["text_model"] index_types = [IndexStructureType.PARAGRAPH_INDEX]
for index_type in index_types: for index_type in index_types:
# Create dataset (doc_form will be set via document creation) # Create dataset (doc_form will be set via document creation)

View File

@ -12,6 +12,7 @@ from uuid import uuid4
import pytest import pytest
from faker import Faker from faker import Faker
from core.rag.index_processor.constant.index_type import IndexStructureType
from extensions.ext_redis import redis_client from extensions.ext_redis import redis_client
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, Document, DocumentSegment from models.dataset import Dataset, Document, DocumentSegment
@ -141,7 +142,7 @@ class TestCreateSegmentToIndexTask:
enabled=True, enabled=True,
archived=False, archived=False,
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
doc_form="qa_model", doc_form=IndexStructureType.QA_INDEX,
) )
db_session_with_containers.add(document) db_session_with_containers.add(document)
db_session_with_containers.commit() db_session_with_containers.commit()
@ -301,7 +302,7 @@ class TestCreateSegmentToIndexTask:
enabled=True, enabled=True,
archived=False, archived=False,
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
) )
db_session_with_containers.add(document) db_session_with_containers.add(document)
db_session_with_containers.commit() db_session_with_containers.commit()
@ -552,7 +553,11 @@ class TestCreateSegmentToIndexTask:
- Processing completes successfully for different forms - Processing completes successfully for different forms
""" """
# Arrange: Test different doc_forms # Arrange: Test different doc_forms
doc_forms = ["qa_model", "text_model", "web_model"] doc_forms = [
IndexStructureType.QA_INDEX,
IndexStructureType.PARAGRAPH_INDEX,
IndexStructureType.PARAGRAPH_INDEX,
]
for doc_form in doc_forms: for doc_form in doc_forms:
# Create fresh test data for each form # Create fresh test data for each form

View File

@ -12,6 +12,7 @@ from unittest.mock import ANY, Mock, patch
import pytest import pytest
from faker import Faker from faker import Faker
from core.rag.index_processor.constant.index_type import IndexStructureType
from models.dataset import Dataset, Document, DocumentSegment from models.dataset import Dataset, Document, DocumentSegment
from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus, SegmentStatus from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus, SegmentStatus
from services.account_service import AccountService, TenantService from services.account_service import AccountService, TenantService
@ -107,7 +108,7 @@ class TestDealDatasetVectorIndexTask:
name="Document for doc_form", name="Document for doc_form",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -167,7 +168,7 @@ class TestDealDatasetVectorIndexTask:
name="Document for doc_form", name="Document for doc_form",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -187,7 +188,7 @@ class TestDealDatasetVectorIndexTask:
name="Test Document", name="Test Document",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -268,7 +269,7 @@ class TestDealDatasetVectorIndexTask:
name="Document for doc_form", name="Document for doc_form",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="parent_child_index", doc_form=IndexStructureType.PARENT_CHILD_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -288,7 +289,7 @@ class TestDealDatasetVectorIndexTask:
name="Test Document", name="Test Document",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="parent_child_index", doc_form=IndexStructureType.PARENT_CHILD_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -416,7 +417,7 @@ class TestDealDatasetVectorIndexTask:
name="Test Document", name="Test Document",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -505,7 +506,7 @@ class TestDealDatasetVectorIndexTask:
name="Document for doc_form", name="Document for doc_form",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -525,7 +526,7 @@ class TestDealDatasetVectorIndexTask:
name="Test Document", name="Test Document",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -601,7 +602,7 @@ class TestDealDatasetVectorIndexTask:
name="Test Document", name="Test Document",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="qa_index", doc_form=IndexStructureType.QA_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -638,7 +639,7 @@ class TestDealDatasetVectorIndexTask:
assert updated_document.indexing_status == IndexingStatus.COMPLETED assert updated_document.indexing_status == IndexingStatus.COMPLETED
# Verify index processor was initialized with custom index type # Verify index processor was initialized with custom index type
mock_index_processor_factory.assert_called_once_with("qa_index") mock_index_processor_factory.assert_called_once_with(IndexStructureType.QA_INDEX)
mock_factory = mock_index_processor_factory.return_value mock_factory = mock_index_processor_factory.return_value
mock_processor = mock_factory.init_index_processor.return_value mock_processor = mock_factory.init_index_processor.return_value
mock_processor.load.assert_called_once() mock_processor.load.assert_called_once()
@ -677,7 +678,7 @@ class TestDealDatasetVectorIndexTask:
name="Test Document", name="Test Document",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -714,7 +715,7 @@ class TestDealDatasetVectorIndexTask:
assert updated_document.indexing_status == IndexingStatus.COMPLETED assert updated_document.indexing_status == IndexingStatus.COMPLETED
# Verify index processor was initialized with the document's index type # Verify index processor was initialized with the document's index type
mock_index_processor_factory.assert_called_once_with("text_model") mock_index_processor_factory.assert_called_once_with(IndexStructureType.PARAGRAPH_INDEX)
mock_factory = mock_index_processor_factory.return_value mock_factory = mock_index_processor_factory.return_value
mock_processor = mock_factory.init_index_processor.return_value mock_processor = mock_factory.init_index_processor.return_value
mock_processor.load.assert_called_once() mock_processor.load.assert_called_once()
@ -753,7 +754,7 @@ class TestDealDatasetVectorIndexTask:
name="Document for doc_form", name="Document for doc_form",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -775,7 +776,7 @@ class TestDealDatasetVectorIndexTask:
name=f"Test Document {i}", name=f"Test Document {i}",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -856,7 +857,7 @@ class TestDealDatasetVectorIndexTask:
name="Document for doc_form", name="Document for doc_form",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -876,7 +877,7 @@ class TestDealDatasetVectorIndexTask:
name="Test Document", name="Test Document",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -953,7 +954,7 @@ class TestDealDatasetVectorIndexTask:
name="Document for doc_form", name="Document for doc_form",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -973,7 +974,7 @@ class TestDealDatasetVectorIndexTask:
name="Enabled Document", name="Enabled Document",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -992,7 +993,7 @@ class TestDealDatasetVectorIndexTask:
name="Disabled Document", name="Disabled Document",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=False, # This document should be skipped enabled=False, # This document should be skipped
@ -1074,7 +1075,7 @@ class TestDealDatasetVectorIndexTask:
name="Document for doc_form", name="Document for doc_form",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -1094,7 +1095,7 @@ class TestDealDatasetVectorIndexTask:
name="Active Document", name="Active Document",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -1113,7 +1114,7 @@ class TestDealDatasetVectorIndexTask:
name="Archived Document", name="Archived Document",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -1195,7 +1196,7 @@ class TestDealDatasetVectorIndexTask:
name="Document for doc_form", name="Document for doc_form",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -1215,7 +1216,7 @@ class TestDealDatasetVectorIndexTask:
name="Completed Document", name="Completed Document",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.COMPLETED, indexing_status=IndexingStatus.COMPLETED,
enabled=True, enabled=True,
@ -1234,7 +1235,7 @@ class TestDealDatasetVectorIndexTask:
name="Incomplete Document", name="Incomplete Document",
created_from=DocumentCreatedFrom.WEB, created_from=DocumentCreatedFrom.WEB,
created_by=account.id, created_by=account.id,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
indexing_status=IndexingStatus.INDEXING, # This document should be skipped indexing_status=IndexingStatus.INDEXING, # This document should be skipped
enabled=True, enabled=True,

View File

@ -15,6 +15,7 @@ import pytest
from faker import Faker from faker import Faker
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from core.rag.index_processor.constant.index_type import IndexStructureType
from extensions.ext_redis import redis_client from extensions.ext_redis import redis_client
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, Document, DocumentSegment from models.dataset import Dataset, Document, DocumentSegment
@ -113,7 +114,7 @@ class TestDisableSegmentFromIndexTask:
dataset: Dataset, dataset: Dataset,
tenant: Tenant, tenant: Tenant,
account: Account, account: Account,
doc_form: str = "text_model", doc_form: str = IndexStructureType.PARAGRAPH_INDEX,
) -> Document: ) -> Document:
""" """
Helper method to create a test document. Helper method to create a test document.
@ -476,7 +477,11 @@ class TestDisableSegmentFromIndexTask:
- Index processor clean method is called correctly - Index processor clean method is called correctly
""" """
# Test different document forms # Test different document forms
doc_forms = ["text_model", "qa_model", "table_model"] doc_forms = [
IndexStructureType.PARAGRAPH_INDEX,
IndexStructureType.QA_INDEX,
IndexStructureType.PARENT_CHILD_INDEX,
]
for doc_form in doc_forms: for doc_form in doc_forms:
# Arrange: Create test data for each form # Arrange: Create test data for each form

View File

@ -11,6 +11,7 @@ from unittest.mock import MagicMock, patch
from faker import Faker from faker import Faker
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from core.rag.index_processor.constant.index_type import IndexStructureType
from models import Account, Dataset, DocumentSegment from models import Account, Dataset, DocumentSegment
from models import Document as DatasetDocument from models import Document as DatasetDocument
from models.dataset import DatasetProcessRule from models.dataset import DatasetProcessRule
@ -153,7 +154,7 @@ class TestDisableSegmentsFromIndexTask:
document.indexing_status = "completed" document.indexing_status = "completed"
document.enabled = True document.enabled = True
document.archived = False document.archived = False
document.doc_form = "text_model" # Use text_model form for testing document.doc_form = IndexStructureType.PARAGRAPH_INDEX # Use text_model form for testing
document.doc_language = "en" document.doc_language = "en"
db_session_with_containers.add(document) db_session_with_containers.add(document)
db_session_with_containers.commit() db_session_with_containers.commit()
@ -500,7 +501,11 @@ class TestDisableSegmentsFromIndexTask:
segment_ids = [segment.id for segment in segments] segment_ids = [segment.id for segment in segments]
# Test different document forms # Test different document forms
doc_forms = ["text_model", "qa_model", "hierarchical_model"] doc_forms = [
IndexStructureType.PARAGRAPH_INDEX,
IndexStructureType.QA_INDEX,
IndexStructureType.PARENT_CHILD_INDEX,
]
for doc_form in doc_forms: for doc_form in doc_forms:
# Update document form # Update document form

View File

@ -14,6 +14,7 @@ from uuid import uuid4
import pytest import pytest
from core.indexing_runner import DocumentIsPausedError, IndexingRunner from core.indexing_runner import DocumentIsPausedError, IndexingRunner
from core.rag.index_processor.constant.index_type import IndexStructureType
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, Document, DocumentSegment from models.dataset import Dataset, Document, DocumentSegment
from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus, SegmentStatus from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus, SegmentStatus
@ -85,7 +86,7 @@ class DocumentIndexingSyncTaskTestDataFactory:
created_by=created_by, created_by=created_by,
indexing_status=indexing_status, indexing_status=indexing_status,
enabled=True, enabled=True,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
doc_language="en", doc_language="en",
) )
db_session_with_containers.add(document) db_session_with_containers.add(document)

View File

@ -3,6 +3,7 @@ from unittest.mock import MagicMock, patch
import pytest import pytest
from faker import Faker from faker import Faker
from core.rag.index_processor.constant.index_type import IndexStructureType
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, Document, DocumentSegment from models.dataset import Dataset, Document, DocumentSegment
from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus, SegmentStatus from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus, SegmentStatus
@ -80,7 +81,7 @@ class TestDocumentIndexingUpdateTask:
created_by=account.id, created_by=account.id,
indexing_status=IndexingStatus.WAITING, indexing_status=IndexingStatus.WAITING,
enabled=True, enabled=True,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
) )
db_session_with_containers.add(document) db_session_with_containers.add(document)
db_session_with_containers.commit() db_session_with_containers.commit()

View File

@ -4,6 +4,7 @@ import pytest
from faker import Faker from faker import Faker
from core.indexing_runner import DocumentIsPausedError from core.indexing_runner import DocumentIsPausedError
from core.rag.index_processor.constant.index_type import IndexStructureType
from enums.cloud_plan import CloudPlan from enums.cloud_plan import CloudPlan
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, Document, DocumentSegment from models.dataset import Dataset, Document, DocumentSegment
@ -130,7 +131,7 @@ class TestDuplicateDocumentIndexingTasks:
created_by=account.id, created_by=account.id,
indexing_status=IndexingStatus.WAITING, indexing_status=IndexingStatus.WAITING,
enabled=True, enabled=True,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
) )
db_session_with_containers.add(document) db_session_with_containers.add(document)
documents.append(document) documents.append(document)
@ -265,7 +266,7 @@ class TestDuplicateDocumentIndexingTasks:
created_by=account.id, created_by=account.id,
indexing_status=IndexingStatus.WAITING, indexing_status=IndexingStatus.WAITING,
enabled=True, enabled=True,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
) )
db_session_with_containers.add(document) db_session_with_containers.add(document)
documents.append(document) documents.append(document)
@ -524,7 +525,7 @@ class TestDuplicateDocumentIndexingTasks:
created_by=dataset.created_by, created_by=dataset.created_by,
indexing_status=IndexingStatus.WAITING, indexing_status=IndexingStatus.WAITING,
enabled=True, enabled=True,
doc_form="text_model", doc_form=IndexStructureType.PARAGRAPH_INDEX,
) )
db_session_with_containers.add(document) db_session_with_containers.add(document)
extra_documents.append(document) extra_documents.append(document)

View File

@ -11,6 +11,7 @@ from controllers.console.datasets.data_source import (
DataSourceNotionDocumentSyncApi, DataSourceNotionDocumentSyncApi,
DataSourceNotionListApi, DataSourceNotionListApi,
) )
from core.rag.index_processor.constant.index_type import IndexStructureType
def unwrap(func): def unwrap(func):
@ -343,7 +344,7 @@ class TestDataSourceNotionApi:
} }
], ],
"process_rule": {"rules": {}}, "process_rule": {"rules": {}},
"doc_form": "text_model", "doc_form": IndexStructureType.PARAGRAPH_INDEX,
"doc_language": "English", "doc_language": "English",
} }

View File

@ -28,6 +28,7 @@ from controllers.console.datasets.datasets import (
from controllers.console.datasets.error import DatasetInUseError, DatasetNameDuplicateError, IndexingEstimateError from controllers.console.datasets.error import DatasetInUseError, DatasetNameDuplicateError, IndexingEstimateError
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
from core.provider_manager import ProviderManager from core.provider_manager import ProviderManager
from core.rag.index_processor.constant.index_type import IndexStructureType
from extensions.storage.storage_type import StorageType from extensions.storage.storage_type import StorageType
from models.enums import CreatorUserRole from models.enums import CreatorUserRole
from models.model import ApiToken, UploadFile from models.model import ApiToken, UploadFile
@ -1146,7 +1147,7 @@ class TestDatasetIndexingEstimateApi:
}, },
"process_rule": {"chunk_size": 100}, "process_rule": {"chunk_size": 100},
"indexing_technique": "high_quality", "indexing_technique": "high_quality",
"doc_form": "text_model", "doc_form": IndexStructureType.PARAGRAPH_INDEX,
"doc_language": "English", "doc_language": "English",
"dataset_id": None, "dataset_id": None,
} }

View File

@ -30,6 +30,7 @@ from controllers.console.datasets.error import (
InvalidActionError, InvalidActionError,
InvalidMetadataError, InvalidMetadataError,
) )
from core.rag.index_processor.constant.index_type import IndexStructureType
from models.enums import DataSourceType, IndexingStatus from models.enums import DataSourceType, IndexingStatus
@ -66,7 +67,7 @@ def document():
indexing_status=IndexingStatus.INDEXING, indexing_status=IndexingStatus.INDEXING,
data_source_type=DataSourceType.UPLOAD_FILE, data_source_type=DataSourceType.UPLOAD_FILE,
data_source_info_dict={"upload_file_id": "file-1"}, data_source_info_dict={"upload_file_id": "file-1"},
doc_form="text", doc_form=IndexStructureType.PARAGRAPH_INDEX,
archived=False, archived=False,
is_paused=False, is_paused=False,
dataset_process_rule=None, dataset_process_rule=None,
@ -765,8 +766,8 @@ class TestDocumentGenerateSummaryApi:
summary_index_setting={"enable": True}, summary_index_setting={"enable": True},
) )
doc1 = MagicMock(id="doc-1", doc_form="qa_model") doc1 = MagicMock(id="doc-1", doc_form=IndexStructureType.QA_INDEX)
doc2 = MagicMock(id="doc-2", doc_form="text") doc2 = MagicMock(id="doc-2", doc_form=IndexStructureType.PARAGRAPH_INDEX)
payload = {"document_list": ["doc-1", "doc-2"]} payload = {"document_list": ["doc-1", "doc-2"]}
@ -822,7 +823,7 @@ class TestDocumentIndexingEstimateApi:
data_source_type=DataSourceType.UPLOAD_FILE, data_source_type=DataSourceType.UPLOAD_FILE,
data_source_info_dict={"upload_file_id": "file-1"}, data_source_info_dict={"upload_file_id": "file-1"},
tenant_id="tenant-1", tenant_id="tenant-1",
doc_form="text", doc_form=IndexStructureType.PARAGRAPH_INDEX,
dataset_process_rule=None, dataset_process_rule=None,
) )
@ -849,7 +850,7 @@ class TestDocumentIndexingEstimateApi:
data_source_type=DataSourceType.UPLOAD_FILE, data_source_type=DataSourceType.UPLOAD_FILE,
data_source_info_dict={"upload_file_id": "file-1"}, data_source_info_dict={"upload_file_id": "file-1"},
tenant_id="tenant-1", tenant_id="tenant-1",
doc_form="text", doc_form=IndexStructureType.PARAGRAPH_INDEX,
dataset_process_rule=None, dataset_process_rule=None,
) )
@ -973,7 +974,7 @@ class TestDocumentBatchIndexingEstimateApi:
"mode": "single", "mode": "single",
"only_main_content": True, "only_main_content": True,
}, },
doc_form="text", doc_form=IndexStructureType.PARAGRAPH_INDEX,
) )
with ( with (
@ -1001,7 +1002,7 @@ class TestDocumentBatchIndexingEstimateApi:
"notion_page_id": "p1", "notion_page_id": "p1",
"type": "page", "type": "page",
}, },
doc_form="text", doc_form=IndexStructureType.PARAGRAPH_INDEX,
) )
with ( with (
@ -1024,7 +1025,7 @@ class TestDocumentBatchIndexingEstimateApi:
indexing_status=IndexingStatus.INDEXING, indexing_status=IndexingStatus.INDEXING,
data_source_type="unknown", data_source_type="unknown",
data_source_info_dict={}, data_source_info_dict={},
doc_form="text", doc_form=IndexStructureType.PARAGRAPH_INDEX,
) )
with app.test_request_context("/"), patch.object(api, "get_batch_documents", return_value=[document]): with app.test_request_context("/"), patch.object(api, "get_batch_documents", return_value=[document]):
@ -1353,7 +1354,7 @@ class TestDocumentIndexingEdgeCases:
data_source_type=DataSourceType.UPLOAD_FILE, data_source_type=DataSourceType.UPLOAD_FILE,
data_source_info_dict={"upload_file_id": "file-1"}, data_source_info_dict={"upload_file_id": "file-1"},
tenant_id="tenant-1", tenant_id="tenant-1",
doc_form="text", doc_form=IndexStructureType.PARAGRAPH_INDEX,
dataset_process_rule=None, dataset_process_rule=None,
) )

View File

@ -24,6 +24,7 @@ from controllers.console.datasets.error import (
InvalidActionError, InvalidActionError,
) )
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
from core.rag.index_processor.constant.index_type import IndexStructureType
from models.dataset import ChildChunk, DocumentSegment from models.dataset import ChildChunk, DocumentSegment
from models.model import UploadFile from models.model import UploadFile
@ -366,7 +367,7 @@ class TestDatasetDocumentSegmentAddApi:
dataset.indexing_technique = "economy" dataset.indexing_technique = "economy"
document = MagicMock() document = MagicMock()
document.doc_form = "text" document.doc_form = IndexStructureType.PARAGRAPH_INDEX
segment = MagicMock() segment = MagicMock()
segment.id = "seg-1" segment.id = "seg-1"
@ -505,7 +506,7 @@ class TestDatasetDocumentSegmentUpdateApi:
dataset.indexing_technique = "economy" dataset.indexing_technique = "economy"
document = MagicMock() document = MagicMock()
document.doc_form = "text" document.doc_form = IndexStructureType.PARAGRAPH_INDEX
segment = MagicMock() segment = MagicMock()

View File

@ -12,6 +12,7 @@ from unittest.mock import Mock
import pytest import pytest
from flask import Flask from flask import Flask
from core.rag.index_processor.constant.index_type import IndexStructureType
from models.account import TenantStatus from models.account import TenantStatus
from models.model import App, AppMode, EndUser from models.model import App, AppMode, EndUser
from tests.unit_tests.conftest import setup_mock_tenant_account_query from tests.unit_tests.conftest import setup_mock_tenant_account_query
@ -175,7 +176,7 @@ def mock_document():
document.name = "test_document.txt" document.name = "test_document.txt"
document.indexing_status = "completed" document.indexing_status = "completed"
document.enabled = True document.enabled = True
document.doc_form = "text_model" document.doc_form = IndexStructureType.PARAGRAPH_INDEX
return document return document

View File

@ -31,6 +31,7 @@ from controllers.service_api.dataset.segment import (
SegmentCreatePayload, SegmentCreatePayload,
SegmentListQuery, SegmentListQuery,
) )
from core.rag.index_processor.constant.index_type import IndexStructureType
from models.dataset import ChildChunk, Dataset, Document, DocumentSegment from models.dataset import ChildChunk, Dataset, Document, DocumentSegment
from models.enums import IndexingStatus from models.enums import IndexingStatus
from services.dataset_service import DocumentService, SegmentService from services.dataset_service import DocumentService, SegmentService
@ -788,7 +789,7 @@ class TestSegmentApiGet:
# Arrange # Arrange
mock_account_fn.return_value = (Mock(), mock_tenant.id) mock_account_fn.return_value = (Mock(), mock_tenant.id)
mock_db.session.query.return_value.where.return_value.first.return_value = mock_dataset mock_db.session.query.return_value.where.return_value.first.return_value = mock_dataset
mock_doc_svc.get_document.return_value = Mock(doc_form="text_model") mock_doc_svc.get_document.return_value = Mock(doc_form=IndexStructureType.PARAGRAPH_INDEX)
mock_seg_svc.get_segments.return_value = ([mock_segment], 1) mock_seg_svc.get_segments.return_value = ([mock_segment], 1)
mock_marshal.return_value = [{"id": mock_segment.id}] mock_marshal.return_value = [{"id": mock_segment.id}]
@ -903,7 +904,7 @@ class TestSegmentApiPost:
mock_doc = Mock() mock_doc = Mock()
mock_doc.indexing_status = "completed" mock_doc.indexing_status = "completed"
mock_doc.enabled = True mock_doc.enabled = True
mock_doc.doc_form = "text_model" mock_doc.doc_form = IndexStructureType.PARAGRAPH_INDEX
mock_doc_svc.get_document.return_value = mock_doc mock_doc_svc.get_document.return_value = mock_doc
mock_seg_svc.segment_create_args_validate.return_value = None mock_seg_svc.segment_create_args_validate.return_value = None
@ -1091,7 +1092,7 @@ class TestDatasetSegmentApiDelete:
mock_doc = Mock() mock_doc = Mock()
mock_doc.indexing_status = "completed" mock_doc.indexing_status = "completed"
mock_doc.enabled = True mock_doc.enabled = True
mock_doc.doc_form = "text_model" mock_doc.doc_form = IndexStructureType.PARAGRAPH_INDEX
mock_doc_svc.get_document.return_value = mock_doc mock_doc_svc.get_document.return_value = mock_doc
mock_seg_svc.get_segment_by_id.return_value = None # Segment not found mock_seg_svc.get_segment_by_id.return_value = None # Segment not found
@ -1371,7 +1372,7 @@ class TestDatasetSegmentApiGetSingle:
mock_account_fn.return_value = (Mock(), mock_tenant.id) mock_account_fn.return_value = (Mock(), mock_tenant.id)
mock_db.session.query.return_value.where.return_value.first.return_value = mock_dataset mock_db.session.query.return_value.where.return_value.first.return_value = mock_dataset
mock_dataset_svc.check_dataset_model_setting.return_value = None mock_dataset_svc.check_dataset_model_setting.return_value = None
mock_doc = Mock(doc_form="text_model") mock_doc = Mock(doc_form=IndexStructureType.PARAGRAPH_INDEX)
mock_doc_svc.get_document.return_value = mock_doc mock_doc_svc.get_document.return_value = mock_doc
mock_seg_svc.get_segment_by_id.return_value = mock_segment mock_seg_svc.get_segment_by_id.return_value = mock_segment
mock_marshal.return_value = {"id": mock_segment.id} mock_marshal.return_value = {"id": mock_segment.id}
@ -1390,7 +1391,7 @@ class TestDatasetSegmentApiGetSingle:
assert status == 200 assert status == 200
assert "data" in response assert "data" in response
assert response["doc_form"] == "text_model" assert response["doc_form"] == IndexStructureType.PARAGRAPH_INDEX
@patch("controllers.service_api.dataset.segment.current_account_with_tenant") @patch("controllers.service_api.dataset.segment.current_account_with_tenant")
@patch("controllers.service_api.dataset.segment.db") @patch("controllers.service_api.dataset.segment.db")

View File

@ -35,6 +35,7 @@ from controllers.service_api.dataset.document import (
InvalidMetadataError, InvalidMetadataError,
) )
from controllers.service_api.dataset.error import ArchivedDocumentImmutableError from controllers.service_api.dataset.error import ArchivedDocumentImmutableError
from core.rag.index_processor.constant.index_type import IndexStructureType
from models.enums import IndexingStatus from models.enums import IndexingStatus
from services.dataset_service import DocumentService from services.dataset_service import DocumentService
from services.entities.knowledge_entities.knowledge_entities import ProcessRule, RetrievalModel from services.entities.knowledge_entities.knowledge_entities import ProcessRule, RetrievalModel
@ -52,7 +53,7 @@ class TestDocumentTextCreatePayload:
def test_payload_with_defaults(self): def test_payload_with_defaults(self):
"""Test payload default values.""" """Test payload default values."""
payload = DocumentTextCreatePayload(name="Doc", text="Content") payload = DocumentTextCreatePayload(name="Doc", text="Content")
assert payload.doc_form == "text_model" assert payload.doc_form == IndexStructureType.PARAGRAPH_INDEX
assert payload.doc_language == "English" assert payload.doc_language == "English"
assert payload.process_rule is None assert payload.process_rule is None
assert payload.indexing_technique is None assert payload.indexing_technique is None
@ -62,14 +63,14 @@ class TestDocumentTextCreatePayload:
payload = DocumentTextCreatePayload( payload = DocumentTextCreatePayload(
name="Full Document", name="Full Document",
text="Complete document content here", text="Complete document content here",
doc_form="qa_model", doc_form=IndexStructureType.QA_INDEX,
doc_language="Chinese", doc_language="Chinese",
indexing_technique="high_quality", indexing_technique="high_quality",
embedding_model="text-embedding-ada-002", embedding_model="text-embedding-ada-002",
embedding_model_provider="openai", embedding_model_provider="openai",
) )
assert payload.name == "Full Document" assert payload.name == "Full Document"
assert payload.doc_form == "qa_model" assert payload.doc_form == IndexStructureType.QA_INDEX
assert payload.doc_language == "Chinese" assert payload.doc_language == "Chinese"
assert payload.indexing_technique == "high_quality" assert payload.indexing_technique == "high_quality"
assert payload.embedding_model == "text-embedding-ada-002" assert payload.embedding_model == "text-embedding-ada-002"
@ -147,8 +148,8 @@ class TestDocumentTextUpdate:
def test_payload_with_doc_form_update(self): def test_payload_with_doc_form_update(self):
"""Test payload with doc_form update.""" """Test payload with doc_form update."""
payload = DocumentTextUpdate(doc_form="qa_model") payload = DocumentTextUpdate(doc_form=IndexStructureType.QA_INDEX)
assert payload.doc_form == "qa_model" assert payload.doc_form == IndexStructureType.QA_INDEX
def test_payload_with_language_update(self): def test_payload_with_language_update(self):
"""Test payload with doc_language update.""" """Test payload with doc_language update."""
@ -158,7 +159,7 @@ class TestDocumentTextUpdate:
def test_payload_default_values(self): def test_payload_default_values(self):
"""Test payload default values.""" """Test payload default values."""
payload = DocumentTextUpdate() payload = DocumentTextUpdate()
assert payload.doc_form == "text_model" assert payload.doc_form == IndexStructureType.PARAGRAPH_INDEX
assert payload.doc_language == "English" assert payload.doc_language == "English"
@ -272,14 +273,24 @@ class TestDocumentDocForm:
def test_text_model_form(self): def test_text_model_form(self):
"""Test text_model form.""" """Test text_model form."""
doc_form = "text_model" doc_form = IndexStructureType.PARAGRAPH_INDEX
valid_forms = ["text_model", "qa_model", "hierarchical_model", "parent_child_model"] valid_forms = [
IndexStructureType.PARAGRAPH_INDEX,
IndexStructureType.QA_INDEX,
IndexStructureType.PARENT_CHILD_INDEX,
"parent_child_model",
]
assert doc_form in valid_forms assert doc_form in valid_forms
def test_qa_model_form(self): def test_qa_model_form(self):
"""Test qa_model form.""" """Test qa_model form."""
doc_form = "qa_model" doc_form = IndexStructureType.QA_INDEX
valid_forms = ["text_model", "qa_model", "hierarchical_model", "parent_child_model"] valid_forms = [
IndexStructureType.PARAGRAPH_INDEX,
IndexStructureType.QA_INDEX,
IndexStructureType.PARENT_CHILD_INDEX,
"parent_child_model",
]
assert doc_form in valid_forms assert doc_form in valid_forms
@ -504,7 +515,7 @@ class TestDocumentApiGet:
doc.name = "test_document.txt" doc.name = "test_document.txt"
doc.indexing_status = "completed" doc.indexing_status = "completed"
doc.enabled = True doc.enabled = True
doc.doc_form = "text_model" doc.doc_form = IndexStructureType.PARAGRAPH_INDEX
doc.doc_language = "English" doc.doc_language = "English"
doc.doc_type = "book" doc.doc_type = "book"
doc.doc_metadata_details = {"source": "upload"} doc.doc_metadata_details = {"source": "upload"}

View File

@ -4800,8 +4800,8 @@ class TestInternalHooksCoverage:
dataset_docs = [ dataset_docs = [
SimpleNamespace(id="doc-a", doc_form=IndexStructureType.PARENT_CHILD_INDEX), SimpleNamespace(id="doc-a", doc_form=IndexStructureType.PARENT_CHILD_INDEX),
SimpleNamespace(id="doc-b", doc_form=IndexStructureType.PARENT_CHILD_INDEX), SimpleNamespace(id="doc-b", doc_form=IndexStructureType.PARENT_CHILD_INDEX),
SimpleNamespace(id="doc-c", doc_form="qa_model"), SimpleNamespace(id="doc-c", doc_form=IndexStructureType.QA_INDEX),
SimpleNamespace(id="doc-d", doc_form="qa_model"), SimpleNamespace(id="doc-d", doc_form=IndexStructureType.QA_INDEX),
] ]
child_chunks = [SimpleNamespace(index_node_id="idx-a", segment_id="seg-a")] child_chunks = [SimpleNamespace(index_node_id="idx-a", segment_id="seg-a")]
segments = [SimpleNamespace(index_node_id="idx-c", id="seg-c")] segments = [SimpleNamespace(index_node_id="idx-c", id="seg-c")]

View File

@ -238,7 +238,7 @@ class TestApiToolProviderValidation:
name=provider_name, name=provider_name,
icon='{"type": "emoji", "value": "🔧"}', icon='{"type": "emoji", "value": "🔧"}',
schema=schema, schema=schema,
schema_type_str="openapi", schema_type_str=ApiProviderSchemaType.OPENAPI,
description="Custom API for testing", description="Custom API for testing",
tools_str=json.dumps(tools), tools_str=json.dumps(tools),
credentials_str=json.dumps(credentials), credentials_str=json.dumps(credentials),
@ -249,7 +249,7 @@ class TestApiToolProviderValidation:
assert api_provider.user_id == user_id assert api_provider.user_id == user_id
assert api_provider.name == provider_name assert api_provider.name == provider_name
assert api_provider.schema == schema assert api_provider.schema == schema
assert api_provider.schema_type_str == "openapi" assert api_provider.schema_type_str == ApiProviderSchemaType.OPENAPI
assert api_provider.description == "Custom API for testing" assert api_provider.description == "Custom API for testing"
def test_api_tool_provider_schema_type_property(self): def test_api_tool_provider_schema_type_property(self):
@ -261,7 +261,7 @@ class TestApiToolProviderValidation:
name="Test API", name="Test API",
icon="{}", icon="{}",
schema="{}", schema="{}",
schema_type_str="openapi", schema_type_str=ApiProviderSchemaType.OPENAPI,
description="Test", description="Test",
tools_str="[]", tools_str="[]",
credentials_str="{}", credentials_str="{}",
@ -314,7 +314,7 @@ class TestApiToolProviderValidation:
name="Weather API", name="Weather API",
icon="{}", icon="{}",
schema="{}", schema="{}",
schema_type_str="openapi", schema_type_str=ApiProviderSchemaType.OPENAPI,
description="Weather API", description="Weather API",
tools_str=json.dumps(tools_data), tools_str=json.dumps(tools_data),
credentials_str="{}", credentials_str="{}",
@ -343,7 +343,7 @@ class TestApiToolProviderValidation:
name="Secure API", name="Secure API",
icon="{}", icon="{}",
schema="{}", schema="{}",
schema_type_str="openapi", schema_type_str=ApiProviderSchemaType.OPENAPI,
description="Secure API", description="Secure API",
tools_str="[]", tools_str="[]",
credentials_str=json.dumps(credentials_data), credentials_str=json.dumps(credentials_data),
@ -369,7 +369,7 @@ class TestApiToolProviderValidation:
name="Privacy API", name="Privacy API",
icon="{}", icon="{}",
schema="{}", schema="{}",
schema_type_str="openapi", schema_type_str=ApiProviderSchemaType.OPENAPI,
description="API with privacy policy", description="API with privacy policy",
tools_str="[]", tools_str="[]",
credentials_str="{}", credentials_str="{}",
@ -391,7 +391,7 @@ class TestApiToolProviderValidation:
name="Disclaimer API", name="Disclaimer API",
icon="{}", icon="{}",
schema="{}", schema="{}",
schema_type_str="openapi", schema_type_str=ApiProviderSchemaType.OPENAPI,
description="API with disclaimer", description="API with disclaimer",
tools_str="[]", tools_str="[]",
credentials_str="{}", credentials_str="{}",
@ -410,7 +410,7 @@ class TestApiToolProviderValidation:
name="Default API", name="Default API",
icon="{}", icon="{}",
schema="{}", schema="{}",
schema_type_str="openapi", schema_type_str=ApiProviderSchemaType.OPENAPI,
description="API", description="API",
tools_str="[]", tools_str="[]",
credentials_str="{}", credentials_str="{}",
@ -432,7 +432,7 @@ class TestApiToolProviderValidation:
name=provider_name, name=provider_name,
icon="{}", icon="{}",
schema="{}", schema="{}",
schema_type_str="openapi", schema_type_str=ApiProviderSchemaType.OPENAPI,
description="Unique API", description="Unique API",
tools_str="[]", tools_str="[]",
credentials_str="{}", credentials_str="{}",
@ -454,7 +454,7 @@ class TestApiToolProviderValidation:
name="Public API", name="Public API",
icon="{}", icon="{}",
schema="{}", schema="{}",
schema_type_str="openapi", schema_type_str=ApiProviderSchemaType.OPENAPI,
description="Public API with no auth", description="Public API with no auth",
tools_str="[]", tools_str="[]",
credentials_str=json.dumps(credentials), credentials_str=json.dumps(credentials),
@ -479,7 +479,7 @@ class TestApiToolProviderValidation:
name="Query Auth API", name="Query Auth API",
icon="{}", icon="{}",
schema="{}", schema="{}",
schema_type_str="openapi", schema_type_str=ApiProviderSchemaType.OPENAPI,
description="API with query auth", description="API with query auth",
tools_str="[]", tools_str="[]",
credentials_str=json.dumps(credentials), credentials_str=json.dumps(credentials),
@ -741,7 +741,7 @@ class TestCredentialStorage:
name="Test API", name="Test API",
icon="{}", icon="{}",
schema="{}", schema="{}",
schema_type_str="openapi", schema_type_str=ApiProviderSchemaType.OPENAPI,
description="Test", description="Test",
tools_str="[]", tools_str="[]",
credentials_str=json.dumps(credentials), credentials_str=json.dumps(credentials),
@ -788,7 +788,7 @@ class TestCredentialStorage:
name="Update Test", name="Update Test",
icon="{}", icon="{}",
schema="{}", schema="{}",
schema_type_str="openapi", schema_type_str=ApiProviderSchemaType.OPENAPI,
description="Test", description="Test",
tools_str="[]", tools_str="[]",
credentials_str=json.dumps(original_credentials), credentials_str=json.dumps(original_credentials),
@ -897,7 +897,7 @@ class TestToolProviderRelationships:
name="User API", name="User API",
icon="{}", icon="{}",
schema="{}", schema="{}",
schema_type_str="openapi", schema_type_str=ApiProviderSchemaType.OPENAPI,
description="Test", description="Test",
tools_str="[]", tools_str="[]",
credentials_str="{}", credentials_str="{}",
@ -931,7 +931,7 @@ class TestToolProviderRelationships:
name="Custom API 1", name="Custom API 1",
icon="{}", icon="{}",
schema="{}", schema="{}",
schema_type_str="openapi", schema_type_str=ApiProviderSchemaType.OPENAPI,
description="Test", description="Test",
tools_str="[]", tools_str="[]",
credentials_str="{}", credentials_str="{}",

View File

@ -111,6 +111,7 @@ from unittest.mock import Mock, patch
import pytest import pytest
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
from core.rag.index_processor.constant.index_type import IndexStructureType
from dify_graph.model_runtime.entities.model_entities import ModelType from dify_graph.model_runtime.entities.model_entities import ModelType
from models.dataset import Dataset, DatasetProcessRule, Document from models.dataset import Dataset, DatasetProcessRule, Document
from services.dataset_service import DatasetService, DocumentService from services.dataset_service import DatasetService, DocumentService
@ -188,7 +189,7 @@ class DocumentValidationTestDataFactory:
def create_knowledge_config_mock( def create_knowledge_config_mock(
data_source: DataSource | None = None, data_source: DataSource | None = None,
process_rule: ProcessRule | None = None, process_rule: ProcessRule | None = None,
doc_form: str = "text_model", doc_form: str = IndexStructureType.PARAGRAPH_INDEX,
indexing_technique: str = "high_quality", indexing_technique: str = "high_quality",
**kwargs, **kwargs,
) -> Mock: ) -> Mock:
@ -326,8 +327,8 @@ class TestDatasetServiceCheckDocForm:
- Validation logic works correctly - Validation logic works correctly
""" """
# Arrange # Arrange
dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form="text_model") dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form=IndexStructureType.PARAGRAPH_INDEX)
doc_form = "text_model" doc_form = IndexStructureType.PARAGRAPH_INDEX
# Act (should not raise) # Act (should not raise)
DatasetService.check_doc_form(dataset, doc_form) DatasetService.check_doc_form(dataset, doc_form)
@ -349,7 +350,7 @@ class TestDatasetServiceCheckDocForm:
""" """
# Arrange # Arrange
dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form=None) dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form=None)
doc_form = "text_model" doc_form = IndexStructureType.PARAGRAPH_INDEX
# Act (should not raise) # Act (should not raise)
DatasetService.check_doc_form(dataset, doc_form) DatasetService.check_doc_form(dataset, doc_form)
@ -370,8 +371,8 @@ class TestDatasetServiceCheckDocForm:
- Error type is correct - Error type is correct
""" """
# Arrange # Arrange
dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form="text_model") dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form=IndexStructureType.PARAGRAPH_INDEX)
doc_form = "table_model" # Different form doc_form = IndexStructureType.PARENT_CHILD_INDEX # Different form
# Act & Assert # Act & Assert
with pytest.raises(ValueError, match="doc_form is different from the dataset doc_form"): with pytest.raises(ValueError, match="doc_form is different from the dataset doc_form"):
@ -390,7 +391,7 @@ class TestDatasetServiceCheckDocForm:
""" """
# Arrange # Arrange
dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form="knowledge_card") dataset = DocumentValidationTestDataFactory.create_dataset_mock(doc_form="knowledge_card")
doc_form = "text_model" # Different form doc_form = IndexStructureType.PARAGRAPH_INDEX # Different form
# Act & Assert # Act & Assert
with pytest.raises(ValueError, match="doc_form is different from the dataset doc_form"): with pytest.raises(ValueError, match="doc_form is different from the dataset doc_form"):

View File

@ -2,6 +2,7 @@ from unittest.mock import MagicMock, Mock, patch
import pytest import pytest
from core.rag.index_processor.constant.index_type import IndexStructureType
from models.account import Account from models.account import Account
from models.dataset import ChildChunk, Dataset, Document, DocumentSegment from models.dataset import ChildChunk, Dataset, Document, DocumentSegment
from models.enums import SegmentType from models.enums import SegmentType
@ -91,7 +92,7 @@ class SegmentTestDataFactory:
document_id: str = "doc-123", document_id: str = "doc-123",
dataset_id: str = "dataset-123", dataset_id: str = "dataset-123",
tenant_id: str = "tenant-123", tenant_id: str = "tenant-123",
doc_form: str = "text_model", doc_form: str = IndexStructureType.PARAGRAPH_INDEX,
word_count: int = 100, word_count: int = 100,
**kwargs, **kwargs,
) -> Mock: ) -> Mock:
@ -210,7 +211,7 @@ class TestSegmentServiceCreateSegment:
def test_create_segment_with_qa_model(self, mock_db_session, mock_current_user): def test_create_segment_with_qa_model(self, mock_db_session, mock_current_user):
"""Test creation of segment with QA model (requires answer).""" """Test creation of segment with QA model (requires answer)."""
# Arrange # Arrange
document = SegmentTestDataFactory.create_document_mock(doc_form="qa_model", word_count=100) document = SegmentTestDataFactory.create_document_mock(doc_form=IndexStructureType.QA_INDEX, word_count=100)
dataset = SegmentTestDataFactory.create_dataset_mock(indexing_technique="economy") dataset = SegmentTestDataFactory.create_dataset_mock(indexing_technique="economy")
args = {"content": "What is AI?", "answer": "AI is Artificial Intelligence", "keywords": ["ai"]} args = {"content": "What is AI?", "answer": "AI is Artificial Intelligence", "keywords": ["ai"]}
@ -429,7 +430,7 @@ class TestSegmentServiceUpdateSegment:
"""Test update segment with QA model (includes answer).""" """Test update segment with QA model (includes answer)."""
# Arrange # Arrange
segment = SegmentTestDataFactory.create_segment_mock(enabled=True, word_count=10) segment = SegmentTestDataFactory.create_segment_mock(enabled=True, word_count=10)
document = SegmentTestDataFactory.create_document_mock(doc_form="qa_model", word_count=100) document = SegmentTestDataFactory.create_document_mock(doc_form=IndexStructureType.QA_INDEX, word_count=100)
dataset = SegmentTestDataFactory.create_dataset_mock(indexing_technique="economy") dataset = SegmentTestDataFactory.create_dataset_mock(indexing_technique="economy")
args = SegmentUpdateArgs(content="Updated question", answer="Updated answer", keywords=["qa"]) args = SegmentUpdateArgs(content="Updated question", answer="Updated answer", keywords=["qa"])

View File

@ -4,6 +4,7 @@ from unittest.mock import Mock, create_autospec
import pytest import pytest
from redis.exceptions import LockNotOwnedError from redis.exceptions import LockNotOwnedError
from core.rag.index_processor.constant.index_type import IndexStructureType
from models.account import Account from models.account import Account
from models.dataset import Dataset, Document from models.dataset import Dataset, Document
from services.dataset_service import DocumentService, SegmentService from services.dataset_service import DocumentService, SegmentService
@ -76,7 +77,7 @@ def test_save_document_with_dataset_id_ignores_lock_not_owned(
info_list = types.SimpleNamespace(data_source_type="upload_file") info_list = types.SimpleNamespace(data_source_type="upload_file")
data_source = types.SimpleNamespace(info_list=info_list) data_source = types.SimpleNamespace(info_list=info_list)
knowledge_config = types.SimpleNamespace( knowledge_config = types.SimpleNamespace(
doc_form="qa_model", doc_form=IndexStructureType.QA_INDEX,
original_document_id=None, # go into "new document" branch original_document_id=None, # go into "new document" branch
data_source=data_source, data_source=data_source,
indexing_technique="high_quality", indexing_technique="high_quality",
@ -131,7 +132,7 @@ def test_add_segment_ignores_lock_not_owned(
document.id = "doc-1" document.id = "doc-1"
document.dataset_id = dataset.id document.dataset_id = dataset.id
document.word_count = 0 document.word_count = 0
document.doc_form = "qa_model" document.doc_form = IndexStructureType.QA_INDEX
# Minimal args required by add_segment # Minimal args required by add_segment
args = { args = {
@ -174,4 +175,4 @@ def test_multi_create_segment_ignores_lock_not_owned(
document.id = "doc-1" document.id = "doc-1"
document.dataset_id = dataset.id document.dataset_id = dataset.id
document.word_count = 0 document.word_count = 0
document.doc_form = "qa_model" document.doc_form = IndexStructureType.QA_INDEX

View File

@ -11,6 +11,7 @@ from unittest.mock import MagicMock
import pytest import pytest
import services.summary_index_service as summary_module import services.summary_index_service as summary_module
from core.rag.index_processor.constant.index_type import IndexStructureType
from models.enums import SegmentStatus, SummaryStatus from models.enums import SegmentStatus, SummaryStatus
from services.summary_index_service import SummaryIndexService from services.summary_index_service import SummaryIndexService
@ -48,7 +49,7 @@ def _segment(*, has_document: bool = True) -> MagicMock:
if has_document: if has_document:
doc = MagicMock(name="document") doc = MagicMock(name="document")
doc.doc_language = "en" doc.doc_language = "en"
doc.doc_form = "text_model" doc.doc_form = IndexStructureType.PARAGRAPH_INDEX
segment.document = doc segment.document = doc
else: else:
segment.document = None segment.document = None
@ -623,13 +624,13 @@ def test_generate_summaries_for_document_skip_conditions(monkeypatch: pytest.Mon
dataset = _dataset(indexing_technique="economy") dataset = _dataset(indexing_technique="economy")
document = MagicMock(spec=summary_module.DatasetDocument) document = MagicMock(spec=summary_module.DatasetDocument)
document.id = "doc-1" document.id = "doc-1"
document.doc_form = "text_model" document.doc_form = IndexStructureType.PARAGRAPH_INDEX
assert SummaryIndexService.generate_summaries_for_document(dataset, document, {"enable": True}) == [] assert SummaryIndexService.generate_summaries_for_document(dataset, document, {"enable": True}) == []
dataset = _dataset() dataset = _dataset()
assert SummaryIndexService.generate_summaries_for_document(dataset, document, {"enable": False}) == [] assert SummaryIndexService.generate_summaries_for_document(dataset, document, {"enable": False}) == []
document.doc_form = "qa_model" document.doc_form = IndexStructureType.QA_INDEX
assert SummaryIndexService.generate_summaries_for_document(dataset, document, {"enable": True}) == [] assert SummaryIndexService.generate_summaries_for_document(dataset, document, {"enable": True}) == []
@ -637,7 +638,7 @@ def test_generate_summaries_for_document_runs_and_handles_errors(monkeypatch: py
dataset = _dataset() dataset = _dataset()
document = MagicMock(spec=summary_module.DatasetDocument) document = MagicMock(spec=summary_module.DatasetDocument)
document.id = "doc-1" document.id = "doc-1"
document.doc_form = "text_model" document.doc_form = IndexStructureType.PARAGRAPH_INDEX
seg1 = _segment() seg1 = _segment()
seg2 = _segment() seg2 = _segment()
@ -673,7 +674,7 @@ def test_generate_summaries_for_document_no_segments_returns_empty(monkeypatch:
dataset = _dataset() dataset = _dataset()
document = MagicMock(spec=summary_module.DatasetDocument) document = MagicMock(spec=summary_module.DatasetDocument)
document.id = "doc-1" document.id = "doc-1"
document.doc_form = "text_model" document.doc_form = IndexStructureType.PARAGRAPH_INDEX
session = MagicMock() session = MagicMock()
query = MagicMock() query = MagicMock()
@ -696,7 +697,7 @@ def test_generate_summaries_for_document_applies_segment_ids_and_only_parent_chu
dataset = _dataset() dataset = _dataset()
document = MagicMock(spec=summary_module.DatasetDocument) document = MagicMock(spec=summary_module.DatasetDocument)
document.id = "doc-1" document.id = "doc-1"
document.doc_form = "text_model" document.doc_form = IndexStructureType.PARAGRAPH_INDEX
seg = _segment() seg = _segment()
session = MagicMock() session = MagicMock()
@ -935,7 +936,7 @@ def test_update_summary_for_segment_skip_conditions() -> None:
SummaryIndexService.update_summary_for_segment(_segment(), _dataset(indexing_technique="economy"), "x") is None SummaryIndexService.update_summary_for_segment(_segment(), _dataset(indexing_technique="economy"), "x") is None
) )
seg = _segment(has_document=True) seg = _segment(has_document=True)
seg.document.doc_form = "qa_model" seg.document.doc_form = IndexStructureType.QA_INDEX
assert SummaryIndexService.update_summary_for_segment(seg, _dataset(), "x") is None assert SummaryIndexService.update_summary_for_segment(seg, _dataset(), "x") is None

View File

@ -9,6 +9,7 @@ from unittest.mock import MagicMock
import pytest import pytest
import services.vector_service as vector_service_module import services.vector_service as vector_service_module
from core.rag.index_processor.constant.index_type import IndexStructureType
from services.vector_service import VectorService from services.vector_service import VectorService
@ -32,7 +33,7 @@ class _ParentDocStub:
def _make_dataset( def _make_dataset(
*, *,
indexing_technique: str = "high_quality", indexing_technique: str = "high_quality",
doc_form: str = "text_model", doc_form: str = IndexStructureType.PARAGRAPH_INDEX,
tenant_id: str = "tenant-1", tenant_id: str = "tenant-1",
dataset_id: str = "dataset-1", dataset_id: str = "dataset-1",
is_multimodal: bool = False, is_multimodal: bool = False,
@ -106,7 +107,7 @@ def test_create_segments_vector_regular_indexing_loads_documents_and_keywords(mo
factory_instance.init_index_processor.return_value = index_processor factory_instance.init_index_processor.return_value = index_processor
monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance)) monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance))
VectorService.create_segments_vector([["k1"]], [segment], dataset, "text_model") VectorService.create_segments_vector([["k1"]], [segment], dataset, IndexStructureType.PARAGRAPH_INDEX)
index_processor.load.assert_called_once() index_processor.load.assert_called_once()
args, kwargs = index_processor.load.call_args args, kwargs = index_processor.load.call_args
@ -131,7 +132,7 @@ def test_create_segments_vector_regular_indexing_loads_multimodal_documents(monk
factory_instance.init_index_processor.return_value = index_processor factory_instance.init_index_processor.return_value = index_processor
monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance)) monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance))
VectorService.create_segments_vector([["k1"]], [segment], dataset, "text_model") VectorService.create_segments_vector([["k1"]], [segment], dataset, IndexStructureType.PARAGRAPH_INDEX)
assert index_processor.load.call_count == 2 assert index_processor.load.call_count == 2
first_args, first_kwargs = index_processor.load.call_args_list[0] first_args, first_kwargs = index_processor.load.call_args_list[0]
@ -153,7 +154,7 @@ def test_create_segments_vector_with_no_segments_does_not_load(monkeypatch: pyte
factory_instance.init_index_processor.return_value = index_processor factory_instance.init_index_processor.return_value = index_processor
monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance)) monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance))
VectorService.create_segments_vector(None, [], dataset, "text_model") VectorService.create_segments_vector(None, [], dataset, IndexStructureType.PARAGRAPH_INDEX)
index_processor.load.assert_not_called() index_processor.load.assert_not_called()
@ -392,7 +393,7 @@ def test_update_segment_vector_economy_uses_keyword_without_keywords_list(monkey
def test_generate_child_chunks_regenerate_cleans_then_saves_children(monkeypatch: pytest.MonkeyPatch) -> None: def test_generate_child_chunks_regenerate_cleans_then_saves_children(monkeypatch: pytest.MonkeyPatch) -> None:
dataset = _make_dataset(doc_form="text_model", tenant_id="tenant-1", dataset_id="dataset-1") dataset = _make_dataset(doc_form=IndexStructureType.PARAGRAPH_INDEX, tenant_id="tenant-1", dataset_id="dataset-1")
segment = _make_segment(segment_id="seg-1") segment = _make_segment(segment_id="seg-1")
dataset_document = MagicMock() dataset_document = MagicMock()
@ -439,7 +440,7 @@ def test_generate_child_chunks_regenerate_cleans_then_saves_children(monkeypatch
def test_generate_child_chunks_commits_even_when_no_children(monkeypatch: pytest.MonkeyPatch) -> None: def test_generate_child_chunks_commits_even_when_no_children(monkeypatch: pytest.MonkeyPatch) -> None:
dataset = _make_dataset(doc_form="text_model") dataset = _make_dataset(doc_form=IndexStructureType.PARAGRAPH_INDEX)
segment = _make_segment() segment = _make_segment()
dataset_document = MagicMock() dataset_document = MagicMock()
dataset_document.doc_language = "en" dataset_document.doc_language = "en"

View File

@ -121,6 +121,7 @@ import pytest
from core.rag.datasource.vdb.vector_base import BaseVector from core.rag.datasource.vdb.vector_base import BaseVector
from core.rag.datasource.vdb.vector_factory import Vector from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.datasource.vdb.vector_type import VectorType from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.models.document import Document from core.rag.models.document import Document
from models.dataset import ChildChunk, Dataset, DatasetDocument, DatasetProcessRule, DocumentSegment from models.dataset import ChildChunk, Dataset, DatasetDocument, DatasetProcessRule, DocumentSegment
from services.vector_service import VectorService from services.vector_service import VectorService
@ -151,7 +152,7 @@ class VectorServiceTestDataFactory:
def create_dataset_mock( def create_dataset_mock(
dataset_id: str = "dataset-123", dataset_id: str = "dataset-123",
tenant_id: str = "tenant-123", tenant_id: str = "tenant-123",
doc_form: str = "text_model", doc_form: str = IndexStructureType.PARAGRAPH_INDEX,
indexing_technique: str = "high_quality", indexing_technique: str = "high_quality",
embedding_model_provider: str = "openai", embedding_model_provider: str = "openai",
embedding_model: str = "text-embedding-ada-002", embedding_model: str = "text-embedding-ada-002",
@ -493,7 +494,7 @@ class TestVectorService:
""" """
# Arrange # Arrange
dataset = VectorServiceTestDataFactory.create_dataset_mock( dataset = VectorServiceTestDataFactory.create_dataset_mock(
doc_form="text_model", indexing_technique="high_quality" doc_form=IndexStructureType.PARAGRAPH_INDEX, indexing_technique="high_quality"
) )
segment = VectorServiceTestDataFactory.create_document_segment_mock() segment = VectorServiceTestDataFactory.create_document_segment_mock()
@ -505,7 +506,7 @@ class TestVectorService:
mock_index_processor_factory.return_value.init_index_processor.return_value = mock_index_processor mock_index_processor_factory.return_value.init_index_processor.return_value = mock_index_processor
# Act # Act
VectorService.create_segments_vector(keywords_list, [segment], dataset, "text_model") VectorService.create_segments_vector(keywords_list, [segment], dataset, IndexStructureType.PARAGRAPH_INDEX)
# Assert # Assert
mock_index_processor.load.assert_called_once() mock_index_processor.load.assert_called_once()
@ -649,7 +650,7 @@ class TestVectorService:
mock_index_processor_factory.return_value.init_index_processor.return_value = mock_index_processor mock_index_processor_factory.return_value.init_index_processor.return_value = mock_index_processor
# Act # Act
VectorService.create_segments_vector(None, [], dataset, "text_model") VectorService.create_segments_vector(None, [], dataset, IndexStructureType.PARAGRAPH_INDEX)
# Assert # Assert
mock_index_processor.load.assert_not_called() mock_index_processor.load.assert_not_called()

View File

@ -16,6 +16,7 @@ from unittest.mock import MagicMock, patch
import pytest import pytest
from core.rag.index_processor.constant.index_type import IndexStructureType
from models.enums import DataSourceType from models.enums import DataSourceType
from tasks.clean_dataset_task import clean_dataset_task from tasks.clean_dataset_task import clean_dataset_task
@ -186,7 +187,7 @@ class TestErrorHandling:
indexing_technique="high_quality", indexing_technique="high_quality",
index_struct='{"type": "paragraph"}', index_struct='{"type": "paragraph"}',
collection_binding_id=collection_binding_id, collection_binding_id=collection_binding_id,
doc_form="paragraph_index", doc_form=IndexStructureType.PARAGRAPH_INDEX,
) )
# Assert # Assert
@ -231,7 +232,7 @@ class TestPipelineAndWorkflowDeletion:
indexing_technique="high_quality", indexing_technique="high_quality",
index_struct='{"type": "paragraph"}', index_struct='{"type": "paragraph"}',
collection_binding_id=collection_binding_id, collection_binding_id=collection_binding_id,
doc_form="paragraph_index", doc_form=IndexStructureType.PARAGRAPH_INDEX,
pipeline_id=pipeline_id, pipeline_id=pipeline_id,
) )
@ -267,7 +268,7 @@ class TestPipelineAndWorkflowDeletion:
indexing_technique="high_quality", indexing_technique="high_quality",
index_struct='{"type": "paragraph"}', index_struct='{"type": "paragraph"}',
collection_binding_id=collection_binding_id, collection_binding_id=collection_binding_id,
doc_form="paragraph_index", doc_form=IndexStructureType.PARAGRAPH_INDEX,
pipeline_id=None, pipeline_id=None,
) )
@ -323,7 +324,7 @@ class TestSegmentAttachmentCleanup:
indexing_technique="high_quality", indexing_technique="high_quality",
index_struct='{"type": "paragraph"}', index_struct='{"type": "paragraph"}',
collection_binding_id=collection_binding_id, collection_binding_id=collection_binding_id,
doc_form="paragraph_index", doc_form=IndexStructureType.PARAGRAPH_INDEX,
) )
# Assert # Assert
@ -368,7 +369,7 @@ class TestSegmentAttachmentCleanup:
indexing_technique="high_quality", indexing_technique="high_quality",
index_struct='{"type": "paragraph"}', index_struct='{"type": "paragraph"}',
collection_binding_id=collection_binding_id, collection_binding_id=collection_binding_id,
doc_form="paragraph_index", doc_form=IndexStructureType.PARAGRAPH_INDEX,
) )
# Assert - storage delete was attempted # Assert - storage delete was attempted
@ -410,7 +411,7 @@ class TestEdgeCases:
indexing_technique="high_quality", indexing_technique="high_quality",
index_struct='{"type": "paragraph"}', index_struct='{"type": "paragraph"}',
collection_binding_id=collection_binding_id, collection_binding_id=collection_binding_id,
doc_form="paragraph_index", doc_form=IndexStructureType.PARAGRAPH_INDEX,
) )
# Assert # Assert
@ -454,7 +455,7 @@ class TestIndexProcessorParameters:
indexing_technique=indexing_technique, indexing_technique=indexing_technique,
index_struct=index_struct, index_struct=index_struct,
collection_binding_id=collection_binding_id, collection_binding_id=collection_binding_id,
doc_form="paragraph_index", doc_form=IndexStructureType.PARAGRAPH_INDEX,
) )
# Assert # Assert

View File

@ -15,6 +15,7 @@ from unittest.mock import MagicMock, Mock, patch
import pytest import pytest
from core.indexing_runner import DocumentIsPausedError from core.indexing_runner import DocumentIsPausedError
from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.pipeline.queue import TenantIsolatedTaskQueue from core.rag.pipeline.queue import TenantIsolatedTaskQueue
from enums.cloud_plan import CloudPlan from enums.cloud_plan import CloudPlan
from extensions.ext_redis import redis_client from extensions.ext_redis import redis_client
@ -222,7 +223,7 @@ def mock_documents(document_ids, dataset_id):
doc.stopped_at = None doc.stopped_at = None
doc.processing_started_at = None doc.processing_started_at = None
# optional attribute used in some code paths # optional attribute used in some code paths
doc.doc_form = "text_model" doc.doc_form = IndexStructureType.PARAGRAPH_INDEX
documents.append(doc) documents.append(doc)
return documents return documents

View File

@ -11,6 +11,7 @@ from unittest.mock import MagicMock, Mock, patch
import pytest import pytest
from core.rag.index_processor.constant.index_type import IndexStructureType
from models.dataset import Dataset, Document from models.dataset import Dataset, Document
from tasks.document_indexing_sync_task import document_indexing_sync_task from tasks.document_indexing_sync_task import document_indexing_sync_task
@ -62,7 +63,7 @@ def mock_document(document_id, dataset_id, notion_workspace_id, notion_page_id,
document.tenant_id = str(uuid.uuid4()) document.tenant_id = str(uuid.uuid4())
document.data_source_type = "notion_import" document.data_source_type = "notion_import"
document.indexing_status = "completed" document.indexing_status = "completed"
document.doc_form = "text_model" document.doc_form = IndexStructureType.PARAGRAPH_INDEX
document.data_source_info_dict = { document.data_source_info_dict = {
"notion_workspace_id": notion_workspace_id, "notion_workspace_id": notion_workspace_id,
"notion_page_id": notion_page_id, "notion_page_id": notion_page_id,