refactor(api): replace dict with SummaryIndexSettingDict TypedDict in core/rag (#33633)

This commit is contained in:
BitToby 2026-03-18 06:26:49 +02:00 committed by GitHub
parent 69d1ccb7a7
commit 3454224ff9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 39 additions and 17 deletions

View File

@ -9,6 +9,7 @@ from flask import current_app
from sqlalchemy import delete, func, select
from core.db.session_factory import session_factory
from core.rag.index_processor.index_processor_base import SummaryIndexSettingDict
from core.workflow.nodes.knowledge_index.exc import KnowledgeIndexNodeError
from core.workflow.nodes.knowledge_index.protocols import Preview, PreviewItem, QaPreview
from models.dataset import Dataset, Document, DocumentSegment
@ -51,7 +52,7 @@ class IndexProcessor:
original_document_id: str,
chunks: Mapping[str, Any],
batch: Any,
summary_index_setting: dict | None = None,
summary_index_setting: SummaryIndexSettingDict | None = None,
):
with session_factory.create_session() as session:
document = session.query(Document).filter_by(id=document_id).first()
@ -131,7 +132,12 @@ class IndexProcessor:
}
def get_preview_output(
self, chunks: Any, dataset_id: str, document_id: str, chunk_structure: str, summary_index_setting: dict | None
self,
chunks: Any,
dataset_id: str,
document_id: str,
chunk_structure: str,
summary_index_setting: SummaryIndexSettingDict | None,
) -> Preview:
doc_language = None
with session_factory.create_session() as session:

View File

@ -7,10 +7,11 @@ import os
import re
from abc import ABC, abstractmethod
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, Optional
from typing import TYPE_CHECKING, Any, NotRequired, Optional
from urllib.parse import unquote, urlparse
import httpx
from typing_extensions import TypedDict
from configs import dify_config
from core.entities.knowledge_entities import PreviewDetail
@ -36,6 +37,13 @@ if TYPE_CHECKING:
from core.model_manager import ModelInstance
class SummaryIndexSettingDict(TypedDict):
enable: bool
model_name: NotRequired[str]
model_provider_name: NotRequired[str]
summary_prompt: NotRequired[str]
class BaseIndexProcessor(ABC):
"""Interface for extract files."""
@ -52,7 +60,7 @@ class BaseIndexProcessor(ABC):
self,
tenant_id: str,
preview_texts: list[PreviewDetail],
summary_index_setting: dict,
summary_index_setting: SummaryIndexSettingDict,
doc_language: str | None = None,
) -> list[PreviewDetail]:
"""

View File

@ -23,7 +23,7 @@ from core.rag.extractor.entity.extract_setting import ExtractSetting
from core.rag.extractor.extract_processor import ExtractProcessor
from core.rag.index_processor.constant.doc_type import DocType
from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.index_processor.index_processor_base import BaseIndexProcessor
from core.rag.index_processor.index_processor_base import BaseIndexProcessor, SummaryIndexSettingDict
from core.rag.models.document import AttachmentDocument, Document, MultimodalGeneralStructureChunk
from core.rag.retrieval.retrieval_methods import RetrievalMethod
from core.tools.utils.text_processing_utils import remove_leading_symbols
@ -279,7 +279,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
self,
tenant_id: str,
preview_texts: list[PreviewDetail],
summary_index_setting: dict,
summary_index_setting: SummaryIndexSettingDict,
doc_language: str | None = None,
) -> list[PreviewDetail]:
"""
@ -363,7 +363,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
def generate_summary(
tenant_id: str,
text: str,
summary_index_setting: dict | None = None,
summary_index_setting: SummaryIndexSettingDict | None = None,
segment_id: str | None = None,
document_language: str | None = None,
) -> tuple[str, LLMUsage]:

View File

@ -19,7 +19,7 @@ from core.rag.extractor.entity.extract_setting import ExtractSetting
from core.rag.extractor.extract_processor import ExtractProcessor
from core.rag.index_processor.constant.doc_type import DocType
from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.index_processor.index_processor_base import BaseIndexProcessor
from core.rag.index_processor.index_processor_base import BaseIndexProcessor, SummaryIndexSettingDict
from core.rag.models.document import AttachmentDocument, ChildDocument, Document, ParentChildStructureChunk
from core.rag.retrieval.retrieval_methods import RetrievalMethod
from extensions.ext_database import db
@ -362,7 +362,7 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
self,
tenant_id: str,
preview_texts: list[PreviewDetail],
summary_index_setting: dict,
summary_index_setting: SummaryIndexSettingDict,
doc_language: str | None = None,
) -> list[PreviewDetail]:
"""

View File

@ -22,7 +22,7 @@ from core.rag.docstore.dataset_docstore import DatasetDocumentStore
from core.rag.extractor.entity.extract_setting import ExtractSetting
from core.rag.extractor.extract_processor import ExtractProcessor
from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.index_processor.index_processor_base import BaseIndexProcessor
from core.rag.index_processor.index_processor_base import BaseIndexProcessor, SummaryIndexSettingDict
from core.rag.models.document import AttachmentDocument, Document, QAStructureChunk
from core.rag.retrieval.retrieval_methods import RetrievalMethod
from core.tools.utils.text_processing_utils import remove_leading_symbols
@ -245,7 +245,7 @@ class QAIndexProcessor(BaseIndexProcessor):
self,
tenant_id: str,
preview_texts: list[PreviewDetail],
summary_index_setting: dict,
summary_index_setting: SummaryIndexSettingDict,
doc_language: str | None = None,
) -> list[PreviewDetail]:
"""

View File

@ -2,6 +2,7 @@ import concurrent.futures
import logging
from core.db.session_factory import session_factory
from core.rag.index_processor.index_processor_base import SummaryIndexSettingDict
from models.dataset import Dataset, Document, DocumentSegment, DocumentSegmentSummary
from services.summary_index_service import SummaryIndexService
from tasks.generate_summary_index_task import generate_summary_index_task
@ -11,7 +12,11 @@ logger = logging.getLogger(__name__)
class SummaryIndex:
def generate_and_vectorize_summary(
self, dataset_id: str, document_id: str, is_preview: bool, summary_index_setting: dict | None = None
self,
dataset_id: str,
document_id: str,
is_preview: bool,
summary_index_setting: SummaryIndexSettingDict | None = None,
) -> None:
if is_preview:
with session_factory.create_session() as session:

View File

@ -2,6 +2,7 @@ from typing import Literal, Union
from pydantic import BaseModel
from core.rag.index_processor.index_processor_base import SummaryIndexSettingDict
from core.rag.retrieval.retrieval_methods import RetrievalMethod
from core.workflow.nodes.knowledge_index import KNOWLEDGE_INDEX_NODE_TYPE
from dify_graph.entities.base_node_data import BaseNodeData
@ -161,4 +162,4 @@ class KnowledgeIndexNodeData(BaseNodeData):
chunk_structure: str
index_chunk_variable_selector: list[str]
indexing_technique: str | None = None
summary_index_setting: dict | None = None
summary_index_setting: SummaryIndexSettingDict | None = None

View File

@ -3,6 +3,7 @@ from collections.abc import Mapping
from typing import TYPE_CHECKING, Any
from core.rag.index_processor.index_processor import IndexProcessor
from core.rag.index_processor.index_processor_base import SummaryIndexSettingDict
from core.rag.summary_index.summary_index import SummaryIndex
from core.workflow.nodes.knowledge_index import KNOWLEDGE_INDEX_NODE_TYPE
from dify_graph.entities.graph_config import NodeConfigDict
@ -127,7 +128,7 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]):
is_preview: bool,
batch: Any,
chunks: Mapping[str, Any],
summary_index_setting: dict | None = None,
summary_index_setting: SummaryIndexSettingDict | None = None,
):
if not document_id:
raise KnowledgeIndexNodeError("document_id is required.")

View File

@ -12,6 +12,7 @@ from core.db.session_factory import session_factory
from core.model_manager import ModelManager
from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.index_processor.constant.doc_type import DocType
from core.rag.index_processor.index_processor_base import SummaryIndexSettingDict
from core.rag.models.document import Document
from dify_graph.model_runtime.entities.llm_entities import LLMUsage
from dify_graph.model_runtime.entities.model_entities import ModelType
@ -30,7 +31,7 @@ class SummaryIndexService:
def generate_summary_for_segment(
segment: DocumentSegment,
dataset: Dataset,
summary_index_setting: dict,
summary_index_setting: SummaryIndexSettingDict,
) -> tuple[str, LLMUsage]:
"""
Generate summary for a single segment.
@ -600,7 +601,7 @@ class SummaryIndexService:
def generate_and_vectorize_summary(
segment: DocumentSegment,
dataset: Dataset,
summary_index_setting: dict,
summary_index_setting: SummaryIndexSettingDict,
) -> DocumentSegmentSummary:
"""
Generate summary for a segment and vectorize it.
@ -705,7 +706,7 @@ class SummaryIndexService:
def generate_summaries_for_document(
dataset: Dataset,
document: DatasetDocument,
summary_index_setting: dict,
summary_index_setting: SummaryIndexSettingDict,
segment_ids: list[str] | None = None,
only_parent_chunks: bool = False,
) -> list[DocumentSegmentSummary]: