feat: Add Hologres as a VDB & FullText DB choice (#32830)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Crazywoola <100913391+crazywoola@users.noreply.github.com>
This commit is contained in:
Ye Ding 2026-03-15 20:55:02 +08:00 committed by GitHub
parent 0fa7548346
commit f795d24151
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 921 additions and 3 deletions

3
.gitignore vendored
View File

@ -237,3 +237,6 @@ scripts/stress-test/reports/
# settings # settings
*.local.json *.local.json
*.local.md *.local.md
# Code Agent Folder
.qoder/*

View File

@ -180,7 +180,7 @@ CONSOLE_CORS_ALLOW_ORIGINS=http://localhost:3000,*
COOKIE_DOMAIN= COOKIE_DOMAIN=
# Vector database configuration # Vector database configuration
# Supported values are `weaviate`, `oceanbase`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`. # Supported values are `weaviate`, `oceanbase`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`, `hologres`.
VECTOR_STORE=weaviate VECTOR_STORE=weaviate
# Prefix used to create collection name in vector database # Prefix used to create collection name in vector database
VECTOR_INDEX_NAME_PREFIX=Vector_index VECTOR_INDEX_NAME_PREFIX=Vector_index
@ -217,6 +217,20 @@ COUCHBASE_PASSWORD=password
COUCHBASE_BUCKET_NAME=Embeddings COUCHBASE_BUCKET_NAME=Embeddings
COUCHBASE_SCOPE_NAME=_default COUCHBASE_SCOPE_NAME=_default
# Hologres configuration
# access_key_id is used as the PG username, access_key_secret is used as the PG password
HOLOGRES_HOST=
HOLOGRES_PORT=80
HOLOGRES_DATABASE=
HOLOGRES_ACCESS_KEY_ID=
HOLOGRES_ACCESS_KEY_SECRET=
HOLOGRES_SCHEMA=public
HOLOGRES_TOKENIZER=jieba
HOLOGRES_DISTANCE_METHOD=Cosine
HOLOGRES_BASE_QUANTIZATION_TYPE=rabitq
HOLOGRES_MAX_DEGREE=64
HOLOGRES_EF_CONSTRUCTION=400
# Milvus configuration # Milvus configuration
MILVUS_URI=http://127.0.0.1:19530 MILVUS_URI=http://127.0.0.1:19530
MILVUS_TOKEN= MILVUS_TOKEN=

View File

@ -160,6 +160,7 @@ def migrate_knowledge_vector_database():
} }
lower_collection_vector_types = { lower_collection_vector_types = {
VectorType.ANALYTICDB, VectorType.ANALYTICDB,
VectorType.HOLOGRES,
VectorType.CHROMA, VectorType.CHROMA,
VectorType.MYSCALE, VectorType.MYSCALE,
VectorType.PGVECTO_RS, VectorType.PGVECTO_RS,

View File

@ -26,6 +26,7 @@ from .vdb.chroma_config import ChromaConfig
from .vdb.clickzetta_config import ClickzettaConfig from .vdb.clickzetta_config import ClickzettaConfig
from .vdb.couchbase_config import CouchbaseConfig from .vdb.couchbase_config import CouchbaseConfig
from .vdb.elasticsearch_config import ElasticsearchConfig from .vdb.elasticsearch_config import ElasticsearchConfig
from .vdb.hologres_config import HologresConfig
from .vdb.huawei_cloud_config import HuaweiCloudConfig from .vdb.huawei_cloud_config import HuaweiCloudConfig
from .vdb.iris_config import IrisVectorConfig from .vdb.iris_config import IrisVectorConfig
from .vdb.lindorm_config import LindormConfig from .vdb.lindorm_config import LindormConfig
@ -347,6 +348,7 @@ class MiddlewareConfig(
AnalyticdbConfig, AnalyticdbConfig,
ChromaConfig, ChromaConfig,
ClickzettaConfig, ClickzettaConfig,
HologresConfig,
HuaweiCloudConfig, HuaweiCloudConfig,
IrisVectorConfig, IrisVectorConfig,
MilvusConfig, MilvusConfig,

View File

@ -0,0 +1,68 @@
from holo_search_sdk.types import BaseQuantizationType, DistanceType, TokenizerType
from pydantic import Field
from pydantic_settings import BaseSettings
class HologresConfig(BaseSettings):
    """
    Configuration settings for Hologres vector database.

    Hologres is compatible with PostgreSQL protocol.
    access_key_id is used as the PostgreSQL username,
    and access_key_secret is used as the PostgreSQL password.

    Each attribute is populated from the environment variable of the same
    name (pydantic ``BaseSettings`` behavior).
    """

    # Connection endpoint; no default so deployments must set it explicitly.
    HOLOGRES_HOST: str | None = Field(
        description="Hostname or IP address of the Hologres instance.",
        default=None,
    )
    # Hologres public endpoints listen on port 80 by default.
    HOLOGRES_PORT: int = Field(
        description="Port number for connecting to the Hologres instance.",
        default=80,
    )
    HOLOGRES_DATABASE: str | None = Field(
        description="Name of the Hologres database to connect to.",
        default=None,
    )
    # Credentials: the AccessKey pair doubles as the PostgreSQL user/password.
    HOLOGRES_ACCESS_KEY_ID: str | None = Field(
        description="Alibaba Cloud AccessKey ID, also used as the PostgreSQL username.",
        default=None,
    )
    HOLOGRES_ACCESS_KEY_SECRET: str | None = Field(
        description="Alibaba Cloud AccessKey Secret, also used as the PostgreSQL password.",
        default=None,
    )
    HOLOGRES_SCHEMA: str = Field(
        description="Schema name in the Hologres database.",
        default="public",
    )
    # Full-text index tokenizer; value set is defined by holo_search_sdk.types.TokenizerType.
    HOLOGRES_TOKENIZER: TokenizerType = Field(
        description="Tokenizer for full-text search index (e.g., 'jieba', 'ik', 'standard', 'simple').",
        default="jieba",
    )
    HOLOGRES_DISTANCE_METHOD: DistanceType = Field(
        description="Distance method for vector index (e.g., 'Cosine', 'Euclidean', 'InnerProduct').",
        default="Cosine",
    )
    HOLOGRES_BASE_QUANTIZATION_TYPE: BaseQuantizationType = Field(
        description="Base quantization type for vector index (e.g., 'rabitq', 'sq8', 'fp16', 'fp32').",
        default="rabitq",
    )
    # HNSW index tuning knobs (graph degree / build-time candidate list size).
    HOLOGRES_MAX_DEGREE: int = Field(
        description="Max degree (M) parameter for HNSW vector index.",
        default=64,
    )
    HOLOGRES_EF_CONSTRUCTION: int = Field(
        description="ef_construction parameter for HNSW vector index.",
        default=400,
    )

View File

@ -263,6 +263,7 @@ def _get_retrieval_methods_by_vector_type(vector_type: str | None, is_mock: bool
VectorType.BAIDU, VectorType.BAIDU,
VectorType.ALIBABACLOUD_MYSQL, VectorType.ALIBABACLOUD_MYSQL,
VectorType.IRIS, VectorType.IRIS,
VectorType.HOLOGRES,
} }
semantic_methods = {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]} semantic_methods = {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]}

View File

@ -0,0 +1,361 @@
import json
import logging
import time
from typing import Any
import holo_search_sdk as holo # type: ignore
from holo_search_sdk.types import BaseQuantizationType, DistanceType, TokenizerType
from psycopg import sql as psql
from pydantic import BaseModel, model_validator
from configs import dify_config
from core.rag.datasource.vdb.vector_base import BaseVector
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.embedding.embedding_base import Embeddings
from core.rag.models.document import Document
from extensions.ext_redis import redis_client
from models.dataset import Dataset
logger = logging.getLogger(__name__)
class HologresVectorConfig(BaseModel):
    """
    Connection settings for a single Hologres vector database.

    Hologres speaks the PostgreSQL protocol: access_key_id is used as the
    PostgreSQL username and access_key_secret as the PostgreSQL password.
    """

    host: str
    port: int = 80
    database: str
    access_key_id: str
    access_key_secret: str
    schema_name: str = "public"
    tokenizer: TokenizerType = "jieba"
    distance_method: DistanceType = "Cosine"
    base_quantization_type: BaseQuantizationType = "rabitq"
    max_degree: int = 64
    ef_construction: int = 400

    @model_validator(mode="before")
    @classmethod
    def validate_config(cls, values: dict):
        """Reject configs missing any mandatory connection setting."""
        required = ("host", "database", "access_key_id", "access_key_secret")
        for field_name in required:
            if not values.get(field_name):
                # Error text mirrors the environment variable name (HOLOGRES_<FIELD>).
                raise ValueError(f"config HOLOGRES_{field_name.upper()} is required")
        return values
class HologresVector(BaseVector):
    """
    Hologres vector storage implementation using holo-search-sdk.

    Supports semantic search (vector), full-text search, and hybrid search.
    Each collection is backed by a table ``embedding_<collection_name>`` with
    columns (id TEXT PK, text TEXT, meta JSONB, embedding float4[]).
    """

    def __init__(self, collection_name: str, config: HologresVectorConfig):
        super().__init__(collection_name)
        self._config = config
        self._client = self._init_client(config)
        # Hologres folds unquoted identifiers to lowercase; normalize up front
        # so SQL built with Identifier() and SDK calls agree on the name.
        self.table_name = f"embedding_{collection_name}".lower()

    def _init_client(self, config: HologresVectorConfig):
        """Initialize and return a holo-search-sdk client."""
        client = holo.connect(
            host=config.host,
            port=config.port,
            database=config.database,
            access_key_id=config.access_key_id,
            access_key_secret=config.access_key_secret,
            schema=config.schema_name,
        )
        client.connect()
        return client

    def get_type(self) -> str:
        """Return the vector store type identifier."""
        return VectorType.HOLOGRES

    def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs):
        """Create collection table with vector and full-text indexes, then add texts.

        The vector dimension is inferred from the first embedding. With an
        empty embeddings list there is nothing to size the index from and
        nothing to store, so this is a no-op (previously raised IndexError).
        """
        if not embeddings:
            return
        dimension = len(embeddings[0])
        self._create_collection(dimension)
        self.add_texts(texts, embeddings)

    def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs):
        """Add texts with embeddings to the collection using batch upsert.

        Returns the list of doc_ids written (empty list for empty input).
        """
        if not documents:
            return []
        pks: list[str] = []
        batch_size = 100
        column_names = ["id", "text", "meta", "embedding"]
        # open_table is loop-invariant — open once instead of once per batch.
        table = self._client.open_table(self.table_name)
        for i in range(0, len(documents), batch_size):
            batch_docs = documents[i : i + batch_size]
            batch_embeddings = embeddings[i : i + batch_size]
            values = []
            for j, doc in enumerate(batch_docs):
                doc_id = doc.metadata.get("doc_id", "") if doc.metadata else ""
                pks.append(doc_id)
                values.append(
                    [
                        doc_id,
                        doc.page_content,
                        json.dumps(doc.metadata or {}),
                        batch_embeddings[j],
                    ]
                )
            table.upsert_multi(
                index_column="id",
                values=values,
                column_names=column_names,
                update=True,
                update_columns=["text", "meta", "embedding"],
            )
        return pks

    def text_exists(self, id: str) -> bool:
        """Check if a text with the given doc_id exists in the collection."""
        if not self._client.check_table_exist(self.table_name):
            return False
        result = self._client.execute(
            psql.SQL("SELECT 1 FROM {} WHERE id = {} LIMIT 1").format(
                psql.Identifier(self.table_name), psql.Literal(id)
            ),
            fetch_result=True,
        )
        return bool(result)

    def get_ids_by_metadata_field(self, key: str, value: str) -> list[str] | None:
        """Get document IDs whose JSONB meta field *key* equals *value*.

        Returns None (not an empty list) when nothing matches, preserving the
        original contract callers may rely on.
        """
        result = self._client.execute(
            psql.SQL("SELECT id FROM {} WHERE meta->>{} = {}").format(
                psql.Identifier(self.table_name), psql.Literal(key), psql.Literal(value)
            ),
            fetch_result=True,
        )
        if result:
            return [row[0] for row in result]
        return None

    def delete_by_ids(self, ids: list[str]):
        """Delete documents by their doc_id list; no-op for empty input or missing table."""
        if not ids:
            return
        if not self._client.check_table_exist(self.table_name):
            return
        self._client.execute(
            psql.SQL("DELETE FROM {} WHERE id IN ({})").format(
                psql.Identifier(self.table_name),
                psql.SQL(", ").join(psql.Literal(id) for id in ids),
            )
        )

    def delete_by_metadata_field(self, key: str, value: str):
        """Delete documents whose JSONB meta field *key* equals *value*."""
        if not self._client.check_table_exist(self.table_name):
            return
        self._client.execute(
            psql.SQL("DELETE FROM {} WHERE meta->>{} = {}").format(
                psql.Identifier(self.table_name), psql.Literal(key), psql.Literal(value)
            )
        )

    def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
        """Search for documents by vector similarity.

        Supported kwargs: top_k (default 4), score_threshold (default 0.0),
        document_ids_filter (restricts to meta->>'document_id' values).
        """
        if not self._client.check_table_exist(self.table_name):
            return []
        top_k = kwargs.get("top_k", 4)
        score_threshold = float(kwargs.get("score_threshold") or 0.0)
        table = self._client.open_table(self.table_name)
        query = (
            table.search_vector(
                vector=query_vector,
                column="embedding",
                distance_method=self._config.distance_method,
                output_name="distance",
            )
            .select(["id", "text", "meta"])
            .limit(top_k)
        )
        # Apply document_ids_filter if provided
        document_ids_filter = kwargs.get("document_ids_filter")
        if document_ids_filter:
            filter_sql = psql.SQL("meta->>'document_id' IN ({})").format(
                psql.SQL(", ").join(psql.Literal(id) for id in document_ids_filter)
            )
            query = query.where(filter_sql)
        results = query.fetchall()
        return self._process_vector_results(results, score_threshold)

    def _process_vector_results(self, results: list, score_threshold: float) -> list[Document]:
        """Process vector search results into Document objects."""
        docs = []
        for row in results:
            # row format: (distance, id, text, meta)
            # distance is first because search_vector() adds the computed column before selected columns
            distance = row[0]
            text = row[2]
            meta = row[3]
            if isinstance(meta, str):
                meta = json.loads(meta)
            # Convert distance to similarity score (consistent with pgvector)
            score = 1 - distance
            meta["score"] = score
            if score >= score_threshold:
                docs.append(Document(page_content=text, metadata=meta))
        return docs

    def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
        """Search for documents by full-text search.

        Supported kwargs: top_k (default 4), document_ids_filter.
        """
        if not self._client.check_table_exist(self.table_name):
            return []
        top_k = kwargs.get("top_k", 4)
        table = self._client.open_table(self.table_name)
        search_query = table.search_text(
            column="text",
            expression=query,
            return_score=True,
            return_score_name="score",
            return_all_columns=True,
        ).limit(top_k)
        # Apply document_ids_filter if provided
        document_ids_filter = kwargs.get("document_ids_filter")
        if document_ids_filter:
            filter_sql = psql.SQL("meta->>'document_id' IN ({})").format(
                psql.SQL(", ").join(psql.Literal(id) for id in document_ids_filter)
            )
            search_query = search_query.where(filter_sql)
        results = search_query.fetchall()
        return self._process_full_text_results(results)

    def _process_full_text_results(self, results: list) -> list[Document]:
        """Process full-text search results into Document objects."""
        docs = []
        for row in results:
            # row format: (id, text, meta, embedding, score)
            text = row[1]
            meta = row[2]
            score = row[-1]  # score is the last column from return_score
            if isinstance(meta, str):
                meta = json.loads(meta)
            meta["score"] = score
            docs.append(Document(page_content=text, metadata=meta))
        return docs

    def delete(self):
        """Delete the entire collection table."""
        if self._client.check_table_exist(self.table_name):
            self._client.drop_table(self.table_name)

    def _create_collection(self, dimension: int):
        """Create the collection table with vector and full-text indexes.

        Guarded by a redis lock plus a one-hour "already created" cache key so
        concurrent workers don't race on DDL.
        """
        lock_name = f"vector_indexing_lock_{self._collection_name}"
        # Lock timeout must exceed the worst-case work done while holding it:
        # the table-ready poll alone can take up to 30s, so 20s (the previous
        # value) could let the lock expire mid-creation. Use 60s.
        with redis_client.lock(lock_name, timeout=60):
            collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
            if redis_client.get(collection_exist_cache_key):
                return
            if not self._client.check_table_exist(self.table_name):
                # Create table via SQL with CHECK constraint for vector dimension
                create_table_sql = psql.SQL("""
                    CREATE TABLE IF NOT EXISTS {} (
                        id TEXT PRIMARY KEY,
                        text TEXT NOT NULL,
                        meta JSONB NOT NULL,
                        embedding float4[] NOT NULL
                            CHECK (array_ndims(embedding) = 1
                            AND array_length(embedding, 1) = {})
                    );
                """).format(psql.Identifier(self.table_name), psql.Literal(dimension))
                self._client.execute(create_table_sql)
                # Wait for table to be fully ready before creating indexes
                max_wait_seconds = 30
                poll_interval = 2
                for _ in range(max_wait_seconds // poll_interval):
                    if self._client.check_table_exist(self.table_name):
                        break
                    time.sleep(poll_interval)
                else:
                    raise RuntimeError(f"Table {self.table_name} was not ready after {max_wait_seconds}s")
                # Open table and set vector index
                table = self._client.open_table(self.table_name)
                table.set_vector_index(
                    column="embedding",
                    distance_method=self._config.distance_method,
                    base_quantization_type=self._config.base_quantization_type,
                    max_degree=self._config.max_degree,
                    ef_construction=self._config.ef_construction,
                    # rabitq quantization is lossy; reorder refines results.
                    use_reorder=self._config.base_quantization_type == "rabitq",
                )
                # Create full-text search index
                table.create_text_index(
                    index_name=f"ft_idx_{self._collection_name}",
                    column="text",
                    tokenizer=self._config.tokenizer,
                )
            redis_client.set(collection_exist_cache_key, 1, ex=3600)
class HologresVectorFactory(AbstractVectorFactory):
    """Factory class for creating HologresVector instances."""

    def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> HologresVector:
        """Build a HologresVector for *dataset*, reusing or generating its collection name."""
        if dataset.index_struct_dict:
            # Existing dataset: reuse the collection name recorded in its index struct.
            collection_name: str = dataset.index_struct_dict["vector_store"]["class_prefix"]
        else:
            # New dataset: derive a collection name and persist the index struct.
            collection_name = Dataset.gen_collection_name_by_id(dataset.id)
            dataset.index_struct = json.dumps(self.gen_index_struct_dict(VectorType.HOLOGRES, collection_name))

        config = HologresVectorConfig(
            host=dify_config.HOLOGRES_HOST or "",
            port=dify_config.HOLOGRES_PORT,
            database=dify_config.HOLOGRES_DATABASE or "",
            access_key_id=dify_config.HOLOGRES_ACCESS_KEY_ID or "",
            access_key_secret=dify_config.HOLOGRES_ACCESS_KEY_SECRET or "",
            schema_name=dify_config.HOLOGRES_SCHEMA,
            tokenizer=dify_config.HOLOGRES_TOKENIZER,
            distance_method=dify_config.HOLOGRES_DISTANCE_METHOD,
            base_quantization_type=dify_config.HOLOGRES_BASE_QUANTIZATION_TYPE,
            max_degree=dify_config.HOLOGRES_MAX_DEGREE,
            ef_construction=dify_config.HOLOGRES_EF_CONSTRUCTION,
        )
        return HologresVector(collection_name=collection_name, config=config)

View File

@ -191,6 +191,10 @@ class Vector:
from core.rag.datasource.vdb.iris.iris_vector import IrisVectorFactory from core.rag.datasource.vdb.iris.iris_vector import IrisVectorFactory
return IrisVectorFactory return IrisVectorFactory
case VectorType.HOLOGRES:
from core.rag.datasource.vdb.hologres.hologres_vector import HologresVectorFactory
return HologresVectorFactory
case _: case _:
raise ValueError(f"Vector store {vector_type} is not supported.") raise ValueError(f"Vector store {vector_type} is not supported.")

View File

@ -34,3 +34,4 @@ class VectorType(StrEnum):
MATRIXONE = "matrixone" MATRIXONE = "matrixone"
CLICKZETTA = "clickzetta" CLICKZETTA = "clickzetta"
IRIS = "iris" IRIS = "iris"
HOLOGRES = "hologres"

View File

@ -116,6 +116,7 @@ class ToolParameterConfigurationManager:
return a deep copy of parameters with decrypted values return a deep copy of parameters with decrypted values
""" """
parameters = self._deep_copy(parameters)
cache = ToolParameterCache( cache = ToolParameterCache(
tenant_id=self.tenant_id, tenant_id=self.tenant_id,

View File

@ -226,6 +226,7 @@ vdb = [
"xinference-client~=1.2.2", "xinference-client~=1.2.2",
"mo-vector~=0.1.13", "mo-vector~=0.1.13",
"mysql-connector-python>=9.3.0", "mysql-connector-python>=9.3.0",
"holo-search-sdk>=0.4.1",
] ]
[tool.mypy] [tool.mypy]

View File

@ -35,7 +35,8 @@
"tos", "tos",
"gmpy2", "gmpy2",
"sendgrid", "sendgrid",
"sendgrid.helpers.mail" "sendgrid.helpers.mail",
"holo_search_sdk.types"
], ],
"reportUnknownMemberType": "hint", "reportUnknownMemberType": "hint",
"reportUnknownParameterType": "hint", "reportUnknownParameterType": "hint",

View File

@ -77,6 +77,19 @@ IRIS_MAX_CONNECTION=3
IRIS_TEXT_INDEX=true IRIS_TEXT_INDEX=true
IRIS_TEXT_INDEX_LANGUAGE=en IRIS_TEXT_INDEX_LANGUAGE=en
# Hologres configuration
HOLOGRES_HOST=localhost
HOLOGRES_PORT=80
HOLOGRES_DATABASE=test_db
HOLOGRES_ACCESS_KEY_ID=test_access_key_id
HOLOGRES_ACCESS_KEY_SECRET=test_access_key_secret
HOLOGRES_SCHEMA=public
HOLOGRES_TOKENIZER=jieba
HOLOGRES_DISTANCE_METHOD=Cosine
HOLOGRES_BASE_QUANTIZATION_TYPE=rabitq
HOLOGRES_MAX_DEGREE=64
HOLOGRES_EF_CONSTRUCTION=400
# Upload configuration # Upload configuration
UPLOAD_FILE_SIZE_LIMIT=15 UPLOAD_FILE_SIZE_LIMIT=15

View File

@ -0,0 +1,209 @@
import json
import os
from typing import Any
import holo_search_sdk as holo
import pytest
from _pytest.monkeypatch import MonkeyPatch
from psycopg import sql as psql
# Shared in-memory storage: {table_name: {doc_id: {"id", "text", "meta", "embedding"}}}
_mock_tables: dict[str, dict[str, dict[str, Any]]] = {}
class MockSearchQuery:
    """Mock query builder for search_vector and search_text results."""

    def __init__(self, table_name: str, search_type: str):
        self._table_name = table_name
        self._search_type = search_type  # "vector" or "text" — decides row layout
        self._limit_val = 10
        self._filter_sql = None

    def select(self, columns):
        """No-op: the mock always emits its fixed column layout."""
        return self

    def limit(self, n):
        self._limit_val = n
        return self

    def where(self, filter_sql):
        self._filter_sql = filter_sql
        return self

    def _apply_filter(self, row: dict[str, Any]) -> bool:
        """Apply the filter SQL to check if a row matches."""
        if self._filter_sql is None:
            return True
        # Extract literals (the document IDs) from the filter SQL
        # Filter format: meta->>'document_id' IN ('doc1', 'doc2')
        literals = [v for t, v in _extract_identifiers_and_literals(self._filter_sql) if t == "literal"]
        if not literals:
            return True
        # Get the document_id from the row's meta field
        meta = row.get("meta", "{}")
        if isinstance(meta, str):
            meta = json.loads(meta)
        doc_id = meta.get("document_id")
        return doc_id in literals

    def fetchall(self):
        """Return matching rows in the tuple layouts the real SDK produces.

        Fix vs. the original: the limit is applied AFTER filtering. Previously
        the stored rows were sliced to `_limit_val` first, so matching rows
        located beyond that slice were silently dropped.
        """
        data = _mock_tables.get(self._table_name, {})
        results = []
        for row in data.values():
            if not self._apply_filter(row):
                continue
            if self._search_type == "vector":
                # row format expected by _process_vector_results: (distance, id, text, meta)
                results.append((0.1, row["id"], row["text"], row["meta"]))
            else:
                # row format expected by _process_full_text_results: (id, text, meta, embedding, score)
                results.append((row["id"], row["text"], row["meta"], row.get("embedding", []), 0.9))
            if len(results) >= self._limit_val:
                break
        return results
class MockTable:
    """Mock table object returned by client.open_table()."""

    def __init__(self, table_name: str):
        self._table_name = table_name

    def upsert_multi(self, index_column, values, column_names, update=True, update_columns=None):
        """Insert-or-replace each row in the shared store, keyed by its "id" column."""
        store = _mock_tables.setdefault(self._table_name, {})
        key_pos = column_names.index("id")
        for record in values:
            store[record[key_pos]] = dict(zip(column_names, record))

    def search_vector(self, vector, column, distance_method, output_name):
        """Return a query builder for vector-style result rows."""
        return MockSearchQuery(self._table_name, "vector")

    def search_text(self, column, expression, return_score=False, return_score_name="score", return_all_columns=False):
        """Return a query builder for full-text-style result rows."""
        return MockSearchQuery(self._table_name, "text")

    def set_vector_index(
        self, column, distance_method, base_quantization_type, max_degree, ef_construction, use_reorder
    ):
        """No-op: index creation has no observable effect in the mock."""

    def create_text_index(self, index_name, column, tokenizer):
        """No-op: index creation has no observable effect in the mock."""
def _extract_sql_template(query) -> str:
    """Return the raw SQL template string embedded in a psycopg composable.

    For a Composed object the first SQL part carries the template; a bare
    SQL object holds it directly. Anything else yields "". Relies on
    psycopg's private `_obj` attribute.
    """
    if isinstance(query, psql.SQL):
        return query._obj
    if isinstance(query, psql.Composed):
        return next((part._obj for part in query if isinstance(part, psql.SQL)), "")
    return ""
def _extract_identifiers_and_literals(query) -> list[Any]:
    """Extract Identifier and Literal values from a psycopg Composed object.

    Returns ("ident", name) / ("literal", value) tuples in the order they
    appear. Only ONE level of nested Composed parts is walked — enough for
    the SQL(", ").join(...) IN-clauses built by HologresVector, but not for
    arbitrarily nested composables. Relies on psycopg's private `_obj`
    attribute to read the wrapped values.
    """
    values: list[Any] = []
    if isinstance(query, psql.Composed):
        for part in query:
            if isinstance(part, psql.Identifier):
                # Identifier stores dotted-name parts as a tuple; we only use single names.
                values.append(("ident", part._obj[0] if part._obj else ""))
            elif isinstance(part, psql.Literal):
                values.append(("literal", part._obj))
            elif isinstance(part, psql.Composed):
                # Handles SQL(...).join(...) for IN clauses
                for sub in part:
                    if isinstance(sub, psql.Literal):
                        values.append(("literal", sub._obj))
    return values
class MockHologresClient:
    """Mock holo_search_sdk client that stores data in memory."""

    def connect(self):
        # The real client opens a PostgreSQL connection; the mock has nothing to do.
        pass

    def check_table_exist(self, table_name):
        return table_name in _mock_tables

    def open_table(self, table_name):
        return MockTable(table_name)

    def execute(self, query, fetch_result=False):
        """Crude SQL dispatcher matching the statement shapes HologresVector emits.

        Branch order matters: "SELECT 1" must be tested before "SELECT id",
        and the DELETE variants are told apart by their WHERE-clause text.
        """
        template = _extract_sql_template(query)
        params = _extract_identifiers_and_literals(query)
        if "CREATE TABLE" in template.upper():
            # Extract table name from first identifier
            table_name = next((v for t, v in params if t == "ident"), "unknown")
            if table_name not in _mock_tables:
                _mock_tables[table_name] = {}
            return None
        if "SELECT 1" in template:
            # text_exists: SELECT 1 FROM {table} WHERE id = {id} LIMIT 1
            table_name = next((v for t, v in params if t == "ident"), "")
            doc_id = next((v for t, v in params if t == "literal"), "")
            data = _mock_tables.get(table_name, {})
            return [(1,)] if doc_id in data else []
        if "SELECT id" in template:
            # get_ids_by_metadata_field: SELECT id FROM {table} WHERE meta->>{key} = {value}
            table_name = next((v for t, v in params if t == "ident"), "")
            literals = [v for t, v in params if t == "literal"]
            key = literals[0] if len(literals) > 0 else ""
            value = literals[1] if len(literals) > 1 else ""
            data = _mock_tables.get(table_name, {})
            return [(doc_id,) for doc_id, row in data.items() if json.loads(row.get("meta", "{}")).get(key) == value]
        if "DELETE" in template.upper():
            table_name = next((v for t, v in params if t == "ident"), "")
            if "id IN" in template:
                # delete_by_ids
                ids_to_delete = [v for t, v in params if t == "literal"]
                for did in ids_to_delete:
                    _mock_tables.get(table_name, {}).pop(did, None)
            elif "meta->>" in template:
                # delete_by_metadata_field
                literals = [v for t, v in params if t == "literal"]
                key = literals[0] if len(literals) > 0 else ""
                value = literals[1] if len(literals) > 1 else ""
                data = _mock_tables.get(table_name, {})
                to_remove = [
                    doc_id for doc_id, row in data.items() if json.loads(row.get("meta", "{}")).get(key) == value
                ]
                for did in to_remove:
                    data.pop(did, None)
            return None
        # Unrecognized statements: pretend success (empty result set if asked to fetch).
        return [] if fetch_result else None

    def drop_table(self, table_name):
        _mock_tables.pop(table_name, None)
def mock_connect(**kwargs):
    """Replacement for holo_search_sdk.connect() that returns a mock client."""
    return MockHologresClient()


# MOCK_SWITCH=true patches the SDK connection; otherwise tests hit a real instance.
MOCK = os.getenv("MOCK_SWITCH", "false").lower() == "true"


@pytest.fixture
def setup_hologres_mock(monkeypatch: MonkeyPatch):
    """Patch holo_search_sdk.connect with the in-memory mock for the test's duration."""
    if MOCK:
        monkeypatch.setattr(holo, "connect", mock_connect)
    yield
    # Teardown: wipe the shared store so tables don't leak between tests.
    if MOCK:
        _mock_tables.clear()
        monkeypatch.undo()

View File

@ -0,0 +1,149 @@
import os
import uuid
from typing import cast
from holo_search_sdk.types import BaseQuantizationType, DistanceType, TokenizerType
from core.rag.datasource.vdb.hologres.hologres_vector import HologresVector, HologresVectorConfig
from core.rag.models.document import Document
from tests.integration_tests.vdb.__mock.hologres import setup_hologres_mock
from tests.integration_tests.vdb.test_vector_store import AbstractVectorTest, get_example_text, setup_mock_redis
MOCK = os.getenv("MOCK_SWITCH", "false").lower() == "true"
class HologresVectorTest(AbstractVectorTest):
    """Exercises HologresVector against a real instance or the in-memory mock.

    Configuration comes from HOLOGRES_* environment variables, with defaults
    suitable for the mock. The MOCK module flag decides which assertions are
    safe (e.g. full-text results are only deterministic under the mock).
    """

    def __init__(self):
        super().__init__()
        # Hologres requires collection names to be lowercase
        self.collection_name = self.collection_name.lower()
        self.vector = HologresVector(
            collection_name=self.collection_name,
            config=HologresVectorConfig(
                host=os.environ.get("HOLOGRES_HOST", "localhost"),
                port=int(os.environ.get("HOLOGRES_PORT", "80")),
                database=os.environ.get("HOLOGRES_DATABASE", "test_db"),
                access_key_id=os.environ.get("HOLOGRES_ACCESS_KEY_ID", "test_key"),
                access_key_secret=os.environ.get("HOLOGRES_ACCESS_KEY_SECRET", "test_secret"),
                schema_name=os.environ.get("HOLOGRES_SCHEMA", "public"),
                # cast() narrows plain env strings to the SDK's Literal types for the type checker.
                tokenizer=cast(TokenizerType, os.environ.get("HOLOGRES_TOKENIZER", "jieba")),
                distance_method=cast(DistanceType, os.environ.get("HOLOGRES_DISTANCE_METHOD", "Cosine")),
                base_quantization_type=cast(
                    BaseQuantizationType, os.environ.get("HOLOGRES_BASE_QUANTIZATION_TYPE", "rabitq")
                ),
                max_degree=int(os.environ.get("HOLOGRES_MAX_DEGREE", "64")),
                ef_construction=int(os.environ.get("HOLOGRES_EF_CONSTRUCTION", "400")),
            ),
        )

    def search_by_full_text(self):
        """Override: full-text index may not be immediately ready in real mode."""
        hits_by_full_text = self.vector.search_by_full_text(query=get_example_text())
        if MOCK:
            # In mock mode, full-text search should return the document we inserted
            assert len(hits_by_full_text) == 1
            assert hits_by_full_text[0].metadata["doc_id"] == self.example_doc_id
        else:
            # In real mode, full-text index may need time to become active
            assert len(hits_by_full_text) >= 0

    def search_by_vector_with_filter(self):
        """Test vector search with document_ids_filter."""
        # Create another document with different document_id
        other_doc_id = str(uuid.uuid4())
        other_doc = Document(
            page_content="other_text",
            metadata={
                "doc_id": other_doc_id,
                "doc_hash": other_doc_id,
                "document_id": other_doc_id,
                "dataset_id": self.dataset_id,
            },
        )
        self.vector.add_texts(documents=[other_doc], embeddings=[self.example_embedding])
        # Search with filter - should only return the original document
        hits = self.vector.search_by_vector(
            query_vector=self.example_embedding,
            document_ids_filter=[self.example_doc_id],
        )
        assert len(hits) == 1
        assert hits[0].metadata["doc_id"] == self.example_doc_id
        # Search without filter - should return both
        all_hits = self.vector.search_by_vector(query_vector=self.example_embedding, top_k=10)
        assert len(all_hits) >= 2

    def search_by_full_text_with_filter(self):
        """Test full-text search with document_ids_filter."""
        # Create another document with different document_id
        other_doc_id = str(uuid.uuid4())
        other_doc = Document(
            page_content="unique_other_text",
            metadata={
                "doc_id": other_doc_id,
                "doc_hash": other_doc_id,
                "document_id": other_doc_id,
                "dataset_id": self.dataset_id,
            },
        )
        self.vector.add_texts(documents=[other_doc], embeddings=[self.example_embedding])
        # Search with filter - should only return the original document
        hits = self.vector.search_by_full_text(
            query=get_example_text(),
            document_ids_filter=[self.example_doc_id],
        )
        if MOCK:
            # Only deterministic in mock mode; the real index may still be warming up.
            assert len(hits) == 1
            assert hits[0].metadata["doc_id"] == self.example_doc_id

    def get_ids_by_metadata_field(self):
        """Override: Hologres implements this method via JSONB query."""
        ids = self.vector.get_ids_by_metadata_field(key="document_id", value=self.example_doc_id)
        assert ids is not None
        assert len(ids) == 1

    def run_all_tests(self):
        """Run the base suite, then the filter tests (which need re-seeded data)."""
        # Clean up before running tests
        self.vector.delete()
        # Run base tests (create, search, text_exists, get_ids, add_texts, delete_by_ids, delete)
        super().run_all_tests()
        # Additional filter tests require fresh data (table was deleted by base tests)
        if MOCK:
            # Recreate collection for filter tests
            self.vector.create(
                texts=[
                    Document(
                        page_content=get_example_text(),
                        metadata={
                            "doc_id": self.example_doc_id,
                            "doc_hash": self.example_doc_id,
                            "document_id": self.example_doc_id,
                            "dataset_id": self.dataset_id,
                        },
                    )
                ],
                embeddings=[self.example_embedding],
            )
            self.search_by_vector_with_filter()
            self.search_by_full_text_with_filter()
            # Clean up
            self.vector.delete()
def test_hologres_vector(setup_mock_redis, setup_hologres_mock):
    """
    Test Hologres vector database implementation.

    Covers: creating the collection with its vector index, adding texts with
    embeddings, vector similarity search, full-text search, text existence
    checks, batch deletion by IDs, and collection deletion.
    """
    suite = HologresVectorTest()
    suite.run_all_tests()

View File

@ -1609,6 +1609,7 @@ vdb = [
{ name = "clickzetta-connector-python" }, { name = "clickzetta-connector-python" },
{ name = "couchbase" }, { name = "couchbase" },
{ name = "elasticsearch" }, { name = "elasticsearch" },
{ name = "holo-search-sdk" },
{ name = "intersystems-irispython" }, { name = "intersystems-irispython" },
{ name = "mo-vector" }, { name = "mo-vector" },
{ name = "mysql-connector-python" }, { name = "mysql-connector-python" },
@ -1809,6 +1810,7 @@ vdb = [
{ name = "clickzetta-connector-python", specifier = ">=0.8.102" }, { name = "clickzetta-connector-python", specifier = ">=0.8.102" },
{ name = "couchbase", specifier = "~=4.3.0" }, { name = "couchbase", specifier = "~=4.3.0" },
{ name = "elasticsearch", specifier = "==8.14.0" }, { name = "elasticsearch", specifier = "==8.14.0" },
{ name = "holo-search-sdk", specifier = ">=0.4.1" },
{ name = "intersystems-irispython", specifier = ">=5.1.0" }, { name = "intersystems-irispython", specifier = ">=5.1.0" },
{ name = "mo-vector", specifier = "~=0.1.13" }, { name = "mo-vector", specifier = "~=0.1.13" },
{ name = "mysql-connector-python", specifier = ">=9.3.0" }, { name = "mysql-connector-python", specifier = ">=9.3.0" },
@ -2878,6 +2880,20 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/f5/a9/55a4ac9c16fdf32e92e9e22c49f61affe5135e177ca19b014484e28950f7/hiredis-3.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:04ec150e95eea3de9ff8bac754978aa17b8bf30a86d4ab2689862020945396b0", size = 22379, upload-time = "2025-10-14T16:32:22.916Z" }, { url = "https://files.pythonhosted.org/packages/f5/a9/55a4ac9c16fdf32e92e9e22c49f61affe5135e177ca19b014484e28950f7/hiredis-3.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:04ec150e95eea3de9ff8bac754978aa17b8bf30a86d4ab2689862020945396b0", size = 22379, upload-time = "2025-10-14T16:32:22.916Z" },
] ]
[[package]]
name = "holo-search-sdk"
version = "0.4.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "numpy" },
{ name = "psycopg", extra = ["binary"] },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/0b/b8/70a4999dabbba15e98d201a7399aab76ab96931ad1a27392ba5252cc9165/holo_search_sdk-0.4.1.tar.gz", hash = "sha256:9aea98b6078b9202abb568ed69d798d5e0505d2b4cc3a136a6aa84402bcd2133", size = 56701, upload-time = "2026-01-28T01:44:57.645Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/8a/30/3059a979272f90a96f31b167443cc27675e8cc8f970a3ac0cb80bf803c70/holo_search_sdk-0.4.1-py3-none-any.whl", hash = "sha256:ef1059895ea936ff6a087f68dac92bd1ae0320e51ec5b1d4e7bed7a5dd6beb45", size = 32647, upload-time = "2026-01-28T01:44:56.098Z" },
]
[[package]] [[package]]
name = "hpack" name = "hpack"
version = "4.1.0" version = "4.1.0"
@ -4859,6 +4875,53 @@ version = "1.0.2"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/eb/72/4a7965cf54e341006ad74cdc72cd6572c789bc4f4e3fadc78672f1fbcfbd/psycogreen-1.0.2.tar.gz", hash = "sha256:c429845a8a49cf2f76b71265008760bcd7c7c77d80b806db4dc81116dbcd130d", size = 5411, upload-time = "2020-02-22T19:55:22.02Z" } sdist = { url = "https://files.pythonhosted.org/packages/eb/72/4a7965cf54e341006ad74cdc72cd6572c789bc4f4e3fadc78672f1fbcfbd/psycogreen-1.0.2.tar.gz", hash = "sha256:c429845a8a49cf2f76b71265008760bcd7c7c77d80b806db4dc81116dbcd130d", size = 5411, upload-time = "2020-02-22T19:55:22.02Z" }
[[package]]
name = "psycopg"
version = "3.3.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "typing-extensions" },
{ name = "tzdata", marker = "sys_platform == 'win32'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/d3/b6/379d0a960f8f435ec78720462fd94c4863e7a31237cf81bf76d0af5883bf/psycopg-3.3.3.tar.gz", hash = "sha256:5e9a47458b3c1583326513b2556a2a9473a1001a56c9efe9e587245b43148dd9", size = 165624, upload-time = "2026-02-18T16:52:16.546Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c8/5b/181e2e3becb7672b502f0ed7f16ed7352aca7c109cfb94cf3878a9186db9/psycopg-3.3.3-py3-none-any.whl", hash = "sha256:f96525a72bcfade6584ab17e89de415ff360748c766f0106959144dcbb38c698", size = 212768, upload-time = "2026-02-18T16:46:27.365Z" },
]
[package.optional-dependencies]
binary = [
{ name = "psycopg-binary", marker = "implementation_name != 'pypy'" },
]
[[package]]
name = "psycopg-binary"
version = "3.3.3"
source = { registry = "https://pypi.org/simple" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/be/c0/b389119dd754483d316805260f3e73cdcad97925839107cc7a296f6132b1/psycopg_binary-3.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a89bb9ee11177b2995d87186b1d9fa892d8ea725e85eab28c6525e4cc14ee048", size = 4609740, upload-time = "2026-02-18T16:47:51.093Z" },
{ url = "https://files.pythonhosted.org/packages/cf/e3/9976eef20f61840285174d360da4c820a311ab39d6b82fa09fbb545be825/psycopg_binary-3.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9f7d0cf072c6fbac3795b08c98ef9ea013f11db609659dcfc6b1f6cc31f9e181", size = 4676837, upload-time = "2026-02-18T16:47:55.523Z" },
{ url = "https://files.pythonhosted.org/packages/9f/f2/d28ba2f7404fd7f68d41e8a11df86313bd646258244cb12a8dd83b868a97/psycopg_binary-3.3.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:90eecd93073922f085967f3ed3a98ba8c325cbbc8c1a204e300282abd2369e13", size = 5497070, upload-time = "2026-02-18T16:47:59.929Z" },
{ url = "https://files.pythonhosted.org/packages/de/2f/6c5c54b815edeb30a281cfcea96dc93b3bb6be939aea022f00cab7aa1420/psycopg_binary-3.3.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dac7ee2f88b4d7bb12837989ca354c38d400eeb21bce3b73dac02622f0a3c8d6", size = 5172410, upload-time = "2026-02-18T16:48:05.665Z" },
{ url = "https://files.pythonhosted.org/packages/51/75/8206c7008b57de03c1ada46bd3110cc3743f3fd9ed52031c4601401d766d/psycopg_binary-3.3.3-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b62cf8784eb6d35beaee1056d54caf94ec6ecf2b7552395e305518ab61eb8fd2", size = 6763408, upload-time = "2026-02-18T16:48:13.541Z" },
{ url = "https://files.pythonhosted.org/packages/d4/5a/ea1641a1e6c8c8b3454b0fcb43c3045133a8b703e6e824fae134088e63bd/psycopg_binary-3.3.3-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a39f34c9b18e8f6794cca17bfbcd64572ca2482318db644268049f8c738f35a6", size = 5006255, upload-time = "2026-02-18T16:48:22.176Z" },
{ url = "https://files.pythonhosted.org/packages/aa/fb/538df099bf55ae1637d52d7ccb6b9620b535a40f4c733897ac2b7bb9e14c/psycopg_binary-3.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:883d68d48ca9ff3cb3d10c5fdebea02c79b48eecacdddbf7cce6e7cdbdc216b8", size = 4532694, upload-time = "2026-02-18T16:48:27.338Z" },
{ url = "https://files.pythonhosted.org/packages/a1/d1/00780c0e187ea3c13dfc53bd7060654b2232cd30df562aac91a5f1c545ac/psycopg_binary-3.3.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:cab7bc3d288d37a80aa8c0820033250c95e40b1c2b5c57cf59827b19c2a8b69d", size = 4222833, upload-time = "2026-02-18T16:48:31.221Z" },
{ url = "https://files.pythonhosted.org/packages/7a/34/a07f1ff713c51d64dc9f19f2c32be80299a2055d5d109d5853662b922cb4/psycopg_binary-3.3.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:56c767007ca959ca32f796b42379fc7e1ae2ed085d29f20b05b3fc394f3715cc", size = 3952818, upload-time = "2026-02-18T16:48:35.869Z" },
{ url = "https://files.pythonhosted.org/packages/d3/67/d33f268a7759b4445f3c9b5a181039b01af8c8263c865c1be7a6444d4749/psycopg_binary-3.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:da2f331a01af232259a21573a01338530c6016dcfad74626c01330535bcd8628", size = 4258061, upload-time = "2026-02-18T16:48:41.365Z" },
{ url = "https://files.pythonhosted.org/packages/b4/3b/0d8d2c5e8e29ccc07d28c8af38445d9d9abcd238d590186cac82ee71fc84/psycopg_binary-3.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:19f93235ece6dbfc4036b5e4f6d8b13f0b8f2b3eeb8b0bd2936d406991bcdd40", size = 3558915, upload-time = "2026-02-18T16:48:46.679Z" },
{ url = "https://files.pythonhosted.org/packages/90/15/021be5c0cbc5b7c1ab46e91cc3434eb42569f79a0592e67b8d25e66d844d/psycopg_binary-3.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6698dbab5bcef8fdb570fc9d35fd9ac52041771bfcfe6fd0fc5f5c4e36f1e99d", size = 4591170, upload-time = "2026-02-18T16:48:55.594Z" },
{ url = "https://files.pythonhosted.org/packages/f1/54/a60211c346c9a2f8c6b272b5f2bbe21f6e11800ce7f61e99ba75cf8b63e1/psycopg_binary-3.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:329ff393441e75f10b673ae99ab45276887993d49e65f141da20d915c05aafd8", size = 4670009, upload-time = "2026-02-18T16:49:03.608Z" },
{ url = "https://files.pythonhosted.org/packages/c1/53/ac7c18671347c553362aadbf65f92786eef9540676ca24114cc02f5be405/psycopg_binary-3.3.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:eb072949b8ebf4082ae24289a2b0fd724da9adc8f22743409d6fd718ddb379df", size = 5469735, upload-time = "2026-02-18T16:49:10.128Z" },
{ url = "https://files.pythonhosted.org/packages/7f/c3/4f4e040902b82a344eff1c736cde2f2720f127fe939c7e7565706f96dd44/psycopg_binary-3.3.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:263a24f39f26e19ed7fc982d7859a36f17841b05bebad3eb47bb9cd2dd785351", size = 5152919, upload-time = "2026-02-18T16:49:16.335Z" },
{ url = "https://files.pythonhosted.org/packages/0c/e7/d929679c6a5c212bcf738806c7c89f5b3d0919f2e1685a0e08d6ff877945/psycopg_binary-3.3.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5152d50798c2fa5bd9b68ec68eb68a1b71b95126c1d70adaa1a08cd5eefdc23d", size = 6738785, upload-time = "2026-02-18T16:49:22.687Z" },
{ url = "https://files.pythonhosted.org/packages/69/b0/09703aeb69a9443d232d7b5318d58742e8ca51ff79f90ffe6b88f1db45e7/psycopg_binary-3.3.3-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9d6a1e56dd267848edb824dbeb08cf5bac649e02ee0b03ba883ba3f4f0bd54f2", size = 4979008, upload-time = "2026-02-18T16:49:27.313Z" },
{ url = "https://files.pythonhosted.org/packages/cc/a6/e662558b793c6e13a7473b970fee327d635270e41eded3090ef14045a6a5/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73eaaf4bb04709f545606c1db2f65f4000e8a04cdbf3e00d165a23004692093e", size = 4508255, upload-time = "2026-02-18T16:49:31.575Z" },
{ url = "https://files.pythonhosted.org/packages/5f/7f/0f8b2e1d5e0093921b6f324a948a5c740c1447fbb45e97acaf50241d0f39/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:162e5675efb4704192411eaf8e00d07f7960b679cd3306e7efb120bb8d9456cc", size = 4189166, upload-time = "2026-02-18T16:49:35.801Z" },
{ url = "https://files.pythonhosted.org/packages/92/ec/ce2e91c33bc8d10b00c87e2f6b0fb570641a6a60042d6a9ae35658a3a797/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:fab6b5e37715885c69f5d091f6ff229be71e235f272ebaa35158d5a46fd548a0", size = 3924544, upload-time = "2026-02-18T16:49:41.129Z" },
{ url = "https://files.pythonhosted.org/packages/c5/2f/7718141485f73a924205af60041c392938852aa447a94c8cbd222ff389a1/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a4aab31bd6d1057f287c96c0effca3a25584eb9cc702f282ecb96ded7814e830", size = 4235297, upload-time = "2026-02-18T16:49:46.726Z" },
{ url = "https://files.pythonhosted.org/packages/57/f9/1add717e2643a003bbde31b1b220172e64fbc0cb09f06429820c9173f7fc/psycopg_binary-3.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:59aa31fe11a0e1d1bcc2ce37ed35fe2ac84cd65bb9036d049b1a1c39064d0f14", size = 3547659, upload-time = "2026-02-18T16:49:52.999Z" },
]
[[package]] [[package]]
name = "psycopg2-binary" name = "psycopg2-binary"
version = "2.9.11" version = "2.9.11"

View File

@ -21,3 +21,4 @@ pytest --timeout "${PYTEST_TIMEOUT}" api/tests/integration_tests/vdb/chroma \
api/tests/integration_tests/vdb/oceanbase \ api/tests/integration_tests/vdb/oceanbase \
api/tests/integration_tests/vdb/tidb_vector \ api/tests/integration_tests/vdb/tidb_vector \
api/tests/integration_tests/vdb/huawei \ api/tests/integration_tests/vdb/huawei \
api/tests/integration_tests/vdb/hologres \

View File

@ -541,7 +541,7 @@ SUPABASE_URL=your-server-url
# ------------------------------ # ------------------------------
# The type of vector store to use. # The type of vector store to use.
# Supported values are `weaviate`, `oceanbase`, `seekdb`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`, `vastbase`, `tidb`, `tidb_on_qdrant`, `baidu`, `lindorm`, `huawei_cloud`, `upstash`, `matrixone`, `clickzetta`, `alibabacloud_mysql`, `iris`. # Supported values are `weaviate`, `oceanbase`, `seekdb`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`, `vastbase`, `tidb`, `tidb_on_qdrant`, `baidu`, `lindorm`, `huawei_cloud`, `upstash`, `matrixone`, `clickzetta`, `alibabacloud_mysql`, `iris`, `hologres`.
VECTOR_STORE=weaviate VECTOR_STORE=weaviate
# Prefix used to create collection name in vector database # Prefix used to create collection name in vector database
VECTOR_INDEX_NAME_PREFIX=Vector_index VECTOR_INDEX_NAME_PREFIX=Vector_index
@ -605,6 +605,20 @@ COUCHBASE_PASSWORD=password
COUCHBASE_BUCKET_NAME=Embeddings COUCHBASE_BUCKET_NAME=Embeddings
COUCHBASE_SCOPE_NAME=_default COUCHBASE_SCOPE_NAME=_default
# Hologres configurations, only available when VECTOR_STORE is `hologres`
# access_key_id is used as the PG username, access_key_secret is used as the PG password
HOLOGRES_HOST=
HOLOGRES_PORT=80
HOLOGRES_DATABASE=
HOLOGRES_ACCESS_KEY_ID=
HOLOGRES_ACCESS_KEY_SECRET=
HOLOGRES_SCHEMA=public
HOLOGRES_TOKENIZER=jieba
HOLOGRES_DISTANCE_METHOD=Cosine
HOLOGRES_BASE_QUANTIZATION_TYPE=rabitq
HOLOGRES_MAX_DEGREE=64
HOLOGRES_EF_CONSTRUCTION=400
# pgvector configurations, only available when VECTOR_STORE is `pgvector` # pgvector configurations, only available when VECTOR_STORE is `pgvector`
PGVECTOR_HOST=pgvector PGVECTOR_HOST=pgvector
PGVECTOR_PORT=5432 PGVECTOR_PORT=5432

View File

@ -215,6 +215,17 @@ x-shared-env: &shared-api-worker-env
COUCHBASE_PASSWORD: ${COUCHBASE_PASSWORD:-password} COUCHBASE_PASSWORD: ${COUCHBASE_PASSWORD:-password}
COUCHBASE_BUCKET_NAME: ${COUCHBASE_BUCKET_NAME:-Embeddings} COUCHBASE_BUCKET_NAME: ${COUCHBASE_BUCKET_NAME:-Embeddings}
COUCHBASE_SCOPE_NAME: ${COUCHBASE_SCOPE_NAME:-_default} COUCHBASE_SCOPE_NAME: ${COUCHBASE_SCOPE_NAME:-_default}
HOLOGRES_HOST: ${HOLOGRES_HOST:-}
HOLOGRES_PORT: ${HOLOGRES_PORT:-80}
HOLOGRES_DATABASE: ${HOLOGRES_DATABASE:-}
HOLOGRES_ACCESS_KEY_ID: ${HOLOGRES_ACCESS_KEY_ID:-}
HOLOGRES_ACCESS_KEY_SECRET: ${HOLOGRES_ACCESS_KEY_SECRET:-}
HOLOGRES_SCHEMA: ${HOLOGRES_SCHEMA:-public}
HOLOGRES_TOKENIZER: ${HOLOGRES_TOKENIZER:-jieba}
HOLOGRES_DISTANCE_METHOD: ${HOLOGRES_DISTANCE_METHOD:-Cosine}
HOLOGRES_BASE_QUANTIZATION_TYPE: ${HOLOGRES_BASE_QUANTIZATION_TYPE:-rabitq}
HOLOGRES_MAX_DEGREE: ${HOLOGRES_MAX_DEGREE:-64}
HOLOGRES_EF_CONSTRUCTION: ${HOLOGRES_EF_CONSTRUCTION:-400}
PGVECTOR_HOST: ${PGVECTOR_HOST:-pgvector} PGVECTOR_HOST: ${PGVECTOR_HOST:-pgvector}
PGVECTOR_PORT: ${PGVECTOR_PORT:-5432} PGVECTOR_PORT: ${PGVECTOR_PORT:-5432}
PGVECTOR_USER: ${PGVECTOR_USER:-postgres} PGVECTOR_USER: ${PGVECTOR_USER:-postgres}