feat: Add Hologres as a VDB & FullText DB choice (#32830)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Crazywoola <100913391+crazywoola@users.noreply.github.com>
This commit is contained in:
Ye Ding 2026-03-15 20:55:02 +08:00 committed by GitHub
parent 0fa7548346
commit f795d24151
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 921 additions and 3 deletions

3
.gitignore vendored
View File

@ -237,3 +237,6 @@ scripts/stress-test/reports/
# settings # settings
*.local.json *.local.json
*.local.md *.local.md
# Code Agent Folder
.qoder/*

View File

@ -180,7 +180,7 @@ CONSOLE_CORS_ALLOW_ORIGINS=http://localhost:3000,*
COOKIE_DOMAIN= COOKIE_DOMAIN=
# Vector database configuration # Vector database configuration
# Supported values are `weaviate`, `oceanbase`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`. # Supported values are `weaviate`, `oceanbase`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`, `hologres`.
VECTOR_STORE=weaviate VECTOR_STORE=weaviate
# Prefix used to create collection name in vector database # Prefix used to create collection name in vector database
VECTOR_INDEX_NAME_PREFIX=Vector_index VECTOR_INDEX_NAME_PREFIX=Vector_index
@ -217,6 +217,20 @@ COUCHBASE_PASSWORD=password
COUCHBASE_BUCKET_NAME=Embeddings COUCHBASE_BUCKET_NAME=Embeddings
COUCHBASE_SCOPE_NAME=_default COUCHBASE_SCOPE_NAME=_default
# Hologres configuration
# access_key_id is used as the PG username, access_key_secret is used as the PG password
HOLOGRES_HOST=
HOLOGRES_PORT=80
HOLOGRES_DATABASE=
HOLOGRES_ACCESS_KEY_ID=
HOLOGRES_ACCESS_KEY_SECRET=
HOLOGRES_SCHEMA=public
HOLOGRES_TOKENIZER=jieba
HOLOGRES_DISTANCE_METHOD=Cosine
HOLOGRES_BASE_QUANTIZATION_TYPE=rabitq
HOLOGRES_MAX_DEGREE=64
HOLOGRES_EF_CONSTRUCTION=400
# Milvus configuration # Milvus configuration
MILVUS_URI=http://127.0.0.1:19530 MILVUS_URI=http://127.0.0.1:19530
MILVUS_TOKEN= MILVUS_TOKEN=

View File

@ -160,6 +160,7 @@ def migrate_knowledge_vector_database():
} }
lower_collection_vector_types = { lower_collection_vector_types = {
VectorType.ANALYTICDB, VectorType.ANALYTICDB,
VectorType.HOLOGRES,
VectorType.CHROMA, VectorType.CHROMA,
VectorType.MYSCALE, VectorType.MYSCALE,
VectorType.PGVECTO_RS, VectorType.PGVECTO_RS,

View File

@ -26,6 +26,7 @@ from .vdb.chroma_config import ChromaConfig
from .vdb.clickzetta_config import ClickzettaConfig from .vdb.clickzetta_config import ClickzettaConfig
from .vdb.couchbase_config import CouchbaseConfig from .vdb.couchbase_config import CouchbaseConfig
from .vdb.elasticsearch_config import ElasticsearchConfig from .vdb.elasticsearch_config import ElasticsearchConfig
from .vdb.hologres_config import HologresConfig
from .vdb.huawei_cloud_config import HuaweiCloudConfig from .vdb.huawei_cloud_config import HuaweiCloudConfig
from .vdb.iris_config import IrisVectorConfig from .vdb.iris_config import IrisVectorConfig
from .vdb.lindorm_config import LindormConfig from .vdb.lindorm_config import LindormConfig
@ -347,6 +348,7 @@ class MiddlewareConfig(
AnalyticdbConfig, AnalyticdbConfig,
ChromaConfig, ChromaConfig,
ClickzettaConfig, ClickzettaConfig,
HologresConfig,
HuaweiCloudConfig, HuaweiCloudConfig,
IrisVectorConfig, IrisVectorConfig,
MilvusConfig, MilvusConfig,

View File

@ -0,0 +1,68 @@
from holo_search_sdk.types import BaseQuantizationType, DistanceType, TokenizerType
from pydantic import Field
from pydantic_settings import BaseSettings
class HologresConfig(BaseSettings):
    """
    Configuration settings for Hologres vector database.

    Hologres is compatible with PostgreSQL protocol.
    access_key_id is used as the PostgreSQL username,
    and access_key_secret is used as the PostgreSQL password.

    Each attribute is populated from the environment variable of the same
    name (pydantic ``BaseSettings`` behavior).
    """

    # Connection endpoint; no default so deployments must set it explicitly.
    HOLOGRES_HOST: str | None = Field(
        description="Hostname or IP address of the Hologres instance.",
        default=None,
    )
    # Hologres public endpoints listen on port 80 by default.
    HOLOGRES_PORT: int = Field(
        description="Port number for connecting to the Hologres instance.",
        default=80,
    )
    HOLOGRES_DATABASE: str | None = Field(
        description="Name of the Hologres database to connect to.",
        default=None,
    )
    # Credentials: the AccessKey pair doubles as the PostgreSQL user/password.
    HOLOGRES_ACCESS_KEY_ID: str | None = Field(
        description="Alibaba Cloud AccessKey ID, also used as the PostgreSQL username.",
        default=None,
    )
    HOLOGRES_ACCESS_KEY_SECRET: str | None = Field(
        description="Alibaba Cloud AccessKey Secret, also used as the PostgreSQL password.",
        default=None,
    )
    HOLOGRES_SCHEMA: str = Field(
        description="Schema name in the Hologres database.",
        default="public",
    )
    # Full-text index tokenizer; value set is defined by holo_search_sdk.types.TokenizerType.
    HOLOGRES_TOKENIZER: TokenizerType = Field(
        description="Tokenizer for full-text search index (e.g., 'jieba', 'ik', 'standard', 'simple').",
        default="jieba",
    )
    HOLOGRES_DISTANCE_METHOD: DistanceType = Field(
        description="Distance method for vector index (e.g., 'Cosine', 'Euclidean', 'InnerProduct').",
        default="Cosine",
    )
    HOLOGRES_BASE_QUANTIZATION_TYPE: BaseQuantizationType = Field(
        description="Base quantization type for vector index (e.g., 'rabitq', 'sq8', 'fp16', 'fp32').",
        default="rabitq",
    )
    # HNSW index tuning knobs (graph degree / build-time candidate list size).
    HOLOGRES_MAX_DEGREE: int = Field(
        description="Max degree (M) parameter for HNSW vector index.",
        default=64,
    )
    HOLOGRES_EF_CONSTRUCTION: int = Field(
        description="ef_construction parameter for HNSW vector index.",
        default=400,
    )

View File

@ -263,6 +263,7 @@ def _get_retrieval_methods_by_vector_type(vector_type: str | None, is_mock: bool
VectorType.BAIDU, VectorType.BAIDU,
VectorType.ALIBABACLOUD_MYSQL, VectorType.ALIBABACLOUD_MYSQL,
VectorType.IRIS, VectorType.IRIS,
VectorType.HOLOGRES,
} }
semantic_methods = {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]} semantic_methods = {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]}

View File

@ -0,0 +1,361 @@
import json
import logging
import time
from typing import Any
import holo_search_sdk as holo # type: ignore
from holo_search_sdk.types import BaseQuantizationType, DistanceType, TokenizerType
from psycopg import sql as psql
from pydantic import BaseModel, model_validator
from configs import dify_config
from core.rag.datasource.vdb.vector_base import BaseVector
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.embedding.embedding_base import Embeddings
from core.rag.models.document import Document
from extensions.ext_redis import redis_client
from models.dataset import Dataset
logger = logging.getLogger(__name__)
class HologresVectorConfig(BaseModel):
    """
    Connection settings for a single Hologres vector database.

    Hologres speaks the PostgreSQL protocol: access_key_id is used as the
    PostgreSQL username and access_key_secret as the PostgreSQL password.
    """

    host: str
    port: int = 80
    database: str
    access_key_id: str
    access_key_secret: str
    schema_name: str = "public"
    tokenizer: TokenizerType = "jieba"
    distance_method: DistanceType = "Cosine"
    base_quantization_type: BaseQuantizationType = "rabitq"
    max_degree: int = 64
    ef_construction: int = 400

    @model_validator(mode="before")
    @classmethod
    def validate_config(cls, values: dict):
        """Reject configs missing any mandatory connection setting."""
        required = ("host", "database", "access_key_id", "access_key_secret")
        for field_name in required:
            if not values.get(field_name):
                # Error text mirrors the environment variable name (HOLOGRES_<FIELD>).
                raise ValueError(f"config HOLOGRES_{field_name.upper()} is required")
        return values
class HologresVector(BaseVector):
    """
    Hologres vector storage implementation using holo-search-sdk.

    Supports semantic search (vector), full-text search, and hybrid search.
    Each collection is backed by a table ``embedding_<collection_name>`` with
    columns (id TEXT PK, text TEXT, meta JSONB, embedding float4[]).
    """

    def __init__(self, collection_name: str, config: HologresVectorConfig):
        super().__init__(collection_name)
        self._config = config
        self._client = self._init_client(config)
        # Hologres folds unquoted identifiers to lowercase; normalize up front
        # so SQL built with Identifier() and SDK calls agree on the name.
        self.table_name = f"embedding_{collection_name}".lower()

    def _init_client(self, config: HologresVectorConfig):
        """Initialize and return a holo-search-sdk client."""
        client = holo.connect(
            host=config.host,
            port=config.port,
            database=config.database,
            access_key_id=config.access_key_id,
            access_key_secret=config.access_key_secret,
            schema=config.schema_name,
        )
        client.connect()
        return client

    def get_type(self) -> str:
        """Return the vector store type identifier."""
        return VectorType.HOLOGRES

    def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs):
        """Create collection table with vector and full-text indexes, then add texts.

        The vector dimension is inferred from the first embedding. With an
        empty embeddings list there is nothing to size the index from and
        nothing to store, so this is a no-op (previously raised IndexError).
        """
        if not embeddings:
            return
        dimension = len(embeddings[0])
        self._create_collection(dimension)
        self.add_texts(texts, embeddings)

    def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs):
        """Add texts with embeddings to the collection using batch upsert.

        Returns the list of doc_ids written (empty list for empty input).
        """
        if not documents:
            return []
        pks: list[str] = []
        batch_size = 100
        column_names = ["id", "text", "meta", "embedding"]
        # open_table is loop-invariant — open once instead of once per batch.
        table = self._client.open_table(self.table_name)
        for i in range(0, len(documents), batch_size):
            batch_docs = documents[i : i + batch_size]
            batch_embeddings = embeddings[i : i + batch_size]
            values = []
            for j, doc in enumerate(batch_docs):
                doc_id = doc.metadata.get("doc_id", "") if doc.metadata else ""
                pks.append(doc_id)
                values.append(
                    [
                        doc_id,
                        doc.page_content,
                        json.dumps(doc.metadata or {}),
                        batch_embeddings[j],
                    ]
                )
            table.upsert_multi(
                index_column="id",
                values=values,
                column_names=column_names,
                update=True,
                update_columns=["text", "meta", "embedding"],
            )
        return pks

    def text_exists(self, id: str) -> bool:
        """Check if a text with the given doc_id exists in the collection."""
        if not self._client.check_table_exist(self.table_name):
            return False
        result = self._client.execute(
            psql.SQL("SELECT 1 FROM {} WHERE id = {} LIMIT 1").format(
                psql.Identifier(self.table_name), psql.Literal(id)
            ),
            fetch_result=True,
        )
        return bool(result)

    def get_ids_by_metadata_field(self, key: str, value: str) -> list[str] | None:
        """Get document IDs whose JSONB meta field *key* equals *value*.

        Returns None (not an empty list) when nothing matches, preserving the
        original contract callers may rely on.
        """
        result = self._client.execute(
            psql.SQL("SELECT id FROM {} WHERE meta->>{} = {}").format(
                psql.Identifier(self.table_name), psql.Literal(key), psql.Literal(value)
            ),
            fetch_result=True,
        )
        if result:
            return [row[0] for row in result]
        return None

    def delete_by_ids(self, ids: list[str]):
        """Delete documents by their doc_id list; no-op for empty input or missing table."""
        if not ids:
            return
        if not self._client.check_table_exist(self.table_name):
            return
        self._client.execute(
            psql.SQL("DELETE FROM {} WHERE id IN ({})").format(
                psql.Identifier(self.table_name),
                psql.SQL(", ").join(psql.Literal(id) for id in ids),
            )
        )

    def delete_by_metadata_field(self, key: str, value: str):
        """Delete documents whose JSONB meta field *key* equals *value*."""
        if not self._client.check_table_exist(self.table_name):
            return
        self._client.execute(
            psql.SQL("DELETE FROM {} WHERE meta->>{} = {}").format(
                psql.Identifier(self.table_name), psql.Literal(key), psql.Literal(value)
            )
        )

    def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
        """Search for documents by vector similarity.

        Supported kwargs: top_k (default 4), score_threshold (default 0.0),
        document_ids_filter (restricts to meta->>'document_id' values).
        """
        if not self._client.check_table_exist(self.table_name):
            return []
        top_k = kwargs.get("top_k", 4)
        score_threshold = float(kwargs.get("score_threshold") or 0.0)
        table = self._client.open_table(self.table_name)
        query = (
            table.search_vector(
                vector=query_vector,
                column="embedding",
                distance_method=self._config.distance_method,
                output_name="distance",
            )
            .select(["id", "text", "meta"])
            .limit(top_k)
        )
        # Apply document_ids_filter if provided
        document_ids_filter = kwargs.get("document_ids_filter")
        if document_ids_filter:
            filter_sql = psql.SQL("meta->>'document_id' IN ({})").format(
                psql.SQL(", ").join(psql.Literal(id) for id in document_ids_filter)
            )
            query = query.where(filter_sql)
        results = query.fetchall()
        return self._process_vector_results(results, score_threshold)

    def _process_vector_results(self, results: list, score_threshold: float) -> list[Document]:
        """Process vector search results into Document objects."""
        docs = []
        for row in results:
            # row format: (distance, id, text, meta)
            # distance is first because search_vector() adds the computed column before selected columns
            distance = row[0]
            text = row[2]
            meta = row[3]
            if isinstance(meta, str):
                meta = json.loads(meta)
            # Convert distance to similarity score (consistent with pgvector)
            score = 1 - distance
            meta["score"] = score
            if score >= score_threshold:
                docs.append(Document(page_content=text, metadata=meta))
        return docs

    def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
        """Search for documents by full-text search.

        Supported kwargs: top_k (default 4), document_ids_filter.
        """
        if not self._client.check_table_exist(self.table_name):
            return []
        top_k = kwargs.get("top_k", 4)
        table = self._client.open_table(self.table_name)
        search_query = table.search_text(
            column="text",
            expression=query,
            return_score=True,
            return_score_name="score",
            return_all_columns=True,
        ).limit(top_k)
        # Apply document_ids_filter if provided
        document_ids_filter = kwargs.get("document_ids_filter")
        if document_ids_filter:
            filter_sql = psql.SQL("meta->>'document_id' IN ({})").format(
                psql.SQL(", ").join(psql.Literal(id) for id in document_ids_filter)
            )
            search_query = search_query.where(filter_sql)
        results = search_query.fetchall()
        return self._process_full_text_results(results)

    def _process_full_text_results(self, results: list) -> list[Document]:
        """Process full-text search results into Document objects."""
        docs = []
        for row in results:
            # row format: (id, text, meta, embedding, score)
            text = row[1]
            meta = row[2]
            score = row[-1]  # score is the last column from return_score
            if isinstance(meta, str):
                meta = json.loads(meta)
            meta["score"] = score
            docs.append(Document(page_content=text, metadata=meta))
        return docs

    def delete(self):
        """Delete the entire collection table."""
        if self._client.check_table_exist(self.table_name):
            self._client.drop_table(self.table_name)

    def _create_collection(self, dimension: int):
        """Create the collection table with vector and full-text indexes.

        Guarded by a redis lock plus a one-hour "already created" cache key so
        concurrent workers don't race on DDL.
        """
        lock_name = f"vector_indexing_lock_{self._collection_name}"
        # Lock timeout must exceed the worst-case work done while holding it:
        # the table-ready poll alone can take up to 30s, so 20s (the previous
        # value) could let the lock expire mid-creation. Use 60s.
        with redis_client.lock(lock_name, timeout=60):
            collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
            if redis_client.get(collection_exist_cache_key):
                return
            if not self._client.check_table_exist(self.table_name):
                # Create table via SQL with CHECK constraint for vector dimension
                create_table_sql = psql.SQL("""
                    CREATE TABLE IF NOT EXISTS {} (
                        id TEXT PRIMARY KEY,
                        text TEXT NOT NULL,
                        meta JSONB NOT NULL,
                        embedding float4[] NOT NULL
                            CHECK (array_ndims(embedding) = 1
                            AND array_length(embedding, 1) = {})
                    );
                """).format(psql.Identifier(self.table_name), psql.Literal(dimension))
                self._client.execute(create_table_sql)
                # Wait for table to be fully ready before creating indexes
                max_wait_seconds = 30
                poll_interval = 2
                for _ in range(max_wait_seconds // poll_interval):
                    if self._client.check_table_exist(self.table_name):
                        break
                    time.sleep(poll_interval)
                else:
                    raise RuntimeError(f"Table {self.table_name} was not ready after {max_wait_seconds}s")
                # Open table and set vector index
                table = self._client.open_table(self.table_name)
                table.set_vector_index(
                    column="embedding",
                    distance_method=self._config.distance_method,
                    base_quantization_type=self._config.base_quantization_type,
                    max_degree=self._config.max_degree,
                    ef_construction=self._config.ef_construction,
                    # rabitq quantization is lossy; reorder refines results.
                    use_reorder=self._config.base_quantization_type == "rabitq",
                )
                # Create full-text search index
                table.create_text_index(
                    index_name=f"ft_idx_{self._collection_name}",
                    column="text",
                    tokenizer=self._config.tokenizer,
                )
            redis_client.set(collection_exist_cache_key, 1, ex=3600)
class HologresVectorFactory(AbstractVectorFactory):
    """Factory class for creating HologresVector instances."""

    def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> HologresVector:
        """Build a HologresVector for *dataset*, reusing or generating its collection name."""
        if dataset.index_struct_dict:
            # Existing dataset: reuse the collection name recorded in its index struct.
            collection_name: str = dataset.index_struct_dict["vector_store"]["class_prefix"]
        else:
            # New dataset: derive a collection name and persist the index struct.
            collection_name = Dataset.gen_collection_name_by_id(dataset.id)
            dataset.index_struct = json.dumps(self.gen_index_struct_dict(VectorType.HOLOGRES, collection_name))

        config = HologresVectorConfig(
            host=dify_config.HOLOGRES_HOST or "",
            port=dify_config.HOLOGRES_PORT,
            database=dify_config.HOLOGRES_DATABASE or "",
            access_key_id=dify_config.HOLOGRES_ACCESS_KEY_ID or "",
            access_key_secret=dify_config.HOLOGRES_ACCESS_KEY_SECRET or "",
            schema_name=dify_config.HOLOGRES_SCHEMA,
            tokenizer=dify_config.HOLOGRES_TOKENIZER,
            distance_method=dify_config.HOLOGRES_DISTANCE_METHOD,
            base_quantization_type=dify_config.HOLOGRES_BASE_QUANTIZATION_TYPE,
            max_degree=dify_config.HOLOGRES_MAX_DEGREE,
            ef_construction=dify_config.HOLOGRES_EF_CONSTRUCTION,
        )
        return HologresVector(collection_name=collection_name, config=config)

View File

@ -191,6 +191,10 @@ class Vector:
from core.rag.datasource.vdb.iris.iris_vector import IrisVectorFactory from core.rag.datasource.vdb.iris.iris_vector import IrisVectorFactory
return IrisVectorFactory return IrisVectorFactory
case VectorType.HOLOGRES:
from core.rag.datasource.vdb.hologres.hologres_vector import HologresVectorFactory
return HologresVectorFactory
case _: case _:
raise ValueError(f"Vector store {vector_type} is not supported.") raise ValueError(f"Vector store {vector_type} is not supported.")

View File

@ -34,3 +34,4 @@ class VectorType(StrEnum):
MATRIXONE = "matrixone" MATRIXONE = "matrixone"
CLICKZETTA = "clickzetta" CLICKZETTA = "clickzetta"
IRIS = "iris" IRIS = "iris"
HOLOGRES = "hologres"

View File

@ -116,6 +116,7 @@ class ToolParameterConfigurationManager:
return a deep copy of parameters with decrypted values return a deep copy of parameters with decrypted values
""" """
parameters = self._deep_copy(parameters)
cache = ToolParameterCache( cache = ToolParameterCache(
tenant_id=self.tenant_id, tenant_id=self.tenant_id,

View File

@ -226,6 +226,7 @@ vdb = [
"xinference-client~=1.2.2", "xinference-client~=1.2.2",
"mo-vector~=0.1.13", "mo-vector~=0.1.13",
"mysql-connector-python>=9.3.0", "mysql-connector-python>=9.3.0",
"holo-search-sdk>=0.4.1",
] ]
[tool.mypy] [tool.mypy]

View File

@ -35,7 +35,8 @@
"tos", "tos",
"gmpy2", "gmpy2",
"sendgrid", "sendgrid",
"sendgrid.helpers.mail" "sendgrid.helpers.mail",
"holo_search_sdk.types"
], ],
"reportUnknownMemberType": "hint", "reportUnknownMemberType": "hint",
"reportUnknownParameterType": "hint", "reportUnknownParameterType": "hint",

View File

@ -77,6 +77,19 @@ IRIS_MAX_CONNECTION=3
IRIS_TEXT_INDEX=true IRIS_TEXT_INDEX=true
IRIS_TEXT_INDEX_LANGUAGE=en IRIS_TEXT_INDEX_LANGUAGE=en
# Hologres configuration
HOLOGRES_HOST=localhost
HOLOGRES_PORT=80
HOLOGRES_DATABASE=test_db
HOLOGRES_ACCESS_KEY_ID=test_access_key_id
HOLOGRES_ACCESS_KEY_SECRET=test_access_key_secret
HOLOGRES_SCHEMA=public
HOLOGRES_TOKENIZER=jieba
HOLOGRES_DISTANCE_METHOD=Cosine
HOLOGRES_BASE_QUANTIZATION_TYPE=rabitq
HOLOGRES_MAX_DEGREE=64
HOLOGRES_EF_CONSTRUCTION=400
# Upload configuration # Upload configuration
UPLOAD_FILE_SIZE_LIMIT=15 UPLOAD_FILE_SIZE_LIMIT=15

View File

@ -0,0 +1,209 @@
import json
import os
from typing import Any
import holo_search_sdk as holo
import pytest
from _pytest.monkeypatch import MonkeyPatch
from psycopg import sql as psql
# Shared in-memory storage: {table_name: {doc_id: {"id", "text", "meta", "embedding"}}}
_mock_tables: dict[str, dict[str, dict[str, Any]]] = {}
class MockSearchQuery:
    """Mock query builder for search_vector and search_text results."""

    def __init__(self, table_name: str, search_type: str):
        self._table_name = table_name
        self._search_type = search_type  # "vector" or "text" — decides row layout
        self._limit_val = 10
        self._filter_sql = None

    def select(self, columns):
        """No-op: the mock always emits its fixed column layout."""
        return self

    def limit(self, n):
        self._limit_val = n
        return self

    def where(self, filter_sql):
        self._filter_sql = filter_sql
        return self

    def _apply_filter(self, row: dict[str, Any]) -> bool:
        """Apply the filter SQL to check if a row matches."""
        if self._filter_sql is None:
            return True
        # Extract literals (the document IDs) from the filter SQL
        # Filter format: meta->>'document_id' IN ('doc1', 'doc2')
        literals = [v for t, v in _extract_identifiers_and_literals(self._filter_sql) if t == "literal"]
        if not literals:
            return True
        # Get the document_id from the row's meta field
        meta = row.get("meta", "{}")
        if isinstance(meta, str):
            meta = json.loads(meta)
        doc_id = meta.get("document_id")
        return doc_id in literals

    def fetchall(self):
        """Return matching rows in the tuple layouts the real SDK produces.

        Fix vs. the original: the limit is applied AFTER filtering. Previously
        the stored rows were sliced to `_limit_val` first, so matching rows
        located beyond that slice were silently dropped.
        """
        data = _mock_tables.get(self._table_name, {})
        results = []
        for row in data.values():
            if not self._apply_filter(row):
                continue
            if self._search_type == "vector":
                # row format expected by _process_vector_results: (distance, id, text, meta)
                results.append((0.1, row["id"], row["text"], row["meta"]))
            else:
                # row format expected by _process_full_text_results: (id, text, meta, embedding, score)
                results.append((row["id"], row["text"], row["meta"], row.get("embedding", []), 0.9))
            if len(results) >= self._limit_val:
                break
        return results
class MockTable:
    """Mock table object returned by client.open_table()."""

    def __init__(self, table_name: str):
        self._table_name = table_name

    def upsert_multi(self, index_column, values, column_names, update=True, update_columns=None):
        """Insert-or-replace each row in the shared store, keyed by its "id" column."""
        store = _mock_tables.setdefault(self._table_name, {})
        key_pos = column_names.index("id")
        for record in values:
            store[record[key_pos]] = dict(zip(column_names, record))

    def search_vector(self, vector, column, distance_method, output_name):
        """Return a query builder for vector-style result rows."""
        return MockSearchQuery(self._table_name, "vector")

    def search_text(self, column, expression, return_score=False, return_score_name="score", return_all_columns=False):
        """Return a query builder for full-text-style result rows."""
        return MockSearchQuery(self._table_name, "text")

    def set_vector_index(
        self, column, distance_method, base_quantization_type, max_degree, ef_construction, use_reorder
    ):
        """No-op: index creation has no observable effect in the mock."""

    def create_text_index(self, index_name, column, tokenizer):
        """No-op: index creation has no observable effect in the mock."""
def _extract_sql_template(query) -> str:
    """Return the raw SQL template string embedded in a psycopg composable.

    For a Composed object the first SQL part carries the template; a bare
    SQL object holds it directly. Anything else yields "". Relies on
    psycopg's private `_obj` attribute.
    """
    if isinstance(query, psql.SQL):
        return query._obj
    if isinstance(query, psql.Composed):
        return next((part._obj for part in query if isinstance(part, psql.SQL)), "")
    return ""
def _extract_identifiers_and_literals(query) -> list[Any]:
    """Extract Identifier and Literal values from a psycopg Composed object.

    Returns ("ident", name) / ("literal", value) tuples in the order they
    appear. Only ONE level of nested Composed parts is walked — enough for
    the SQL(", ").join(...) IN-clauses built by HologresVector, but not for
    arbitrarily nested composables. Relies on psycopg's private `_obj`
    attribute to read the wrapped values.
    """
    values: list[Any] = []
    if isinstance(query, psql.Composed):
        for part in query:
            if isinstance(part, psql.Identifier):
                # Identifier stores dotted-name parts as a tuple; we only use single names.
                values.append(("ident", part._obj[0] if part._obj else ""))
            elif isinstance(part, psql.Literal):
                values.append(("literal", part._obj))
            elif isinstance(part, psql.Composed):
                # Handles SQL(...).join(...) for IN clauses
                for sub in part:
                    if isinstance(sub, psql.Literal):
                        values.append(("literal", sub._obj))
    return values
class MockHologresClient:
    """Mock holo_search_sdk client that stores data in memory."""

    def connect(self):
        # The real client opens a PostgreSQL connection; the mock has nothing to do.
        pass

    def check_table_exist(self, table_name):
        return table_name in _mock_tables

    def open_table(self, table_name):
        return MockTable(table_name)

    def execute(self, query, fetch_result=False):
        """Crude SQL dispatcher matching the statement shapes HologresVector emits.

        Branch order matters: "SELECT 1" must be tested before "SELECT id",
        and the DELETE variants are told apart by their WHERE-clause text.
        """
        template = _extract_sql_template(query)
        params = _extract_identifiers_and_literals(query)
        if "CREATE TABLE" in template.upper():
            # Extract table name from first identifier
            table_name = next((v for t, v in params if t == "ident"), "unknown")
            if table_name not in _mock_tables:
                _mock_tables[table_name] = {}
            return None
        if "SELECT 1" in template:
            # text_exists: SELECT 1 FROM {table} WHERE id = {id} LIMIT 1
            table_name = next((v for t, v in params if t == "ident"), "")
            doc_id = next((v for t, v in params if t == "literal"), "")
            data = _mock_tables.get(table_name, {})
            return [(1,)] if doc_id in data else []
        if "SELECT id" in template:
            # get_ids_by_metadata_field: SELECT id FROM {table} WHERE meta->>{key} = {value}
            table_name = next((v for t, v in params if t == "ident"), "")
            literals = [v for t, v in params if t == "literal"]
            key = literals[0] if len(literals) > 0 else ""
            value = literals[1] if len(literals) > 1 else ""
            data = _mock_tables.get(table_name, {})
            return [(doc_id,) for doc_id, row in data.items() if json.loads(row.get("meta", "{}")).get(key) == value]
        if "DELETE" in template.upper():
            table_name = next((v for t, v in params if t == "ident"), "")
            if "id IN" in template:
                # delete_by_ids
                ids_to_delete = [v for t, v in params if t == "literal"]
                for did in ids_to_delete:
                    _mock_tables.get(table_name, {}).pop(did, None)
            elif "meta->>" in template:
                # delete_by_metadata_field
                literals = [v for t, v in params if t == "literal"]
                key = literals[0] if len(literals) > 0 else ""
                value = literals[1] if len(literals) > 1 else ""
                data = _mock_tables.get(table_name, {})
                to_remove = [
                    doc_id for doc_id, row in data.items() if json.loads(row.get("meta", "{}")).get(key) == value
                ]
                for did in to_remove:
                    data.pop(did, None)
            return None
        # Unrecognized statements: pretend success (empty result set if asked to fetch).
        return [] if fetch_result else None

    def drop_table(self, table_name):
        _mock_tables.pop(table_name, None)
def mock_connect(**kwargs):
    """Replacement for holo_search_sdk.connect() that returns a mock client."""
    return MockHologresClient()


# MOCK_SWITCH=true patches the SDK connection; otherwise tests hit a real instance.
MOCK = os.getenv("MOCK_SWITCH", "false").lower() == "true"


@pytest.fixture
def setup_hologres_mock(monkeypatch: MonkeyPatch):
    """Patch holo_search_sdk.connect with the in-memory mock for the test's duration."""
    if MOCK:
        monkeypatch.setattr(holo, "connect", mock_connect)
    yield
    # Teardown: wipe the shared store so tables don't leak between tests.
    if MOCK:
        _mock_tables.clear()
        monkeypatch.undo()

View File

@ -0,0 +1,149 @@
import os
import uuid
from typing import cast
from holo_search_sdk.types import BaseQuantizationType, DistanceType, TokenizerType
from core.rag.datasource.vdb.hologres.hologres_vector import HologresVector, HologresVectorConfig
from core.rag.models.document import Document
from tests.integration_tests.vdb.__mock.hologres import setup_hologres_mock
from tests.integration_tests.vdb.test_vector_store import AbstractVectorTest, get_example_text, setup_mock_redis
MOCK = os.getenv("MOCK_SWITCH", "false").lower() == "true"
class HologresVectorTest(AbstractVectorTest):
    """Exercises HologresVector against a real instance or the in-memory mock.

    Configuration comes from HOLOGRES_* environment variables, with defaults
    suitable for the mock. The MOCK module flag decides which assertions are
    safe (e.g. full-text results are only deterministic under the mock).
    """

    def __init__(self):
        super().__init__()
        # Hologres requires collection names to be lowercase
        self.collection_name = self.collection_name.lower()
        self.vector = HologresVector(
            collection_name=self.collection_name,
            config=HologresVectorConfig(
                host=os.environ.get("HOLOGRES_HOST", "localhost"),
                port=int(os.environ.get("HOLOGRES_PORT", "80")),
                database=os.environ.get("HOLOGRES_DATABASE", "test_db"),
                access_key_id=os.environ.get("HOLOGRES_ACCESS_KEY_ID", "test_key"),
                access_key_secret=os.environ.get("HOLOGRES_ACCESS_KEY_SECRET", "test_secret"),
                schema_name=os.environ.get("HOLOGRES_SCHEMA", "public"),
                # cast() narrows plain env strings to the SDK's Literal types for the type checker.
                tokenizer=cast(TokenizerType, os.environ.get("HOLOGRES_TOKENIZER", "jieba")),
                distance_method=cast(DistanceType, os.environ.get("HOLOGRES_DISTANCE_METHOD", "Cosine")),
                base_quantization_type=cast(
                    BaseQuantizationType, os.environ.get("HOLOGRES_BASE_QUANTIZATION_TYPE", "rabitq")
                ),
                max_degree=int(os.environ.get("HOLOGRES_MAX_DEGREE", "64")),
                ef_construction=int(os.environ.get("HOLOGRES_EF_CONSTRUCTION", "400")),
            ),
        )

    def search_by_full_text(self):
        """Override: full-text index may not be immediately ready in real mode."""
        hits_by_full_text = self.vector.search_by_full_text(query=get_example_text())
        if MOCK:
            # In mock mode, full-text search should return the document we inserted
            assert len(hits_by_full_text) == 1
            assert hits_by_full_text[0].metadata["doc_id"] == self.example_doc_id
        else:
            # In real mode, full-text index may need time to become active
            assert len(hits_by_full_text) >= 0

    def search_by_vector_with_filter(self):
        """Test vector search with document_ids_filter."""
        # Create another document with different document_id
        other_doc_id = str(uuid.uuid4())
        other_doc = Document(
            page_content="other_text",
            metadata={
                "doc_id": other_doc_id,
                "doc_hash": other_doc_id,
                "document_id": other_doc_id,
                "dataset_id": self.dataset_id,
            },
        )
        self.vector.add_texts(documents=[other_doc], embeddings=[self.example_embedding])
        # Search with filter - should only return the original document
        hits = self.vector.search_by_vector(
            query_vector=self.example_embedding,
            document_ids_filter=[self.example_doc_id],
        )
        assert len(hits) == 1
        assert hits[0].metadata["doc_id"] == self.example_doc_id
        # Search without filter - should return both
        all_hits = self.vector.search_by_vector(query_vector=self.example_embedding, top_k=10)
        assert len(all_hits) >= 2

    def search_by_full_text_with_filter(self):
        """Test full-text search with document_ids_filter."""
        # Create another document with different document_id
        other_doc_id = str(uuid.uuid4())
        other_doc = Document(
            page_content="unique_other_text",
            metadata={
                "doc_id": other_doc_id,
                "doc_hash": other_doc_id,
                "document_id": other_doc_id,
                "dataset_id": self.dataset_id,
            },
        )
        self.vector.add_texts(documents=[other_doc], embeddings=[self.example_embedding])
        # Search with filter - should only return the original document
        hits = self.vector.search_by_full_text(
            query=get_example_text(),
            document_ids_filter=[self.example_doc_id],
        )
        if MOCK:
            # Only deterministic in mock mode; the real index may still be warming up.
            assert len(hits) == 1
            assert hits[0].metadata["doc_id"] == self.example_doc_id

    def get_ids_by_metadata_field(self):
        """Override: Hologres implements this method via JSONB query."""
        ids = self.vector.get_ids_by_metadata_field(key="document_id", value=self.example_doc_id)
        assert ids is not None
        assert len(ids) == 1

    def run_all_tests(self):
        """Run the base suite, then the filter tests (which need re-seeded data)."""
        # Clean up before running tests
        self.vector.delete()
        # Run base tests (create, search, text_exists, get_ids, add_texts, delete_by_ids, delete)
        super().run_all_tests()
        # Additional filter tests require fresh data (table was deleted by base tests)
        if MOCK:
            # Recreate collection for filter tests
            self.vector.create(
                texts=[
                    Document(
                        page_content=get_example_text(),
                        metadata={
                            "doc_id": self.example_doc_id,
                            "doc_hash": self.example_doc_id,
                            "document_id": self.example_doc_id,
                            "dataset_id": self.dataset_id,
                        },
                    )
                ],
                embeddings=[self.example_embedding],
            )
            self.search_by_vector_with_filter()
            self.search_by_full_text_with_filter()
            # Clean up
            self.vector.delete()
def test_hologres_vector(setup_mock_redis, setup_hologres_mock):
    """
    Test Hologres vector database implementation.

    Covers: creating the collection with its vector index, adding texts with
    embeddings, vector similarity search, full-text search, text existence
    checks, batch deletion by IDs, and collection deletion.
    """
    suite = HologresVectorTest()
    suite.run_all_tests()

View File

@ -1609,6 +1609,7 @@ vdb = [
{ name = "clickzetta-connector-python" }, { name = "clickzetta-connector-python" },
{ name = "couchbase" }, { name = "couchbase" },
{ name = "elasticsearch" }, { name = "elasticsearch" },
{ name = "holo-search-sdk" },
{ name = "intersystems-irispython" }, { name = "intersystems-irispython" },
{ name = "mo-vector" }, { name = "mo-vector" },
{ name = "mysql-connector-python" }, { name = "mysql-connector-python" },
@ -1809,6 +1810,7 @@ vdb = [
{ name = "clickzetta-connector-python", specifier = ">=0.8.102" }, { name = "clickzetta-connector-python", specifier = ">=0.8.102" },
{ name = "couchbase", specifier = "~=4.3.0" }, { name = "couchbase", specifier = "~=4.3.0" },
{ name = "elasticsearch", specifier = "==8.14.0" }, { name = "elasticsearch", specifier = "==8.14.0" },
{ name = "holo-search-sdk", specifier = ">=0.4.1" },
{ name = "intersystems-irispython", specifier = ">=5.1.0" }, { name = "intersystems-irispython", specifier = ">=5.1.0" },
{ name = "mo-vector", specifier = "~=0.1.13" }, { name = "mo-vector", specifier = "~=0.1.13" },
{ name = "mysql-connector-python", specifier = ">=9.3.0" }, { name = "mysql-connector-python", specifier = ">=9.3.0" },
@ -2878,6 +2880,20 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/f5/a9/55a4ac9c16fdf32e92e9e22c49f61affe5135e177ca19b014484e28950f7/hiredis-3.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:04ec150e95eea3de9ff8bac754978aa17b8bf30a86d4ab2689862020945396b0", size = 22379, upload-time = "2025-10-14T16:32:22.916Z" }, { url = "https://files.pythonhosted.org/packages/f5/a9/55a4ac9c16fdf32e92e9e22c49f61affe5135e177ca19b014484e28950f7/hiredis-3.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:04ec150e95eea3de9ff8bac754978aa17b8bf30a86d4ab2689862020945396b0", size = 22379, upload-time = "2025-10-14T16:32:22.916Z" },
] ]
[[package]]
name = "holo-search-sdk"
version = "0.4.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "numpy" },
{ name = "psycopg", extra = ["binary"] },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/0b/b8/70a4999dabbba15e98d201a7399aab76ab96931ad1a27392ba5252cc9165/holo_search_sdk-0.4.1.tar.gz", hash = "sha256:9aea98b6078b9202abb568ed69d798d5e0505d2b4cc3a136a6aa84402bcd2133", size = 56701, upload-time = "2026-01-28T01:44:57.645Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/8a/30/3059a979272f90a96f31b167443cc27675e8cc8f970a3ac0cb80bf803c70/holo_search_sdk-0.4.1-py3-none-any.whl", hash = "sha256:ef1059895ea936ff6a087f68dac92bd1ae0320e51ec5b1d4e7bed7a5dd6beb45", size = 32647, upload-time = "2026-01-28T01:44:56.098Z" },
]
[[package]] [[package]]
name = "hpack" name = "hpack"
version = "4.1.0" version = "4.1.0"
@ -4859,6 +4875,53 @@ version = "1.0.2"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/eb/72/4a7965cf54e341006ad74cdc72cd6572c789bc4f4e3fadc78672f1fbcfbd/psycogreen-1.0.2.tar.gz", hash = "sha256:c429845a8a49cf2f76b71265008760bcd7c7c77d80b806db4dc81116dbcd130d", size = 5411, upload-time = "2020-02-22T19:55:22.02Z" } sdist = { url = "https://files.pythonhosted.org/packages/eb/72/4a7965cf54e341006ad74cdc72cd6572c789bc4f4e3fadc78672f1fbcfbd/psycogreen-1.0.2.tar.gz", hash = "sha256:c429845a8a49cf2f76b71265008760bcd7c7c77d80b806db4dc81116dbcd130d", size = 5411, upload-time = "2020-02-22T19:55:22.02Z" }
[[package]]
name = "psycopg"
version = "3.3.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "typing-extensions" },
{ name = "tzdata", marker = "sys_platform == 'win32'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/d3/b6/379d0a960f8f435ec78720462fd94c4863e7a31237cf81bf76d0af5883bf/psycopg-3.3.3.tar.gz", hash = "sha256:5e9a47458b3c1583326513b2556a2a9473a1001a56c9efe9e587245b43148dd9", size = 165624, upload-time = "2026-02-18T16:52:16.546Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c8/5b/181e2e3becb7672b502f0ed7f16ed7352aca7c109cfb94cf3878a9186db9/psycopg-3.3.3-py3-none-any.whl", hash = "sha256:f96525a72bcfade6584ab17e89de415ff360748c766f0106959144dcbb38c698", size = 212768, upload-time = "2026-02-18T16:46:27.365Z" },
]
[package.optional-dependencies]
binary = [
{ name = "psycopg-binary", marker = "implementation_name != 'pypy'" },
]
[[package]]
name = "psycopg-binary"
version = "3.3.3"
source = { registry = "https://pypi.org/simple" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/be/c0/b389119dd754483d316805260f3e73cdcad97925839107cc7a296f6132b1/psycopg_binary-3.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a89bb9ee11177b2995d87186b1d9fa892d8ea725e85eab28c6525e4cc14ee048", size = 4609740, upload-time = "2026-02-18T16:47:51.093Z" },
{ url = "https://files.pythonhosted.org/packages/cf/e3/9976eef20f61840285174d360da4c820a311ab39d6b82fa09fbb545be825/psycopg_binary-3.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9f7d0cf072c6fbac3795b08c98ef9ea013f11db609659dcfc6b1f6cc31f9e181", size = 4676837, upload-time = "2026-02-18T16:47:55.523Z" },
{ url = "https://files.pythonhosted.org/packages/9f/f2/d28ba2f7404fd7f68d41e8a11df86313bd646258244cb12a8dd83b868a97/psycopg_binary-3.3.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:90eecd93073922f085967f3ed3a98ba8c325cbbc8c1a204e300282abd2369e13", size = 5497070, upload-time = "2026-02-18T16:47:59.929Z" },
{ url = "https://files.pythonhosted.org/packages/de/2f/6c5c54b815edeb30a281cfcea96dc93b3bb6be939aea022f00cab7aa1420/psycopg_binary-3.3.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dac7ee2f88b4d7bb12837989ca354c38d400eeb21bce3b73dac02622f0a3c8d6", size = 5172410, upload-time = "2026-02-18T16:48:05.665Z" },
{ url = "https://files.pythonhosted.org/packages/51/75/8206c7008b57de03c1ada46bd3110cc3743f3fd9ed52031c4601401d766d/psycopg_binary-3.3.3-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b62cf8784eb6d35beaee1056d54caf94ec6ecf2b7552395e305518ab61eb8fd2", size = 6763408, upload-time = "2026-02-18T16:48:13.541Z" },
{ url = "https://files.pythonhosted.org/packages/d4/5a/ea1641a1e6c8c8b3454b0fcb43c3045133a8b703e6e824fae134088e63bd/psycopg_binary-3.3.3-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a39f34c9b18e8f6794cca17bfbcd64572ca2482318db644268049f8c738f35a6", size = 5006255, upload-time = "2026-02-18T16:48:22.176Z" },
{ url = "https://files.pythonhosted.org/packages/aa/fb/538df099bf55ae1637d52d7ccb6b9620b535a40f4c733897ac2b7bb9e14c/psycopg_binary-3.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:883d68d48ca9ff3cb3d10c5fdebea02c79b48eecacdddbf7cce6e7cdbdc216b8", size = 4532694, upload-time = "2026-02-18T16:48:27.338Z" },
{ url = "https://files.pythonhosted.org/packages/a1/d1/00780c0e187ea3c13dfc53bd7060654b2232cd30df562aac91a5f1c545ac/psycopg_binary-3.3.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:cab7bc3d288d37a80aa8c0820033250c95e40b1c2b5c57cf59827b19c2a8b69d", size = 4222833, upload-time = "2026-02-18T16:48:31.221Z" },
{ url = "https://files.pythonhosted.org/packages/7a/34/a07f1ff713c51d64dc9f19f2c32be80299a2055d5d109d5853662b922cb4/psycopg_binary-3.3.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:56c767007ca959ca32f796b42379fc7e1ae2ed085d29f20b05b3fc394f3715cc", size = 3952818, upload-time = "2026-02-18T16:48:35.869Z" },
{ url = "https://files.pythonhosted.org/packages/d3/67/d33f268a7759b4445f3c9b5a181039b01af8c8263c865c1be7a6444d4749/psycopg_binary-3.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:da2f331a01af232259a21573a01338530c6016dcfad74626c01330535bcd8628", size = 4258061, upload-time = "2026-02-18T16:48:41.365Z" },
{ url = "https://files.pythonhosted.org/packages/b4/3b/0d8d2c5e8e29ccc07d28c8af38445d9d9abcd238d590186cac82ee71fc84/psycopg_binary-3.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:19f93235ece6dbfc4036b5e4f6d8b13f0b8f2b3eeb8b0bd2936d406991bcdd40", size = 3558915, upload-time = "2026-02-18T16:48:46.679Z" },
{ url = "https://files.pythonhosted.org/packages/90/15/021be5c0cbc5b7c1ab46e91cc3434eb42569f79a0592e67b8d25e66d844d/psycopg_binary-3.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6698dbab5bcef8fdb570fc9d35fd9ac52041771bfcfe6fd0fc5f5c4e36f1e99d", size = 4591170, upload-time = "2026-02-18T16:48:55.594Z" },
{ url = "https://files.pythonhosted.org/packages/f1/54/a60211c346c9a2f8c6b272b5f2bbe21f6e11800ce7f61e99ba75cf8b63e1/psycopg_binary-3.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:329ff393441e75f10b673ae99ab45276887993d49e65f141da20d915c05aafd8", size = 4670009, upload-time = "2026-02-18T16:49:03.608Z" },
{ url = "https://files.pythonhosted.org/packages/c1/53/ac7c18671347c553362aadbf65f92786eef9540676ca24114cc02f5be405/psycopg_binary-3.3.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:eb072949b8ebf4082ae24289a2b0fd724da9adc8f22743409d6fd718ddb379df", size = 5469735, upload-time = "2026-02-18T16:49:10.128Z" },
{ url = "https://files.pythonhosted.org/packages/7f/c3/4f4e040902b82a344eff1c736cde2f2720f127fe939c7e7565706f96dd44/psycopg_binary-3.3.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:263a24f39f26e19ed7fc982d7859a36f17841b05bebad3eb47bb9cd2dd785351", size = 5152919, upload-time = "2026-02-18T16:49:16.335Z" },
{ url = "https://files.pythonhosted.org/packages/0c/e7/d929679c6a5c212bcf738806c7c89f5b3d0919f2e1685a0e08d6ff877945/psycopg_binary-3.3.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5152d50798c2fa5bd9b68ec68eb68a1b71b95126c1d70adaa1a08cd5eefdc23d", size = 6738785, upload-time = "2026-02-18T16:49:22.687Z" },
{ url = "https://files.pythonhosted.org/packages/69/b0/09703aeb69a9443d232d7b5318d58742e8ca51ff79f90ffe6b88f1db45e7/psycopg_binary-3.3.3-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9d6a1e56dd267848edb824dbeb08cf5bac649e02ee0b03ba883ba3f4f0bd54f2", size = 4979008, upload-time = "2026-02-18T16:49:27.313Z" },
{ url = "https://files.pythonhosted.org/packages/cc/a6/e662558b793c6e13a7473b970fee327d635270e41eded3090ef14045a6a5/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73eaaf4bb04709f545606c1db2f65f4000e8a04cdbf3e00d165a23004692093e", size = 4508255, upload-time = "2026-02-18T16:49:31.575Z" },
{ url = "https://files.pythonhosted.org/packages/5f/7f/0f8b2e1d5e0093921b6f324a948a5c740c1447fbb45e97acaf50241d0f39/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:162e5675efb4704192411eaf8e00d07f7960b679cd3306e7efb120bb8d9456cc", size = 4189166, upload-time = "2026-02-18T16:49:35.801Z" },
{ url = "https://files.pythonhosted.org/packages/92/ec/ce2e91c33bc8d10b00c87e2f6b0fb570641a6a60042d6a9ae35658a3a797/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:fab6b5e37715885c69f5d091f6ff229be71e235f272ebaa35158d5a46fd548a0", size = 3924544, upload-time = "2026-02-18T16:49:41.129Z" },
{ url = "https://files.pythonhosted.org/packages/c5/2f/7718141485f73a924205af60041c392938852aa447a94c8cbd222ff389a1/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a4aab31bd6d1057f287c96c0effca3a25584eb9cc702f282ecb96ded7814e830", size = 4235297, upload-time = "2026-02-18T16:49:46.726Z" },
{ url = "https://files.pythonhosted.org/packages/57/f9/1add717e2643a003bbde31b1b220172e64fbc0cb09f06429820c9173f7fc/psycopg_binary-3.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:59aa31fe11a0e1d1bcc2ce37ed35fe2ac84cd65bb9036d049b1a1c39064d0f14", size = 3547659, upload-time = "2026-02-18T16:49:52.999Z" },
]
[[package]] [[package]]
name = "psycopg2-binary" name = "psycopg2-binary"
version = "2.9.11" version = "2.9.11"

View File

@ -21,3 +21,4 @@ pytest --timeout "${PYTEST_TIMEOUT}" api/tests/integration_tests/vdb/chroma \
api/tests/integration_tests/vdb/oceanbase \ api/tests/integration_tests/vdb/oceanbase \
api/tests/integration_tests/vdb/tidb_vector \ api/tests/integration_tests/vdb/tidb_vector \
api/tests/integration_tests/vdb/huawei \ api/tests/integration_tests/vdb/huawei \
api/tests/integration_tests/vdb/hologres \

View File

@ -541,7 +541,7 @@ SUPABASE_URL=your-server-url
# ------------------------------ # ------------------------------
# The type of vector store to use. # The type of vector store to use.
# Supported values are `weaviate`, `oceanbase`, `seekdb`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`, `vastbase`, `tidb`, `tidb_on_qdrant`, `baidu`, `lindorm`, `huawei_cloud`, `upstash`, `matrixone`, `clickzetta`, `alibabacloud_mysql`, `iris`. # Supported values are `weaviate`, `oceanbase`, `seekdb`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`, `vastbase`, `tidb`, `tidb_on_qdrant`, `baidu`, `lindorm`, `huawei_cloud`, `upstash`, `matrixone`, `clickzetta`, `alibabacloud_mysql`, `iris`, `hologres`.
VECTOR_STORE=weaviate VECTOR_STORE=weaviate
# Prefix used to create collection name in vector database # Prefix used to create collection name in vector database
VECTOR_INDEX_NAME_PREFIX=Vector_index VECTOR_INDEX_NAME_PREFIX=Vector_index
@ -605,6 +605,20 @@ COUCHBASE_PASSWORD=password
COUCHBASE_BUCKET_NAME=Embeddings COUCHBASE_BUCKET_NAME=Embeddings
COUCHBASE_SCOPE_NAME=_default COUCHBASE_SCOPE_NAME=_default
# Hologres configurations, only available when VECTOR_STORE is `hologres`
# access_key_id is used as the PG username, access_key_secret is used as the PG password
HOLOGRES_HOST=
HOLOGRES_PORT=80
HOLOGRES_DATABASE=
HOLOGRES_ACCESS_KEY_ID=
HOLOGRES_ACCESS_KEY_SECRET=
HOLOGRES_SCHEMA=public
HOLOGRES_TOKENIZER=jieba
HOLOGRES_DISTANCE_METHOD=Cosine
HOLOGRES_BASE_QUANTIZATION_TYPE=rabitq
HOLOGRES_MAX_DEGREE=64
HOLOGRES_EF_CONSTRUCTION=400
# pgvector configurations, only available when VECTOR_STORE is `pgvector` # pgvector configurations, only available when VECTOR_STORE is `pgvector`
PGVECTOR_HOST=pgvector PGVECTOR_HOST=pgvector
PGVECTOR_PORT=5432 PGVECTOR_PORT=5432

View File

@ -215,6 +215,17 @@ x-shared-env: &shared-api-worker-env
COUCHBASE_PASSWORD: ${COUCHBASE_PASSWORD:-password} COUCHBASE_PASSWORD: ${COUCHBASE_PASSWORD:-password}
COUCHBASE_BUCKET_NAME: ${COUCHBASE_BUCKET_NAME:-Embeddings} COUCHBASE_BUCKET_NAME: ${COUCHBASE_BUCKET_NAME:-Embeddings}
COUCHBASE_SCOPE_NAME: ${COUCHBASE_SCOPE_NAME:-_default} COUCHBASE_SCOPE_NAME: ${COUCHBASE_SCOPE_NAME:-_default}
HOLOGRES_HOST: ${HOLOGRES_HOST:-}
HOLOGRES_PORT: ${HOLOGRES_PORT:-80}
HOLOGRES_DATABASE: ${HOLOGRES_DATABASE:-}
HOLOGRES_ACCESS_KEY_ID: ${HOLOGRES_ACCESS_KEY_ID:-}
HOLOGRES_ACCESS_KEY_SECRET: ${HOLOGRES_ACCESS_KEY_SECRET:-}
HOLOGRES_SCHEMA: ${HOLOGRES_SCHEMA:-public}
HOLOGRES_TOKENIZER: ${HOLOGRES_TOKENIZER:-jieba}
HOLOGRES_DISTANCE_METHOD: ${HOLOGRES_DISTANCE_METHOD:-Cosine}
HOLOGRES_BASE_QUANTIZATION_TYPE: ${HOLOGRES_BASE_QUANTIZATION_TYPE:-rabitq}
HOLOGRES_MAX_DEGREE: ${HOLOGRES_MAX_DEGREE:-64}
HOLOGRES_EF_CONSTRUCTION: ${HOLOGRES_EF_CONSTRUCTION:-400}
PGVECTOR_HOST: ${PGVECTOR_HOST:-pgvector} PGVECTOR_HOST: ${PGVECTOR_HOST:-pgvector}
PGVECTOR_PORT: ${PGVECTOR_PORT:-5432} PGVECTOR_PORT: ${PGVECTOR_PORT:-5432}
PGVECTOR_USER: ${PGVECTOR_USER:-postgres} PGVECTOR_USER: ${PGVECTOR_USER:-postgres}