Skip to content

Commit

Permalink
fix lindorm vdb bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
jiangzhijie committed Dec 18, 2024
1 parent c5543e4 commit 47695de
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 317 deletions.
61 changes: 33 additions & 28 deletions api/core/rag/datasource/vdb/lindorm/lindorm_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ def to_opensearch_params(self) -> dict[str, Any]:


class LindormVectorStore(BaseVector):
def __init__(self, collection_name: str, config: LindormVectorStoreConfig, **kwargs):
def __init__(self, collection_name: str, config: LindormVectorStoreConfig, using_ugc: bool, **kwargs):
self._routing = None
self._routing_field = None
if config.using_ugc:
if using_ugc:
routing_value: str = kwargs.get("routing_value")
if routing_value is None:
raise ValueError("UGC index should init vector with valid 'routing_value' parameter value")
Expand All @@ -64,7 +64,7 @@ def __init__(self, collection_name: str, config: LindormVectorStoreConfig, **kwa
super().__init__(collection_name.lower())
self._client_config = config
self._client = OpenSearch(**config.to_opensearch_params())
self._using_ugc = config.using_ugc
self._using_ugc = using_ugc
self.kwargs = kwargs

def get_type(self) -> str:
Expand Down Expand Up @@ -354,17 +354,17 @@ def default_text_mapping(dimension: int, method_name: str, **kwargs: Any) -> dic


def default_text_search_query(
query_text: str,
k: int = 4,
text_field: str = Field.CONTENT_KEY.value,
must: Optional[list[dict]] = None,
must_not: Optional[list[dict]] = None,
should: Optional[list[dict]] = None,
minimum_should_match: int = 0,
filters: Optional[list[dict]] = None,
routing: Optional[str] = None,
routing_field: Optional[str] = None,
**kwargs,
query_text: str,
k: int = 4,
text_field: str = Field.CONTENT_KEY.value,
must: Optional[list[dict]] = None,
must_not: Optional[list[dict]] = None,
should: Optional[list[dict]] = None,
minimum_should_match: int = 0,
filters: Optional[list[dict]] = None,
routing: Optional[str] = None,
routing_field: Optional[str] = None,
**kwargs,
) -> dict:
if routing is not None:
query_clause = {
Expand Down Expand Up @@ -410,17 +410,17 @@ def default_text_search_query(


def default_vector_search_query(
query_vector: list[float],
k: int = 4,
min_score: str = "0.0",
ef_search: Optional[str] = None, # only for hnsw
nprobe: Optional[str] = None, # "2000"
reorder_factor: Optional[str] = None, # "20"
client_refactor: Optional[str] = None, # "true"
vector_field: str = Field.VECTOR.value,
filters: Optional[list[dict]] = None,
filter_type: Optional[str] = None,
**kwargs,
query_vector: list[float],
k: int = 4,
min_score: str = "0.0",
ef_search: Optional[str] = None, # only for hnsw
nprobe: Optional[str] = None, # "2000"
reorder_factor: Optional[str] = None, # "20"
client_refactor: Optional[str] = None, # "true"
vector_field: str = Field.VECTOR.value,
filters: Optional[list[dict]] = None,
filter_type: Optional[str] = None,
**kwargs,
) -> dict:
if filters is not None:
filter_type = "post_filter" if filter_type is None else filter_type
Expand Down Expand Up @@ -467,12 +467,16 @@ def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings
using_ugc = dify_config.USING_UGC_INDEX
routing_value = None
if dataset.index_struct:
if using_ugc:
# if an existing record's index_struct_dict doesn't contain the using_ugc field,
# it is actually stored in the normal (non-UGC) index format
stored_in_ugc = dataset.index_struct_dict.get("using_ugc", False)
using_ugc = stored_in_ugc
if stored_in_ugc:
dimension = dataset.index_struct_dict["dimension"]
index_type = dataset.index_struct_dict["index_type"]
distance_type = dataset.index_struct_dict["distance_type"]
index_name = f"{UGC_INDEX_PREFIX}_{dimension}_{index_type}_{distance_type}"
routing_value = dataset.index_struct_dict["vector_store"]["class_prefix"]
index_name = f"{UGC_INDEX_PREFIX}_{dimension}_{index_type}_{distance_type}"
else:
index_name = dataset.index_struct_dict["vector_store"]["class_prefix"]
else:
Expand All @@ -487,11 +491,12 @@ def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings
"index_type": index_type,
"dimension": dimension,
"distance_type": distance_type,
"using_ugc": using_ugc
}
dataset.index_struct = json.dumps(index_struct_dict)
if using_ugc:
index_name = f"{UGC_INDEX_PREFIX}_{dimension}_{index_type}_{distance_type}"
routing_value = class_prefix
else:
index_name = class_prefix
return LindormVectorStore(index_name, lindorm_config, routing_value=routing_value)
return LindormVectorStore(index_name, lindorm_config, routing_value=routing_value, using_ugc=using_ugc)
17 changes: 0 additions & 17 deletions docker/volumes/myscale/config/users.d/custom_users_config.xml

This file was deleted.

1 change: 0 additions & 1 deletion docker/volumes/oceanbase/init.d/vec_memory.sql

This file was deleted.

222 changes: 0 additions & 222 deletions docker/volumes/opensearch/opensearch_dashboards.yml

This file was deleted.

14 changes: 0 additions & 14 deletions docker/volumes/sandbox/conf/config.yaml

This file was deleted.

Loading

0 comments on commit 47695de

Please sign in to comment.