Skip to content

Commit

Permalink
Lindorm vdb bug-fix (langgenius#11790)
Browse files Browse the repository at this point in the history
Co-authored-by: jiangzhijie <jiangzhijie.jzj@alibaba-inc.com>
  • Loading branch information
2 people authored and 刘江波 committed Dec 20, 2024
1 parent a6d7056 commit e172c9f
Showing 1 changed file with 11 additions and 6 deletions.
17 changes: 11 additions & 6 deletions api/core/rag/datasource/vdb/lindorm/lindorm_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ def to_opensearch_params(self) -> dict[str, Any]:


class LindormVectorStore(BaseVector):
def __init__(self, collection_name: str, config: LindormVectorStoreConfig, **kwargs):
def __init__(self, collection_name: str, config: LindormVectorStoreConfig, using_ugc: bool, **kwargs):
self._routing = None
self._routing_field = None
if config.using_ugc:
if using_ugc:
routing_value: str = kwargs.get("routing_value")
if routing_value is None:
raise ValueError("UGC index should init vector with valid 'routing_value' parameter value")
Expand All @@ -64,7 +64,7 @@ def __init__(self, collection_name: str, config: LindormVectorStoreConfig, **kwa
super().__init__(collection_name.lower())
self._client_config = config
self._client = OpenSearch(**config.to_opensearch_params())
self._using_ugc = config.using_ugc
self._using_ugc = using_ugc
self.kwargs = kwargs

def get_type(self) -> str:
Expand Down Expand Up @@ -467,12 +467,16 @@ def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings
using_ugc = dify_config.USING_UGC_INDEX
routing_value = None
if dataset.index_struct:
if using_ugc:
# if an existing record's index_struct_dict doesn't contain the using_ugc field,
# it is actually stored in the normal index format
stored_in_ugc = dataset.index_struct_dict.get("using_ugc", False)
using_ugc = stored_in_ugc
if stored_in_ugc:
dimension = dataset.index_struct_dict["dimension"]
index_type = dataset.index_struct_dict["index_type"]
distance_type = dataset.index_struct_dict["distance_type"]
index_name = f"{UGC_INDEX_PREFIX}_{dimension}_{index_type}_{distance_type}"
routing_value = dataset.index_struct_dict["vector_store"]["class_prefix"]
index_name = f"{UGC_INDEX_PREFIX}_{dimension}_{index_type}_{distance_type}"
else:
index_name = dataset.index_struct_dict["vector_store"]["class_prefix"]
else:
Expand All @@ -487,11 +491,12 @@ def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings
"index_type": index_type,
"dimension": dimension,
"distance_type": distance_type,
"using_ugc": using_ugc,
}
dataset.index_struct = json.dumps(index_struct_dict)
if using_ugc:
index_name = f"{UGC_INDEX_PREFIX}_{dimension}_{index_type}_{distance_type}"
routing_value = class_prefix
else:
index_name = class_prefix
return LindormVectorStore(index_name, lindorm_config, routing_value=routing_value)
return LindormVectorStore(index_name, lindorm_config, routing_value=routing_value, using_ugc=using_ugc)

0 comments on commit e172c9f

Please sign in to comment.