From cf05eb9f964eb8e3378b5a17757d581720fe2552 Mon Sep 17 00:00:00 2001 From: ryanrib14 <147437431+ryanrib14@users.noreply.github.com> Date: Tue, 13 Feb 2024 18:53:25 -0300 Subject: [PATCH] fix the bug on vector_search_profile default (this profile did not exist) and add the possibility to choose language_analyzer of searchable fields, now on v0.10.3 format (#10675) --- .../vector_stores/azureaisearch/base.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/llama_index/vector_stores/azureaisearch/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/llama_index/vector_stores/azureaisearch/base.py index 16a51c65324527..b81e5e5512761a 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/llama_index/vector_stores/azureaisearch/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-azureaisearch/llama_index/vector_stores/azureaisearch/base.py @@ -143,14 +143,14 @@ def _create_index(self, index_name: Optional[str]) -> None: SearchableField( name=self._field_mapping["chunk"], type="Edm.String", - analyzer_name="en.microsoft", + analyzer_name=self.language_analyzer, ), SearchField( name=self._field_mapping["embedding"], type=SearchFieldDataType.Collection(SearchFieldDataType.Single), searchable=True, vector_search_dimensions=self.embedding_dimensionality, - vector_search_profile_name="default", + vector_search_profile_name=self.vector_profile_name, ), SimpleField(name=self._field_mapping["metadata"], type="Edm.String"), SimpleField( @@ -243,6 +243,10 @@ def __init__( ] = None, index_management: IndexManagement = IndexManagement.NO_VALIDATION, embedding_dimensionality: int = 1536, + vector_algorithm_type: str = "exhaustiveKnn", + # For content in languages other than English, set the language analyzer used by the searchable fields.
+ # https://learn.microsoft.com/en-us/azure/search/index-add-language-analyzers + language_analyzer: str = "en.lucene", **kwargs: Any, ) -> None: # ruff: noqa: E501 @@ -306,6 +310,16 @@ def __init__( self._search_client: SearchClient = cast(SearchClient, None) self.embedding_dimensionality = embedding_dimensionality + if vector_algorithm_type == "exhaustiveKnn": + self.vector_profile_name = "myExhaustiveKnnProfile" + elif vector_algorithm_type == "hnsw": + self.vector_profile_name = "myHnswProfile" + else: + raise ValueError( + "Only 'exhaustiveKnn' and 'hnsw' are supported for vector_algorithm_type" + ) + + self.language_analyzer = language_analyzer # Validate search_or_index_client if search_or_index_client is not None: if isinstance(search_or_index_client, SearchIndexClient):