Skip to content

Commit 6c47ca8

Browse files
authored
Merge pull request #1820 from weaviate/multi2vec_aws
Add Multi2vec-aws and text2vec-morph
2 parents 3ece344 + 97e2002 commit 6c47ca8

File tree

3 files changed

+144
-0
lines changed

3 files changed

+144
-0
lines changed

test/collection/test_config.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1944,6 +1944,30 @@ def test_config_with_named_vectors(
19441944
}
19451945
},
19461946
),
1947+
(
1948+
[
1949+
Configure.Vectors.multi2vec_aws(
1950+
name="test",
1951+
dimensions=512,
1952+
model="model",
1953+
text_fields=["prop"],
1954+
image_fields=["img"],
1955+
)
1956+
],
1957+
{
1958+
"test": {
1959+
"vectorizer": {
1960+
"multi2vec-aws": {
1961+
"textFields": ["prop"],
1962+
"imageFields": ["img"],
1963+
"model": "model",
1964+
"dimensions": 512,
1965+
}
1966+
},
1967+
"vectorIndexType": "hnsw",
1968+
}
1969+
},
1970+
),
19471971
(
19481972
[Configure.Vectors.text2vec_cohere(name="test", source_properties=["prop"])],
19491973
{
@@ -2153,6 +2177,20 @@ def test_config_with_named_vectors(
21532177
}
21542178
},
21552179
),
2180+
(
2181+
[Configure.Vectors.text2vec_morph(name="test", source_properties=["prop"])],
2182+
{
2183+
"test": {
2184+
"vectorizer": {
2185+
"text2vec-morph": {
2186+
"vectorizeClassName": True,
2187+
"properties": ["prop"],
2188+
}
2189+
},
2190+
"vectorIndexType": "hnsw",
2191+
}
2192+
},
2193+
),
21562194
(
21572195
[
21582196
Configure.Vectors.text2vec_google(

weaviate/collections/classes/config_vectorizers.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ class Vectorizers(str, Enum):
115115
TEXT2VEC_GPT4ALL = "text2vec-gpt4all"
116116
TEXT2VEC_HUGGINGFACE = "text2vec-huggingface"
117117
TEXT2VEC_MISTRAL = "text2vec-mistral"
118+
TEXT2VEC_MORPH = "text2vec-morph"
118119
TEXT2VEC_MODEL2VEC = "text2vec-model2vec"
119120
TEXT2VEC_NVIDIA = "text2vec-nvidia"
120121
TEXT2VEC_OLLAMA = "text2vec-ollama"
@@ -125,6 +126,7 @@ class Vectorizers(str, Enum):
125126
TEXT2VEC_VOYAGEAI = "text2vec-voyageai"
126127
TEXT2VEC_WEAVIATE = "text2vec-weaviate"
127128
IMG2VEC_NEURAL = "img2vec-neural"
129+
MULTI2VEC_AWS = "multi2vec-aws"
128130
MULTI2VEC_CLIP = "multi2vec-clip"
129131
MULTI2VEC_COHERE = "multi2vec-cohere"
130132
MULTI2VEC_JINAAI = "multi2vec-jinaai"
@@ -274,6 +276,21 @@ def _to_dict(self) -> Dict[str, Any]:
274276
return ret_dict
275277

276278

279+
class _Text2VecMorphConfig(_VectorizerConfigCreate):
    # Module settings for the `text2vec-morph` vectorizer.
    # The vectorizer tag defaults to TEXT2VEC_MORPH and is declared frozen/excluded.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.TEXT2VEC_MORPH,
        frozen=True,
        exclude=True,
    )
    # Attribute names are camelCase on purpose; renaming them would change the
    # keys of the emitted settings (e.g. "vectorizeClassName").
    model: Optional[str]
    vectorizeClassName: bool
    baseURL: Optional[AnyHttpUrl]

    def _to_dict(self) -> Dict[str, Any]:
        """Return the parent's dict, with `baseURL` rendered as a plain string when set."""
        settings = super()._to_dict()
        if self.baseURL is None:
            return settings
        # Overwrite whatever the parent produced for the URL with its string form.
        settings["baseURL"] = self.baseURL.unicode_string()
        return settings
292+
293+
277294
class _Text2VecDatabricksConfig(_VectorizerConfigCreate):
278295
vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
279296
default=Vectorizers.TEXT2VEC_DATABRICKS, frozen=True, exclude=True
@@ -467,6 +484,15 @@ def _to_dict(self) -> Dict[str, Any]:
467484
return ret_dict
468485

469486

487+
class _Multi2VecAWSConfig(_Multi2VecBase):
    # Module settings for the `multi2vec-aws` vectorizer, on top of whatever
    # `_Multi2VecBase` declares.
    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
        default=Vectorizers.MULTI2VEC_AWS,
        frozen=True,
        exclude=True,
    )
    # All three are optional; the factory that builds this config passes `None`
    # through when the caller does not set them.
    region: Optional[str]
    model: Optional[str]
    dimensions: Optional[int]
494+
495+
470496
class _Multi2MultiVecJinaConfig(_Multi2VecBase):
471497
vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
472498
default=Vectorizers.MULTI2MULTI_JINAAI, frozen=True, exclude=True

weaviate/collections/classes/config_vectors.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
_Img2VecNeuralConfig,
4141
_map_multi2vec_fields,
4242
_Multi2MultiVecJinaConfig,
43+
_Multi2VecAWSConfig,
4344
_Multi2VecBindConfig,
4445
_Multi2VecClipConfig,
4546
_Multi2VecCohereConfig,
@@ -60,6 +61,7 @@
6061
_Text2VecJinaConfig,
6162
_Text2VecMistralConfig,
6263
_Text2VecModel2VecConfig,
64+
_Text2VecMorphConfig,
6365
_Text2VecNvidiaConfig,
6466
_Text2VecOllamaConfig,
6567
_Text2VecOpenAIConfig,
@@ -559,6 +561,42 @@ def text2vec_mistral(
559561
vector_index_config=_IndexWrappers.single(vector_index_config, quantizer),
560562
)
561563

564+
@staticmethod
def text2vec_morph(
    *,
    name: Optional[str] = None,
    quantizer: Optional[_QuantizerConfigCreate] = None,
    base_url: Optional[AnyHttpUrl] = None,
    model: Optional[str] = None,
    source_properties: Optional[List[str]] = None,
    vector_index_config: Optional[_VectorIndexConfigCreate] = None,
    vectorize_collection_name: bool = True,
) -> _VectorConfigCreate:
    """Create a vector using the `text2vec-morph` module.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/morph/embeddings)
    for detailed usage.

    Args:
        name: The name of the vector.
        quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied.
        base_url: The base URL that API requests should be sent to. Defaults to `None`, which uses the server-defined default.
        model: The model to use. Defaults to `None`, which uses the server-defined default.
        source_properties: The properties to include when vectorizing. By default all text properties are included.
        vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. `None` by default.
        vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
    """
    # Map the snake_case arguments onto the camelCase fields of the module config.
    morph_settings = _Text2VecMorphConfig(
        baseURL=base_url,
        model=model,
        vectorizeClassName=vectorize_collection_name,
    )
    index_settings = _IndexWrappers.single(vector_index_config, quantizer)
    return _VectorConfigCreate(
        name=name,
        source_properties=source_properties,
        vectorizer=morph_settings,
        vector_index_config=index_settings,
    )
599+
562600
@staticmethod
563601
def text2vec_ollama(
564602
*,
@@ -688,6 +726,48 @@ def text2vec_aws(
688726
vector_index_config=_IndexWrappers.single(vector_index_config, quantizer),
689727
)
690728

729+
@staticmethod
def multi2vec_aws(
    *,
    name: Optional[str] = None,
    quantizer: Optional[_QuantizerConfigCreate] = None,
    dimensions: Optional[int] = None,
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    model: Optional[str] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    region: Optional[str] = None,
    vector_index_config: Optional[_VectorIndexConfigCreate] = None,
) -> _VectorConfigCreate:
    """Create a vector using the `multi2vec-aws` module.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/aws/embeddings)
    for detailed usage.

    Args:
        name: The name of the vector.
        quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied.
        dimensions: The number of dimensions to use. Defaults to `None`, which uses the server-defined default.
        image_fields: The image fields to use in vectorization.
        model: The model to use. Defaults to `None`, which uses the server-defined default.
        text_fields: The text fields to use in vectorization.
        region: The AWS region to run the model from. Defaults to `None`, which uses the server-defined default.
        vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. `None` by default.
    """
    # `model` is a free-form string here (no enumerated model type), so it is
    # passed through as given.
    return _VectorConfigCreate(
        name=name,
        vectorizer=_Multi2VecAWSConfig(
            region=region,
            model=model,
            dimensions=dimensions,
            imageFields=_map_multi2vec_fields(image_fields),
            textFields=_map_multi2vec_fields(text_fields),
        ),
        vector_index_config=_IndexWrappers.single(vector_index_config, quantizer),
    )
770+
691771
@staticmethod
692772
def img2vec_neural(
693773
*,

0 commit comments

Comments
 (0)