Merge pull request #1820 from weaviate/multi2vec_aws

dirkkul · web-flow · commit 6c47ca8eb104 · 2025-09-04T09:14:10.000+02:00
Add Multi2vec-aws and text2vec-morph
diff --git a/test/collection/test_config.py b/test/collection/test_config.py
@@ -1944,6 +1944,30 @@ def test_config_with_named_vectors(
             }
         },
     ),
+    (
+        [
+            Configure.Vectors.multi2vec_aws(
+                name="test",
+                dimensions=512,
+                model="model",
+                text_fields=["prop"],
+                image_fields=["img"],
+            )
+        ],
+        {
+            "test": {
+                "vectorizer": {
+                    "multi2vec-aws": {
+                        "textFields": ["prop"],
+                        "imageFields": ["img"],
+                        "model": "model",
+                        "dimensions": 512,
+                    }
+                },
+                "vectorIndexType": "hnsw",
+            }
+        },
+    ),
     (
         [Configure.Vectors.text2vec_cohere(name="test", source_properties=["prop"])],
         {
@@ -2153,6 +2177,20 @@ def test_config_with_named_vectors(
             }
         },
     ),
+    (
+        [Configure.Vectors.text2vec_morph(name="test", source_properties=["prop"])],
+        {
+            "test": {
+                "vectorizer": {
+                    "text2vec-morph": {
+                        "vectorizeClassName": True,
+                        "properties": ["prop"],
+                    }
+                },
+                "vectorIndexType": "hnsw",
+            }
+        },
+    ),
     (
         [
             Configure.Vectors.text2vec_google(
diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py
@@ -115,6 +115,7 @@ class Vectorizers(str, Enum):
     TEXT2VEC_GPT4ALL = "text2vec-gpt4all"
     TEXT2VEC_HUGGINGFACE = "text2vec-huggingface"
     TEXT2VEC_MISTRAL = "text2vec-mistral"
+    TEXT2VEC_MORPH = "text2vec-morph"
     TEXT2VEC_MODEL2VEC = "text2vec-model2vec"
     TEXT2VEC_NVIDIA = "text2vec-nvidia"
     TEXT2VEC_OLLAMA = "text2vec-ollama"
@@ -125,6 +126,7 @@ class Vectorizers(str, Enum):
     TEXT2VEC_VOYAGEAI = "text2vec-voyageai"
     TEXT2VEC_WEAVIATE = "text2vec-weaviate"
     IMG2VEC_NEURAL = "img2vec-neural"
+    MULTI2VEC_AWS = "multi2vec-aws"
     MULTI2VEC_CLIP = "multi2vec-clip"
     MULTI2VEC_COHERE = "multi2vec-cohere"
     MULTI2VEC_JINAAI = "multi2vec-jinaai"
@@ -274,6 +276,21 @@ def _to_dict(self) -> Dict[str, Any]:
         return ret_dict
 
 
+class _Text2VecMorphConfig(_VectorizerConfigCreate):
+    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
+        default=Vectorizers.TEXT2VEC_MORPH, frozen=True, exclude=True
+    )
+    model: Optional[str]
+    vectorizeClassName: bool
+    baseURL: Optional[AnyHttpUrl]
+
+    def _to_dict(self) -> Dict[str, Any]:
+        ret_dict = super()._to_dict()
+        if self.baseURL is not None:
+            ret_dict["baseURL"] = self.baseURL.unicode_string()
+        return ret_dict
+
+
 class _Text2VecDatabricksConfig(_VectorizerConfigCreate):
     vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
         default=Vectorizers.TEXT2VEC_DATABRICKS, frozen=True, exclude=True
@@ -467,6 +484,15 @@ def _to_dict(self) -> Dict[str, Any]:
         return ret_dict
 
 
+class _Multi2VecAWSConfig(_Multi2VecBase):
+    vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
+        default=Vectorizers.MULTI2VEC_AWS, frozen=True, exclude=True
+    )
+    region: Optional[str]
+    model: Optional[str]
+    dimensions: Optional[int]
+
+
 class _Multi2MultiVecJinaConfig(_Multi2VecBase):
     vectorizer: Union[Vectorizers, _EnumLikeStr] = Field(
         default=Vectorizers.MULTI2MULTI_JINAAI, frozen=True, exclude=True
diff --git a/weaviate/collections/classes/config_vectors.py b/weaviate/collections/classes/config_vectors.py
@@ -40,6 +40,7 @@
     _Img2VecNeuralConfig,
     _map_multi2vec_fields,
     _Multi2MultiVecJinaConfig,
+    _Multi2VecAWSConfig,
     _Multi2VecBindConfig,
     _Multi2VecClipConfig,
     _Multi2VecCohereConfig,
@@ -60,6 +61,7 @@
     _Text2VecJinaConfig,
     _Text2VecMistralConfig,
     _Text2VecModel2VecConfig,
+    _Text2VecMorphConfig,
     _Text2VecNvidiaConfig,
     _Text2VecOllamaConfig,
     _Text2VecOpenAIConfig,
@@ -559,6 +561,42 @@ def text2vec_mistral(
             vector_index_config=_IndexWrappers.single(vector_index_config, quantizer),
         )
 
+    @staticmethod
+    def text2vec_morph(
+        *,
+        name: Optional[str] = None,
+        quantizer: Optional[_QuantizerConfigCreate] = None,
+        base_url: Optional[AnyHttpUrl] = None,
+        model: Optional[str] = None,
+        source_properties: Optional[List[str]] = None,
+        vector_index_config: Optional[_VectorIndexConfigCreate] = None,
+        vectorize_collection_name: bool = True,
+    ) -> _VectorConfigCreate:
+        """Create a vector using the `text2vec-morph` module.
+
+        See the [documentation](https://weaviate.io/developers/weaviate/model-providers/morph/embeddings)
+        for detailed usage.
+
+        Args:
+            name: The name of the vector.
+            quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied.
+            base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
+            model: The model to use. Defaults to `None`, which uses the server-defined default.
+            source_properties: Which properties should be included when vectorizing. By default all text properties are included.
+            vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default
+            vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`.
+        """
+        return _VectorConfigCreate(
+            name=name,
+            source_properties=source_properties,
+            vectorizer=_Text2VecMorphConfig(
+                baseURL=base_url,
+                model=model,
+                vectorizeClassName=vectorize_collection_name,
+            ),
+            vector_index_config=_IndexWrappers.single(vector_index_config, quantizer),
+        )
+
     @staticmethod
     def text2vec_ollama(
         *,
@@ -688,6 +726,48 @@ def text2vec_aws(
             vector_index_config=_IndexWrappers.single(vector_index_config, quantizer),
         )
 
+    @staticmethod
+    def multi2vec_aws(
+        *,
+        name: Optional[str] = None,
+        quantizer: Optional[_QuantizerConfigCreate] = None,
+        dimensions: Optional[int] = None,
+        image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
+        model: Optional[str] = None,
+        text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
+        region: Optional[str] = None,
+        vector_index_config: Optional[_VectorIndexConfigCreate] = None,
+    ) -> _VectorConfigCreate:
+        """Create a vector using the `multi2vec-aws` module.
+
+        See the [documentation](https://weaviate.io/developers/weaviate/model-providers/aws/embeddings)
+        for detailed usage.
+
+        Args:
+            name: The name of the vector.
+            quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied.
+            dimensions: The number of dimensions to use. Defaults to `None`, which uses the server-defined default.
+            image_fields: The image fields to use in vectorization.
+            model: The model to use. Defaults to `None`, which uses the server-defined default.
+            text_fields: The text fields to use in vectorization.
+            region: The AWS region to run the model from. Defaults to `None`, which uses the server-defined default.
+            vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default
+
+        Raises:
+            pydantic.ValidationError: If an argument fails validation against its declared type (e.g. `dimensions` is not an integer).
+        """
+        return _VectorConfigCreate(
+            name=name,
+            vectorizer=_Multi2VecAWSConfig(
+                region=region,
+                model=model,
+                dimensions=dimensions,
+                imageFields=_map_multi2vec_fields(image_fields),
+                textFields=_map_multi2vec_fields(text_fields),
+            ),
+            vector_index_config=_IndexWrappers.single(vector_index_config, quantizer),
+        )
+
     @staticmethod
     def img2vec_neural(
         *,