diff --git a/redisvl/extensions/llmcache/semantic.py b/redisvl/extensions/llmcache/semantic.py index 41e6e214..c741ca45 100644 --- a/redisvl/extensions/llmcache/semantic.py +++ b/redisvl/extensions/llmcache/semantic.py @@ -310,7 +310,8 @@ def _vectorize_prompt(self, prompt: Optional[str]) -> List[float]: if not isinstance(prompt, str): raise TypeError("Prompt must be a string.") - return self._vectorizer.embed(prompt) + result = self._vectorizer.embed(prompt) + return result # type: ignore async def _avectorize_prompt(self, prompt: Optional[str]) -> List[float]: """Converts a text prompt to its vector representation using the @@ -318,7 +319,8 @@ async def _avectorize_prompt(self, prompt: Optional[str]) -> List[float]: if not isinstance(prompt, str): raise TypeError("Prompt must be a string.") - return await self._vectorizer.aembed(prompt) + result = await self._vectorizer.aembed(prompt) + return result # type: ignore def _check_vector_dims(self, vector: List[float]): """Checks the size of the provided vector and raises an error if it diff --git a/redisvl/extensions/router/semantic.py b/redisvl/extensions/router/semantic.py index 4a7e72c3..8aff7524 100644 --- a/redisvl/extensions/router/semantic.py +++ b/redisvl/extensions/router/semantic.py @@ -366,14 +366,14 @@ def __call__( if not vector: if not statement: raise ValueError("Must provide a vector or statement to the router") - vector = self.vectorizer.embed(statement) + vector = self.vectorizer.embed(statement) # type: ignore aggregation_method = ( aggregation_method or self.routing_config.aggregation_method ) # perform route classification - top_route_match = self._classify_route(vector, aggregation_method) + top_route_match = self._classify_route(vector, aggregation_method) # type: ignore return top_route_match @deprecated_argument("distance_threshold") @@ -400,7 +400,7 @@ def route_many( if not vector: if not statement: raise ValueError("Must provide a vector or statement to the router") - vector = self.vectorizer.embed(statement) + vector = self.vectorizer.embed(statement) # type: ignore max_k = max_k or self.routing_config.max_k aggregation_method = ( @@ -409,7 +409,7 @@ def route_many( # classify routes top_route_matches = self._classify_multi_route( - vector, max_k, aggregation_method + vector, max_k, aggregation_method # type: ignore ) return top_route_matches diff --git a/redisvl/extensions/session_manager/semantic_session.py b/redisvl/extensions/session_manager/semantic_session.py index 6825afa9..1aa15315 100644 --- a/redisvl/extensions/session_manager/semantic_session.py +++ b/redisvl/extensions/session_manager/semantic_session.py @@ -349,7 +349,7 @@ def add_messages( role=message[ROLE_FIELD_NAME], content=message[CONTENT_FIELD_NAME], session_tag=session_tag, - vector_field=content_vector, + vector_field=content_vector, # type: ignore ) if TOOL_FIELD_NAME in message: diff --git a/redisvl/utils/vectorize/base.py b/redisvl/utils/vectorize/base.py index b3a63fa9..189b6e1a 100644 --- a/redisvl/utils/vectorize/base.py +++ b/redisvl/utils/vectorize/base.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from enum import Enum -from typing import Callable, List, Optional +from typing import Callable, List, Optional, Union from pydantic import BaseModel, Field, field_validator @@ -49,34 +49,69 @@ def check_dims(cls, value): return value @abstractmethod - def embed_many( + def embed( self, - texts: List[str], + text: str, preprocess: Optional[Callable] = None, - batch_size: int = 1000, as_buffer: bool = False, **kwargs, - ) -> 
List[List[float]]:
+    ) -> Union[List[float], bytes]:
+        """Embed a chunk of text.
+
+        Args:
+            text: Text to embed
+            preprocess: Optional function to preprocess text
+            as_buffer: If True, returns a bytes object instead of a list
+
+        Returns:
+            Union[List[float], bytes]: Embedding as a list of floats, or as a bytes
+            object if as_buffer=True
+        """
         raise NotImplementedError

     @abstractmethod
-    def embed(
+    def embed_many(
         self,
-        text: str,
+        texts: List[str],
         preprocess: Optional[Callable] = None,
+        batch_size: int = 10,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[float]:
+    ) -> Union[List[List[float]], List[bytes]]:
+        """Embed multiple chunks of text.
+
+        Args:
+            texts: List of texts to embed
+            preprocess: Optional function to preprocess text
+            batch_size: Number of texts to process in each batch
+            as_buffer: If True, returns each embedding as a bytes object
+
+        Returns:
+            Union[List[List[float]], List[bytes]]: List of embeddings as lists of floats,
+            or as bytes objects if as_buffer=True
+        """
         raise NotImplementedError

     async def aembed_many(
         self,
         texts: List[str],
         preprocess: Optional[Callable] = None,
-        batch_size: int = 1000,
+        batch_size: int = 10,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[List[float]]:
+    ) -> Union[List[List[float]], List[bytes]]:
+        """Asynchronously embed multiple chunks of text.
+
+        Args:
+            texts: List of texts to embed
+            preprocess: Optional function to preprocess text
+            batch_size: Number of texts to process in each batch
+            as_buffer: If True, returns each embedding as a bytes object
+
+        Returns:
+            Union[List[List[float]], List[bytes]]: List of embeddings as lists of floats,
+            or as bytes objects if as_buffer=True
+        """
         # Fallback to standard embedding call if no async support
         return self.embed_many(texts, preprocess, batch_size, as_buffer, **kwargs)

@@ -86,7 +121,18 @@ async def aembed(
         preprocess: Optional[Callable] = None,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[float]:
+    ) -> Union[List[float], bytes]:
+        """Asynchronously embed a chunk of text.
+
+        Args:
+            text: Text to embed
+            preprocess: Optional function to preprocess text
+            as_buffer: If True, returns a bytes object instead of a list
+
+        Returns:
+            Union[List[float], bytes]: Embedding as a list of floats, or as a bytes
+            object if as_buffer=True
+        """
         # Fallback to standard embedding call if no async support
         return self.embed(text, preprocess, as_buffer, **kwargs)

diff --git a/redisvl/utils/vectorize/text/azureopenai.py b/redisvl/utils/vectorize/text/azureopenai.py
index 7b3b7d01..410280e5 100644
--- a/redisvl/utils/vectorize/text/azureopenai.py
+++ b/redisvl/utils/vectorize/text/azureopenai.py
@@ -1,5 +1,5 @@
 import os
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Union

 from pydantic import PrivateAttr
 from tenacity import retry, stop_after_attempt, wait_random_exponential
@@ -178,7 +178,7 @@ def embed_many(
         batch_size: int = 10,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[List[float]]:
+    ) -> Union[List[List[float]], List[bytes]]:
         """Embed many chunks of texts using the AzureOpenAI API.

         Args:
@@ -191,7 +191,8 @@ def embed_many(
             to a byte string. Defaults to False.

         Returns:
-            List[List[float]]: List of embeddings.
+            Union[List[List[float]], List[bytes]]: List of embeddings as lists of floats,
+            or as bytes objects if as_buffer=True

         Raises:
-            TypeError: If the wrong input type is passed in for the test.
+            TypeError: If the wrong input type is passed in for the text.
@@ -205,7 +206,9 @@ def embed_many(

         embeddings: List = []
         for batch in self.batchify(texts, batch_size, preprocess):
-            response = self._client.embeddings.create(input=batch, model=self.model)
+            response = self._client.embeddings.create(
+                input=batch, model=self.model, **kwargs
+            )
             embeddings += [
                 self._process_embedding(r.embedding, as_buffer, dtype)
                 for r in response.data
@@ -224,7 +227,7 @@ def embed(
         preprocess: Optional[Callable] = None,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[float]:
+    ) -> Union[List[float], bytes]:
         """Embed a chunk of text using the AzureOpenAI API.

         Args:
@@ -235,7 +238,8 @@ def embed(
             to a byte string. Defaults to False.

         Returns:
-            List[float]: Embedding.
+            Union[List[float], bytes]: Embedding as a list of floats, or as a bytes
+            object if as_buffer=True

         Raises:
-            TypeError: If the wrong input type is passed in for the test.
+            TypeError: If the wrong input type is passed in for the text.
@@ -248,7 +252,9 @@ def embed(

         dtype = kwargs.pop("dtype", self.dtype)

-        result = self._client.embeddings.create(input=[text], model=self.model)
+        result = self._client.embeddings.create(
+            input=[text], model=self.model, **kwargs
+        )
         return self._process_embedding(result.data[0].embedding, as_buffer, dtype)

     @retry(
@@ -261,10 +267,10 @@ async def aembed_many(
         self,
         texts: List[str],
         preprocess: Optional[Callable] = None,
-        batch_size: int = 1000,
+        batch_size: int = 10,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[List[float]]:
+    ) -> Union[List[List[float]], List[bytes]]:
         """Asynchronously embed many chunks of texts using the AzureOpenAI API.

         Args:
@@ -277,7 +283,8 @@ async def aembed_many(
             to a byte string. Defaults to False.

         Returns:
-            List[List[float]]: List of embeddings.
+            Union[List[List[float]], List[bytes]]: List of embeddings as lists of floats,
+            or as bytes objects if as_buffer=True

         Raises:
-            TypeError: If the wrong input type is passed in for the test.
+            TypeError: If the wrong input type is passed in for the text.
@@ -292,7 +299,7 @@ async def aembed_many(
         embeddings: List = []
         for batch in self.batchify(texts, batch_size, preprocess):
             response = await self._aclient.embeddings.create(
-                input=batch, model=self.model
+                input=batch, model=self.model, **kwargs
             )
             embeddings += [
                 self._process_embedding(r.embedding, as_buffer, dtype)
@@ -312,7 +319,7 @@ async def aembed(
         preprocess: Optional[Callable] = None,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[float]:
-        """Asynchronously embed a chunk of text using the OpenAI API.
+    ) -> Union[List[float], bytes]:
+        """Asynchronously embed a chunk of text using the AzureOpenAI API.

         Args:
@@ -323,7 +330,8 @@ async def aembed(
             to a byte string. Defaults to False.

         Returns:
-            List[float]: Embedding.
+            Union[List[float], bytes]: Embedding as a list of floats, or as a bytes
+            object if as_buffer=True

         Raises:
-            TypeError: If the wrong input type is passed in for the test.
+            TypeError: If the wrong input type is passed in for the text.
@@ -336,7 +344,9 @@ async def aembed( dtype = kwargs.pop("dtype", self.dtype) - result = await self._aclient.embeddings.create(input=[text], model=self.model) + result = await self._aclient.embeddings.create( + input=[text], model=self.model, **kwargs + ) return self._process_embedding(result.data[0].embedding, as_buffer, dtype) @property diff --git a/redisvl/utils/vectorize/text/bedrock.py b/redisvl/utils/vectorize/text/bedrock.py index 5858aff8..2d40685d 100644 --- a/redisvl/utils/vectorize/text/bedrock.py +++ b/redisvl/utils/vectorize/text/bedrock.py @@ -1,6 +1,6 @@ import json import os -from typing import Any, Callable, Dict, List, Optional +from typing import Any, Callable, Dict, List, Optional, Union from pydantic import PrivateAttr from tenacity import retry, stop_after_attempt, wait_random_exponential @@ -135,8 +135,8 @@ def embed( preprocess: Optional[Callable] = None, as_buffer: bool = False, **kwargs, - ) -> List[float]: - """Embed a chunk of text using Amazon Bedrock. + ) -> Union[List[float], bytes]: + """Embed a chunk of text using the AWS Bedrock Embeddings API. Args: text (str): Text to embed. @@ -144,7 +144,8 @@ def embed( as_buffer (bool): Whether to return as byte buffer. Returns: - List[float]: The embedding vector. + Union[List[float], bytes]: Embedding as a list of floats, or as a bytes + object if as_buffer=True Raises: TypeError: If text is not a string. @@ -156,7 +157,7 @@ def embed( text = preprocess(text) response = self._client.invoke_model( - modelId=self.model, body=json.dumps({"inputText": text}) + modelId=self.model, body=json.dumps({"inputText": text}), **kwargs ) response_body = json.loads(response["body"].read()) embedding = response_body["embedding"] @@ -177,17 +178,18 @@ def embed_many( batch_size: int = 10, as_buffer: bool = False, **kwargs, - ) -> List[List[float]]: - """Embed multiple texts using Amazon Bedrock. + ) -> Union[List[List[float]], List[bytes]]: + """Embed many chunks of text using the AWS Bedrock Embeddings API. Args: texts (List[str]): List of texts to embed. preprocess (Optional[Callable]): Optional preprocessing function. - batch_size (int): Size of batches for processing. + batch_size (int): Size of batches for processing. Defaults to 10. as_buffer (bool): Whether to return as byte buffers. Returns: - List[List[float]]: List of embedding vectors. + Union[List[List[float]], List[bytes]]: List of embeddings as lists of floats, + or as bytes objects if as_buffer=True Raises: TypeError: If texts is not a list of strings. @@ -206,7 +208,7 @@ def embed_many( batch_embeddings = [] for text in batch: response = self._client.invoke_model( - modelId=self.model, body=json.dumps({"inputText": text}) + modelId=self.model, body=json.dumps({"inputText": text}), **kwargs ) response_body = json.loads(response["body"].read()) batch_embeddings.append(response_body["embedding"]) diff --git a/redisvl/utils/vectorize/text/cohere.py b/redisvl/utils/vectorize/text/cohere.py index bd6481fe..4e6192e2 100644 --- a/redisvl/utils/vectorize/text/cohere.py +++ b/redisvl/utils/vectorize/text/cohere.py @@ -1,5 +1,6 @@ import os -from typing import Any, Callable, Dict, List, Optional +import warnings +from typing import Any, Callable, Dict, List, Optional, Union from pydantic import PrivateAttr from tenacity import retry, stop_after_attempt, wait_random_exponential @@ -64,7 +65,8 @@ def __init__( Defaults to None. dtype (str): the default datatype to use when embedding text as byte arrays. Used when setting `as_buffer=True` in calls to embed() and embed_many(). 
-                Defaults to 'float32'.
+                'float32' will use Cohere's float embeddings, 'int8' and 'uint8' will map
+                to Cohere's corresponding embedding types. Defaults to 'float32'.

         Raises:
             ImportError: If the cohere library is not installed.
@@ -114,6 +116,15 @@ def _set_model_dims(self) -> int:
             raise ValueError(f"Error setting embedding model dimensions: {str(e)}")
         return len(embedding)

+    def _get_cohere_embedding_type(self, dtype: str) -> List[str]:
+        """Map dtype to appropriate Cohere embedding_types value."""
+        if dtype == "int8":
+            return ["int8"]
+        elif dtype == "uint8":
+            return ["uint8"]
+        else:
+            return ["float"]
+
     @deprecated_argument("dtype")
     def embed(
         self,
@@ -121,7 +132,7 @@ def embed(
         preprocess: Optional[Callable] = None,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[float]:
+    ) -> Union[List[float], List[int], bytes]:
         """Embed a chunk of text using the Cohere Embeddings API.

         Must provide the embedding `input_type` as a `kwarg` to this method
@@ -150,13 +161,17 @@ def embed(
             Required for embedding models v3 and higher.

         Returns:
-            List[float]: Embedding.
+            Union[List[float], List[int], bytes]:
+            - If as_buffer=True: Returns a bytes object
+            - If as_buffer=False:
+              - For dtype="float32": Returns a list of floats
+              - For dtype="int8" or "uint8": Returns a list of integers

         Raises:
-            TypeError: In an invalid input_type is provided.
+            TypeError: If an invalid input_type is provided.

         """
-        input_type = kwargs.get("input_type")
+        input_type = kwargs.pop("input_type", None)

         if not isinstance(text, str):
             raise TypeError("Must pass in a str value to embed.")
@@ -171,9 +186,34 @@ def embed(
         dtype = kwargs.pop("dtype", self.dtype)

-        embedding = self._client.embed(
-            texts=[text], model=self.model, input_type=input_type
-        ).embeddings[0]
+        # Check if embedding_types was provided and warn user
+        if "embedding_types" in kwargs:
+            warnings.warn(
+                "The 'embedding_types' parameter is not supported in CohereTextVectorizer. "
+                "Please use the 'dtype' parameter instead. Your 'embedding_types' value will be ignored.",
+                UserWarning,
+                stacklevel=2,
+            )
+            kwargs.pop("embedding_types")
+
+        # Map dtype to appropriate embedding_type
+        embedding_types = self._get_cohere_embedding_type(dtype)
+
+        response = self._client.embed(
+            texts=[text],
+            model=self.model,
+            input_type=input_type,
+            embedding_types=embedding_types,
+            **kwargs,
+        )
+
+        # Extract the appropriate embedding based on embedding_types
+        embed_type = embedding_types[0]
+        if hasattr(response.embeddings, embed_type):
+            embedding = getattr(response.embeddings, embed_type)[0]
+        else:
+            embedding = response.embeddings[0]  # Fallback for older API versions
+
         return self._process_embedding(embedding, as_buffer, dtype)

     @retry(
@@ -189,7 +229,7 @@ def embed_many(
         batch_size: int = 10,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[List[float]]:
+    ) -> Union[List[List[float]], List[List[int]], List[bytes]]:
         """Embed many chunks of text using the Cohere Embeddings API.

         Must provide the embedding `input_type` as a `kwarg` to this method
@@ -221,13 +261,17 @@ def embed_many(
             Required for embedding models v3 and higher.

         Returns:
-            List[List[float]]: List of embeddings.
+            Union[List[List[float]], List[List[int]], List[bytes]]:
+            - If as_buffer=True: Returns a list of bytes objects
+            - If as_buffer=False:
+              - For dtype="float32": Returns a list of lists of floats
+              - For dtype="int8" or "uint8": Returns a list of lists of integers

         Raises:
-            TypeError: In an invalid input_type is provided.
+            TypeError: If an invalid input_type is provided.

""" - input_type = kwargs.get("input_type") + input_type = kwargs.pop("input_type", None) if not isinstance(texts, list): raise TypeError("Must pass in a list of str values to embed.") @@ -241,14 +285,41 @@ def embed_many( dtype = kwargs.pop("dtype", self.dtype) + # Check if embedding_types was provided and warn user + if "embedding_types" in kwargs: + warnings.warn( + "The 'embedding_types' parameter is not supported in CohereTextVectorizer. " + "Please use the 'dtype' parameter instead. Your 'embedding_types' value will be ignored.", + UserWarning, + stacklevel=2, + ) + kwargs.pop("embedding_types") + + # Map dtype to appropriate embedding_type + embedding_types = self._get_cohere_embedding_type(dtype) + embeddings: List = [] for batch in self.batchify(texts, batch_size, preprocess): response = self._client.embed( - texts=batch, model=self.model, input_type=input_type + texts=batch, + model=self.model, + input_type=input_type, + embedding_types=embedding_types, + **kwargs, ) + + # Extract the appropriate embeddings based on embedding_types + embed_type = embedding_types[0] + if hasattr(response.embeddings, embed_type): + batch_embeddings = getattr(response.embeddings, embed_type) + else: + batch_embeddings = ( + response.embeddings + ) # Fallback for older API versions + embeddings += [ self._process_embedding(embedding, as_buffer, dtype) - for embedding in response.embeddings + for embedding in batch_embeddings ] return embeddings diff --git a/redisvl/utils/vectorize/text/custom.py b/redisvl/utils/vectorize/text/custom.py index 4558d4d7..ed284d29 100644 --- a/redisvl/utils/vectorize/text/custom.py +++ b/redisvl/utils/vectorize/text/custom.py @@ -1,4 +1,4 @@ -from typing import Any, Callable, List, Optional +from typing import Any, Callable, List, Optional, Union from pydantic import PrivateAttr @@ -162,7 +162,7 @@ def embed( preprocess: Optional[Callable] = None, as_buffer: bool = False, **kwargs, - ) -> List[float]: + ) -> Union[List[float], bytes]: """ Generate an embedding for a single piece of text using your sync embed function. @@ -172,7 +172,7 @@ def embed( as_buffer (bool): If True, return the embedding as a byte buffer. Returns: - List[float]: The embedding of the input text. + Union[List[float], bytes]: The embedding of the input text. Raises: TypeError: If the input is not a string. @@ -200,7 +200,7 @@ def embed_many( batch_size: int = 10, as_buffer: bool = False, **kwargs, - ) -> List[List[float]]: + ) -> Union[List[List[float]], List[bytes]]: """ Generate embeddings for multiple pieces of text in batches using your sync embed_many function. @@ -211,7 +211,7 @@ def embed_many( as_buffer (bool): If True, convert each embedding to a byte buffer. Returns: - List[List[float]]: A list of embeddings, where each embedding is a list of floats. + Union[List[List[float]], List[bytes]]: A list of embeddings, where each embedding is a list of floats or bytes. Raises: TypeError: If the input is not a list of strings. 
@@ -226,7 +226,7 @@ def embed_many(
             raise NotImplementedError("No embed_many function was provided.")

         dtype = kwargs.pop("dtype", self.dtype)
-        embeddings: List[List[float]] = []
+        embeddings: Union[List[List[float]], List[bytes]] = []

         try:
             for batch in self.batchify(texts, batch_size, preprocess):
@@ -288,10 +288,10 @@ async def aembed_many(
         self,
         texts: List[str],
         preprocess: Optional[Callable] = None,
-        batch_size: int = 1000,
+        batch_size: int = 10,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[List[float]]:
+    ) -> Union[List[List[float]], List[bytes]]:
         """
         Asynchronously generate embeddings for multiple pieces of text in batches.

         Args:
@@ -302,7 +302,7 @@ async def aembed_many(
             as_buffer (bool): If True, convert each embedding to a byte buffer.

         Returns:
-            List[List[float]]: A list of embeddings, where each embedding is a list of floats.
+            Union[List[List[float]], List[bytes]]: A list of embeddings, where each embedding is a list of floats or bytes.

         Raises:
             TypeError: If the input is not a list of strings.
@@ -317,7 +317,7 @@ async def aembed_many(
             raise NotImplementedError("No aembed_many function was provided.")

         dtype = kwargs.pop("dtype", self.dtype)
-        embeddings: List[List[float]] = []
+        embeddings: Union[List[List[float]], List[bytes]] = []

         try:
             for batch in self.batchify(texts, batch_size, preprocess):
diff --git a/redisvl/utils/vectorize/text/huggingface.py b/redisvl/utils/vectorize/text/huggingface.py
index 8f81b85c..bafba41d 100644
--- a/redisvl/utils/vectorize/text/huggingface.py
+++ b/redisvl/utils/vectorize/text/huggingface.py
@@ -1,4 +1,4 @@
-from typing import Any, Callable, List, Optional
+from typing import Any, Callable, List, Optional, Union

 from pydantic.v1 import PrivateAttr

@@ -89,7 +89,7 @@ def embed(
         preprocess: Optional[Callable] = None,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[float]:
+    ) -> Union[List[float], bytes]:
         """Embed a chunk of text using the Hugging Face sentence transformer.

         Args:
@@ -100,7 +100,8 @@ def embed(
             to a byte string. Defaults to False.

         Returns:
-            List[float]: Embedding.
+            Union[List[float], bytes]: Embedding as a list of floats, or as a bytes
+            object if as_buffer=True

         Raises:
             TypeError: If the wrong input type is passed in for the text.
@@ -121,10 +122,10 @@ def embed_many(
         self,
         texts: List[str],
         preprocess: Optional[Callable] = None,
-        batch_size: int = 1000,
+        batch_size: int = 10,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[List[float]]:
-        """Asynchronously embed many chunks of texts using the Hugging Face
+    ) -> Union[List[List[float]], List[bytes]]:
+        """Embed many chunks of texts using the Hugging Face
         sentence transformer.

         Args:
@@ -138,7 +139,8 @@ def embed_many(
             to a byte string. Defaults to False.

         Returns:
-            List[List[float]]: List of embeddings.
+            Union[List[List[float]], List[bytes]]: List of embeddings as lists of floats,
+            or as bytes objects if as_buffer=True

         Raises:
-            TypeError: If the wrong input type is passed in for the test.
+            TypeError: If the wrong input type is passed in for the text.
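Note on the `as_buffer`/Union return-type change that runs through this patch: with `as_buffer=True` a vectorizer hands back raw bytes rather than a list, and the signatures now say so. A minimal sketch of the difference, using the local Hugging Face vectorizer so no API key is needed (the model name here is only an example; any local sentence-transformers model works):

    import numpy as np

    from redisvl.utils.vectorize import HFTextVectorizer

    hf = HFTextVectorizer(model="sentence-transformers/all-MiniLM-L6-v2", dtype="float32")

    as_list = hf.embed("hello world")                   # List[float]
    as_bytes = hf.embed("hello world", as_buffer=True)  # bytes, ready to store in Redis

    # The buffer round-trips back to the same vector via the configured dtype.
    assert np.allclose(np.frombuffer(as_bytes, dtype="float32"), as_list)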
diff --git a/redisvl/utils/vectorize/text/mistral.py b/redisvl/utils/vectorize/text/mistral.py
index e930b3a4..05133b37 100644
--- a/redisvl/utils/vectorize/text/mistral.py
+++ b/redisvl/utils/vectorize/text/mistral.py
@@ -1,5 +1,5 @@
 import os
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Union

 from pydantic import PrivateAttr
 from tenacity import retry, stop_after_attempt, wait_random_exponential
@@ -128,7 +128,7 @@ def embed_many(
         batch_size: int = 10,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[List[float]]:
+    ) -> Union[List[List[float]], List[bytes]]:
         """Embed many chunks of texts using the Mistral API.

         Args:
@@ -141,7 +141,8 @@ def embed_many(
             to a byte string. Defaults to False.

         Returns:
-            List[List[float]]: List of embeddings.
+            Union[List[List[float]], List[bytes]]: List of embeddings as lists of floats,
+            or as bytes objects if as_buffer=True

         Raises:
-            TypeError: If the wrong input type is passed in for the test.
+            TypeError: If the wrong input type is passed in for the text.
@@ -155,7 +156,9 @@ def embed_many(

         embeddings: List = []
         for batch in self.batchify(texts, batch_size, preprocess):
-            response = self._client.embeddings.create(model=self.model, inputs=batch)
+            response = self._client.embeddings.create(
+                model=self.model, inputs=batch, **kwargs
+            )
             embeddings += [
                 self._process_embedding(r.embedding, as_buffer, dtype)
                 for r in response.data
@@ -174,7 +177,7 @@ def embed(
         preprocess: Optional[Callable] = None,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[float]:
+    ) -> Union[List[float], bytes]:
         """Embed a chunk of text using the Mistral API.

         Args:
@@ -185,7 +188,8 @@ def embed(
             to a byte string. Defaults to False.

         Returns:
-            List[float]: Embedding.
+            Union[List[float], bytes]: Embedding as a list of floats, or as a bytes
+            object if as_buffer=True

         Raises:
-            TypeError: If the wrong input type is passed in for the test.
+            TypeError: If the wrong input type is passed in for the text.
@@ -198,7 +202,9 @@ def embed(

         dtype = kwargs.pop("dtype", self.dtype)

-        result = self._client.embeddings.create(model=self.model, inputs=[text])
+        result = self._client.embeddings.create(
+            model=self.model, inputs=[text], **kwargs
+        )
         return self._process_embedding(result.data[0].embedding, as_buffer, dtype)

     @retry(
@@ -211,7 +217,7 @@ async def aembed_many(
         self,
         texts: List[str],
         preprocess: Optional[Callable] = None,
-        batch_size: int = 1000,
+        batch_size: int = 10,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[List[float]]:
+    ) -> Union[List[List[float]], List[bytes]]:
@@ -242,7 +248,7 @@ async def aembed_many(
         embeddings: List = []
         for batch in self.batchify(texts, batch_size, preprocess):
             response = await self._client.embeddings.create_async(
-                model=self.model, inputs=batch
+                model=self.model, inputs=batch, **kwargs
             )
             embeddings += [
                 self._process_embedding(r.embedding, as_buffer, dtype)
@@ -287,7 +293,7 @@ async def aembed(

         dtype = kwargs.pop("dtype", self.dtype)

         result = await self._client.embeddings.create_async(
-            model=self.model, inputs=[text]
+            model=self.model, inputs=[text], **kwargs
         )
         return self._process_embedding(result.data[0].embedding, as_buffer, dtype)

diff --git a/redisvl/utils/vectorize/text/openai.py b/redisvl/utils/vectorize/text/openai.py
index 25b21c67..eee0764a 100644
--- a/redisvl/utils/vectorize/text/openai.py
+++ b/redisvl/utils/vectorize/text/openai.py
@@ -1,5 +1,5 @@
 import os
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Union

 from pydantic import PrivateAttr
 from tenacity import retry, stop_after_attempt, wait_random_exponential
@@ -129,7 +129,7 @@ def embed_many(
         batch_size: int = 10,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[List[float]]:
+    ) -> Union[List[List[float]], List[bytes]]:
         """Embed many chunks of texts using the OpenAI API.

         Args:
@@ -142,7 +142,8 @@ def embed_many(
             to a byte string. Defaults to False.

         Returns:
-            List[List[float]]: List of embeddings.
+            Union[List[List[float]], List[bytes]]: List of embeddings as lists of floats,
+            or as bytes objects if as_buffer=True

         Raises:
             TypeError: If the wrong input type is passed in for the text.
@@ -156,7 +157,9 @@ def embed_many(

         embeddings: List = []
         for batch in self.batchify(texts, batch_size, preprocess):
-            response = self._client.embeddings.create(input=batch, model=self.model)
+            response = self._client.embeddings.create(
+                input=batch, model=self.model, **kwargs
+            )
             embeddings += [
                 self._process_embedding(r.embedding, as_buffer, dtype)
                 for r in response.data
@@ -175,7 +178,7 @@ def embed(
         preprocess: Optional[Callable] = None,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[float]:
+    ) -> Union[List[float], bytes]:
         """Embed a chunk of text using the OpenAI API.

         Args:
@@ -186,7 +189,8 @@ def embed(
             to a byte string. Defaults to False.

         Returns:
-            List[float]: Embedding.
+            Union[List[float], bytes]: Embedding as a list of floats, or as a bytes
+            object if as_buffer=True

         Raises:
             TypeError: If the wrong input type is passed in for the text.
@@ -199,7 +203,9 @@ def embed(

         dtype = kwargs.pop("dtype", self.dtype)

-        result = self._client.embeddings.create(input=[text], model=self.model)
+        result = self._client.embeddings.create(
+            input=[text], model=self.model, **kwargs
+        )
         return self._process_embedding(result.data[0].embedding, as_buffer, dtype)

     @retry(
@@ -212,10 +218,10 @@ async def aembed_many(
         self,
         texts: List[str],
         preprocess: Optional[Callable] = None,
-        batch_size: int = 1000,
+        batch_size: int = 10,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[List[float]]:
+    ) -> Union[List[List[float]], List[bytes]]:
         """Asynchronously embed many chunks of texts using the OpenAI API.

         Args:
@@ -228,7 +234,8 @@ async def aembed_many(
             to a byte string. Defaults to False.

         Returns:
-            List[List[float]]: List of embeddings.
+            Union[List[List[float]], List[bytes]]: List of embeddings as lists of floats,
+            or as bytes objects if as_buffer=True

         Raises:
             TypeError: If the wrong input type is passed in for the text.
@@ -243,7 +250,7 @@ async def aembed_many(
         embeddings: List = []
         for batch in self.batchify(texts, batch_size, preprocess):
             response = await self._aclient.embeddings.create(
-                input=batch, model=self.model
+                input=batch, model=self.model, **kwargs
             )
             embeddings += [
                 self._process_embedding(r.embedding, as_buffer, dtype)
@@ -263,7 +270,7 @@ async def aembed(
         preprocess: Optional[Callable] = None,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[float]:
+    ) -> Union[List[float], bytes]:
         """Asynchronously embed a chunk of text using the OpenAI API.

         Args:
@@ -274,7 +281,8 @@ async def aembed(
             to a byte string. Defaults to False.

         Returns:
-            List[float]: Embedding.
+            Union[List[float], bytes]: Embedding as a list of floats, or as a bytes
+            object if as_buffer=True

         Raises:
             TypeError: If the wrong input type is passed in for the text.
@@ -287,7 +295,9 @@ async def aembed(

         dtype = kwargs.pop("dtype", self.dtype)

-        result = await self._aclient.embeddings.create(input=[text], model=self.model)
+        result = await self._aclient.embeddings.create(
+            input=[text], model=self.model, **kwargs
+        )
         return self._process_embedding(result.data[0].embedding, as_buffer, dtype)

     @property
diff --git a/redisvl/utils/vectorize/text/vertexai.py b/redisvl/utils/vectorize/text/vertexai.py
index 6d455c67..ebe2a625 100644
--- a/redisvl/utils/vectorize/text/vertexai.py
+++ b/redisvl/utils/vectorize/text/vertexai.py
@@ -1,5 +1,5 @@
 import os
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Union

 from pydantic import PrivateAttr
 from tenacity import retry, stop_after_attempt, wait_random_exponential
@@ -141,8 +141,8 @@ def embed_many(
         batch_size: int = 10,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[List[float]]:
-        """Embed many chunks of texts using the VertexAI API.
+    ) -> Union[List[List[float]], List[bytes]]:
+        """Embed many chunks of text using the VertexAI Embeddings API.

         Args:
             texts (List[str]): List of text chunks to embed.
@@ -154,7 +154,8 @@ def embed_many(
             to a byte string. Defaults to False.

         Returns:
-            List[List[float]]: List of embeddings.
+            Union[List[List[float]], List[bytes]]: List of embeddings as lists of floats,
+            or as bytes objects if as_buffer=True

         Raises:
-            TypeError: If the wrong input type is passed in for the test.
+            TypeError: If the wrong input type is passed in for the text.
@@ -168,7 +169,7 @@ def embed_many(

         embeddings: List = []
         for batch in self.batchify(texts, batch_size, preprocess):
-            response = self._client.get_embeddings(batch)
+            response = self._client.get_embeddings(batch, **kwargs)
             embeddings += [
                 self._process_embedding(r.values, as_buffer, dtype) for r in response
             ]
@@ -186,8 +187,8 @@ def embed(
         preprocess: Optional[Callable] = None,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[float]:
-        """Embed a chunk of text using the VertexAI API.
+    ) -> Union[List[float], bytes]:
+        """Embed a chunk of text using the VertexAI Embeddings API.

         Args:
             text (str): Chunk of text to embed.
@@ -197,7 +198,8 @@ def embed(
             to a byte string. Defaults to False.

         Returns:
-            List[float]: Embedding.
+            Union[List[float], bytes]: Embedding as a list of floats, or as a bytes
+            object if as_buffer=True

         Raises:
-            TypeError: If the wrong input type is passed in for the test.
+            TypeError: If the wrong input type is passed in for the text.
@@ -210,7 +212,7 @@ def embed(

         dtype = kwargs.pop("dtype", self.dtype)

-        result = self._client.get_embeddings([text])
+        result = self._client.get_embeddings([text], **kwargs)
         return self._process_embedding(result[0].values, as_buffer, dtype)

     @property
diff --git a/redisvl/utils/vectorize/text/voyageai.py b/redisvl/utils/vectorize/text/voyageai.py
index fbcbfd9e..9d015a81 100644
--- a/redisvl/utils/vectorize/text/voyageai.py
+++ b/redisvl/utils/vectorize/text/voyageai.py
@@ -1,5 +1,5 @@
 import os
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Union

 from pydantic import PrivateAttr
 from tenacity import retry, stop_after_attempt, wait_random_exponential
@@ -124,7 +124,7 @@ def embed(
         preprocess: Optional[Callable] = None,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[float]:
+    ) -> Union[List[float], bytes]:
         """Embed a chunk of text using the VoyageAI Embeddings API.

         Can provide the embedding `input_type` as a `kwarg` to this method
@@ -149,7 +149,8 @@ def embed(
             Check https://docs.voyageai.com/docs/embeddings

         Returns:
-            List[float]: Embedding.
+            Union[List[float], bytes]: Embedding as a list of floats, or as a bytes
+            object if as_buffer=True

         Raises:
             TypeError: If an invalid input_type is provided.
@@ -171,7 +172,7 @@ def embed_many(
         batch_size: Optional[int] = None,
         as_buffer: bool = False,
         **kwargs,
-    ) -> List[List[float]]:
+    ) -> Union[List[List[float]], List[bytes]]:
         """Embed many chunks of text using the VoyageAI Embeddings API.

         Can provide the embedding `input_type` as a `kwarg` to this method
@@ -198,14 +199,15 @@ def embed_many(
             Check https://docs.voyageai.com/docs/embeddings

         Returns:
-            List[List[float]]: List of embeddings.
+            Union[List[List[float]], List[bytes]]: List of embeddings as lists of floats,
+            or as bytes objects if as_buffer=True

         Raises:
             TypeError: If an invalid input_type is provided.

         """
-        input_type = kwargs.get("input_type")
-        truncation = kwargs.get("truncation")
+        input_type = kwargs.pop("input_type", None)
+        truncation = kwargs.pop("truncation", None)
         dtype = kwargs.pop("dtype", self.dtype)

         if not isinstance(texts, list):
@@ -235,7 +237,7 @@ def embed_many(
         embeddings: List = []
         for batch in self.batchify(texts, batch_size, preprocess):
             response = self._client.embed(
-                texts=batch, model=self.model, input_type=input_type
+                texts=batch, model=self.model, input_type=input_type, **kwargs
             )
             embeddings += [
                 self._process_embedding(embedding, as_buffer, dtype)
@@ -284,8 +286,8 @@ async def aembed_many(
-            TypeError: In an invalid input_type is provided.
+            TypeError: If an invalid input_type is provided.

""" - input_type = kwargs.get("input_type") - truncation = kwargs.get("truncation") + input_type = kwargs.pop("input_type", None) + truncation = kwargs.pop("truncation", None) dtype = kwargs.pop("dtype", self.dtype) if not isinstance(texts, list): @@ -315,7 +317,7 @@ async def aembed_many( embeddings: List = [] for batch in self.batchify(texts, batch_size, preprocess): response = await self._aclient.embed( - texts=batch, model=self.model, input_type=input_type + texts=batch, model=self.model, input_type=input_type, **kwargs ) embeddings += [ self._process_embedding(embedding, as_buffer, dtype) @@ -360,7 +362,6 @@ async def aembed( Raises: TypeError: In an invalid input_type is provided. """ - result = await self.aembed_many( texts=[text], preprocess=preprocess, as_buffer=as_buffer, **kwargs ) diff --git a/tests/integration/test_vectorizers.py b/tests/integration/test_vectorizers.py index e1de4a46..36e444de 100644 --- a/tests/integration/test_vectorizers.py +++ b/tests/integration/test_vectorizers.py @@ -1,5 +1,6 @@ import os +import numpy as np import pytest from redisvl.utils.vectorize import ( @@ -287,7 +288,7 @@ def test_default_dtype(vectorizer_): VoyageAITextVectorizer, ], ) -def test_other_dtypes(vectorizer_): +def test_vectorizer_dtype_assignment(vectorizer_): # test initializing dtype in constructor for dtype in ["float16", "float32", "float64", "bfloat16", "int8", "uint8"]: if issubclass(vectorizer_, CustomTextVectorizer): @@ -319,7 +320,7 @@ def test_other_dtypes(vectorizer_): VoyageAITextVectorizer, ], ) -def test_bad_dtypes(vectorizer_): +def test_non_supported_dtypes(vectorizer_): with pytest.raises(ValueError): vectorizer_(dtype="float25") @@ -392,3 +393,95 @@ async def test_avectorizer_bad_input(avectorizer): with pytest.raises(TypeError): avectorizer.embed_many(42) + + +@pytest.mark.requires_api_keys +@pytest.mark.parametrize( + "dtype,expected_type", + [ + ("float32", float), # Float dtype should return floats + ("int8", int), # Int8 dtype should return ints + ("uint8", int), # Uint8 dtype should return ints + ], +) +def test_cohere_dtype_support(dtype, expected_type): + """Test that CohereTextVectorizer properly handles different dtypes for embeddings.""" + text = "This is a test sentence." 
+ texts = ["First test sentence.", "Second test sentence."] + + # Create vectorizer with specified dtype + vectorizer = CohereTextVectorizer(dtype=dtype) + + # Verify the correct mapping of dtype to Cohere embedding_types + if dtype == "int8": + assert vectorizer._get_cohere_embedding_type(dtype) == ["int8"] + elif dtype == "uint8": + assert vectorizer._get_cohere_embedding_type(dtype) == ["uint8"] + else: + # All other dtypes should map to float + assert vectorizer._get_cohere_embedding_type(dtype) == ["float"] + + # Test single embedding + embedding = vectorizer.embed(text, input_type="search_document") + assert isinstance(embedding, list) + assert len(embedding) == vectorizer.dims + + # Check that all elements are of the expected type + assert all( + isinstance(val, expected_type) for val in embedding + ), f"Expected all elements to be {expected_type.__name__} for dtype {dtype}" + + # Test multiple embeddings + embeddings = vectorizer.embed_many(texts, input_type="search_document") + assert isinstance(embeddings, list) + assert len(embeddings) == len(texts) + assert all( + isinstance(emb, list) and len(emb) == vectorizer.dims for emb in embeddings + ) + + # Check that all elements in all embeddings are of the expected type + for emb in embeddings: + assert all( + isinstance(val, expected_type) for val in emb + ), f"Expected all elements to be {expected_type.__name__} for dtype {dtype}" + + # Test as_buffer output format + embedding_buffer = vectorizer.embed( + text, input_type="search_document", as_buffer=True + ) + assert isinstance(embedding_buffer, bytes) + + # Test embed_many with as_buffer=True + buffer_embeddings = vectorizer.embed_many( + texts, input_type="search_document", as_buffer=True + ) + assert all(isinstance(emb, bytes) for emb in buffer_embeddings) + + # Compare dimensions between buffer and list formats + assert len(np.frombuffer(embedding_buffer, dtype=dtype)) == len(embedding) + + +@pytest.mark.requires_api_keys +def test_cohere_embedding_types_warning(): + """Test that a warning is raised when embedding_types parameter is passed.""" + text = "This is a test sentence." + texts = ["First test sentence.", "Second test sentence."] + vectorizer = CohereTextVectorizer() + + # Test warning for single embedding + with pytest.warns(UserWarning, match="embedding_types.*not supported"): + embedding = vectorizer.embed( + text, + input_type="search_document", + embedding_types=["uint8"], # explicitly testing the anti-pattern here + ) + assert isinstance(embedding, list) + assert len(embedding) == vectorizer.dims + + # Test warning for multiple embeddings + with pytest.warns(UserWarning, match="embedding_types.*not supported"): + embeddings = vectorizer.embed_many( + texts, input_type="search_document", embedding_types=["uint8"] + ) + assert isinstance(embeddings, list) + assert len(embeddings) == len(texts)
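Usage sketch for the new Cohere dtype handling exercised by the tests above (illustrative only: assumes COHERE_API_KEY is set and uses an embed v3 model, which requires `input_type`):

    from redisvl.utils.vectorize import CohereTextVectorizer

    co = CohereTextVectorizer(model="embed-english-v3.0", dtype="int8")

    # dtype="int8" is mapped internally to embedding_types=["int8"], so this
    # returns a list of ints rather than floats.
    emb = co.embed("This is a test sentence.", input_type="search_document")

    # Passing embedding_types directly triggers a UserWarning and is ignored;
    # dtype is the single source of truth.
    emb = co.embed(
        "This is a test sentence.",
        input_type="search_document",
        embedding_types=["float"],  # anti-pattern: use dtype instead
    )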