Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions providers/cohere/src/airflow/providers/cohere/hooks/cohere.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

if TYPE_CHECKING:
from cohere.core.request_options import RequestOptions
from cohere.types import ChatMessages, EmbedByTypeResponseEmbeddings
from cohere.types import ChatMessages


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -91,7 +91,7 @@ def get_conn(self) -> cohere.ClientV2:

def create_embeddings(
self, texts: list[str], model: str = "embed-multilingual-v3.0"
) -> EmbedByTypeResponseEmbeddings:
) -> list[list[float]]:
logger.info("Creating embeddings with model: embed-multilingual-v3.0")
response = self.get_conn().embed(
texts=texts,
Expand All @@ -100,8 +100,9 @@ def create_embeddings(
embedding_types=["float"],
request_options=self.request_options,
)
embeddings = response.embeddings
return embeddings
if response.embeddings.float_ is None:
raise ValueError("Embeddings response is missing float_ field")
return response.embeddings.float_
Copy link
Contributor

@sjyangkevin sjyangkevin Jun 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would like to share some of my findings about this change. As shown in https://github.com/cohere-ai/cohere-python/blob/main/src/cohere/types/embed_by_type_response_embeddings.py, there could be cases where the embeddings are stored in fields other than `float_`. I also shared some thoughts in the comment, and it would be better if the pydantic model could be handled by the XCom serde.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now it's OK; I just added a comment so that people aren't confused: #51517


@classmethod
def get_ui_field_behaviour(cls) -> dict[str, Any]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@

if TYPE_CHECKING:
from cohere.core.request_options import RequestOptions
from cohere.types import EmbedByTypeResponseEmbeddings

try:
from airflow.sdk.definitions.context import Context
Expand Down Expand Up @@ -91,6 +90,9 @@ def hook(self) -> CohereHook:
request_options=self.request_options,
)

def execute(self, context: Context) -> EmbedByTypeResponseEmbeddings:
def execute(self, context: Context) -> list[list[float]]:
"""Embed texts using Cohere embed services."""
return self.hook.create_embeddings(self.input_text)
embedding_response = self.hook.create_embeddings(self.input_text)

# Extract just the embeddings list, which is serializable
return embedding_response
10 changes: 5 additions & 5 deletions providers/cohere/tests/unit/cohere/operators/test_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@
def test_cohere_embedding_operator(cohere_client, get_connection):
"""
Test Cohere client is getting called with the correct key and that
the execute methods returns expected response.
the execute method returns expected response.
"""
embedded_obj = [1, 2, 3]
embedded_obj = [[1.0, 2.0, 3.0]]

class resp:
embeddings = embedded_obj
mock_response = MagicMock()
mock_response.embeddings.float_ = embedded_obj

api_key = "test"
base_url = "http://some_host.com"
Expand All @@ -43,7 +43,7 @@ class resp:
get_connection.return_value = Connection(conn_type="cohere", password=api_key, host=base_url)
client_obj = MagicMock()
cohere_client.return_value = client_obj
client_obj.embed.return_value = resp
client_obj.embed.return_value = mock_response

op = CohereEmbeddingOperator(
task_id="embed",
Expand Down
Loading