From b43bab503758084783c7bfd8d0ed55535c6ef802 Mon Sep 17 00:00:00 2001
From: Raynor Chavez <raynorkirksonchavez@gmail.com>
Date: Tue, 20 Aug 2024 15:12:21 +0800
Subject: [PATCH] Fix embed not using cuda as default device when available
 2.11 (#941)

What is the current behavior? (You can also link to an open issue here)
When `device` is unset in the embed request, embed defaults to the CPU even though CUDA is available. Other functionality, such as search and add documents, is unaffected (it defaults to CUDA when available).

What is the new behavior (if this is a feature change)?
The default device for embed will be CUDA when it is available.
---
 .gitignore                                    |  4 +-
 RELEASE.md                                    | 10 +++++
 src/marqo/core/embed/embed.py                 |  6 +--
 src/marqo/version.py                          |  2 +-
 tests/tensor_search/integ_tests/test_embed.py | 40 +++++++++++++++++++
 5 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index c6b115c56..b1d549014 100644
--- a/.gitignore
+++ b/.gitignore
@@ -149,4 +149,6 @@ dump.rdb
 .DS_Store
 
 # Tester app for unit tests
-scripts/vespa_local/vespa_tester_app.zip
\ No newline at end of file
+scripts/vespa_local/vespa_tester_app.zip
+
+src/marqo/tensor_search/cache_dir/*
\ No newline at end of file
diff --git a/RELEASE.md b/RELEASE.md
index e4c1c0622..894b11231 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,3 +1,8 @@
+# Release 2.11.2
+
+## Bug fixes and minor changes
+- Fix an issue where CUDA was not automatically selected as the default device for the `embed` endpoint, even when available [#941](https://github.com/marqo-ai/marqo/pull/941).
+
 # Release 2.11.1
 
 ## Bug fixes and minor changes
@@ -22,6 +27,11 @@
 - Huge shoutout to all our 4.4k stargazers! We’ve come a long way as a team and as a community, so a huge thanks to everyone who continues to support Marqo.
 - Feel free to keep on sharing questions and feedback on our [forum](https://community.marqo.ai/) and [Slack channel](https://marqo-community.slack.com/join/shared_invite/zt-2b4nsvbd2-TDf8agPszzWH5hYKBMIgDA#/shared-invite/email)! If you have any more inquiries or thoughts, please don’t hesitate to reach out.
 
+# Release 2.10.2
+
+## Bug fixes and minor changes
+- Fix an issue where CUDA was not automatically selected as the default device for the `embed` endpoint, even when available [#941](https://github.com/marqo-ai/marqo/pull/941).
+
 # Release 2.10.1
 
 ## Bug fixes and minor changes
diff --git a/src/marqo/core/embed/embed.py b/src/marqo/core/embed/embed.py
index d8c00815c..dc96f2318 100644
--- a/src/marqo/core/embed/embed.py
+++ b/src/marqo/core/embed/embed.py
@@ -13,6 +13,7 @@
 from marqo.tensor_search.tensor_search_logging import get_logger
 from marqo.core.utils.prefix import determine_text_prefix, DeterminePrefixContentType
 from marqo.vespa.vespa_client import VespaClient
+from marqo.tensor_search import utils
 
 logger = get_logger(__name__)
 
@@ -61,11 +62,10 @@ def embed_content(
         temp_config = config.Config(
             vespa_client=self.vespa_client,
         )
-
+        
         # Set default device if not provided
         if device is None:
-            device = self.default_device
-
+            device = utils.read_env_vars_and_defaults("MARQO_BEST_AVAILABLE_DEVICE")
 
         # Content validation is done in API model layer
         t0 = timer()
diff --git a/src/marqo/version.py b/src/marqo/version.py
index ebdf862f0..7a12174e6 100644
--- a/src/marqo/version.py
+++ b/src/marqo/version.py
@@ -1,4 +1,4 @@
-__version__ = "2.11.1"
+__version__ = "2.11.2"
 
 def get_version() -> str:
     return f"{__version__}"
diff --git a/tests/tensor_search/integ_tests/test_embed.py b/tests/tensor_search/integ_tests/test_embed.py
index 51491c1bd..2a68ecc72 100644
--- a/tests/tensor_search/integ_tests/test_embed.py
+++ b/tests/tensor_search/integ_tests/test_embed.py
@@ -20,6 +20,7 @@
 from marqo.vespa.models.query_result import Root, Child, RootFields
 from marqo.tensor_search.models.private_models import S3Auth, ModelAuth, HfAuth
 from marqo.api.models.embed_request import EmbedRequest
+from marqo.tensor_search import utils
 import os
 import pprint
 import unittest
@@ -150,6 +151,45 @@ def tearDown(self) -> None:
         super().tearDown()
         self.device_patcher.stop()
 
+    def test_embed_content_cuda_device_as_default(self):
+        """
+        Test that embed_content uses the default device when no device is specified.
+        """
+        for index in [self.unstructured_default_text_index, self.structured_default_text_index]:
+            with self.subTest(index=index.type):
+                expected_devices = ["cuda", "cpu"]
+                for expected_device in expected_devices:
+                    with patch.dict(os.environ, {"MARQO_BEST_AVAILABLE_DEVICE": expected_device}):
+                        with patch('marqo.tensor_search.tensor_search.run_vectorise_pipeline') as mock_vectorise:
+                            mock_vectorise.return_value = {0: [0.1, 0.2, 0.3]}
+
+                            embed_res = embed(
+                                marqo_config=self.config,
+                                index_name=index.name,
+                                embedding_request=EmbedRequest(
+                                    content=["This is a test document"]
+                                ),
+                                device=None
+                            )
+                        
+                        # Check that run_vectorise_pipeline was called
+                        mock_vectorise.assert_called_once()
+                        
+                        # Get the arguments passed to run_vectorise_pipeline
+                        args, kwargs = mock_vectorise.call_args
+                        
+                        # Print the args and kwargs for debugging
+                        print(f"args passed to run_vectorise_pipeline: {args}")
+                        print(f"kwargs passed to run_vectorise_pipeline: {kwargs}")
+                        
+                        # Check that the device passed to run_vectorise_pipeline matches the expected value
+                        self.assertEqual(args[2], expected_device)
+
+                        # Check the result
+                        self.assertEqual(embed_res["content"], ["This is a test document"])
+                        self.assertIsInstance(embed_res["embeddings"][0], list)
+                        self.assertEqual(embed_res["embeddings"][0], [0.1, 0.2, 0.3])
+
     def test_embed_equivalent_to_add_docs(self):
         """
         Ensure that the embedding returned by embed endpoint matches the one created by add_docs.