Extract test image urls to marqo_test.py (#965)

marqo-ai · Sep 17, 2024 · f6f7d01 · f6f7d01
1 parent ccb671c
commit f6f7d01
Show file tree

Hide file tree

Showing 27 changed files with 262 additions and 242 deletions.
diff --git a/src/marqo/s2_inference/random_utils.py b/src/marqo/s2_inference/random_utils.py
@@ -12,8 +12,13 @@ def sentence_to_hash(sentence):
     # for speed reasons we hash
     if isinstance(sentence, ImageType):
         pixel_data = list(sentence.getdata())
-        pixel_averages = [sum(channels)/len(channels) for channels in pixel_data]
-        image_average = functools.reduce(lambda x, y: x + y, pixel_averages)/len(pixel_data)
+        if isinstance(pixel_data[0], int):
+            # If each pixel is a single int value, the image either uses a palette or is grey-scale
+            image_average = functools.reduce(lambda x, y: x + y, pixel_data) / len(pixel_data)
+        else:
+            # Most image types use multiple channels to represent one pixel
+            pixel_averages = [sum(channels)/len(channels) for channels in pixel_data]
+            image_average = functools.reduce(lambda x, y: x + y, pixel_averages) / len(pixel_data)
         return int(hashlib.sha256(str(image_average).encode('utf-8')).hexdigest(), 16) % 10 ** 8
     else:
         return int(hashlib.sha256(sentence.encode('utf-8')).hexdigest(), 16) % 10**8

diff --git a/tests/core/document/test_partial_document_update.py b/tests/core/document/test_partial_document_update.py
@@ -15,7 +15,7 @@
 from marqo.tensor_search.api import update_documents
 from marqo.tensor_search.models.add_docs_objects import AddDocsParams
 from marqo.tensor_search.models.score_modifiers_object import ScoreModifierLists
-from tests.marqo_test import MarqoTestCase
+from tests.marqo_test import MarqoTestCase, TestImageUrls
 from marqo.core.models.marqo_update_documents_response import MarqoUpdateDocumentsResponse, MarqoUpdateDocumentsItem
 
 
@@ -398,7 +398,7 @@ def test_update_image_pointer_field(self):
 
         Note: We can only update an image pointer field when it is not a tensor field."""
         original_doc = {
-            "image_pointer_field": "https://marqo-assets.s3.amazonaws.com/tests/images/image1.jpg",
+            "image_pointer_field": TestImageUrls.IMAGE1.value,
             "text_field_tensor": "search me",
             "_id": "1"
         }
@@ -409,22 +409,22 @@ def test_update_image_pointer_field(self):
         self.assertEqual(1, self.monitoring.get_index_stats_by_name(self.structured_index_name).number_of_documents)
 
         updated_doc = {
-            "image_pointer_field": "https://marqo-assets.s3.amazonaws.com/tests/images/image2.jpg",
+            "image_pointer_field": TestImageUrls.IMAGE2.value,
             "_id": "1"
         }
         r = update_documents(body=UpdateDocumentsBodyParams(documents=[updated_doc]),
                              index_name=self.structured_index_name, marqo_config=self.config)
         updated_doc = tensor_search.get_document_by_id(self.config, self.structured_index_name, updated_doc["_id"])
 
-        self.assertEqual("https://marqo-assets.s3.amazonaws.com/tests/images/image2.jpg",
+        self.assertEqual(TestImageUrls.IMAGE2.value,
                          updated_doc["image_pointer_field"])
 
     def test_update_multimodal_image_field(self):
         """
         Test that updating an image field in a multimodal context properly embeds the image as an image and not as text.
         """
-        original_image_url = "https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png"
-        updated_image_url = "https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image2.jpg"
+        original_image_url = TestImageUrls.HIPPO_REALISTIC.value
+        updated_image_url = TestImageUrls.IMAGE2.value
 
         original_doc = {
             "_id": "1",

diff --git a/tests/core/inference/test_vectorise_inference_cache.py b/tests/core/inference/test_vectorise_inference_cache.py
@@ -10,6 +10,7 @@
 from PIL import Image
 
 from marqo.s2_inference.s2_inference import get_marqo_inference_cache, clear_marqo_inference_cache, clear_loaded_models
+from tests.marqo_test import TestImageUrls
 
 
 class TestVectoriseInferenceCache(unittest.TestCase):
@@ -132,7 +133,7 @@ def test_vectorise_cacheNotWorkForPILImage(self):
     def test_vectorise_cacheWorkForImagePath(self):
         """Test if the cache works for image paths."""
         vectorise = self._import_vectorise_with_inference_cache()
-        content = ["https://marqo-assets.s3.amazonaws.com/tests/images/image1.jpg"]
+        content = [TestImageUrls.IMAGE1.value]
         # First call
         original_vector = vectorise(model_name="open_clip/ViT-B-32/laion2b_s34b_b79k", content=content,
                                     device="cpu", enable_cache=True, infer=True)

diff --git a/tests/core/monitoring/test_monitoring.py b/tests/core/monitoring/test_monitoring.py
@@ -10,7 +10,7 @@
 from marqo.core.models.marqo_index_stats import MarqoIndexStats, VespaStats
 from marqo.tensor_search import tensor_search
 from marqo.tensor_search.models.add_docs_objects import AddDocsParams
-from tests.marqo_test import MarqoTestCase
+from tests.marqo_test import MarqoTestCase, TestImageUrls
 
 
 class TestMonitoring(MarqoTestCase):
@@ -180,8 +180,7 @@ def test_get_index_stats_structuredMultimodalIndex_successful(self):
             config=self.config, add_docs_params=AddDocsParams(
                 docs=[
                     {"title": "2",
-                     "img": "https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/"
-                            "ai_hippo_realistic.png"
+                     "img": TestImageUrls.HIPPO_REALISTIC.value
                      },
                     {"title": "2"},
                     {"desc": "2"}

diff --git a/tests/marqo_test.py b/tests/marqo_test.py
@@ -22,6 +22,18 @@
 from marqo.vespa.vespa_client import VespaClient
 
 
+class TestImageUrls(Enum):
+    IMAGE0 = 'https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image0.jpg'
+    IMAGE1 = 'https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image1.jpg'
+    IMAGE2 = 'https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image2.jpg'
+    IMAGE3 = 'https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image3.jpg'
+    IMAGE4 = 'https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image4.jpg'
+    COCO = 'https://raw.githubusercontent.com/marqo-ai/marqo-clip-onnx/main/examples/coco.jpg'
+    HIPPO_REALISTIC = 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic_small.png'
+    HIPPO_REALISTIC_LARGE = 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png'
+    HIPPO_STATUE = 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_statue_small.png'
+
+
 class MarqoTestCase(unittest.TestCase):
     indexes = []
 

diff --git a/tests/s2_inference/test_clip_utils.py b/tests/s2_inference/test_clip_utils.py
@@ -14,12 +14,13 @@
 from marqo.tensor_search.enums import ModelProperties
 from marqo.tensor_search.models.private_models import ModelLocation
 from marqo.tensor_search.models.private_models import S3Auth, S3Location, HfModelLocation
+from tests.marqo_test import TestImageUrls
 
 
 class TestImageDownloading(unittest.TestCase):
 
     def test_loadImageFromPathTimeout(self):
-        good_url = 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png'
+        good_url = TestImageUrls.HIPPO_REALISTIC.value
         # should be fine on regular timeout:
         img = clip_utils.load_image_from_path(good_url, {})
         assert isinstance(img, types.ImageType)
@@ -32,7 +33,7 @@ def test_loadImageFromPathAllRequestErrors(self):
         """Do we catch other download errors?
         The errors tested inherit from requests.exceptions.RequestException
         """
-        good_url = 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png'
+        good_url = TestImageUrls.HIPPO_REALISTIC.value
         clip_utils.load_image_from_path(good_url, {})
         for err in [pycurl.error]:
             with mock.patch('pycurl.Curl') as MockCurl:
@@ -43,7 +44,7 @@ def test_loadImageFromPathAllRequestErrors(self):
 
     @patch('pycurl.Curl')
     def test_downloadImageFromRrlCloseCalled(self, MockCurl):
-        good_url = 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png'
+        good_url = TestImageUrls.HIPPO_REALISTIC.value
 
         mock_curl_instance = MockCurl.return_value
         mock_curl_instance.getinfo.return_value = 200

diff --git a/tests/s2_inference/test_encoding.py b/tests/s2_inference/test_encoding.py
@@ -13,6 +13,8 @@
     _convert_vectorized_output,
 )
 from marqo.s2_inference.s2_inference import _load_model as og_load_model
+from tests.marqo_test import TestImageUrls
+
 _load_model = functools.partial(og_load_model, calling_func = "unit_test")
 
 class TestEncoding(unittest.TestCase):
@@ -338,9 +340,9 @@ def test_load_clip_image_model(self):
 
         device = 'cpu'
         eps = 1e-9
-        images = ["https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image0.jpg",
-                  "https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image1.jpg",
-                  "https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image2.jpg"]
+        images = [TestImageUrls.IMAGE0.value,
+                  TestImageUrls.IMAGE1.value,
+                  TestImageUrls.IMAGE2.value]
 
         for name in names:
 
@@ -454,7 +456,7 @@ def test_model_un_normalization(self):
     @patch("torch.cuda.amp.autocast")
     def test_autocast_called_when_cuda(self, mock_autocast):
         names = self.open_clip_test_model
-        contents = ['this is a test sentence. so is this.', "https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image0.jpg"]
+        contents = ['this is a test sentence. so is this.', TestImageUrls.IMAGE0.value]
         for model_name in names:
             for content in contents:
                 vectorise(model_name=model_name, content=content, device="cpu")

diff --git a/tests/s2_inference/test_generic_clip_model.py b/tests/s2_inference/test_generic_clip_model.py
@@ -13,7 +13,7 @@
     validate_model_properties
 )
 
-from tests.marqo_test import MarqoTestCase
+from tests.marqo_test import MarqoTestCase, TestImageUrls
 from unittest import mock
 
 @unittest.skip
@@ -323,7 +323,7 @@ def test_add_documents_text_and_image(self):
                 "_id": "123",
                 "title 1": "content 1",
                 "desc 2": "content 2. blah blah blah",
-                "image" : "https://raw.githubusercontent.com/marqo-ai/marqo-clip-onnx/main/examples/coco.jpg"
+                "image" : TestImageUrls.COCO.value
             }]
 
         tensor_search.add_documents(config=config, add_docs_params=AddDocsParams(
@@ -371,7 +371,7 @@ def test_vectorise_generic_openai_clip_encode_image_results(self):
 
         epsilon = 1e-7
 
-        image = "https://raw.githubusercontent.com/marqo-ai/marqo-clip-onnx/main/examples/coco.jpg"
+        image = TestImageUrls.COCO.value
 
         model_name = "test-model"
         model_properties = {
@@ -410,7 +410,7 @@ def test_vectorise_generic_open_clip_encode_image_results(self):
 
         epsilon = 1e-7
 
-        image = "https://raw.githubusercontent.com/marqo-ai/marqo-clip-onnx/main/examples/coco.jpg"
+        image = TestImageUrls.COCO.value
 
         model_name = "test-model"
         model_properties = {

diff --git a/tests/s2_inference/test_large_model_encoding.py b/tests/s2_inference/test_large_model_encoding.py
@@ -12,6 +12,8 @@
 )
 import functools
 from marqo.s2_inference.s2_inference import _load_model as og_load_model
+from tests.marqo_test import TestImageUrls
+
 _load_model = functools.partial(og_load_model, calling_func = "unit_test")
 from marqo.s2_inference.configs import ModelCache
 import shutil
@@ -192,7 +194,7 @@ def test_cuda_encode_type(self):
     def test_autocast_called_in_open_clip(self, mock_autocast):
         names = ["open_clip/ViT-B-32/laion400m_e31"]
         contents = ['this is a test sentence. so is this.',
-                    "https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image0.jpg"]
+                    TestImageUrls.IMAGE0.value]
         for model_name in names:
             with self.subTest(f"Testing model: {model_name}"):
                 for content in contents:

diff --git a/tests/s2_inference/test_reranking_with_models.py b/tests/s2_inference/test_reranking_with_models.py
@@ -5,6 +5,7 @@
 from marqo.s2_inference.reranking import rerank
 from marqo.s2_inference.errors import RerankerError,RerankerNameError
 from marqo.s2_inference.s2_inference import clear_loaded_models
+from tests.marqo_test import TestImageUrls
 
 
 class TestRerankingWithModels(unittest.TestCase):
@@ -157,7 +158,7 @@ def test_reranking_images_owl_inconsistent(self):
         # not all results have the searchable filed to rerank over
         results_lexical = {'hits': 
                     [{'attributes': 'yello head. pruple shirt. black sweater.',
-                        'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_statue.png',
+                        'location': TestImageUrls.HIPPO_STATUE.value,
                         'other': 'some other text',
                         # this one has no id
                         '_score': 1.4017934,
@@ -172,7 +173,7 @@ def test_reranking_images_owl_inconsistent(self):
                         },
                         # this one has less fields
                         {'attributes': 'face is bowlcut. body is blue . background is grey. head is tan',
-                        'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png',
+                        'location': TestImageUrls.HIPPO_REALISTIC.value,
                         '_id': 'QmTVYuULK1Qbzh21Y3hzeTFny5AGUSUGAXoGjLqNB2b1at',
                         '_score': 0.2876821,
                         '_highlights': [],
@@ -213,7 +214,7 @@ def test_reranking_images_incorrect_model(self):
         # not all results have the searchable filed to rerank over
         results_lexical = {'hits': 
                     [{'attributes': 'yello head. pruple shirt. black sweater.',
-                        'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_statue.png',
+                        'location': TestImageUrls.HIPPO_STATUE.value,
                         'other': 'some other text',
                         # this one has no id
                         '_score': 1.4017934,
@@ -228,7 +229,7 @@ def test_reranking_images_incorrect_model(self):
                         },
                         # this one has less fields
                         {'attributes': 'face is bowlcut. body is blue . background is grey. head is tan',
-                        'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png',
+                        'location': TestImageUrls.HIPPO_REALISTIC.value,
                         '_id': 'QmTVYuULK1Qbzh21Y3hzeTFny5AGUSUGAXoGjLqNB2b1at',
                         '_score': 0.2876821,
                         '_highlights': [],
@@ -251,7 +252,7 @@ def test_reranking_images_owl_inconsistent_highlights(self):
         # not all results have the searchable filed to rerank over
         results_lexical = {'hits': 
                     [{'attributes': 'yello head. pruple shirt. black sweater.',
-                        'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_statue.png',
+                        'location': TestImageUrls.HIPPO_STATUE.value,
                         'other': 'some other text',
                         # this one has no id
                         '_score': 1.4017934,
@@ -266,7 +267,7 @@ def test_reranking_images_owl_inconsistent_highlights(self):
                         },
                         # this one has less fields
                         {'attributes': 'face is bowlcut. body is blue . background is grey. head is tan',
-                        'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png',
+                        'location': TestImageUrls.HIPPO_REALISTIC.value,
                         '_id': 'QmTVYuULK1Qbzh21Y3hzeTFny5AGUSUGAXoGjLqNB2b1at',
                         '_score': 0.2876821,
                         '_highlights': {"location":[0,0,20,30]},
@@ -308,7 +309,7 @@ def test_reranking_images_owl_consistent(self):
         # all results have the searchable field to rerank over
         results_lexical = {'hits': 
                     [{'attributes': 'yello head. pruple shirt. black sweater.',
-                        'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_statue.png',
+                        'location': TestImageUrls.HIPPO_STATUE.value,
                         'other': 'some other text',
                         # this one has no id
                         '_score': 1.4017934,
@@ -317,14 +318,14 @@ def test_reranking_images_owl_consistent(self):
                         {'attributes': 'face is viking. body is white turtleneck. background is pearl',
                         # missing locations
                         'other': 'some more text',
-                        'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_statue.png',
+                        'location': TestImageUrls.HIPPO_STATUE.value,
                         '_id': 'QmRR6PBkgCdhiSYBM3AY3EWhn4ZbeR2X8Ygpy2veLkcPC5',
                         '_score': 0.2876821,
                         '_highlights': [],
                         },
                         # this one has less fields
                         {'attributes': 'face is bowlcut. body is blue . background is grey. head is tan',
-                        'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png',
+                        'location': TestImageUrls.HIPPO_REALISTIC.value,
                         '_id': 'QmTVYuULK1Qbzh21Y3hzeTFny5AGUSUGAXoGjLqNB2b1at',
                         '_score': 0.2876821,
                         '_highlights': [],
@@ -365,7 +366,7 @@ def test_reranking_check_search_has_fields(self):
 
         results_lexical = {'hits': 
                     [{'attributes': 'yello head. pruple shirt. black sweater.',
-                        'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_statue.png',
+                        'location': TestImageUrls.HIPPO_STATUE.value,
                         'other': 'some other text',
                         # this one has no id
                         '_score': 1.4017934,
@@ -374,14 +375,14 @@ def test_reranking_check_search_has_fields(self):
                         {'attributes': 'face is viking. body is white turtleneck. background is pearl',
                         # missing locations
                         'other': 'some more text',
-                        'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_statue.png',
+                        'location': TestImageUrls.HIPPO_STATUE.value,
                         '_id': 'QmRR6PBkgCdhiSYBM3AY3EWhn4ZbeR2X8Ygpy2veLkcPC5',
                         '_score': 0.2876821,
                         '_highlights': [],
                         },
                         # this one has less fields
                         {'attributes': 'face is bowlcut. body is blue . background is grey. head is tan',
-                        'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png',
+                        'location': TestImageUrls.HIPPO_REALISTIC.value,
                         '_id': 'QmTVYuULK1Qbzh21Y3hzeTFny5AGUSUGAXoGjLqNB2b1at',
                         '_score': 0.2876821,
                         '_highlights': [],