Skip to content

Commit

Permalink
Extract test image urls to marqo_test.py (#965)
Browse files Browse the repository at this point in the history
  • Loading branch information
papa99do authored Sep 17, 2024
1 parent ccb671c commit f6f7d01
Show file tree
Hide file tree
Showing 27 changed files with 262 additions and 242 deletions.
9 changes: 7 additions & 2 deletions src/marqo/s2_inference/random_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,13 @@ def sentence_to_hash(sentence):
# for speed reasons we hash
if isinstance(sentence, ImageType):
pixel_data = list(sentence.getdata())
pixel_averages = [sum(channels)/len(channels) for channels in pixel_data]
image_average = functools.reduce(lambda x, y: x + y, pixel_averages)/len(pixel_data)
if isinstance(pixel_data[0], int):
# If each pixel is one int value, it means this either uses palette or is a grey-scale image
image_average = functools.reduce(lambda x, y: x + y, pixel_data) / len(pixel_data)
else:
# Most image types use multiple channels to represent one pixel
pixel_averages = [sum(channels)/len(channels) for channels in pixel_data]
image_average = functools.reduce(lambda x, y: x + y, pixel_averages) / len(pixel_data)
return int(hashlib.sha256(str(image_average).encode('utf-8')).hexdigest(), 16) % 10 ** 8
else:
return int(hashlib.sha256(sentence.encode('utf-8')).hexdigest(), 16) % 10**8
Expand Down
12 changes: 6 additions & 6 deletions tests/core/document/test_partial_document_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from marqo.tensor_search.api import update_documents
from marqo.tensor_search.models.add_docs_objects import AddDocsParams
from marqo.tensor_search.models.score_modifiers_object import ScoreModifierLists
from tests.marqo_test import MarqoTestCase
from tests.marqo_test import MarqoTestCase, TestImageUrls
from marqo.core.models.marqo_update_documents_response import MarqoUpdateDocumentsResponse, MarqoUpdateDocumentsItem


Expand Down Expand Up @@ -398,7 +398,7 @@ def test_update_image_pointer_field(self):
Note: We can only update an image pointer field when it is not a tensor field."""
original_doc = {
"image_pointer_field": "https://marqo-assets.s3.amazonaws.com/tests/images/image1.jpg",
"image_pointer_field": TestImageUrls.IMAGE1.value,
"text_field_tensor": "search me",
"_id": "1"
}
Expand All @@ -409,22 +409,22 @@ def test_update_image_pointer_field(self):
self.assertEqual(1, self.monitoring.get_index_stats_by_name(self.structured_index_name).number_of_documents)

updated_doc = {
"image_pointer_field": "https://marqo-assets.s3.amazonaws.com/tests/images/image2.jpg",
"image_pointer_field": TestImageUrls.IMAGE2.value,
"_id": "1"
}
r = update_documents(body=UpdateDocumentsBodyParams(documents=[updated_doc]),
index_name=self.structured_index_name, marqo_config=self.config)
updated_doc = tensor_search.get_document_by_id(self.config, self.structured_index_name, updated_doc["_id"])

self.assertEqual("https://marqo-assets.s3.amazonaws.com/tests/images/image2.jpg",
self.assertEqual(TestImageUrls.IMAGE2.value,
updated_doc["image_pointer_field"])

def test_update_multimodal_image_field(self):
"""
Test that updating an image field in a multimodal context properly embeds the image as an image and not as text.
"""
original_image_url = "https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png"
updated_image_url = "https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image2.jpg"
original_image_url = TestImageUrls.HIPPO_REALISTIC.value
updated_image_url = TestImageUrls.IMAGE2.value

original_doc = {
"_id": "1",
Expand Down
3 changes: 2 additions & 1 deletion tests/core/inference/test_vectorise_inference_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from PIL import Image

from marqo.s2_inference.s2_inference import get_marqo_inference_cache, clear_marqo_inference_cache, clear_loaded_models
from tests.marqo_test import TestImageUrls


class TestVectoriseInferenceCache(unittest.TestCase):
Expand Down Expand Up @@ -132,7 +133,7 @@ def test_vectorise_cacheNotWorkForPILImage(self):
def test_vectorise_cacheWorkForImagePath(self):
"""Test if the cache works for image paths."""
vectorise = self._import_vectorise_with_inference_cache()
content = ["https://marqo-assets.s3.amazonaws.com/tests/images/image1.jpg"]
content = [TestImageUrls.IMAGE1.value]
# First call
original_vector = vectorise(model_name="open_clip/ViT-B-32/laion2b_s34b_b79k", content=content,
device="cpu", enable_cache=True, infer=True)
Expand Down
5 changes: 2 additions & 3 deletions tests/core/monitoring/test_monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from marqo.core.models.marqo_index_stats import MarqoIndexStats, VespaStats
from marqo.tensor_search import tensor_search
from marqo.tensor_search.models.add_docs_objects import AddDocsParams
from tests.marqo_test import MarqoTestCase
from tests.marqo_test import MarqoTestCase, TestImageUrls


class TestMonitoring(MarqoTestCase):
Expand Down Expand Up @@ -180,8 +180,7 @@ def test_get_index_stats_structuredMultimodalIndex_successful(self):
config=self.config, add_docs_params=AddDocsParams(
docs=[
{"title": "2",
"img": "https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/"
"ai_hippo_realistic.png"
"img": TestImageUrls.HIPPO_REALISTIC.value
},
{"title": "2"},
{"desc": "2"}
Expand Down
12 changes: 12 additions & 0 deletions tests/marqo_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,18 @@
from marqo.vespa.vespa_client import VespaClient


# Central registry of the remote image URLs used as fixtures by the test suite.
# Test modules import this enum and read a member's `.value` to obtain the URL string.
class TestImageUrls(Enum):
# Sample photos stored under examples/ImageSearchGuide/data in the marqo repository.
IMAGE0 = 'https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image0.jpg'
IMAGE1 = 'https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image1.jpg'
IMAGE2 = 'https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image2.jpg'
IMAGE3 = 'https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image3.jpg'
IMAGE4 = 'https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image4.jpg'
# Sample image stored under examples/ in the marqo-clip-onnx repository.
COCO = 'https://raw.githubusercontent.com/marqo-ai/marqo-clip-onnx/main/examples/coco.jpg'
# Hippo images stored under assets/ in the marqo-api-tests repository.
# NOTE: HIPPO_REALISTIC and HIPPO_STATUE point at the `_small` files, while
# HIPPO_REALISTIC_LARGE points at the un-suffixed ai_hippo_realistic.png.
HIPPO_REALISTIC = 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic_small.png'
HIPPO_REALISTIC_LARGE = 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png'
HIPPO_STATUE = 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_statue_small.png'


class MarqoTestCase(unittest.TestCase):
indexes = []

Expand Down
7 changes: 4 additions & 3 deletions tests/s2_inference/test_clip_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@
from marqo.tensor_search.enums import ModelProperties
from marqo.tensor_search.models.private_models import ModelLocation
from marqo.tensor_search.models.private_models import S3Auth, S3Location, HfModelLocation
from tests.marqo_test import TestImageUrls


class TestImageDownloading(unittest.TestCase):

def test_loadImageFromPathTimeout(self):
good_url = 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png'
good_url = TestImageUrls.HIPPO_REALISTIC.value
# should be fine on regular timeout:
img = clip_utils.load_image_from_path(good_url, {})
assert isinstance(img, types.ImageType)
Expand All @@ -32,7 +33,7 @@ def test_loadImageFromPathAllRequestErrors(self):
"""Do we catch other download errors?
The errors tested inherit from requests.exceptions.RequestException
"""
good_url = 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png'
good_url = TestImageUrls.HIPPO_REALISTIC.value
clip_utils.load_image_from_path(good_url, {})
for err in [pycurl.error]:
with mock.patch('pycurl.Curl') as MockCurl:
Expand All @@ -43,7 +44,7 @@ def test_loadImageFromPathAllRequestErrors(self):

@patch('pycurl.Curl')
def test_downloadImageFromRrlCloseCalled(self, MockCurl):
good_url = 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png'
good_url = TestImageUrls.HIPPO_REALISTIC.value

mock_curl_instance = MockCurl.return_value
mock_curl_instance.getinfo.return_value = 200
Expand Down
10 changes: 6 additions & 4 deletions tests/s2_inference/test_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
_convert_vectorized_output,
)
from marqo.s2_inference.s2_inference import _load_model as og_load_model
from tests.marqo_test import TestImageUrls

_load_model = functools.partial(og_load_model, calling_func = "unit_test")

class TestEncoding(unittest.TestCase):
Expand Down Expand Up @@ -338,9 +340,9 @@ def test_load_clip_image_model(self):

device = 'cpu'
eps = 1e-9
images = ["https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image0.jpg",
"https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image1.jpg",
"https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image2.jpg"]
images = [TestImageUrls.IMAGE0.value,
TestImageUrls.IMAGE1.value,
TestImageUrls.IMAGE2.value]

for name in names:

Expand Down Expand Up @@ -454,7 +456,7 @@ def test_model_un_normalization(self):
@patch("torch.cuda.amp.autocast")
def test_autocast_called_when_cuda(self, mock_autocast):
names = self.open_clip_test_model
contents = ['this is a test sentence. so is this.', "https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image0.jpg"]
contents = ['this is a test sentence. so is this.', TestImageUrls.IMAGE0.value]
for model_name in names:
for content in contents:
vectorise(model_name=model_name, content=content, device="cpu")
Expand Down
8 changes: 4 additions & 4 deletions tests/s2_inference/test_generic_clip_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
validate_model_properties
)

from tests.marqo_test import MarqoTestCase
from tests.marqo_test import MarqoTestCase, TestImageUrls
from unittest import mock

@unittest.skip
Expand Down Expand Up @@ -323,7 +323,7 @@ def test_add_documents_text_and_image(self):
"_id": "123",
"title 1": "content 1",
"desc 2": "content 2. blah blah blah",
"image" : "https://raw.githubusercontent.com/marqo-ai/marqo-clip-onnx/main/examples/coco.jpg"
"image" : TestImageUrls.COCO.value
}]

tensor_search.add_documents(config=config, add_docs_params=AddDocsParams(
Expand Down Expand Up @@ -371,7 +371,7 @@ def test_vectorise_generic_openai_clip_encode_image_results(self):

epsilon = 1e-7

image = "https://raw.githubusercontent.com/marqo-ai/marqo-clip-onnx/main/examples/coco.jpg"
image = TestImageUrls.COCO.value

model_name = "test-model"
model_properties = {
Expand Down Expand Up @@ -410,7 +410,7 @@ def test_vectorise_generic_open_clip_encode_image_results(self):

epsilon = 1e-7

image = "https://raw.githubusercontent.com/marqo-ai/marqo-clip-onnx/main/examples/coco.jpg"
image = TestImageUrls.COCO.value

model_name = "test-model"
model_properties = {
Expand Down
4 changes: 3 additions & 1 deletion tests/s2_inference/test_large_model_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
)
import functools
from marqo.s2_inference.s2_inference import _load_model as og_load_model
from tests.marqo_test import TestImageUrls

_load_model = functools.partial(og_load_model, calling_func = "unit_test")
from marqo.s2_inference.configs import ModelCache
import shutil
Expand Down Expand Up @@ -192,7 +194,7 @@ def test_cuda_encode_type(self):
def test_autocast_called_in_open_clip(self, mock_autocast):
names = ["open_clip/ViT-B-32/laion400m_e31"]
contents = ['this is a test sentence. so is this.',
"https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image0.jpg"]
TestImageUrls.IMAGE0.value]
for model_name in names:
with self.subTest(f"Testing model: {model_name}"):
for content in contents:
Expand Down
25 changes: 13 additions & 12 deletions tests/s2_inference/test_reranking_with_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from marqo.s2_inference.reranking import rerank
from marqo.s2_inference.errors import RerankerError,RerankerNameError
from marqo.s2_inference.s2_inference import clear_loaded_models
from tests.marqo_test import TestImageUrls


class TestRerankingWithModels(unittest.TestCase):
Expand Down Expand Up @@ -157,7 +158,7 @@ def test_reranking_images_owl_inconsistent(self):
# not all results have the searchable field to rerank over
results_lexical = {'hits':
[{'attributes': 'yello head. pruple shirt. black sweater.',
'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_statue.png',
'location': TestImageUrls.HIPPO_STATUE.value,
'other': 'some other text',
# this one has no id
'_score': 1.4017934,
Expand All @@ -172,7 +173,7 @@ def test_reranking_images_owl_inconsistent(self):
},
# this one has less fields
{'attributes': 'face is bowlcut. body is blue . background is grey. head is tan',
'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png',
'location': TestImageUrls.HIPPO_REALISTIC.value,
'_id': 'QmTVYuULK1Qbzh21Y3hzeTFny5AGUSUGAXoGjLqNB2b1at',
'_score': 0.2876821,
'_highlights': [],
Expand Down Expand Up @@ -213,7 +214,7 @@ def test_reranking_images_incorrect_model(self):
# not all results have the searchable field to rerank over
results_lexical = {'hits':
[{'attributes': 'yello head. pruple shirt. black sweater.',
'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_statue.png',
'location': TestImageUrls.HIPPO_STATUE.value,
'other': 'some other text',
# this one has no id
'_score': 1.4017934,
Expand All @@ -228,7 +229,7 @@ def test_reranking_images_incorrect_model(self):
},
# this one has less fields
{'attributes': 'face is bowlcut. body is blue . background is grey. head is tan',
'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png',
'location': TestImageUrls.HIPPO_REALISTIC.value,
'_id': 'QmTVYuULK1Qbzh21Y3hzeTFny5AGUSUGAXoGjLqNB2b1at',
'_score': 0.2876821,
'_highlights': [],
Expand All @@ -251,7 +252,7 @@ def test_reranking_images_owl_inconsistent_highlights(self):
# not all results have the searchable field to rerank over
results_lexical = {'hits':
[{'attributes': 'yello head. pruple shirt. black sweater.',
'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_statue.png',
'location': TestImageUrls.HIPPO_STATUE.value,
'other': 'some other text',
# this one has no id
'_score': 1.4017934,
Expand All @@ -266,7 +267,7 @@ def test_reranking_images_owl_inconsistent_highlights(self):
},
# this one has less fields
{'attributes': 'face is bowlcut. body is blue . background is grey. head is tan',
'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png',
'location': TestImageUrls.HIPPO_REALISTIC.value,
'_id': 'QmTVYuULK1Qbzh21Y3hzeTFny5AGUSUGAXoGjLqNB2b1at',
'_score': 0.2876821,
'_highlights': {"location":[0,0,20,30]},
Expand Down Expand Up @@ -308,7 +309,7 @@ def test_reranking_images_owl_consistent(self):
# all results have the searchable field to rerank over
results_lexical = {'hits':
[{'attributes': 'yello head. pruple shirt. black sweater.',
'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_statue.png',
'location': TestImageUrls.HIPPO_STATUE.value,
'other': 'some other text',
# this one has no id
'_score': 1.4017934,
Expand All @@ -317,14 +318,14 @@ def test_reranking_images_owl_consistent(self):
{'attributes': 'face is viking. body is white turtleneck. background is pearl',
# missing locations
'other': 'some more text',
'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_statue.png',
'location': TestImageUrls.HIPPO_STATUE.value,
'_id': 'QmRR6PBkgCdhiSYBM3AY3EWhn4ZbeR2X8Ygpy2veLkcPC5',
'_score': 0.2876821,
'_highlights': [],
},
# this one has less fields
{'attributes': 'face is bowlcut. body is blue . background is grey. head is tan',
'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png',
'location': TestImageUrls.HIPPO_REALISTIC.value,
'_id': 'QmTVYuULK1Qbzh21Y3hzeTFny5AGUSUGAXoGjLqNB2b1at',
'_score': 0.2876821,
'_highlights': [],
Expand Down Expand Up @@ -365,7 +366,7 @@ def test_reranking_check_search_has_fields(self):

results_lexical = {'hits':
[{'attributes': 'yello head. pruple shirt. black sweater.',
'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_statue.png',
'location': TestImageUrls.HIPPO_STATUE.value,
'other': 'some other text',
# this one has no id
'_score': 1.4017934,
Expand All @@ -374,14 +375,14 @@ def test_reranking_check_search_has_fields(self):
{'attributes': 'face is viking. body is white turtleneck. background is pearl',
# missing locations
'other': 'some more text',
'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_statue.png',
'location': TestImageUrls.HIPPO_STATUE.value,
'_id': 'QmRR6PBkgCdhiSYBM3AY3EWhn4ZbeR2X8Ygpy2veLkcPC5',
'_score': 0.2876821,
'_highlights': [],
},
# this one has less fields
{'attributes': 'face is bowlcut. body is blue . background is grey. head is tan',
'location': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png',
'location': TestImageUrls.HIPPO_REALISTIC.value,
'_id': 'QmTVYuULK1Qbzh21Y3hzeTFny5AGUSUGAXoGjLqNB2b1at',
'_score': 0.2876821,
'_highlights': [],
Expand Down
Loading

0 comments on commit f6f7d01

Please sign in to comment.