marqo-ai · vicilliar · Nov 26, 2024 · Oct 2, 2024 · Oct 9, 2024 · Oct 15, 2024
diff --git a/.dockerignore b/.dockerignore
@@ -139,5 +139,6 @@ local_only/
 tests/cache/
 
 cache/
+src/marqo/cache/
 
 __pycache__/
diff --git a/.github/workflows/arm64_docker_marqo.yml b/.github/workflows/arm64_docker_marqo.yml
@@ -83,7 +83,7 @@ jobs:
         with:
           fetch-depth: 0
 
-      - name: Set up Python 3.9   # TODO: Check if 3.9 is okay instead of 3.8. So far, so good
+      - name: Set up Python 3.9
         run:  |
           apt-get -y update
           apt-get -y install python3.9

diff --git a/.github/workflows/cpu_docker_marqo.yml b/.github/workflows/cpu_docker_marqo.yml
@@ -91,10 +91,10 @@ jobs:
         with:
           fetch-depth: 0
 
-      - name: Set up Python 3.8
+      - name: Set up Python 3.9
         uses: actions/setup-python@v3
         with:
-          python-version: "3.8"
+          python-version: "3.9"
           cache: "pip"
 
       - name: Install Dependencies

diff --git a/.github/workflows/cpu_local_marqo.yml b/.github/workflows/cpu_local_marqo.yml
@@ -91,10 +91,10 @@ jobs:
         with:
           fetch-depth: 0
 
-      - name: Set up Python 3.8
+      - name: Set up Python 3.9
         uses: actions/setup-python@v3
         with:
-          python-version: "3.8"
+          python-version: "3.9"
           cache: "pip"
 
       - name: Install Dependencies

diff --git a/.github/workflows/cuda_docker_marqo.yml b/.github/workflows/cuda_docker_marqo.yml
@@ -86,10 +86,10 @@ jobs:
         with:
           fetch-depth: 0
 
-      - name: Set up Python 3.8
+      - name: Set up Python 3.9
         uses: actions/setup-python@v3
         with:
-          python-version: "3.8"
+          python-version: "3.9"
           cache: "pip"
 
       - name: Install Dependencies

diff --git a/.github/workflows/largemodel_unit_test_CI.yml b/.github/workflows/largemodel_unit_test_CI.yml
@@ -66,10 +66,10 @@ jobs:
           fetch-depth: 0
           path: marqo
 
-      - name: Set up Python 3.8
+      - name: Set up Python 3.9
         uses: actions/setup-python@v3
         with:
-          python-version: "3.8"
+          python-version: "3.9"
           cache: "pip"
 
       - name: Checkout marqo-base for requirements

diff --git a/.github/workflows/locust_perf_test.yml b/.github/workflows/locust_perf_test.yml
@@ -112,10 +112,10 @@ jobs:
         with:
           ref: ${{ github.event.inputs.marqo_ref }}
 
-      - name: Set up Python 3.8
+      - name: Set up Python 3.9
         uses: actions/setup-python@v3
         with:
-          python-version: "3.8"
+          python-version: "3.9"
 
       - name: Set up Docker Buildx
         if: github.event.inputs.marqo_host == 'http://localhost:8882' && github.event.inputs.image_to_test == 'marqo_docker_0'

diff --git a/.github/workflows/test_documentation.yml b/.github/workflows/test_documentation.yml
@@ -20,10 +20,10 @@ jobs:
           fetch-depth: 0
           path: marqo
 
-      - name: Set up Python 3.8
+      - name: Set up Python 3.9
         uses: actions/setup-python@v3
         with:
-          python-version: "3.8"
+          python-version: "3.9"
           cache: "pip"
 
       - name: Install dependencies

diff --git a/.github/workflows/unit_test_200gb_CI.yml b/.github/workflows/unit_test_200gb_CI.yml
@@ -66,10 +66,10 @@ jobs:
           fetch-depth: 0
           path: marqo
 
-      - name: Set up Python 3.8
+      - name: Set up Python 3.9
         uses: actions/setup-python@v3
         with:
-          python-version: "3.8"
+          python-version: "3.9"
           cache: "pip"
 
       - name: Checkout marqo-base for requirements

diff --git a/Dockerfile b/Dockerfile
@@ -6,7 +6,7 @@ COPY vespa .
 RUN mvn clean package
 
 # Stage 2: Base image for Python setup
-FROM marqoai/marqo-base:36 as base_image
+FROM marqoai/marqo-base:37 as base_image
 
 # Allow mounting volume containing data and configs for vespa
 VOLUME /opt/vespa/var

diff --git a/tests/s2_inference/embeddings_reference/embeddings_large_e5_python_3_8.json b/tests/s2_inference/embeddings_reference/embeddings_large_e5_python_3_8.json
diff --git a/tests/s2_inference/embeddings_reference/embeddings_large_multilingual_e5_python_3_8.json b/tests/s2_inference/embeddings_reference/embeddings_large_multilingual_e5_python_3_8.json
diff --git a/tests/s2_inference/embeddings_reference/embeddings_large_open_clip_python_3_8.json b/tests/s2_inference/embeddings_reference/embeddings_large_open_clip_python_3_8.json
diff --git a/tests/s2_inference/embeddings_reference/embeddings_open_clip_python_3_8.json b/tests/s2_inference/embeddings_reference/embeddings_open_clip_python_3_8.json
diff --git a/tests/s2_inference/embeddings_reference/embeddings_python_3_8.json b/tests/s2_inference/embeddings_reference/embeddings_python_3_8.json
diff --git a/tests/s2_inference/embeddings_reference/info.txt b/tests/s2_inference/embeddings_reference/info.txt
@@ -0,0 +1,7 @@
+16/10/24 - All embeddings were generated with:
+- Marqo mainline head: 055237ae6c4a8121b4026650582f3a23bd416564 (2.12.2 release notes)
+- Python 3.8.20
+- open_clip_torch==2.24.0
+- torch==1.12.1
+- Ubuntu 22.04.4 LTS
+- g4dn.xlarge EC2 instance
diff --git a/tests/s2_inference/test_encoding.py b/tests/s2_inference/test_encoding.py
@@ -1,12 +1,15 @@
 import unittest
 import torch
+import json
+import numpy as np
 from unittest.mock import MagicMock, patch
 from marqo.s2_inference.types import FloatTensor
 from marqo.s2_inference.s2_inference import clear_loaded_models, get_model_properties_from_registry
 from marqo.s2_inference.model_registry import load_model_properties, _get_open_clip_properties
 from marqo.s2_inference.s2_inference import _convert_tensor_to_numpy
 import numpy as np
 import functools
+import os
 
 from marqo.s2_inference.s2_inference import (
     _check_output_type, vectorise,
@@ -17,6 +20,13 @@
 
 _load_model = functools.partial(og_load_model, calling_func = "unit_test")
 
+
+def get_absolute_file_path(filename: str) -> str:
+    """Return ``filename`` joined onto the directory that contains this test module."""
+    module_dir = os.path.dirname(os.path.abspath(__file__))
+    return os.path.join(module_dir, filename)
+
+
 class TestEncoding(unittest.TestCase):
 
     def setUp(self) -> None:
@@ -26,8 +36,12 @@ def tearDown(self) -> None:
         clear_loaded_models()
 
     def test_vectorize(self):
-        names = ["fp16/ViT-B/32", "onnx16/open_clip/ViT-B-32/laion400m_e32",
-                 'onnx32/open_clip/ViT-B-32-quickgelu/laion400m_e32',
+        """
+        Ensure that vectorised output from vectorise function matches both the model.encode output and
+        hardcoded embeddings from Python 3.8.20
+        """
+
+        names = ["fp16/ViT-B/32", "onnx16/open_clip/ViT-B-32/laion400m_e32", 'onnx32/open_clip/ViT-B-32-quickgelu/laion400m_e32',
                  "all-MiniLM-L6-v1", "all_datasets_v4_MiniLM-L6", "hf/all-MiniLM-L6-v1", "hf/all_datasets_v4_MiniLM-L6",
                  "hf/bge-small-en-v1.5", "onnx/all-MiniLM-L6-v1", "onnx/all_datasets_v4_MiniLM-L6"]
 
@@ -43,21 +57,42 @@ def test_vectorize(self):
         sentences = ['hello', 'this is a test sentence. so is this.', ['hello', 'this is a test sentence. so is this.']]
         device = 'cpu'
         eps = 1e-9
+        embeddings_file_name = get_absolute_file_path("embeddings_reference/embeddings_python_3_8.json")
+
+        # Load in hardcoded embeddings json file
+        with open(embeddings_file_name, "r") as f:
+            embeddings_python_3_8 = json.load(f)
 
         for name in names:
-            model_properties = get_model_properties_from_registry(name)
-            model = _load_model(model_properties['name'], model_properties=model_properties, device=device)
+            with self.subTest(name=name):
+                # Add hardcoded embeddings into the variable.
+                model_properties = get_model_properties_from_registry(name)
+                model = _load_model(model_properties['name'], model_properties=model_properties, device=device)
 
-            for sentence in sentences:
-                output_v = vectorise(name, sentence, model_properties, device, normalize_embeddings=True)
+                for sentence in sentences:
+                    with self.subTest(sentence=sentence):
+                        output_v = vectorise(name, sentence, model_properties, device, normalize_embeddings=True)
+                        assert _check_output_type(output_v)
 
-                assert _check_output_type(output_v)
+                        output_m = model.encode(sentence, normalize=True)
 
-                output_m = model.encode(sentence, normalize=True)
+                        # Embeddings must match hardcoded python 3.8.20 embeddings
+                        if isinstance(sentence, str):
+                            with self.subTest("Hardcoded Python 3.8 Embeddings Comparison"):
+                                try:
+                                    self.assertEqual(np.allclose(output_m, embeddings_python_3_8[name][sentence],
+                                                                 atol=1e-6),
+                                                 True)
+                                except KeyError:
+                                    raise KeyError(f"Hardcoded Python 3.8 embeddings not found for "
+                                                   f"model: {name}, sentence: {sentence} in JSON file: "
+                                                   f"{embeddings_file_name}")
 
-                assert abs(torch.FloatTensor(output_m) - torch.FloatTensor(output_v)).sum() < eps
+                        with self.subTest("Model encode vs vectorize"):
+                            self.assertEqual(np.allclose(output_m, output_v, atol=eps), True)
+
+                clear_loaded_models()
 
-            clear_loaded_models()
 
     def test_vectorize_normalise(self):
         open_clip_names = ["open_clip/ViT-B-32/laion2b_s34b_b79k"]
@@ -120,6 +155,7 @@ def test_cpu_encode_type(self):
 
             clear_loaded_models()
 
+
     def test_load_clip_text_model(self):
         names = ["fp16/ViT-B/32", "onnx16/open_clip/ViT-B-32/laion400m_e32", 'onnx32/open_clip/ViT-B-32-quickgelu/laion400m_e32',
                   'RN50', "ViT-B/16"]
@@ -313,6 +349,11 @@ def test_open_clip_vectorize(self):
         sentences = ['hello', 'this is a test sentence. so is this.', ['hello', 'this is a test sentence. so is this.']]
         device = 'cpu'
         eps = 1e-9
+        embeddings_reference_file = get_absolute_file_path("embeddings_reference/embeddings_open_clip_python_3_8.json")
+
+        # Load in hardcoded embeddings json file
+        with open(embeddings_reference_file, "r") as f:
+            embeddings_python_3_8 = json.load(f)
 
         for name in names:
             model_properties = get_model_properties_from_registry(name)
@@ -327,7 +368,21 @@ def test_open_clip_vectorize(self):
 
                     output_m = model.encode(sentence, normalize=normalize_embeddings)
 
-                    assert abs(torch.FloatTensor(output_m) - torch.FloatTensor(output_v)).sum() < eps
+                    # Compare on every iteration, at the same nesting level as the encode call above
+                    with self.subTest("Model encode vs vectorize"):
+                        self.assertEqual(np.allclose(output_m, output_v, atol=eps), True)
+                # Embeddings must match hardcoded python 3.8.20 embeddings
+                if isinstance(sentence, str):
+                    with self.subTest("Hardcoded Python 3.8 Embeddings Comparison"):
+                        try:
+                            self.assertEqual(np.allclose(output_m, embeddings_python_3_8[name][sentence], atol=1e-5),
+                                            True, f"For model {name} and sentence {sentence}: "
+                                                    f"Calculated embedding is {output_m} but "
+                                                  f"hardcoded embedding is {embeddings_python_3_8[name][sentence]}")
+                        except KeyError:
+                            raise KeyError(f"Hardcoded Python 3.8 embeddings not found for "
+                                           f"model: {name}, sentence: {sentence} in JSON file: "
+                                           f"{embeddings_reference_file}")
 
             clear_loaded_models()
 

diff --git a/tests/s2_inference/test_large_model_encoding.py b/tests/s2_inference/test_large_model_encoding.py
@@ -1,6 +1,7 @@
 import os
 import torch
 import pytest
+import json
 from marqo.s2_inference.types import FloatTensor
 from marqo.s2_inference.s2_inference import clear_loaded_models, get_model_properties_from_registry, _convert_tensor_to_numpy
 from unittest.mock import patch
@@ -34,10 +35,31 @@ def remove_cached_model_files():
                 elif os.path.isdir(item_path):
                     shutil.rmtree(item_path)
 
-def run_test_vectorize(models):
+
+def get_absolute_file_path(filename: str) -> str:
+    """Build the path of ``filename`` relative to the directory holding this file."""
+    here = os.path.abspath(__file__)
+    return os.path.join(os.path.dirname(here), filename)
+
+
+def run_test_vectorize(models, model_type):
+
+    # model_type determines the filename with which the embeddings are saved/loaded
+    # Ensure that vectorised output from vectorise function matches both the model.encode output and
+    # hardcoded embeddings from Python 3.8
+
+
     sentences = ['hello', 'this is a test sentence. so is this.', ['hello', 'this is a test sentence. so is this.']]
     device = "cuda"
     eps = 1e-9
+    embeddings_reference_file = get_absolute_file_path(
+        f"embeddings_reference/embeddings_{model_type}_python_3_8.json"
+    )
+
+    # Load in hardcoded embeddings json file
+    with open(embeddings_reference_file, "r") as f:
+        embeddings_python_3_8 = json.load(f)
+
     with patch.dict(os.environ, {"MARQO_MAX_CUDA_MODEL_MEMORY": "10"}):
         def run():
             for name in models:
@@ -55,7 +77,16 @@ def run():
                     if type(output_m) == torch.Tensor:
                         output_m = output_m.cpu().numpy()
 
-                    assert abs(torch.FloatTensor(output_m) - torch.FloatTensor(output_v)).sum() < eps
+                    # Embeddings must match hardcoded python 3.8.20 embeddings
+                    if isinstance(sentence, str):
+                        try:
+                            assert np.allclose(output_m, embeddings_python_3_8[name][sentence], atol=1e-6)
+                        except KeyError:
+                            raise KeyError(f"Hardcoded Python 3.8 embeddings not found for "
+                                           f"model: {name}, sentence: {sentence} in JSON file: "
+                                           f"{embeddings_reference_file}")
+
+                    assert np.allclose(output_m, output_v, atol=eps)
 
                 clear_loaded_models()
                 torch.cuda.empty_cache()
@@ -67,6 +98,7 @@ def run():
 
         assert run()
 
+
 def run_test_model_outputs(models):
     sentences = ['hello', 'this is a test sentence. so is this.', ['hello', 'this is a test sentence. so is this.']]
     device = "cuda"
@@ -155,8 +187,7 @@ def tearDownClass(cls) -> None:
 
     def test_vectorize(self):
         # For GPU Memory Optimization, we shouldn't load all models at once
-        for model_name in self.models:
-            run_test_vectorize(models=[model_name])
+        run_test_vectorize(models=self.models, model_type="large_open_clip")
 
     def test_load_clip_text_model(self):
         device = "cuda"
@@ -224,8 +255,7 @@ def tearDownClass(cls) -> None:
 
     def test_vectorize(self):
         # For GPU Memory Optimization, we shouldn't load all models at once
-        for model_name in self.models:
-            run_test_vectorize(models=[model_name])
+        run_test_vectorize(models=self.models, model_type="large_e5")
 
     def test_model_outputs(self):
         for model_name in self.models:
@@ -259,8 +289,7 @@ def tearDownClass(cls) -> None:
 
     def test_vectorize(self):
         # For GPU Memory Optimization, we shouldn't load all models at once
-        for model_name in self.models:
-            run_test_vectorize(models=[model_name])
+        run_test_vectorize(models=self.models, model_type="large_bge")
 
     def test_model_outputs(self):
         for model_name in self.models:
@@ -294,8 +323,7 @@ def tearDownClass(cls) -> None:
 
     def test_vectorize(self):
         # For GPU Memory Optimization, we shouldn't load all models at once
-        for model_name in self.models:
-            run_test_vectorize(models=[model_name])
+        run_test_vectorize(models=self.models, model_type="large_snowflake")
 
     def test_model_outputs(self):
         for model_name in self.models:
@@ -334,8 +362,7 @@ def tearDownClass(cls) -> None:
 
     def test_vectorize(self):
         # For GPU Memory Optimization, we shouldn't load all models at once
-        for model_name in self.models:
-            run_test_vectorize(models=[model_name])
+        run_test_vectorize(models=self.models, model_type="large_multilingual_e5")
 
     def test_model_outputs(self):
         for model_name in self.models: