From 02349b0afced54cabd75d37d345a80370f204a2c Mon Sep 17 00:00:00 2001 From: joshuak Date: Fri, 12 May 2023 14:58:07 +0800 Subject: [PATCH 1/9] initial draft work --- src/marqo/tensor_search/on_start_script.py | 41 ++++++++++++++++++++-- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/src/marqo/tensor_search/on_start_script.py b/src/marqo/tensor_search/on_start_script.py index 1443e841f..c8ff84c91 100644 --- a/src/marqo/tensor_search/on_start_script.py +++ b/src/marqo/tensor_search/on_start_script.py @@ -109,13 +109,16 @@ def __init__(self): try: self.models = json.loads(warmed_models) except json.JSONDecodeError as e: + # TODO: Change error message to match new format raise errors.EnvVarError( f"Could not parse environment variable `{EnvVars.MARQO_MODELS_TO_PRELOAD}`. " - f"Please ensure that this a JSON-encoded array of strings. For example:\n" + f"Please ensure that this a JSON-encoded array of strings or dictionaries. For example:\n" f"""export {EnvVars.MARQO_MODELS_TO_PRELOAD}='["ViT-L/14", "onnx/all_datasets_v4_MiniLM-L6"]'""" ) from e else: self.models = warmed_models + + self.logger.debug(f"self.models is of data type {type(self.models)}. The value is {self.models}") # TBD to include cross-encoder/ms-marco-TinyBERT-L-2-v2 self.default_devices = ['cpu'] if not torch.cuda.is_available() else ['cpu', 'cuda'] @@ -123,6 +126,36 @@ def __init__(self): self.logger.info(f"pre-loading {self.models} onto devices={self.default_devices}") def run(self): + + def _prewarm_model(model, content, device): + """ + Calls vectorise for a model once. This will load in the model if it isn't already loaded. + If `model` is a str, it should be a model name in the registry + If `model is a dict, it should be an object containing `model_name` and `model_properties` + Model properties will be passed to vectorise call if object exists + """ + if isinstance(model, str): + self.logger.debug(f"Model {model} has been passed as a str") + # For models IN REGISTRY + _ = vectorise( + model_name=model, + content=test_string, + device=device + ) + elif isinstance(model, dict): + # For models from URL + """ + TODO: include validation from on start script (model name properties etc) + _check_model_name(index_settings) + """ + self.logger.debug(f"Model {model['model_name']} has been passed as a dict") + _ = vectorise( + model_name=model["model_name"], + model_properties=model["model_properties"], + content=test_string, + device=device + ) + from marqo.s2_inference.s2_inference import vectorise test_string = 'this is a test string' @@ -130,14 +163,16 @@ def run(self): messages = [] for model in self.models: for device in self.default_devices: + self.logger.debug(f"Beginning loading for model: {model} on device: {device}") # warm it up - _ = vectorise(model, test_string, device=device) + _ = _prewarm_model(model=model, content=test_string, device=device) t = 0 for n in range(N): + self.logger.debug(f"Vectorise Call #{n}") t0 = time.time() - _ = vectorise(model, test_string, device=device) + _ = _prewarm_model(model=model, content=test_string, device=device) t1 = time.time() t += (t1 - t0) message = f"{(t)/float((N))} for {model} and {device}" From 9b0939fdfe9955fa5c7efbe2cfd5cc7fbb9670ac Mon Sep 17 00:00:00 2001 From: joshuak Date: Fri, 12 May 2023 14:59:42 +0800 Subject: [PATCH 2/9] initial draft work --- src/marqo/s2_inference/s2_inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index 
56a9f6dd1..45a6926d2 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -172,7 +172,7 @@ def _update_available_models(model_cache_key: str, model_name: str, validated_mo f"and Marqo has access to the weights file.") else: most_recently_used_time = datetime.datetime.now() - logger.debug(f'renew {model_name} on device {device} with new time={most_recently_used_time}.') + logger.debug(f'renewed {model_name} on device {device} with new most recently time={most_recently_used_time}.') try: available_models[model_cache_key][AvailableModelsKey.most_recently_used_time] = most_recently_used_time except KeyError: From cc92826998bb94af283d9f5b24df42c0eb5c235a Mon Sep 17 00:00:00 2001 From: joshuak Date: Fri, 12 May 2023 18:51:16 +0800 Subject: [PATCH 3/9] updated error messages and added try catch to warmup --- src/marqo/tensor_search/on_start_script.py | 25 +++++++++++++--------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/marqo/tensor_search/on_start_script.py b/src/marqo/tensor_search/on_start_script.py index c8ff84c91..7d54c774f 100644 --- a/src/marqo/tensor_search/on_start_script.py +++ b/src/marqo/tensor_search/on_start_script.py @@ -112,8 +112,9 @@ def __init__(self): # TODO: Change error message to match new format raise errors.EnvVarError( f"Could not parse environment variable `{EnvVars.MARQO_MODELS_TO_PRELOAD}`. " - f"Please ensure that this a JSON-encoded array of strings or dictionaries. For example:\n" + f"Please ensure that this a JSON-encoded array of strings or dicts. For example:\n" f"""export {EnvVars.MARQO_MODELS_TO_PRELOAD}='["ViT-L/14", "onnx/all_datasets_v4_MiniLM-L6"]'""" + f"""To add a custom model, it must be a dict with keys `model` and `model_properties` as defined in `https://marqo.pages.dev/0.0.20/Models-Reference/bring_your_own_model/`""" ) from e else: self.models = warmed_models @@ -135,7 +136,6 @@ def _prewarm_model(model, content, device): Model properties will be passed to vectorise call if object exists """ if isinstance(model, str): - self.logger.debug(f"Model {model} has been passed as a str") # For models IN REGISTRY _ = vectorise( model_name=model, @@ -148,13 +148,18 @@ def _prewarm_model(model, content, device): TODO: include validation from on start script (model name properties etc) _check_model_name(index_settings) """ - self.logger.debug(f"Model {model['model_name']} has been passed as a dict") - _ = vectorise( - model_name=model["model_name"], - model_properties=model["model_properties"], - content=test_string, - device=device - ) + try: + _ = vectorise( + model_name=model["model"], + model_properties=model["model_properties"], + content=test_string, + device=device + ) + except KeyError as e: + raise errors.EnvVarError( + f"Your custom model {model} is missing either `model_name` or `model_properties`." 
+ f"""To add a custom model, it must be a dict with keys `model` and `model_properties` as defined in `https://marqo.pages.dev/0.0.20/Models-Reference/bring_your_own_model/`""" + ) from e from marqo.s2_inference.s2_inference import vectorise @@ -170,13 +175,13 @@ def _prewarm_model(model, content, device): t = 0 for n in range(N): - self.logger.debug(f"Vectorise Call #{n}") t0 = time.time() _ = _prewarm_model(model=model, content=test_string, device=device) t1 = time.time() t += (t1 - t0) message = f"{(t)/float((N))} for {model} and {device}" messages.append(message) + self.logger.debug(f"{model} {device} vectorise run {N} times.") self.logger.info(f"{model} {device} run succesfully!") for message in messages: From 9e169d2ae06ffe77b571f9be884645080e506beb Mon Sep 17 00:00:00 2001 From: joshuak Date: Mon, 15 May 2023 17:27:23 +0800 Subject: [PATCH 4/9] made preload_model an outside function --- src/marqo/tensor_search/on_start_script.py | 75 +++++++++++----------- 1 file changed, 38 insertions(+), 37 deletions(-) diff --git a/src/marqo/tensor_search/on_start_script.py b/src/marqo/tensor_search/on_start_script.py index 7d54c774f..0514138b2 100644 --- a/src/marqo/tensor_search/on_start_script.py +++ b/src/marqo/tensor_search/on_start_script.py @@ -12,6 +12,7 @@ from marqo import errors from marqo.tensor_search.throttling.redis_throttle import throttle from marqo.connections import redis_driver +from marqo.s2_inference.s2_inference import vectorise def on_start(marqo_os_url: str): @@ -95,6 +96,41 @@ def id_to_device(id): device_names.append( {'id':device_id, 'name':id_to_device(device_id)}) self.logger.info(f"found devices {device_names}") + +def _preload_model(model, content, device): + """ + Calls vectorise for a model once. This will load in the model if it isn't already loaded. + If `model` is a str, it should be a model name in the registry + If `model is a dict, it should be an object containing `model_name` and `model_properties` + Model properties will be passed to vectorise call if object exists + """ + if isinstance(model, str): + # For models IN REGISTRY + _ = vectorise( + model_name=model, + content=content, + device=device + ) + elif isinstance(model, dict): + # For models from URL + """ + TODO: include validation from on start script (model name properties etc) + _check_model_name(index_settings) + """ + try: + _ = vectorise( + model_name=model["model"], + model_properties=model["model_properties"], + content=content, + device=device + ) + except KeyError as e: + raise errors.EnvVarError( + f"Your custom model {model} is missing either `model_name` or `model_properties`." + f"""To add a custom model, it must be a dict with keys `model` and `model_properties` as defined in `https://marqo.pages.dev/0.0.20/Models-Reference/bring_your_own_model/`""" + ) from e + + class ModelsForCacheing: """warms the in-memory model cache by preloading good defaults """ @@ -127,41 +163,6 @@ def __init__(self): self.logger.info(f"pre-loading {self.models} onto devices={self.default_devices}") def run(self): - - def _prewarm_model(model, content, device): - """ - Calls vectorise for a model once. This will load in the model if it isn't already loaded. 
- If `model` is a str, it should be a model name in the registry - If `model is a dict, it should be an object containing `model_name` and `model_properties` - Model properties will be passed to vectorise call if object exists - """ - if isinstance(model, str): - # For models IN REGISTRY - _ = vectorise( - model_name=model, - content=test_string, - device=device - ) - elif isinstance(model, dict): - # For models from URL - """ - TODO: include validation from on start script (model name properties etc) - _check_model_name(index_settings) - """ - try: - _ = vectorise( - model_name=model["model"], - model_properties=model["model_properties"], - content=test_string, - device=device - ) - except KeyError as e: - raise errors.EnvVarError( - f"Your custom model {model} is missing either `model_name` or `model_properties`." - f"""To add a custom model, it must be a dict with keys `model` and `model_properties` as defined in `https://marqo.pages.dev/0.0.20/Models-Reference/bring_your_own_model/`""" - ) from e - - from marqo.s2_inference.s2_inference import vectorise test_string = 'this is a test string' N = 10 @@ -171,12 +172,12 @@ def _prewarm_model(model, content, device): self.logger.debug(f"Beginning loading for model: {model} on device: {device}") # warm it up - _ = _prewarm_model(model=model, content=test_string, device=device) + _ = _preload_model(model=model, content=test_string, device=device) t = 0 for n in range(N): t0 = time.time() - _ = _prewarm_model(model=model, content=test_string, device=device) + _ = _preload_model(model=model, content=test_string, device=device) t1 = time.time() t += (t1 - t0) message = f"{(t)/float((N))} for {model} and {device}" From 89a4ad9f55e9fe2d3473a8a672e629b6239805e0 Mon Sep 17 00:00:00 2001 From: joshuak Date: Mon, 15 May 2023 19:00:47 +0800 Subject: [PATCH 5/9] draft test work --- src/marqo/tensor_search/on_start_script.py | 72 ++++++++++----------- tests/tensor_search/test_on_start_script.py | 32 ++++++++- 2 files changed, 67 insertions(+), 37 deletions(-) diff --git a/src/marqo/tensor_search/on_start_script.py b/src/marqo/tensor_search/on_start_script.py index 0514138b2..6d72eec74 100644 --- a/src/marqo/tensor_search/on_start_script.py +++ b/src/marqo/tensor_search/on_start_script.py @@ -12,7 +12,6 @@ from marqo import errors from marqo.tensor_search.throttling.redis_throttle import throttle from marqo.connections import redis_driver -from marqo.s2_inference.s2_inference import vectorise def on_start(marqo_os_url: str): @@ -97,40 +96,6 @@ def id_to_device(id): self.logger.info(f"found devices {device_names}") -def _preload_model(model, content, device): - """ - Calls vectorise for a model once. This will load in the model if it isn't already loaded. 
- If `model` is a str, it should be a model name in the registry - If `model is a dict, it should be an object containing `model_name` and `model_properties` - Model properties will be passed to vectorise call if object exists - """ - if isinstance(model, str): - # For models IN REGISTRY - _ = vectorise( - model_name=model, - content=content, - device=device - ) - elif isinstance(model, dict): - # For models from URL - """ - TODO: include validation from on start script (model name properties etc) - _check_model_name(index_settings) - """ - try: - _ = vectorise( - model_name=model["model"], - model_properties=model["model_properties"], - content=content, - device=device - ) - except KeyError as e: - raise errors.EnvVarError( - f"Your custom model {model} is missing either `model_name` or `model_properties`." - f"""To add a custom model, it must be a dict with keys `model` and `model_properties` as defined in `https://marqo.pages.dev/0.0.20/Models-Reference/bring_your_own_model/`""" - ) from e - - class ModelsForCacheing: """warms the in-memory model cache by preloading good defaults """ @@ -163,10 +128,10 @@ def __init__(self): self.logger.info(f"pre-loading {self.models} onto devices={self.default_devices}") def run(self): - test_string = 'this is a test string' N = 10 messages = [] + self.logger.debug(f"DEBUG Models to load: {self.models}") for model in self.models: for device in self.default_devices: self.logger.debug(f"Beginning loading for model: {model} on device: {device}") @@ -190,6 +155,41 @@ def run(self): self.logger.info("completed loading models") +def _preload_model(model, content, device): + """ + Calls vectorise for a model once. This will load in the model if it isn't already loaded. + If `model` is a str, it should be a model name in the registry + If `model is a dict, it should be an object containing `model_name` and `model_properties` + Model properties will be passed to vectorise call if object exists + """ + from marqo.s2_inference.s2_inference import vectorise + if isinstance(model, str): + # For models IN REGISTRY + _ = vectorise( + model_name=model, + content=content, + device=device + ) + elif isinstance(model, dict): + # For models from URL + """ + TODO: include validation from on start script (model name properties etc) + _check_model_name(index_settings) + """ + try: + _ = vectorise( + model_name=model["model"], + model_properties=model["model_properties"], + content=content, + device=device + ) + except KeyError as e: + raise errors.EnvVarError( + f"Your custom model {model} is missing either `model_name` or `model_properties`." 
+ f"""To add a custom model, it must be a dict with keys `model` and `model_properties` as defined in `https://marqo.pages.dev/0.0.20/Models-Reference/bring_your_own_model/`""" + ) from e + + class InitializeRedis: def __init__(self, host: str, port: int): diff --git a/tests/tensor_search/test_on_start_script.py b/tests/tensor_search/test_on_start_script.py index 1ebdfaefa..eff6102a0 100644 --- a/tests/tensor_search/test_on_start_script.py +++ b/tests/tensor_search/test_on_start_script.py @@ -10,7 +10,7 @@ class TestOnStartScript(MarqoTestCase): - def test_preload_models(self): + def test_preload_registry_models(self): environ_expected_models = [ ({enums.EnvVars.MARQO_MODELS_TO_PRELOAD: []}, []), ({enums.EnvVars.MARQO_MODELS_TO_PRELOAD: ""}, []), @@ -47,6 +47,36 @@ def run(): print(str(e)) return True assert run() + + def test_preload_url_models(self): + environ_expected_models = [ + ] + for mock_environ, expected in environ_expected_models: + mock_vectorise = mock.MagicMock() + @mock.patch("os.environ", mock_environ) + @mock.patch("marqo.s2_inference.s2_inference.vectorise", mock_vectorise) + def run(): + model_caching_script = on_start_script.ModelsForCacheing() + model_caching_script.run() + loaded_models = {args[0] for args, kwargs in mock_vectorise.call_args_list} + assert loaded_models == set(expected) + return True + assert run() + + def test_preload_url_models_malformed(self): + environ_expected_models = [ + ] + for mock_environ, expected in environ_expected_models: + mock_vectorise = mock.MagicMock() + @mock.patch("os.environ", mock_environ) + @mock.patch("marqo.s2_inference.s2_inference.vectorise", mock_vectorise) + def run(): + model_caching_script = on_start_script.ModelsForCacheing() + model_caching_script.run() + loaded_models = {args[0] for args, kwargs in mock_vectorise.call_args_list} + assert loaded_models == set(expected) + return True + assert run() From 2c32092ebff9682f071e2c012ba35777d0152143 Mon Sep 17 00:00:00 2001 From: joshuak Date: Mon, 15 May 2023 21:42:22 +0800 Subject: [PATCH 6/9] fixed unit tests for preload function --- src/marqo/tensor_search/on_start_script.py | 3 +- tests/tensor_search/test_on_start_script.py | 105 +++++++++++++++++--- 2 files changed, 90 insertions(+), 18 deletions(-) diff --git a/src/marqo/tensor_search/on_start_script.py b/src/marqo/tensor_search/on_start_script.py index 6d72eec74..d96f2530e 100644 --- a/src/marqo/tensor_search/on_start_script.py +++ b/src/marqo/tensor_search/on_start_script.py @@ -12,6 +12,7 @@ from marqo import errors from marqo.tensor_search.throttling.redis_throttle import throttle from marqo.connections import redis_driver +from marqo.s2_inference.s2_inference import vectorise def on_start(marqo_os_url: str): @@ -131,7 +132,6 @@ def run(self): test_string = 'this is a test string' N = 10 messages = [] - self.logger.debug(f"DEBUG Models to load: {self.models}") for model in self.models: for device in self.default_devices: self.logger.debug(f"Beginning loading for model: {model} on device: {device}") @@ -162,7 +162,6 @@ def _preload_model(model, content, device): If `model is a dict, it should be an object containing `model_name` and `model_properties` Model properties will be passed to vectorise call if object exists """ - from marqo.s2_inference.s2_inference import vectorise if isinstance(model, str): # For models IN REGISTRY _ = vectorise( diff --git a/tests/tensor_search/test_on_start_script.py b/tests/tensor_search/test_on_start_script.py index eff6102a0..ca9382c3b 100644 --- 
a/tests/tensor_search/test_on_start_script.py +++ b/tests/tensor_search/test_on_start_script.py @@ -28,11 +28,11 @@ def test_preload_registry_models(self): for mock_environ, expected in environ_expected_models: mock_vectorise = mock.MagicMock() @mock.patch("os.environ", mock_environ) - @mock.patch("marqo.s2_inference.s2_inference.vectorise", mock_vectorise) + @mock.patch("marqo.tensor_search.on_start_script.vectorise", mock_vectorise) def run(): model_caching_script = on_start_script.ModelsForCacheing() model_caching_script.run() - loaded_models = {args[0] for args, kwargs in mock_vectorise.call_args_list} + loaded_models = {kwargs["model_name"] for args, kwargs in mock_vectorise.call_args_list} assert loaded_models == set(expected) return True assert run() @@ -49,35 +49,108 @@ def run(): assert run() def test_preload_url_models(self): + clip_model_object = { + "model": "generic-clip-test-model-2", + "model_properties": { + "name": "ViT-B/32", + "dimensions": 512, + "type": "clip", + "url": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt" + } + } + + clip_model_expected = ( + "generic-clip-test-model-2", + "ViT-B/32", + 512, + "clip", + "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt" + ) + + open_clip_model_object = { + "model": "random-open-clip-1", + "model_properties": { + "name": "ViT-B-32-quickgelu", + "dimensions": 512, + "type": "open_clip", + "url": "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_avg-8a00ab3c.pt" + } + } + + # must be an immutable datatype + open_clip_model_expected = ( + "random-open-clip-1", + "ViT-B-32-quickgelu", + 512, + "open_clip", + "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_avg-8a00ab3c.pt" + ) + environ_expected_models = [ + ({enums.EnvVars.MARQO_MODELS_TO_PRELOAD: [clip_model_object, open_clip_model_object]}, [clip_model_expected, open_clip_model_expected]), + ({enums.EnvVars.MARQO_MODELS_TO_PRELOAD: json.dumps([clip_model_object, open_clip_model_object])}, [clip_model_expected, open_clip_model_expected]) ] for mock_environ, expected in environ_expected_models: mock_vectorise = mock.MagicMock() @mock.patch("os.environ", mock_environ) - @mock.patch("marqo.s2_inference.s2_inference.vectorise", mock_vectorise) + @mock.patch("marqo.tensor_search.on_start_script.vectorise", mock_vectorise) def run(): model_caching_script = on_start_script.ModelsForCacheing() model_caching_script.run() - loaded_models = {args[0] for args, kwargs in mock_vectorise.call_args_list} + loaded_models = { + ( + kwargs["model_name"], + kwargs["model_properties"]["name"], + kwargs["model_properties"]["dimensions"], + kwargs["model_properties"]["type"], + kwargs["model_properties"]["url"] + ) + for args, kwargs in mock_vectorise.call_args_list + } assert loaded_models == set(expected) return True assert run() - def test_preload_url_models_malformed(self): - environ_expected_models = [ - ] - for mock_environ, expected in environ_expected_models: - mock_vectorise = mock.MagicMock() - @mock.patch("os.environ", mock_environ) - @mock.patch("marqo.s2_inference.s2_inference.vectorise", mock_vectorise) - def run(): + def test_preload_url_missing_model(self): + open_clip_model_object = { + "model_properties": { + "name": "ViT-B-32-quickgelu", + "dimensions": 512, + "type": "open_clip", + "url": 
"https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_avg-8a00ab3c.pt" + } + } + mock_vectorise = mock.MagicMock() + @mock.patch("marqo.tensor_search.on_start_script.vectorise", mock_vectorise) + @mock.patch("os.environ", {enums.EnvVars.MARQO_MODELS_TO_PRELOAD: [open_clip_model_object]}) + def run(): + try: model_caching_script = on_start_script.ModelsForCacheing() + # There should be a KeyError -> EnvVarError when attempting to call vectorise model_caching_script.run() - loaded_models = {args[0] for args, kwargs in mock_vectorise.call_args_list} - assert loaded_models == set(expected) + raise AssertionError + except errors.EnvVarError as e: return True - assert run() - + assert run() + + def test_preload_url_missing_model_properties(self): + open_clip_model_object = { + "model": "random-open-clip-1" + } + mock_vectorise = mock.MagicMock() + @mock.patch("marqo.tensor_search.on_start_script.vectorise", mock_vectorise) + @mock.patch("os.environ", {enums.EnvVars.MARQO_MODELS_TO_PRELOAD: [open_clip_model_object]}) + def run(): + try: + model_caching_script = on_start_script.ModelsForCacheing() + # There should be a KeyError -> EnvVarError when attempting to call vectorise + model_caching_script.run() + raise AssertionError + except errors.EnvVarError as e: + return True + assert run() + + # TODO: test bad/no names/URLS in end-to-end tests, as this logic is done in vectorise call From 7d3bd18828b8797000f861c6a3c6e7c2b240dc65 Mon Sep 17 00:00:00 2001 From: joshuak Date: Mon, 15 May 2023 21:44:47 +0800 Subject: [PATCH 7/9] fixed debug messages --- src/marqo/tensor_search/on_start_script.py | 2 -- tests/tensor_search/test_on_start_script.py | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/marqo/tensor_search/on_start_script.py b/src/marqo/tensor_search/on_start_script.py index d96f2530e..e70e82ed9 100644 --- a/src/marqo/tensor_search/on_start_script.py +++ b/src/marqo/tensor_search/on_start_script.py @@ -120,8 +120,6 @@ def __init__(self): ) from e else: self.models = warmed_models - - self.logger.debug(f"self.models is of data type {type(self.models)}. 
The value is {self.models}")
         # TBD to include cross-encoder/ms-marco-TinyBERT-L-2-v2

         self.default_devices = ['cpu'] if not torch.cuda.is_available() else ['cpu', 'cuda']
diff --git a/tests/tensor_search/test_on_start_script.py b/tests/tensor_search/test_on_start_script.py
index ca9382c3b..550afa6a3 100644
--- a/tests/tensor_search/test_on_start_script.py
+++ b/tests/tensor_search/test_on_start_script.py
@@ -85,7 +85,8 @@ def test_preload_url_models(self):
             "open_clip",
             "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_avg-8a00ab3c.pt"
         )
-
+
+        # So far has clip and open clip tests
         environ_expected_models = [
             ({enums.EnvVars.MARQO_MODELS_TO_PRELOAD: [clip_model_object, open_clip_model_object]}, [clip_model_expected, open_clip_model_expected]),
             ({enums.EnvVars.MARQO_MODELS_TO_PRELOAD: json.dumps([clip_model_object, open_clip_model_object])}, [clip_model_expected, open_clip_model_expected])

From 4a8f403cfaf37a1620db21354747646992ee03f3 Mon Sep 17 00:00:00 2001
From: Joshua
Date: Tue, 16 May 2023 12:52:32 +0800
Subject: [PATCH 8/9] Update version.py to 0.0.20

---
 src/marqo/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/marqo/version.py b/src/marqo/version.py
index 293a218ba..b02899ea3 100644
--- a/src/marqo/version.py
+++ b/src/marqo/version.py
@@ -1,4 +1,4 @@
-__version__ = "0.0.19"
+__version__ = "0.0.20"


 def get_version() -> str:

From 0e4b73cc1d6376e71537b1760049d7f9c63381c1 Mon Sep 17 00:00:00 2001
From: joshuak
Date: Tue, 16 May 2023 19:31:43 +0800
Subject: [PATCH 9/9] updated error message

---
 src/marqo/tensor_search/on_start_script.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/marqo/tensor_search/on_start_script.py b/src/marqo/tensor_search/on_start_script.py
index e70e82ed9..92cbab655 100644
--- a/src/marqo/tensor_search/on_start_script.py
+++ b/src/marqo/tensor_search/on_start_script.py
@@ -182,8 +182,8 @@ def _preload_model(model, content, device):
             )
         except KeyError as e:
             raise errors.EnvVarError(
-                f"Your custom model {model} is missing either `model_name` or `model_properties`."
-                f"""To add a custom model, it must be a dict with keys `model` and `model_properties` as defined in `https://marqo.pages.dev/0.0.20/Models-Reference/bring_your_own_model/`"""
+                f"Your custom model {model} is missing either `model` or `model_properties`."
+                f"""To add a custom model, it must be a dict with keys `model` and `model_properties` as defined in `https://marqo.pages.dev/0.0.20/Advanced-Usage/configuration/#configuring-preloaded-models`"""
             ) from e
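
Usage note (illustrative, not part of the patches above). This series extends MARQO_MODELS_TO_PRELOAD so that each entry may be either a registry model name (a plain string) or a custom-model dict with `model` and `model_properties` keys. A minimal sketch of building that configuration in Python, reusing the custom-model definition from test_preload_url_models above; it assumes EnvVars.MARQO_MODELS_TO_PRELOAD resolves to the literal "MARQO_MODELS_TO_PRELOAD", and the exact set of required `model_properties` fields is whatever vectorise accepts, so treat the dict as an example rather than a schema:

    import json
    import os

    # One registry model (plain string) and one custom model (dict), as exercised
    # by test_preload_url_models in the tests above.
    models_to_preload = [
        "ViT-L/14",
        {
            "model": "generic-clip-test-model-2",
            "model_properties": {
                "name": "ViT-B/32",
                "dimensions": 512,
                "type": "clip",
                "url": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt"
            }
        }
    ]

    # The on-start script JSON-decodes this variable, so it must be a JSON-encoded
    # array of strings and/or dicts (see the EnvVarError message in PATCH 3/9).
    os.environ["MARQO_MODELS_TO_PRELOAD"] = json.dumps(models_to_preload)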
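The TODO repeated in `_preload_model` ("include validation from on start script (model name properties etc)") is not implemented in this series; the function currently relies on a KeyError from the dict lookups. A hedged sketch of what such up-front validation could look like follows — the required property names checked here are assumptions for illustration, not something the patches define:

    def _validate_preload_entry(model) -> None:
        """Best-effort check of a MARQO_MODELS_TO_PRELOAD entry before vectorise is called.

        Raises ValueError (a stand-in for marqo.errors.EnvVarError) when a
        custom-model dict is missing the keys the preload path reads.
        """
        if isinstance(model, str):
            return  # registry models are validated by vectorise itself
        if not isinstance(model, dict):
            raise ValueError(f"Preload entry {model!r} must be a string or a dict")
        for key in ("model", "model_properties"):
            if key not in model:
                raise ValueError(f"Custom model {model!r} is missing the `{key}` key")
        # Fields assumed here for illustration; the real requirements depend on the
        # model type that vectorise expects.
        for field in ("name", "dimensions", "type"):
            if field not in model["model_properties"]:
                raise ValueError(f"`model_properties` for {model['model']} is missing `{field}`")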
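One design point worth noting from PATCH 6/9: the tests switch from patching `marqo.s2_inference.s2_inference.vectorise` to patching `marqo.tensor_search.on_start_script.vectorise`. Because the on-start script now imports vectorise at module level (`from marqo.s2_inference.s2_inference import vectorise`), the preload code calls the name bound in `on_start_script`, so a mock has to replace that binding — patching the defining module would leave the already-imported reference untouched. A minimal illustration of the pattern, using the module path from the tests above:

    from unittest import mock

    mock_vectorise = mock.MagicMock()

    # Patch the name where it is looked up (the importing module), not where it is defined.
    with mock.patch("marqo.tensor_search.on_start_script.vectorise", mock_vectorise):
        ...  # run the preload script here; every vectorise call hits the mock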