From 8bea5c848b40beb022e3dfdde337940fc3b50a66 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 24 Jan 2023 12:01:04 +1100 Subject: [PATCH 01/54] add large scale test --- src/marqo/s2_inference/clip_utils.py | 26 ++++++++++++++++++++++++++ src/marqo/s2_inference/errors.py | 5 +++++ 2 files changed, 31 insertions(+) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 3b89f3b06..01134e32e 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -14,6 +14,7 @@ from marqo.s2_inference.types import * from marqo.s2_inference.logger import get_logger import marqo.s2_inference.model_registry as model_registry +from marqo.s2_inference.errors import InvalidModelDeviceError logger = get_logger(__name__) @@ -239,6 +240,31 @@ def encode(self, inputs: Union[str, ImageType, List[Union[str, ImageType]]], logger.debug('text') return self.encode_text(inputs, normalize=normalize) + +class FAST_CLIP(CLIP): + def __init__(self, model_type: str = "fast/ViT-B/32", device: str = 'cuda', embedding_dim: int = None, + truncate: bool = True, **kwargs) -> None: + super.__init__(model_type, device, embedding_dim, truncate, **kwargs) + + if not self.device.startswith("cuda"): + raise InvalidModelDeviceError(f"Fast clip model `{self.model_type}` is only available with device `cuda`.") + + self.model_name = self.model_type.replace("fast/", "") + + + def load(self) -> None: + + # https://github.com/openai/CLIP/issues/30 + self.model, self.preprocess = clip.load(self.model_name, device='cuda', jit=False) + self.model = self.model.to(self.device) + self.tokenizer = clip.tokenize + self.model.eval() + + + + + + class OPEN_CLIP(CLIP): def __init__(self, model_type: str = "open_clip/ViT-B-32-quickgelu/laion400m_e32", device: str = 'cpu', embedding_dim: int = None, truncate: bool = True, **kwargs) -> None: diff --git a/src/marqo/s2_inference/errors.py b/src/marqo/s2_inference/errors.py index 5eb555652..d47df97cb 100644 --- a/src/marqo/s2_inference/errors.py +++ b/src/marqo/s2_inference/errors.py @@ -49,4 +49,9 @@ class RerankerNameError(S2InferenceError): class ModelNotInCacheError(S2InferenceError): + pass + +# Raise an ERROR if the model is only available with "cpu" or "cuda" but +# the other one is provided +class InvalidModelDeviceError(S2InferenceError): pass \ No newline at end of file From 2a07546df2e31b25cd617b058eb4d483957a7680 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 24 Jan 2023 13:25:40 +1100 Subject: [PATCH 02/54] add fp16 model support --- src/marqo/s2_inference/clip_utils.py | 6 +++--- src/marqo/s2_inference/model_registry.py | 17 ++++++++++++++++- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 01134e32e..412402812 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -241,15 +241,15 @@ def encode(self, inputs: Union[str, ImageType, List[Union[str, ImageType]]], return self.encode_text(inputs, normalize=normalize) -class FAST_CLIP(CLIP): - def __init__(self, model_type: str = "fast/ViT-B/32", device: str = 'cuda', embedding_dim: int = None, +class Fp16_CLIP(CLIP): + def __init__(self, model_type: str = "fp16/ViT-B/32", device: str = 'cuda', embedding_dim: int = None, truncate: bool = True, **kwargs) -> None: super.__init__(model_type, device, embedding_dim, truncate, **kwargs) if not self.device.startswith("cuda"): raise InvalidModelDeviceError(f"Fast clip model `{self.model_type}` is only 
available with device `cuda`.") - self.model_name = self.model_type.replace("fast/", "") + self.model_name = self.model_type.replace("fp16/", "") def load(self) -> None: diff --git a/src/marqo/s2_inference/model_registry.py b/src/marqo/s2_inference/model_registry.py index 6eec35831..2ccd08ad1 100644 --- a/src/marqo/s2_inference/model_registry.py +++ b/src/marqo/s2_inference/model_registry.py @@ -2,7 +2,7 @@ from marqo.s2_inference.sbert_onnx_utils import SBERT_ONNX from marqo.s2_inference.sbert_utils import SBERT, TEST from marqo.s2_inference.random_utils import Random -from marqo.s2_inference.clip_utils import CLIP, OPEN_CLIP, MULTILINGUAL_CLIP +from marqo.s2_inference.clip_utils import CLIP, OPEN_CLIP, MULTILINGUAL_CLIP, FP16_CLIP from marqo.s2_inference.types import Any, Dict, List, Optional, Union, FloatTensor from marqo.s2_inference.onnx_clip_utils import CLIP_ONNX @@ -1532,6 +1532,19 @@ def _get_onnx_clip_properties() -> Dict: } return ONNX_CLIP_MODEL_PROPERTIES + +def _get_fp16_clip_properties() -> Dict: + FP16_CLIP_MODEL_PROPERTIES = { + "FP/ViT-L/14":{ + "name":"fP16/ViT-L/14", + "dimension" : 768, + "tupe": "fp16clip", + "notes" : "The faster version (fp16, load from `cuda`) of openai clip model" + }, + } + + return FP16_CLIP_MODEL_PROPERTIES + def _get_random_properties() -> Dict: RANDOM_MODEL_PROPERTIES = { "random": @@ -1587,6 +1600,7 @@ def load_model_properties() -> Dict: open_clip_model_properties = _get_open_clip_properties() onnx_clip_model_properties = _get_onnx_clip_properties() multilingual_clip_model_properties = _get_multilingual_clip_properties() + fp16_clip_model_properties = _get_fp16_clip_properties() # combine the above dicts model_properties = dict(clip_model_properties.items()) @@ -1598,6 +1612,7 @@ def load_model_properties() -> Dict: model_properties.update(open_clip_model_properties) model_properties.update(onnx_clip_model_properties) model_properties.update(multilingual_clip_model_properties) + model_properties.update(fp16_clip_model_properties) all_properties = dict() all_properties['models'] = model_properties From 512e8b7f2e371b320277a74f8406eb13201cc2a6 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 24 Jan 2023 13:34:39 +1100 Subject: [PATCH 03/54] add fp16 model support --- src/marqo/s2_inference/clip_utils.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 412402812..f7f77b345 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -241,7 +241,7 @@ def encode(self, inputs: Union[str, ImageType, List[Union[str, ImageType]]], return self.encode_text(inputs, normalize=normalize) -class Fp16_CLIP(CLIP): +class FP16_CLIP(CLIP): def __init__(self, model_type: str = "fp16/ViT-B/32", device: str = 'cuda', embedding_dim: int = None, truncate: bool = True, **kwargs) -> None: super.__init__(model_type, device, embedding_dim, truncate, **kwargs) @@ -260,11 +260,6 @@ def load(self) -> None: self.tokenizer = clip.tokenize self.model.eval() - - - - - class OPEN_CLIP(CLIP): def __init__(self, model_type: str = "open_clip/ViT-B-32-quickgelu/laion400m_e32", device: str = 'cpu', embedding_dim: int = None, truncate: bool = True, **kwargs) -> None: From c351f32008191fb8d414d810b5226d2c4b92d692 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 24 Jan 2023 13:36:45 +1100 Subject: [PATCH 04/54] add fp16 model support --- src/marqo/s2_inference/model_registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/marqo/s2_inference/model_registry.py b/src/marqo/s2_inference/model_registry.py index 2ccd08ad1..bfba0763c 100644 --- a/src/marqo/s2_inference/model_registry.py +++ b/src/marqo/s2_inference/model_registry.py @@ -1535,7 +1535,7 @@ def _get_onnx_clip_properties() -> Dict: def _get_fp16_clip_properties() -> Dict: FP16_CLIP_MODEL_PROPERTIES = { - "FP/ViT-L/14":{ + "fp16/ViT-L/14":{ "name":"fP16/ViT-L/14", "dimension" : 768, "tupe": "fp16clip", From 93d572558802ecc52ac97843d33392b814dfb62a Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 24 Jan 2023 13:38:00 +1100 Subject: [PATCH 05/54] add fp16 model support --- src/marqo/s2_inference/model_registry.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/marqo/s2_inference/model_registry.py b/src/marqo/s2_inference/model_registry.py index bfba0763c..ac7804860 100644 --- a/src/marqo/s2_inference/model_registry.py +++ b/src/marqo/s2_inference/model_registry.py @@ -1538,7 +1538,7 @@ def _get_fp16_clip_properties() -> Dict: "fp16/ViT-L/14":{ "name":"fP16/ViT-L/14", "dimension" : 768, - "tupe": "fp16clip", + "type": "fp16clip", "notes" : "The faster version (fp16, load from `cuda`) of openai clip model" }, } @@ -1583,6 +1583,7 @@ def _get_model_load_mappings() -> Dict: 'sbert_onnx':SBERT_ONNX, 'clip_onnx': CLIP_ONNX, "multilingual_clip" : MULTILINGUAL_CLIP, + "fp16clip": FP16_CLIP, 'random':Random, 'hf':HF_MODEL} From 0692ed11fbd0fa506f1d0a15a81a8a4ddb24d856 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 24 Jan 2023 13:39:19 +1100 Subject: [PATCH 06/54] add fp16 model support --- src/marqo/s2_inference/model_registry.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/marqo/s2_inference/model_registry.py b/src/marqo/s2_inference/model_registry.py index ac7804860..8036b4db5 100644 --- a/src/marqo/s2_inference/model_registry.py +++ b/src/marqo/s2_inference/model_registry.py @@ -1535,11 +1535,11 @@ def _get_onnx_clip_properties() -> Dict: def _get_fp16_clip_properties() -> Dict: FP16_CLIP_MODEL_PROPERTIES = { - "fp16/ViT-L/14":{ - "name":"fP16/ViT-L/14", - "dimension" : 768, + "fp16/ViT-L/14": { + "name": "fP16/ViT-L/14", + "dimensions": 768, "type": "fp16clip", - "notes" : "The faster version (fp16, load from `cuda`) of openai clip model" + "notes": "The faster version (fp16, load from `cuda`) of openai clip model" }, } From 08f933683938066d9425a526706eeb44a51ec64d Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 24 Jan 2023 13:40:26 +1100 Subject: [PATCH 07/54] add fp16 model support --- src/marqo/s2_inference/clip_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index f7f77b345..7309347b1 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -244,7 +244,7 @@ def encode(self, inputs: Union[str, ImageType, List[Union[str, ImageType]]], class FP16_CLIP(CLIP): def __init__(self, model_type: str = "fp16/ViT-B/32", device: str = 'cuda', embedding_dim: int = None, truncate: bool = True, **kwargs) -> None: - super.__init__(model_type, device, embedding_dim, truncate, **kwargs) + super().__init__(model_type, device, embedding_dim, truncate, **kwargs) if not self.device.startswith("cuda"): raise InvalidModelDeviceError(f"Fast clip model `{self.model_type}` is only available with device `cuda`.") From f6cb5b0044d5595b2b6b8cf43366e52a0455f99f Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 24 Jan 2023 13:43:56 +1100 Subject: [PATCH 08/54] add fp16 model support 
--- src/marqo/s2_inference/clip_utils.py | 2 +- src/marqo/s2_inference/model_registry.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 7309347b1..9b0f506e4 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -247,7 +247,7 @@ def __init__(self, model_type: str = "fp16/ViT-B/32", device: str = 'cuda', emb super().__init__(model_type, device, embedding_dim, truncate, **kwargs) if not self.device.startswith("cuda"): - raise InvalidModelDeviceError(f"Fast clip model `{self.model_type}` is only available with device `cuda`.") + raise InvalidModelDeviceError(f"FP16 clip model `{self.model_type}` is only available with device `cuda`.") self.model_name = self.model_type.replace("fp16/", "") diff --git a/src/marqo/s2_inference/model_registry.py b/src/marqo/s2_inference/model_registry.py index 8036b4db5..95cf3e433 100644 --- a/src/marqo/s2_inference/model_registry.py +++ b/src/marqo/s2_inference/model_registry.py @@ -1536,7 +1536,7 @@ def _get_onnx_clip_properties() -> Dict: def _get_fp16_clip_properties() -> Dict: FP16_CLIP_MODEL_PROPERTIES = { "fp16/ViT-L/14": { - "name": "fP16/ViT-L/14", + "name": "fp16/ViT-L/14", "dimensions": 768, "type": "fp16clip", "notes": "The faster version (fp16, load from `cuda`) of openai clip model" From a14c9851110550b92590d42c0407a481402a2965 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 24 Jan 2023 13:48:40 +1100 Subject: [PATCH 09/54] add fp16 model support --- src/marqo/s2_inference/model_registry.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/marqo/s2_inference/model_registry.py b/src/marqo/s2_inference/model_registry.py index 95cf3e433..ed7e4ffaa 100644 --- a/src/marqo/s2_inference/model_registry.py +++ b/src/marqo/s2_inference/model_registry.py @@ -1541,10 +1541,23 @@ def _get_fp16_clip_properties() -> Dict: "type": "fp16clip", "notes": "The faster version (fp16, load from `cuda`) of openai clip model" }, + 'fp16/ViT-B/32': + {"name": "fp16/ViT-B/32", + "dimensions": 512, + "notes": "The faster version (fp16, load from `cuda`) of openai clip model", + "type": "fp16clip", + }, + 'fp16/ViT-B/16': + {"name": "fp16/ViT-B/16", + "dimensions": 512, + "notes": "The faster version (fp16, load from `cuda`) of openai clip model", + "type": "fp16clip", + }, } return FP16_CLIP_MODEL_PROPERTIES + def _get_random_properties() -> Dict: RANDOM_MODEL_PROPERTIES = { "random": From 992d76eaa0da1435d0347e0591e0c3d93f86d816 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 24 Jan 2023 20:50:24 +1100 Subject: [PATCH 10/54] add fp16 model support --- src/marqo/s2_inference/clip_utils.py | 105 +++++++++++++++++++++++-- src/marqo/s2_inference/s2_inference.py | 2 +- 2 files changed, 100 insertions(+), 7 deletions(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 9b0f506e4..aa06a6df3 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -10,18 +10,45 @@ import open_clip from multilingual_clip import pt_multilingual_clip import transformers +from clip.model import build_model from marqo.s2_inference.types import * from marqo.s2_inference.logger import get_logger import marqo.s2_inference.model_registry as model_registry -from marqo.s2_inference.errors import InvalidModelDeviceError +from marqo.s2_inference.errors import InvalidModelDeviceError, InvalidModelPropertiesError +from torchvision.transforms import Compose, Resize, 
CenterCrop, ToTensor, Normalize logger = get_logger(__name__) +OPENAI_DATASET_MEAN = (0.48145466, 0.4578275, 0.40821073) +OPENAI_DATASET_STD = (0.26862954, 0.26130258, 0.27577711) def get_allowed_image_types(): return set(('.jpg', '.png', '.bmp', '.jpeg')) +try: + from torchvision.transforms import InterpolationMode + BICUBIC = InterpolationMode.BICUBIC +except ImportError: + BICUBIC = Image.BICUBIC + + +def _convert_image_to_rgb(image): + return image.convert("RGB") + + +def _get_transform(n_px: int, image_mean:List[float] = None, image_std:List[float] = None): + img_mean = image_mean or OPENAI_DATASET_MEAN + img_std = image_std or OPENAI_DATASET_STD + return Compose([ + Resize(n_px, interpolation=BICUBIC), + CenterCrop(n_px), + _convert_image_to_rgb, + ToTensor(), + Normalize(img_mean, img_std), + ]) + + def format_and_load_CLIP_images(images: List[Union[str, ndarray, ImageType]]) -> List[ImageType]: """takes in a list of strings, arrays or urls and either loads and/or converts to PIL @@ -157,14 +184,77 @@ def __init__(self, model_type: str = "ViT-B/32", device: str = 'cpu', embedding self.processor = None self.embedding_dimension = embedding_dim self.truncate = truncate + self.model_properties = kwargs["model_properties"] def load(self) -> None: - # https://github.com/openai/CLIP/issues/30 - self.model, self.preprocess = clip.load(self.model_type, device='cpu', jit=False) - self.model = self.model.to(self.device) + try: + # The original method to load the openai clip model + # https://github.com/openai/CLIP/issues/30 + self.model, self.preprocess = clip.load(self.model_type, device='cpu', jit=False) + self.model = self.model.to(self.device) + self.tokenizer = clip.tokenize + + + except RuntimeError: + self.jit = self.model_properties.get("jit", False) + self.model_path = self.model_properties["localpath"] + self.device = self.model_properties.get("device", "cpu") + self.mean = self.model_properties.get("mean", None) + self.std = self.model_properties.get("std", None) + + logger.info("Can not load clip model. 
Try custom clip model loading.") + # Loading code from openai clip repo + # Check https://github.com/openai/CLIP/blob/3702849800aa56e2223035bccd1c6ef91c704ca8/clip/clip.py#L126-L142 + + try: + # Try to load the script model using openai loading method + logger.info("Try generic clip model openai clip loading") + with open(self.model_path, 'rb') as opened_file: + try: + # loading JIT archive + self.model = torch.jit.load(opened_file, map_location=self.device if self.jit else "cpu").eval() + state_dict = None + except RuntimeError: + # loading saved state dict + if self.jit: + self.jit = False + state_dict = torch.load(opened_file, map_location="cpu") + + if not self.jit: + self.model = build_model(state_dict or self.model.state_dict()).to(self.device) + if str(self.device) == "cpu": + self.model.float() + + except EOFError: + logger.info("Try generic clip model open_clip loading") + try: + # loading JIT archive + self.model = torch.jit.load(self.model_path, map_location=self.device if self.jit else "cpu").eval() + state_dict = None + except RuntimeError: + # loading saved state dict + if self.jit: + self.jit = False + state_dict = torch.load(self.model_path, map_location="cpu") + + if not self.jit: + try: + self.model = build_model(state_dict or self.model.state_dict()).to(self.device) + except KeyError: + sd = {k[7:]: v for k, v in state_dict["state_dict"].items()} + self.model = build_model(sd).to(self.device) + + if str(self.device) == "cpu": + self.model.float() + + # We use simple clip tokenizer. + # TODO Support custom tokenizer (huggingface based) + self.preprocess = _get_transform(self.model.visual.input_resolution, self.mean, self.std) self.tokenizer = clip.tokenize self.model.eval() + + def _convert_output(self, output): @@ -181,8 +271,11 @@ def encode_text(self, sentence: Union[str, List[str]], normalize = True) -> Floa if self.model is None: self.load() - - text = self.tokenizer(sentence, truncate=self.truncate).to(self.device) + try: + text = self.tokenizer(sentence, truncate=self.truncate).to(self.device) + except Exception: + text = self.tokenizer(sentence).to(self.device) + with torch.no_grad(): outputs = self.model.encode_text(text) diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index 38143ef47..05ca71e19 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -288,7 +288,7 @@ def _load_model(model_name: str, model_properties: dict, device: str = get_defau max_sequence_length = model_properties.get('tokens', get_default_seq_length()) model = loader(model_properties['name'], device=device, embedding_dim=model_properties['dimensions'], - max_seq_length=max_sequence_length) + max_seq_length=max_sequence_length, model_properties = model_properties) model.load() From 0c49093daabebb343a516e107b81b79905a1eed1 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 25 Jan 2023 11:21:15 +1100 Subject: [PATCH 11/54] add fp16 model support --- src/marqo/s2_inference/clip_utils.py | 248 ++++++++++++++---- src/marqo/s2_inference/hf_utils.py | 9 +- .../processing/custom_clip_utils.py | 32 +++ 3 files changed, 242 insertions(+), 47 deletions(-) create mode 100644 src/marqo/s2_inference/processing/custom_clip_utils.py diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index aa06a6df3..7321fd21f 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -17,6 +17,7 @@ import marqo.s2_inference.model_registry as model_registry from 
marqo.s2_inference.errors import InvalidModelDeviceError, InvalidModelPropertiesError from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize +from marqo.s2_inference.processing.custom_clip_utils import HFTokenizer logger = get_logger(__name__) @@ -198,64 +199,221 @@ def load(self) -> None: except RuntimeError: self.jit = self.model_properties.get("jit", False) - self.model_path = self.model_properties["localpath"] + self.model_path = self.model_properties["localpath"] # This is a must provide property self.device = self.model_properties.get("device", "cpu") self.mean = self.model_properties.get("mean", None) self.std = self.model_properties.get("std", None) - logger.info("Can not load clip model. Try custom clip model loading.") + + try: + self.model, self.preprocess = self.clip_load() + self.model.eval() + except EOFError: + self.open_clip_load() + self.model.eval() + + self.tokenizer = self.load_tokenizer() + + # Loading code from openai clip repo # Check https://github.com/openai/CLIP/blob/3702849800aa56e2223035bccd1c6ef91c704ca8/clip/clip.py#L126-L142 + # try: + # # Try to load the script model using openai loading method + # logger.info("Try generic clip model openai clip loading") + # with open(self.model_path, 'rb') as opened_file: + # try: + # # loading JIT archive + # self.model = torch.jit.load(opened_file, map_location=self.device if self.jit else "cpu").eval() + # state_dict = None + # except RuntimeError: + # # loading saved state dict + # if self.jit: + # self.jit = False + # state_dict = torch.load(opened_file, map_location="cpu") + # + # if not self.jit: + # self.model = build_model(state_dict or self.model.state_dict()).to(self.device) + # if str(self.device) == "cpu": + # self.model.float() + # + # except EOFError: + # logger.info("Try generic clip model open_clip loading") + # try: + # # loading JIT archive + # self.model = torch.jit.load(self.model_path, map_location=self.device if self.jit else "cpu").eval() + # state_dict = None + # except RuntimeError: + # # loading saved state dict + # if self.jit: + # self.jit = False + # state_dict = torch.load(self.model_path, map_location="cpu") + # + # if not self.jit: + # try: + # self.model = build_model(state_dict or self.model.state_dict()).to(self.device) + # except KeyError: + # sd = {k[7:]: v for k, v in state_dict["state_dict"].items()} + # self.model = build_model(sd).to(self.device) + # + # if str(self.device) == "cpu": + # self.model.float() + # + # # We use simple clip tokenizer. + # + # self.preprocess = _get_transform(self.model.visual.input_resolution, self.mean, self.std) + # self.tokenizer = clip.tokenize + # self.model.eval() + def openai_clip_load(self): + with open(self.model_path, 'rb') as opened_file: + try: + # loading JIT archive + model = torch.jit.load(opened_file, map_location=self.device if self.jit else "cpu").eval() + state_dict = None + except RuntimeError: + # loading saved state dict + if self.jit: + logger.info(f"File {self.model_path} is not a JIT archive. 
Loading as a state dict instead") + self.jit = False + state_dict = torch.load(opened_file, map_location="cpu") + + if not self.jit: + model = build_model(state_dict or self.model.state_dict()).to(self.device) + if str(self.device) == "cpu": + model.float() + return model, _get_transform(model.visual.input_resolution, self.mean, self.std) + + # patch the device names + device_holder = torch.jit.trace(lambda: torch.ones([]).to(torch.device(self.device)), example_inputs=[]) + device_node = [n for n in device_holder.graph.findAllNodes("prim::Constant") if "Device" in repr(n)][-1] + + def patch_device(module): try: - # Try to load the script model using openai loading method - logger.info("Try generic clip model openai clip loading") - with open(self.model_path, 'rb') as opened_file: - try: - # loading JIT archive - self.model = torch.jit.load(opened_file, map_location=self.device if self.jit else "cpu").eval() - state_dict = None - except RuntimeError: - # loading saved state dict - if self.jit: - self.jit = False - state_dict = torch.load(opened_file, map_location="cpu") - - if not self.jit: - self.model = build_model(state_dict or self.model.state_dict()).to(self.device) - if str(self.device) == "cpu": - self.model.float() + graphs = [module.graph] if hasattr(module, "graph") else [] + except RuntimeError: + graphs = [] - except EOFError: - logger.info("Try generic clip model open_clip loading") + if hasattr(module, "forward1"): + graphs.append(module.forward1.graph) + + for graph in graphs: + for node in graph.findAllNodes("prim::Constant"): + if "value" in node.attributeNames() and str(node["value"]).startswith("cuda"): + node.copyAttributes(device_node) + + model.apply(patch_device) + patch_device(model.encode_image) + patch_device(model.encode_text) + + # patch dtype to float32 on CPU + if str(self.device) == "cpu": + float_holder = torch.jit.trace(lambda: torch.ones([]).float(), example_inputs=[]) + float_input = list(float_holder.graph.findNode("aten::to").inputs())[1] + float_node = float_input.node() + + def patch_float(module): try: - # loading JIT archive - self.model = torch.jit.load(self.model_path, map_location=self.device if self.jit else "cpu").eval() - state_dict = None + graphs = [module.graph] if hasattr(module, "graph") else [] except RuntimeError: - # loading saved state dict - if self.jit: - self.jit = False - state_dict = torch.load(self.model_path, map_location="cpu") - - if not self.jit: - try: - self.model = build_model(state_dict or self.model.state_dict()).to(self.device) - except KeyError: - sd = {k[7:]: v for k, v in state_dict["state_dict"].items()} - self.model = build_model(sd).to(self.device) - - if str(self.device) == "cpu": - self.model.float() - - # We use simple clip tokenizer. 
- # TODO Support custom tokenizer (huggingface based) - self.preprocess = _get_transform(self.model.visual.input_resolution, self.mean, self.std) - self.tokenizer = clip.tokenize - self.model.eval() + graphs = [] + + if hasattr(module, "forward1"): + graphs.append(module.forward1.graph) + + for graph in graphs: + for node in graph.findAllNodes("aten::to"): + inputs = list(node.inputs()) + for i in [1, 2]: # dtype can be the second or third argument to aten::to() + if inputs[i].node()["value"] == 5: + inputs[i].node().copyAttributes(float_node) + + model.apply(patch_float) + patch_float(model.encode_image) + patch_float(model.encode_text) + + model.float() + + return model, _get_transform(model.visual.input_resolution, self.mean, self.std) + + + def open_clip_load(self): + # loading the open clip model + # Check https://github.com/mlfoundations/open_clip/blob/db7504f070b4e76e6c8578ee7b73596267083a19/src/clip/openai_clip.py#L121-L189 + try: + # loading JIT archive + model = torch.jit.load(self.model_path, map_location=self.device if self.jit else "cpu").eval() + state_dict = None + except RuntimeError: + # loading saved state dict + if self.jit: + jit = False + state_dict = torch.load(self.model_path, map_location="cpu") + + if not jit: + try: + model = build_model(state_dict or model.state_dict()).to(self.device) + except KeyError: + sd = {k[7:]: v for k, v in state_dict["state_dict"].items()} + model = build_model(sd).to(self.device) + + if str(self.device) == "cpu": + model.float() + return model, _get_transform(model.visual.input_resolution, self.mean, self.std) + + # patch the device names + device_holder = torch.jit.trace(lambda: torch.ones([]).to(torch.device(self.device)), example_inputs=[]) + device_node = [n for n in device_holder.graph.findAllNodes("prim::Constant") if "Device" in repr(n)][-1] + + def patch_device(module): + graphs = [module.graph] if hasattr(module, "graph") else [] + if hasattr(module, "forward1"): + graphs.append(module.forward1.graph) + + for graph in graphs: + for node in graph.findAllNodes("prim::Constant"): + if "value" in node.attributeNames() and str(node["value"]).startswith("cuda"): + node.copyAttributes(device_node) + + model.apply(patch_device) + patch_device(model.encode_image) + patch_device(model.encode_text) + + # patch dtype to float32 on CPU + if str(self.device) == "cpu": + float_holder = torch.jit.trace(lambda: torch.ones([]).float(), example_inputs=[]) + float_input = list(float_holder.graph.findNode("aten::to").inputs())[1] + float_node = float_input.node() + + def patch_float(module): + graphs = [module.graph] if hasattr(module, "graph") else [] + if hasattr(module, "forward1"): + graphs.append(module.forward1.graph) + + for graph in graphs: + for node in graph.findAllNodes("aten::to"): + inputs = list(node.inputs()) + for i in [1, 2]: # dtype can be the second or third argument to aten::to() + if inputs[i].node()["value"] == 5: + inputs[i].node().copyAttributes(float_node) + + model.apply(patch_float) + patch_float(model.encode_image) + patch_float(model.encode_text) + + model.float() + + return model, _get_transform(model.visual.input_resolution, self.mean, self.std) + + + def load_tokenizer(self): + tokenizer_name = self.model_properties.get("tokenizer", "clip") + + if tokenizer_name == "clip": + return clip.tokenize + else: + return HFTokenizer(tokenizer_name) - def _convert_output(self, output): if self.device == 'cpu': diff --git a/src/marqo/s2_inference/hf_utils.py b/src/marqo/s2_inference/hf_utils.py index 2169b951f..ddfdad037 
100644 --- a/src/marqo/s2_inference/hf_utils.py +++ b/src/marqo/s2_inference/hf_utils.py @@ -5,7 +5,6 @@ from marqo.s2_inference.sbert_utils import Model from marqo.s2_inference.types import Union, FloatTensor, List - from marqo.s2_inference.logger import get_logger logger = get_logger(__name__) @@ -76,4 +75,10 @@ def mean_pooling(self, model_output, attention_mask): return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9) def cls_pooling(self, model_output, attention_mask): - return model_output[0][:,0] \ No newline at end of file + return model_output[0][:,0] + + +def whitespace_clean(text): + text = re.sub(r'\s+', ' ', text) + text = text.strip() + return text diff --git a/src/marqo/s2_inference/processing/custom_clip_utils.py b/src/marqo/s2_inference/processing/custom_clip_utils.py new file mode 100644 index 000000000..448a9a51a --- /dev/null +++ b/src/marqo/s2_inference/processing/custom_clip_utils.py @@ -0,0 +1,32 @@ +import regex as re +from typing import Union, List +import torch +import ftfy +import html + +def whitespace_clean(text): + text = re.sub(r'\s+', ' ', text) + text = text.strip() + return text + +def basic_clean(text): + text = ftfy.fix_text(text) + text = html.unescape(html.unescape(text)) + return text.strip() + + +class HFTokenizer: + # HuggingFace tokenizer wrapper + # Check https://github.com/mlfoundations/open_clip/blob/16e229c596cafaec46a4defaf27e0e30ffcca12d/src/open_clip/tokenizer.py#L188-L201 + def __init__(self, tokenizer_name:str): + from transformers import AutoTokenizer + self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) + + def __call__(self, texts:Union[str, List[str]], context_length:int=77) -> torch.Tensor: + # same cleaning as for default tokenizer, except lowercasing + # adding lower (for case-sensitive tokenizers) will make it more robust but less sensitive to nuance + if isinstance(texts, str): + texts = [texts] + texts = [whitespace_clean(basic_clean(text)) for text in texts] + input_ids = self.tokenizer(texts, return_tensors='pt', max_length=context_length, padding='max_length', truncation=True).input_ids + return input_ids \ No newline at end of file From 95c0817c049a732b6eee38a78c6a2e00867bf1dd Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 25 Jan 2023 12:02:11 +1100 Subject: [PATCH 12/54] add fp16 model support --- src/marqo/s2_inference/clip_utils.py | 74 +++++-------------- .../processing/custom_clip_utils.py | 32 +++++++- 2 files changed, 48 insertions(+), 58 deletions(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 7321fd21f..f3b8911c4 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -17,7 +17,7 @@ import marqo.s2_inference.model_registry as model_registry from marqo.s2_inference.errors import InvalidModelDeviceError, InvalidModelPropertiesError from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize -from marqo.s2_inference.processing.custom_clip_utils import HFTokenizer +from marqo.s2_inference.processing.custom_clip_utils import HFTokenizer, download_pretrained_from_url logger = get_logger(__name__) @@ -198,72 +198,32 @@ def load(self) -> None: except RuntimeError: + logger.info("Can not load clip model. Try custom clip model loading.") + + path = self.model_properties.get("localpath", None) or self.model_properties.get("url", None) + if path is None: + raise InvalidModelPropertiesError("Model path `url` or `localpath` is not provided. 
Can not load generic model.") + elif os.path.isfile(path): + self.model_path = path + elif validators.url(path): + self.model_path = download_pretrained_from_url(path) + self.jit = self.model_properties.get("jit", False) - self.model_path = self.model_properties["localpath"] # This is a must provide property self.device = self.model_properties.get("device", "cpu") self.mean = self.model_properties.get("mean", None) self.std = self.model_properties.get("std", None) - logger.info("Can not load clip model. Try custom clip model loading.") + try: - self.model, self.preprocess = self.clip_load() + self.model, self.preprocess = self.openai_clip_load() self.model.eval() except EOFError: - self.open_clip_load() + self.model, self.preprocess = self.open_clip_load() self.model.eval() self.tokenizer = self.load_tokenizer() - # Loading code from openai clip repo - # Check https://github.com/openai/CLIP/blob/3702849800aa56e2223035bccd1c6ef91c704ca8/clip/clip.py#L126-L142 - - # try: - # # Try to load the script model using openai loading method - # logger.info("Try generic clip model openai clip loading") - # with open(self.model_path, 'rb') as opened_file: - # try: - # # loading JIT archive - # self.model = torch.jit.load(opened_file, map_location=self.device if self.jit else "cpu").eval() - # state_dict = None - # except RuntimeError: - # # loading saved state dict - # if self.jit: - # self.jit = False - # state_dict = torch.load(opened_file, map_location="cpu") - # - # if not self.jit: - # self.model = build_model(state_dict or self.model.state_dict()).to(self.device) - # if str(self.device) == "cpu": - # self.model.float() - # - # except EOFError: - # logger.info("Try generic clip model open_clip loading") - # try: - # # loading JIT archive - # self.model = torch.jit.load(self.model_path, map_location=self.device if self.jit else "cpu").eval() - # state_dict = None - # except RuntimeError: - # # loading saved state dict - # if self.jit: - # self.jit = False - # state_dict = torch.load(self.model_path, map_location="cpu") - # - # if not self.jit: - # try: - # self.model = build_model(state_dict or self.model.state_dict()).to(self.device) - # except KeyError: - # sd = {k[7:]: v for k, v in state_dict["state_dict"].items()} - # self.model = build_model(sd).to(self.device) - # - # if str(self.device) == "cpu": - # self.model.float() - # - # # We use simple clip tokenizer. 
- # - # self.preprocess = _get_transform(self.model.visual.input_resolution, self.mean, self.std) - # self.tokenizer = clip.tokenize - # self.model.eval() def openai_clip_load(self): with open(self.model_path, 'rb') as opened_file: try: @@ -278,7 +238,7 @@ def openai_clip_load(self): state_dict = torch.load(opened_file, map_location="cpu") if not self.jit: - model = build_model(state_dict or self.model.state_dict()).to(self.device) + model = build_model(state_dict or model.state_dict()).to(self.device) if str(self.device) == "cpu": model.float() return model, _get_transform(model.visual.input_resolution, self.mean, self.std) @@ -346,10 +306,10 @@ def open_clip_load(self): except RuntimeError: # loading saved state dict if self.jit: - jit = False + self.jit = False state_dict = torch.load(self.model_path, map_location="cpu") - if not jit: + if not self.jit: try: model = build_model(state_dict or model.state_dict()).to(self.device) except KeyError: diff --git a/src/marqo/s2_inference/processing/custom_clip_utils.py b/src/marqo/s2_inference/processing/custom_clip_utils.py index 448a9a51a..0207e0b8f 100644 --- a/src/marqo/s2_inference/processing/custom_clip_utils.py +++ b/src/marqo/s2_inference/processing/custom_clip_utils.py @@ -3,6 +3,9 @@ import torch import ftfy import html +import os +import urllib +from tqdm import tqdm def whitespace_clean(text): text = re.sub(r'\s+', ' ', text) @@ -29,4 +32,31 @@ def __call__(self, texts:Union[str, List[str]], context_length:int=77) -> torch. texts = [texts] texts = [whitespace_clean(basic_clean(text)) for text in texts] input_ids = self.tokenizer(texts, return_tensors='pt', max_length=context_length, padding='max_length', truncation=True).input_ids - return input_ids \ No newline at end of file + return input_ids + + +def download_pretrained_from_url( + url: str, + cache_dir: Union[str, None] = None, +): + if not cache_dir: + cache_dir = os.path.expanduser("~/.cache/clip") + os.makedirs(cache_dir, exist_ok=True) + filename = os.path.basename(url) + + download_target = os.path.join(cache_dir, filename) + + if os.path.isfile(download_target): + return download_target + + with urllib.request.urlopen(url) as source, open(download_target, "wb") as output: + with tqdm(total=int(source.headers.get("Content-Length")), ncols=80, unit='iB', unit_scale=True) as loop: + while True: + buffer = source.read(8192) + if not buffer: + break + + output.write(buffer) + loop.update(len(buffer)) + + return download_target From 83e8d8d4a0a268769623bd7541085466e0c3ae61 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 25 Jan 2023 13:52:44 +1100 Subject: [PATCH 13/54] add fp16 model support --- src/marqo/s2_inference/clip_utils.py | 98 +++------------------------- src/marqo/s2_inference/hf_utils.py | 6 -- 2 files changed, 9 insertions(+), 95 deletions(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index f3b8911c4..cf8450c03 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -189,21 +189,17 @@ def __init__(self, model_type: str = "ViT-B/32", device: str = 'cpu', embedding def load(self) -> None: - try: + path = self.model_properties.get("localpath", None) or self.model_properties.get("url", None) + + if path is None: # The original method to load the openai clip model # https://github.com/openai/CLIP/issues/30 self.model, self.preprocess = clip.load(self.model_type, device='cpu', jit=False) self.model = self.model.to(self.device) self.tokenizer = clip.tokenize - - - except RuntimeError: - 
logger.info("Can not load clip model. Try custom clip model loading.") - - path = self.model_properties.get("localpath", None) or self.model_properties.get("url", None) - if path is None: - raise InvalidModelPropertiesError("Model path `url` or `localpath` is not provided. Can not load generic model.") - elif os.path.isfile(path): + else: + logger.info("Detecting custom clip model path. We use generic model loading.") + if os.path.isfile(path): self.model_path = path elif validators.url(path): self.model_path = download_pretrained_from_url(path) @@ -214,89 +210,13 @@ def load(self) -> None: self.std = self.model_properties.get("std", None) - try: - self.model, self.preprocess = self.openai_clip_load() - self.model.eval() - except EOFError: - self.model, self.preprocess = self.open_clip_load() - self.model.eval() + self.model, self.preprocess = self.custom_clip_load() + self.model.eval() self.tokenizer = self.load_tokenizer() - def openai_clip_load(self): - with open(self.model_path, 'rb') as opened_file: - try: - # loading JIT archive - model = torch.jit.load(opened_file, map_location=self.device if self.jit else "cpu").eval() - state_dict = None - except RuntimeError: - # loading saved state dict - if self.jit: - logger.info(f"File {self.model_path} is not a JIT archive. Loading as a state dict instead") - self.jit = False - state_dict = torch.load(opened_file, map_location="cpu") - - if not self.jit: - model = build_model(state_dict or model.state_dict()).to(self.device) - if str(self.device) == "cpu": - model.float() - return model, _get_transform(model.visual.input_resolution, self.mean, self.std) - - # patch the device names - device_holder = torch.jit.trace(lambda: torch.ones([]).to(torch.device(self.device)), example_inputs=[]) - device_node = [n for n in device_holder.graph.findAllNodes("prim::Constant") if "Device" in repr(n)][-1] - - def patch_device(module): - try: - graphs = [module.graph] if hasattr(module, "graph") else [] - except RuntimeError: - graphs = [] - - if hasattr(module, "forward1"): - graphs.append(module.forward1.graph) - - for graph in graphs: - for node in graph.findAllNodes("prim::Constant"): - if "value" in node.attributeNames() and str(node["value"]).startswith("cuda"): - node.copyAttributes(device_node) - - model.apply(patch_device) - patch_device(model.encode_image) - patch_device(model.encode_text) - - # patch dtype to float32 on CPU - if str(self.device) == "cpu": - float_holder = torch.jit.trace(lambda: torch.ones([]).float(), example_inputs=[]) - float_input = list(float_holder.graph.findNode("aten::to").inputs())[1] - float_node = float_input.node() - - def patch_float(module): - try: - graphs = [module.graph] if hasattr(module, "graph") else [] - except RuntimeError: - graphs = [] - - if hasattr(module, "forward1"): - graphs.append(module.forward1.graph) - - for graph in graphs: - for node in graph.findAllNodes("aten::to"): - inputs = list(node.inputs()) - for i in [1, 2]: # dtype can be the second or third argument to aten::to() - if inputs[i].node()["value"] == 5: - inputs[i].node().copyAttributes(float_node) - - model.apply(patch_float) - patch_float(model.encode_image) - patch_float(model.encode_text) - - model.float() - - return model, _get_transform(model.visual.input_resolution, self.mean, self.std) - - - def open_clip_load(self): + def custom_clip_load(self): # loading the open clip model # Check https://github.com/mlfoundations/open_clip/blob/db7504f070b4e76e6c8578ee7b73596267083a19/src/clip/openai_clip.py#L121-L189 try: diff --git 
a/src/marqo/s2_inference/hf_utils.py b/src/marqo/s2_inference/hf_utils.py index ddfdad037..8529567af 100644 --- a/src/marqo/s2_inference/hf_utils.py +++ b/src/marqo/s2_inference/hf_utils.py @@ -76,9 +76,3 @@ def mean_pooling(self, model_output, attention_mask): def cls_pooling(self, model_output, attention_mask): return model_output[0][:,0] - - -def whitespace_clean(text): - text = re.sub(r'\s+', ' ', text) - text = text.strip() - return text From cc1784722b6ce5df1e8b19f4405e980055377118 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 25 Jan 2023 14:00:54 +1100 Subject: [PATCH 14/54] generic clip revise --- src/marqo/s2_inference/clip_utils.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index cf8450c03..9f115e2b2 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -1,6 +1,8 @@ # from torch import FloatTensor # from typing import Any, Dict, List, Optional, Union import os + +import PIL.Image import validators import requests import numpy as np @@ -34,11 +36,23 @@ def get_allowed_image_types(): BICUBIC = Image.BICUBIC -def _convert_image_to_rgb(image): +def _convert_image_to_rgb(image: ImageType) -> ImageType: + # Take a PIL.Image.Image and return its RGB version return image.convert("RGB") -def _get_transform(n_px: int, image_mean:List[float] = None, image_std:List[float] = None): +def _get_transform(n_px: int, image_mean:List[float] = None, image_std: List[float] = None) -> torch.Tensor: + ''' + + Args: + n_px: the size of the processed image + image_mean: the mean of the image used for normalization + image_std: the std of the image used for normalization + + Returns: + the processed image tensor wit shape (3, n_px, n_px) + + ''' img_mean = image_mean or OPENAI_DATASET_MEAN img_std = image_std or OPENAI_DATASET_STD return Compose([ @@ -125,6 +139,7 @@ def format_and_load_CLIP_image(image: Union[str, ndarray, ImageType]) -> ImageTy return img + def _is_image(inputs: Union[str, List[Union[str, ImageType, ndarray]]]) -> bool: # some logic to determine if something is an image or not # assume the batch is the same type @@ -217,7 +232,7 @@ def load(self) -> None: def custom_clip_load(self): - # loading the open clip model + # This function can load both openai clip and open_clip models # Check https://github.com/mlfoundations/open_clip/blob/db7504f070b4e76e6c8578ee7b73596267083a19/src/clip/openai_clip.py#L121-L189 try: # loading JIT archive From dab1c2251b18586129e3e89ce573bf3f270c1b53 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 25 Jan 2023 14:11:16 +1100 Subject: [PATCH 15/54] generic clip revise --- src/marqo/s2_inference/clip_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 9f115e2b2..4bdfcb682 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -13,7 +13,6 @@ from multilingual_clip import pt_multilingual_clip import transformers from clip.model import build_model - from marqo.s2_inference.types import * from marqo.s2_inference.logger import get_logger import marqo.s2_inference.model_registry as model_registry From ff4d7b9fc1767471f75e9edf611e7fa4216f1f08 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 25 Jan 2023 14:12:50 +1100 Subject: [PATCH 16/54] generic clip revise --- src/marqo/s2_inference/clip_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 4bdfcb682..871746aed 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -212,7 +212,7 @@ def load(self) -> None: self.model = self.model.to(self.device) self.tokenizer = clip.tokenize else: - logger.info("Detecting custom clip model path. We use generic model loading.") + logger.info("Detecting custom clip model path. We use generic clip model loading.") if os.path.isfile(path): self.model_path = path elif validators.url(path): From 93cbf808e9b78a97f15751303ca4034f6ac3d406 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 25 Jan 2023 14:13:52 +1100 Subject: [PATCH 17/54] generic clip revise --- src/marqo/s2_inference/clip_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 871746aed..29440e1b5 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -217,6 +217,8 @@ def load(self) -> None: self.model_path = path elif validators.url(path): self.model_path = download_pretrained_from_url(path) + else: + raise InvalidModelPropertiesError(f"The provided model path {path} is neither a local file nor a valid url.") self.jit = self.model_properties.get("jit", False) self.device = self.model_properties.get("device", "cpu") From 3682c379f0eb72438b03ae6e4fbe90917f875abd Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 25 Jan 2023 14:26:41 +1100 Subject: [PATCH 18/54] generic clip revise --- src/marqo/s2_inference/clip_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 29440e1b5..4b20f7fe4 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -49,7 +49,7 @@ def _get_transform(n_px: int, image_mean:List[float] = None, image_std: List[flo image_std: the std of the image used for normalization Returns: - the processed image tensor wit shape (3, n_px, n_px) + the processed image tensor with shape (3, n_px, n_px) ''' img_mean = image_mean or OPENAI_DATASET_MEAN From 9a1817c7fc5ee9d98497d47f11c3c8aa4eb4777c Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 25 Jan 2023 16:20:24 +1100 Subject: [PATCH 19/54] add generic clip model tests --- tests/s2_inference/test_generic_clip_model.py | 272 ++++++++++++++++++ 1 file changed, 272 insertions(+) create mode 100644 tests/s2_inference/test_generic_clip_model.py diff --git a/tests/s2_inference/test_generic_clip_model.py b/tests/s2_inference/test_generic_clip_model.py new file mode 100644 index 000000000..a05a9e694 --- /dev/null +++ b/tests/s2_inference/test_generic_clip_model.py @@ -0,0 +1,272 @@ +import numpy as np + +from marqo.errors import IndexNotFoundError +from marqo.s2_inference.errors import InvalidModelPropertiesError, UnknownModelError, ModelLoadError +from marqo.tensor_search import tensor_search +from marqo.s2_inference.processing.custom_clip_utils import download_pretrained_from_url + +from marqo.s2_inference.s2_inference import ( + available_models, + vectorise, + _validate_model_properties, + _update_available_models +) + +from tests.marqo_test import MarqoTestCase + + +class TestGenericModelSupport(MarqoTestCase): + + def setUp(self): + self.index_name_1 = "my-test-index-1" + self.index_name_2 = "my-test-index-2" + try: + tensor_search.delete_index(config=self.config, index_name=self.index_name_1) + except IndexNotFoundError as e: + pass + + + def 
tearDown(self) -> None: + try: + tensor_search.delete_index(config=self.config, index_name=self.index_name_1) + except IndexNotFoundError as e: + pass + try: + tensor_search.delete_index(config=self.config, index_name=self.index_name_2) + except IndexNotFoundError as e: + pass + + + def test_create_index_with_custom_open_clip_model_properties_url(self): + """index should get created with custom model_properties + """ + model_name = 'test-model-1' + model_properties = {"name": "open_clip custom model", + "dimensions": 512, + "url": "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_avg-8a00ab3c.pt", + "type": "clip", + } + tensor_search.create_vector_index( + index_name=self.index_name_1, config=self.config, + index_settings={ + "index_defaults": { + 'model': model_name, + 'model_properties': model_properties + } + } + ) + + + def test_create_index_with_custom_openai_clip_model_properties_url(self): + model_name = 'test-model-2' + model_properties = {"name": "openai custom model", + "dimensions": 512, + "url": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt", + "type": "clip", + } + + tensor_search.create_vector_index( + index_name=self.index_name_2, config=self.config, + index_settings={ + "index_defaults": { + 'model': model_name, + 'model_properties': model_properties + } + } + ) + + + def test_create_index_with_custom_open_clip_model_properties_localpath(self): + """index should get created with custom model_properties + """ + url = "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_avg-8a00ab3c.pt" + target_model = download_pretrained_from_url(url) + + model_name = 'test-model-1' + model_properties = {"name": "open_clip custom model", + "dimensions": 512, + "localpath": target_model, + "type": "clip", + } + tensor_search.create_vector_index( + index_name=self.index_name_1, config=self.config, + index_settings={ + "index_defaults": { + 'model': model_name, + 'model_properties': model_properties + } + } + ) + + def test_vectorise_with_custom_open_clip_model_properties_invalid_localpath(self): + """index should get created with custom model_properties + """ + content = ["testtest"] + invalid_localpath = "/test/test/test/testmodel.pt" + + model_name = 'test-model-1' + model_properties = {"name": "open_clip custom model", + "dimensions": 512, + "localpath": invalid_localpath, + "type": "clip", + } + + self.assertRaises(ModelLoadError, vectorise, model_name, content, model_properties) + + + def test_vectorise_with_custom_open_clip_model_properties_invalid_url(self): + """index should get created with custom model_properties + """ + content = ["testtest"] + invalid_url = "http://test/test/test/testmodel.pt" + + model_name = 'test-model-1' + model_properties = {"name": "open_clip custom model", + "dimensions": 512, + "url": invalid_url, + "type": "clip", + } + + self.assertRaises(ModelLoadError, vectorise, model_name, content, model_properties) + + + def test_create_index_with_model_properties_without_model_name(self): + """ + create_vector_index should throw an error + if model_properties are given without model_name + """ + model_properties = {"name": "openai custom model", + "dimensions": 512, + "url": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt", + "type": "clip", + } + + index_settings = { + "index_defaults": { + # 'model': 
model_name, + 'model_properties': model_properties + } + } + + self.assertRaises(UnknownModelError, tensor_search.create_vector_index, config=self.config, + index_name=self.index_name_1, index_settings=index_settings) + + + def test_add_documents_text_and_image(self): + """if given the right input, add_documents should work without any throwing any errors + """ + model_name = "test-model" + model_properties = { + "name": "openai custom model", + "dimensions": 512, + "url": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt", + "type": "clip", + } + tensor_search.create_vector_index( + index_name=self.index_name_1, config=self.config, + index_settings={ + "index_defaults": { + 'model': model_name, + 'model_properties': model_properties, + "treat_urls_and_pointers_as_images": True + } + } + ) + + config = self.config + index_name = self.index_name_1 + docs = [ + { + "_id": "123", + "title 1": "content 1", + "desc 2": "content 2. blah blah blah", + "image" : "https://raw.githubusercontent.com/marqo-ai/marqo-clip-onnx/main/examples/coco.jpg" + }] + auto_refresh = True + + tensor_search.add_documents(config=config, index_name=index_name, docs=docs, auto_refresh=auto_refresh) + + + def test_load_custom_clip_without_url_or_localpath(self): + """vectorise should throw an exception if url or localpath are not given. + """ + content = ["test test"] + model_name = "test-model" + model_properties = { + "name": "openai custom model", + "dimensions": 512, + #"url": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt", + "type": "clip", + } + + self.assertRaises(ModelLoadError, vectorise, model_name,content, model_properties) + + model_properties["url"] = "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt" + + vectorise(model_name, content, model_properties) + + + def test_vectorise_without_clip_type(self): + """_validate_model_properties should throw an exception if required keys are not given. 
+ """ + content = ["test test"] + model_name = "test-model" + model_properties = { + "name": "openai custom model", + "dimensions": 512, + "url": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt", + #"type": "clip", + } + + self.assertRaises(ModelLoadError, vectorise, model_name,content, model_properties) + + model_properties["type"] = "clip" + vectorise(model_name, content, model_properties) + + + def test_validate_model_properties_unknown_model_error(self): + pass + """_validate_model_properties should throw an error if model is not in registry, + and if model_properties have not been given in index + """ + model_name = "test-model" + tensor_search.create_vector_index( + index_name=self.index_name_1, config=self.config, + index_settings={ + "index_defaults": { + 'model': model_name, + 'type' : "clip" + } + } + ) + + model_properties = None + + self.assertRaises(UnknownModelError, _validate_model_properties, model_name, model_properties) + + + def test_vectorise_custom_clip_encode_results(self): + + epsilon = 1e-7 + + image = "https://raw.githubusercontent.com/marqo-ai/marqo-clip-onnx/main/examples/coco.jpg" + text = "this is a test to test the custom clip output results" + + model_name = "test-model" + model_properties = { + "name": "openai custom model", + "dimensions": 512, + "url": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt", + "type": "clip", + } + + a = vectorise(model_name, content = image, model_properties = model_properties) + b = vectorise("ViT-B/32", content = image) + + assert np.abs(np.array(a) - np.array(b)).sum() < epsilon + + a = vectorise(model_name, content=text, model_properties=model_properties) + b = vectorise("ViT-B/32", content=text) + + assert np.abs(np.array(a) - np.array(b)).sum() < epsilon + From 3b74215215da25248f5019e5f54aa0080f0f3fcf Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 25 Jan 2023 18:40:14 +1100 Subject: [PATCH 20/54] add generic clip model tests --- src/marqo/s2_inference/clip_utils.py | 6 +- tests/s2_inference/test_generic_clip_model.py | 58 ++++++++++++++++++- 2 files changed, 60 insertions(+), 4 deletions(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 4b20f7fe4..e5e4fc9c8 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -19,6 +19,7 @@ from marqo.s2_inference.errors import InvalidModelDeviceError, InvalidModelPropertiesError from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize from marqo.s2_inference.processing.custom_clip_utils import HFTokenizer, download_pretrained_from_url +from clip import load as openai_clip_load logger = get_logger(__name__) @@ -227,12 +228,13 @@ def load(self) -> None: self.model, self.preprocess = self.custom_clip_load() - self.model.eval() - self.tokenizer = self.load_tokenizer() + self.model.eval() + def custom_clip_load(self): + # TODO Figure how to get the same results as open_clip package # This function can load both openai clip and open_clip models # Check https://github.com/mlfoundations/open_clip/blob/db7504f070b4e76e6c8578ee7b73596267083a19/src/clip/openai_clip.py#L121-L189 try: diff --git a/tests/s2_inference/test_generic_clip_model.py b/tests/s2_inference/test_generic_clip_model.py index a05a9e694..0f6271918 100644 --- a/tests/s2_inference/test_generic_clip_model.py +++ b/tests/s2_inference/test_generic_clip_model.py @@ 
-245,12 +245,11 @@ def test_validate_model_properties_unknown_model_error(self): self.assertRaises(UnknownModelError, _validate_model_properties, model_name, model_properties) - def test_vectorise_custom_clip_encode_results(self): + def test_vectorise_custom_openai_clip_encode_image_results(self): epsilon = 1e-7 image = "https://raw.githubusercontent.com/marqo-ai/marqo-clip-onnx/main/examples/coco.jpg" - text = "this is a test to test the custom clip output results" model_name = "test-model" model_properties = { @@ -265,8 +264,63 @@ def test_vectorise_custom_clip_encode_results(self): assert np.abs(np.array(a) - np.array(b)).sum() < epsilon + + def test_vectorise_custom_openai_clip_encode_text_results(self): + + epsilon = 1e-7 + text = "this is a test to test the custom clip output results" + + model_name = "test-model" + model_properties = { + "name": "openai custom model", + "dimensions": 512, + "url": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt", + "type": "clip", + } + a = vectorise(model_name, content=text, model_properties=model_properties) b = vectorise("ViT-B/32", content=text) assert np.abs(np.array(a) - np.array(b)).sum() < epsilon + def test_vectorise_custom_open_clip_encode_image_results(self): + + epsilon = 1e-7 + + image = "https://raw.githubusercontent.com/marqo-ai/marqo-clip-onnx/main/examples/coco.jpg" + + model_name = "test-model" + model_properties = { + "name": "open_clip custom model", + "dimensions": 512, + "url": "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_e31-d867053b.pt", + "type": "clip", + "jit" : False + } + + a = vectorise(model_name, content = image, model_properties = model_properties) + b = vectorise("open_clip/ViT-B-32-quickgelu/laion400m_e31", content = image) + + assert np.abs(np.array(a) - np.array(b)).sum() < epsilon + + + def test_vectorise_custom_open_clip_encode_text_results(self): + epsilon = 1e-7 + text = "this is a test to test the custom clip output results" + + model_name = "test-model" + model_properties = { + "name": "open_clip custom model", + "dimensions": 512, + "url": "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_e31-d867053b.pt", + "type": "clip", + "jit": False + } + + + a = vectorise(model_name, content=text, model_properties=model_properties) + b = vectorise("open_clip/ViT-B-32-quickgelu/laion400m_e31", content=text) + + assert np.abs(np.array(a) - np.array(b)).sum() < epsilon + + From 8c48b53ce89508c461fd5cce03a2d53ae09462cb Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 27 Jan 2023 12:15:07 +1100 Subject: [PATCH 21/54] open_clip finish --- src/marqo/s2_inference/clip_utils.py | 124 ++++++++++-------- tests/s2_inference/test_generic_clip_model.py | 2 +- 2 files changed, 69 insertions(+), 57 deletions(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index e5e4fc9c8..065efbc40 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -19,6 +19,7 @@ from marqo.s2_inference.errors import InvalidModelDeviceError, InvalidModelPropertiesError from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize from marqo.s2_inference.processing.custom_clip_utils import HFTokenizer, download_pretrained_from_url +from open_clip.pretrained import _PRETRAINED from clip import load as openai_clip_load logger = get_logger(__name__) @@ -233,74 +234,84 @@ 
def load(self) -> None: self.model.eval() - def custom_clip_load(self): - # TODO Figure how to get the same results as open_clip package - # This function can load both openai clip and open_clip models - # Check https://github.com/mlfoundations/open_clip/blob/db7504f070b4e76e6c8578ee7b73596267083a19/src/clip/openai_clip.py#L121-L189 - try: - # loading JIT archive - model = torch.jit.load(self.model_path, map_location=self.device if self.jit else "cpu").eval() - state_dict = None - except RuntimeError: - # loading saved state dict - if self.jit: - self.jit = False - state_dict = torch.load(self.model_path, map_location="cpu") - - if not self.jit: - try: - model = build_model(state_dict or model.state_dict()).to(self.device) - except KeyError: - sd = {k[7:]: v for k, v in state_dict["state_dict"].items()} - model = build_model(sd).to(self.device) - - if str(self.device) == "cpu": - model.float() - return model, _get_transform(model.visual.input_resolution, self.mean, self.std) - - # patch the device names - device_holder = torch.jit.trace(lambda: torch.ones([]).to(torch.device(self.device)), example_inputs=[]) - device_node = [n for n in device_holder.graph.findAllNodes("prim::Constant") if "Device" in repr(n)][-1] - def patch_device(module): - graphs = [module.graph] if hasattr(module, "graph") else [] - if hasattr(module, "forward1"): - graphs.append(module.forward1.graph) - - for graph in graphs: - for node in graph.findAllNodes("prim::Constant"): - if "value" in node.attributeNames() and str(node["value"]).startswith("cuda"): - node.copyAttributes(device_node) + def custom_clip_load(self): + self.model_name = self.model_properties.get("name", None) - model.apply(patch_device) - patch_device(model.encode_image) - patch_device(model.encode_text) + if self.model_name in _PRETRAINED: + logger.info(f"The name of the custom clip model is {self.model_name}.") + model, _, preprocess = open_clip.create_model_and_transforms(model_name=self.model_name, pretrained=self.model_path) + return model, preprocess - # patch dtype to float32 on CPU - if str(self.device) == "cpu": - float_holder = torch.jit.trace(lambda: torch.ones([]).float(), example_inputs=[]) - float_input = list(float_holder.graph.findNode("aten::to").inputs())[1] - float_node = float_input.node() + else: + # This step can load both openai clip and open_clip models by the script file. + # Check https://github.com/mlfoundations/open_clip/blob/db7504f070b4e76e6c8578ee7b73596267083a19/src/clip/openai_clip.py#L121-L189 - def patch_float(module): + logger.info(f"The provided name `{self.model_name}` is not supported. 
We try to load from script file directly.") + try: + # loading JIT archive + model = torch.jit.load(self.model_path, map_location=self.device if self.jit else "cpu").eval() + state_dict = None + except RuntimeError: + # loading saved state dict + if self.jit: + self.jit = False + state_dict = torch.load(self.model_path, map_location="cpu") + + if not self.jit: + try: + model = build_model(state_dict or model.state_dict()).to(self.device) + except KeyError: + sd = {k[7:]: v for k, v in state_dict["state_dict"].items()} + model = build_model(sd).to(self.device) + + if str(self.device) == "cpu": + model.float() + return model, _get_transform(model.visual.input_resolution, self.mean, self.std) + + # patch the device names + device_holder = torch.jit.trace(lambda: torch.ones([]).to(torch.device(self.device)), example_inputs=[]) + device_node = [n for n in device_holder.graph.findAllNodes("prim::Constant") if "Device" in repr(n)][-1] + + def patch_device(module): graphs = [module.graph] if hasattr(module, "graph") else [] if hasattr(module, "forward1"): graphs.append(module.forward1.graph) for graph in graphs: - for node in graph.findAllNodes("aten::to"): - inputs = list(node.inputs()) - for i in [1, 2]: # dtype can be the second or third argument to aten::to() - if inputs[i].node()["value"] == 5: - inputs[i].node().copyAttributes(float_node) + for node in graph.findAllNodes("prim::Constant"): + if "value" in node.attributeNames() and str(node["value"]).startswith("cuda"): + node.copyAttributes(device_node) - model.apply(patch_float) - patch_float(model.encode_image) - patch_float(model.encode_text) + model.apply(patch_device) + patch_device(model.encode_image) + patch_device(model.encode_text) - model.float() + # patch dtype to float32 on CPU + if str(self.device) == "cpu": + float_holder = torch.jit.trace(lambda: torch.ones([]).float(), example_inputs=[]) + float_input = list(float_holder.graph.findNode("aten::to").inputs())[1] + float_node = float_input.node() + + def patch_float(module): + graphs = [module.graph] if hasattr(module, "graph") else [] + if hasattr(module, "forward1"): + graphs.append(module.forward1.graph) + + for graph in graphs: + for node in graph.findAllNodes("aten::to"): + inputs = list(node.inputs()) + for i in [1, 2]: # dtype can be the second or third argument to aten::to() + if inputs[i].node()["value"] == 5: + inputs[i].node().copyAttributes(float_node) + + model.apply(patch_float) + patch_float(model.encode_image) + patch_float(model.encode_text) + + model.float() - return model, _get_transform(model.visual.input_resolution, self.mean, self.std) + return model, _get_transform(model.visual.input_resolution, self.mean, self.std) def load_tokenizer(self): @@ -309,6 +320,7 @@ def load_tokenizer(self): if tokenizer_name == "clip": return clip.tokenize else: + logger.info(f"Custom HFTokenizer is provided. 
Loading...") return HFTokenizer(tokenizer_name) diff --git a/tests/s2_inference/test_generic_clip_model.py b/tests/s2_inference/test_generic_clip_model.py index 0f6271918..c9e72ca70 100644 --- a/tests/s2_inference/test_generic_clip_model.py +++ b/tests/s2_inference/test_generic_clip_model.py @@ -310,7 +310,7 @@ def test_vectorise_custom_open_clip_encode_text_results(self): model_name = "test-model" model_properties = { - "name": "open_clip custom model", + "name": "ViT-B-32-quickgelu", "dimensions": 512, "url": "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_e31-d867053b.pt", "type": "clip", From e20c969004fb161713a58db5950847ec8dd1c2c8 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 27 Jan 2023 12:20:08 +1100 Subject: [PATCH 22/54] open_clip finish --- src/marqo/s2_inference/clip_utils.py | 6 +++++- tests/s2_inference/test_generic_clip_model.py | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 065efbc40..271962015 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -27,9 +27,11 @@ OPENAI_DATASET_MEAN = (0.48145466, 0.4578275, 0.40821073) OPENAI_DATASET_STD = (0.26862954, 0.26130258, 0.27577711) + def get_allowed_image_types(): return set(('.jpg', '.png', '.bmp', '.jpeg')) + try: from torchvision.transforms import InterpolationMode BICUBIC = InterpolationMode.BICUBIC @@ -65,7 +67,6 @@ def _get_transform(n_px: int, image_mean:List[float] = None, image_std: List[flo ]) - def format_and_load_CLIP_images(images: List[Union[str, ndarray, ImageType]]) -> List[ImageType]: """takes in a list of strings, arrays or urls and either loads and/or converts to PIL for the clip model @@ -88,6 +89,7 @@ def format_and_load_CLIP_images(images: List[Union[str, ndarray, ImageType]]) -> return results + def load_image_from_path(image_path: str) -> ImageType: """Loads an image into PIL from a string path that is either local or a url @@ -114,6 +116,7 @@ def load_image_from_path(image_path: str) -> ImageType: return img + def format_and_load_CLIP_image(image: Union[str, ndarray, ImageType]) -> ImageType: """standardizes the input to be a PIL image @@ -185,6 +188,7 @@ def _is_image(inputs: Union[str, List[Union[str, ImageType, ndarray]]]) -> bool: else: raise UnidentifiedImageError(f"expected type Image or str for inputs but received type {type(thing)}") + class CLIP: """ diff --git a/tests/s2_inference/test_generic_clip_model.py b/tests/s2_inference/test_generic_clip_model.py index c9e72ca70..6179366ee 100644 --- a/tests/s2_inference/test_generic_clip_model.py +++ b/tests/s2_inference/test_generic_clip_model.py @@ -323,4 +323,22 @@ def test_vectorise_custom_open_clip_encode_text_results(self): assert np.abs(np.array(a) - np.array(b)).sum() < epsilon + def test_unsupported_generic_clip_name(self): + epsilon = 1e-7 + text = "this is a test to test the custom clip output results" + + model_name = "test-model" + model_properties = { + "name": "this is a test name", + "dimensions": 512, + "url": "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_e31-d867053b.pt", + "type": "clip", + "jit": False + } + + a = vectorise(model_name, content=text, model_properties=model_properties) + b = vectorise("open_clip/ViT-B-32-quickgelu/laion400m_e31", content=text) + + assert np.abs(np.array(a) - np.array(b)).sum() < epsilon + From 
c15725020d64365f56135732ef22864cceb1b4e7 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 27 Jan 2023 12:20:51 +1100 Subject: [PATCH 23/54] generic clip finished --- tests/s2_inference/test_generic_clip_model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/s2_inference/test_generic_clip_model.py b/tests/s2_inference/test_generic_clip_model.py index 6179366ee..47f437732 100644 --- a/tests/s2_inference/test_generic_clip_model.py +++ b/tests/s2_inference/test_generic_clip_model.py @@ -323,8 +323,9 @@ def test_vectorise_custom_open_clip_encode_text_results(self): assert np.abs(np.array(a) - np.array(b)).sum() < epsilon + def test_unsupported_generic_clip_name(self): - epsilon = 1e-7 + epsilon = 1e-2 text = "this is a test to test the custom clip output results" model_name = "test-model" From d3a1cae1bdabe30f1b315cccf0154d723c83c5bc Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 27 Jan 2023 12:25:18 +1100 Subject: [PATCH 24/54] generic clip finished --- tests/s2_inference/test_generic_clip_model.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tests/s2_inference/test_generic_clip_model.py b/tests/s2_inference/test_generic_clip_model.py index 47f437732..531ab36b5 100644 --- a/tests/s2_inference/test_generic_clip_model.py +++ b/tests/s2_inference/test_generic_clip_model.py @@ -37,7 +37,7 @@ def tearDown(self) -> None: pass - def test_create_index_with_custom_open_clip_model_properties_url(self): + def test_create_index_with_generic_open_clip_model_properties_url(self): """index should get created with custom model_properties """ model_name = 'test-model-1' @@ -57,7 +57,7 @@ def test_create_index_with_custom_open_clip_model_properties_url(self): ) - def test_create_index_with_custom_openai_clip_model_properties_url(self): + def test_create_index_with_generic_openai_clip_model_properties_url(self): model_name = 'test-model-2' model_properties = {"name": "openai custom model", "dimensions": 512, @@ -76,7 +76,7 @@ def test_create_index_with_custom_openai_clip_model_properties_url(self): ) - def test_create_index_with_custom_open_clip_model_properties_localpath(self): + def test_create_index_with_generic_open_clip_model_properties_localpath(self): """index should get created with custom model_properties """ url = "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_avg-8a00ab3c.pt" @@ -98,7 +98,7 @@ def test_create_index_with_custom_open_clip_model_properties_localpath(self): } ) - def test_vectorise_with_custom_open_clip_model_properties_invalid_localpath(self): + def test_vectorise_with_generic_open_clip_model_properties_invalid_localpath(self): """index should get created with custom model_properties """ content = ["testtest"] @@ -114,7 +114,7 @@ def test_vectorise_with_custom_open_clip_model_properties_invalid_localpath(self self.assertRaises(ModelLoadError, vectorise, model_name, content, model_properties) - def test_vectorise_with_custom_open_clip_model_properties_invalid_url(self): + def test_vectorise_with_generic_open_clip_model_properties_invalid_url(self): """index should get created with custom model_properties """ content = ["testtest"] @@ -187,7 +187,7 @@ def test_add_documents_text_and_image(self): tensor_search.add_documents(config=config, index_name=index_name, docs=docs, auto_refresh=auto_refresh) - def test_load_custom_clip_without_url_or_localpath(self): + def test_load_generic_clip_without_url_or_localpath(self): """vectorise should throw an exception if url 
or localpath are not given. """ content = ["test test"] @@ -245,7 +245,7 @@ def test_validate_model_properties_unknown_model_error(self): self.assertRaises(UnknownModelError, _validate_model_properties, model_name, model_properties) - def test_vectorise_custom_openai_clip_encode_image_results(self): + def test_vectorise_generic_openai_clip_encode_image_results(self): epsilon = 1e-7 @@ -265,14 +265,14 @@ def test_vectorise_custom_openai_clip_encode_image_results(self): assert np.abs(np.array(a) - np.array(b)).sum() < epsilon - def test_vectorise_custom_openai_clip_encode_text_results(self): + def test_vectorise_generic_openai_clip_encode_text_results(self): epsilon = 1e-7 text = "this is a test to test the custom clip output results" model_name = "test-model" model_properties = { - "name": "openai custom model", + "name": "ViT-L/14", "dimensions": 512, "url": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt", "type": "clip", @@ -283,7 +283,8 @@ def test_vectorise_custom_openai_clip_encode_text_results(self): assert np.abs(np.array(a) - np.array(b)).sum() < epsilon - def test_vectorise_custom_open_clip_encode_image_results(self): + + def test_vectorise_generic_open_clip_encode_image_results(self): epsilon = 1e-7 @@ -304,7 +305,7 @@ def test_vectorise_custom_open_clip_encode_image_results(self): assert np.abs(np.array(a) - np.array(b)).sum() < epsilon - def test_vectorise_custom_open_clip_encode_text_results(self): + def test_vectorise_generic_open_clip_encode_text_results(self): epsilon = 1e-7 text = "this is a test to test the custom clip output results" From 5c04f28a8ec0fc7f232d31772728fa5e32d9a151 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 27 Jan 2023 12:27:12 +1100 Subject: [PATCH 25/54] add test --- src/marqo/s2_inference/clip_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 271962015..00a36e13e 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -251,7 +251,7 @@ def custom_clip_load(self): # This step can load both openai clip and open_clip models by the script file. # Check https://github.com/mlfoundations/open_clip/blob/db7504f070b4e76e6c8578ee7b73596267083a19/src/clip/openai_clip.py#L121-L189 - logger.info(f"The provided name `{self.model_name}` is not supported. We try to load from script file directly.") + logger.info(f"The provided name `{self.model_name}` is not supported. Marqo try to load from script file directly.") try: # loading JIT archive model = torch.jit.load(self.model_path, map_location=self.device if self.jit else "cpu").eval() From f024861bbd9d4fa7cea9fd45e2da27adffea4083 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 27 Jan 2023 12:29:58 +1100 Subject: [PATCH 26/54] add test --- src/marqo/s2_inference/clip_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 00a36e13e..888839ea2 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -251,7 +251,8 @@ def custom_clip_load(self): # This step can load both openai clip and open_clip models by the script file. # Check https://github.com/mlfoundations/open_clip/blob/db7504f070b4e76e6c8578ee7b73596267083a19/src/clip/openai_clip.py#L121-L189 - logger.info(f"The provided name `{self.model_name}` is not supported. 
Marqo try to load from script file directly.") + logger.warning(f"The provided name `{self.model_name}` is not supported. Marqo try to load from script file directly." + f"This is highly NOT RECOMMENDED as it may lead to inaccurate results.") try: # loading JIT archive model = torch.jit.load(self.model_path, map_location=self.device if self.jit else "cpu").eval() From e6d47d5ff88b94d277824d3399aefabce0bb5480 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 27 Jan 2023 12:38:27 +1100 Subject: [PATCH 27/54] add test --- src/marqo/s2_inference/clip_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 888839ea2..12179d9e6 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -340,6 +340,7 @@ def _convert_output(self, output): def normalize(outputs): return outputs.norm(dim=-1, keepdim=True) + def encode_text(self, sentence: Union[str, List[str]], normalize = True) -> FloatTensor: if self.model is None: @@ -419,7 +420,6 @@ def __init__(self, model_type: str = "fp16/ViT-B/32", device: str = 'cuda', emb def load(self) -> None: - # https://github.com/openai/CLIP/issues/30 self.model, self.preprocess = clip.load(self.model_name, device='cuda', jit=False) self.model = self.model.to(self.device) @@ -440,6 +440,7 @@ def load(self) -> None: self.tokenizer = open_clip.get_tokenizer(self.model_name) self.model.eval() + def encode_text(self, sentence: Union[str, List[str]], normalize=True) -> FloatTensor: if self.model is None: @@ -489,6 +490,7 @@ def load(self) -> None: self.textual_model.eval() self.visual_model.eval() + def encode_text(self, sentence: Union[str, List[str]], normalize=True) -> FloatTensor: if self.textual_model is None: @@ -504,6 +506,7 @@ def encode_text(self, sentence: Union[str, List[str]], normalize=True) -> FloatT return self._convert_output(outputs) + def encode_image(self, images: Union[str, ImageType, List[Union[str, ImageType]]], normalize=True) -> FloatTensor: From 40914d9a280e5fc17248aacb0404027155618372 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 27 Jan 2023 12:56:56 +1100 Subject: [PATCH 28/54] add test --- src/marqo/s2_inference/s2_inference.py | 29 ++++++++++++++++---------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index 05ca71e19..c3b336a21 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -100,17 +100,24 @@ def _validate_model_properties(model_name: str, model_properties: dict) -> dict: if model_properties is not None: """checks model dict to see if all required keys are present """ - required_keys = ["name", "dimensions"] - for key in required_keys: - if key not in model_properties: - raise InvalidModelPropertiesError(f"model_properties has missing key '{key}'. ") - - """updates model dict with default values if optional keys are missing - """ - optional_keys_values = [("type", "sbert"), ("tokens", get_default_seq_length())] - for key, value in optional_keys_values: - if key not in model_properties: - model_properties[key] = value + if model_properties.get("type", None) is None or "sbert": + required_keys = ["name", "dimensions"] + for key in required_keys: + if key not in model_properties: + raise InvalidModelPropertiesError(f"model_properties has missing key '{key}'. 
") + + """updates model dict with default values if optional keys are missing + """ + optional_keys_values = [("type", "sbert"), ("tokens", get_default_seq_length())] + for key, value in optional_keys_values: + if key not in model_properties: + model_properties[key] = value + + elif model_properties("type", None) is "clip": + required_keys = ["name", "dimensions"] + for key in required_keys: + if key not in model_properties: + raise InvalidModelPropertiesError(f"model_properties has missing key '{key}'. ") else: model_properties = get_model_properties_from_registry(model_name) From 5cc753725ed05c7c0041837610cb181f0448ef1b Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 27 Jan 2023 12:57:31 +1100 Subject: [PATCH 29/54] add test --- src/marqo/s2_inference/clip_utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 12179d9e6..1ca0b592b 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -231,14 +231,12 @@ def load(self) -> None: self.mean = self.model_properties.get("mean", None) self.std = self.model_properties.get("std", None) - self.model, self.preprocess = self.custom_clip_load() self.tokenizer = self.load_tokenizer() self.model.eval() - def custom_clip_load(self): self.model_name = self.model_properties.get("name", None) From 3c4bbc6a04ff528b1b656d80bb0c1e0002b68dfc Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 27 Jan 2023 12:58:11 +1100 Subject: [PATCH 30/54] add test --- src/marqo/s2_inference/clip_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 1ca0b592b..6fa4e959e 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -227,7 +227,6 @@ def load(self) -> None: raise InvalidModelPropertiesError(f"The provided model path {path} is neither a local file nor a valid url.") self.jit = self.model_properties.get("jit", False) - self.device = self.model_properties.get("device", "cpu") self.mean = self.model_properties.get("mean", None) self.std = self.model_properties.get("std", None) From 5e0197b290d60278497ea2ae611ea62eadefc8e2 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 27 Jan 2023 12:59:01 +1100 Subject: [PATCH 31/54] add test --- src/marqo/s2_inference/clip_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 6fa4e959e..5689271b0 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -241,7 +241,8 @@ def custom_clip_load(self): if self.model_name in _PRETRAINED: logger.info(f"The name of the custom clip model is {self.model_name}.") - model, _, preprocess = open_clip.create_model_and_transforms(model_name=self.model_name, pretrained=self.model_path) + model, _, preprocess = open_clip.create_model_and_transforms(model_name=self.model_name, pretrained=self.model_path, + image_mean=self.mean, image_std=self.std) return model, preprocess else: From 138643bcde336c4b3d445772d999b584574115c0 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 30 Jan 2023 14:21:40 +1100 Subject: [PATCH 32/54] add test --- src/marqo/s2_inference/clip_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 5689271b0..96eca7bee 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ 
b/src/marqo/s2_inference/clip_utils.py @@ -209,7 +209,7 @@ def __init__(self, model_type: str = "ViT-B/32", device: str = 'cpu', embedding def load(self) -> None: - path = self.model_properties.get("localpath", None) or self.model_properties.get("url", None) + path = self.model_properties.get("localpath", None) or self.model_properties.get("url",None) if path is None: # The original method to load the openai clip model @@ -241,7 +241,7 @@ def custom_clip_load(self): if self.model_name in _PRETRAINED: logger.info(f"The name of the custom clip model is {self.model_name}.") - model, _, preprocess = open_clip.create_model_and_transforms(model_name=self.model_name, pretrained=self.model_path, + model, _, preprocess = open_clip.create_model_and_transforms(model_name=self.model_name, jit = self.jit, pretrained=self.model_path, image_mean=self.mean, image_std=self.std) return model, preprocess From 28fe0067cd791dd3ce6d3ab6020f26acf5630ac9 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 30 Jan 2023 15:46:12 +1100 Subject: [PATCH 33/54] add test --- src/marqo/s2_inference/clip_utils.py | 24 +++++++++++-------- tests/s2_inference/test_generic_clip_model.py | 7 +++--- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 96eca7bee..2dce48ee8 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -19,8 +19,7 @@ from marqo.s2_inference.errors import InvalidModelDeviceError, InvalidModelPropertiesError from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize from marqo.s2_inference.processing.custom_clip_utils import HFTokenizer, download_pretrained_from_url -from open_clip.pretrained import _PRETRAINED -from clip import load as openai_clip_load +from open_clip.pretrained import _PRETRAINED as OPEN_CLIP_PRETRAINED logger = get_logger(__name__) @@ -226,6 +225,7 @@ def load(self) -> None: else: raise InvalidModelPropertiesError(f"The provided model path {path} is neither a local file nor a valid url.") + self.precision = self.model_properties.get("precision", "fp32") self.jit = self.model_properties.get("jit", False) self.mean = self.model_properties.get("mean", None) self.std = self.model_properties.get("std", None) @@ -239,10 +239,17 @@ def load(self) -> None: def custom_clip_load(self): self.model_name = self.model_properties.get("name", None) - if self.model_name in _PRETRAINED: - logger.info(f"The name of the custom clip model is {self.model_name}.") - model, _, preprocess = open_clip.create_model_and_transforms(model_name=self.model_name, jit = self.jit, pretrained=self.model_path, - image_mean=self.mean, image_std=self.std) + if self.model_name in OPEN_CLIP_PRETRAINED: + logger.info(f"The name of the custom clip model is {self.model_name}. We use open_clip load") + model, _, preprocess = open_clip.create_model_and_transforms(model_name=self.model_name, jit = self.jit, pretrained=self.model_path, precision = self.precision, + image_mean=self.mean, image_std=self.std, device = self.device) + return model, preprocess + + elif self.model_name in clip.available_models(): + logger.info(f"The name of the custom clip model is {self.model_name}. 
We use openai clip load") + print(self.device) + model, preprocess = clip.load(name=self.model_path, device="cpu", jit = self.jit) + model = model.to(self.device) return model, preprocess else: @@ -343,11 +350,8 @@ def encode_text(self, sentence: Union[str, List[str]], normalize = True) -> Floa if self.model is None: self.load() - try: - text = self.tokenizer(sentence, truncate=self.truncate).to(self.device) - except Exception: - text = self.tokenizer(sentence).to(self.device) + text = self.tokenizer(sentence, truncate=self.truncate).to(self.device) with torch.no_grad(): outputs = self.model.encode_text(text) diff --git a/tests/s2_inference/test_generic_clip_model.py b/tests/s2_inference/test_generic_clip_model.py index 531ab36b5..13feb8b0d 100644 --- a/tests/s2_inference/test_generic_clip_model.py +++ b/tests/s2_inference/test_generic_clip_model.py @@ -157,9 +157,9 @@ def test_add_documents_text_and_image(self): """ model_name = "test-model" model_properties = { - "name": "openai custom model", + "name": "ViT-B/16", "dimensions": 512, - "url": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt", + "url": "https://openaipublic.azureedge.net/clip/models/5806e77cd80f8b59890b7e101eabd078d9fb84e6937f9e85e4ecb61988df416f/ViT-B-16.pt", "type": "clip", } tensor_search.create_vector_index( @@ -292,7 +292,7 @@ def test_vectorise_generic_open_clip_encode_image_results(self): model_name = "test-model" model_properties = { - "name": "open_clip custom model", + "name": "ViT-B-32-quickgelu", "dimensions": 512, "url": "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_e31-d867053b.pt", "type": "clip", @@ -329,6 +329,7 @@ def test_unsupported_generic_clip_name(self): epsilon = 1e-2 text = "this is a test to test the custom clip output results" + model_name = "test-model" model_properties = { "name": "this is a test name", From d11355040f5897641abd2b8e3dae42489e5c683a Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 30 Jan 2023 15:50:53 +1100 Subject: [PATCH 34/54] add test --- src/marqo/s2_inference/clip_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 2dce48ee8..85408a9dd 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -414,6 +414,9 @@ class FP16_CLIP(CLIP): def __init__(self, model_type: str = "fp16/ViT-B/32", device: str = 'cuda', embedding_dim: int = None, truncate: bool = True, **kwargs) -> None: super().__init__(model_type, device, embedding_dim, truncate, **kwargs) + '''This class loads the provided clip model directly from cuda in float16 version. The inference time is halved + with very minor accuracy drop. 
+ ''' if not self.device.startswith("cuda"): raise InvalidModelDeviceError(f"FP16 clip model `{self.model_type}` is only available with device `cuda`.") From 636d1e3717d33e5db773db9effae079fb09aefc9 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 30 Jan 2023 18:18:29 +1100 Subject: [PATCH 35/54] Separate clip and open_clip load --- src/marqo/s2_inference/clip_utils.py | 161 ++++++------------ tests/s2_inference/test_generic_clip_model.py | 43 ++--- 2 files changed, 68 insertions(+), 136 deletions(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 85408a9dd..9341193e1 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -204,7 +204,7 @@ def __init__(self, model_type: str = "ViT-B/32", device: str = 'cpu', embedding self.processor = None self.embedding_dimension = embedding_dim self.truncate = truncate - self.model_properties = kwargs["model_properties"] + self.model_properties = kwargs.get("model_properties", None) def load(self) -> None: @@ -225,13 +225,9 @@ def load(self) -> None: else: raise InvalidModelPropertiesError(f"The provided model path {path} is neither a local file nor a valid url.") - self.precision = self.model_properties.get("precision", "fp32") self.jit = self.model_properties.get("jit", False) - self.mean = self.model_properties.get("mean", None) - self.std = self.model_properties.get("std", None) - self.model, self.preprocess = self.custom_clip_load() - self.tokenizer = self.load_tokenizer() + self.tokenizer = clip.tokenize self.model.eval() @@ -239,99 +235,10 @@ def load(self) -> None: def custom_clip_load(self): self.model_name = self.model_properties.get("name", None) - if self.model_name in OPEN_CLIP_PRETRAINED: - logger.info(f"The name of the custom clip model is {self.model_name}. We use open_clip load") - model, _, preprocess = open_clip.create_model_and_transforms(model_name=self.model_name, jit = self.jit, pretrained=self.model_path, precision = self.precision, - image_mean=self.mean, image_std=self.std, device = self.device) - return model, preprocess - - elif self.model_name in clip.available_models(): - logger.info(f"The name of the custom clip model is {self.model_name}. We use openai clip load") - print(self.device) - model, preprocess = clip.load(name=self.model_path, device="cpu", jit = self.jit) - model = model.to(self.device) - return model, preprocess - - else: - # This step can load both openai clip and open_clip models by the script file. - # Check https://github.com/mlfoundations/open_clip/blob/db7504f070b4e76e6c8578ee7b73596267083a19/src/clip/openai_clip.py#L121-L189 - - logger.warning(f"The provided name `{self.model_name}` is not supported. Marqo try to load from script file directly." 
- f"This is highly NOT RECOMMENDED as it may lead to inaccurate results.") - try: - # loading JIT archive - model = torch.jit.load(self.model_path, map_location=self.device if self.jit else "cpu").eval() - state_dict = None - except RuntimeError: - # loading saved state dict - if self.jit: - self.jit = False - state_dict = torch.load(self.model_path, map_location="cpu") - - if not self.jit: - try: - model = build_model(state_dict or model.state_dict()).to(self.device) - except KeyError: - sd = {k[7:]: v for k, v in state_dict["state_dict"].items()} - model = build_model(sd).to(self.device) - - if str(self.device) == "cpu": - model.float() - return model, _get_transform(model.visual.input_resolution, self.mean, self.std) - - # patch the device names - device_holder = torch.jit.trace(lambda: torch.ones([]).to(torch.device(self.device)), example_inputs=[]) - device_node = [n for n in device_holder.graph.findAllNodes("prim::Constant") if "Device" in repr(n)][-1] - - def patch_device(module): - graphs = [module.graph] if hasattr(module, "graph") else [] - if hasattr(module, "forward1"): - graphs.append(module.forward1.graph) - - for graph in graphs: - for node in graph.findAllNodes("prim::Constant"): - if "value" in node.attributeNames() and str(node["value"]).startswith("cuda"): - node.copyAttributes(device_node) - - model.apply(patch_device) - patch_device(model.encode_image) - patch_device(model.encode_text) - - # patch dtype to float32 on CPU - if str(self.device) == "cpu": - float_holder = torch.jit.trace(lambda: torch.ones([]).float(), example_inputs=[]) - float_input = list(float_holder.graph.findNode("aten::to").inputs())[1] - float_node = float_input.node() - - def patch_float(module): - graphs = [module.graph] if hasattr(module, "graph") else [] - if hasattr(module, "forward1"): - graphs.append(module.forward1.graph) - - for graph in graphs: - for node in graph.findAllNodes("aten::to"): - inputs = list(node.inputs()) - for i in [1, 2]: # dtype can be the second or third argument to aten::to() - if inputs[i].node()["value"] == 5: - inputs[i].node().copyAttributes(float_node) - - model.apply(patch_float) - patch_float(model.encode_image) - patch_float(model.encode_text) - - model.float() - - return model, _get_transform(model.visual.input_resolution, self.mean, self.std) - - - def load_tokenizer(self): - tokenizer_name = self.model_properties.get("tokenizer", "clip") - - if tokenizer_name == "clip": - return clip.tokenize - else: - logger.info(f"Custom HFTokenizer is provided. Loading...") - return HFTokenizer(tokenizer_name) + logger.info(f"The name of the custom clip model is {self.model_name}. 
We use openai clip load") + model, preprocess = clip.load(name=self.model_path, device="cpu", jit= self.jit) + model = model.to(self.device) + return model, preprocess def _convert_output(self, output): @@ -419,7 +326,7 @@ def __init__(self, model_type: str = "fp16/ViT-B/32", device: str = 'cuda', emb ''' if not self.device.startswith("cuda"): - raise InvalidModelDeviceError(f"FP16 clip model `{self.model_type}` is only available with device `cuda`.") + raise InvalidModelDeviceError(f"FP16 clip model `{self.model_type}` is only ava ilable with device `cuda`.") self.model_name = self.model_type.replace("fp16/", "") @@ -435,16 +342,60 @@ class OPEN_CLIP(CLIP): def __init__(self, model_type: str = "open_clip/ViT-B-32-quickgelu/laion400m_e32", device: str = 'cpu', embedding_dim: int = None, truncate: bool = True, **kwargs) -> None: super().__init__(model_type, device, embedding_dim, truncate , **kwargs) - self.model_name = model_type.split("/", 3)[1] - self.pretrained = model_type.split("/", 3)[2] + self.model_name = model_type.split("/", 3)[1] if model_type.startswith("open_clip/") else model_type + self.pretrained = model_type.split("/", 3)[2] if model_type.startswith("open_clip/") else model_type def load(self) -> None: # https://github.com/mlfoundations/open_clip - self.model, _, self.preprocess = open_clip.create_model_and_transforms(self.model_name, pretrained = self.pretrained, device=self.device, jit=False) - self.tokenizer = open_clip.get_tokenizer(self.model_name) - self.model.eval() + path = self.model_properties.get("localpath", None) or self.model_properties.get("url", None) + + if path is None: + self.model, _, self.preprocess = open_clip.create_model_and_transforms(self.model_name, + pretrained=self.pretrained, + device=self.device, jit=False) + self.tokenizer = open_clip.get_tokenizer(self.model_name) + self.model.eval() + else: + logger.info("Detecting custom clip model path. We use generic clip model loading.") + if os.path.isfile(path): + self.model_path = path + elif validators.url(path): + self.model_path = download_pretrained_from_url(path) + else: + raise InvalidModelPropertiesError( + f"The provided model path {path} is neither a local file nor a valid url.") + + self.precision = self.model_properties.get("precision", "fp32") + self.jit = self.model_properties.get("jit", False) + self.mean = self.model_properties.get("mean", None) + self.std = self.model_properties.get("std", None) + + self.model, self.preprocess = self.custom_clip_load() + self.tokenizer = self.load_tokenizer() + + self.model.eval() + + + def custom_clip_load(self): + self.model_name = self.model_properties.get("name", None) + + + logger.info(f"The name of the custom clip model is {self.model_name}. We use open_clip load") + model, _, preprocess = open_clip.create_model_and_transforms(model_name=self.model_name, jit = self.jit, pretrained=self.model_path, precision = self.precision, + image_mean=self.mean, image_std=self.std, device = self.device) + + return model, preprocess + + + def load_tokenizer(self): + tokenizer_name = self.model_properties.get("tokenizer", "clip") + if tokenizer_name == "clip": + return clip.tokenize + else: + logger.info(f"Custom HFTokenizer is provided. 
Loading...") + return HFTokenizer(tokenizer_name) def encode_text(self, sentence: Union[str, List[str]], normalize=True) -> FloatTensor: diff --git a/tests/s2_inference/test_generic_clip_model.py b/tests/s2_inference/test_generic_clip_model.py index 13feb8b0d..70bd8b4d7 100644 --- a/tests/s2_inference/test_generic_clip_model.py +++ b/tests/s2_inference/test_generic_clip_model.py @@ -41,10 +41,10 @@ def test_create_index_with_generic_open_clip_model_properties_url(self): """index should get created with custom model_properties """ model_name = 'test-model-1' - model_properties = {"name": "open_clip custom model", + model_properties = {"name": "ViT-B-32-quickgelu", "dimensions": 512, "url": "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_avg-8a00ab3c.pt", - "type": "clip", + "type": "open_clip", } tensor_search.create_vector_index( index_name=self.index_name_1, config=self.config, @@ -59,7 +59,7 @@ def test_create_index_with_generic_open_clip_model_properties_url(self): def test_create_index_with_generic_openai_clip_model_properties_url(self): model_name = 'test-model-2' - model_properties = {"name": "openai custom model", + model_properties = {"name": "ViT-B/32", "dimensions": 512, "url": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt", "type": "clip", @@ -83,10 +83,10 @@ def test_create_index_with_generic_open_clip_model_properties_localpath(self): target_model = download_pretrained_from_url(url) model_name = 'test-model-1' - model_properties = {"name": "open_clip custom model", + model_properties = {"name": "ViT-B-32-quickgelu", "dimensions": 512, "localpath": target_model, - "type": "clip", + "type": "open_clip", } tensor_search.create_vector_index( index_name=self.index_name_1, config=self.config, @@ -121,7 +121,7 @@ def test_vectorise_with_generic_open_clip_model_properties_invalid_url(self): invalid_url = "http://test/test/test/testmodel.pt" model_name = 'test-model-1' - model_properties = {"name": "open_clip custom model", + model_properties = {"name": "test test void model", "dimensions": 512, "url": invalid_url, "type": "clip", @@ -135,7 +135,7 @@ def test_create_index_with_model_properties_without_model_name(self): create_vector_index should throw an error if model_properties are given without model_name """ - model_properties = {"name": "openai custom model", + model_properties = {"name": "ViT-B-32", "dimensions": 512, "url": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt", "type": "clip", @@ -212,7 +212,7 @@ def test_vectorise_without_clip_type(self): content = ["test test"] model_name = "test-model" model_properties = { - "name": "openai custom model", + "name": "ViT-B-32", "dimensions": 512, "url": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt", #"type": "clip", @@ -253,7 +253,7 @@ def test_vectorise_generic_openai_clip_encode_image_results(self): model_name = "test-model" model_properties = { - "name": "openai custom model", + "name": "ViT-B/32", "dimensions": 512, "url": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt", "type": "clip", @@ -272,7 +272,7 @@ def test_vectorise_generic_openai_clip_encode_text_results(self): model_name = "test-model" model_properties = { - "name": "ViT-L/14", + "name": "ViT-B/32", 
"dimensions": 512, "url": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt", "type": "clip", @@ -295,7 +295,7 @@ def test_vectorise_generic_open_clip_encode_image_results(self): "name": "ViT-B-32-quickgelu", "dimensions": 512, "url": "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_e31-d867053b.pt", - "type": "clip", + "type": "open_clip", "jit" : False } @@ -314,7 +314,7 @@ def test_vectorise_generic_open_clip_encode_text_results(self): "name": "ViT-B-32-quickgelu", "dimensions": 512, "url": "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_e31-d867053b.pt", - "type": "clip", + "type": "open_clip", "jit": False } @@ -325,23 +325,4 @@ def test_vectorise_generic_open_clip_encode_text_results(self): assert np.abs(np.array(a) - np.array(b)).sum() < epsilon - def test_unsupported_generic_clip_name(self): - epsilon = 1e-2 - text = "this is a test to test the custom clip output results" - - - model_name = "test-model" - model_properties = { - "name": "this is a test name", - "dimensions": 512, - "url": "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_e31-d867053b.pt", - "type": "clip", - "jit": False - } - - a = vectorise(model_name, content=text, model_properties=model_properties) - b = vectorise("open_clip/ViT-B-32-quickgelu/laion400m_e31", content=text) - - assert np.abs(np.array(a) - np.array(b)).sum() < epsilon - From e5278b58a7c94de2654b5eee44e1bc004f68e63b Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 30 Jan 2023 18:22:21 +1100 Subject: [PATCH 36/54] Separate clip and open_clip load --- src/marqo/s2_inference/clip_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 9341193e1..0e607cf58 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -204,7 +204,7 @@ def __init__(self, model_type: str = "ViT-B/32", device: str = 'cpu', embedding self.processor = None self.embedding_dimension = embedding_dim self.truncate = truncate - self.model_properties = kwargs.get("model_properties", None) + self.model_properties = kwargs.get("model_properties", dict()) def load(self) -> None: From 1ae5948b21a63c5521380519c5c14cbaaa49c86c Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 31 Jan 2023 09:35:44 +1100 Subject: [PATCH 37/54] Separate clip and open_clip load --- src/marqo/s2_inference/s2_inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index c3b336a21..aabe44697 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -113,7 +113,7 @@ def _validate_model_properties(model_name: str, model_properties: dict) -> dict: if key not in model_properties: model_properties[key] = value - elif model_properties("type", None) is "clip": + elif model_properties("type", None) in ("clip", "open_clip"): required_keys = ["name", "dimensions"] for key in required_keys: if key not in model_properties: From da9f36f2c543274af675cd70ca22078496fc08f0 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 31 Jan 2023 10:29:09 +1100 Subject: [PATCH 38/54] add *args, **kwargs in sbert model class --- src/marqo/s2_inference/sbert_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/marqo/s2_inference/sbert_utils.py b/src/marqo/s2_inference/sbert_utils.py index d9fc4746d..befe48bba 100644 --- a/src/marqo/s2_inference/sbert_utils.py +++ b/src/marqo/s2_inference/sbert_utils.py @@ -11,7 +11,7 @@ class Model: """ generic model wrapper class """ - def __init__(self, model_name: str, device: str = 'cpu', batch_size: int = 2048, embedding_dim=None, max_seq_length=None) -> None: + def __init__(self, model_name: str, device: str = 'cpu', batch_size: int = 2048, embedding_dim=None, max_seq_length=None , *args, **kwargs) -> None: self.model_name = model_name self.device = device From 0c4954fea74754bd51cf7b52776e4f0c079311d5 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 31 Jan 2023 10:35:56 +1100 Subject: [PATCH 39/54] add **kwargs in sbert, onnx sbert model class --- src/marqo/s2_inference/sbert_onnx_utils.py | 2 +- src/marqo/s2_inference/sbert_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/marqo/s2_inference/sbert_onnx_utils.py b/src/marqo/s2_inference/sbert_onnx_utils.py index 378710b55..d5f3b9686 100644 --- a/src/marqo/s2_inference/sbert_onnx_utils.py +++ b/src/marqo/s2_inference/sbert_onnx_utils.py @@ -31,7 +31,7 @@ def __init__(self, model_name_or_path: Optional[str] = None, enable_overwrite: Optional[bool] = False, max_seq_length: int = 128, lower_case: bool = True, - ): + **kwargs): self.device = device diff --git a/src/marqo/s2_inference/sbert_utils.py b/src/marqo/s2_inference/sbert_utils.py index befe48bba..dcfbd2574 100644 --- a/src/marqo/s2_inference/sbert_utils.py +++ b/src/marqo/s2_inference/sbert_utils.py @@ -11,7 +11,7 @@ class Model: """ generic model wrapper class """ - def __init__(self, model_name: str, device: str = 'cpu', batch_size: int = 2048, embedding_dim=None, max_seq_length=None , *args, **kwargs) -> None: + def __init__(self, model_name: str, device: str = 'cpu', batch_size: int = 2048, embedding_dim=None, max_seq_length=None , **kwargs) -> None: self.model_name = model_name self.device = device From c82c87c7687d9ef490ed5071d07c1cdfda0db382 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 31 Jan 2023 12:18:40 +1100 Subject: [PATCH 40/54] typo fix --- src/marqo/s2_inference/s2_inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index aabe44697..7a91f1d6e 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -113,7 +113,7 @@ def _validate_model_properties(model_name: str, model_properties: dict) -> dict: if key not in model_properties: model_properties[key] = value - elif model_properties("type", None) in ("clip", "open_clip"): + elif model_properties.get("type", None) in ("clip", "open_clip"): required_keys = ["name", "dimensions"] for key in required_keys: if key not in model_properties: From ffe06f1a5bcb8c219bf0e5fb0b5d4b50423d73d6 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 1 Feb 2023 17:37:28 +1100 Subject: [PATCH 41/54] revise error style! 
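
This patch only rewords the messages attached to InvalidModelPropertiesError and
InvalidModelDeviceError; no loading logic changes. A minimal sketch of how the reworded
path error can surface through vectorise (the model name and checkpoint path below are
made up for illustration, ModelLoadError is assumed to live in marqo.s2_inference.errors
alongside the other errors, and vectorise may re-wrap the underlying error as a
ModelLoadError, as the invalid-localpath tests earlier in this series expect):

from marqo.s2_inference.s2_inference import vectorise
from marqo.s2_inference.errors import InvalidModelPropertiesError, ModelLoadError

# A generic open_clip model whose checkpoint path does not exist on disk.
model_properties = {
    "name": "ViT-B-32-quickgelu",
    "dimensions": 512,
    "localpath": "/tmp/does-not-exist.pt",  # hypothetical path, not a real checkpoint
    "type": "open_clip",
}

try:
    vectorise("my-custom-model", content=["test test"], model_properties=model_properties)
except (InvalidModelPropertiesError, ModelLoadError) as e:
    # The revised message states that the path is neither a local file nor a valid url
    # and points to the generic CLIP section of the Marqo docs.
    print(e)
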
--- src/marqo/s2_inference/clip_utils.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 0e607cf58..8d26e616a 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -223,7 +223,9 @@ def load(self) -> None: elif validators.url(path): self.model_path = download_pretrained_from_url(path) else: - raise InvalidModelPropertiesError(f"The provided model path {path} is neither a local file nor a valid url.") + raise InvalidModelPropertiesError(f"Marqo can not load the custom clip model." + f"The provided model path `{path}` is neither a local file nor a valid url." + f"Please check your provided model url and retry.") self.jit = self.model_properties.get("jit", False) self.model, self.preprocess = self.custom_clip_load() @@ -326,7 +328,9 @@ def __init__(self, model_type: str = "fp16/ViT-B/32", device: str = 'cuda', emb ''' if not self.device.startswith("cuda"): - raise InvalidModelDeviceError(f"FP16 clip model `{self.model_type}` is only ava ilable with device `cuda`.") + raise InvalidModelDeviceError(f"Marqo can not load the provided model `{self.model_type}`" + f"FP16 clip model `{self.model_type}` is only available with device `cuda`." + f"Please check you cuda availability or try the fp32 version `{self.model_type.replace('fp16/','')}`") self.model_name = self.model_type.replace("fp16/", "") @@ -363,8 +367,9 @@ def load(self) -> None: elif validators.url(path): self.model_path = download_pretrained_from_url(path) else: - raise InvalidModelPropertiesError( - f"The provided model path {path} is neither a local file nor a valid url.") + raise InvalidModelPropertiesError(f"Marqo can not load the custom clip model." + f"The provided model path `{path}` is neither a local file nor a valid url." + f"Please check your provided model url and retry.") self.precision = self.model_properties.get("precision", "fp32") self.jit = self.model_properties.get("jit", False) From 5e9314e723057f56f8ab75f70f4738e9abf9d387 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 1 Feb 2023 17:47:39 +1100 Subject: [PATCH 42/54] remove space --- src/marqo/s2_inference/clip_utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 8d26e616a..1891f8cb1 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -225,7 +225,8 @@ def load(self) -> None: else: raise InvalidModelPropertiesError(f"Marqo can not load the custom clip model." f"The provided model path `{path}` is neither a local file nor a valid url." - f"Please check your provided model url and retry.") + f"Please check your provided model url and retry" + f"Check `https://docs.marqo.ai/0.0.12/Models-Reference/dense_retrieval/` for more info.") self.jit = self.model_properties.get("jit", False) self.model, self.preprocess = self.custom_clip_load() @@ -330,7 +331,8 @@ def __init__(self, model_type: str = "fp16/ViT-B/32", device: str = 'cuda', emb if not self.device.startswith("cuda"): raise InvalidModelDeviceError(f"Marqo can not load the provided model `{self.model_type}`" f"FP16 clip model `{self.model_type}` is only available with device `cuda`." 
- f"Please check you cuda availability or try the fp32 version `{self.model_type.replace('fp16/','')}`") + f"Please check you cuda availability or try the fp32 version `{self.model_type.replace('fp16/','')}`" + f"Check `https://docs.marqo.ai/0.0.12/Models-Reference/dense_retrieval/` for more info.") self.model_name = self.model_type.replace("fp16/", "") @@ -369,7 +371,8 @@ def load(self) -> None: else: raise InvalidModelPropertiesError(f"Marqo can not load the custom clip model." f"The provided model path `{path}` is neither a local file nor a valid url." - f"Please check your provided model url and retry.") + f"Please check your provided model url and retry." + f"Check `https://docs.marqo.ai/0.0.12/Models-Reference/dense_retrieval/` for more info.") self.precision = self.model_properties.get("precision", "fp32") self.jit = self.model_properties.get("jit", False) From c469835e364db70a2ac305c0deaed6895659dfe3 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 1 Feb 2023 17:51:28 +1100 Subject: [PATCH 43/54] change error message --- src/marqo/s2_inference/clip_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 1891f8cb1..2f6c98fc0 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -332,7 +332,7 @@ def __init__(self, model_type: str = "fp16/ViT-B/32", device: str = 'cuda', emb raise InvalidModelDeviceError(f"Marqo can not load the provided model `{self.model_type}`" f"FP16 clip model `{self.model_type}` is only available with device `cuda`." f"Please check you cuda availability or try the fp32 version `{self.model_type.replace('fp16/','')}`" - f"Check `https://docs.marqo.ai/0.0.12/Models-Reference/dense_retrieval/` for more info.") + f"Check `https://docs.marqo.ai/0.0.13/Models-Reference/dense_retrieval/#generic-clip-models` for more info.") self.model_name = self.model_type.replace("fp16/", "") @@ -372,7 +372,7 @@ def load(self) -> None: raise InvalidModelPropertiesError(f"Marqo can not load the custom clip model." f"The provided model path `{path}` is neither a local file nor a valid url." f"Please check your provided model url and retry." - f"Check `https://docs.marqo.ai/0.0.12/Models-Reference/dense_retrieval/` for more info.") + f"Check `https://docs.marqo.ai/0.0.12/Models-Reference/dense_retrieval/#generic-clip-models` for more info.") self.precision = self.model_properties.get("precision", "fp32") self.jit = self.model_properties.get("jit", False) From ec7ca6ddff036a82a57f11ee5267a533deddda1a Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 1 Feb 2023 17:51:40 +1100 Subject: [PATCH 44/54] change error message --- src/marqo/s2_inference/clip_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 2f6c98fc0..194b395b9 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -372,7 +372,7 @@ def load(self) -> None: raise InvalidModelPropertiesError(f"Marqo can not load the custom clip model." f"The provided model path `{path}` is neither a local file nor a valid url." f"Please check your provided model url and retry." 
- f"Check `https://docs.marqo.ai/0.0.12/Models-Reference/dense_retrieval/#generic-clip-models` for more info.") + f"Check `https://docs.marqo.ai/0.0.13/Models-Reference/dense_retrieval/#generic-clip-models` for more info.") self.precision = self.model_properties.get("precision", "fp32") self.jit = self.model_properties.get("jit", False) From 7fa5c9d87ca0008c1c8dfb61d658309262a2bf27 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 2 Feb 2023 10:52:19 +1100 Subject: [PATCH 45/54] revised based on pandu's comments --- src/marqo/s2_inference/clip_utils.py | 19 ++---- src/marqo/s2_inference/errors.py | 2 +- src/marqo/s2_inference/model_registry.py | 8 +-- .../processing/custom_clip_utils.py | 12 +++- src/marqo/s2_inference/s2_inference.py | 2 +- tests/s2_inference/test_generic_clip_model.py | 65 ++++++++++++++++++- 6 files changed, 85 insertions(+), 23 deletions(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 194b395b9..f2287b4c2 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -16,36 +16,30 @@ from marqo.s2_inference.types import * from marqo.s2_inference.logger import get_logger import marqo.s2_inference.model_registry as model_registry -from marqo.s2_inference.errors import InvalidModelDeviceError, InvalidModelPropertiesError +from marqo.s2_inference.errors import IncompatibleModelDeviceError, InvalidModelPropertiesError from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize from marqo.s2_inference.processing.custom_clip_utils import HFTokenizer, download_pretrained_from_url -from open_clip.pretrained import _PRETRAINED as OPEN_CLIP_PRETRAINED +from torchvision.transforms import InterpolationMode logger = get_logger(__name__) OPENAI_DATASET_MEAN = (0.48145466, 0.4578275, 0.40821073) OPENAI_DATASET_STD = (0.26862954, 0.26130258, 0.27577711) +BICUBIC = InterpolationMode.BICUBIC def get_allowed_image_types(): return set(('.jpg', '.png', '.bmp', '.jpeg')) -try: - from torchvision.transforms import InterpolationMode - BICUBIC = InterpolationMode.BICUBIC -except ImportError: - BICUBIC = Image.BICUBIC - - def _convert_image_to_rgb(image: ImageType) -> ImageType: # Take a PIL.Image.Image and return its RGB version return image.convert("RGB") def _get_transform(n_px: int, image_mean:List[float] = None, image_std: List[float] = None) -> torch.Tensor: - ''' - + '''This function returns a transform to preprocess the image. The processed image will be passed into + clip model for inference. Args: n_px: the size of the processed image image_mean: the mean of the image used for normalization @@ -53,7 +47,6 @@ def _get_transform(n_px: int, image_mean:List[float] = None, image_std: List[flo Returns: the processed image tensor with shape (3, n_px, n_px) - ''' img_mean = image_mean or OPENAI_DATASET_MEAN img_std = image_std or OPENAI_DATASET_STD @@ -329,7 +322,7 @@ def __init__(self, model_type: str = "fp16/ViT-B/32", device: str = 'cuda', emb ''' if not self.device.startswith("cuda"): - raise InvalidModelDeviceError(f"Marqo can not load the provided model `{self.model_type}`" + raise IncompatibleModelDeviceError(f"Marqo can not load the provided model `{self.model_type}`" f"FP16 clip model `{self.model_type}` is only available with device `cuda`." 
f"Please check you cuda availability or try the fp32 version `{self.model_type.replace('fp16/','')}`" f"Check `https://docs.marqo.ai/0.0.13/Models-Reference/dense_retrieval/#generic-clip-models` for more info.") diff --git a/src/marqo/s2_inference/errors.py b/src/marqo/s2_inference/errors.py index d47df97cb..11ad8e9b1 100644 --- a/src/marqo/s2_inference/errors.py +++ b/src/marqo/s2_inference/errors.py @@ -53,5 +53,5 @@ class ModelNotInCacheError(S2InferenceError): # Raise an ERROR if the model is only available with "cpu" or "cuda" but # the other one is provided -class InvalidModelDeviceError(S2InferenceError): +class IncompatibleModelDeviceError(S2InferenceError): pass \ No newline at end of file diff --git a/src/marqo/s2_inference/model_registry.py b/src/marqo/s2_inference/model_registry.py index ed7e4ffaa..6ccf50da1 100644 --- a/src/marqo/s2_inference/model_registry.py +++ b/src/marqo/s2_inference/model_registry.py @@ -1538,20 +1538,20 @@ def _get_fp16_clip_properties() -> Dict: "fp16/ViT-L/14": { "name": "fp16/ViT-L/14", "dimensions": 768, - "type": "fp16clip", + "type": "fp16_clip", "notes": "The faster version (fp16, load from `cuda`) of openai clip model" }, 'fp16/ViT-B/32': {"name": "fp16/ViT-B/32", "dimensions": 512, "notes": "The faster version (fp16, load from `cuda`) of openai clip model", - "type": "fp16clip", + "type": "fp16_clip", }, 'fp16/ViT-B/16': {"name": "fp16/ViT-B/16", "dimensions": 512, "notes": "The faster version (fp16, load from `cuda`) of openai clip model", - "type": "fp16clip", + "type": "fp16_clip", }, } @@ -1596,7 +1596,7 @@ def _get_model_load_mappings() -> Dict: 'sbert_onnx':SBERT_ONNX, 'clip_onnx': CLIP_ONNX, "multilingual_clip" : MULTILINGUAL_CLIP, - "fp16clip": FP16_CLIP, + "fp16_clip": FP16_CLIP, 'random':Random, 'hf':HF_MODEL} diff --git a/src/marqo/s2_inference/processing/custom_clip_utils.py b/src/marqo/s2_inference/processing/custom_clip_utils.py index 0207e0b8f..befcf99fb 100644 --- a/src/marqo/s2_inference/processing/custom_clip_utils.py +++ b/src/marqo/s2_inference/processing/custom_clip_utils.py @@ -39,6 +39,16 @@ def download_pretrained_from_url( url: str, cache_dir: Union[str, None] = None, ): + ''' + This function takes a clip model checkpoint url as input, downloads the model, and returns the local + path of the downloaded file. + Args: + url: a valid string of the url address. + cache_dir: the directory to store the file + Returns: + download_target: the local path of the downloaded file. 
+ ''' + buffer_size = 8192 if not cache_dir: cache_dir = os.path.expanduser("~/.cache/clip") os.makedirs(cache_dir, exist_ok=True) @@ -52,7 +62,7 @@ def download_pretrained_from_url( with urllib.request.urlopen(url) as source, open(download_target, "wb") as output: with tqdm(total=int(source.headers.get("Content-Length")), ncols=80, unit='iB', unit_scale=True) as loop: while True: - buffer = source.read(8192) + buffer = source.read(buffer_size) if not buffer: break diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index 7a91f1d6e..761f50af3 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -100,7 +100,7 @@ def _validate_model_properties(model_name: str, model_properties: dict) -> dict: if model_properties is not None: """checks model dict to see if all required keys are present """ - if model_properties.get("type", None) is None or "sbert": + if model_properties.get("type", None) in (None, "sbert"): required_keys = ["name", "dimensions"] for key in required_keys: if key not in model_properties: diff --git a/tests/s2_inference/test_generic_clip_model.py b/tests/s2_inference/test_generic_clip_model.py index 70bd8b4d7..23d358f66 100644 --- a/tests/s2_inference/test_generic_clip_model.py +++ b/tests/s2_inference/test_generic_clip_model.py @@ -4,6 +4,7 @@ from marqo.s2_inference.errors import InvalidModelPropertiesError, UnknownModelError, ModelLoadError from marqo.tensor_search import tensor_search from marqo.s2_inference.processing.custom_clip_utils import download_pretrained_from_url +from marqo.s2_inference.s2_inference import clear_loaded_models from marqo.s2_inference.s2_inference import ( available_models, @@ -35,11 +36,13 @@ def tearDown(self) -> None: tensor_search.delete_index(config=self.config, index_name=self.index_name_2) except IndexNotFoundError as e: pass + clear_loaded_models() - def test_create_index_with_generic_open_clip_model_properties_url(self): + def test_create_index_and_add_documents_with_generic_open_clip_model_properties_url(self): """index should get created with custom model_properties """ + # Step1 - Create Index model_name = 'test-model-1' model_properties = {"name": "ViT-B-32-quickgelu", "dimensions": 512, @@ -55,9 +58,21 @@ def test_create_index_with_generic_open_clip_model_properties_url(self): } } ) + # Step2 - Add documents + docs = [ + { + "_id": "123", + "title 1": "content 1", + "desc 2": "content 2. blah blah blah" + }] + auto_refresh = True + tensor_search.add_documents(config=self.config, index_name=self.index_name_1, docs=docs, auto_refresh=auto_refresh) + + # Step3 - Search + results = tensor_search.search(config=self.config, index_name=self.index_name_1, text = "test-test") - def test_create_index_with_generic_openai_clip_model_properties_url(self): + def test_pipeline_with_generic_openai_clip_model_properties_url(self): model_name = 'test-model-2' model_properties = {"name": "ViT-B/32", "dimensions": 512, @@ -75,8 +90,20 @@ def test_create_index_with_generic_openai_clip_model_properties_url(self): } ) + docs = [ + { + "_id": "123", + "title 1": "content 1", + "desc 2": "content 2. 
blah blah blah" + }] + + auto_refresh = True + tensor_search.add_documents(config=self.config, index_name=self.index_name_2, docs=docs, auto_refresh=auto_refresh) + + results = tensor_search.search(config=self.config, index_name=self.index_name_2, text = "test-test") - def test_create_index_with_generic_open_clip_model_properties_localpath(self): + + def test_pipeline_with_generic_open_clip_model_properties_localpath(self): """index should get created with custom model_properties """ url = "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_avg-8a00ab3c.pt" @@ -98,6 +125,19 @@ def test_create_index_with_generic_open_clip_model_properties_localpath(self): } ) + docs = [ + { + "_id": "123", + "title 1": "content 1", + "desc 2": "content 2. blah blah blah" + }] + + auto_refresh = True + tensor_search.add_documents(config=self.config, index_name=self.index_name_1, docs=docs, + auto_refresh=auto_refresh) + + results = tensor_search.search(config=self.config, index_name=self.index_name_1, text="test-test") + def test_vectorise_with_generic_open_clip_model_properties_invalid_localpath(self): """index should get created with custom model_properties """ @@ -325,4 +365,23 @@ def test_vectorise_generic_open_clip_encode_text_results(self): assert np.abs(np.array(a) - np.array(b)).sum() < epsilon + def test_incorrect_vectorise_generic_open_clip_encode_text_results(self): + epsilon = 1e-3 + text = "this is a test to test the custom clip output results" + + model_name = "test-model" + model_properties = { + "name": "ViT-B-32-quickgelu", + "dimensions": 512, + "url": "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_e31-d867053b.pt", + "type": "open_clip", + "jit": False + } + + + a = vectorise(model_name, content=text, model_properties=model_properties) + b = vectorise("open_clip/ViT-B-32-quickgelu/laion400m_e32", content=text) + + assert np.abs(np.array(a) - np.array(b)).sum() > epsilon + From 7326261cd7e4b9977cd6c01cb4e61f325bb8cce1 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 2 Feb 2023 14:30:44 +1100 Subject: [PATCH 46/54] adding test pipelines --- tests/s2_inference/test_generic_clip_model.py | 43 +++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/tests/s2_inference/test_generic_clip_model.py b/tests/s2_inference/test_generic_clip_model.py index 23d358f66..b03b0e149 100644 --- a/tests/s2_inference/test_generic_clip_model.py +++ b/tests/s2_inference/test_generic_clip_model.py @@ -69,8 +69,22 @@ def test_create_index_and_add_documents_with_generic_open_clip_model_properties_ auto_refresh = True tensor_search.add_documents(config=self.config, index_name=self.index_name_1, docs=docs, auto_refresh=auto_refresh) + # test if we can get the document by _id + assert tensor_search.get_document_by_id( + config=self.config, index_name=self.index_name_1, + document_id="123") == { + "_id": "123", + "title 1": "content 1", + "desc 2": "content 2. blah blah blah" + } + # Step3 - Search - results = tensor_search.search(config=self.config, index_name=self.index_name_1, text = "test-test") + search_res = tensor_search.search(config=self.config, index_name=self.index_name_1, text = "content 2. 
blah blah blah") + assert len(search_res['hits']) == 1 + assert search_res["hits"][0]["_score"] > 0.6 + + + def test_pipeline_with_generic_openai_clip_model_properties_url(self): model_name = 'test-model-2' @@ -100,7 +114,18 @@ def test_pipeline_with_generic_openai_clip_model_properties_url(self): auto_refresh = True tensor_search.add_documents(config=self.config, index_name=self.index_name_2, docs=docs, auto_refresh=auto_refresh) - results = tensor_search.search(config=self.config, index_name=self.index_name_2, text = "test-test") + assert tensor_search.get_document_by_id( + config=self.config, index_name=self.index_name_2, + document_id="123") == { + "_id": "123", + "title 1": "content 1", + "desc 2": "content 2. blah blah blah" + } + + search_res = tensor_search.search(config=self.config, index_name=self.index_name_2, + text="content 2. blah blah blah") + assert len(search_res['hits']) == 1 + assert search_res["hits"][0]["_score"] > 0.6 def test_pipeline_with_generic_open_clip_model_properties_localpath(self): @@ -136,7 +161,19 @@ def test_pipeline_with_generic_open_clip_model_properties_localpath(self): tensor_search.add_documents(config=self.config, index_name=self.index_name_1, docs=docs, auto_refresh=auto_refresh) - results = tensor_search.search(config=self.config, index_name=self.index_name_1, text="test-test") + assert tensor_search.get_document_by_id( + config=self.config, index_name=self.index_name_1, + document_id="123") == { + "_id": "123", + "title 1": "content 1", + "desc 2": "content 2. blah blah blah" + } + + # Step3 - Search + search_res = tensor_search.search(config=self.config, index_name=self.index_name_1, text = "content 2. blah blah blah") + assert len(search_res['hits']) == 1 + assert search_res["hits"][0]["_score"] > 0.6 + def test_vectorise_with_generic_open_clip_model_properties_invalid_localpath(self): """index should get created with custom model_properties From f166d65401586fa7f014fdf2ff49c53236813820 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 2 Feb 2023 14:39:04 +1100 Subject: [PATCH 47/54] test another document --- tests/s2_inference/test_generic_clip_model.py | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/tests/s2_inference/test_generic_clip_model.py b/tests/s2_inference/test_generic_clip_model.py index b03b0e149..843b6214e 100644 --- a/tests/s2_inference/test_generic_clip_model.py +++ b/tests/s2_inference/test_generic_clip_model.py @@ -78,6 +78,26 @@ def test_create_index_and_add_documents_with_generic_open_clip_model_properties_ "desc 2": "content 2. blah blah blah" } + # test another document + docs2 = [ + { + "_id": "321", + "title 1": "test test test", + "desc 2": "test again test again test again" + }] + + tensor_search.add_documents(config=self.config, index_name=self.index_name_1, docs=docs2, + auto_refresh=auto_refresh) + + assert tensor_search.get_document_by_id( + config=self.config, index_name=self.index_name_1, + document_id="321") == { + "_id": "321", + "title 1": "test test test", + "desc 2": "test again test again test again" + } + + # Step3 - Search search_res = tensor_search.search(config=self.config, index_name=self.index_name_1, text = "content 2. blah blah blah") assert len(search_res['hits']) == 1 @@ -122,6 +142,24 @@ def test_pipeline_with_generic_openai_clip_model_properties_url(self): "desc 2": "content 2. 
blah blah blah" } + docs2 = [ + { + "_id": "321", + "title 1": "test test test", + "desc 2": "test again test again test again" + }] + + tensor_search.add_documents(config=self.config, index_name=self.index_name_2, docs=docs2, + auto_refresh=auto_refresh) + + assert tensor_search.get_document_by_id( + config=self.config, index_name=self.index_name_2, + document_id="321") == { + "_id": "321", + "title 1": "test test test", + "desc 2": "test again test again test again" + } + search_res = tensor_search.search(config=self.config, index_name=self.index_name_2, text="content 2. blah blah blah") assert len(search_res['hits']) == 1 @@ -169,6 +207,24 @@ def test_pipeline_with_generic_open_clip_model_properties_localpath(self): "desc 2": "content 2. blah blah blah" } + docs2 = [ + { + "_id": "321", + "title 1": "test test test", + "desc 2": "test again test again test again" + }] + + tensor_search.add_documents(config=self.config, index_name=self.index_name_1, docs=docs2, + auto_refresh=auto_refresh) + + assert tensor_search.get_document_by_id( + config=self.config, index_name=self.index_name_1, + document_id="321") == { + "_id": "321", + "title 1": "test test test", + "desc 2": "test again test again test again" + } + # Step3 - Search search_res = tensor_search.search(config=self.config, index_name=self.index_name_1, text = "content 2. blah blah blah") assert len(search_res['hits']) == 1 From ea35ff36bd19125ac7ceed73bbb0ff1e91b8cd08 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 2 Feb 2023 14:40:45 +1100 Subject: [PATCH 48/54] test another document --- tests/s2_inference/test_generic_clip_model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/s2_inference/test_generic_clip_model.py b/tests/s2_inference/test_generic_clip_model.py index 843b6214e..74c2d0421 100644 --- a/tests/s2_inference/test_generic_clip_model.py +++ b/tests/s2_inference/test_generic_clip_model.py @@ -100,7 +100,7 @@ def test_create_index_and_add_documents_with_generic_open_clip_model_properties_ # Step3 - Search search_res = tensor_search.search(config=self.config, index_name=self.index_name_1, text = "content 2. blah blah blah") - assert len(search_res['hits']) == 1 + assert len(search_res['hits']) == 2 assert search_res["hits"][0]["_score"] > 0.6 @@ -162,7 +162,7 @@ def test_pipeline_with_generic_openai_clip_model_properties_url(self): search_res = tensor_search.search(config=self.config, index_name=self.index_name_2, text="content 2. blah blah blah") - assert len(search_res['hits']) == 1 + assert len(search_res['hits']) == 2 assert search_res["hits"][0]["_score"] > 0.6 @@ -227,7 +227,7 @@ def test_pipeline_with_generic_open_clip_model_properties_localpath(self): # Step3 - Search search_res = tensor_search.search(config=self.config, index_name=self.index_name_1, text = "content 2. 
blah blah blah") - assert len(search_res['hits']) == 1 + assert len(search_res['hits']) == 2 assert search_res["hits"][0]["_score"] > 0.6 From 789203fd0a319b9ef16d792b276fbc43c58cae49 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 2 Feb 2023 14:42:28 +1100 Subject: [PATCH 49/54] test another document --- tests/s2_inference/test_generic_clip_model.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/s2_inference/test_generic_clip_model.py b/tests/s2_inference/test_generic_clip_model.py index 74c2d0421..71199ca49 100644 --- a/tests/s2_inference/test_generic_clip_model.py +++ b/tests/s2_inference/test_generic_clip_model.py @@ -99,8 +99,8 @@ def test_create_index_and_add_documents_with_generic_open_clip_model_properties_ # Step3 - Search - search_res = tensor_search.search(config=self.config, index_name=self.index_name_1, text = "content 2. blah blah blah") - assert len(search_res['hits']) == 2 + search_res = tensor_search.search(config=self.config, index_name=self.index_name_1, text = "content 2. blah blah blah", limit =1) + assert len(search_res['hits']) == 1 assert search_res["hits"][0]["_score"] > 0.6 @@ -161,8 +161,8 @@ def test_pipeline_with_generic_openai_clip_model_properties_url(self): } search_res = tensor_search.search(config=self.config, index_name=self.index_name_2, - text="content 2. blah blah blah") - assert len(search_res['hits']) == 2 + text="content 2. blah blah blah", limit = 1) + assert len(search_res['hits']) == 1 assert search_res["hits"][0]["_score"] > 0.6 @@ -226,8 +226,8 @@ def test_pipeline_with_generic_open_clip_model_properties_localpath(self): } # Step3 - Search - search_res = tensor_search.search(config=self.config, index_name=self.index_name_1, text = "content 2. blah blah blah") - assert len(search_res['hits']) == 2 + search_res = tensor_search.search(config=self.config, index_name=self.index_name_1, text = "content 2. blah blah blah", limit= 1) + assert len(search_res['hits']) == 1 assert search_res["hits"][0]["_score"] > 0.6 From 92f7a2f4e323f716979a617c214149481415476e Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 2 Feb 2023 14:44:41 +1100 Subject: [PATCH 50/54] test another document --- tests/s2_inference/test_generic_clip_model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/s2_inference/test_generic_clip_model.py b/tests/s2_inference/test_generic_clip_model.py index 71199ca49..b5f013dbb 100644 --- a/tests/s2_inference/test_generic_clip_model.py +++ b/tests/s2_inference/test_generic_clip_model.py @@ -99,7 +99,7 @@ def test_create_index_and_add_documents_with_generic_open_clip_model_properties_ # Step3 - Search - search_res = tensor_search.search(config=self.config, index_name=self.index_name_1, text = "content 2. blah blah blah", limit =1) + search_res = tensor_search.search(config=self.config, index_name=self.index_name_1, text = "content 2. blah blah blah", result_count=1) assert len(search_res['hits']) == 1 assert search_res["hits"][0]["_score"] > 0.6 @@ -161,7 +161,7 @@ def test_pipeline_with_generic_openai_clip_model_properties_url(self): } search_res = tensor_search.search(config=self.config, index_name=self.index_name_2, - text="content 2. blah blah blah", limit = 1) + text="content 2. 
blah blah blah", result_count=1) assert len(search_res['hits']) == 1 assert search_res["hits"][0]["_score"] > 0.6 @@ -226,7 +226,7 @@ def test_pipeline_with_generic_open_clip_model_properties_localpath(self): } # Step3 - Search - search_res = tensor_search.search(config=self.config, index_name=self.index_name_1, text = "content 2. blah blah blah", limit= 1) + search_res = tensor_search.search(config=self.config, index_name=self.index_name_1, text = "content 2. blah blah blah", result_count=1) assert len(search_res['hits']) == 1 assert search_res["hits"][0]["_score"] > 0.6 From 9ea286c61c7ee587f010a5d4f1a4934478b382d3 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 2 Feb 2023 14:46:26 +1100 Subject: [PATCH 51/54] test another document --- tests/s2_inference/test_generic_clip_model.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/s2_inference/test_generic_clip_model.py b/tests/s2_inference/test_generic_clip_model.py index b5f013dbb..c83d465ac 100644 --- a/tests/s2_inference/test_generic_clip_model.py +++ b/tests/s2_inference/test_generic_clip_model.py @@ -43,6 +43,7 @@ def test_create_index_and_add_documents_with_generic_open_clip_model_properties_ """index should get created with custom model_properties """ # Step1 - Create Index + score_threshold = 0.6 model_name = 'test-model-1' model_properties = {"name": "ViT-B-32-quickgelu", "dimensions": 512, @@ -101,12 +102,13 @@ def test_create_index_and_add_documents_with_generic_open_clip_model_properties_ # Step3 - Search search_res = tensor_search.search(config=self.config, index_name=self.index_name_1, text = "content 2. blah blah blah", result_count=1) assert len(search_res['hits']) == 1 - assert search_res["hits"][0]["_score"] > 0.6 + assert search_res["hits"][0]["_score"] > score_threshold def test_pipeline_with_generic_openai_clip_model_properties_url(self): + score_threshold = 0.6 model_name = 'test-model-2' model_properties = {"name": "ViT-B/32", "dimensions": 512, @@ -163,12 +165,13 @@ def test_pipeline_with_generic_openai_clip_model_properties_url(self): search_res = tensor_search.search(config=self.config, index_name=self.index_name_2, text="content 2. blah blah blah", result_count=1) assert len(search_res['hits']) == 1 - assert search_res["hits"][0]["_score"] > 0.6 + assert search_res["hits"][0]["_score"] > score_threshold def test_pipeline_with_generic_open_clip_model_properties_localpath(self): """index should get created with custom model_properties """ + score_threshold = 0.6 url = "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_avg-8a00ab3c.pt" target_model = download_pretrained_from_url(url) @@ -228,7 +231,7 @@ def test_pipeline_with_generic_open_clip_model_properties_localpath(self): # Step3 - Search search_res = tensor_search.search(config=self.config, index_name=self.index_name_1, text = "content 2. 
blah blah blah", result_count=1) assert len(search_res['hits']) == 1 - assert search_res["hits"][0]["_score"] > 0.6 + assert search_res["hits"][0]["_score"] > score_threshold def test_vectorise_with_generic_open_clip_model_properties_invalid_localpath(self): From e80e53b405ce524ccdb1f77c227535771c73fe43 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 2 Feb 2023 15:00:00 +1100 Subject: [PATCH 52/54] test another document --- tests/s2_inference/test_generic_clip_model.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/s2_inference/test_generic_clip_model.py b/tests/s2_inference/test_generic_clip_model.py index c83d465ac..d38bca842 100644 --- a/tests/s2_inference/test_generic_clip_model.py +++ b/tests/s2_inference/test_generic_clip_model.py @@ -103,8 +103,7 @@ def test_create_index_and_add_documents_with_generic_open_clip_model_properties_ search_res = tensor_search.search(config=self.config, index_name=self.index_name_1, text = "content 2. blah blah blah", result_count=1) assert len(search_res['hits']) == 1 assert search_res["hits"][0]["_score"] > score_threshold - - + assert search_res["hits"][0]["_id"] == "123" def test_pipeline_with_generic_openai_clip_model_properties_url(self): @@ -166,6 +165,7 @@ def test_pipeline_with_generic_openai_clip_model_properties_url(self): text="content 2. blah blah blah", result_count=1) assert len(search_res['hits']) == 1 assert search_res["hits"][0]["_score"] > score_threshold + assert search_res["hits"][0]["_id"] == "123" def test_pipeline_with_generic_open_clip_model_properties_localpath(self): @@ -232,6 +232,7 @@ def test_pipeline_with_generic_open_clip_model_properties_localpath(self): search_res = tensor_search.search(config=self.config, index_name=self.index_name_1, text = "content 2. 
blah blah blah", result_count=1) assert len(search_res['hits']) == 1 assert search_res["hits"][0]["_score"] > score_threshold + assert search_res["hits"][0]["_id"] == "123" def test_vectorise_with_generic_open_clip_model_properties_invalid_localpath(self): From e7057c7eca6484c74e2be3e32bd6de52d41bac3e Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 3 Feb 2023 09:55:01 +1100 Subject: [PATCH 53/54] change downloading path for clip --- src/marqo/s2_inference/clip_utils.py | 14 +++++++------- src/marqo/s2_inference/configs.py | 1 + .../s2_inference/processing/custom_clip_utils.py | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index f2287b4c2..1e9b87b97 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -12,7 +12,6 @@ import open_clip from multilingual_clip import pt_multilingual_clip import transformers -from clip.model import build_model from marqo.s2_inference.types import * from marqo.s2_inference.logger import get_logger import marqo.s2_inference.model_registry as model_registry @@ -20,6 +19,7 @@ from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize from marqo.s2_inference.processing.custom_clip_utils import HFTokenizer, download_pretrained_from_url from torchvision.transforms import InterpolationMode +from marqo.s2_inference.configs import ModelCache logger = get_logger(__name__) @@ -206,7 +206,7 @@ def load(self) -> None: if path is None: # The original method to load the openai clip model # https://github.com/openai/CLIP/issues/30 - self.model, self.preprocess = clip.load(self.model_type, device='cpu', jit=False) + self.model, self.preprocess = clip.load(self.model_type, device='cpu', jit=False, download_root=ModelCache.clip_cache_path) self.model = self.model.to(self.device) self.tokenizer = clip.tokenize else: @@ -232,7 +232,7 @@ def custom_clip_load(self): self.model_name = self.model_properties.get("name", None) logger.info(f"The name of the custom clip model is {self.model_name}. We use openai clip load") - model, preprocess = clip.load(name=self.model_path, device="cpu", jit= self.jit) + model, preprocess = clip.load(name=self.model_path, device="cpu", jit= self.jit, download_root=ModelCache.clip_cache_path) model = model.to(self.device) return model, preprocess @@ -332,7 +332,7 @@ def __init__(self, model_type: str = "fp16/ViT-B/32", device: str = 'cuda', emb def load(self) -> None: # https://github.com/openai/CLIP/issues/30 - self.model, self.preprocess = clip.load(self.model_name, device='cuda', jit=False) + self.model, self.preprocess = clip.load(self.model_name, device='cuda', jit=False, download_root=ModelCache.clip_cache_path) self.model = self.model.to(self.device) self.tokenizer = clip.tokenize self.model.eval() @@ -352,7 +352,7 @@ def load(self) -> None: if path is None: self.model, _, self.preprocess = open_clip.create_model_and_transforms(self.model_name, pretrained=self.pretrained, - device=self.device, jit=False) + device=self.device, jit=False, cache_dir=ModelCache.clip_cache_path) self.tokenizer = open_clip.get_tokenizer(self.model_name) self.model.eval() else: @@ -384,7 +384,7 @@ def custom_clip_load(self): logger.info(f"The name of the custom clip model is {self.model_name}. 
We use open_clip load") model, _, preprocess = open_clip.create_model_and_transforms(model_name=self.model_name, jit = self.jit, pretrained=self.model_path, precision = self.precision, - image_mean=self.mean, image_std=self.std, device = self.device) + image_mean=self.mean, image_std=self.std, device = self.device, cache_dir=ModelCache.clip_cache_path) return model, preprocess @@ -432,7 +432,7 @@ def __init__(self, model_type: str = "multilingual-clip/ViT-L/14", device: str = def load(self) -> None: if self.visual_name.startswith("openai/"): clip_name = self.visual_name.replace("openai/", "") - self.visual_model, self.preprocess = clip.load(name = clip_name, device = "cpu", jit = False) + self.visual_model, self.preprocess = clip.load(name = clip_name, device = "cpu", jit = False, download_root=ModelCache.clip_cache_path) self.visual_model = self.visual_model.to(self.device) self.visual_model = self.visual_model.visual diff --git a/src/marqo/s2_inference/configs.py b/src/marqo/s2_inference/configs.py index 143aef5d8..49b7f0d8f 100644 --- a/src/marqo/s2_inference/configs.py +++ b/src/marqo/s2_inference/configs.py @@ -4,6 +4,7 @@ class ModelCache: onnx_cache_path = os.environ.get('ONNX_SAVE_PATH', './cache/models_onnx/') torch_cache_path = os.getenv('SENTENCE_TRANSFORMERS_HOME', './cache/models/') + clip_cache_path = os.getenv('CLIP_SAVE_PATH', './cache/clip/') class BaseTransformerModels: diff --git a/src/marqo/s2_inference/processing/custom_clip_utils.py b/src/marqo/s2_inference/processing/custom_clip_utils.py index befcf99fb..32723f520 100644 --- a/src/marqo/s2_inference/processing/custom_clip_utils.py +++ b/src/marqo/s2_inference/processing/custom_clip_utils.py @@ -6,7 +6,7 @@ import os import urllib from tqdm import tqdm - +from src.marqo.s2_inference.configs import ModelCache def whitespace_clean(text): text = re.sub(r'\s+', ' ', text) text = text.strip() @@ -50,7 +50,7 @@ def download_pretrained_from_url( ''' buffer_size = 8192 if not cache_dir: - cache_dir = os.path.expanduser("~/.cache/clip") + cache_dir = os.path.expanduser(ModelCache.clip_cache_path) os.makedirs(cache_dir, exist_ok=True) filename = os.path.basename(url) From 42410dfd16d3ea83e6acf6a7b76c50096309a425 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 3 Feb 2023 14:56:07 +1100 Subject: [PATCH 54/54] edit error --- src/marqo/s2_inference/s2_inference.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index 761f50af3..5d51645f2 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -104,7 +104,9 @@ def _validate_model_properties(model_name: str, model_properties: dict) -> dict: required_keys = ["name", "dimensions"] for key in required_keys: if key not in model_properties: - raise InvalidModelPropertiesError(f"model_properties has missing key '{key}'. ") + raise InvalidModelPropertiesError(f"model_properties has missing key '{key}'." + f"please update your model properties with required key `{key}`" + f"check `https://docs.marqo.ai/0.0.12/Models-Reference/dense_retrieval/` for more info.") """updates model dict with default values if optional keys are missing """ @@ -117,8 +119,9 @@ def _validate_model_properties(model_name: str, model_properties: dict) -> dict: required_keys = ["name", "dimensions"] for key in required_keys: if key not in model_properties: - raise InvalidModelPropertiesError(f"model_properties has missing key '{key}'. 
") - + raise InvalidModelPropertiesError(f"model_properties has missing key '{key}'." + f"please update your model properties with required key `{key}`" + f"check `https://docs.marqo.ai/0.0.12/Models-Reference/dense_retrieval/` for more info.") else: model_properties = get_model_properties_from_registry(model_name)