[Features] Open clip update #299

Merged 6 commits on Feb 5, 2023
2 changes: 1 addition & 1 deletion requirements.txt
@@ -15,7 +15,7 @@ Pillow==9.3.0
numpy==1.23.4
validators==0.20.0
sentence-transformers==2.2.2
-open_clip_torch==2.9.2
+open_clip_torch==2.10.1
clip-marqo==1.0.2
protobuf==3.20.1
onnx==1.12.0
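For context, this version bump is what unlocks the ConvNeXt checkpoints registered below. A minimal sketch of loading one of them directly through open_clip (illustrative only; the model and pretrained tags mirror the new registry entries):

import torch
import open_clip

# Sketch: load one of the newly supported ConvNeXt checkpoints.
# The convnext_base_w / laion2b_s13b_b82k weights require
# open_clip_torch >= 2.10, hence the pin change above.
model, _, preprocess = open_clip.create_model_and_transforms(
    'convnext_base_w', pretrained='laion2b_s13b_b82k')
tokens = open_clip.tokenize(['hello', 'this is a test sentence. so is this.'])
with torch.no_grad():
    text_features = model.encode_text(tokens)
print(text_features.shape)  # expected torch.Size([2, 640]), matching the registry 'dimensions'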
48 changes: 47 additions & 1 deletion src/marqo/s2_inference/model_registry.py
@@ -284,7 +284,53 @@ def _get_open_clip_properties() -> Dict:
'dimensions': 1024,
'note': 'clip model from open_clip implementation',
'type': 'open_clip',
-'pretrained': 'laion2b_s12b_b42k'}}
+'pretrained': 'laion2b_s12b_b42k'},

# TODO: uncomment this model in the next open_clip release.
# There is a typo in the current release of open_clip, so this
# model will be added once the next release is out.
# 'open_clip/convnext_base/laion400m_s13b_b51k':
# {'name': 'open_clip/convnext_base/laion400m_s13b_b51k',
# 'dimensions': 512,
# 'note': 'clip model from open_clip implementation',
# 'type': 'open_clip',
# 'pretrained': 'laion400m_s13b_b51k'},

'open_clip/convnext_base_w/laion2b_s13b_b82k': {
'name': 'open_clip/convnext_base_w/laion2b_s13b_b82k',
'dimensions': 640,
'note': 'clip model from open_clip implementation',
'type': 'open_clip',
'pretrained': 'laion2b_s13b_b82k'},

'open_clip/convnext_base_w/laion2b_s13b_b82k_augreg': {
'name': 'open_clip/convnext_base_w/laion2b_s13b_b82k_augreg',
'dimensions': 640,
'note': 'clip model from open_clip implementation',
'type': 'open_clip',
'pretrained': 'laion2b_s13b_b82k_augreg'},

'open_clip/convnext_base_w/laion_aesthetic_s13b_b82k': {
'name': 'open_clip/convnext_base_w/laion_aesthetic_s13b_b82k',
'dimensions': 640,
'note': 'clip model from open_clip implementation',
'type': 'open_clip',
'pretrained': 'laion_aesthetic_s13b_b82k'},

'open_clip/convnext_base_w_320/laion_aesthetic_s13b_b82k': {
'name': 'open_clip/convnext_base_w_320/laion_aesthetic_s13b_b82k',
'dimensions': 640,
'note': 'clip model from open_clip implementation',
'type': 'open_clip',
'pretrained': 'laion_aesthetic_s13b_b82k'},

'open_clip/convnext_base_w_320/laion_aesthetic_s13b_b82k_augreg': {
'name': 'open_clip/convnext_base_w_320/laion_aesthetic_s13b_b82k_augreg',
'dimensions': 640,
'note': 'clip model from open_clip implementation',
'type': 'open_clip',
'pretrained': 'laion_aesthetic_s13b_b82k_augreg'},
}

return OPEN_CLIP_MODEL_PROPERTIES

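The new entries follow the existing registry schema: the Marqo model name keys a dict whose 'dimensions' field gives the embedding width and whose 'pretrained' field is the open_clip pretrained tag. A small sketch of how an entry is read (calling the module-private helper directly here is for illustration only):

from marqo.s2_inference.model_registry import _get_open_clip_properties

props = _get_open_clip_properties()
entry = props['open_clip/convnext_base_w/laion2b_s13b_b82k']
assert entry['dimensions'] == 640
assert entry['pretrained'] == 'laion2b_s13b_b82k'
assert entry['type'] == 'open_clip'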
18 changes: 13 additions & 5 deletions tests/s2_inference/test_encoding.py
@@ -24,7 +24,9 @@ def tearDown(self) -> None:
clear_loaded_models()

def test_vectorize(self):
names = ["onnx16/open_clip/ViT-B-32/laion400m_e32", 'onnx32/open_clip/ViT-B-32-quickgelu/laion400m_e32',
names = ["open_clip/convnext_base_w/laion2b_s13b_b82k",
"open_clip/convnext_base_w_320/laion_aesthetic_s13b_b82k_augreg",
"onnx16/open_clip/ViT-B-32/laion400m_e32", 'onnx32/open_clip/ViT-B-32-quickgelu/laion400m_e32',
"all-MiniLM-L6-v1", "all_datasets_v4_MiniLM-L6", "hf/all-MiniLM-L6-v1", "hf/all_datasets_v4_MiniLM-L6",
"onnx/all-MiniLM-L6-v1", "onnx/all_datasets_v4_MiniLM-L6"]

@@ -50,7 +52,8 @@ def test_vectorize(self):

def test_load_clip_text_model(self):
names = [ "onnx16/open_clip/ViT-B-32/laion400m_e32", 'onnx32/open_clip/ViT-B-32-quickgelu/laion400m_e32',
'RN50', "ViT-B/16"]
'RN50', "ViT-B/16", "open_clip/convnext_base_w/laion2b_s13b_b82k",
"open_clip/convnext_base_w_320/laion_aesthetic_s13b_b82k_augreg",]

device = 'cpu'
eps = 1e-9
@@ -128,7 +131,9 @@ def test_compare_onnx_sbert_text_models(self):


def test_model_outputs(self):
names = ["onnx16/open_clip/ViT-B-32/laion400m_e32", 'onnx32/open_clip/ViT-B-32-quickgelu/laion400m_e32',
names = ["open_clip/convnext_base_w/laion2b_s13b_b82k",
"open_clip/convnext_base_w_320/laion_aesthetic_s13b_b82k_augreg",
"onnx16/open_clip/ViT-B-32/laion400m_e32", 'onnx32/open_clip/ViT-B-32-quickgelu/laion400m_e32',
'open_clip/ViT-B-32/laion400m_e32', "all-MiniLM-L6-v1",
"all_datasets_v4_MiniLM-L6", "hf/all-MiniLM-L6-v1",
"hf/all_datasets_v4_MiniLM-L6", "onnx/all-MiniLM-L6-v1", "onnx/all_datasets_v4_MiniLM-L6"]
@@ -199,7 +204,9 @@ def test_model_un_normalization(self):


def test_open_clip_vectorize(self):
-names = ['open_clip/ViT-B-32/laion400m_e32', 'open_clip/RN50/openai']
+names = ["open_clip/convnext_base_w/laion2b_s13b_b82k",
+"open_clip/convnext_base_w_320/laion_aesthetic_s13b_b82k_augreg",
+'open_clip/ViT-B-32/laion400m_e32', 'open_clip/RN50/openai']

sentences = ['hello', 'this is a test sentence. so is this.', ['hello', 'this is a test sentence. so is this.']]
device = 'cpu'
@@ -222,7 +229,8 @@ def test_open_clip_vectorize(self):


def test_open_clip_embedding_size(self):
-names = ['open_clip/ViT-B-32/laion400m_e32', 'open_clip/RN50/openai']
+names = ["open_clip/convnext_base_w/laion2b_s13b_b82k",
+"open_clip/convnext_base_w_320/laion_aesthetic_s13b_b82k_augreg",
+'open_clip/ViT-B-32/laion400m_e32', 'open_clip/RN50/openai']

device = "cpu"

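The test bodies are truncated in this view, but the pattern across them is to vectorise each model name and compare the output width against the registry. A hedged sketch of that check, assuming the vectorise helper in marqo.s2_inference.s2_inference accepts (model_name, content, device):

from marqo.s2_inference.s2_inference import vectorise
from marqo.s2_inference.model_registry import _get_open_clip_properties

# Assumed signature: vectorise(model_name, content, device=...) -> list of vectors.
name = 'open_clip/convnext_base_w/laion2b_s13b_b82k'
output = vectorise(name, 'hello', device='cpu')
expected_dim = _get_open_clip_properties()[name]['dimensions']
assert len(output[0]) == expected_dim  # 640 for the convnext_base_w checkpoints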