From a710ea953420dd366630313538f4d01393b82f48 Mon Sep 17 00:00:00 2001
From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com>
Date: Thu, 28 Mar 2024 08:51:49 -0700
Subject: [PATCH] Break: `get_model` returns tuple

---
 CONTRIBUTING.md            | 12 ++++++++++++
 README.md                  |  2 +-
 scripts/bench.py           | 16 ++++++++--------
 scripts/test_embeddings.py | 12 ++++++------
 4 files changed, 27 insertions(+), 15 deletions(-)
 create mode 100644 CONTRIBUTING.md

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..e90cd97
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,12 @@
+# Contributing to UForm
+
+We welcome contributions to UForm!
+Before submitting any changes, please make sure that the tests pass.
+
+```sh
+pip install -e . # For core dependencies
+pip install -e ".[torch]" # For PyTorch
+pip install -e ".[onnx]" # For ONNX on CPU
+pip install -e ".[onnx-gpu]" # For ONNX on GPU, available for some platforms
+pytest scripts/ -s -x -Wd -v
+```
\ No newline at end of file
diff --git a/README.md b/README.md
index 7fe7e34..582ad68 100755
--- a/README.md
+++ b/README.md
@@ -227,7 +227,7 @@ For that pick the encoder of the model you want to run in parallel (`text_encode
 ```python
 import uform

-model = uform.get_model('unum-cloud/uform-vl-english')
+model, processor = uform.get_model('unum-cloud/uform-vl-english')
 model_image = nn.DataParallel(model.image_encoder)

 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
diff --git a/scripts/bench.py b/scripts/bench.py
index 21c6b13..49c7004 100644
--- a/scripts/bench.py
+++ b/scripts/bench.py
@@ -5,8 +5,12 @@
 import requests
 import torch
 from PIL import Image
-from transformers import (AutoProcessor, InstructBlipForConditionalGeneration,
-                          InstructBlipProcessor, LlavaForConditionalGeneration)
+from transformers import (
+    AutoProcessor,
+    InstructBlipForConditionalGeneration,
+    InstructBlipProcessor,
+    LlavaForConditionalGeneration,
+)

 from uform import get_model
 from uform.gen_model import VLMForCausalLM, VLMProcessor
@@ -76,9 +80,7 @@ def bench_image_embeddings(model, images):
     total_embeddings = 0
     images *= 10
     while total_duration < 10:
-        seconds, embeddings = duration(
-            lambda: model.encode_image(model.preprocess_image(images))
-        )
+        seconds, embeddings = duration(lambda: model.encode_image(processor.preprocess_image(images)))
         total_duration += seconds
         total_embeddings += len(embeddings)

@@ -90,9 +92,7 @@ def bench_text_embeddings(model, texts):
     total_embeddings = 0
     texts *= 10
     while total_duration < 10:
-        seconds, embeddings = duration(
-            lambda: model.encode_text(model.preprocess_text(texts))
-        )
+        seconds, embeddings = duration(lambda: model.encode_text(processor.preprocess_text(texts)))
         total_duration += seconds
         total_embeddings += len(embeddings)

diff --git a/scripts/test_embeddings.py b/scripts/test_embeddings.py
index 0ebf4cb..45fca56 100644
--- a/scripts/test_embeddings.py
+++ b/scripts/test_embeddings.py
@@ -10,13 +10,13 @@

 @pytest.mark.parametrize("model_name", torch_models)
 def test_one_embedding(model_name: str):
-    model = uform.get_model(model_name)
+    model, processor = uform.get_model(model_name)
     text = "a small red panda in a zoo"
     image_path = "assets/unum.png"
     image = Image.open(image_path)

-    image_data = model.preprocess_image(image)
-    text_data = model.preprocess_text(text)
+    image_data = processor.preprocess_image(image)
+    text_data = processor.preprocess_text(text)

     _, image_embedding = model.encode_image(image_data, return_features=True)
     _, text_embedding = model.encode_text(text_data, return_features=True)
@@ -28,13 +28,13 @@ def test_one_embedding(model_name: str):

 @pytest.mark.parametrize("model_name", torch_models)
 @pytest.mark.parametrize("batch_size", [1, 2])
 def test_many_embeddings(model_name: str, batch_size: int):
-    model = uform.get_model(model_name)
+    model, processor = uform.get_model(model_name)
     texts = ["a small red panda in a zoo"] * batch_size
     image_paths = ["assets/unum.png"] * batch_size
     images = [Image.open(path) for path in image_paths]

-    image_data = model.preprocess_image(images)
-    text_data = model.preprocess_text(texts)
+    image_data = processor.preprocess_image(images)
+    text_data = processor.preprocess_text(texts)

     image_embeddings = model.encode_image(image_data, return_features=False)
     text_embeddings = model.encode_text(text_data, return_features=False)
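
Since this patch is a breaking change, a migration note may help downstream users: `uform.get_model` now returns a `(model, processor)` pair, and the `preprocess_image` / `preprocess_text` helpers move from the model to the processor, while `encode_image` / `encode_text` stay on the model. The following is a minimal sketch assembled from the README and test changes above; the model name and asset path are the ones used in the tests, not requirements of the API.

```python
import uform
from PIL import Image

# Old API (before this patch):
#   model = uform.get_model("unum-cloud/uform-vl-english")
#   image_data = model.preprocess_image(image)
#   text_data = model.preprocess_text(text)

# New API: unpack the (model, processor) tuple and
# call the preprocessing helpers on the processor.
model, processor = uform.get_model("unum-cloud/uform-vl-english")

image = Image.open("assets/unum.png")
image_data = processor.preprocess_image(image)
text_data = processor.preprocess_text("a small red panda in a zoo")

# Encoding is unchanged and stays on the model.
_, image_embedding = model.encode_image(image_data, return_features=True)
_, text_embedding = model.encode_text(text_data, return_features=True)
```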