From a710ea953420dd366630313538f4d01393b82f48 Mon Sep 17 00:00:00 2001
From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com>
Date: Thu, 28 Mar 2024 08:51:49 -0700
Subject: [PATCH] Break: `get_model` returns tuple

---
 CONTRIBUTING.md            | 12 ++++++++++++
 README.md                  |  2 +-
 scripts/bench.py           | 16 ++++++++--------
 scripts/test_embeddings.py | 12 ++++++------
 4 files changed, 27 insertions(+), 15 deletions(-)
 create mode 100644 CONTRIBUTING.md

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..e90cd97
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,12 @@
+# Contributing to UForm
+
+We welcome contributions to UForm!
+Before submitting any changes, please make sure that the tests pass.
+
+```sh
+pip install -e . # For core dependencies
+pip install -e ".[torch]" # For PyTorch
+pip install -e ".[onnx]" # For ONNX on CPU
+pip install -e ".[onnx-gpu]" # For ONNX on GPU, available for some platforms
+pytest scripts/ -s -x -Wd -v
+```
\ No newline at end of file
diff --git a/README.md b/README.md
index 7fe7e34..582ad68 100755
--- a/README.md
+++ b/README.md
@@ -227,7 +227,7 @@ For that pick the encoder of the model you want to run in parallel (`text_encode
 ```python
 import uform

-model = uform.get_model('unum-cloud/uform-vl-english')
+model, processor = uform.get_model('unum-cloud/uform-vl-english')
 model_image = nn.DataParallel(model.image_encoder)

 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
diff --git a/scripts/bench.py b/scripts/bench.py
index 21c6b13..49c7004 100644
--- a/scripts/bench.py
+++ b/scripts/bench.py
@@ -5,8 +5,12 @@
 import requests
 import torch
 from PIL import Image
-from transformers import (AutoProcessor, InstructBlipForConditionalGeneration,
-                          InstructBlipProcessor, LlavaForConditionalGeneration)
+from transformers import (
+    AutoProcessor,
+    InstructBlipForConditionalGeneration,
+    InstructBlipProcessor,
+    LlavaForConditionalGeneration,
+)

 from uform import get_model
 from uform.gen_model import VLMForCausalLM, VLMProcessor
@@ -76,9 +80,7 @@ def bench_image_embeddings(model, images):
     total_embeddings = 0
     images *= 10
     while total_duration < 10:
-        seconds, embeddings = duration(
-            lambda: model.encode_image(model.preprocess_image(images))
-        )
+        seconds, embeddings = duration(lambda: model.encode_image(processor.preprocess_image(images)))
         total_duration += seconds
         total_embeddings += len(embeddings)

@@ -90,9 +92,7 @@ def bench_text_embeddings(model, texts):
     total_embeddings = 0
     texts *= 10
     while total_duration < 10:
-        seconds, embeddings = duration(
-            lambda: model.encode_text(model.preprocess_text(texts))
-        )
+        seconds, embeddings = duration(lambda: model.encode_text(processor.preprocess_text(texts)))
         total_duration += seconds
         total_embeddings += len(embeddings)

diff --git a/scripts/test_embeddings.py b/scripts/test_embeddings.py
index 0ebf4cb..45fca56 100644
--- a/scripts/test_embeddings.py
+++ b/scripts/test_embeddings.py
@@ -10,13 +10,13 @@

 @pytest.mark.parametrize("model_name", torch_models)
 def test_one_embedding(model_name: str):
-    model = uform.get_model(model_name)
+    model, processor = uform.get_model(model_name)
     text = "a small red panda in a zoo"
     image_path = "assets/unum.png"
     image = Image.open(image_path)

-    image_data = model.preprocess_image(image)
-    text_data = model.preprocess_text(text)
+    image_data = processor.preprocess_image(image)
+    text_data = processor.preprocess_text(text)

     _, image_embedding = model.encode_image(image_data, return_features=True)
     _, text_embedding = model.encode_text(text_data, return_features=True)
@@ -28,13 +28,13 @@ def test_one_embedding(model_name: str):

 @pytest.mark.parametrize("model_name", torch_models)
 @pytest.mark.parametrize("batch_size", [1, 2])
 def test_many_embeddings(model_name: str, batch_size: int):
-    model = uform.get_model(model_name)
+    model, processor = uform.get_model(model_name)
     texts = ["a small red panda in a zoo"] * batch_size
     image_paths = ["assets/unum.png"] * batch_size
     images = [Image.open(path) for path in image_paths]

-    image_data = model.preprocess_image(images)
-    text_data = model.preprocess_text(texts)
+    image_data = processor.preprocess_image(images)
+    text_data = processor.preprocess_text(texts)

     image_embeddings = model.encode_image(image_data, return_features=False)
     text_embeddings = model.encode_text(text_data, return_features=False)
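
Since this patch is a breaking change, a migration note may help downstream users: `uform.get_model` now returns a `(model, processor)` pair, and the `preprocess_image` / `preprocess_text` helpers move from the model to the processor, while `encode_image` / `encode_text` stay on the model. The following is a minimal sketch assembled from the README and test changes above; the model name and asset path are the ones used in the tests, not requirements of the API.

```python
import uform
from PIL import Image

# Old API (before this patch):
#   model = uform.get_model("unum-cloud/uform-vl-english")
#   image_data = model.preprocess_image(image)
#   text_data = model.preprocess_text(text)

# New API: unpack the (model, processor) tuple and
# call the preprocessing helpers on the processor.
model, processor = uform.get_model("unum-cloud/uform-vl-english")

image = Image.open("assets/unum.png")
image_data = processor.preprocess_image(image)
text_data = processor.preprocess_text("a small red panda in a zoo")

# Encoding is unchanged and stays on the model.
_, image_embedding = model.encode_image(image_data, return_features=True)
_, text_embedding = model.encode_text(text_data, return_features=True)
```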