Commit c57b0d2 (parent 3b5c5f2), showing 7 changed files with 191 additions and 18 deletions.
@@ -0,0 +1,38 @@
name: Pre-Release

on:
  push:
    branches: ["main-dev"]
  pull_request:
    branches: ["main-dev"]

env:
  BUILD_TYPE: Release
  GH_TOKEN: ${{ secrets.SEMANTIC_RELEASE_TOKEN }}
  PYTHONUTF8: 1

# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
permissions:
  contents: read

jobs:
  test_python:
    name: Test Python
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - run: git submodule update --init --recursive
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install dependencies
        run: |
          python -m pip install --no-cache-dir --upgrade pip
          pip install -e .
      - name: Test with PyTest
        run: pytest scripts/ -s -x -Wd -v
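For local reproduction, the job above reduces to an editable install followed by the same pytest invocation. A minimal sketch using pytest's Python entry point; the helper script name and the in-process pip call are assumptions for illustration, not part of the workflow (it also skips the submodule update step):

# run_ci_locally.py -- hypothetical helper, not part of this commit
import subprocess
import sys

import pytest

# Mirror the "Install dependencies" step: editable install of the package.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-e", "."])

# Mirror the "Test with PyTest" step, with the same flags as the workflow:
# -s: no output capture, -x: stop on first failure, -Wd: default warning filters, -v: verbose.
sys.exit(pytest.main(["scripts/", "-s", "-x", "-Wd", "-v"]))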
@@ -1,9 +1,8 @@
requirements.txt
dist/
test
src/__pycache__
src/test.py
build/
package-lock.json
*.egg-info
*.onnx
__pycache__
@@ -0,0 +1,43 @@
import pytest
from PIL import Image
import uform

torch_models = [
    "unum-cloud/uform-vl-english",
    "unum-cloud/uform-vl-multilingual-v2",
]


@pytest.mark.parametrize("model_name", torch_models)
def test_one_embedding(model_name: str):
    model = uform.get_model(model_name)
    text = "a small red panda in a zoo"
    image_path = "assets/unum.png"

    image = Image.open(image_path)
    image_data = model.preprocess_image(image)
    text_data = model.preprocess_text(text)

    _, image_embedding = model.encode_image(image_data, return_features=True)
    _, text_embedding = model.encode_text(text_data, return_features=True)

    assert image_embedding.shape[0] == 1, "Image embedding batch size is not 1"
    assert text_embedding.shape[0] == 1, "Text embedding batch size is not 1"


@pytest.mark.parametrize("model_name", torch_models)
@pytest.mark.parametrize("batch_size", [1, 2])
def test_many_embeddings(model_name: str, batch_size: int):
    model = uform.get_model(model_name)
    texts = ["a small red panda in a zoo"] * batch_size
    image_paths = ["assets/unum.png"] * batch_size

    images = [Image.open(path) for path in image_paths]
    image_data = model.preprocess_image(images)
    text_data = model.preprocess_text(texts)

    image_embeddings = model.encode_image(image_data, return_features=False)
    text_embeddings = model.encode_text(text_data, return_features=False)

    assert image_embeddings.shape[0] == batch_size, "Image embedding is unexpected"
    assert text_embeddings.shape[0] == batch_size, "Text embedding is unexpected"
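The tests above pin down the embedding API surface exercised here (get_model, preprocess_image/preprocess_text, encode_image/encode_text). As a usage note, the same calls can be chained into a cross-modal similarity check. A minimal sketch, assuming the PyTorch backend so the returned embeddings are torch tensors; the cosine-similarity step is added purely for illustration and is not part of this commit:

# similarity_example.py -- illustrative sketch, not part of this commit
import torch
import uform
from PIL import Image

# Same calls the tests above exercise: preprocess, then encode each modality.
model = uform.get_model("unum-cloud/uform-vl-english")
image_data = model.preprocess_image(Image.open("assets/unum.png"))
text_data = model.preprocess_text("a small red panda in a zoo")

image_embedding = model.encode_image(image_data, return_features=False)
text_embedding = model.encode_text(text_data, return_features=False)

# Cosine similarity between the two L2-comparable embeddings (illustrative only).
score = torch.nn.functional.cosine_similarity(image_embedding, text_embedding)
print(float(score))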
@@ -0,0 +1,72 @@
import pytest
from PIL import Image

# PyTorch is a very heavy dependency, so we may want to skip these tests if it's not installed
try:
    import torch

    torch_available = True
except ImportError:
    torch_available = False

torch_hf_models = [
    "unum-cloud/uform-gen2-qwen-500m",
]


@pytest.mark.skipif(not torch_available, reason="PyTorch is not installed")
@pytest.mark.parametrize("model_name", torch_hf_models)
def test_one_conversation(model_name: str):
    from transformers import AutoModel, AutoProcessor

    model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
    processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)

    prompt = "Describe the image in great detail."
    image = Image.open("assets/unum.png")

    inputs = processor(text=[prompt], images=[image], return_tensors="pt")

    with torch.inference_mode():
        output = model.generate(
            **inputs,
            do_sample=False,
            use_cache=True,
            max_new_tokens=10,
            pad_token_id=processor.tokenizer.pad_token_id,
        )
    prompt_len = inputs["input_ids"].shape[1]
    decoded_text = processor.batch_decode(output[:, prompt_len:])[0]

    assert len(decoded_text), "No text was generated from the model."


@pytest.mark.skipif(not torch_available, reason="PyTorch is not installed")
@pytest.mark.parametrize("model_name", torch_hf_models)
@pytest.mark.parametrize("batch_size", [1, 2])
def test_many_conversations(model_name: str, batch_size: int):
    from transformers import AutoModel, AutoProcessor

    model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
    processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)

    prompt = "Describe the image in great detail."
    image = Image.open("assets/unum.png")

    texts = [prompt] * batch_size
    images = [image] * batch_size
    inputs = processor(text=texts, images=images, return_tensors="pt")

    with torch.inference_mode():
        output = model.generate(
            **inputs,
            do_sample=False,
            use_cache=True,
            max_new_tokens=10,
            pad_token_id=processor.tokenizer.pad_token_id,
        )
    prompt_len = inputs["input_ids"].shape[1]
    decoded_texts = processor.batch_decode(output[:, prompt_len:])

    assert all(len(decoded_text) for decoded_text in decoded_texts), "No text was generated from the model."
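For reference, the same generative pipeline works as a standalone script outside pytest. A minimal sketch assuming the identical model and processor calls from the test above; the larger token budget and skip_special_tokens are illustrative choices for readable output, not part of this commit:

# describe_image.py -- illustrative sketch, not part of this commit
import torch
from PIL import Image
from transformers import AutoModel, AutoProcessor

model_name = "unum-cloud/uform-gen2-qwen-500m"
model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)

inputs = processor(
    text=["Describe the image in great detail."],
    images=[Image.open("assets/unum.png")],
    return_tensors="pt",
)

with torch.inference_mode():
    output = model.generate(
        **inputs,
        do_sample=False,
        use_cache=True,
        max_new_tokens=128,  # larger budget than the 10-token smoke test above
        pad_token_id=processor.tokenizer.pad_token_id,
    )

# Strip the prompt tokens and special tokens before printing the answer.
prompt_len = inputs["input_ids"].shape[1]
print(processor.batch_decode(output[:, prompt_len:], skip_special_tokens=True)[0])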