Add: Testing suite

unum-cloud · Mar 27, 2024 · c57b0d2 · c57b0d2
1 parent 3b5c5f2
commit c57b0d2
Show file tree

Hide file tree

Showing 7 changed files with 191 additions and 18 deletions.
diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml
@@ -0,0 +1,38 @@
+name: Pre-Release
+
+# Run the test suite on every push to "main-dev" and on every pull request targeting it.
+on:
+  push:
+    branches: ["main-dev"]
+  pull_request:
+    branches: ["main-dev"]
+
+env:
+  BUILD_TYPE: Release
+  GH_TOKEN: ${{ secrets.SEMANTIC_RELEASE_TOKEN }}
+  PYTHONUTF8: 1
+
+# Restrict the GITHUB_TOKEN to read-only access to the repository contents;
+# this workflow only checks out and tests the code.
+permissions:
+  contents: read
+
+# NOTE: the `jobs` key must appear exactly once; duplicate mapping keys are invalid YAML.
+jobs:
+  test_python:
+    name: Test Python
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+      # Fetch all nested submodules after the checkout.
+      - run: git submodule update --init --recursive
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      # Editable install of the package from the repository root.
+      - name: Install dependencies
+        run: |
+          python -m pip install --no-cache-dir --upgrade pip
+          pip install -e .
+
+      # -s: do not capture output, -x: stop at the first failure,
+      # -Wd: "default" warning filter, -v: verbose test names.
+      - name: Test with PyTest
+        run: pytest scripts/ -s -x -Wd -v
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -1,11 +1,3 @@
-# This workflow will upload a Python Package using Twine when a release is created
-# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
-
-# This workflow uses actions that are not certified by GitHub.
-# They are provided by a third-party and are governed by
-# separate terms of service, privacy policy, and support
-# documentation.
-
 name: Release
 
 on:
@@ -26,7 +18,7 @@ jobs:
     name: Semantic Release
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
         with:
           persist-credentials: false
       - uses: actions/setup-node@v3
@@ -38,7 +30,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout the latest code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
@@ -56,17 +48,40 @@ jobs:
           unprotect_reviews: True
           force: True
 
+  # Runs the test suite on the "main" branch once the `versioning` job has finished.
+  test_python:
+    name: Run Tests
+    runs-on: ubuntu-latest
+    needs: versioning
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: "main"
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      # Install the package itself so that `import uform` resolves inside the tests.
+      # `requirements.txt` is listed in `.gitignore`, so a clean checkout may not
+      # contain it; the editable install pulls the dependencies from `pyproject.toml`.
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .
+          pip install -r requirements-dev.txt
+
+      - name: Run PyTest
+        run: pytest scripts/
+
   pypi_publish:
     name: Publish Python
     runs-on: ubuntu-latest
     needs: versioning
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
         with:
           ref: "main"
       - name: Set up Python
-        uses: actions/setup-python@v3
+        uses: actions/setup-python@v5
         with:
           python-version: "3.11"
 
@@ -79,7 +94,7 @@ jobs:
         run: python -m build
 
       - name: Publish to PyPi
-        uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
+        uses: pypa/gh-action-pypi-publish@1.8.14
         with:
           user: __token__
           password: ${{ secrets.PYPI_API_TOKEN }}
@@ -93,7 +108,7 @@ jobs:
     needs: pypi_publish
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           ref: "main"
       - name: Setup GitHub Pages

diff --git a/.gitignore b/.gitignore
@@ -1,9 +1,8 @@
 requirements.txt
 dist/
 test
-src/__pycache__
-src/test.py
 build/
 package-lock.json
 *.egg-info
 *.onnx
+__pycache__
diff --git a/pyproject.toml b/pyproject.toml
@@ -35,7 +35,7 @@ dependencies = [
     "torchvision",
     "transformers>=4.36.2",
 ]
-description = "Multi-Modal Transformers library for Semantic Search and other Vision-Language tasks"
+description = "Pocket-Sized Multimodal AI for Content Understanding and Generation"
 maintainers = [
     {email = "info@unum.cloud", name = "Unum Cloud"},
 ]
@@ -70,3 +70,9 @@ in_place = true
 spaces_before_inline_comment = 2
 spaces_indent_inline_array = 4
 trailing_comma_inline_array = true
+
+# Configuration options for the Black formatter:
+# https://black.readthedocs.io/en/latest/usage_and_configuration/the_basics.html#where-black-looks-for-the-file
+[tool.black]
+line-length = 120                   # Set line length to the same value as in `.clang-format` for modern wide screens
+target-version = ['py36', 'py312']  # Set target Python versions to 3.6 and 3.12
diff --git a/scripts/test_embeddings.py b/scripts/test_embeddings.py
@@ -0,0 +1,43 @@
+import pytest
+from PIL import Image
+import uform
+
+torch_models = [
+    "unum-cloud/uform-vl-english",
+    "unum-cloud/uform-vl-multilingual-v2",
+]
+
+
+@pytest.mark.parametrize("model_name", torch_models)
+def test_one_embedding(model_name: str):
+    model = uform.get_model(model_name)
+    text = "a small red panda in a zoo"
+    image_path = "assets/unum.png"
+
+    image = Image.open(image_path)
+    image_data = model.preprocess_image(image)
+    text_data = model.preprocess_text(text)
+
+    _, image_embedding = model.encode_image(image_data, return_features=True)
+    _, text_embedding = model.encode_text(text_data, return_features=True)
+
+    assert image_embedding.shape[0] == 1, "Image embedding batch size is not 1"
+    assert text_embedding.shape[0] == 1, "Text embedding batch size is not 1"
+
+
+@pytest.mark.parametrize("model_name", torch_models)
+@pytest.mark.parametrize("batch_size", [1, 2])
+def test_many_embeddings(model_name: str, batch_size: int):
+    model = uform.get_model(model_name)
+    texts = ["a small red panda in a zoo"] * batch_size
+    image_paths = ["assets/unum.png"] * batch_size
+
+    images = [Image.open(path) for path in image_paths]
+    image_data = model.preprocess_image(images)
+    text_data = model.preprocess_text(texts)
+
+    image_embeddings = model.encode_image(image_data, return_features=False)
+    text_embeddings = model.encode_text(text_data, return_features=False)
+
+    assert image_embeddings.shape[0] == batch_size, "Image embedding is unexpected"
+    assert text_embeddings.shape[0] == batch_size, "Text embedding is unexpected"
diff --git a/scripts/test_generative.py b/scripts/test_generative.py
@@ -0,0 +1,72 @@
+import pytest
+from PIL import Image
+
+# PyTorch is a very heavy dependency, so we may want to skip these tests if it's not installed
+try:
+    import torch
+
+    torch_available = True
+except:
+    torch_available = False
+
+torch_hf_models = [
+    "unum-cloud/uform-gen2-qwen-500m",
+]
+
+
+@pytest.mark.skipif(not torch_available, reason="PyTorch is not installed")
+@pytest.mark.parametrize("model_name", torch_hf_models)
+def test_one_conversation(model_name: str):
+    from transformers import AutoModel, AutoProcessor
+
+    model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
+    processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
+
+    prompt = "Describe the image in great detail."
+    image = Image.open("assets/unum.png")
+
+    inputs = processor(text=[prompt], images=[image], return_tensors="pt")
+
+    with torch.inference_mode():
+        output = model.generate(
+            **inputs,
+            do_sample=False,
+            use_cache=True,
+            max_new_tokens=10,
+            pad_token_id=processor.tokenizer.pad_token_id,
+        )
+    prompt_len = inputs["input_ids"].shape[1]
+    decoded_text = processor.batch_decode(output[:, prompt_len:])[0]
+
+    assert len(decoded_text), "No text was generated from the model."
+
+
+@pytest.mark.skipif(not torch_available, reason="PyTorch is not installed")
+@pytest.mark.parametrize("model_name", torch_hf_models)
+@pytest.mark.parametrize("batch_size", [1, 2])
+def test_many_conversations(model_name: str, batch_size: int):
+
+    from transformers import AutoModel, AutoProcessor
+
+    model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
+    processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
+
+    prompt = "Describe the image in great detail."
+    image = Image.open("assets/unum.png")
+
+    texts = [prompt] * batch_size
+    images = [image] * batch_size
+    inputs = processor(text=texts, images=images, return_tensors="pt")
+
+    with torch.inference_mode():
+        output = model.generate(
+            **inputs,
+            do_sample=False,
+            use_cache=True,
+            max_new_tokens=10,
+            pad_token_id=processor.tokenizer.pad_token_id,
+        )
+    prompt_len = inputs["input_ids"].shape[1]
+    decoded_texts = processor.batch_decode(output[:, prompt_len:])
+
+    assert all(len(decoded_text) for decoded_text in decoded_texts), "No text was generated from the model."
diff --git a/src/uform/models.py b/src/uform/models.py
@@ -353,7 +353,7 @@ def forward(self, x: Tensor) -> Tensor:
 
 class VLM(nn.Module):
     """
-    Vision-Language Model for multi-modal embeddings.
+    Vision-Language Model for multimodal embeddings.
     """
 
     def __init__(self, config: Dict, tokenizer_path: PathLike):