Skip to content

Commit

Permalink
[llama-mm] Fix vision encoder model test (#6842)
Browse files Browse the repository at this point in the history
* [llama-mm] Fix vision encoder model test

Summary: As titled. We need a smaller model config to make it work on CI
jobs.

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

* Fix pull.yml and trunk.yml

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

* Remove torchao

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

* Small fixes

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

* Add torchao back

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
  • Loading branch information
larryliu0820 authored Nov 14, 2024
1 parent 21eecff commit 0a9598b
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 14 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ jobs:
conda activate "${CONDA_ENV}"
MODEL_NAME=${{ matrix.model }}
# Install requirements for llama vision
if [[ "$MODEL_NAME" == "llama3_2_vision_encoder" ]]; then
bash examples/models/llama3_2_vision/install_requirements.sh
fi
BUILD_TOOL=${{ matrix.build-tool }}
BACKEND=${{ matrix.backend }}
DEMO_BACKEND_DELEGATION=${{ matrix.demo_backend_delegation }}
Expand Down
6 changes: 5 additions & 1 deletion .github/workflows/trunk.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,11 @@ jobs:
bash .ci/scripts/setup-conda.sh
# Setup MacOS dependencies as there is no Docker support on MacOS atm
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
# Build and test xecutorch
# Install requirements for llama vision
if [[ "$MODEL_NAME" == "llama3_2_vision_encoder" ]]; then
${CONDA_RUN} bash examples/models/llama3_2_vision/install_requirements.sh
fi
# Build and test executorch
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}"
test-custom-ops-macos:
Expand Down
12 changes: 12 additions & 0 deletions examples/models/llama3_2_vision/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from .vision_encoder import FlamingoVisionEncoderModel, VisionEncoderConfig

# Public names of this package; both are imported from .vision_encoder above.
__all__ = [
"FlamingoVisionEncoderModel",
"VisionEncoderConfig",
]
28 changes: 16 additions & 12 deletions examples/models/llama3_2_vision/vision_encoder/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,6 @@
)
from torchtune.models.llama3_2_vision._component_builders import llama3_2_vision_encoder

max_seq_len = 8192
in_channels = 3
tile_size = 560
max_num_tiles = 4
# how many tokens per image generated by the vision encoder
tokens_per_image = 6404
# how many images to cache in the kv cache in cross attention
kv_cache_image_num = 1
# maximum number of tokens generated by encoder and thus stored in the kv cache in cross attention
encoder_max_seq_len = tokens_per_image * kv_cache_image_num


@dataclass
class VisionEncoderConfig:
Expand All @@ -42,11 +31,26 @@ class VisionEncoderConfig:
in_channels: int = 3


# Reduced-size vision encoder config for CI testing purposes:
# 6 CLIP layers, 4 projection layers, and 224-pixel tiles.
# NOTE(review): this comment previously read "8 layers", which matches neither
# clip_num_layers nor num_layers_projection below — confirm the intended sizes.
demo_config: VisionEncoderConfig = VisionEncoderConfig(
patch_size=14,
num_heads=8,
clip_embed_dim=768,
clip_num_layers=6,
clip_hidden_states=[1, 3, 5],
decoder_embed_dim=1024,
num_layers_projection=4,
tile_size=224,
max_num_tiles=4,
in_channels=3,
)


class FlamingoVisionEncoderModel(EagerModelBase):
def __init__(self, config: Optional[VisionEncoderConfig] = None):
super().__init__()
if config is None:
config = VisionEncoderConfig()
config = demo_config
self.config = config
self.model = llama3_2_vision_encoder(
patch_size=config.patch_size,
Expand Down
2 changes: 1 addition & 1 deletion pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ addopts =
# examples
examples/models/llama/tests
examples/models/llama3_2_vision/preprocess
# examples/models/llama3_2_vision/vision_encoder/test TODO: enable this
examples/models/llama3_2_vision/vision_encoder/test
# examples/models/llava/test TODO: enable this
# exir
exir/_serialize/test
Expand Down

0 comments on commit 0a9598b

Please sign in to comment.