Skip to content

Commit

Permalink
[Bugfix] Fix img_sizes Parsing in Phi3-Vision (vllm-project#5888)
Browse files (browse the repository at this point in the history)
  • Loading branch information
ywang96 authored and prashantgupta24 committed Jul 1, 2024
1 parent 8399340 commit 9b1a3f6
Showing 1 changed file with 6 additions and 20 deletions.
26 changes: 6 additions & 20 deletions vllm/model_executor/models/phi3v.py
Original file line number | Diff line number | Diff line change
[Expand Up]
@@ -65,12 +65,6 @@ def __init__(self, wte=None) -> None:
self.type_feature: str
self.img_processor: CLIPVisionModel

def set_img_features(self, img_features: torch.FloatTensor) -> None:
self.img_features = img_features

def set_img_sizes(self, img_sizes: torch.LongTensor) -> None:
self.img_sizes = img_sizes

def get_img_features(self,
img_embeds: torch.FloatTensor) -> torch.FloatTensor:
LAYER_IDX = self.layer_idx
[Expand Down] [Expand Up]
@@ -144,21 +138,16 @@ def __init__(self,
self.layer_idx = config.img_processor.get('layer_idx', -2)
self.type_feature = config.img_processor.get('type_feature', 'patch')

def forward(self,
input_ids: torch.LongTensor,
def forward(self, input_ids: torch.LongTensor,
pixel_values: torch.FloatTensor,
image_sizes=None) -> torch.FloatTensor:
image_sizes: torch.Tensor) -> torch.FloatTensor:
"""process and merge text embeddings with image embeddings."""

# (batch_size, max_num_crops, 3, height, width)
img_embeds = pixel_values
img_sizes = image_sizes

if self.img_features is not None:
img_embeds = self.img_features.clone()
self.img_features = None

if self.img_sizes is not None:
img_sizes = self.img_sizes
# (batch_size, 2)
img_sizes = image_sizes

input_shape = input_ids.size()
input_ids = input_ids.view(-1, input_shape[-1])
[Expand Down] [Expand Up]
@@ -190,11 +179,8 @@ def forward(self,
output_imgs = []
output_len = []

if isinstance(img_sizes, torch.Tensor):
img_sizes.squeeze_(0)

for _bs in range(bs):
h, w = img_sizes
h, w = img_sizes[_bs]
h = h // 336
w = w // 336
B_ = h * w
Expand Down

0 comments on commit 9b1a3f6

Please sign in to comment.