Skip to content

Commit

Permalink
Replace tensor.norm() with decomposed version for CLIP executorch e…
Browse files Browse the repository at this point in the history
…xport (huggingface#32887)

* Replace .norm() with decomposed version for executorch export

* [run_slow] clip
  • Loading branch information
qubvel authored and BernardZach committed Dec 6, 2024
1 parent 5f68594 commit f084390
Showing 1 changed file with 13 additions and 2 deletions.
15 changes: 13 additions & 2 deletions src/transformers/models/clip/modeling_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,17 @@ def clip_loss(similarity: torch.Tensor) -> torch.Tensor:
return (caption_loss + image_loss) / 2.0


def _get_vector_norm(tensor: torch.Tensor) -> torch.Tensor:
"""
This method is equivalent to tensor.norm(p=2, dim=-1, keepdim=True) and used to make
model `executorch` exportable. See issue https://github.com/pytorch/executorch/issues/3566
"""
square_tensor = torch.pow(tensor, 2)
sum_tensor = torch.sum(square_tensor, dim=-1, keepdim=True)
normed_tensor = torch.pow(sum_tensor, 0.5)
return normed_tensor


@dataclass
class CLIPVisionModelOutput(ModelOutput):
"""
Expand Down Expand Up @@ -1313,8 +1324,8 @@ def forward(
text_embeds = self.text_projection(text_embeds)

# normalized features
image_embeds = image_embeds / image_embeds.norm(p=2, dim=-1, keepdim=True)
text_embeds = text_embeds / text_embeds.norm(p=2, dim=-1, keepdim=True)
image_embeds = image_embeds / _get_vector_norm(image_embeds)
text_embeds = text_embeds / _get_vector_norm(text_embeds)

# cosine similarity as logits
logit_scale = self.logit_scale.exp()
Expand Down

0 comments on commit f084390

Please sign in to comment.