Enable Sentence Transformer Inference with Intel Gaudi2 GPU Supported ('hpu') - Follow-up for #2557 (#2630)

Merged

Changes from 6 commits
43 changes: 40 additions & 3 deletions sentence_transformers/SentenceTransformer.py
@@ -20,6 +20,7 @@
 import math
 import queue
 import tempfile
+import copy

from . import __MODEL_HUB_ORGANIZATION__
from .evaluation import SentenceEvaluator
@@ -89,11 +90,13 @@ def __init__(
         token: Optional[Union[bool, str]] = None,
         use_auth_token: Optional[Union[bool, str]] = None,
         truncate_dim: Optional[int] = None,
+        padding: Union[str, bool] = True,
     ):
         # Note: self._load_sbert_model can also update `self.prompts` and `self.default_prompt_name`
         self.prompts = prompts or {}
         self.default_prompt_name = default_prompt_name
         self.truncate_dim = truncate_dim
+        self.padding = padding
         self._model_card_vars = {}
         self._model_card_text = None
         self._model_config = {}
@@ -315,6 +318,10 @@ def encode(
             ht.hpu.wrap_in_hpu_graph(self, disable_tensor_cache=True)
             self.is_hpu_graph_enabled = True

+            from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi
+
+            adapt_transformers_to_gaudi()
+
         self.eval()
         if show_progress_bar is None:
             show_progress_bar = (
@@ -374,11 +381,42 @@ def encode(
         for start_index in trange(0, len(sentences), batch_size, desc="Batches", disable=not show_progress_bar):
             sentences_batch = sentences_sorted[start_index : start_index + batch_size]
             features = self.tokenize(sentences_batch)
+            if self.device.type == "hpu":
+                if "input_ids" in features:
+                    curr_tokenize_len = features["input_ids"].shape
+                    additional_pad_len = 2 ** math.ceil(math.log2(curr_tokenize_len[1])) - curr_tokenize_len[1]
+                    features["input_ids"] = torch.cat(
+                        (
+                            features["input_ids"],
+                            torch.ones((curr_tokenize_len[0], additional_pad_len), dtype=torch.int8),
+                        ),
+                        -1,
+                    )
+                    features["attention_mask"] = torch.cat(
+                        (
+                            features["attention_mask"],
+                            torch.zeros((curr_tokenize_len[0], additional_pad_len), dtype=torch.int8),
+                        ),
+                        -1,
+                    )
+                    if "token_type_ids" in features:
+                        features["token_type_ids"] = torch.cat(
+                            (
+                                features["token_type_ids"],
+                                torch.zeros((curr_tokenize_len[0], additional_pad_len), dtype=torch.int8),
+                            ),
+                            -1,
+                        )
+
             features = batch_to_device(features, device)
             features.update(extra_features)

             with torch.no_grad():
-                out_features = self.forward(features)
+                if self.device.type == "hpu":
+                    hpu_graph_out = self.forward(features)
+                    out_features = copy.deepcopy(hpu_graph_out)
+                else:
+                    out_features = self.forward(features)
Collaborator
Suggested change:

-                if self.device.type == "hpu":
-                    hpu_graph_out = self.forward(features)
-                    out_features = copy.deepcopy(hpu_graph_out)
-                else:
-                    out_features = self.forward(features)
+                out_features = self.forward(features)
+                if self.device.type == "hpu":
+                    out_features = copy.deepcopy(out_features)

I would prefer this; could you verify that it works? Admittedly, I'm not sure why you have to copy these.

Contributor Author
This code is needed to support test_encode_truncate() (under tests/test_sentence_transformer) from #2573. In the original implementation, `out_features = self.forward(features)` returns the very same dict as `features`: both names refer to a single object in memory, and forward() merely adds the "sentence_embedding" key in place. When the embeddings are later truncated via truncate_dim, HPU graph mode raises an error, because the captured graph keeps a reference to the original output.

I think what you changed here is good for simplicity.
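
To make the aliasing concrete, here is a minimal sketch of the behavior the author describes; the forward() stub and tensor shapes are illustrative, not code from this repository:

```python
import copy

import torch


def forward(features: dict) -> dict:
    # Mimics the original behavior: the output *is* the input dict,
    # mutated in place with one extra key.
    features["sentence_embedding"] = features["token_embeddings"].mean(dim=1)
    return features


features = {"token_embeddings": torch.randn(2, 4, 8)}
out_features = forward(features)
assert out_features is features  # one object, shared storage

# deepcopy gives out_features its own storage, so later mutation (e.g.
# truncating the embedding dimension) cannot clash with buffers that a
# captured HPU graph still references.
safe_out = copy.deepcopy(out_features)
assert safe_out is not out_features
```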

                 out_features["sentence_embedding"] = truncate_embeddings(
                     out_features["sentence_embedding"], self.truncate_dim
                 )
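
The encode() hunk above rounds every batch up to the next power-of-two sequence length, so the HPU sees only a small set of distinct tensor shapes and avoids recompiling its graph for every batch. A self-contained sketch of the same bucketing arithmetic (dtypes are matched to the inputs here for a portable run, whereas the PR pads with int8 tensors):

```python
import math

import torch


def pad_batch_to_power_of_two(features: dict) -> dict:
    """Right-pad (batch, seq_len) tensors so seq_len becomes the next power of two."""
    batch, seq_len = features["input_ids"].shape
    pad = 2 ** math.ceil(math.log2(seq_len)) - seq_len  # e.g. seq_len=60 -> pad=4
    if pad == 0:
        return features
    # Padded token positions get attention_mask=0, so they are ignored.
    features["input_ids"] = torch.cat(
        (features["input_ids"], torch.ones((batch, pad), dtype=features["input_ids"].dtype)), dim=-1
    )
    features["attention_mask"] = torch.cat(
        (features["attention_mask"], torch.zeros((batch, pad), dtype=features["attention_mask"].dtype)), dim=-1
    )
    return features


features = {
    "input_ids": torch.ones((2, 60), dtype=torch.long),
    "attention_mask": torch.ones((2, 60), dtype=torch.long),
}
print(pad_batch_to_power_of_two(features)["input_ids"].shape)  # torch.Size([2, 64])
```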
@@ -595,8 +633,7 @@ def tokenize(self, texts: Union[List[str], List[Dict], List[Tuple[str, str]]]):
"""
kwargs = {}
# HPU models reach optimal performance if the padding is not dynamic
if self.device.type == "hpu":
kwargs["padding"] = "max_length"
kwargs["padding"] = self.padding

try:
return self._first_module().tokenize(texts, **kwargs)
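Taken together, the SentenceTransformer.py changes make the padding strategy a constructor argument rather than a hard-coded device check. A hedged usage sketch — the model name is only an example, and actually running this requires Gaudi hardware with optimum-habana installed:

```python
from sentence_transformers import SentenceTransformer

# Static "max_length" padding keeps tensor shapes fixed across batches,
# which HPU graph mode prefers; the default padding=True remains the
# better choice on CPU/GPU.
model = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2",
    device="hpu",
    padding="max_length",
)

# On the first encode() call on HPU, encode() wraps the model in an HPU
# graph and applies adapt_transformers_to_gaudi() before inference.
embeddings = model.encode(["An HPU inference example", "A second sentence"])
print(embeddings.shape)
```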
7 changes: 6 additions & 1 deletion sentence_transformers/models/CLIPModel.py
@@ -3,6 +3,7 @@
 import transformers
 import torch
 from PIL import Image
+from sentence_transformers.util import get_device_name


class CLIPModel(nn.Module):
@@ -72,7 +73,11 @@ def tokenize(self, texts, padding: Union[str, bool] = True):
         encoding["pixel_values"] = image_features.pixel_values

         encoding["image_text_info"] = image_text_info
-        return encoding
+        device = get_device_name()
+        if device == "hpu":
+            return dict(encoding)
+        else:
+            return encoding

     def save(self, output_path: str):
         self.model.save_pretrained(output_path)
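The tokenize() change above converts the tokenizer output to a plain dict on HPU: BatchEncoding is a dict subclass that carries extra state, and dict(...) keeps just the tensor mapping. A minimal sketch of the same conversion outside the class (the CLIP checkpoint is illustrative):

```python
from transformers import AutoTokenizer

from sentence_transformers.util import get_device_name

tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32")
encoding = tokenizer(["a photo of a cat"], padding=True, return_tensors="pt")

# On HPU, downstream handling expects a plain mapping of tensors rather
# than the BatchEncoding wrapper object.
if get_device_name() == "hpu":
    encoding = dict(encoding)
```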
10 changes: 2 additions & 8 deletions tests/test_compute_embeddings.py
@@ -5,7 +5,6 @@
 import numpy as np

 from sentence_transformers import SentenceTransformer
-from sentence_transformers.util import get_device_name


def test_encode_token_embeddings(paraphrase_distilroberta_base_v1_model: SentenceTransformer) -> None:
@@ -24,13 +23,8 @@ def test_encode_token_embeddings(paraphrase_distilroberta_base_v1_model: SentenceTransformer) -> None:
     emb = model.encode(sent, output_value="token_embeddings", batch_size=2)
     assert len(emb) == len(sent)

-    device = get_device_name()
-    if device == "hpu":
-        for s, e in zip(sent, emb):
-            assert len(model.tokenize([s])["input_ids"][0]) == model.get_max_seq_length()
-    else:
-        for s, e in zip(sent, emb):
-            assert len(model.tokenize([s])["input_ids"][0]) == e.shape[0]
+    for s, e in zip(sent, emb):
+        assert len(model.tokenize([s])["input_ids"][0]) == e.shape[0]


def test_encode_single_sentences(paraphrase_distilroberta_base_v1_model: SentenceTransformer) -> None: