From e80b906476ac3669cc15afed43b5e1bcf3faae7e Mon Sep 17 00:00:00 2001
From: raushan
Date: Mon, 15 Jul 2024 09:53:27 +0200
Subject: [PATCH 1/2] make image token attribute

---
 src/transformers/tokenization_utils_base.py | 103 ++++++++++++++++++++
 1 file changed, 103 insertions(+)

diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py
index 6d2e7f502e0089..caf21a0cab1ac7 100644
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -842,6 +842,7 @@ class SpecialTokensMixin:
         "pad_token",
         "cls_token",
         "mask_token",
+        "image_token",
         "additional_special_tokens",
     ]
 
@@ -853,6 +854,9 @@ def __init__(self, verbose=False, **kwargs):
         self._pad_token = None
         self._cls_token = None
         self._mask_token = None
+        self._image_token = None
+        self._boi_token = None
+        self._eoi_token = None
         self._pad_token_type_id = 0
         self._additional_special_tokens = []
         self.verbose = verbose
@@ -1118,6 +1122,44 @@ def mask_token(self) -> str:
             return None
         return str(self._mask_token)
 
+    @property
+    def image_token(self) -> str:
+        """
+        `str`: Image token, to use with Vision-Language Models. Placeholder for image embeddings and
+        will be used to merge image embeds with language-model embeds. Log an error if used while not having been set.
+        """
+        if self._image_token is None:
+            if self.verbose:
+                logger.error("Using image_token, but it is not set yet.")
+            return None
+        return str(self._image_token)
+
+    @property
+    def boi_token(self) -> str:
+        """
+        `str`: BOI token, to use if a Vision-Language Model has a special beginning-of-image token
+        to separate image tokens from language tokens. Log an error if used while not
+        having been set.
+        """
+        if self._boi_token is None:
+            if self.verbose:
+                logger.error("Using boi_token, but it is not set yet.")
+            return None
+        return str(self._boi_token)
+
+    @property
+    def eoi_token(self) -> str:
+        """
+        `str`: EOI token, to use if a Vision-Language Model has a special end-of-image token
+        to separate image tokens from language tokens. Log an error if used while not
+        having been set.
+        """
+        if self._eoi_token is None:
+            if self.verbose:
+                logger.error("Using eoi_token, but it is not set yet.")
+            return None
+        return str(self._eoi_token)
+
     @property
     def additional_special_tokens(self) -> List[str]:
         """
@@ -1172,6 +1214,24 @@ def mask_token(self, value):
             raise ValueError("Cannot set a non-string value as the MASK token")
         self._mask_token = value
 
+    @image_token.setter
+    def image_token(self, value):
+        if not isinstance(value, (str, AddedToken)) and value is not None:
+            raise ValueError("Cannot set a non-string value as the IMAGE token")
+        self._image_token = value
+
+    @boi_token.setter
+    def boi_token(self, value):
+        if not isinstance(value, (str, AddedToken)) and value is not None:
+            raise ValueError("Cannot set a non-string value as the BOI token")
+        self._boi_token = value
+
+    @eoi_token.setter
+    def eoi_token(self, value):
+        if not isinstance(value, (str, AddedToken)) and value is not None:
+            raise ValueError("Cannot set a non-string value as the EOI token")
+        self._eoi_token = value
+
     @additional_special_tokens.setter
     def additional_special_tokens(self, value):
         self._additional_special_tokens = value if value is not None else None
@@ -1253,6 +1313,37 @@ def mask_token_id(self) -> Optional[int]:
             return None
         return self.convert_tokens_to_ids(self.mask_token)
 
+    @property
+    def image_token_id(self) -> Optional[int]:
+        """
+        `Optional[int]`: Id of the image token in the vocabulary, used with Vision-Language Models.
+        Placeholder for image embeddings and will be used to merge image embeds with language-model embeds.
+        Returns `None` if the token has not been set.
+        """
+        if self._image_token is None:
+            return None
+        return self.convert_tokens_to_ids(self.image_token)
+
+    @property
+    def boi_token_id(self) -> Optional[int]:
+        """
+        `Optional[int]`: Id of the boi token in the vocabulary, used if a Vision-Language Model has a special
+        beginning-of-image token to separate image tokens from language tokens. Returns `None` if the token has not been set.
+        """
+        if self._boi_token is None:
+            return None
+        return self.convert_tokens_to_ids(self.boi_token)
+
+    @property
+    def eoi_token_id(self) -> Optional[int]:
+        """
+        `Optional[int]`: Id of the eoi token in the vocabulary, used if a Vision-Language Model has a special end-of-image token
+        to separate image tokens from language tokens. Returns `None` if the token has not been set.
+        """
+        if self._eoi_token is None:
+            return None
+        return self.convert_tokens_to_ids(self.eoi_token)
+
     @property
     def additional_special_tokens_ids(self) -> List[int]:
         """
@@ -1289,6 +1380,18 @@ def cls_token_id(self, value):
     def mask_token_id(self, value):
         self._mask_token = self.convert_ids_to_tokens(value) if value is not None else None
 
+    @image_token_id.setter
+    def image_token_id(self, value):
+        self._image_token = self.convert_ids_to_tokens(value) if value is not None else None
+
+    @boi_token_id.setter
+    def boi_token_id(self, value):
+        self._boi_token = self.convert_ids_to_tokens(value) if value is not None else None
+
+    @eoi_token_id.setter
+    def eoi_token_id(self, value):
+        self._eoi_token = self.convert_ids_to_tokens(value) if value is not None else None
+
     @additional_special_tokens_ids.setter
     def additional_special_tokens_ids(self, values):
         self._additional_special_tokens = [self.convert_ids_to_tokens(value) for value in values]

From 6af303b36db470060e2fc3792b5c728c7f009b8a Mon Sep 17 00:00:00 2001
From: raushan
Date: Mon, 15 Jul 2024 10:06:37 +0200
Subject: [PATCH 2/2] tmp

---
 src/transformers/tokenization_utils_base.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py
index caf21a0cab1ac7..357cf38bc13cad 100644
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -843,6 +843,8 @@ class SpecialTokensMixin:
         "cls_token",
         "mask_token",
         "image_token",
+        "boi_token",
+        "eoi_token",
         "additional_special_tokens",
     ]
 