Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make special image tokens attribute of tokenizer #31967

Closed
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions src/transformers/tokenization_utils_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -842,6 +842,9 @@ class SpecialTokensMixin:
"pad_token",
"cls_token",
"mask_token",
"image_token",
"boi_token",
"eoi_token",
"additional_special_tokens",
]

Expand All @@ -853,6 +856,9 @@ def __init__(self, verbose=False, **kwargs):
self._pad_token = None
self._cls_token = None
self._mask_token = None
self._image_token = None
self._boi_token = None
self._eoi_token = None
self._pad_token_type_id = 0
self._additional_special_tokens = []
self.verbose = verbose
Expand Down Expand Up @@ -1118,6 +1124,44 @@ def mask_token(self) -> str:
return None
return str(self._mask_token)

@property
def image_token(self) -> Optional[str]:
    """
    `Optional[str]`: Image token, to use with Vision-Language Models. Placeholder for image embeddings that
    will be used to merge image embeds with language-model embeds.

    Returns `None` when the token has not been set; in that case an error is logged if `self.verbose` is
    truthy. Otherwise the stored token is returned converted with `str()`.
    """
    token = self._image_token
    if token is not None:
        return str(token)
    # Unset token: stay silent unless the tokenizer was created in verbose mode.
    if self.verbose:
        logger.error("Using image_token, but it is not set yet.")
    return None

@property
def boi_token(self) -> Optional[str]:
    """
    `Optional[str]`: BOI token, to use if a Vision-Language Model has a special beginning-of-image token
    to separate image tokens from language tokens.

    Returns `None` when the token has not been set; in that case an error is logged if `self.verbose` is
    truthy. Otherwise the stored token is returned converted with `str()`.
    """
    token = self._boi_token
    if token is not None:
        return str(token)
    # Unset token: stay silent unless the tokenizer was created in verbose mode.
    if self.verbose:
        logger.error("Using boi_token, but it is not set yet.")
    return None

@property
def eoi_token(self) -> Optional[str]:
    """
    `Optional[str]`: EOI token, to use if a Vision-Language Model has a special end-of-image token
    to separate image tokens from language tokens.

    Returns `None` when the token has not been set; in that case an error is logged if `self.verbose` is
    truthy. Otherwise the stored token is returned converted with `str()`.
    """
    token = self._eoi_token
    if token is not None:
        return str(token)
    # Unset token: stay silent unless the tokenizer was created in verbose mode.
    if self.verbose:
        logger.error("Using eoi_token, but it is not set yet.")
    return None

@property
def additional_special_tokens(self) -> List[str]:
"""
Expand Down Expand Up @@ -1172,6 +1216,24 @@ def mask_token(self, value):
raise ValueError("Cannot set a non-string value as the MASK token")
self._mask_token = value

@image_token.setter
def image_token(self, value):
    """
    Store `value` as the image token.

    Accepts `None`, a `str` or an `AddedToken`; anything else raises `ValueError`.
    """
    if value is not None and not isinstance(value, (str, AddedToken)):
        raise ValueError("Cannot set a non-string value as the IMAGE token")
    self._image_token = value

@boi_token.setter
def boi_token(self, value):
    """
    Store `value` as the beginning-of-image token.

    Accepts `None`, a `str` or an `AddedToken`; anything else raises `ValueError`.
    """
    if value is not None and not isinstance(value, (str, AddedToken)):
        raise ValueError("Cannot set a non-string value as the BOI token")
    self._boi_token = value

@eoi_token.setter
def eoi_token(self, value):
    """
    Store `value` as the end-of-image token.

    Accepts `None`, a `str` or an `AddedToken`; anything else raises `ValueError`.
    """
    if value is not None and not isinstance(value, (str, AddedToken)):
        raise ValueError("Cannot set a non-string value as the EOI token")
    self._eoi_token = value

@additional_special_tokens.setter
def additional_special_tokens(self, value):
self._additional_special_tokens = value if value is not None else None
Expand Down Expand Up @@ -1253,6 +1315,37 @@ def mask_token_id(self) -> Optional[int]:
return None
return self.convert_tokens_to_ids(self.mask_token)

@property
def image_token_id(self) -> Optional[int]:
    """
    `Optional[int]`: Id of the image token, as returned by `convert_tokens_to_ids` for `self.image_token`.
    The image token is the placeholder used with Vision-Language Models to merge image embeds with
    language-model embeds. Returns `None` if the token has not been set.
    """
    return None if self._image_token is None else self.convert_tokens_to_ids(self.image_token)

@property
def boi_token_id(self) -> Optional[int]:
    """
    `Optional[int]`: Id of the beginning-of-image token, as returned by `convert_tokens_to_ids` for
    `self.boi_token`. Used if a Vision-Language Model has a special beginning-of-image token to separate
    image tokens from language tokens. Returns `None` if the token has not been set.
    """
    return None if self._boi_token is None else self.convert_tokens_to_ids(self.boi_token)

@property
def eoi_token_id(self) -> Optional[int]:
    """
    `Optional[int]`: Id of the end-of-image token, as returned by `convert_tokens_to_ids` for
    `self.eoi_token`. Used if a Vision-Language Model has a special end-of-image token to separate
    image tokens from language tokens. Returns `None` if the token has not been set.
    """
    return None if self._eoi_token is None else self.convert_tokens_to_ids(self.eoi_token)

@property
def additional_special_tokens_ids(self) -> List[int]:
"""
Expand Down Expand Up @@ -1289,6 +1382,18 @@ def cls_token_id(self, value):
def mask_token_id(self, value):
self._mask_token = self.convert_ids_to_tokens(value) if value is not None else None

@image_token_id.setter
def image_token_id(self, value):
    """
    Set the image token from an id: stores `convert_ids_to_tokens(value)`, or `None` when `value` is `None`.
    """
    if value is None:
        self._image_token = None
    else:
        self._image_token = self.convert_ids_to_tokens(value)

@boi_token_id.setter
def boi_token_id(self, value):
    """
    Set the beginning-of-image token from an id: stores `convert_ids_to_tokens(value)`, or `None` when
    `value` is `None`.
    """
    if value is None:
        self._boi_token = None
    else:
        self._boi_token = self.convert_ids_to_tokens(value)

@eoi_token_id.setter
def eoi_token_id(self, value):
    """
    Set the end-of-image token from an id: stores `convert_ids_to_tokens(value)`, or `None` when
    `value` is `None`.
    """
    if value is None:
        self._eoi_token = None
    else:
        self._eoi_token = self.convert_ids_to_tokens(value)

@additional_special_tokens_ids.setter
def additional_special_tokens_ids(self, values):
self._additional_special_tokens = [self.convert_ids_to_tokens(value) for value in values]
Expand Down
Loading