Commit
move 2 convert functions to PreTrainedSentencepieceTokenizer
PhilipMay committed Jun 1, 2021
1 parent 370ba9b commit b772f0a
Showing 6 changed files with 8 additions and 38 deletions.
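The change itself is a de-duplication: several SentencePiece-backed tokenizers (ALBERT, BigBird, Reformer, XLNet, and one more file shown below) each carried near-identical `_convert_token_to_id` / `_convert_id_to_token` methods, and this commit hoists a single shared copy into `src/transformers/tokenization_utils.py`. A minimal sketch of the pattern, written outside the transformers codebase with a hypothetical base-class name, assuming only that the base owns a loaded sentencepiece processor as `self.sp_model`:

import sentencepiece as spm


class SentencepieceTokenizerBase:
    # Hypothetical stand-in for the shared base; in the diff the two methods
    # are added next to get_vocab()/__getstate__() in tokenization_utils.py.
    def __init__(self, vocab_file: str):
        self.sp_model = spm.SentencePieceProcessor()
        self.sp_model.Load(vocab_file)

    def _convert_token_to_id(self, token):
        # Token (str) -> id (int) via the SentencePiece model.
        return self.sp_model.PieceToId(token)

    def _convert_id_to_token(self, index):
        # Id (int) -> token (str) via the SentencePiece model.
        return self.sp_model.IdToPiece(index)


class AlbertLikeTokenizer(SentencepieceTokenizerBase):
    # Subclasses now inherit both conversions and only add model-specific
    # behaviour; Reformer, for example, keeps its own _convert_id_to_token.
    pass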
8 changes: 0 additions & 8 deletions src/transformers/models/albert/tokenization_albert.py
@@ -197,14 +197,6 @@ def _tokenize(self, text: str) -> List[str]:
 
         return new_pieces
 
-    def _convert_token_to_id(self, token):
-        """Converts a token (str) in an id using the vocab."""
-        return self.sp_model.PieceToId(token)
-
-    def _convert_id_to_token(self, index):
-        """Converts an index (integer) in a token (str) using the vocab."""
-        return self.sp_model.IdToPiece(index)
-
     def build_inputs_with_special_tokens(
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
     ) -> List[int]:
9 changes: 0 additions & 9 deletions …
@@ -100,15 +100,6 @@ def __init__(
             **kwargs,
         )
 
-    def _convert_token_to_id(self, token):
-        """Converts a token (str) in an id using the vocab."""
-        return self.sp_model.piece_to_id(token)
-
-    def _convert_id_to_token(self, index):
-        """Converts an index (integer) in a token (str) using the vocab."""
-        token = self.sp_model.IdToPiece(index)
-        return token
-
     def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
         if not os.path.isdir(save_directory):
             logger.error(f"Vocabulary path ({save_directory}) should be a directory")
9 changes: 0 additions & 9 deletions src/transformers/models/big_bird/tokenization_big_bird.py
@@ -130,15 +130,6 @@ def __init__(
             **kwargs,
         )
 
-    def _convert_token_to_id(self, token):
-        """Converts a token (str) in an id using the vocab."""
-        return self.sp_model.piece_to_id(token)
-
-    def _convert_id_to_token(self, index):
-        """Converts an index (integer) in a token (str) using the vocab."""
-        token = self.sp_model.IdToPiece(index)
-        return token
-
     def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
         if not os.path.isdir(save_directory):
             logger.error(f"Vocabulary path ({save_directory}) should be a directory")
4 changes: 0 additions & 4 deletions src/transformers/models/reformer/tokenization_reformer.py
@@ -103,10 +103,6 @@ def __init__(
             **kwargs,
         )
 
-    def _convert_token_to_id(self, token):
-        """Converts a token (str) in an id using the vocab."""
-        return self.sp_model.piece_to_id(token)
-
     def _convert_id_to_token(self, index):
         """Converts an index (integer) in a token (str) using the vocab."""
         if index < self.sp_model.get_piece_size():
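Note that Reformer is the only tokenizer here that keeps its own `_convert_id_to_token`: this hunk removes just `_convert_token_to_id`, presumably because the retained override guards on `self.sp_model.get_piece_size()` before calling `IdToPiece` and so cannot simply fall back to the shared base implementation.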
8 changes: 0 additions & 8 deletions src/transformers/models/xlnet/tokenization_xlnet.py
@@ -200,14 +200,6 @@ def _tokenize(self, text: str) -> List[str]:
 
         return new_pieces
 
-    def _convert_token_to_id(self, token):
-        """Converts a token (str) in an id using the vocab."""
-        return self.sp_model.PieceToId(token)
-
-    def _convert_id_to_token(self, index):
-        """Converts an index (integer) in a token (str) using the vocab."""
-        return self.sp_model.IdToPiece(index)
-
     def build_inputs_with_special_tokens(
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
     ) -> List[int]:
8 changes: 8 additions & 0 deletions src/transformers/tokenization_utils.py
@@ -799,6 +799,14 @@ def get_vocab(self):
         vocab.update(self.added_tokens_encoder)
         return vocab
 
+    def _convert_token_to_id(self, token):
+        """Converts a token (str) in an id using the vocab."""
+        return self.sp_model.PieceToId(token)
+
+    def _convert_id_to_token(self, index):
+        """Converts an index (integer) in a token (str) using the vocab."""
+        return self.sp_model.IdToPiece(index)
+
     def __getstate__(self):
        state = self.__dict__.copy()
        state["sp_model"] = None
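One detail worth noting: the removed subclass methods call the snake_case `piece_to_id`, while the new shared method calls the CamelCase `PieceToId`. Both spellings appear in this diff on `self.sp_model`, and the sentencepiece Python bindings expose them as aliases for the same operation, so the swap should be behaviour-preserving. A quick illustrative check (not part of the commit; `spiece.model` is a placeholder path for any trained SentencePiece model file):

import sentencepiece as spm

sp = spm.SentencePieceProcessor()
sp.Load("spiece.model")  # placeholder: any trained SentencePiece model file

piece = sp.IdToPiece(5)
# CamelCase and snake_case are aliases in the Python bindings.
assert sp.PieceToId(piece) == sp.piece_to_id(piece) == 5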
