Skip to content

Commit

Permalink
fix convert_tokens_to_string calls
Browse files Browse the repository at this point in the history
  • Loading branch information
PhilipMay committed Jul 19, 2021
1 parent 546dc24 commit 960a76f
Show file tree
Hide file tree
Showing 6 changed files with 6 additions and 11 deletions.
3 changes: 1 addition & 2 deletions src/transformers/models/albert/tokenization_albert.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,7 @@ def _convert_id_to_token(self, index):
return self.sp_model.IdToPiece(index)

def convert_tokens_to_string(self, tokens):
    """Convert a sequence of sub-word token strings into a single string.

    Delegates to the SentencePiece model's decoder rather than manually
    joining pieces and replacing the "▁" underline marker.

    Args:
        tokens: list of sub-word token strings produced by this tokenizer.

    Returns:
        The decoded string.
    """
    # NOTE(review): diff artifact resolved — the pre-commit body
    # ("".join(tokens).replace(SPIECE_UNDERLINE, " ").strip() + return)
    # preceded this line and made it unreachable; only the committed
    # implementation is kept.
    return self.sp_model.decode(tokens)

def build_inputs_with_special_tokens(
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
Expand Down
3 changes: 1 addition & 2 deletions src/transformers/models/barthez/tokenization_barthez.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,7 @@ def __setstate__(self, d):

def convert_tokens_to_string(self, tokens):
    """Convert a sequence of tokens (strings for sub-words) into a single string.

    Uses the SentencePiece model's decoder, which handles the "▁"
    word-boundary pieces itself.

    Args:
        tokens: list of sub-word token strings.

    Returns:
        The decoded string.
    """
    # NOTE(review): resolved diff artifact — the removed pre-commit body
    # returned before this line could execute; keep the committed version.
    return self.sp_model.decode(tokens)

def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if not os.path.isdir(save_directory):
Expand Down
3 changes: 1 addition & 2 deletions src/transformers/models/camembert/tokenization_camembert.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,7 @@ def __setstate__(self, d):

def convert_tokens_to_string(self, tokens):
    """Convert a sequence of tokens (strings for sub-words) into a single string.

    Delegates decoding to the underlying SentencePiece model.

    Args:
        tokens: list of sub-word token strings.

    Returns:
        The decoded string.
    """
    # NOTE(review): resolved diff artifact — the stale pre-commit body
    # (manual join/replace/strip + return) shadowed this line; only the
    # committed implementation remains.
    return self.sp_model.decode(tokens)

def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if not os.path.isdir(save_directory):
Expand Down
3 changes: 1 addition & 2 deletions src/transformers/models/m2m_100/tokenization_m2m_100.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,7 @@ def _convert_id_to_token(self, index: int) -> str:

def convert_tokens_to_string(self, tokens: List[str]) -> str:
    """Convert a sequence of tokens (strings for sub-words) into a single string.

    Uses the SentencePiece model's own decoder.

    Args:
        tokens: list of sub-word token strings.

    Returns:
        The decoded string.
    """
    # NOTE(review): resolved diff artifact — the removed pre-commit body
    # returned before this line; keep only the committed implementation.
    return self.sp_model.decode(tokens)

def get_special_tokens_mask(
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
Expand Down
3 changes: 1 addition & 2 deletions src/transformers/models/mbart/tokenization_mbart50.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,7 @@ def _convert_id_to_token(self, index: int) -> str:

def convert_tokens_to_string(self, tokens: List[str]) -> str:
    """Convert a sequence of tokens (strings for sub-words) into a single string.

    Uses the SentencePiece model's own decoder.

    Args:
        tokens: list of sub-word token strings.

    Returns:
        The decoded string.
    """
    # NOTE(review): resolved diff artifact — the stale pre-commit body
    # made this line unreachable; only the committed version is kept.
    return self.sp_model.decode(tokens)

def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if not os.path.isdir(save_directory):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def _convert_id_to_token(self, index: int) -> str:

def convert_tokens_to_string(self, tokens: List[str]) -> str:
"""Converts a sequence of tokens (strings for sub-words) in a single string."""
out_string = "".join(tokens).replace(SPIECE_UNDERLINE, " ").strip()
out_string = self.sp_model.decode(tokens)

if self.do_upper_case:
out_string = out_string.upper()
Expand Down

0 comments on commit 960a76f

Please sign in to comment.