Fix a typo in tokenizer documentation #28118

Merged · 1 commit · Dec 18, 2023
src/transformers/models/jukebox/tokenization_jukebox.py: 1 addition & 1 deletion
@@ -185,7 +185,7 @@ def _convert_token_to_id(self, list_artists, list_genres, list_lyrics):

     def _tokenize(self, lyrics):
         """
-        Converts a string in a sequence of tokens (string), using the tokenizer. Split in words for word-based
+        Converts a string into a sequence of tokens (string), using the tokenizer. Split in words for word-based
         vocabulary or sub-words for sub-word-based vocabularies (BPE/SentencePieces/WordPieces).

         Do NOT take care of added tokens. Only the lyrics are split into character for the character-based vocabulary.
src/transformers/models/wav2vec2/tokenization_wav2vec2.py: 1 addition & 1 deletion
@@ -281,7 +281,7 @@ def _add_tokens(self, new_tokens: Union[List[str], List[AddedToken]], special_to

     def _tokenize(self, text, **kwargs):
         """
-        Converts a string in a sequence of tokens (string), using the tokenizer.
+        Converts a string into a sequence of tokens (string), using the tokenizer.
         """
         if self.do_lower_case:
             text = text.upper()
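An aside on the hunk above: the wav2vec2 CTC vocabulary is character-level and upper-case, so `do_lower_case=True` actually upper-cases the input. A minimal self-contained sketch of that `_tokenize` logic; the character split and the `"|"` word-delimiter default are paraphrased from the surrounding implementation, which this hunk does not show:

def _tokenize(text: str, do_lower_case: bool = True, word_delimiter_token: str = "|") -> list:
    # The vocabulary is upper-case, so "lower-casing" normalizes upward (see diff above).
    if do_lower_case:
        text = text.upper()
    # Character-level split; spaces become the word-delimiter token (assumed default "|").
    return list(text.replace(" ", word_delimiter_token))

print(_tokenize("hello world"))
# ['H', 'E', 'L', 'L', 'O', '|', 'W', 'O', 'R', 'L', 'D']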
@@ -247,7 +247,7 @@ def prepare_for_tokenization(

     def _tokenize(self, text, **kwargs):
         """
-        Converts a string in a sequence of tokens (string), using the tokenizer.
+        Converts a string into a sequence of tokens (string), using the tokenizer.
         """

         # make sure whitespace is stripped to prevent <unk>
src/transformers/tokenization_utils.py: 2 additions & 2 deletions
@@ -540,7 +540,7 @@ def num_special_tokens_to_add(self, pair: bool = False) -> int:

     def tokenize(self, text: TextInput, **kwargs) -> List[str]:
         """
-        Converts a string in a sequence of tokens, using the tokenizer.
+        Converts a string into a sequence of tokens, using the tokenizer.

         Split in words for word-based vocabulary or sub-words for sub-word-based vocabularies
         (BPE/SentencePieces/WordPieces). Takes care of added tokens.
@@ -620,7 +620,7 @@ def tokenize(self, text: TextInput, **kwargs) -> List[str]:

     def _tokenize(self, text, **kwargs):
         """
-        Converts a string in a sequence of tokens (string), using the tokenizer. Split in words for word-based
+        Converts a string into a sequence of tokens (string), using the tokenizer. Split in words for word-based
         vocabulary or sub-words for sub-word-based vocabularies (BPE/SentencePieces/WordPieces).

         Do NOT take care of added tokens.
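The distinction these two docstrings draw (the public `tokenize` takes care of added tokens, the private `_tokenize` does not) is easy to observe with any slow tokenizer. A hedged sketch; the `bert-base-uncased` checkpoint and the `new_token` string are illustrative choices, not part of this PR:

from transformers import AutoTokenizer

# use_fast=False selects the pure-Python tokenizer, which implements _tokenize.
tok = AutoTokenizer.from_pretrained("bert-base-uncased", use_fast=False)
tok.add_tokens(["new_token"])

# tokenize() handles added tokens: "new_token" survives as a single token.
print(tok.tokenize("hello new_token world"))
# e.g. ['hello', 'new_token', 'world']

# _tokenize() does not: the underlying basic/WordPiece split tears it apart.
print(tok._tokenize("hello new_token world"))
# e.g. ['hello', 'new', '_', 'token', 'world']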
src/transformers/tokenization_utils_base.py: 1 addition & 1 deletion
@@ -2515,7 +2515,7 @@ def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] =

     def tokenize(self, text: str, pair: Optional[str] = None, add_special_tokens: bool = False, **kwargs) -> List[str]:
         """
-        Converts a string in a sequence of tokens, replacing unknown tokens with the `unk_token`.
+        Converts a string into a sequence of tokens, replacing unknown tokens with the `unk_token`.

         Args:
             text (`str`):
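The `unk_token` replacement this base docstring promises can be sketched the same way, again against an assumed checkpoint:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("bert-base-uncased", use_fast=False)
print(tok.unk_token)            # '[UNK]'
# A character with no vocabulary entry is replaced by the unk_token.
print(tok.tokenize("hello ☃"))  # e.g. ['hello', '[UNK]']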