From 8a48df7cb13ca7c6301a9c658d8a9846547d6b8b Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Wed, 27 Jul 2022 12:35:53 -0400 Subject: [PATCH 01/10] Preliminary work on tokenizers --- .../models/m2m_100/tokenization_m2m_100.py | 12 +-- .../models/marian/tokenization_marian.py | 14 +-- .../models/mbart/tokenization_mbart.py | 14 +-- .../models/mbart/tokenization_mbart_fast.py | 14 +-- .../models/mbart50/tokenization_mbart50.py | 14 +-- .../mbart50/tokenization_mbart50_fast.py | 14 +-- .../models/nllb/tokenization_nllb.py | 14 +-- .../models/nllb/tokenization_nllb_fast.py | 14 +-- .../models/plbart/tokenization_plbart.py | 14 +-- .../models/rag/tokenization_rag.py | 12 +-- .../models/tapex/tokenization_tapex.py | 13 +-- src/transformers/tokenization_utils_base.py | 98 ++++++++++++++++++- 12 files changed, 146 insertions(+), 101 deletions(-) diff --git a/src/transformers/models/m2m_100/tokenization_m2m_100.py b/src/transformers/models/m2m_100/tokenization_m2m_100.py index f2e9c855bf90..45beeb7f7f3e 100644 --- a/src/transformers/models/m2m_100/tokenization_m2m_100.py +++ b/src/transformers/models/m2m_100/tokenization_m2m_100.py @@ -346,16 +346,12 @@ def _build_translation_inputs(self, raw_inputs, src_lang: Optional[str], tgt_lan inputs["forced_bos_token_id"] = tgt_lang_id return inputs - @contextmanager - def as_target_tokenizer(self): - """ - Temporarily sets the tokenizer for encoding the targets. Useful for tokenizer associated to - sequence-to-sequence models that need a slightly different processing for the labels. - """ - self.set_tgt_lang_special_tokens(self.tgt_lang) - yield + def _switch_to_input_mode(self): self.set_src_lang_special_tokens(self.src_lang) + def _switch_to_targett_mode(self): + self.set_tgt_lang_special_tokens(self.tgt_lang) + def set_src_lang_special_tokens(self, src_lang: str) -> None: """Reset the special tokens to the source lang setting. No prefix and suffix=[eos, src_lang_code].""" lang_token = self.get_lang_token(src_lang) diff --git a/src/transformers/models/marian/tokenization_marian.py b/src/transformers/models/marian/tokenization_marian.py index 62f145e7b798..da9cb66f8603 100644 --- a/src/transformers/models/marian/tokenization_marian.py +++ b/src/transformers/models/marian/tokenization_marian.py @@ -281,18 +281,14 @@ def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None) -> Lis # We don't expect to process pairs, but leave the pair logic for API consistency return token_ids_0 + token_ids_1 + [self.eos_token_id] - @contextmanager - def as_target_tokenizer(self): - """ - Temporarily sets the tokenizer for encoding the targets. Useful for tokenizer associated to - sequence-to-sequence models that need a slightly different processing for the labels. 
- """ + def _switch_to_input_mode(self): + self.current_spm = self.spm_source + self.current_encoder = self.encoder + + def _switch_to_target_mode(self): self.current_spm = self.spm_target if self.separate_vocabs: self.current_encoder = self.target_encoder - yield - self.current_spm = self.spm_source - self.current_encoder = self.encoder @property def vocab_size(self) -> int: diff --git a/src/transformers/models/mbart/tokenization_mbart.py b/src/transformers/models/mbart/tokenization_mbart.py index 2517dfb584bb..7621cc6bde18 100644 --- a/src/transformers/models/mbart/tokenization_mbart.py +++ b/src/transformers/models/mbart/tokenization_mbart.py @@ -340,15 +340,11 @@ def prepare_seq2seq_batch( self.tgt_lang = tgt_lang return super().prepare_seq2seq_batch(src_texts, tgt_texts, **kwargs) - @contextmanager - def as_target_tokenizer(self): - """ - Temporarily sets the tokenizer for encoding the targets. Useful for tokenizer associated to - sequence-to-sequence models that need a slightly different processing for the labels. - """ - self.set_tgt_lang_special_tokens(self.tgt_lang) - yield - self.set_src_lang_special_tokens(self.src_lang) + def _switch_to_input_mode(self): + return self.set_src_lang_special_tokens(self.src_lang) + + def _switch_to_target_mode(self): + return self.set_tgt_lang_special_tokens(self.tgt_lang) def set_src_lang_special_tokens(self, src_lang) -> None: """Reset the special tokens to the source lang setting. No prefix and suffix=[eos, src_lang_code].""" diff --git a/src/transformers/models/mbart/tokenization_mbart_fast.py b/src/transformers/models/mbart/tokenization_mbart_fast.py index 52902e3a40f0..6bd45cfcc02d 100644 --- a/src/transformers/models/mbart/tokenization_mbart_fast.py +++ b/src/transformers/models/mbart/tokenization_mbart_fast.py @@ -240,15 +240,11 @@ def prepare_seq2seq_batch( self.tgt_lang = tgt_lang return super().prepare_seq2seq_batch(src_texts, tgt_texts, **kwargs) - @contextmanager - def as_target_tokenizer(self): - """ - Temporarily sets the tokenizer for encoding the targets. Useful for tokenizer associated to - sequence-to-sequence models that need a slightly different processing for the labels. - """ - self.set_tgt_lang_special_tokens(self.tgt_lang) - yield - self.set_src_lang_special_tokens(self.src_lang) + def _switch_to_input_mode(self): + return self.set_src_lang_special_tokens(self.src_lang) + + def _switch_to_target_mode(self): + return self.set_tgt_lang_special_tokens(self.tgt_lang) def set_src_lang_special_tokens(self, src_lang) -> None: """Reset the special tokens to the source lang setting. No prefix and suffix=[eos, src_lang_code].""" diff --git a/src/transformers/models/mbart50/tokenization_mbart50.py b/src/transformers/models/mbart50/tokenization_mbart50.py index 145a546c1810..7c1777349c1c 100644 --- a/src/transformers/models/mbart50/tokenization_mbart50.py +++ b/src/transformers/models/mbart50/tokenization_mbart50.py @@ -337,15 +337,11 @@ def prepare_seq2seq_batch( self.tgt_lang = tgt_lang return super().prepare_seq2seq_batch(src_texts, tgt_texts, **kwargs) - @contextmanager - def as_target_tokenizer(self): - """ - Temporarily sets the tokenizer for encoding the targets. Useful for tokenizer associated to - sequence-to-sequence models that need a slightly different processing for the labels. 
- """ - self.set_tgt_lang_special_tokens(self.tgt_lang) - yield - self.set_src_lang_special_tokens(self.src_lang) + def _switch_to_input_mode(self): + return self.set_src_lang_special_tokens(self.src_lang) + + def _switch_to_target_mode(self): + return self.set_tgt_lang_special_tokens(self.tgt_lang) def set_src_lang_special_tokens(self, src_lang: str) -> None: """Reset the special tokens to the source lang setting. prefix=[src_lang_code] and suffix=[eos].""" diff --git a/src/transformers/models/mbart50/tokenization_mbart50_fast.py b/src/transformers/models/mbart50/tokenization_mbart50_fast.py index 28fb726c476d..09fe1b94cbe1 100644 --- a/src/transformers/models/mbart50/tokenization_mbart50_fast.py +++ b/src/transformers/models/mbart50/tokenization_mbart50_fast.py @@ -211,15 +211,11 @@ def prepare_seq2seq_batch( self.tgt_lang = tgt_lang return super().prepare_seq2seq_batch(src_texts, tgt_texts, **kwargs) - @contextmanager - def as_target_tokenizer(self): - """ - Temporarily sets the tokenizer for encoding the targets. Useful for tokenizer associated to - sequence-to-sequence models that need a slightly different processing for the labels. - """ - self.set_tgt_lang_special_tokens(self.tgt_lang) - yield - self.set_src_lang_special_tokens(self.src_lang) + def _switch_to_input_mode(self): + return self.set_src_lang_special_tokens(self.src_lang) + + def _switch_to_target_mode(self): + return self.set_tgt_lang_special_tokens(self.tgt_lang) def set_src_lang_special_tokens(self, src_lang: str) -> None: """Reset the special tokens to the source lang setting. prefix=[src_lang_code] and suffix=[eos].""" diff --git a/src/transformers/models/nllb/tokenization_nllb.py b/src/transformers/models/nllb/tokenization_nllb.py index ef0ee942bfa9..24d4d21eb5c9 100644 --- a/src/transformers/models/nllb/tokenization_nllb.py +++ b/src/transformers/models/nllb/tokenization_nllb.py @@ -386,15 +386,11 @@ def prepare_seq2seq_batch( self.tgt_lang = tgt_lang return super().prepare_seq2seq_batch(src_texts, tgt_texts, **kwargs) - @contextmanager - def as_target_tokenizer(self): - """ - Temporarily sets the tokenizer for encoding the targets. Useful for tokenizer associated to - sequence-to-sequence models that need a slightly different processing for the labels. - """ - self.set_tgt_lang_special_tokens(self.tgt_lang) - yield - self.set_src_lang_special_tokens(self.src_lang) + def _switch_to_input_mode(self): + return self.set_src_lang_special_tokens(self.src_lang) + + def _switch_to_target_mode(self): + return self.set_tgt_lang_special_tokens(self.tgt_lang) def set_src_lang_special_tokens(self, src_lang) -> None: """Reset the special tokens to the source lang setting. No prefix and suffix=[eos, src_lang_code].""" diff --git a/src/transformers/models/nllb/tokenization_nllb_fast.py b/src/transformers/models/nllb/tokenization_nllb_fast.py index fa4eaa4c5a80..0785bb4bcf05 100644 --- a/src/transformers/models/nllb/tokenization_nllb_fast.py +++ b/src/transformers/models/nllb/tokenization_nllb_fast.py @@ -284,15 +284,11 @@ def prepare_seq2seq_batch( self.tgt_lang = tgt_lang return super().prepare_seq2seq_batch(src_texts, tgt_texts, **kwargs) - @contextmanager - def as_target_tokenizer(self): - """ - Temporarily sets the tokenizer for encoding the targets. Useful for tokenizer associated to - sequence-to-sequence models that need a slightly different processing for the labels. 
- """ - self.set_tgt_lang_special_tokens(self.tgt_lang) - yield - self.set_src_lang_special_tokens(self.src_lang) + def _switch_to_input_mode(self): + return self.set_src_lang_special_tokens(self.src_lang) + + def _switch_to_target_mode(self): + return self.set_tgt_lang_special_tokens(self.tgt_lang) def set_src_lang_special_tokens(self, src_lang) -> None: """Reset the special tokens to the source lang setting. No prefix and suffix=[eos, src_lang_code].""" diff --git a/src/transformers/models/plbart/tokenization_plbart.py b/src/transformers/models/plbart/tokenization_plbart.py index 4a3ee1cdcd11..f14ee506e6ff 100644 --- a/src/transformers/models/plbart/tokenization_plbart.py +++ b/src/transformers/models/plbart/tokenization_plbart.py @@ -441,15 +441,11 @@ def prepare_seq2seq_batch( self.tgt_lang = tgt_lang return super().prepare_seq2seq_batch(src_texts, tgt_texts, **kwargs) - @contextmanager - def as_target_tokenizer(self): - """ - Temporarily sets the tokenizer for encoding the targets. Useful for tokenizer associated to - sequence-to-sequence models that need a slightly different processing for the labels. - """ - self.set_tgt_lang_special_tokens(self.tgt_lang) - yield - self.set_src_lang_special_tokens(self.src_lang) + def _switch_to_input_mode(self): + return self.set_src_lang_special_tokens(self.src_lang) + + def _switch_to_target_mode(self): + return self.set_tgt_lang_special_tokens(self.tgt_lang) def set_src_lang_special_tokens(self, src_lang) -> None: """Reset the special tokens to the source lang setting. No prefix and suffix=[eos, src_lang_code].""" diff --git a/src/transformers/models/rag/tokenization_rag.py b/src/transformers/models/rag/tokenization_rag.py index d92ca1788faa..a1af9af6168b 100644 --- a/src/transformers/models/rag/tokenization_rag.py +++ b/src/transformers/models/rag/tokenization_rag.py @@ -68,16 +68,12 @@ def batch_decode(self, *args, **kwargs): def decode(self, *args, **kwargs): return self.generator.decode(*args, **kwargs) - @contextmanager - def as_target_tokenizer(self): - """ - Temporarily sets the tokenizer for encoding the targets. Useful for tokenizer associated to - sequence-to-sequence models that need a slightly different processing for the labels. - """ - self.current_tokenizer = self.generator - yield + def _switch_to_input_mode(self): self.current_tokenizer = self.question_encoder + def _switch_to_target_mode(self): + self.current_tokenizer = self.generator + def prepare_seq2seq_batch( self, src_texts: List[str], diff --git a/src/transformers/models/tapex/tokenization_tapex.py b/src/transformers/models/tapex/tokenization_tapex.py index ea1dc0dcc492..5bdc307ef665 100644 --- a/src/transformers/models/tapex/tokenization_tapex.py +++ b/src/transformers/models/tapex/tokenization_tapex.py @@ -1330,17 +1330,12 @@ def _target_encode_plus( verbose=verbose, ) - @contextmanager - def as_target_tokenizer(self): - """ - Temporarily sets the tokenizer for encoding the targets. Useful for tokenizer associated to - sequence-to-sequence models that need a slightly different processing for the labels. 
- """ - self.current_tokenizer = TokenizerStrategy.TOKENIZE_TARGET - yield - # restore the call function + def _switch_to_input_mode(self): self.current_tokenizer = TokenizerStrategy.TOKENIZE_SOURCE + def _switch_to_target_mode(self): + self.current_tokenizer = TokenizerStrategy.TOKENIZE_TARGET + def prepare_table_query( self, table, diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 776c9a69db4c..31c0d3ddb0d8 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2431,8 +2431,12 @@ def _get_padding_truncation_strategies( @add_end_docstrings(ENCODE_KWARGS_DOCSTRING, ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING) def __call__( self, - text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], + text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, text_pair: Optional[Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]]] = None, + text_target: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, + text_pair_target: Optional[ + Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] + ] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, truncation: Union[bool, str, TruncationStrategy] = False, @@ -2455,15 +2459,82 @@ def __call__( sequences. Args: - text (`str`, `List[str]`, `List[List[str]]`): + text (`str`, `List[str]`, `List[List[str]]`, *optional*): The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set `is_split_into_words=True` (to lift the ambiguity with a batch of sequences). - text_pair (`str`, `List[str]`, `List[List[str]]`): + text_pair (`str`, `List[str]`, `List[List[str]]`, *optional*): The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set `is_split_into_words=True` (to lift the ambiguity with a batch of sequences). - """ + text_target (`str`, `List[str]`, `List[List[str]]`, *optional*): + The sequence or batch of sequences to be encoded as target texts. Each sequence can be a string or a + list of strings (pretokenized string). If the sequences are provided as list of strings (pretokenized), + you must set `is_split_into_words=True` (to lift the ambiguity with a batch of sequences). + text_pair_target (`str`, `List[str]`, `List[List[str]]`, *optional*): + The sequence or batch of sequences to be encoded as target texts. Each sequence can be a string or a + list of strings (pretokenized string). If the sequences are provided as list of strings (pretokenized), + you must set `is_split_into_words=True` (to lift the ambiguity with a batch of sequences). 
+ """ + # To avoid duplicating + all_kwargs = dict( + add_special_tokens=add_special_tokens, + padding=padding, + truncation=truncation, + max_length=max_length, + stride=stride, + is_split_into_words=is_split_into_words, + pad_to_multiple_of=pad_to_multiple_of, + return_tensors=return_tensors, + return_token_type_ids=return_token_type_ids, + return_attention_mask=return_attention_mask, + return_overflowing_tokens=return_overflowing_tokens, + return_special_tokens_mask=return_special_tokens_mask, + return_offsets_mapping=return_offsets_mapping, + return_length=return_length, + verbose=verbose, + ) + all_kwargs.update(kwargs) + if text is None and text_target is None: + raise ValueError("You need to specify either `text` or `text_target`.") + if text is not None: + self._switch_to_input_mode() + encodings = self._call_one(text=text, text_pair=text_pair, **all_kwargs) + if text_target is not None: + self._switch_to_target_mode() + target_encodings = self._call_one(text=text_target, text_pair=text_pair_target, **all_kwargs) + # Leave back tokenizer in input mode + self._switch_to_input_mode() + + if text_target is None: + return encodings + elif text is None: + return target_encodings + else: + encodings["labels"] = target_encodings["input_ids"] + return encodings + + def _call_one( + self, + text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], + text_pair: Optional[Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]]] = None, + add_special_tokens: bool = True, + padding: Union[bool, str, PaddingStrategy] = False, + truncation: Union[bool, str, TruncationStrategy] = False, + max_length: Optional[int] = None, + stride: int = 0, + is_split_into_words: bool = False, + pad_to_multiple_of: Optional[int] = None, + return_tensors: Optional[Union[str, TensorType]] = None, + return_token_type_ids: Optional[bool] = None, + return_attention_mask: Optional[bool] = None, + return_overflowing_tokens: bool = False, + return_special_tokens_mask: bool = False, + return_offsets_mapping: bool = False, + return_length: bool = False, + verbose: bool = True, + **kwargs + ) -> BatchEncoding: # Input type checking for clearer error def _is_valid_text_input(t): if isinstance(t, str): @@ -3456,13 +3527,32 @@ def _eventual_warn_about_too_long_sequence(self, ids: List[int], max_length: Opt ) self.deprecation_warnings["sequence-length-is-longer-than-the-specified-maximum"] = True + def _switch_to_input_mode(self): + """ + Private method to put the tokenizer in input mode (when it has different modes for input/outputs) + """ + pass + + def _switch_to_target_mode(self): + """ + Private method to put the tokenizer in target mode (when it has different modes for input/outputs) + """ + pass + @contextmanager def as_target_tokenizer(self): """ Temporarily sets the tokenizer for encoding the targets. Useful for tokenizer associated to sequence-to-sequence models that need a slightly different processing for the labels. """ + warnings.warn( + "`as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your " + "labels by using the argument `text_target` of the regular `__call__` method (either in the same call as " + "your input texts if you use the same keyword arguments, or in a separate call." 
+ ) + self._switch_to_target_mode() yield + self._switch_to_input_mode() @classmethod def register_for_auto_class(cls, auto_class="AutoTokenizer"): From f529476b492c4a67d3264d6c6cc7b5d9de2f4e6e Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Wed, 27 Jul 2022 14:10:32 -0400 Subject: [PATCH 02/10] Quality + fix tests --- src/transformers/models/m2m_100/tokenization_m2m_100.py | 3 +-- src/transformers/models/marian/tokenization_marian.py | 1 - src/transformers/models/mbart/tokenization_mbart.py | 1 - src/transformers/models/mbart/tokenization_mbart_fast.py | 1 - src/transformers/models/mbart50/tokenization_mbart50.py | 1 - .../models/mbart50/tokenization_mbart50_fast.py | 1 - src/transformers/models/nllb/tokenization_nllb.py | 1 - src/transformers/models/nllb/tokenization_nllb_fast.py | 1 - src/transformers/models/plbart/tokenization_plbart.py | 1 - src/transformers/models/rag/tokenization_rag.py | 1 - src/transformers/models/tapex/tokenization_tapex.py | 1 - src/transformers/tokenization_utils_base.py | 8 ++++++-- 12 files changed, 7 insertions(+), 14 deletions(-) diff --git a/src/transformers/models/m2m_100/tokenization_m2m_100.py b/src/transformers/models/m2m_100/tokenization_m2m_100.py index 45beeb7f7f3e..c11c8782f227 100644 --- a/src/transformers/models/m2m_100/tokenization_m2m_100.py +++ b/src/transformers/models/m2m_100/tokenization_m2m_100.py @@ -14,7 +14,6 @@ """Tokenization classes for M2M100.""" import json import os -from contextlib import contextmanager from pathlib import Path from shutil import copyfile from typing import Any, Dict, List, Optional, Tuple, Union @@ -349,7 +348,7 @@ def _build_translation_inputs(self, raw_inputs, src_lang: Optional[str], tgt_lan def _switch_to_input_mode(self): self.set_src_lang_special_tokens(self.src_lang) - def _switch_to_targett_mode(self): + def _switch_to_target_mode(self): self.set_tgt_lang_special_tokens(self.tgt_lang) def set_src_lang_special_tokens(self, src_lang: str) -> None: diff --git a/src/transformers/models/marian/tokenization_marian.py b/src/transformers/models/marian/tokenization_marian.py index da9cb66f8603..6967a675813c 100644 --- a/src/transformers/models/marian/tokenization_marian.py +++ b/src/transformers/models/marian/tokenization_marian.py @@ -15,7 +15,6 @@ import os import re import warnings -from contextlib import contextmanager from pathlib import Path from shutil import copyfile from typing import Any, Dict, List, Optional, Tuple, Union diff --git a/src/transformers/models/mbart/tokenization_mbart.py b/src/transformers/models/mbart/tokenization_mbart.py index 7621cc6bde18..431908b0e97c 100644 --- a/src/transformers/models/mbart/tokenization_mbart.py +++ b/src/transformers/models/mbart/tokenization_mbart.py @@ -14,7 +14,6 @@ # limitations under the License. import os -from contextlib import contextmanager from shutil import copyfile from typing import Any, Dict, List, Optional, Tuple diff --git a/src/transformers/models/mbart/tokenization_mbart_fast.py b/src/transformers/models/mbart/tokenization_mbart_fast.py index 6bd45cfcc02d..85937a5fd8ed 100644 --- a/src/transformers/models/mbart/tokenization_mbart_fast.py +++ b/src/transformers/models/mbart/tokenization_mbart_fast.py @@ -14,7 +14,6 @@ # limitations under the License. 
import os -from contextlib import contextmanager from shutil import copyfile from typing import List, Optional, Tuple diff --git a/src/transformers/models/mbart50/tokenization_mbart50.py b/src/transformers/models/mbart50/tokenization_mbart50.py index 7c1777349c1c..274b7f95f829 100644 --- a/src/transformers/models/mbart50/tokenization_mbart50.py +++ b/src/transformers/models/mbart50/tokenization_mbart50.py @@ -14,7 +14,6 @@ # limitations under the License. import os -from contextlib import contextmanager from shutil import copyfile from typing import Any, Dict, List, Optional, Tuple diff --git a/src/transformers/models/mbart50/tokenization_mbart50_fast.py b/src/transformers/models/mbart50/tokenization_mbart50_fast.py index 09fe1b94cbe1..b0f41a0fd719 100644 --- a/src/transformers/models/mbart50/tokenization_mbart50_fast.py +++ b/src/transformers/models/mbart50/tokenization_mbart50_fast.py @@ -14,7 +14,6 @@ # limitations under the License. import os -from contextlib import contextmanager from shutil import copyfile from typing import List, Optional, Tuple diff --git a/src/transformers/models/nllb/tokenization_nllb.py b/src/transformers/models/nllb/tokenization_nllb.py index 24d4d21eb5c9..d0c0ce8b74ec 100644 --- a/src/transformers/models/nllb/tokenization_nllb.py +++ b/src/transformers/models/nllb/tokenization_nllb.py @@ -14,7 +14,6 @@ # limitations under the License. import os -from contextlib import contextmanager from shutil import copyfile from typing import Any, Dict, List, Optional, Tuple diff --git a/src/transformers/models/nllb/tokenization_nllb_fast.py b/src/transformers/models/nllb/tokenization_nllb_fast.py index 0785bb4bcf05..404b7c093e05 100644 --- a/src/transformers/models/nllb/tokenization_nllb_fast.py +++ b/src/transformers/models/nllb/tokenization_nllb_fast.py @@ -14,7 +14,6 @@ # limitations under the License. import os -from contextlib import contextmanager from shutil import copyfile from typing import List, Optional, Tuple diff --git a/src/transformers/models/plbart/tokenization_plbart.py b/src/transformers/models/plbart/tokenization_plbart.py index f14ee506e6ff..93e109473bdf 100644 --- a/src/transformers/models/plbart/tokenization_plbart.py +++ b/src/transformers/models/plbart/tokenization_plbart.py @@ -14,7 +14,6 @@ # limitations under the License. 
import os -from contextlib import contextmanager from shutil import copyfile from typing import Any, Dict, List, Optional, Tuple diff --git a/src/transformers/models/rag/tokenization_rag.py b/src/transformers/models/rag/tokenization_rag.py index a1af9af6168b..485c2c448373 100644 --- a/src/transformers/models/rag/tokenization_rag.py +++ b/src/transformers/models/rag/tokenization_rag.py @@ -15,7 +15,6 @@ """Tokenization classes for RAG.""" import os import warnings -from contextlib import contextmanager from typing import List, Optional from ...tokenization_utils_base import BatchEncoding diff --git a/src/transformers/models/tapex/tokenization_tapex.py b/src/transformers/models/tapex/tokenization_tapex.py index 5bdc307ef665..d9afd160d38d 100644 --- a/src/transformers/models/tapex/tokenization_tapex.py +++ b/src/transformers/models/tapex/tokenization_tapex.py @@ -17,7 +17,6 @@ import json import os import random -from contextlib import contextmanager from functools import lru_cache from typing import Dict, List, Optional, Tuple, Union diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 31c0d3ddb0d8..3b10c0a3d847 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -1501,7 +1501,7 @@ def __init__(self, **kwargs): self.deprecation_warnings = ( {} ) # Use to store when we have already noticed a deprecation warning (avoid overlogging). - + self._in_target_context_manager = False super().__init__(**kwargs) @property @@ -2498,7 +2498,9 @@ def __call__( if text is None and text_target is None: raise ValueError("You need to specify either `text` or `text_target`.") if text is not None: - self._switch_to_input_mode() + # The context manager will send the inputs as normal texts and not text_target. + if not self._in_target_context_manager: + self._switch_to_input_mode() encodings = self._call_one(text=text, text_pair=text_pair, **all_kwargs) if text_target is not None: self._switch_to_target_mode() @@ -3551,7 +3553,9 @@ def as_target_tokenizer(self): "your input texts if you use the same keyword arguments, or in a separate call." 
) self._switch_to_target_mode() + self._in_target_context_manager = True yield + self._in_target_context_manager = False self._switch_to_input_mode() @classmethod From 8b2af7275da1de3b7c84385bbff048c5ec43e324 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Wed, 27 Jul 2022 14:38:28 -0400 Subject: [PATCH 03/10] Treat processors --- .../models/mctct/processing_mctct.py | 62 +++++++++++++++++- .../processing_speech_to_text.py | 39 ++++++++++- .../processing_speech_to_text_2.py | 39 ++++++++++- .../models/trocr/processing_trocr.py | 39 ++++++++++- .../models/wav2vec2/processing_wav2vec2.py | 61 +++++++++++++++++- .../processing_wav2vec2_with_lm.py | 64 ++++++++++++++++++- src/transformers/tokenization_utils_base.py | 3 +- 7 files changed, 296 insertions(+), 11 deletions(-) diff --git a/src/transformers/models/mctct/processing_mctct.py b/src/transformers/models/mctct/processing_mctct.py index 0892f345928b..8cea9ce2a45e 100644 --- a/src/transformers/models/mctct/processing_mctct.py +++ b/src/transformers/models/mctct/processing_mctct.py @@ -15,6 +15,7 @@ """ Speech processor class for M-CTC-T """ +import warnings from contextlib import contextmanager from ...processing_utils import ProcessorMixin @@ -39,6 +40,7 @@ class MCTCTProcessor(ProcessorMixin): def __init__(self, feature_extractor, tokenizer): super().__init__(feature_extractor, tokenizer) self.current_processor = self.feature_extractor + self._in_target_context_manager = False def __call__(self, *args, **kwargs): """ @@ -47,7 +49,35 @@ def __call__(self, *args, **kwargs): [`~MCTCTProcessor.as_target_processor`] this method forwards all its arguments to AutoTokenizer's [`~AutoTokenizer.__call__`]. Please refer to the doctsring of the above two methods for more information. """ - return self.current_processor(*args, **kwargs) + # For backward compatibility + if self._in_target_context_manager: + return self.current_processor(*args, **kwargs) + + if "raw_speech" in kwargs: + warnings.warn("Using `raw_speech` as a keyword argument is deprecated. Use `audio` instead.") + audio = kwargs.pop("raw_speech") + else: + audio = kwargs.pop("audio", None) + text = kwargs.pop("text", None) + if len(args) > 0: + audio = args[0] + args = args[1:] + + if audio is None and text is None: + raise ValueError("You need to specify either an `audio` or `text` input to process.") + + if audio is not None: + inputs = self.feature_extractor(audio, *args, **kwargs) + if text is not None: + encodings = self.tokenizer(text, **kwargs) + + if text is None: + return inputs + elif audio is None: + return encodings + else: + inputs["labels"] = encodings["input_ids"] + return inputs def batch_decode(self, *args, **kwargs): """ @@ -63,7 +93,28 @@ def pad(self, *args, **kwargs): [`~MCTCTProcessor.as_target_processor`] this method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.pad`]. Please refer to the docstring of the above two methods for more information. 
""" - return self.current_processor.pad(*args, **kwargs) + # For backward compatibility + if self._in_target_context_manager: + return self.current_processor.pad(*args, **kwargs) + + input_features = kwargs.pop("input_features", None) + labels = kwargs.pop("labels", None) + if len(args) > 1: + input_features = args[0] + args = args[1:] + + if input_features is not None: + input_features = self.feature_extractor.pad(input_features, *args, **kwargs) + if labels is not None: + labels = self.tokenizer.pad(labels, **kwargs) + + if labels is None: + return input_features + elif input_features is None: + return labels + else: + input_features["labels"] = labels["input_ids"] + return input_features def decode(self, *args, **kwargs): """ @@ -77,6 +128,13 @@ def as_target_processor(self): """ Temporarily sets the tokenizer for processing the input. Useful for encoding the labels when fine-tuning MCTCT. """ + warnings.warn( + "`as_target_processor` is deprecated and will be removed in v5 of Transformers. You can process your " + "labels by using the argument `text` of the regular `__call__` method (either in the same call as " + "your audio inputs, or in a separate call." + ) + self._in_target_context_manager = True self.current_processor = self.tokenizer yield self.current_processor = self.feature_extractor + self._in_target_context_manager = False diff --git a/src/transformers/models/speech_to_text/processing_speech_to_text.py b/src/transformers/models/speech_to_text/processing_speech_to_text.py index 969df9d108fe..3f047932030f 100644 --- a/src/transformers/models/speech_to_text/processing_speech_to_text.py +++ b/src/transformers/models/speech_to_text/processing_speech_to_text.py @@ -15,6 +15,7 @@ """ Speech processor class for Speech2Text """ +import warnings from contextlib import contextmanager from ...processing_utils import ProcessorMixin @@ -41,6 +42,7 @@ class Speech2TextProcessor(ProcessorMixin): def __init__(self, feature_extractor, tokenizer): super().__init__(feature_extractor, tokenizer) self.current_processor = self.feature_extractor + self._in_target_context_manager = False def __call__(self, *args, **kwargs): """ @@ -50,7 +52,35 @@ def __call__(self, *args, **kwargs): [`~Speech2TextTokenizer.__call__`]. Please refer to the doctsring of the above two methods for more information. """ - return self.current_processor(*args, **kwargs) + # For backward compatibility + if self._in_target_context_manager: + return self.current_processor(*args, **kwargs) + + if "raw_speech" in kwargs: + warnings.warn("Using `raw_speech` as a keyword argument is deprecated. Use `audio` instead.") + audio = kwargs.pop("raw_speech") + else: + audio = kwargs.pop("audio", None) + text = kwargs.pop("text", None) + if len(args) > 0: + audio = args[0] + args = args[1:] + + if audio is None and text is None: + raise ValueError("You need to specify either an `audio` or `text` input to process.") + + if audio is not None: + inputs = self.feature_extractor(audio, *args, **kwargs) + if text is not None: + encodings = self.tokenizer(text, **kwargs) + + if text is None: + return inputs + elif audio is None: + return encodings + else: + inputs["labels"] = encodings["input_ids"] + return inputs def batch_decode(self, *args, **kwargs): """ @@ -72,6 +102,13 @@ def as_target_processor(self): Temporarily sets the tokenizer for processing the input. Useful for encoding the labels when fine-tuning Speech2Text. """ + warnings.warn( + "`as_target_processor` is deprecated and will be removed in v5 of Transformers. 
You can process your " + "labels by using the argument `text` of the regular `__call__` method (either in the same call as " + "your audio inputs, or in a separate call." + ) + self._in_target_context_manager = True self.current_processor = self.tokenizer yield self.current_processor = self.feature_extractor + self._in_target_context_manager = False diff --git a/src/transformers/models/speech_to_text_2/processing_speech_to_text_2.py b/src/transformers/models/speech_to_text_2/processing_speech_to_text_2.py index 28189ba88198..c40831d0214a 100644 --- a/src/transformers/models/speech_to_text_2/processing_speech_to_text_2.py +++ b/src/transformers/models/speech_to_text_2/processing_speech_to_text_2.py @@ -15,6 +15,7 @@ """ Speech processor class for Speech2Text2 """ +import warnings from contextlib import contextmanager from ...processing_utils import ProcessorMixin @@ -40,6 +41,7 @@ class Speech2Text2Processor(ProcessorMixin): def __init__(self, feature_extractor, tokenizer): super().__init__(feature_extractor, tokenizer) self.current_processor = self.feature_extractor + self._in_target_context_manager = False def __call__(self, *args, **kwargs): """ @@ -49,7 +51,35 @@ def __call__(self, *args, **kwargs): Speech2Text2Tokenizer's [`~Speech2Text2Tokenizer.__call__`]. Please refer to the doctsring of the above two methods for more information. """ - return self.current_processor(*args, **kwargs) + # For backward compatibility + if self._in_target_context_manager: + return self.current_processor(*args, **kwargs) + + if "raw_speech" in kwargs: + warnings.warn("Using `raw_speech` as a keyword argument is deprecated. Use `audio` instead.") + audio = kwargs.pop("raw_speech") + else: + audio = kwargs.pop("audio", None) + text = kwargs.pop("text", None) + if len(args) > 0: + audio = args[0] + args = args[1:] + + if audio is None and text is None: + raise ValueError("You need to specify either an `audio` or `text` input to process.") + + if audio is not None: + inputs = self.feature_extractor(audio, *args, **kwargs) + if text is not None: + encodings = self.tokenizer(text, **kwargs) + + if text is None: + return inputs + elif audio is None: + return encodings + else: + inputs["labels"] = encodings["input_ids"] + return inputs def batch_decode(self, *args, **kwargs): """ @@ -71,6 +101,13 @@ def as_target_processor(self): Temporarily sets the tokenizer for processing the input. Useful for encoding the labels when fine-tuning Speech2Text2. """ + warnings.warn( + "`as_target_processor` is deprecated and will be removed in v5 of Transformers. You can process your " + "labels by using the argument `text` of the regular `__call__` method (either in the same call as " + "your audio inputs, or in a separate call." + ) + self._in_target_context_manager = True self.current_processor = self.tokenizer yield self.current_processor = self.feature_extractor + self._in_target_context_manager = False diff --git a/src/transformers/models/trocr/processing_trocr.py b/src/transformers/models/trocr/processing_trocr.py index 2c7893a0915b..44a276fd63ae 100644 --- a/src/transformers/models/trocr/processing_trocr.py +++ b/src/transformers/models/trocr/processing_trocr.py @@ -15,6 +15,7 @@ """ Processor class for TrOCR. 
""" +import warnings from contextlib import contextmanager from ...processing_utils import ProcessorMixin @@ -40,6 +41,7 @@ class TrOCRProcessor(ProcessorMixin): def __init__(self, feature_extractor, tokenizer): super().__init__(feature_extractor, tokenizer) self.current_processor = self.feature_extractor + self._in_target_context_manager = False def __call__(self, *args, **kwargs): """ @@ -48,7 +50,35 @@ def __call__(self, *args, **kwargs): [`~TrOCRProcessor.as_target_processor`] this method forwards all its arguments to TrOCRTokenizer's [`~TrOCRTokenizer.__call__`]. Please refer to the doctsring of the above two methods for more information. """ - return self.current_processor(*args, **kwargs) + # For backward compatibility + if self._in_target_context_manager: + return self.current_processor(*args, **kwargs) + + if "raw_speech" in kwargs: + warnings.warn("Using `raw_speech` as a keyword argument is deprecated. Use `audio` instead.") + audio = kwargs.pop("raw_speech") + else: + audio = kwargs.pop("audio", None) + text = kwargs.pop("text", None) + if len(args) > 0: + audio = args[0] + args = args[1:] + + if audio is None and text is None: + raise ValueError("You need to specify either an `audio` or `text` input to process.") + + if audio is not None: + inputs = self.feature_extractor(audio, *args, **kwargs) + if text is not None: + encodings = self.tokenizer(text, **kwargs) + + if text is None: + return inputs + elif audio is None: + return encodings + else: + inputs["labels"] = encodings["input_ids"] + return inputs def batch_decode(self, *args, **kwargs): """ @@ -69,6 +99,13 @@ def as_target_processor(self): """ Temporarily sets the tokenizer for processing the input. Useful for encoding the labels when fine-tuning TrOCR. """ + warnings.warn( + "`as_target_processor` is deprecated and will be removed in v5 of Transformers. You can process your " + "labels by using the argument `text` of the regular `__call__` method (either in the same call as " + "your audio inputs, or in a separate call." + ) + self._in_target_context_manager = True self.current_processor = self.tokenizer yield self.current_processor = self.feature_extractor + self._in_target_context_manager = False diff --git a/src/transformers/models/wav2vec2/processing_wav2vec2.py b/src/transformers/models/wav2vec2/processing_wav2vec2.py index 1470c254dc63..55af0f1f6ccd 100644 --- a/src/transformers/models/wav2vec2/processing_wav2vec2.py +++ b/src/transformers/models/wav2vec2/processing_wav2vec2.py @@ -43,6 +43,7 @@ class Wav2Vec2Processor(ProcessorMixin): def __init__(self, feature_extractor, tokenizer): super().__init__(feature_extractor, tokenizer) self.current_processor = self.feature_extractor + self._in_target_context_manager = False @classmethod def from_pretrained(cls, pretrained_model_name_or_path, **kwargs): @@ -70,7 +71,35 @@ def __call__(self, *args, **kwargs): [`~Wav2Vec2Processor.as_target_processor`] this method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.__call__`]. Please refer to the docstring of the above two methods for more information. """ - return self.current_processor(*args, **kwargs) + # For backward compatibility + if self._in_target_context_manager: + return self.current_processor(*args, **kwargs) + + if "raw_speech" in kwargs: + warnings.warn("Using `raw_speech` as a keyword argument is deprecated. 
Use `audio` instead.") + audio = kwargs.pop("raw_speech") + else: + audio = kwargs.pop("audio", None) + text = kwargs.pop("text", None) + if len(args) > 0: + audio = args[0] + args = args[1:] + + if audio is None and text is None: + raise ValueError("You need to specify either an `audio` or `text` input to process.") + + if audio is not None: + inputs = self.feature_extractor(audio, *args, **kwargs) + if text is not None: + encodings = self.tokenizer(text, **kwargs) + + if text is None: + return inputs + elif audio is None: + return encodings + else: + inputs["labels"] = encodings["input_ids"] + return inputs def pad(self, *args, **kwargs): """ @@ -79,7 +108,28 @@ def pad(self, *args, **kwargs): [`~Wav2Vec2Processor.as_target_processor`] this method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.pad`]. Please refer to the docstring of the above two methods for more information. """ - return self.current_processor.pad(*args, **kwargs) + # For backward compatibility + if self._in_target_context_manager: + return self.current_processor.pad(*args, **kwargs) + + input_features = kwargs.pop("input_features", None) + labels = kwargs.pop("labels", None) + if len(args) > 1: + input_features = args[0] + args = args[1:] + + if input_features is not None: + input_features = self.feature_extractor.pad(input_features, *args, **kwargs) + if labels is not None: + labels = self.tokenizer.pad(labels, **kwargs) + + if labels is None: + return input_features + elif input_features is None: + return labels + else: + input_features["labels"] = labels["input_ids"] + return input_features def batch_decode(self, *args, **kwargs): """ @@ -101,6 +151,13 @@ def as_target_processor(self): Temporarily sets the tokenizer for processing the input. Useful for encoding the labels when fine-tuning Wav2Vec2. """ + warnings.warn( + "`as_target_processor` is deprecated and will be removed in v5 of Transformers. You can process your " + "labels by using the argument `text` of the regular `__call__` method (either in the same call as " + "your audio inputs, or in a separate call." + ) + self._in_target_context_manager = True self.current_processor = self.tokenizer yield self.current_processor = self.feature_extractor + self._in_target_context_manager = False diff --git a/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py b/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py index 4e7da075261b..534ea566ccc5 100644 --- a/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py +++ b/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py @@ -16,6 +16,7 @@ Speech processor class for Wav2Vec2 """ import os +import warnings from contextlib import contextmanager from dataclasses import dataclass from multiprocessing import get_context @@ -99,6 +100,7 @@ def __init__( self.decoder = decoder self.current_processor = self.feature_extractor + self._in_target_context_manager = False def save_pretrained(self, save_directory): super().save_pretrained(save_directory) @@ -214,7 +216,35 @@ def __call__(self, *args, **kwargs): Wav2Vec2CTCTokenizer's [`~Wav2Vec2CTCTokenizer.__call__`]. Please refer to the docstring of the above two methods for more information. """ - return self.current_processor(*args, **kwargs) + # For backward compatibility + if self._in_target_context_manager: + return self.current_processor(*args, **kwargs) + + if "raw_speech" in kwargs: + warnings.warn("Using `raw_speech` as a keyword argument is deprecated. 
Use `audio` instead.") + audio = kwargs.pop("raw_speech") + else: + audio = kwargs.pop("audio", None) + text = kwargs.pop("text", None) + if len(args) > 0: + audio = args[0] + args = args[1:] + + if audio is None and text is None: + raise ValueError("You need to specify either an `audio` or `text` input to process.") + + if audio is not None: + inputs = self.feature_extractor(audio, *args, **kwargs) + if text is not None: + encodings = self.tokenizer(text, **kwargs) + + if text is None: + return inputs + elif audio is None: + return encodings + else: + inputs["labels"] = encodings["input_ids"] + return inputs def pad(self, *args, **kwargs): """ @@ -224,7 +254,28 @@ def pad(self, *args, **kwargs): Wav2Vec2CTCTokenizer's [`~Wav2Vec2CTCTokenizer.pad`]. Please refer to the docstring of the above two methods for more information. """ - return self.current_processor.pad(*args, **kwargs) + # For backward compatibility + if self._in_target_context_manager: + return self.current_processor.pad(*args, **kwargs) + + input_features = kwargs.pop("input_features", None) + labels = kwargs.pop("labels", None) + if len(args) > 1: + input_features = args[0] + args = args[1:] + + if input_features is not None: + input_features = self.feature_extractor.pad(input_features, *args, **kwargs) + if labels is not None: + labels = self.tokenizer.pad(labels, **kwargs) + + if labels is None: + return input_features + elif input_features is None: + return labels + else: + input_features["labels"] = labels["input_ids"] + return input_features def batch_decode( self, @@ -486,9 +537,16 @@ def decode( @contextmanager def as_target_processor(self): """ - Temporarily sets the tokenizer for processing the input. Useful for encoding the labels when fine-tuning + Temporarily sets the processor for processing the target. Useful for encoding the labels when fine-tuning Wav2Vec2. """ + warnings.warn( + "`as_target_processor` is deprecated and will be removed in v5 of Transformers. You can process your " + "labels by using the argument `text` of the regular `__call__` method (either in the same call as " + "your audio inputs, or in a separate call." + ) + self._in_target_context_manager = True self.current_processor = self.tokenizer yield self.current_processor = self.feature_extractor + self._in_target_context_manager = False diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 3b10c0a3d847..b8ab1e09c445 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2498,7 +2498,8 @@ def __call__( if text is None and text_target is None: raise ValueError("You need to specify either `text` or `text_target`.") if text is not None: - # The context manager will send the inputs as normal texts and not text_target. + # The context manager will send the inputs as normal texts and not text_target, but we shouldn't change the + # input mode in this case. 
if not self._in_target_context_manager: self._switch_to_input_mode() encodings = self._call_one(text=text, text_pair=text_pair, **all_kwargs) From 5bfa57bf9152e5d8f48dbf2726f438464ec3222b Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Wed, 27 Jul 2022 15:33:30 -0400 Subject: [PATCH 04/10] Fix pad --- src/transformers/models/mctct/processing_mctct.py | 2 +- src/transformers/models/wav2vec2/processing_wav2vec2.py | 2 +- .../models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/transformers/models/mctct/processing_mctct.py b/src/transformers/models/mctct/processing_mctct.py index 8cea9ce2a45e..2e05020196ac 100644 --- a/src/transformers/models/mctct/processing_mctct.py +++ b/src/transformers/models/mctct/processing_mctct.py @@ -99,7 +99,7 @@ def pad(self, *args, **kwargs): input_features = kwargs.pop("input_features", None) labels = kwargs.pop("labels", None) - if len(args) > 1: + if len(args) > 0: input_features = args[0] args = args[1:] diff --git a/src/transformers/models/wav2vec2/processing_wav2vec2.py b/src/transformers/models/wav2vec2/processing_wav2vec2.py index 55af0f1f6ccd..5763d4d59eea 100644 --- a/src/transformers/models/wav2vec2/processing_wav2vec2.py +++ b/src/transformers/models/wav2vec2/processing_wav2vec2.py @@ -114,7 +114,7 @@ def pad(self, *args, **kwargs): input_features = kwargs.pop("input_features", None) labels = kwargs.pop("labels", None) - if len(args) > 1: + if len(args) > 0: input_features = args[0] args = args[1:] diff --git a/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py b/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py index 534ea566ccc5..f09b5eb922ab 100644 --- a/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py +++ b/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py @@ -260,7 +260,7 @@ def pad(self, *args, **kwargs): input_features = kwargs.pop("input_features", None) labels = kwargs.pop("labels", None) - if len(args) > 1: + if len(args) > 0: input_features = args[0] args = args[1:] From 7ea9d1d37e0fd9a18eba1db541b1acf580a743f7 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Thu, 28 Jul 2022 11:22:32 -0400 Subject: [PATCH 05/10] Remove all uses of in tests, docs and examples --- docs/source/en/model_doc/mctct.mdx | 1 - .../en/model_doc/speech-encoder-decoder.mdx | 10 +++----- docs/source/en/model_doc/speech_to_text.mdx | 1 - docs/source/en/model_doc/speech_to_text_2.mdx | 1 - docs/source/en/model_doc/trocr.mdx | 1 - docs/source/en/model_doc/wav2vec2.mdx | 2 -- docs/source/en/preprocessing.mdx | 4 +--- docs/source/en/tasks/asr.mdx | 19 ++++----------- docs/source/es/preprocessing.mdx | 4 +--- docs/source/it/preprocessing.mdx | 4 +--- .../run_speech_recognition_ctc.py | 13 +++++----- .../run_speech_recognition_ctc_bnb.py | 13 +++++----- .../run_speech_recognition_ctc_streaming.py | 13 +++++----- .../research_projects/wav2vec2/run_asr.py | 22 ++++++++--------- .../wav2vec2/run_common_voice.py | 24 +++++++++---------- .../xtreme-s/run_xtreme_s.py | 13 +++++----- .../models/hubert/modeling_tf_hubert.py | 5 ++-- .../modeling_speech_encoder_decoder.py | 3 +-- .../models/wav2vec2/modeling_tf_wav2vec2.py | 5 ++-- src/transformers/utils/doc.py | 6 ++--- tests/models/mctct/test_processor_mctct.py | 3 +-- .../test_processor_speech_to_text.py | 3 +-- .../wav2vec2/test_processor_wav2vec2.py | 3 +-- .../test_processor_wav2vec2_with_lm.py | 3 +-- 24 files changed, 69 insertions(+), 107 deletions(-) diff 
--git a/docs/source/en/model_doc/mctct.mdx b/docs/source/en/model_doc/mctct.mdx index f064f1e3d020..531508cfa9df 100644 --- a/docs/source/en/model_doc/mctct.mdx +++ b/docs/source/en/model_doc/mctct.mdx @@ -48,7 +48,6 @@ This model was contributed by [cwkeam](https://huggingface.co/cwkeam). The origi - save_pretrained - batch_decode - decode - - as_target_processor ## MCTCTModel diff --git a/docs/source/en/model_doc/speech-encoder-decoder.mdx b/docs/source/en/model_doc/speech-encoder-decoder.mdx index 9aee71ed6669..7294b55d3c31 100644 --- a/docs/source/en/model_doc/speech-encoder-decoder.mdx +++ b/docs/source/en/model_doc/speech-encoder-decoder.mdx @@ -85,7 +85,7 @@ As you can see, only 2 inputs are required for the model in order to compute a l speech inputs) and `labels` (which are the `input_ids` of the encoded target sequence). ```python ->>> from transformers import Wav2Vec2Processor, SpeechEncoderDecoderModel +>>> from transformers import BertTokenizer, Wav2Vec2Processor, SpeechEncoderDecoderModel >>> from datasets import load_dataset >>> processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h") @@ -99,14 +99,10 @@ speech inputs) and `labels` (which are the `input_ids` of the encoded target seq >>> # load a speech input >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") ->>> input_values = processor(ds[0]["audio"]["array"], return_tensors="pt").input_values - ->>> # load its corresponding transcription ->>> with processor.as_target_processor(): -... labels = processor(ds[0]["text"], return_tensors="pt").input_ids +>>> input_features = processor(ds[0]["audio"]["array"], text=ds[0]["text"], return_tensors="pt") >>> # the forward function automatically creates the correct decoder_input_ids ->>> loss = model(input_values, labels=labels).loss +>>> loss = model(**input_features).loss >>> loss.backward() ``` diff --git a/docs/source/en/model_doc/speech_to_text.mdx b/docs/source/en/model_doc/speech_to_text.mdx index e11d95442d26..9d855fceb480 100644 --- a/docs/source/en/model_doc/speech_to_text.mdx +++ b/docs/source/en/model_doc/speech_to_text.mdx @@ -120,7 +120,6 @@ See the [model hub](https://huggingface.co/models?filter=speech_to_text) to look - save_pretrained - batch_decode - decode - - as_target_processor ## Speech2TextModel diff --git a/docs/source/en/model_doc/speech_to_text_2.mdx b/docs/source/en/model_doc/speech_to_text_2.mdx index 72754b67aab9..ce9e29c32e82 100644 --- a/docs/source/en/model_doc/speech_to_text_2.mdx +++ b/docs/source/en/model_doc/speech_to_text_2.mdx @@ -114,7 +114,6 @@ See [model hub](https://huggingface.co/models?filter=speech2text2) to look for S - save_pretrained - batch_decode - decode - - as_target_processor ## Speech2Text2ForCausalLM diff --git a/docs/source/en/model_doc/trocr.mdx b/docs/source/en/model_doc/trocr.mdx index 08de107e434c..37dc6f545595 100644 --- a/docs/source/en/model_doc/trocr.mdx +++ b/docs/source/en/model_doc/trocr.mdx @@ -94,7 +94,6 @@ See the [model hub](https://huggingface.co/models?filter=trocr) to look for TrOC - save_pretrained - batch_decode - decode - - as_target_processor ## TrOCRForCausalLM diff --git a/docs/source/en/model_doc/wav2vec2.mdx b/docs/source/en/model_doc/wav2vec2.mdx index 9b2f13ea4541..eaca36be4673 100644 --- a/docs/source/en/model_doc/wav2vec2.mdx +++ b/docs/source/en/model_doc/wav2vec2.mdx @@ -62,7 +62,6 @@ This model was contributed by [patrickvonplaten](https://huggingface.co/patrickv - save_pretrained - batch_decode - decode - - as_target_processor 
## Wav2Vec2ProcessorWithLM @@ -73,7 +72,6 @@ This model was contributed by [patrickvonplaten](https://huggingface.co/patrickv - save_pretrained - batch_decode - decode - - as_target_processor ## Wav2Vec2 specific outputs diff --git a/docs/source/en/preprocessing.mdx b/docs/source/en/preprocessing.mdx index f9bdae3603af..e67741633acb 100644 --- a/docs/source/en/preprocessing.mdx +++ b/docs/source/en/preprocessing.mdx @@ -486,10 +486,8 @@ A processor combines a feature extractor and tokenizer. Load a processor with [` >>> def prepare_dataset(example): ... audio = example["audio"] -... example["input_values"] = processor(audio["array"], sampling_rate=16000) +... example.update(processor(audio=audio["array"], text=example["text"], sampling_rate=16000)) -... with processor.as_target_processor(): -... example["labels"] = processor(example["text"]).input_ids ... return example ``` diff --git a/docs/source/en/tasks/asr.mdx b/docs/source/en/tasks/asr.mdx index 8ceea824f4ee..daa627aaf131 100644 --- a/docs/source/en/tasks/asr.mdx +++ b/docs/source/en/tasks/asr.mdx @@ -109,11 +109,10 @@ The preprocessing function needs to: >>> def prepare_dataset(batch): ... audio = batch["audio"] -... batch["input_values"] = processor(audio["array"], sampling_rate=audio["sampling_rate"]).input_values[0] +... batch = processor(audio=audio["array"], sampling_rate=audio["sampling_rate"]).input_values[0] ... batch["input_length"] = len(batch["input_values"]) -... with processor.as_target_processor(): -... batch["labels"] = processor(batch["transcription"]).input_ids +... batch["labels"] = processor(text=batch["transcription"]).input_ids ... return batch ``` @@ -146,17 +145,9 @@ Unlike other data collators, this specific data collator needs to apply a differ ... input_features = [{"input_values": feature["input_values"]} for feature in features] ... label_features = [{"input_ids": feature["labels"]} for feature in features] -... batch = self.processor.pad( -... input_features, -... padding=self.padding, -... return_tensors="pt", -... ) -... with self.processor.as_target_processor(): -... labels_batch = self.processor.pad( -... label_features, -... padding=self.padding, -... return_tensors="pt", -... ) +... batch = self.processor.pad(input_features, padding=self.padding, return_tensors="pt") + +... labels_batch = self.processor.pad(labels=label_features, padding=self.padding, return_tensors="pt") ... # replace padding with -100 to ignore loss correctly ... labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100) diff --git a/docs/source/es/preprocessing.mdx b/docs/source/es/preprocessing.mdx index 3e749ca2cde8..9608bf58d9d6 100644 --- a/docs/source/es/preprocessing.mdx +++ b/docs/source/es/preprocessing.mdx @@ -471,10 +471,8 @@ Un processor combina un extractor de características y un tokenizador. Cargue u >>> def prepare_dataset(example): ... audio = example["audio"] -... example["input_values"] = processor(audio["array"], sampling_rate=16000) +... example.update(processor(audio=audio["array"], text=example["text"], sampling_rate=16000)) -... with processor.as_target_processor(): -... example["labels"] = processor(example["text"]).input_ids ... return example ``` diff --git a/docs/source/it/preprocessing.mdx b/docs/source/it/preprocessing.mdx index 5a245fe84302..a57ff9df9151 100644 --- a/docs/source/it/preprocessing.mdx +++ b/docs/source/it/preprocessing.mdx @@ -471,10 +471,8 @@ Un processor combina un estrattore di caratteristiche e un tokenizer. 
Carica un >>> def prepare_dataset(example): ... audio = example["audio"] -... example["input_values"] = processor(audio["array"], sampling_rate=16000) +... example.update(processor(audio=audio["array"], text=example["text"], sampling_rate=16000)) -... with processor.as_target_processor(): -... example["labels"] = processor(example["text"]).input_ids ... return example ``` diff --git a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py index 25e817637779..140e1bb64b04 100755 --- a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py +++ b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py @@ -305,13 +305,12 @@ def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> return_tensors="pt", ) - with self.processor.as_target_processor(): - labels_batch = self.processor.pad( - label_features, - padding=self.padding, - pad_to_multiple_of=self.pad_to_multiple_of_labels, - return_tensors="pt", - ) + labels_batch = self.processor.pad( + labels=label_features, + padding=self.padding, + pad_to_multiple_of=self.pad_to_multiple_of_labels, + return_tensors="pt", + ) # replace padding with -100 to ignore loss correctly labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100) diff --git a/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py b/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py index 521036c78e4b..afa3397eb430 100755 --- a/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py +++ b/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py @@ -304,13 +304,12 @@ def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> return_tensors="pt", ) - with self.processor.as_target_processor(): - labels_batch = self.processor.pad( - label_features, - padding=self.padding, - pad_to_multiple_of=self.pad_to_multiple_of_labels, - return_tensors="pt", - ) + labels_batch = self.processor.pad( + labels=label_features, + padding=self.padding, + pad_to_multiple_of=self.pad_to_multiple_of_labels, + return_tensors="pt", + ) # replace padding with -100 to ignore loss correctly labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100) diff --git a/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_streaming.py b/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_streaming.py index d357bc469649..57f54048a523 100644 --- a/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_streaming.py +++ b/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_streaming.py @@ -301,13 +301,12 @@ def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> return_tensors="pt", ) - with self.processor.as_target_processor(): - labels_batch = self.processor.pad( - label_features, - padding=self.padding, - pad_to_multiple_of=self.pad_to_multiple_of_labels, - return_tensors="pt", - ) + labels_batch = self.processor.pad( + labels=label_features, + padding=self.padding, + pad_to_multiple_of=self.pad_to_multiple_of_labels, + return_tensors="pt", + ) # replace padding with -100 to ignore loss correctly labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100) diff --git a/examples/research_projects/wav2vec2/run_asr.py b/examples/research_projects/wav2vec2/run_asr.py index 
bb34e0a0c71a..ab9db11d2a02 100755 --- a/examples/research_projects/wav2vec2/run_asr.py +++ b/examples/research_projects/wav2vec2/run_asr.py @@ -266,14 +266,13 @@ def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> pad_to_multiple_of=self.pad_to_multiple_of, return_tensors="pt", ) - with self.processor.as_target_processor(): - labels_batch = self.processor.pad( - label_features, - padding=self.padding, - max_length=self.max_length_labels, - pad_to_multiple_of=self.pad_to_multiple_of_labels, - return_tensors="pt", - ) + labels_batch = self.processor.pad( + labels=label_features, + padding=self.padding, + max_length=self.max_length_labels, + pad_to_multiple_of=self.pad_to_multiple_of_labels, + return_tensors="pt", + ) # replace padding with -100 to ignore loss correctly labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100) @@ -419,9 +418,10 @@ def prepare_dataset(batch): len(set(batch["sampling_rate"])) == 1 ), f"Make sure all inputs have the same sampling rate of {processor.feature_extractor.sampling_rate}." - batch["input_values"] = processor(batch["speech"], sampling_rate=batch["sampling_rate"][0]).input_values - with processor.as_target_processor(): - batch["labels"] = processor(batch[data_args.target_text_column]).input_ids + processed_batch = processor( + audio=batch["speech"], text=batch[data_args.target_text_column], sampling_rate=batch["sampling_rate"][0] + ) + batch.update(processed_batch) return batch train_dataset = train_dataset.map( diff --git a/examples/research_projects/wav2vec2/run_common_voice.py b/examples/research_projects/wav2vec2/run_common_voice.py index b8480d3c7d1c..10a3a77fa758 100644 --- a/examples/research_projects/wav2vec2/run_common_voice.py +++ b/examples/research_projects/wav2vec2/run_common_voice.py @@ -185,14 +185,13 @@ def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> pad_to_multiple_of=self.pad_to_multiple_of, return_tensors="pt", ) - with self.processor.as_target_processor(): - labels_batch = self.processor.pad( - label_features, - padding=self.padding, - max_length=self.max_length_labels, - pad_to_multiple_of=self.pad_to_multiple_of_labels, - return_tensors="pt", - ) + labels_batch = self.processor.pad( + labels=label_features, + padding=self.padding, + max_length=self.max_length_labels, + pad_to_multiple_of=self.pad_to_multiple_of_labels, + return_tensors="pt", + ) # replace padding with -100 to ignore loss correctly labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100) @@ -414,10 +413,11 @@ def prepare_dataset(batch): assert ( len(set(batch["sampling_rate"])) == 1 ), f"Make sure all inputs have the same sampling rate of {processor.feature_extractor.sampling_rate}." 
- batch["input_values"] = processor(batch["speech"], sampling_rate=batch["sampling_rate"][0]).input_values - # Setup the processor for targets - with processor.as_target_processor(): - batch["labels"] = processor(batch["target_text"]).input_ids + + processed_batch = processor( + audio=batch["speech"], text=batch["target_text"], sampling_rate=batch["sampling_rate"][0] + ) + batch.update(processed_batch) return batch train_dataset = train_dataset.map( diff --git a/examples/research_projects/xtreme-s/run_xtreme_s.py b/examples/research_projects/xtreme-s/run_xtreme_s.py index 972c6d5462ff..d3e4f5cb38ab 100644 --- a/examples/research_projects/xtreme-s/run_xtreme_s.py +++ b/examples/research_projects/xtreme-s/run_xtreme_s.py @@ -349,13 +349,12 @@ def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> if self.pad_labels: label_features = [{"input_ids": feature["labels"]} for feature in features] - with self.processor.as_target_processor(): - labels_batch = self.processor.pad( - label_features, - padding=self.padding, - pad_to_multiple_of=self.pad_to_multiple_of_labels, - return_tensors="pt", - ) + labels_batch = self.processor.pad( + labels=label_features, + padding=self.padding, + pad_to_multiple_of=self.pad_to_multiple_of_labels, + return_tensors="pt", + ) # replace padding with -100 to ignore loss correctly labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100) diff --git a/src/transformers/models/hubert/modeling_tf_hubert.py b/src/transformers/models/hubert/modeling_tf_hubert.py index fc6e5b13d408..f078b5d0cfc7 100644 --- a/src/transformers/models/hubert/modeling_tf_hubert.py +++ b/src/transformers/models/hubert/modeling_tf_hubert.py @@ -1612,9 +1612,8 @@ def call( >>> # compute loss >>> target_transcription = "A MAN SAID TO THE UNIVERSE SIR I EXIST" - >>> # wrap processor as target processor to encode labels - >>> with processor.as_target_processor(): - ... labels = processor(transcription, return_tensors="tf").input_values + >>> # Pass the target transcription as `text` to encode labels + >>> labels = processor(text=target_transcription, return_tensors="tf").input_ids >>> loss = model(input_values, labels=labels).loss ```""" diff --git a/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py b/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py index bf67c6d54466..388be2449947 100644 --- a/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py +++ b/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py @@ -482,8 +482,7 @@ def forward( 'Mr. Quilter ist der Apostel der Mittelschicht und wir freuen uns, sein Evangelium willkommen heißen zu können.' >>> # Training: Train model on English transcription - >>> with processor.as_target_processor(): - ...
labels = processor(ds[0]["text"], return_tensors="pt").input_ids + >>> labels = processor(text=ds[0]["text"], return_tensors="pt").input_ids >>> loss = model(input_values, labels=labels).loss >>> loss.backward() diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py index fed0414863a5..854831e45a09 100644 --- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py @@ -1650,9 +1650,8 @@ def call( >>> # compute loss >>> target_transcription = "A MAN SAID TO THE UNIVERSE SIR I EXIST" - >>> # wrap processor as target processor to encode labels - >>> with processor.as_target_processor(): - ... labels = processor(transcription, return_tensors="tf").input_ids + >>> # Pass the target transcription as `text` to encode labels + >>> labels = processor(text=target_transcription, return_tensors="tf").input_ids >>> loss = model(input_values, labels=labels).loss ```""" diff --git a/src/transformers/utils/doc.py b/src/transformers/utils/doc.py index 8f0caf825bba..6761dec9c969 100644 --- a/src/transformers/utils/doc.py +++ b/src/transformers/utils/doc.py @@ -428,8 +428,7 @@ def _prepare_output_docstrings(output_type, config_class, min_indent=None): ``` ```python - >>> with processor.as_target_processor(): - ... inputs["labels"] = processor(dataset[0]["text"], return_tensors="pt").input_ids + >>> inputs["labels"] = processor(text=dataset[0]["text"], return_tensors="pt").input_ids >>> # compute loss >>> loss = model(**inputs).loss @@ -849,8 +848,7 @@ def _prepare_output_docstrings(output_type, config_class, min_indent=None): ``` ```python - >>> with processor.as_target_processor(): - ... inputs["labels"] = processor(dataset[0]["text"], return_tensors="tf").input_ids + >>> inputs["labels"] = processor(text=dataset[0]["text"], return_tensors="tf").input_ids >>> # compute loss >>> loss = model(**inputs).loss diff --git a/tests/models/mctct/test_processor_mctct.py b/tests/models/mctct/test_processor_mctct.py index 83201f410215..821e44b48e24 100644 --- a/tests/models/mctct/test_processor_mctct.py +++ b/tests/models/mctct/test_processor_mctct.py @@ -125,8 +125,7 @@ def test_tokenizer(self): input_str = "This is a test string" - with processor.as_target_processor(): - encoded_processor = processor(input_str) + encoded_processor = processor(text=input_str) encoded_tok = tokenizer(input_str) diff --git a/tests/models/speech_to_text/test_processor_speech_to_text.py b/tests/models/speech_to_text/test_processor_speech_to_text.py index e6e43f1bb8d7..d519f005d3eb 100644 --- a/tests/models/speech_to_text/test_processor_speech_to_text.py +++ b/tests/models/speech_to_text/test_processor_speech_to_text.py @@ -125,8 +125,7 @@ def test_tokenizer(self): input_str = "This is a test string" - with processor.as_target_processor(): - encoded_processor = processor(input_str) + encoded_processor = processor(text=input_str) encoded_tok = tokenizer(input_str) diff --git a/tests/models/wav2vec2/test_processor_wav2vec2.py b/tests/models/wav2vec2/test_processor_wav2vec2.py index 8b7188f8ebc0..5f1c259061c4 100644 --- a/tests/models/wav2vec2/test_processor_wav2vec2.py +++ b/tests/models/wav2vec2/test_processor_wav2vec2.py @@ -118,8 +118,7 @@ def test_tokenizer(self): input_str = "This is a test string" - with processor.as_target_processor(): - encoded_processor = processor(input_str) + encoded_processor = processor(text=input_str) encoded_tok = tokenizer(input_str) diff --git
a/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py b/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py index f5b3eea926d8..d66a5923868d 100644 --- a/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py +++ b/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py @@ -164,8 +164,7 @@ def test_tokenizer(self): input_str = "This is a test string" - with processor.as_target_processor(): - encoded_processor = processor(input_str) + encoded_processor = processor(text=input_str) encoded_tok = tokenizer(input_str) From 024483cb69322a76b7541713f66eb7fffa20291c Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Thu, 28 Jul 2022 12:33:15 -0400 Subject: [PATCH 06/10] Replace all as_target_tokenizer --- docs/source/en/model_doc/m2m_100.mdx | 4 +- docs/source/en/model_doc/marian.mdx | 1 - docs/source/en/model_doc/mbart.mdx | 17 +++------ docs/source/en/model_doc/nllb.mdx | 1 - docs/source/en/model_doc/plbart.mdx | 12 ++---- docs/source/en/tasks/summarization.mdx | 5 +-- docs/source/en/tasks/translation.mdx | 7 +--- .../run_image_captioning_flax.py | 9 ++--- .../summarization/run_summarization_flax.py | 11 ++++-- .../question-answering/run_seq2seq_qa.py | 10 ++--- .../summarization/run_summarization.py | 5 +-- .../run_summarization_no_trainer.py | 5 +-- .../pytorch/translation/run_translation.py | 5 +-- .../translation/run_translation_no_trainer.py | 5 +-- .../tapex/run_wikisql_with_tapex.py | 13 +++---- .../run_wikitablequestions_with_tapex.py | 13 +++---- .../summarization/run_summarization.py | 5 +-- .../tensorflow/translation/run_translation.py | 5 +-- .../models/m2m_100/tokenization_m2m_100.py | 6 +-- .../models/marian/tokenization_marian.py | 5 +-- .../models/mbart/tokenization_mbart.py | 5 +-- .../models/mbart/tokenization_mbart_fast.py | 5 +-- .../models/mbart50/tokenization_mbart50.py | 6 +-- .../mbart50/tokenization_mbart50_fast.py | 6 +-- .../models/mt5/modeling_flax_mt5.py | 9 ++--- src/transformers/models/mt5/modeling_mt5.py | 9 ++--- .../models/mt5/modeling_tf_mt5.py | 9 ++--- .../models/nllb/tokenization_nllb.py | 5 +-- .../models/nllb/tokenization_nllb_fast.py | 5 +-- .../models/plbart/tokenization_plbart.py | 5 +-- src/transformers/models/rag/modeling_rag.py | 6 +-- .../models/rag/tokenization_rag.py | 23 ++++++------ src/transformers/tokenization_utils_base.py | 30 ++++++++------- tests/models/bart/test_tokenization_bart.py | 8 ++-- tests/models/byt5/test_tokenization_byt5.py | 13 +++---- .../models/canine/test_tokenization_canine.py | 5 ++- .../m2m_100/test_tokenization_m2m_100.py | 20 +++++----- tests/models/marian/test_modeling_marian.py | 5 +-- .../models/marian/test_tokenization_marian.py | 5 +-- tests/models/mbart/test_tokenization_mbart.py | 37 ++++++++----------- .../mbart50/test_tokenization_mbart50.py | 37 ++++++++----------- tests/models/mvp/test_tokenization_mvp.py | 11 ++---- tests/models/nllb/test_tokenization_nllb.py | 23 +++++------- .../pegasus/test_tokenization_pegasus.py | 14 +++---- .../perceiver/test_tokenization_perceiver.py | 7 ++-- .../models/plbart/test_tokenization_plbart.py | 34 +++++++---------- tests/models/t5/test_tokenization_t5.py | 13 +++---- tests/models/tapex/test_tokenization_tapex.py | 37 +++++++++---------- 48 files changed, 217 insertions(+), 319 deletions(-) diff --git a/docs/source/en/model_doc/m2m_100.mdx b/docs/source/en/model_doc/m2m_100.mdx index 65e119aa4eea..f0a7714d2418 100644 --- a/docs/source/en/model_doc/m2m_100.mdx +++ b/docs/source/en/model_doc/m2m_100.mdx @@ -55,9 +55,7 @@ 
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M", src_lang="en src_text = "Life is like a box of chocolates." tgt_text = "La vie est comme une boîte de chocolat." -model_inputs = tokenizer(src_text, return_tensors="pt") -with tokenizer.as_target_tokenizer(): - labels = tokenizer(tgt_text, return_tensors="pt").input_ids +model_inputs = tokenizer(src_text, text_target=tgt_text, return_tensors="pt") -loss = model(**model_inputs, labels=labels) # forward pass +loss = model(**model_inputs).loss # forward pass ``` diff --git a/docs/source/en/model_doc/marian.mdx b/docs/source/en/model_doc/marian.mdx index 7b10c9309a0b..b415e9413c40 100644 --- a/docs/source/en/model_doc/marian.mdx +++ b/docs/source/en/model_doc/marian.mdx @@ -155,7 +155,6 @@ Example of translating english to many romance languages, using old-style 2 char ## MarianTokenizer [[autodoc]] MarianTokenizer - - as_target_tokenizer ## MarianModel diff --git a/docs/source/en/model_doc/mbart.mdx b/docs/source/en/model_doc/mbart.mdx index 0f3d82ce5dac..b24e31f33c9f 100644 --- a/docs/source/en/model_doc/mbart.mdx +++ b/docs/source/en/model_doc/mbart.mdx @@ -34,8 +34,8 @@ model is multilingual it expects the sequences in a different format. A special source and target text. The source text format is `X [eos, src_lang_code]` where `X` is the source text. The target text format is `[tgt_lang_code] X [eos]`. `bos` is never used. -The regular [`~MBartTokenizer.__call__`] will encode source text format, and it should be wrapped -inside the context manager [`~MBartTokenizer.as_target_tokenizer`] to encode target text format. +The regular [`~MBartTokenizer.__call__`] will encode source text format passed as first argument or with the `text` +keyword, and target text format passed with the `text_target` keyword argument. - Supervised training @@ -46,13 +46,11 @@ inside the context manager [`~MBartTokenizer.as_target_tokenizer`] to encode tar >>> example_english_phrase = "UN Chief Says There Is No Military Solution in Syria" >>> expected_translation_romanian = "Şeful ONU declară că nu există o soluţie militară în Siria" ->>> inputs = tokenizer(example_english_phrase, return_tensors="pt") ->>> with tokenizer.as_target_tokenizer(): -...
labels = tokenizer(expected_translation_romanian, return_tensors="pt") +>>> inputs = tokenizer(example_english_phrase, text_target=expected_translation_romanian, return_tensors="pt") >>> model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-en-ro") >>> # forward pass ->>> model(**inputs, labels=batch["labels"]) +>>> model(**inputs) ``` - Generation @@ -108,11 +106,9 @@ tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50", src_ src_text = " UN Chief Says There Is No Military Solution in Syria" tgt_text = "Şeful ONU declară că nu există o soluţie militară în Siria" -model_inputs = tokenizer(src_text, return_tensors="pt") -with tokenizer.as_target_tokenizer(): - labels = tokenizer(tgt_text, return_tensors="pt").input_ids +model_inputs = tokenizer(src_text, text_target=tgt_text, return_tensors="pt") -model(**model_inputs, labels=labels) # forward pass +model(**model_inputs) # forward pass ``` - Generation @@ -154,7 +150,6 @@ tokenizer.batch_decode(generated_tokens, skip_special_tokens=True) ## MBartTokenizer [[autodoc]] MBartTokenizer - - as_target_tokenizer - build_inputs_with_special_tokens ## MBartTokenizerFast diff --git a/docs/source/en/model_doc/nllb.mdx b/docs/source/en/model_doc/nllb.mdx index 477ef1ca83ec..d2c0089fa3a1 100644 --- a/docs/source/en/model_doc/nllb.mdx +++ b/docs/source/en/model_doc/nllb.mdx @@ -91,7 +91,6 @@ UN-Chef sagt, es gibt keine militärische Lösung in Syrien ## NllbTokenizer [[autodoc]] NllbTokenizer - - as_target_tokenizer - build_inputs_with_special_tokens ## NllbTokenizerFast diff --git a/docs/source/en/model_doc/plbart.mdx b/docs/source/en/model_doc/plbart.mdx index 6e3e4a5b7773..0755bb9a56e1 100644 --- a/docs/source/en/model_doc/plbart.mdx +++ b/docs/source/en/model_doc/plbart.mdx @@ -45,8 +45,9 @@ target text format is `[tgt_lang_code] X [eos]`. `bos` is never used. However, for fine-tuning, in some cases no language token is provided in cases where a single language is used. Please refer to [the paper](https://arxiv.org/abs/2103.06333) to learn more about this. -In cases where the language code is needed, The regular [`~PLBartTokenizer.__call__`] will encode source text format, and it should be wrapped -inside the context manager [`~PLBartTokenizer.as_target_tokenizer`] to encode target text format. +In cases where the language code is needed, the regular [`~PLBartTokenizer.__call__`] will encode source text format +when you pass texts as the first argument or with the keyword argument `text`, and will encode target text format if +it's passed with the `text_target` keyword argument. - Supervised training @@ -56,11 +57,7 @@ inside the context manager [`~PLBartTokenizer.as_target_tokenizer`] to encode ta >>> tokenizer = PLBartTokenizer.from_pretrained("uclanlp/plbart-base", src_lang="en_XX", tgt_lang="python") >>> example_python_phrase = "def maximum(a,b,c):NEW_LINE_INDENTreturn max([a,b,c])" >>> expected_translation_english = "Returns the maximum value of a b c." ->>> inputs = tokenizer(example_python_phrase, return_tensors="pt") ->>> with tokenizer.as_target_tokenizer(): -... 
labels = tokenizer(expected_translation_english, return_tensors="pt") ->>> inputs["labels"] = labels["input_ids"] ->>> # forward pass +>>> inputs = tokenizer(example_python_phrase, text_target=expected_translation_english, return_tensors="pt") >>> model(**inputs) ``` @@ -88,7 +85,6 @@ inside the context manager [`~PLBartTokenizer.as_target_tokenizer`] to encode ta ## PLBartTokenizer [[autodoc]] PLBartTokenizer - - as_target_tokenizer - build_inputs_with_special_tokens ## PLBartModel diff --git a/docs/source/en/tasks/summarization.mdx b/docs/source/en/tasks/summarization.mdx index 1c73c7396e64..f636141a1507 100644 --- a/docs/source/en/tasks/summarization.mdx +++ b/docs/source/en/tasks/summarization.mdx @@ -67,7 +67,7 @@ Load the T5 tokenizer to process `text` and `summary`: The preprocessing function needs to: 1. Prefix the input with a prompt so T5 knows this is a summarization task. Some models capable of multiple NLP tasks require prompting for specific tasks. -2. Use a context manager with the `as_target_tokenizer()` function to parallelize tokenization of inputs and labels. +2. Use the `text_target` keyword argument when tokenizing labels. 3. Truncate sequences to be no longer than the maximum length set by the `max_length` parameter. ```py @@ -78,8 +78,7 @@ The preprocessing function needs to: ... inputs = [prefix + doc for doc in examples["text"]] ... model_inputs = tokenizer(inputs, max_length=1024, truncation=True) -... with tokenizer.as_target_tokenizer(): -... labels = tokenizer(examples["summary"], max_length=128, truncation=True) +... labels = tokenizer(text_target=examples["summary"], max_length=128, truncation=True) ... model_inputs["labels"] = labels["input_ids"] ... return model_inputs diff --git a/docs/source/en/tasks/translation.mdx b/docs/source/en/tasks/translation.mdx index 4f628b06db15..d17b87041418 100644 --- a/docs/source/en/tasks/translation.mdx +++ b/docs/source/en/tasks/translation.mdx @@ -78,12 +78,7 @@ The preprocessing function needs to: >>> def preprocess_function(examples): ... inputs = [prefix + example[source_lang] for example in examples["translation"]] ... targets = [example[target_lang] for example in examples["translation"]] -... model_inputs = tokenizer(inputs, max_length=128, truncation=True) - -... with tokenizer.as_target_tokenizer(): -... labels = tokenizer(targets, max_length=128, truncation=True) - -... model_inputs["labels"] = labels["input_ids"] +... model_inputs = tokenizer(inputs, text_target=targets, max_length=128, truncation=True) ...
return model_inputs ``` diff --git a/examples/flax/image-captioning/run_image_captioning_flax.py b/examples/flax/image-captioning/run_image_captioning_flax.py index a4deab8041b2..e3ea20e85770 100644 --- a/examples/flax/image-captioning/run_image_captioning_flax.py +++ b/examples/flax/image-captioning/run_image_captioning_flax.py @@ -551,11 +551,10 @@ def tokenization_fn(examples, max_target_length): targets = captions model_inputs = {} - # Setup the tokenizer for targets - with tokenizer.as_target_tokenizer(): - labels = tokenizer( - targets, max_length=max_target_length, padding="max_length", truncation=True, return_tensors="np" - ) + + labels = tokenizer( + targets, max_length=max_target_length, padding="max_length", truncation=True, return_tensors="np" + ) model_inputs["labels"] = labels["input_ids"] decoder_input_ids = shift_tokens_right_fn( labels["input_ids"], model.config.pad_token_id, model.config.decoder_start_token_id diff --git a/examples/flax/summarization/run_summarization_flax.py b/examples/flax/summarization/run_summarization_flax.py index a1b5fc37e24c..019964e2a5b2 100644 --- a/examples/flax/summarization/run_summarization_flax.py +++ b/examples/flax/summarization/run_summarization_flax.py @@ -586,10 +586,13 @@ def preprocess_function(examples): ) # Setup the tokenizer for targets - with tokenizer.as_target_tokenizer(): - labels = tokenizer( - targets, max_length=max_target_length, padding="max_length", truncation=True, return_tensors="np" - ) + labels = tokenizer( + text_target=targets, + max_length=max_target_length, + padding="max_length", + truncation=True, + return_tensors="np", + ) model_inputs["labels"] = labels["input_ids"] decoder_input_ids = shift_tokens_right_fn( diff --git a/examples/pytorch/question-answering/run_seq2seq_qa.py b/examples/pytorch/question-answering/run_seq2seq_qa.py index 6249613313c8..70bf3272b628 100644 --- a/examples/pytorch/question-answering/run_seq2seq_qa.py +++ b/examples/pytorch/question-answering/run_seq2seq_qa.py @@ -453,9 +453,8 @@ def preprocess_function(examples): inputs, targets = preprocess_squad_batch(examples, question_column, context_column, answer_column) model_inputs = tokenizer(inputs, max_length=max_seq_length, padding=padding, truncation=True) - # Setup the tokenizer for targets - with tokenizer.as_target_tokenizer(): - labels = tokenizer(targets, max_length=max_answer_length, padding=padding, truncation=True) + # Tokenize targets with text_target=... + labels = tokenizer(text_target=targets, max_length=max_answer_length, padding=padding, truncation=True) # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore # padding in the loss. @@ -479,9 +478,8 @@ def preprocess_validation_function(examples): return_overflowing_tokens=True, return_offsets_mapping=True, ) - # Setup the tokenizer for targets - with tokenizer.as_target_tokenizer(): - labels = tokenizer(targets, max_length=max_answer_length, padding=padding, truncation=True) + # Tokenize targets with the `text_target` keyword argument + labels = tokenizer(text_target=targets, max_length=max_answer_length, padding=padding, truncation=True) # Since one example might give us several features if it has a long context, we need a map from a feature to # its corresponding example. This key gives us just that. 
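All of the example-script changes above and below collapse the old two-step target encoding into a single tokenizer call. A minimal self-contained sketch of the resulting pattern (the checkpoint name and column names here are illustrative assumptions, not taken from any of the scripts):

```python
from transformers import AutoTokenizer

# Illustrative checkpoint; any seq2seq tokenizer supporting `text_target` works.
tokenizer = AutoTokenizer.from_pretrained("t5-small")


def preprocess_function(examples):
    # One call tokenizes sources and targets together; the tokenizer switches
    # to target mode internally for the texts passed via `text_target`.
    model_inputs = tokenizer(
        examples["source"],
        text_target=examples["target"],
        max_length=128,
        truncation=True,
    )
    # `model_inputs` now contains input_ids, attention_mask and labels.
    return model_inputs
```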
diff --git a/examples/pytorch/summarization/run_summarization.py b/examples/pytorch/summarization/run_summarization.py index 33aa2bfd2c0c..bb0bdfa74327 100755 --- a/examples/pytorch/summarization/run_summarization.py +++ b/examples/pytorch/summarization/run_summarization.py @@ -522,9 +522,8 @@ def preprocess_function(examples): inputs = [prefix + inp for inp in inputs] model_inputs = tokenizer(inputs, max_length=data_args.max_source_length, padding=padding, truncation=True) - # Setup the tokenizer for targets - with tokenizer.as_target_tokenizer(): - labels = tokenizer(targets, max_length=max_target_length, padding=padding, truncation=True) + # Tokenize targets with the `text_target` keyword argument + labels = tokenizer(text_target=targets, max_length=max_target_length, padding=padding, truncation=True) # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore # padding in the loss. diff --git a/examples/pytorch/summarization/run_summarization_no_trainer.py b/examples/pytorch/summarization/run_summarization_no_trainer.py index c38925743090..16f24cbdabbf 100644 --- a/examples/pytorch/summarization/run_summarization_no_trainer.py +++ b/examples/pytorch/summarization/run_summarization_no_trainer.py @@ -470,9 +470,8 @@ def preprocess_function(examples): inputs = [prefix + inp for inp in inputs] model_inputs = tokenizer(inputs, max_length=args.max_source_length, padding=padding, truncation=True) - # Setup the tokenizer for targets - with tokenizer.as_target_tokenizer(): - labels = tokenizer(targets, max_length=max_target_length, padding=padding, truncation=True) + # Tokenize targets with the `text_target` keyword argument + labels = tokenizer(text_target=targets, max_length=max_target_length, padding=padding, truncation=True) # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore # padding in the loss. diff --git a/examples/pytorch/translation/run_translation.py b/examples/pytorch/translation/run_translation.py index a53fd039dc19..747234a1da1e 100755 --- a/examples/pytorch/translation/run_translation.py +++ b/examples/pytorch/translation/run_translation.py @@ -443,9 +443,8 @@ def preprocess_function(examples): inputs = [prefix + inp for inp in inputs] model_inputs = tokenizer(inputs, max_length=data_args.max_source_length, padding=padding, truncation=True) - # Setup the tokenizer for targets - with tokenizer.as_target_tokenizer(): - labels = tokenizer(targets, max_length=max_target_length, padding=padding, truncation=True) + # Tokenize targets with the `text_target` keyword argument + labels = tokenizer(text_target=targets, max_length=max_target_length, padding=padding, truncation=True) # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore # padding in the loss. 
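The `# If we are padding here ...` context lines around these hunks refer to a masking step that the patch leaves untouched: when labels are padded to `max_length`, pad token ids are replaced by -100 so the loss ignores them. A small sketch of that step (`mask_pad_tokens` is a hypothetical helper, not a function from the scripts):

```python
def mask_pad_tokens(labels_input_ids, pad_token_id):
    # Replace every pad token id by -100, the index ignored by the loss.
    return [
        [(token if token != pad_token_id else -100) for token in label]
        for label in labels_input_ids
    ]


# With pad_token_id=0, trailing padding becomes -100:
assert mask_pad_tokens([[42, 7, 0, 0]], 0) == [[42, 7, -100, -100]]
```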
diff --git a/examples/pytorch/translation/run_translation_no_trainer.py b/examples/pytorch/translation/run_translation_no_trainer.py index 7227681d0589..6db6e11c500f 100644 --- a/examples/pytorch/translation/run_translation_no_trainer.py +++ b/examples/pytorch/translation/run_translation_no_trainer.py @@ -452,9 +452,8 @@ def preprocess_function(examples): inputs = [prefix + inp for inp in inputs] model_inputs = tokenizer(inputs, max_length=args.max_source_length, padding=padding, truncation=True) - # Setup the tokenizer for targets - with tokenizer.as_target_tokenizer(): - labels = tokenizer(targets, max_length=max_target_length, padding=padding, truncation=True) + # Tokenize targets with the `text_target` keyword argument + labels = tokenizer(text_target=targets, max_length=max_target_length, padding=padding, truncation=True) # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore # padding in the loss. diff --git a/examples/research_projects/tapex/run_wikisql_with_tapex.py b/examples/research_projects/tapex/run_wikisql_with_tapex.py index 461bfbec9ae3..7573893629c6 100644 --- a/examples/research_projects/tapex/run_wikisql_with_tapex.py +++ b/examples/research_projects/tapex/run_wikisql_with_tapex.py @@ -437,13 +437,12 @@ def _convert_table_types(_table): table=tables, query=questions, max_length=data_args.max_source_length, padding=padding, truncation=True ) - with tokenizer.as_target_tokenizer(): - labels = tokenizer( - answer=[", ".join(answer) for answer in answers], - max_length=max_target_length, - padding=padding, - truncation=True, - ) + labels = tokenizer( + answer=[", ".join(answer) for answer in answers], + max_length=max_target_length, + padding=padding, + truncation=True, + ) # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore # padding in the loss. diff --git a/examples/research_projects/tapex/run_wikitablequestions_with_tapex.py b/examples/research_projects/tapex/run_wikitablequestions_with_tapex.py index 1750adc546f0..7ffa8f5f91cc 100644 --- a/examples/research_projects/tapex/run_wikitablequestions_with_tapex.py +++ b/examples/research_projects/tapex/run_wikitablequestions_with_tapex.py @@ -413,13 +413,12 @@ def preprocess_tableqa_function(examples, is_training=False): table=tables, query=questions, max_length=data_args.max_source_length, padding=padding, truncation=True ) - with tokenizer.as_target_tokenizer(): - labels = tokenizer( - answer=[", ".join(answer) for answer in answers], - max_length=max_target_length, - padding=padding, - truncation=True, - ) + labels = tokenizer( + answer=[", ".join(answer) for answer in answers], + max_length=max_target_length, + padding=padding, + truncation=True, + ) # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore # padding in the loss. 
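The TAPEX hunks just above differ slightly from the plain-text scripts: as they show, [`TapexTokenizer`] encodes targets through its `answer` keyword rather than `text_target`. A rough sketch of a single example, assuming the `microsoft/tapex-base` checkpoint and a toy table (both illustrative):

```python
import pandas as pd

from transformers import TapexTokenizer

# Assumed checkpoint name, for illustration only.
tokenizer = TapexTokenizer.from_pretrained("microsoft/tapex-base")

table = pd.DataFrame({"city": ["Paris", "Lyon"], "inhabitants": ["2100000", "500000"]})

# Encode the table/question pair as inputs, and the answer directly as labels,
# with no target-tokenizer context manager.
model_inputs = tokenizer(table=table, query="How many inhabitants does Paris have?", return_tensors="pt")
labels = tokenizer(answer="2100000", return_tensors="pt")
model_inputs["labels"] = labels["input_ids"]
```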
diff --git a/examples/tensorflow/summarization/run_summarization.py b/examples/tensorflow/summarization/run_summarization.py index fdeb3f624985..9994f2feb91b 100644 --- a/examples/tensorflow/summarization/run_summarization.py +++ b/examples/tensorflow/summarization/run_summarization.py @@ -503,9 +503,8 @@ def preprocess_function(examples): inputs = [prefix + inp for inp in inputs] model_inputs = tokenizer(inputs, max_length=data_args.max_source_length, padding=padding, truncation=True) - # Setup the tokenizer for targets - with tokenizer.as_target_tokenizer(): - labels = tokenizer(targets, max_length=max_target_length, padding=padding, truncation=True) + # Tokenize targets with the `text_target` keyword argument + labels = tokenizer(text_target=targets, max_length=max_target_length, padding=padding, truncation=True) # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore # padding in the loss. diff --git a/examples/tensorflow/translation/run_translation.py b/examples/tensorflow/translation/run_translation.py index b85b5bcb71b7..5be4728a814c 100644 --- a/examples/tensorflow/translation/run_translation.py +++ b/examples/tensorflow/translation/run_translation.py @@ -457,9 +457,8 @@ def preprocess_function(examples): inputs = [prefix + inp for inp in inputs] model_inputs = tokenizer(inputs, max_length=data_args.max_source_length, padding=padding, truncation=True) - # Setup the tokenizer for targets - with tokenizer.as_target_tokenizer(): - labels = tokenizer(targets, max_length=max_target_length, padding=padding, truncation=True) + # Tokenize targets with the `text_target` keyword argument + labels = tokenizer(text_target=targets, max_length=max_target_length, padding=padding, truncation=True) # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore # padding in the loss. diff --git a/src/transformers/models/m2m_100/tokenization_m2m_100.py b/src/transformers/models/m2m_100/tokenization_m2m_100.py index c11c8782f227..b67b82fb7a58 100644 --- a/src/transformers/models/m2m_100/tokenization_m2m_100.py +++ b/src/transformers/models/m2m_100/tokenization_m2m_100.py @@ -115,10 +115,8 @@ class M2M100Tokenizer(PreTrainedTokenizer): >>> tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M", src_lang="en", tgt_lang="ro") >>> src_text = " UN Chief Says There Is No Military Solution in Syria" >>> tgt_text = "Şeful ONU declară că nu există o soluţie militară în Siria" - >>> model_inputs = tokenizer(src_text, return_tensors="pt") - >>> with tokenizer.as_target_tokenizer(): - ... 
labels = tokenizer(tgt_text, return_tensors="pt").input_ids - >>> model(**model_inputs, labels=labels) # should work + >>> model_inputs = tokenizer(src_text, text_target=tgt_text, return_tensors="pt") + >>> model(**model_inputs) # should work ```""" vocab_files_names = VOCAB_FILES_NAMES diff --git a/src/transformers/models/marian/tokenization_marian.py b/src/transformers/models/marian/tokenization_marian.py index 6967a675813c..66eb5a44c5bf 100644 --- a/src/transformers/models/marian/tokenization_marian.py +++ b/src/transformers/models/marian/tokenization_marian.py @@ -111,10 +111,7 @@ class MarianTokenizer(PreTrainedTokenizer): >>> tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-de") >>> src_texts = ["I am a small frog.", "Tom asked his teacher for advice."] >>> tgt_texts = ["Ich bin ein kleiner Frosch.", "Tom bat seinen Lehrer um Rat."] # optional - >>> inputs = tokenizer(src_texts, return_tensors="pt", padding=True) - >>> with tokenizer.as_target_tokenizer(): - ... labels = tokenizer(tgt_texts, return_tensors="pt", padding=True) - >>> inputs["labels"] = labels["input_ids"] + >>> inputs = tokenizer(src_texts, text_target=tgt_texts, return_tensors="pt", padding=True) # keys [input_ids, attention_mask, labels]. >>> outputs = model(**inputs) # should work diff --git a/src/transformers/models/mbart/tokenization_mbart.py b/src/transformers/models/mbart/tokenization_mbart.py index 431908b0e97c..65460746425f 100644 --- a/src/transformers/models/mbart/tokenization_mbart.py +++ b/src/transformers/models/mbart/tokenization_mbart.py @@ -68,10 +68,7 @@ class MBartTokenizer(PreTrainedTokenizer): >>> tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-en-ro", src_lang="en_XX", tgt_lang="ro_RO") >>> example_english_phrase = " UN Chief Says There Is No Military Solution in Syria" >>> expected_translation_romanian = "Şeful ONU declară că nu există o soluţie militară în Siria" - >>> inputs = tokenizer(example_english_phrase, return_tensors="pt") - >>> with tokenizer.as_target_tokenizer(): - ... labels = tokenizer(expected_translation_romanian, return_tensors="pt") - >>> inputs["labels"] = labels["input_ids"] + >>> inputs = tokenizer(example_english_phrase, text_target=expected_translation_romanian, return_tensors="pt") ```""" vocab_files_names = VOCAB_FILES_NAMES diff --git a/src/transformers/models/mbart/tokenization_mbart_fast.py b/src/transformers/models/mbart/tokenization_mbart_fast.py index 85937a5fd8ed..8bf75ebe59c0 100644 --- a/src/transformers/models/mbart/tokenization_mbart_fast.py +++ b/src/transformers/models/mbart/tokenization_mbart_fast.py @@ -81,10 +81,7 @@ class MBartTokenizerFast(PreTrainedTokenizerFast): ... ) >>> example_english_phrase = " UN Chief Says There Is No Military Solution in Syria" >>> expected_translation_romanian = "Şeful ONU declară că nu există o soluţie militară în Siria" - >>> inputs = tokenizer(example_english_phrase, return_tensors="pt") - >>> with tokenizer.as_target_tokenizer(): - ... 
labels = tokenizer(expected_translation_romanian, return_tensors="pt") - >>> inputs["labels"] = labels["input_ids"] + >>> inputs = tokenizer(example_english_phrase, text_target=expected_translation_romanian, return_tensors="pt") ```""" vocab_files_names = VOCAB_FILES_NAMES diff --git a/src/transformers/models/mbart50/tokenization_mbart50.py b/src/transformers/models/mbart50/tokenization_mbart50.py index 274b7f95f829..707a97734927 100644 --- a/src/transformers/models/mbart50/tokenization_mbart50.py +++ b/src/transformers/models/mbart50/tokenization_mbart50.py @@ -101,10 +101,8 @@ class MBart50Tokenizer(PreTrainedTokenizer): >>> tokenizer = MBart50Tokenizer.from_pretrained("facebook/mbart-large-50", src_lang="en_XX", tgt_lang="ro_RO") >>> src_text = " UN Chief Says There Is No Military Solution in Syria" >>> tgt_text = "Şeful ONU declară că nu există o soluţie militară în Siria" - >>> model_inputs = tokenizer(src_text, return_tensors="pt") - >>> with tokenizer.as_target_tokenizer(): - ... labels = tokenizer(tgt_text, return_tensors="pt").input_ids - >>> # model(**model_inputs, labels=labels) should work + >>> model_inputs = tokenizer(src_text, text_target=tgt_text, return_tensors="pt") + >>> # model(**model_inputs) should work ```""" vocab_files_names = VOCAB_FILES_NAMES diff --git a/src/transformers/models/mbart50/tokenization_mbart50_fast.py b/src/transformers/models/mbart50/tokenization_mbart50_fast.py index b0f41a0fd719..1ab8ff06e260 100644 --- a/src/transformers/models/mbart50/tokenization_mbart50_fast.py +++ b/src/transformers/models/mbart50/tokenization_mbart50_fast.py @@ -97,10 +97,8 @@ class MBart50TokenizerFast(PreTrainedTokenizerFast): >>> tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50", src_lang="en_XX", tgt_lang="ro_RO") >>> src_text = " UN Chief Says There Is No Military Solution in Syria" >>> tgt_text = "Şeful ONU declară că nu există o soluţie militară în Siria" - >>> model_inputs = tokenizer(src_text, return_tensors="pt") - >>> with tokenizer.as_target_tokenizer(): - ... labels = tokenizer(tgt_text, return_tensors="pt").input_ids - >>> # model(**model_inputs, labels=labels) should work + >>> model_inputs = tokenizer(src_text, text_target=tgt_text, return_tensors="pt") + >>> # model(**model_inputs) should work ```""" vocab_files_names = VOCAB_FILES_NAMES diff --git a/src/transformers/models/mt5/modeling_flax_mt5.py b/src/transformers/models/mt5/modeling_flax_mt5.py index 841d2069e65c..4f2fa5b9fb39 100644 --- a/src/transformers/models/mt5/modeling_flax_mt5.py +++ b/src/transformers/models/mt5/modeling_flax_mt5.py @@ -57,8 +57,7 @@ class FlaxMT5Model(FlaxT5Model): >>> summary = "Weiter Verhandlung in Syrien." >>> inputs = tokenizer(article, return_tensors="np") - >>> with tokenizer.as_target_tokenizer(): - ... decoder_input_ids = tokenizer(summary, return_tensors="np").input_ids + >>> decoder_input_ids = tokenizer(text_target=summary, return_tensors="np").input_ids >>> outputs = model(input_ids=inputs["input_ids"], decoder_input_ids=decoder_input_ids) >>> hidden_states = outputs.last_hidden_state @@ -84,8 +83,7 @@ class FlaxMT5EncoderModel(FlaxT5EncoderModel): >>> summary = "Weiter Verhandlung in Syrien." >>> inputs = tokenizer(article, return_tensors="np") - >>> with tokenizer.as_target_tokenizer(): - ... 
decoder_input_ids = tokenizer(summary, return_tensors="np").input_ids + >>> decoder_input_ids = tokenizer(text_target=summary, return_tensors="np").input_ids >>> outputs = model(input_ids=inputs["input_ids"]) >>> hidden_states = outputs.last_hidden_state @@ -111,8 +109,7 @@ class FlaxMT5ForConditionalGeneration(FlaxT5ForConditionalGeneration): >>> summary = "Weiter Verhandlung in Syrien." >>> inputs = tokenizer(article, return_tensors="np") - >>> with tokenizer.as_target_tokenizer(): - ... decoder_input_ids = tokenizer(summary, return_tensors="np").input_ids + >>> decoder_input_ids = tokenizer(text_target=summary, return_tensors="np").input_ids >>> outputs = model(**inputs, decoder_input_ids=decoder_input_ids) >>> logits = outputs.logits diff --git a/src/transformers/models/mt5/modeling_mt5.py b/src/transformers/models/mt5/modeling_mt5.py index 8c19a63eded3..c562b011522d 100644 --- a/src/transformers/models/mt5/modeling_mt5.py +++ b/src/transformers/models/mt5/modeling_mt5.py @@ -40,8 +40,7 @@ class MT5Model(T5Model): >>> article = "UN Offizier sagt, dass weiter verhandelt werden muss in Syrien." >>> summary = "Weiter Verhandlung in Syrien." >>> inputs = tokenizer(article, return_tensors="pt") - >>> with tokenizer.as_target_tokenizer(): - ... labels = tokenizer(summary, return_tensors="pt") + >>> labels = tokenizer(text_target=summary, return_tensors="pt") >>> outputs = model(input_ids=inputs["input_ids"], decoder_input_ids=labels["input_ids"]) >>> hidden_states = outputs.last_hidden_state @@ -73,11 +72,9 @@ class MT5ForConditionalGeneration(T5ForConditionalGeneration): >>> tokenizer = T5Tokenizer.from_pretrained("google/mt5-small") >>> article = "UN Offizier sagt, dass weiter verhandelt werden muss in Syrien." >>> summary = "Weiter Verhandlung in Syrien." - >>> inputs = tokenizer(article, return_tensors="pt") - >>> with tokenizer.as_target_tokenizer(): - ... labels = tokenizer(summary, return_tensors="pt") + >>> inputs = tokenizer(article, text_target=summary, return_tensors="pt") - >>> outputs = model(**inputs, labels=labels["input_ids"]) + >>> outputs = model(**inputs) >>> loss = outputs.loss ```""" diff --git a/src/transformers/models/mt5/modeling_tf_mt5.py b/src/transformers/models/mt5/modeling_tf_mt5.py index 2808b8421a16..71aa0bb66a7a 100644 --- a/src/transformers/models/mt5/modeling_tf_mt5.py +++ b/src/transformers/models/mt5/modeling_tf_mt5.py @@ -40,8 +40,7 @@ class TFMT5Model(TFT5Model): >>> article = "UN Offizier sagt, dass weiter verhandelt werden muss in Syrien." >>> summary = "Weiter Verhandlung in Syrien." >>> inputs = tokenizer(article, return_tensors="tf") - >>> with tokenizer.as_target_tokenizer(): - ... labels = tokenizer(summary, return_tensors="tf") + >>> labels = tokenizer(text_target=summary, return_tensors="tf") >>> outputs = model(input_ids=inputs["input_ids"], decoder_input_ids=labels["input_ids"]) >>> hidden_states = outputs.last_hidden_state @@ -64,11 +63,9 @@ class TFMT5ForConditionalGeneration(TFT5ForConditionalGeneration): >>> tokenizer = T5Tokenizer.from_pretrained("google/mt5-small") >>> article = "UN Offizier sagt, dass weiter verhandelt werden muss in Syrien." >>> summary = "Weiter Verhandlung in Syrien." - >>> inputs = tokenizer(article, return_tensors="tf") - >>> with tokenizer.as_target_tokenizer(): - ... 
labels = tokenizer(summary, return_tensors="tf") + >>> inputs = tokenizer(article, text_target=summary, return_tensors="tf") - >>> outputs = model(**inputs, labels=labels["input_ids"]) + >>> outputs = model(**inputs) >>> loss = outputs.loss ```""" diff --git a/src/transformers/models/nllb/tokenization_nllb.py b/src/transformers/models/nllb/tokenization_nllb.py index d0c0ce8b74ec..6a326fd3ca10 100644 --- a/src/transformers/models/nllb/tokenization_nllb.py +++ b/src/transformers/models/nllb/tokenization_nllb.py @@ -66,10 +66,7 @@ class NllbTokenizer(PreTrainedTokenizer): ... ) >>> example_english_phrase = " UN Chief Says There Is No Military Solution in Syria" >>> expected_translation_french = "Le chef de l'ONU affirme qu'il n'y a pas de solution militaire en Syrie." - >>> inputs = tokenizer(example_english_phrase, return_tensors="pt") - >>> with tokenizer.as_target_tokenizer(): - ... labels = tokenizer(expected_translation_french, return_tensors="pt") - >>> inputs["labels"] = labels["input_ids"] + >>> inputs = tokenizer(example_english_phrase, text_target=expected_translation_french, return_tensors="pt") ``` Args: diff --git a/src/transformers/models/nllb/tokenization_nllb_fast.py b/src/transformers/models/nllb/tokenization_nllb_fast.py index 404b7c093e05..1afe27f43b4e 100644 --- a/src/transformers/models/nllb/tokenization_nllb_fast.py +++ b/src/transformers/models/nllb/tokenization_nllb_fast.py @@ -79,10 +79,7 @@ class NllbTokenizerFast(PreTrainedTokenizerFast): ... ) >>> example_english_phrase = " UN Chief Says There Is No Military Solution in Syria" >>> expected_translation_french = "Le chef de l'ONU affirme qu'il n'y a pas de solution militaire en Syrie." - >>> inputs = tokenizer(example_english_phrase, return_tensors="pt") - >>> with tokenizer.as_target_tokenizer(): - ... labels = tokenizer(expected_translation_french, return_tensors="pt") - >>> inputs["labels"] = labels["input_ids"] + >>> inputs = tokenizer(example_english_phrase, text_target=expected_translation_french, return_tensors="pt") ``` Args: diff --git a/src/transformers/models/plbart/tokenization_plbart.py b/src/transformers/models/plbart/tokenization_plbart.py index 93e109473bdf..411df996926a 100644 --- a/src/transformers/models/plbart/tokenization_plbart.py +++ b/src/transformers/models/plbart/tokenization_plbart.py @@ -152,10 +152,7 @@ class PLBartTokenizer(PreTrainedTokenizer): >>> tokenizer = PLBartTokenizer.from_pretrained("uclanlp/plbart-python-en_XX", src_lang="python", tgt_lang="en_XX") >>> example_python_phrase = "def maximum(a,b,c):NEW_LINE_INDENTreturn max([a,b,c])" >>> expected_translation_english = "Returns the maximum value of a b c." - >>> inputs = tokenizer(example_python_phrase, return_tensors="pt") - >>> with tokenizer.as_target_tokenizer(): - ... 
labels = tokenizer(expected_translation_english, return_tensors="pt") - >>> inputs["labels"] = labels["input_ids"] + >>> inputs = tokenizer(example_python_phrase, text_target=expected_translation_english, return_tensors="pt") ```""" vocab_files_names = VOCAB_FILES_NAMES diff --git a/src/transformers/models/rag/modeling_rag.py b/src/transformers/models/rag/modeling_rag.py index 1d6a62b2013d..41af393c6710 100644 --- a/src/transformers/models/rag/modeling_rag.py +++ b/src/transformers/models/rag/modeling_rag.py @@ -818,8 +818,7 @@ def forward( >>> model = RagSequenceForGeneration.from_pretrained("facebook/rag-token-nq", retriever=retriever) >>> inputs = tokenizer("How many people live in Paris?", return_tensors="pt") - >>> with tokenizer.as_target_tokenizer(): - ... targets = tokenizer("In Paris, there are 10 million people.", return_tensors="pt") + >>> targets = tokenizer(text_target="In Paris, there are 10 million people.", return_tensors="pt") >>> input_ids = inputs["input_ids"] >>> labels = targets["input_ids"] >>> outputs = model(input_ids=input_ids, labels=labels) @@ -1287,8 +1286,7 @@ def forward( >>> model = RagTokenForGeneration.from_pretrained("facebook/rag-token-nq", retriever=retriever) >>> inputs = tokenizer("How many people live in Paris?", return_tensors="pt") - >>> with tokenizer.as_target_tokenizer(): - ... targets = tokenizer("In Paris, there are 10 million people.", return_tensors="pt") + >>> targets = tokenizer(text_target="In Paris, there are 10 million people.", return_tensors="pt") >>> input_ids = inputs["input_ids"] >>> labels = targets["input_ids"] >>> outputs = model(input_ids=input_ids, labels=labels) diff --git a/src/transformers/models/rag/tokenization_rag.py b/src/transformers/models/rag/tokenization_rag.py index 485c2c448373..5b6ec67e6bf8 100644 --- a/src/transformers/models/rag/tokenization_rag.py +++ b/src/transformers/models/rag/tokenization_rag.py @@ -105,17 +105,16 @@ def prepare_seq2seq_batch( if tgt_texts is None: return model_inputs # Process tgt_texts - with self.as_target_tokenizer(): - if max_target_length is None: - max_target_length = self.current_tokenizer.model_max_length - labels = self( - tgt_texts, - add_special_tokens=True, - return_tensors=return_tensors, - padding=padding, - max_length=max_target_length, - truncation=truncation, - **kwargs, - ) + if max_target_length is None: + max_target_length = self.current_tokenizer.model_max_length + labels = self( + text_target=tgt_texts, + add_special_tokens=True, + return_tensors=return_tensors, + padding=padding, + max_length=max_target_length, + truncation=truncation, + **kwargs, + ) model_inputs["labels"] = labels["input_ids"] return model_inputs diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index b8ab1e09c445..a945b1abe58f 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -3658,14 +3658,17 @@ def prepare_seq2seq_batch( # docstyle-ignore formatted_warning = """ `prepare_seq2seq_batch` is deprecated and will be removed in version 5 of HuggingFace Transformers. Use the regular -`__call__` method to prepare your inputs and the tokenizer under the `as_target_tokenizer` context manager to prepare -your targets. +`__call__` method to prepare your inputs and targets. Here is a short example: +model_inputs = tokenizer(src_texts, text_target=tgt_texts, ...) 
+ +If you need to use different keyword arguments for the source and target texts, you should do two calls like +this: + model_inputs = tokenizer(src_texts, ...) -with tokenizer.as_target_tokenizer(): - labels = tokenizer(tgt_texts, ...) +labels = tokenizer(text_target=tgt_texts, ...) model_inputs["labels"] = labels["input_ids"] See the documentation of your specific tokenizer for more details on the specific arguments to the tokenizer of choice. @@ -3691,16 +3694,15 @@ def prepare_seq2seq_batch( # Process tgt_texts if max_target_length is None: max_target_length = max_length - with self.as_target_tokenizer(): - labels = self( - tgt_texts, - add_special_tokens=True, - return_tensors=return_tensors, - padding=padding, - max_length=max_target_length, - truncation=truncation, - **kwargs, - ) + labels = self( + text_target=tgt_texts, + add_special_tokens=True, + return_tensors=return_tensors, + padding=padding, + max_length=max_target_length, + truncation=truncation, + **kwargs, + ) model_inputs["labels"] = labels["input_ids"] return model_inputs diff --git a/tests/models/bart/test_tokenization_bart.py b/tests/models/bart/test_tokenization_bart.py index b8e216e69ba2..24ea6e1e5cd9 100644 --- a/tests/models/bart/test_tokenization_bart.py +++ b/tests/models/bart/test_tokenization_bart.py @@ -112,14 +112,13 @@ def test_prepare_batch_empty_target_text(self): self.assertNotIn("decoder_attention_mask", batch) @require_torch - def test_as_target_tokenizer_target_length(self): + def test_tokenizer_as_target_length(self): tgt_text = [ "Summary of the text.", "Another summary.", ] for tokenizer in [self.default_tokenizer, self.default_tokenizer_fast]: - with tokenizer.as_target_tokenizer(): - targets = tokenizer(tgt_text, max_length=32, padding="max_length", return_tensors="pt") + targets = tokenizer(text_target=tgt_text, max_length=32, padding="max_length", return_tensors="pt") self.assertEqual(32, targets["input_ids"].shape[1]) @require_torch @@ -140,8 +139,7 @@ def test_special_tokens(self): ] for tokenizer in [self.default_tokenizer, self.default_tokenizer_fast]: inputs = tokenizer(src_text, return_tensors="pt") - with tokenizer.as_target_tokenizer(): - targets = tokenizer(tgt_text, return_tensors="pt") + targets = tokenizer(text_target=tgt_text, return_tensors="pt") input_ids = inputs["input_ids"] labels = targets["input_ids"] self.assertTrue((input_ids[:, 0] == tokenizer.bos_token_id).all().item()) diff --git a/tests/models/byt5/test_tokenization_byt5.py b/tests/models/byt5/test_tokenization_byt5.py index 70cfa40ef919..85057c5278bb 100644 --- a/tests/models/byt5/test_tokenization_byt5.py +++ b/tests/models/byt5/test_tokenization_byt5.py @@ -152,10 +152,9 @@ def test_max_length_integration(self): "Summary of the text.", "Another summary.", ] - with tokenizer.as_target_tokenizer(): - targets = tokenizer( - tgt_text, max_length=32, padding="max_length", truncation=True, return_tensors=FRAMEWORK - ) + targets = tokenizer( + text_target=tgt_text, max_length=32, padding="max_length", truncation=True, return_tensors=FRAMEWORK + ) self.assertEqual(32, targets["input_ids"].shape[1]) def test_eos_in_input(self): @@ -167,12 +166,10 @@ def test_eos_in_input(self): expected_tgt_tokens = [86, 120, 112, 112, 100, 117, 124, 35, 114, 105, 35, 119, 107, 104, 35, 119, 104, 123, 119, 49, 35, 1] # fmt: on - batch = tokenizer(src_text) - with tokenizer.as_target_tokenizer(): - targets = tokenizer(tgt_text) + batch = tokenizer(src_text, text_target=tgt_text) self.assertEqual(expected_src_tokens,
batch["input_ids"][0]) - self.assertEqual(expected_tgt_tokens, targets["input_ids"][0]) + self.assertEqual(expected_tgt_tokens, batch["labels"][0]) # cannot use default save_and_load_tokenzier test method because tokenzier has no vocab def test_save_and_load_tokenizer(self): diff --git a/tests/models/canine/test_tokenization_canine.py b/tests/models/canine/test_tokenization_canine.py index 0e016d523b5c..6ae27082cceb 100644 --- a/tests/models/canine/test_tokenization_canine.py +++ b/tests/models/canine/test_tokenization_canine.py @@ -80,8 +80,9 @@ def test_max_length_integration(self): "What's the weater?", "It's about 25 degrees.", ] - with tokenizer.as_target_tokenizer(): - targets = tokenizer(tgt_text, max_length=32, padding="max_length", truncation=True, return_tensors="pt") + targets = tokenizer( + text_target=tgt_text, max_length=32, padding="max_length", truncation=True, return_tensors="pt" + ) self.assertEqual(32, targets["input_ids"].shape[1]) # cannot use default save_and_load_tokenzier test method because tokenzier has no vocab diff --git a/tests/models/m2m_100/test_tokenization_m2m_100.py b/tests/models/m2m_100/test_tokenization_m2m_100.py index 729deb6cd486..ca8349d94016 100644 --- a/tests/models/m2m_100/test_tokenization_m2m_100.py +++ b/tests/models/m2m_100/test_tokenization_m2m_100.py @@ -187,9 +187,7 @@ def test_batch_fairseq_parity(self): self.tokenizer.src_lang = "en" self.tokenizer.tgt_lang = "fr" - batch = self.tokenizer(self.src_text, padding=True, return_tensors="pt") - with self.tokenizer.as_target_tokenizer(): - batch["labels"] = self.tokenizer(self.tgt_text, padding=True, return_tensors="pt").input_ids + batch = self.tokenizer(self.src_text, text_target=self.tgt_text, padding=True, return_tensors="pt") batch["decoder_input_ids"] = shift_tokens_right( batch["labels"], self.tokenizer.pad_token_id, self.tokenizer.eos_token_id @@ -217,17 +215,19 @@ def test_src_lang_setter(self): self.assertListEqual(self.tokenizer.suffix_tokens, [self.tokenizer.eos_token_id]) @require_torch - def test_as_target_tokenizer(self): + def test_tokenizer_target_mode(self): self.tokenizer.tgt_lang = "mr" - with self.tokenizer.as_target_tokenizer(): - self.assertListEqual(self.tokenizer.prefix_tokens, [self.tokenizer.get_lang_id("mr")]) - self.assertListEqual(self.tokenizer.suffix_tokens, [self.tokenizer.eos_token_id]) + self.tokenizer._switch_to_target_mode() + self.assertListEqual(self.tokenizer.prefix_tokens, [self.tokenizer.get_lang_id("mr")]) + self.assertListEqual(self.tokenizer.suffix_tokens, [self.tokenizer.eos_token_id]) + self.tokenizer._switch_to_input_mode() self.assertListEqual(self.tokenizer.prefix_tokens, [self.tokenizer.get_lang_id(self.tokenizer.src_lang)]) self.tokenizer.tgt_lang = "zh" - with self.tokenizer.as_target_tokenizer(): - self.assertListEqual(self.tokenizer.prefix_tokens, [self.tokenizer.get_lang_id("zh")]) - self.assertListEqual(self.tokenizer.suffix_tokens, [self.tokenizer.eos_token_id]) + self.tokenizer._switch_to_target_mode() + self.assertListEqual(self.tokenizer.prefix_tokens, [self.tokenizer.get_lang_id("zh")]) + self.assertListEqual(self.tokenizer.suffix_tokens, [self.tokenizer.eos_token_id]) + self.tokenizer._switch_to_input_mode() self.assertListEqual(self.tokenizer.prefix_tokens, [self.tokenizer.get_lang_id(self.tokenizer.src_lang)]) @require_torch diff --git a/tests/models/marian/test_modeling_marian.py b/tests/models/marian/test_modeling_marian.py index e454f981b4a0..6ca951e37aed 100644 --- a/tests/models/marian/test_modeling_marian.py +++ 
b/tests/models/marian/test_modeling_marian.py @@ -438,10 +438,7 @@ def test_forward(self): src, tgt = ["I am a small frog"], ["Ich bin ein kleiner Frosch."] expected_ids = [38, 121, 14, 697, 38848, 0] - model_inputs = self.tokenizer(src, return_tensors="pt").to(torch_device) - with self.tokenizer.as_target_tokenizer(): - targets = self.tokenizer(tgt, return_tensors="pt") - model_inputs["labels"] = targets["input_ids"].to(torch_device) + model_inputs = self.tokenizer(src, text_target=tgt, return_tensors="pt").to(torch_device) self.assertListEqual(expected_ids, model_inputs.input_ids[0].tolist()) diff --git a/tests/models/marian/test_tokenization_marian.py b/tests/models/marian/test_tokenization_marian.py index 2cbc0b0a3fe7..6a079036bb6d 100644 --- a/tests/models/marian/test_tokenization_marian.py +++ b/tests/models/marian/test_tokenization_marian.py @@ -145,9 +145,8 @@ def test_tokenizer_integration_seperate_vocabs(self): src_ids = tokenizer(source_text).input_ids self.assertListEqual(src_ids, expected_src_ids) - with tokenizer.as_target_tokenizer(): - target_ids = tokenizer(target_text).input_ids - self.assertListEqual(target_ids, expected_target_ids) + target_ids = tokenizer(text_target=target_text).input_ids + self.assertListEqual(target_ids, expected_target_ids) decoded = tokenizer.decode(target_ids, skip_special_tokens=True) self.assertEqual(decoded, target_text) diff --git a/tests/models/mbart/test_tokenization_mbart.py b/tests/models/mbart/test_tokenization_mbart.py index e80531051b65..f65662dbe247 100644 --- a/tests/models/mbart/test_tokenization_mbart.py +++ b/tests/models/mbart/test_tokenization_mbart.py @@ -265,33 +265,27 @@ def test_special_tokens_unaffacted_by_save_load(self): @require_torch def test_batch_fairseq_parity(self): - batch = self.tokenizer(self.src_text, padding=True) - with self.tokenizer.as_target_tokenizer(): - targets = self.tokenizer(self.tgt_text, padding=True, return_tensors="pt") - labels = targets["input_ids"] - batch["decoder_input_ids"] = shift_tokens_right(labels, self.tokenizer.pad_token_id).tolist() + batch = self.tokenizer(self.src_text, text_target=self.tgt_text, padding=True, return_tensors="pt") + batch["decoder_input_ids"] = shift_tokens_right(batch["labels"], self.tokenizer.pad_token_id) # fairseq batch: https://gist.github.com/sshleifer/cba08bc2109361a74ac3760a7e30e4f4 - assert batch.input_ids[1][-2:] == [2, EN_CODE] - assert batch.decoder_input_ids[1][0] == RO_CODE + assert batch.input_ids[1][-2:].tolist() == [2, EN_CODE] + assert batch.decoder_input_ids[1][0].tolist() == RO_CODE assert batch.decoder_input_ids[1][-1] == 2 - assert labels[1][-2:].tolist() == [2, RO_CODE] + assert batch.labels[1][-2:].tolist() == [2, RO_CODE] @require_torch def test_enro_tokenizer_prepare_batch(self): batch = self.tokenizer( - self.src_text, padding=True, truncation=True, max_length=len(self.expected_src_tokens), return_tensors="pt" + self.src_text, + text_target=self.tgt_text, + padding=True, + truncation=True, + max_length=len(self.expected_src_tokens), + return_tensors="pt", ) - with self.tokenizer.as_target_tokenizer(): - targets = self.tokenizer( - self.tgt_text, - padding=True, - truncation=True, - max_length=len(self.expected_src_tokens), - return_tensors="pt", - ) - labels = targets["input_ids"] - batch["decoder_input_ids"] = shift_tokens_right(labels, self.tokenizer.pad_token_id) + + batch["decoder_input_ids"] = shift_tokens_right(batch["labels"], self.tokenizer.pad_token_id) self.assertIsInstance(batch, BatchEncoding) @@ -306,8 +300,9 @@ def 
@@ -306,8 +300,9 @@ def test_enro_tokenizer_prepare_batch(self):
 
     def test_seq2seq_max_length(self):
         batch = self.tokenizer(self.src_text, padding=True, truncation=True, max_length=3, return_tensors="pt")
-        with self.tokenizer.as_target_tokenizer():
-            targets = self.tokenizer(self.tgt_text, padding=True, truncation=True, max_length=10, return_tensors="pt")
+        targets = self.tokenizer(
+            text_target=self.tgt_text, padding=True, truncation=True, max_length=10, return_tensors="pt"
+        )
         labels = targets["input_ids"]
         batch["decoder_input_ids"] = shift_tokens_right(labels, self.tokenizer.pad_token_id)
diff --git a/tests/models/mbart50/test_tokenization_mbart50.py b/tests/models/mbart50/test_tokenization_mbart50.py
index 5a65d8856656..d10d51df907c 100644
--- a/tests/models/mbart50/test_tokenization_mbart50.py
+++ b/tests/models/mbart50/test_tokenization_mbart50.py
@@ -256,35 +256,27 @@ def test_special_tokens_unaffacted_by_save_load(self):
 
     @require_torch
     def test_batch_fairseq_parity(self):
-        batch = self.tokenizer(self.src_text, padding=True)
-        with self.tokenizer.as_target_tokenizer():
-            targets = self.tokenizer(self.tgt_text, padding=True, return_tensors="pt")
-        labels = targets["input_ids"]
-        batch["decoder_input_ids"] = shift_tokens_right(labels, self.tokenizer.pad_token_id).tolist()
-        labels = labels.tolist()
+        batch = self.tokenizer(self.src_text, text_target=self.tgt_text, padding=True, return_tensors="pt")
+        batch["decoder_input_ids"] = shift_tokens_right(batch["labels"], self.tokenizer.pad_token_id)
 
         # fairseq batch: https://gist.github.com/sshleifer/cba08bc2109361a74ac3760a7e30e4f4
         assert batch.input_ids[1][0] == EN_CODE
         assert batch.input_ids[1][-1] == 2
-        assert labels[1][0] == RO_CODE
-        assert labels[1][-1] == 2
-        assert batch.decoder_input_ids[1][:2] == [2, RO_CODE]
+        assert batch.labels[1][0] == RO_CODE
+        assert batch.labels[1][-1] == 2
+        assert batch.decoder_input_ids[1][:2].tolist() == [2, RO_CODE]
 
     @require_torch
     def test_tokenizer_prepare_batch(self):
         batch = self.tokenizer(
-            self.src_text, padding=True, truncation=True, max_length=len(self.expected_src_tokens), return_tensors="pt"
+            self.src_text,
+            text_target=self.tgt_text,
+            padding=True,
+            truncation=True,
+            max_length=len(self.expected_src_tokens),
+            return_tensors="pt",
         )
-        with self.tokenizer.as_target_tokenizer():
-            targets = self.tokenizer(
-                self.tgt_text,
-                padding=True,
-                truncation=True,
-                max_length=len(self.expected_src_tokens),
-                return_tensors="pt",
-            )
-        labels = targets["input_ids"]
-        batch["decoder_input_ids"] = shift_tokens_right(labels, self.tokenizer.pad_token_id)
+        batch["decoder_input_ids"] = shift_tokens_right(batch["labels"], self.tokenizer.pad_token_id)
 
         self.assertIsInstance(batch, BatchEncoding)
@@ -299,8 +291,9 @@ def test_tokenizer_prepare_batch(self):
 
     def test_seq2seq_max_target_length(self):
         batch = self.tokenizer(self.src_text, padding=True, truncation=True, max_length=3, return_tensors="pt")
-        with self.tokenizer.as_target_tokenizer():
-            targets = self.tokenizer(self.tgt_text, padding=True, truncation=True, max_length=10, return_tensors="pt")
+        targets = self.tokenizer(
+            text_target=self.tgt_text, padding=True, truncation=True, max_length=10, return_tensors="pt"
+        )
         labels = targets["input_ids"]
         batch["decoder_input_ids"] = shift_tokens_right(labels, self.tokenizer.pad_token_id)
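When source and target need different truncation lengths, the targets can still be tokenized in a separate call that passes only `text_target`. A sketch, assuming the facebook/mbart-large-50 checkpoint is reachable:

# Two calls keep independent max_length settings for inputs and labels.
from transformers import MBart50Tokenizer

tokenizer = MBart50Tokenizer.from_pretrained(
    "facebook/mbart-large-50", src_lang="en_XX", tgt_lang="ro_RO"
)
batch = tokenizer(["a fairly long source sentence"], max_length=3, truncation=True, return_tensors="pt")
targets = tokenizer(
    text_target=["a target"], max_length=10, padding="max_length", truncation=True, return_tensors="pt"
)
batch["labels"] = targets["input_ids"]  # 10 columns, independent of the source length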
diff --git a/tests/models/mvp/test_tokenization_mvp.py b/tests/models/mvp/test_tokenization_mvp.py
index ad3fad67c91d..71e83fba0e16 100644
--- a/tests/models/mvp/test_tokenization_mvp.py
+++ b/tests/models/mvp/test_tokenization_mvp.py
@@ -112,14 +112,13 @@ def test_prepare_batch_empty_target_text(self):
         self.assertNotIn("decoder_attention_mask", batch)
 
     @require_torch
-    def test_as_target_tokenizer_target_length(self):
+    def test_tokenizer_as_target_length(self):
         tgt_text = [
             "Summary of the text.",
             "Another summary.",
         ]
         for tokenizer in [self.default_tokenizer, self.default_tokenizer_fast]:
-            with tokenizer.as_target_tokenizer():
-                targets = tokenizer(tgt_text, max_length=32, padding="max_length", return_tensors="pt")
+            targets = tokenizer(text_target=tgt_text, max_length=32, padding="max_length", return_tensors="pt")
             self.assertEqual(32, targets["input_ids"].shape[1])
 
     @require_torch
@@ -139,11 +138,9 @@ def test_special_tokens(self):
             "Summary of the text.",
         ]
         for tokenizer in [self.default_tokenizer, self.default_tokenizer_fast]:
-            inputs = tokenizer(src_text, return_tensors="pt")
-            with tokenizer.as_target_tokenizer():
-                targets = tokenizer(tgt_text, return_tensors="pt")
+            inputs = tokenizer(src_text, text_target=tgt_text, return_tensors="pt")
             input_ids = inputs["input_ids"]
-            labels = targets["input_ids"]
+            labels = inputs["labels"]
             self.assertTrue((input_ids[:, 0] == tokenizer.bos_token_id).all().item())
             self.assertTrue((labels[:, 0] == tokenizer.bos_token_id).all().item())
             self.assertTrue((input_ids[:, -1] == tokenizer.eos_token_id).all().item())
diff --git a/tests/models/nllb/test_tokenization_nllb.py b/tests/models/nllb/test_tokenization_nllb.py
index 10575084a727..88feb0ae533d 100644
--- a/tests/models/nllb/test_tokenization_nllb.py
+++ b/tests/models/nllb/test_tokenization_nllb.py
@@ -373,19 +373,15 @@ def test_special_tokens_unaffacted_by_save_load(self):
     @require_torch
     def test_enro_tokenizer_prepare_batch(self):
         batch = self.tokenizer(
-            self.src_text, padding=True, truncation=True, max_length=len(self.expected_src_tokens), return_tensors="pt"
+            self.src_text,
+            target_text=self.tgt_text,
+            padding=True,
+            truncation=True,
+            max_length=len(self.expected_src_tokens),
+            return_tensors="pt",
         )
-        with self.tokenizer.as_target_tokenizer():
-            targets = self.tokenizer(
-                self.tgt_text,
-                padding=True,
-                truncation=True,
-                max_length=len(self.expected_src_tokens),
-                return_tensors="pt",
-            )
-        labels = targets["input_ids"]
         batch["decoder_input_ids"] = shift_tokens_right(
-            labels, self.tokenizer.pad_token_id, self.tokenizer.lang_code_to_id["ron_Latn"]
+            batch["labels"], self.tokenizer.pad_token_id, self.tokenizer.lang_code_to_id["ron_Latn"]
         )
 
         self.assertIsInstance(batch, BatchEncoding)
@@ -401,8 +397,9 @@ def test_enro_tokenizer_prepare_batch(self):
 
     def test_seq2seq_max_length(self):
         batch = self.tokenizer(self.src_text, padding=True, truncation=True, max_length=3, return_tensors="pt")
-        with self.tokenizer.as_target_tokenizer():
-            targets = self.tokenizer(self.tgt_text, padding=True, truncation=True, max_length=10, return_tensors="pt")
+        targets = self.tokenizer(
+            text_target=self.tgt_text, padding=True, truncation=True, max_length=10, return_tensors="pt"
+        )
         labels = targets["input_ids"]
         batch["decoder_input_ids"] = shift_tokens_right(
             labels,
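The NLLB tests lean on the three-argument `shift_tokens_right` from the M2M100 module, which also takes the decoder start token. A sketch of that flow, assuming the facebook/nllb-200-distilled-600M checkpoint:

# The target-language id is looked up on the tokenizer and used as the
# decoder start token when shifting the labels.
from transformers import NllbTokenizer
from transformers.models.m2m_100.modeling_m2m_100 import shift_tokens_right

tokenizer = NllbTokenizer.from_pretrained(
    "facebook/nllb-200-distilled-600M", src_lang="eng_Latn", tgt_lang="ron_Latn"
)
batch = tokenizer(["Hello"], text_target=["Salut"], padding=True, return_tensors="pt")
batch["decoder_input_ids"] = shift_tokens_right(
    batch["labels"], tokenizer.pad_token_id, tokenizer.lang_code_to_id["ron_Latn"]
)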
* 150, "short example"] tgt_texts = ["not super long but more than 5 tokens", "tiny"] batch = self._large_tokenizer(src_texts, padding=True, truncation=True, return_tensors="pt") - with self._large_tokenizer.as_target_tokenizer(): - targets = self._large_tokenizer( - tgt_texts, max_length=5, padding=True, truncation=True, return_tensors="pt" - ) + targets = self._large_tokenizer( + text_target=tgt_texts, max_length=5, padding=True, truncation=True, return_tensors="pt" + ) assert batch.input_ids.shape == (2, 1024) assert batch.attention_mask.shape == (2, 1024) @@ -174,10 +173,9 @@ def test_large_seq2seq_truncation(self): src_texts = ["This is going to be way too long." * 1000, "short example"] tgt_texts = ["not super long but more than 5 tokens", "tiny"] batch = self._large_tokenizer(src_texts, padding=True, truncation=True, return_tensors="pt") - with self._large_tokenizer.as_target_tokenizer(): - targets = self._large_tokenizer( - tgt_texts, max_length=5, padding=True, truncation=True, return_tensors="pt" - ) + targets = self._large_tokenizer( + text_target=tgt_texts, max_length=5, padding=True, truncation=True, return_tensors="pt" + ) assert batch.input_ids.shape == (2, 4096) assert batch.attention_mask.shape == (2, 4096) diff --git a/tests/models/perceiver/test_tokenization_perceiver.py b/tests/models/perceiver/test_tokenization_perceiver.py index ca61e9c856f1..3c7a67bcd2b9 100644 --- a/tests/models/perceiver/test_tokenization_perceiver.py +++ b/tests/models/perceiver/test_tokenization_perceiver.py @@ -146,10 +146,9 @@ def test_max_length_integration(self): "Summary of the text.", "Another summary.", ] - with tokenizer.as_target_tokenizer(): - targets = tokenizer( - tgt_text, max_length=32, padding="max_length", truncation=True, return_tensors=FRAMEWORK - ) + targets = tokenizer( + text_target=tgt_text, max_length=32, padding="max_length", truncation=True, return_tensors=FRAMEWORK + ) self.assertEqual(32, targets["input_ids"].shape[1]) # cannot use default save_and_load_tokenzier test method because tokenzier has no vocab diff --git a/tests/models/plbart/test_tokenization_plbart.py b/tests/models/plbart/test_tokenization_plbart.py index 9aed6040f3fd..2ce7cafbda6e 100644 --- a/tests/models/plbart/test_tokenization_plbart.py +++ b/tests/models/plbart/test_tokenization_plbart.py @@ -299,33 +299,26 @@ def test_special_tokens_unaffacted_by_save_load(self): @require_torch def test_batch_fairseq_parity(self): - batch = self.tokenizer(self.src_text, padding=True) - with self.tokenizer.as_target_tokenizer(): - targets = self.tokenizer(self.tgt_text, padding=True, return_tensors="pt") - labels = targets["input_ids"] - batch["decoder_input_ids"] = shift_tokens_right(labels, self.tokenizer.pad_token_id).tolist() + batch = self.tokenizer(self.src_text, text_target=self.tgt_text, padding=True, return_tensors="pt") + batch["decoder_input_ids"] = shift_tokens_right(batch["labels"], self.tokenizer.pad_token_id) # fairseq batch: https://gist.github.com/sshleifer/cba08bc2109361a74ac3760a7e30e4f4 - self.assertEqual(batch.input_ids[1][-2:], [2, PYTHON_CODE]) + self.assertEqual(batch.input_ids[1][-2:].tolist(), [2, PYTHON_CODE]) self.assertEqual(batch.decoder_input_ids[1][0], EN_CODE) self.assertEqual(batch.decoder_input_ids[1][-1], 2) - self.assertEqual(labels[1][-2:].tolist(), [2, EN_CODE]) + self.assertEqual(batch.labels[1][-2:].tolist(), [2, EN_CODE]) @require_torch def test_python_en_tokenizer_prepare_batch(self): batch = self.tokenizer( - self.src_text, padding=True, truncation=True, 
diff --git a/tests/models/plbart/test_tokenization_plbart.py b/tests/models/plbart/test_tokenization_plbart.py
index 9aed6040f3fd..2ce7cafbda6e 100644
--- a/tests/models/plbart/test_tokenization_plbart.py
+++ b/tests/models/plbart/test_tokenization_plbart.py
@@ -299,33 +299,26 @@ def test_special_tokens_unaffacted_by_save_load(self):
 
     @require_torch
     def test_batch_fairseq_parity(self):
-        batch = self.tokenizer(self.src_text, padding=True)
-        with self.tokenizer.as_target_tokenizer():
-            targets = self.tokenizer(self.tgt_text, padding=True, return_tensors="pt")
-        labels = targets["input_ids"]
-        batch["decoder_input_ids"] = shift_tokens_right(labels, self.tokenizer.pad_token_id).tolist()
+        batch = self.tokenizer(self.src_text, text_target=self.tgt_text, padding=True, return_tensors="pt")
+        batch["decoder_input_ids"] = shift_tokens_right(batch["labels"], self.tokenizer.pad_token_id)
 
         # fairseq batch: https://gist.github.com/sshleifer/cba08bc2109361a74ac3760a7e30e4f4
-        self.assertEqual(batch.input_ids[1][-2:], [2, PYTHON_CODE])
+        self.assertEqual(batch.input_ids[1][-2:].tolist(), [2, PYTHON_CODE])
         self.assertEqual(batch.decoder_input_ids[1][0], EN_CODE)
         self.assertEqual(batch.decoder_input_ids[1][-1], 2)
-        self.assertEqual(labels[1][-2:].tolist(), [2, EN_CODE])
+        self.assertEqual(batch.labels[1][-2:].tolist(), [2, EN_CODE])
 
     @require_torch
     def test_python_en_tokenizer_prepare_batch(self):
         batch = self.tokenizer(
-            self.src_text, padding=True, truncation=True, max_length=len(self.expected_src_tokens), return_tensors="pt"
+            self.src_text,
+            text_target=self.tgt_text,
+            padding=True,
+            truncation=True,
+            max_length=len(self.expected_src_tokens),
+            return_tensors="pt",
         )
-        with self.tokenizer.as_target_tokenizer():
-            targets = self.tokenizer(
-                self.tgt_text,
-                padding=True,
-                truncation=True,
-                max_length=len(self.expected_src_tokens),
-                return_tensors="pt",
-            )
-        labels = targets["input_ids"]
-        batch["decoder_input_ids"] = shift_tokens_right(labels, self.tokenizer.pad_token_id)
+        batch["decoder_input_ids"] = shift_tokens_right(batch["labels"], self.tokenizer.pad_token_id)
 
         self.assertIsInstance(batch, BatchEncoding)
@@ -340,8 +333,9 @@ def test_python_en_tokenizer_prepare_batch(self):
 
     def test_seq2seq_max_length(self):
         batch = self.tokenizer(self.src_text, padding=True, truncation=True, max_length=3, return_tensors="pt")
-        with self.tokenizer.as_target_tokenizer():
-            targets = self.tokenizer(self.tgt_text, padding=True, truncation=True, max_length=10, return_tensors="pt")
+        targets = self.tokenizer(
+            text_target=self.tgt_text, padding=True, truncation=True, max_length=10, return_tensors="pt"
+        )
         labels = targets["input_ids"]
         batch["decoder_input_ids"] = shift_tokens_right(labels, self.tokenizer.pad_token_id)
diff --git a/tests/models/t5/test_tokenization_t5.py b/tests/models/t5/test_tokenization_t5.py
index 1c0fde222cdb..28d85c77c97c 100644
--- a/tests/models/t5/test_tokenization_t5.py
+++ b/tests/models/t5/test_tokenization_t5.py
@@ -210,10 +210,9 @@ def test_max_length(self):
             "Summary of the text.",
             "Another summary.",
         ]
-        with tokenizer.as_target_tokenizer():
-            targets = tokenizer(
-                tgt_text, max_length=32, padding="max_length", truncation=True, return_tensors=FRAMEWORK
-            )
+        targets = tokenizer(
+            text_target=tgt_text, max_length=32, padding="max_length", truncation=True, return_tensors=FRAMEWORK
+        )
         self.assertEqual(32, targets["input_ids"].shape[1])
 
     def test_outputs_not_longer_than_maxlen(self):
@@ -235,12 +234,10 @@ def test_eos_in_input(self):
         expected_src_tokens = [71, 307, 8986, 21, 4505, 1635, 1707, 5, 1]
         expected_tgt_tokens = [20698, 13, 8, 1499, 5, 1]
 
-        batch = tokenizer(src_text)
-        with tokenizer.as_target_tokenizer():
-            targets = tokenizer(tgt_text)
+        batch = tokenizer(src_text, text_target=tgt_text)
 
         self.assertEqual(expected_src_tokens, batch["input_ids"][0])
-        self.assertEqual(expected_tgt_tokens, targets["input_ids"][0])
+        self.assertEqual(expected_tgt_tokens, batch["labels"][0])
 
     def test_token_type_ids(self):
         src_text_1 = ["A first paragraph for summarization."]
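Without `return_tensors`, the combined call yields plain Python lists, which is what the T5 test above compares against. A sketch, assuming the t5-small checkpoint and sentencepiece are available:

# One call encodes both sides; lists come back when no tensor type is requested.
from transformers import T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained("t5-small")
batch = tokenizer(["summarize: studies have shown..."], text_target=["studies show..."])
print(batch["input_ids"][0])  # source ids, ending in the EOS token
print(batch["labels"][0])     # target ids, also ending in EOS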
diff --git a/tests/models/tapex/test_tokenization_tapex.py b/tests/models/tapex/test_tokenization_tapex.py
index c959b780215b..dec0f507ed3c 100644
--- a/tests/models/tapex/test_tokenization_tapex.py
+++ b/tests/models/tapex/test_tokenization_tapex.py
@@ -859,9 +859,8 @@ def test_tokenizer_as_target(self):
         tokenizer = TapexTokenizer.from_pretrained("microsoft/tapex-base")
         answer_text = "tapex is a good model!"
         expected_src_tokens = [0, 90, 5776, 1178, 16, 10, 205, 1421, 328, 2]
-        with tokenizer.as_target_tokenizer():
-            answer_encoding = tokenizer(answer=answer_text)
-            self.assertListEqual(answer_encoding.input_ids, expected_src_tokens)
+        answer_encoding = tokenizer(answer=answer_text)
+        self.assertListEqual(answer_encoding.input_ids, expected_src_tokens)
 
     @slow
     def test_tokenizer_lower_case(self):
@@ -870,23 +869,21 @@ def test_tokenizer_lower_case(self):
         answer_text = "Beijing, London, Paris"
         answer_text_lower = "beijing, london, paris"
 
-        with cased_tokenizer.as_target_tokenizer():
-            with uncased_tokenizer.as_target_tokenizer():
-                self.assertNotEqual(
-                    cased_tokenizer(answer=answer_text).input_ids, uncased_tokenizer(answer=answer_text).input_ids
-                )
-                self.assertEqual(
-                    cased_tokenizer(answer=answer_text_lower).input_ids,
-                    uncased_tokenizer(answer=answer_text).input_ids,
-                )
-                # batched encoding assert
-                self.assertNotEqual(
-                    cased_tokenizer(answer=[answer_text]).input_ids, uncased_tokenizer(answer=[answer_text]).input_ids
-                )
-                self.assertEqual(
-                    cased_tokenizer(answer=[answer_text_lower]).input_ids,
-                    uncased_tokenizer(answer=[answer_text]).input_ids,
-                )
+        self.assertNotEqual(
+            cased_tokenizer(answer=answer_text).input_ids, uncased_tokenizer(answer=answer_text).input_ids
+        )
+        self.assertEqual(
+            cased_tokenizer(answer=answer_text_lower).input_ids,
+            uncased_tokenizer(answer=answer_text).input_ids,
+        )
+        # batched encoding assert
+        self.assertNotEqual(
+            cased_tokenizer(answer=[answer_text]).input_ids, uncased_tokenizer(answer=[answer_text]).input_ids
+        )
+        self.assertEqual(
+            cased_tokenizer(answer=[answer_text_lower]).input_ids,
+            uncased_tokenizer(answer=[answer_text]).input_ids,
+        )
 
         # test input encoding lowercase
         question = "Greece held its last Summer Olympics in 2004"
         table_dict = {
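After this series, TAPEX dispatches on its keyword arguments rather than an internal tokenizer mode. A sketch, assuming microsoft/tapex-base and an installed pandas:

# A `table` selects source encoding; an `answer` alone selects target encoding.
import pandas as pd
from transformers import TapexTokenizer

tokenizer = TapexTokenizer.from_pretrained("microsoft/tapex-base")
table = pd.DataFrame.from_dict({"city": ["beijing", "london"], "year": ["2008", "2012"]})
source = tokenizer(table=table, query="where were the olympics held in 2008?")
target = tokenizer(answer="beijing")  # no table given: encoded as a supervision target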
From b2a8e18fd7d9618427bf817b06fdfd68129a326c Mon Sep 17 00:00:00 2001
From: Sylvain Gugger
Date: Thu, 28 Jul 2022 13:05:19 -0400
Subject: [PATCH 07/10] Fix tests

---
 docs/source/en/model_doc/marian.mdx         |  1 +
 .../models/tapex/tokenization_tapex.py      | 25 +++----------
 src/transformers/tokenization_utils_base.py | 19 +++++++-------
 tests/models/dpr/test_tokenization_dpr.py   |  2 ++
 tests/models/nllb/test_tokenization_nllb.py |  2 +-
 5 files changed, 18 insertions(+), 31 deletions(-)

diff --git a/docs/source/en/model_doc/marian.mdx b/docs/source/en/model_doc/marian.mdx
index b415e9413c40..9d0a9ff2576a 100644
--- a/docs/source/en/model_doc/marian.mdx
+++ b/docs/source/en/model_doc/marian.mdx
@@ -155,6 +155,7 @@ Example of translating english to many romance languages, using old-style 2 char
 ## MarianTokenizer
 
 [[autodoc]] MarianTokenizer
+    - build_inputs_with_special_tokens
 
 ## MarianModel
diff --git a/src/transformers/models/tapex/tokenization_tapex.py b/src/transformers/models/tapex/tokenization_tapex.py
index d9afd160d38d..7c0725ffe7c1 100644
--- a/src/transformers/models/tapex/tokenization_tapex.py
+++ b/src/transformers/models/tapex/tokenization_tapex.py
@@ -62,12 +62,6 @@ class TapexTruncationStrategy(ExplicitEnum):
     DROP_ROWS_TO_FIT = "drop_rows_to_fit"
 
 
-class TokenizerStrategy(ExplicitEnum):
-
-    TOKENIZE_SOURCE = "tokenize_source"
-    TOKENIZE_TARGET = "tokenize_target"
-
-
 TAPEX_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r"""
     add_special_tokens (`bool`, *optional*, defaults to `True`):
         Whether or not to encode the sequences with the special tokens relative to their model.
@@ -340,9 +334,6 @@ def __init__(
         self.max_cell_length = max_cell_length
         self.table_linearize = IndexedRowTableLinearize()
 
-        # property to decide using which call function
-        self.current_tokenizer = TokenizerStrategy.TOKENIZE_SOURCE
-
     def build_inputs_with_special_tokens(
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
     ) -> List[int]:
@@ -554,9 +545,7 @@ def __call__(
             Optionally, the corresponding answer to the questions as supervision.
         """
 
-        if self.current_tokenizer == TokenizerStrategy.TOKENIZE_SOURCE:
-            if table is None:
-                raise ValueError("Please ensure that the table is not empty if you use TAPEX to encode source.")
+        if table is not None:
             return self.source_call_func(
                 table=table,
                 query=query,
@@ -577,9 +566,7 @@ def __call__(
                 verbose=verbose,
                 **kwargs,
             )
-        else:
-            if answer is None:
-                raise ValueError("Please ensure that the answer is not empty if you use TAPEX to encode target.")
+        elif answer is not None:
             return self.target_call_func(
                 answer=answer,
                 add_special_tokens=add_special_tokens,
@@ -598,6 +585,8 @@ def __call__(
                 verbose=verbose,
                 **kwargs,
             )
+        else:
+            raise ValueError("You need to provide either a `table` or an `answer`.")
 
     def source_call_func(
         self,
@@ -1329,12 +1318,6 @@ def _target_encode_plus(
             verbose=verbose,
         )
 
-    def _switch_to_input_mode(self):
-        self.current_tokenizer = TokenizerStrategy.TOKENIZE_SOURCE
-
-    def _switch_to_target_mode(self):
-        self.current_tokenizer = TokenizerStrategy.TOKENIZE_TARGET
-
     def prepare_table_query(
         self,
         table,
diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py
index a945b1abe58f..7e259fce9036 100644
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -3694,15 +3694,16 @@ def prepare_seq2seq_batch(
         # Process tgt_texts
         if max_target_length is None:
             max_target_length = max_length
-        labels = self(
-            text_target=tgt_texts,
-            add_special_tokens=True,
-            return_tensors=return_tensors,
-            padding=padding,
-            max_length=max_target_length,
-            truncation=truncation,
-            **kwargs,
-        )
+        with self.as_target_tokenizer():
+            labels = self(
+                tgt_texts,
+                add_special_tokens=True,
+                return_tensors=return_tensors,
+                padding=padding,
+                max_length=max_target_length,
+                truncation=truncation,
+                **kwargs,
+            )
         model_inputs["labels"] = labels["input_ids"]
         return model_inputs
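Under the defaults used in the base class, the deprecated helper should keep returning the same fields as a plain call with `text_target`; that equivalence is this sketch's assumption, not an assertion from the patch (the Helsinki-NLP/opus-mt-en-de checkpoint is likewise assumed):

# Both paths produce input_ids, attention_mask and labels.
from transformers import MarianTokenizer

tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-de")
old = tokenizer.prepare_seq2seq_batch(
    src_texts=["I am a small frog"], tgt_texts=["Ich bin ein kleiner Frosch."], return_tensors="pt"
)
new = tokenizer(
    ["I am a small frog"], text_target=["Ich bin ein kleiner Frosch."], padding=True, return_tensors="pt"
)
assert old["labels"].tolist() == new["labels"].tolist()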
diff --git a/tests/models/dpr/test_tokenization_dpr.py b/tests/models/dpr/test_tokenization_dpr.py
index 2870e0bcf352..1a085d86234d 100644
--- a/tests/models/dpr/test_tokenization_dpr.py
+++ b/tests/models/dpr/test_tokenization_dpr.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 
+import unittest
+
 from transformers import (
     DPRContextEncoderTokenizer,
     DPRContextEncoderTokenizerFast,
diff --git a/tests/models/nllb/test_tokenization_nllb.py b/tests/models/nllb/test_tokenization_nllb.py
index 88feb0ae533d..d77b101fa766 100644
--- a/tests/models/nllb/test_tokenization_nllb.py
+++ b/tests/models/nllb/test_tokenization_nllb.py
@@ -374,7 +374,7 @@ def test_special_tokens_unaffacted_by_save_load(self):
     def test_enro_tokenizer_prepare_batch(self):
         batch = self.tokenizer(
             self.src_text,
-            target_text=self.tgt_text,
+            text_target=self.tgt_text,
             padding=True,
             truncation=True,
             max_length=len(self.expected_src_tokens),

From 1cabae4f30bf53c14e48e2e4a165ed3e60de3977 Mon Sep 17 00:00:00 2001
From: Sylvain Gugger
Date: Thu, 28 Jul 2022 13:10:25 -0400
Subject: [PATCH 08/10] Fix quality

---
 tests/models/dpr/test_tokenization_dpr.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tests/models/dpr/test_tokenization_dpr.py b/tests/models/dpr/test_tokenization_dpr.py
index 1a085d86234d..8ad2fea09c8b 100644
--- a/tests/models/dpr/test_tokenization_dpr.py
+++ b/tests/models/dpr/test_tokenization_dpr.py
@@ -13,9 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
-import unittest
-
 from transformers import (
     DPRContextEncoderTokenizer,
     DPRContextEncoderTokenizerFast,

From 13251259ca61a86cee7a86906311e11896c77310 Mon Sep 17 00:00:00 2001
From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Date: Fri, 29 Jul 2022 07:30:13 -0400
Subject: [PATCH 09/10] Update examples/flax/image-captioning/run_image_captioning_flax.py

Co-authored-by: amyeroberts

---
 examples/flax/image-captioning/run_image_captioning_flax.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/flax/image-captioning/run_image_captioning_flax.py b/examples/flax/image-captioning/run_image_captioning_flax.py
index e3ea20e85770..53556fdc806b 100644
--- a/examples/flax/image-captioning/run_image_captioning_flax.py
+++ b/examples/flax/image-captioning/run_image_captioning_flax.py
@@ -553,7 +553,7 @@ def tokenization_fn(examples, max_target_length):
 
         model_inputs = {}
         labels = tokenizer(
-            targets, max_length=max_target_length, padding="max_length", truncation=True, return_tensors="np"
+            text_target=targets, max_length=max_target_length, padding="max_length", truncation=True, return_tensors="np"
         )
         model_inputs["labels"] = labels["input_ids"]
         decoder_input_ids = shift_tokens_right_fn(

From d9394918d0dd2302828b3e6908c3f55faf0077f3 Mon Sep 17 00:00:00 2001
From: Sylvain Gugger
Date: Fri, 29 Jul 2022 07:39:13 -0400
Subject: [PATCH 10/10] Style

---
 examples/flax/image-captioning/run_image_captioning_flax.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/examples/flax/image-captioning/run_image_captioning_flax.py b/examples/flax/image-captioning/run_image_captioning_flax.py
index a0e8d19b0c2e..4552defb8efc 100644
--- a/examples/flax/image-captioning/run_image_captioning_flax.py
+++ b/examples/flax/image-captioning/run_image_captioning_flax.py
@@ -554,7 +554,11 @@ def tokenization_fn(examples, max_target_length):
 
         model_inputs = {}
         labels = tokenizer(
-            text_target=targets, max_length=max_target_length, padding="max_length", truncation=True, return_tensors="np"
+            text_target=targets,
+            max_length=max_target_length,
+            padding="max_length",
+            truncation=True,
+            return_tensors="np",
         )
         model_inputs["labels"] = labels["input_ids"]
         decoder_input_ids = shift_tokens_right_fn(