From a6697baaa2532ab21c694c4cbef6977ebb2fcb7c Mon Sep 17 00:00:00 2001
From: LysandreJik <lysandre.debut@reseau.eseo.fr>
Date: Tue, 17 Nov 2020 10:31:16 -0500
Subject: [PATCH 1/5] Tokenizers should be framework agnostic

---
 docs/source/model_doc/marian.rst              |  4 ++--
 docs/source/model_doc/mbart.rst               |  4 ++--
 docs/source/model_doc/pegasus.rst             |  2 +-
 .../tuner007/pegasus_paraphrase/README.md     |  2 +-
 model_cards/tuner007/pegasus_qa/README.md     |  2 +-
 scripts/fsmt/fsmt-make-super-tiny-model.py    |  2 +-
 scripts/fsmt/fsmt-make-tiny-model.py          |  2 +-
 .../models/bart/tokenization_bart.py          |  4 ++--
 .../models/bart/tokenization_bart_fast.py     |  4 ++--
 .../models/fsmt/tokenization_fsmt.py          |  2 +-
 .../models/marian/modeling_marian.py          |  2 +-
 .../models/marian/tokenization_marian.py      |  4 ++--
 .../models/mbart/modeling_mbart.py            |  2 +-
 .../models/mbart/tokenization_mbart.py        |  4 ++--
 .../models/mbart/tokenization_mbart_fast.py   |  4 ++--
 .../models/pegasus/modeling_pegasus.py        |  2 +-
 .../models/pegasus/tokenization_pegasus.py    |  2 +-
 .../pegasus/tokenization_pegasus_fast.py      |  2 +-
 .../models/rag/tokenization_rag.py            |  2 +-
 src/transformers/tokenization_utils.py        |  2 +-
 src/transformers/tokenization_utils_base.py   |  2 +-
 tests/test_modeling_marian.py                 | 20 ++++++++++++-------
 tests/test_modeling_mbart.py                  | 12 +++++++----
 tests/test_tokenization_common.py             |  2 +-
 tests/test_tokenization_mbart.py              | 11 +++++-----
 tests/test_tokenization_pegasus.py            |  4 +++-
 26 files changed, 58 insertions(+), 47 deletions(-)

diff --git a/docs/source/model_doc/marian.rst b/docs/source/model_doc/marian.rst
index 58d9f4c0a41ada..da54488a75cd80 100644
--- a/docs/source/model_doc/marian.rst
+++ b/docs/source/model_doc/marian.rst
@@ -78,7 +78,7 @@ require 3 character language codes:
     tokenizer = MarianTokenizer.from_pretrained(model_name)
     print(tokenizer.supported_language_codes)
     model = MarianMTModel.from_pretrained(model_name)
-    translated = model.generate(**tokenizer.prepare_seq2seq_batch(src_text))
+    translated = model.generate(**tokenizer.prepare_seq2seq_batch(src_text, return_tensors="pt"))
     tgt_text = [tokenizer.decode(t, skip_special_tokens=True) for t in translated]
     # ["c'est une phrase en anglais que nous voulons traduire en français",
     # 'Isto deve ir para o português.',
@@ -150,7 +150,7 @@ Example of translating english to many romance languages, using old-style 2 char
     print(tokenizer.supported_language_codes)
 
     model = MarianMTModel.from_pretrained(model_name)
-    translated = model.generate(**tokenizer.prepare_seq2seq_batch(src_text))
+    translated = model.generate(**tokenizer.prepare_seq2seq_batch(src_text, return_tensors="pt"))
     tgt_text = [tokenizer.decode(t, skip_special_tokens=True) for t in translated]
     # ["c'est une phrase en anglais que nous voulons traduire en français", 'Isto deve ir para o português.',  'Y esto al español']
 
diff --git a/docs/source/model_doc/mbart.rst b/docs/source/model_doc/mbart.rst
index 9d3aab93fd3590..26b00cbd411c04 100644
--- a/docs/source/model_doc/mbart.rst
+++ b/docs/source/model_doc/mbart.rst
@@ -44,7 +44,7 @@ the sequences for sequence-to-sequence fine-tuning.
 
     example_english_phrase = "UN Chief Says There Is No Military Solution in Syria"
     expected_translation_romanian = "Şeful ONU declară că nu există o soluţie militară în Siria"
-    batch = tokenizer.prepare_seq2seq_batch(example_english_phrase, src_lang="en_XX", tgt_lang="ro_RO", tgt_texts=expected_translation_romanian)
+    batch = tokenizer.prepare_seq2seq_batch(example_english_phrase, src_lang="en_XX", tgt_lang="ro_RO", tgt_texts=expected_translation_romanian, return_tensors="pt")
     model(input_ids=batch['input_ids'], labels=batch['labels']) # forward pass
 
 - Generation
@@ -58,7 +58,7 @@ the sequences for sequence-to-sequence fine-tuning.
     model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-en-ro")
     tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-en-ro")
     article = "UN Chief Says There Is No Military Solution in Syria"
-    batch = tokenizer.prepare_seq2seq_batch(src_texts=[article], src_lang="en_XX")
+    batch = tokenizer.prepare_seq2seq_batch(src_texts=[article], src_lang="en_XX", return_tensors="pt")
     translated_tokens = model.generate(**batch, decoder_start_token_id=tokenizer.lang_code_to_id["ro_RO"])
     translation = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
     assert translation == "Şeful ONU declară că nu există o soluţie militară în Siria"
diff --git a/docs/source/model_doc/pegasus.rst b/docs/source/model_doc/pegasus.rst
index a652ce0851ef4f..290266051e2731 100644
--- a/docs/source/model_doc/pegasus.rst
+++ b/docs/source/model_doc/pegasus.rst
@@ -78,7 +78,7 @@ Usage Example
     torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
     tokenizer = PegasusTokenizer.from_pretrained(model_name)
     model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)
-    batch = tokenizer.prepare_seq2seq_batch(src_text, truncation=True, padding='longest').to(torch_device)
+    batch = tokenizer.prepare_seq2seq_batch(src_text, truncation=True, padding='longest', return_tensors="pt").to(torch_device)
     translated = model.generate(**batch)
     tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
     assert tgt_text[0] == "California's largest electricity provider has turned off power to hundreds of thousands of customers."
diff --git a/model_cards/tuner007/pegasus_paraphrase/README.md b/model_cards/tuner007/pegasus_paraphrase/README.md
index f83d6d29e1d29b..311d8de61c3326 100644
--- a/model_cards/tuner007/pegasus_paraphrase/README.md
+++ b/model_cards/tuner007/pegasus_paraphrase/README.md
@@ -11,7 +11,7 @@ tokenizer = PegasusTokenizer.from_pretrained(model_name)
 model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)
 
 def get_response(input_text,num_return_sequences):
-  batch = tokenizer.prepare_seq2seq_batch([input_text],truncation=True,padding='longest',max_length=60).to(torch_device)
+  batch = tokenizer.prepare_seq2seq_batch([input_text],truncation=True,padding='longest',max_length=60, return_tensors="pt").to(torch_device)
   translated = model.generate(**batch,max_length=60,num_beams=10, num_return_sequences=num_return_sequences, temperature=1.5)
   tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
   return tgt_text
diff --git a/model_cards/tuner007/pegasus_qa/README.md b/model_cards/tuner007/pegasus_qa/README.md
index 3d3831bfc97c69..bc9397225d7b83 100644
--- a/model_cards/tuner007/pegasus_qa/README.md
+++ b/model_cards/tuner007/pegasus_qa/README.md
@@ -12,7 +12,7 @@ model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_dev
 
 def get_answer(question, context):
   input_text = "question: %s text: %s" % (question,context)
-  batch = tokenizer.prepare_seq2seq_batch([input_text], truncation=True, padding='longest').to(torch_device)
+  batch = tokenizer.prepare_seq2seq_batch([input_text], truncation=True, padding='longest', return_tensors="pt").to(torch_device)
   translated = model.generate(**batch)
   tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
   return tgt_text[0]
diff --git a/scripts/fsmt/fsmt-make-super-tiny-model.py b/scripts/fsmt/fsmt-make-super-tiny-model.py
index f1742a4dc4ffaf..b5ec17c65f4834 100755
--- a/scripts/fsmt/fsmt-make-super-tiny-model.py
+++ b/scripts/fsmt/fsmt-make-super-tiny-model.py
@@ -58,7 +58,7 @@
 print(f"num of params {tiny_model.num_parameters()}")
 
 # Test
-batch = tokenizer.prepare_seq2seq_batch(["Making tiny model"])
+batch = tokenizer.prepare_seq2seq_batch(["Making tiny model"], return_tensors="pt")
 outputs = tiny_model(**batch)
 
 print("test output:", len(outputs.logits[0]))
diff --git a/scripts/fsmt/fsmt-make-tiny-model.py b/scripts/fsmt/fsmt-make-tiny-model.py
index 924eb0b63c93d6..ba4786fba3b639 100755
--- a/scripts/fsmt/fsmt-make-tiny-model.py
+++ b/scripts/fsmt/fsmt-make-tiny-model.py
@@ -29,7 +29,7 @@
 print(f"num of params {tiny_model.num_parameters()}")
 
 # Test
-batch = tokenizer.prepare_seq2seq_batch(["Making tiny model"])
+batch = tokenizer.prepare_seq2seq_batch(["Making tiny model"], return_tensors="pt")
 outputs = tiny_model(**batch)
 
 print("test output:", len(outputs.logits[0]))
diff --git a/src/transformers/models/bart/tokenization_bart.py b/src/transformers/models/bart/tokenization_bart.py
index 24046b39676a33..071492238ae5c0 100644
--- a/src/transformers/models/bart/tokenization_bart.py
+++ b/src/transformers/models/bart/tokenization_bart.py
@@ -61,7 +61,7 @@ def prepare_seq2seq_batch(
         max_length: Optional[int] = None,
         max_target_length: Optional[int] = None,
         padding: str = "longest",
-        return_tensors: str = "None",
+        return_tensors: str = None,
         truncation=True,
         **kwargs,
     ) -> BatchEncoding:
@@ -91,7 +91,7 @@ def prepare_seq2seq_batch(
                   maximum acceptable input length for the model if that argument is not provided.
                 * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
                   different lengths).
-            return_tensors (:obj:`str` or :class:`~transformers.tokenization_utils_base.TensorType`, `optional`, defaults to "pt"):
+            return_tensors (:obj:`str` or :class:`~transformers.tokenization_utils_base.TensorType`, `optional`):
                 If set, will return tensors instead of list of python integers. Acceptable values are:
 
                 * :obj:`'tf'`: Return TensorFlow :obj:`tf.constant` objects.
diff --git a/src/transformers/models/bart/tokenization_bart_fast.py b/src/transformers/models/bart/tokenization_bart_fast.py
index 69c16c2be4cfe8..43f226f3103217 100644
--- a/src/transformers/models/bart/tokenization_bart_fast.py
+++ b/src/transformers/models/bart/tokenization_bart_fast.py
@@ -56,7 +56,7 @@ def prepare_seq2seq_batch(
         max_length: Optional[int] = None,
         max_target_length: Optional[int] = None,
         padding: str = "longest",
-        return_tensors: str = "None",
+        return_tensors: str = None,
         truncation=True,
         **kwargs,
     ) -> BatchEncoding:
@@ -86,7 +86,7 @@ def prepare_seq2seq_batch(
                   maximum acceptable input length for the model if that argument is not provided.
                 * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
                   different lengths).
-            return_tensors (:obj:`str` or :class:`~transformers.tokenization_utils_base.TensorType`, `optional`, defaults to "pt"):
+            return_tensors (:obj:`str` or :class:`~transformers.tokenization_utils_base.TensorType`, `optional`):
                 If set, will return tensors instead of list of python integers. Acceptable values are:
 
                 * :obj:`'tf'`: Return TensorFlow :obj:`tf.constant` objects.
diff --git a/src/transformers/models/fsmt/tokenization_fsmt.py b/src/transformers/models/fsmt/tokenization_fsmt.py
index 083906bf3031e6..0c11a7a64db9a8 100644
--- a/src/transformers/models/fsmt/tokenization_fsmt.py
+++ b/src/transformers/models/fsmt/tokenization_fsmt.py
@@ -491,7 +491,7 @@ def prepare_seq2seq_batch(
         tgt_texts: Optional[List[str]] = None,
         max_length: Optional[int] = None,
         max_target_length: Optional[int] = None,
-        return_tensors: str = "pt",
+        return_tensors: str = None,
         truncation=True,
         padding="longest",
         **unused,
diff --git a/src/transformers/models/marian/modeling_marian.py b/src/transformers/models/marian/modeling_marian.py
index b02e4599fdc6bc..637529c1168414 100644
--- a/src/transformers/models/marian/modeling_marian.py
+++ b/src/transformers/models/marian/modeling_marian.py
@@ -41,7 +41,7 @@ class MarianMTModel(BartForConditionalGeneration):
 
         >>> model = MarianMTModel.from_pretrained(mname)
         >>> tok = MarianTokenizer.from_pretrained(mname)
-        >>> batch = tok.prepare_seq2seq_batch(src_texts=[sample_text])  # don't need tgt_text for inference
+        >>> batch = tok.prepare_seq2seq_batch(src_texts=[sample_text], return_tensors="pt")  # don't need tgt_texts for inference
         >>> gen = model.generate(**batch)  # for forward pass: model(**batch)
         >>> words: List[str] = tok.batch_decode(gen, skip_special_tokens=True)  # returns "Where is the bus stop ?"
 
diff --git a/src/transformers/models/marian/tokenization_marian.py b/src/transformers/models/marian/tokenization_marian.py
index 8af0c40f4f769b..cb81ddfe309aae 100644
--- a/src/transformers/models/marian/tokenization_marian.py
+++ b/src/transformers/models/marian/tokenization_marian.py
@@ -70,7 +70,7 @@ class MarianTokenizer(PreTrainedTokenizer):
         >>> tok = MarianTokenizer.from_pretrained('Helsinki-NLP/opus-mt-en-de')
         >>> src_texts = [ "I am a small frog.", "Tom asked his teacher for advice."]
         >>> tgt_texts = ["Ich bin ein kleiner Frosch.", "Tom bat seinen Lehrer um Rat."]  # optional
-        >>> batch_enc: BatchEncoding = tok.prepare_seq2seq_batch(src_texts, tgt_texts=tgt_texts)
+        >>> batch_enc: BatchEncoding = tok.prepare_seq2seq_batch(src_texts, tgt_texts=tgt_texts, return_tensors="pt")
         >>> # keys  [input_ids, attention_mask, labels].
         >>> # model(**batch) should work
     """
@@ -175,7 +175,7 @@ def prepare_seq2seq_batch(
         tgt_texts: Optional[List[str]] = None,
         max_length: Optional[int] = None,
         max_target_length: Optional[int] = None,
-        return_tensors: str = "pt",
+        return_tensors: str = None,
         truncation=True,
         padding="longest",
         **unused,
diff --git a/src/transformers/models/mbart/modeling_mbart.py b/src/transformers/models/mbart/modeling_mbart.py
index 7f92929125321a..2978a250dcb9f3 100644
--- a/src/transformers/models/mbart/modeling_mbart.py
+++ b/src/transformers/models/mbart/modeling_mbart.py
@@ -22,7 +22,7 @@ class MBartForConditionalGeneration(BartForConditionalGeneration):
         >>> model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-en-ro")
         >>> tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-en-ro")
         >>> article = "UN Chief Says There Is No Military Solution in Syria"
-        >>> batch = tokenizer.prepare_seq2seq_batch(src_texts=[article])
+        >>> batch = tokenizer.prepare_seq2seq_batch(src_texts=[article], return_tensors="pt")
         >>> translated_tokens = model.generate(**batch)
         >>> translation = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
         >>> assert translation == "Şeful ONU declară că nu există o soluţie militară în Siria"
diff --git a/src/transformers/models/mbart/tokenization_mbart.py b/src/transformers/models/mbart/tokenization_mbart.py
index bb5f604d6b4c5a..98448fe66168ad 100644
--- a/src/transformers/models/mbart/tokenization_mbart.py
+++ b/src/transformers/models/mbart/tokenization_mbart.py
@@ -81,7 +81,7 @@ class MBartTokenizer(XLMRobertaTokenizer):
         >>> example_english_phrase = " UN Chief Says There Is No Military Solution in Syria"
         >>> expected_translation_romanian = "Şeful ONU declară că nu există o soluţie militară în Siria"
         >>> batch: dict = tokenizer.prepare_seq2seq_batch(
-        ...     example_english_phrase, src_lang="en_XX", tgt_lang="ro_RO", tgt_texts=expected_translation_romanian
+        ...     example_english_phrase, src_lang="en_XX", tgt_lang="ro_RO", tgt_texts=expected_translation_romanian, return_tensors="pt"
         ... )
 
     """
@@ -183,7 +183,7 @@ def prepare_seq2seq_batch(
         max_target_length: Optional[int] = None,
         truncation: bool = True,
         padding: str = "longest",
-        return_tensors: str = "pt",
+        return_tensors: str = None,
         add_prefix_space: bool = False,  # ignored
         **kwargs,
     ) -> BatchEncoding:
diff --git a/src/transformers/models/mbart/tokenization_mbart_fast.py b/src/transformers/models/mbart/tokenization_mbart_fast.py
index 27243c55afa5f9..14b6e4919b7962 100644
--- a/src/transformers/models/mbart/tokenization_mbart_fast.py
+++ b/src/transformers/models/mbart/tokenization_mbart_fast.py
@@ -89,7 +89,7 @@ class MBartTokenizerFast(XLMRobertaTokenizerFast):
         >>> example_english_phrase = " UN Chief Says There Is No Military Solution in Syria"
         >>> expected_translation_romanian = "Şeful ONU declară că nu există o soluţie militară în Siria"
         >>> batch: dict = tokenizer.prepare_seq2seq_batch(
-        ...     example_english_phrase, src_lang="en_XX", tgt_lang="ro_RO", tgt_texts=expected_translation_romanian
+        ...     example_english_phrase, src_lang="en_XX", tgt_lang="ro_RO", tgt_texts=expected_translation_romanian, return_tensors="pt"
         ... )
     """
 
@@ -181,7 +181,7 @@ def prepare_seq2seq_batch(
         max_target_length: Optional[int] = None,
         truncation: bool = True,
         padding: str = "longest",
-        return_tensors: str = "pt",
+        return_tensors: str = None,
         **kwargs,
     ) -> BatchEncoding:
         if max_length is None:
diff --git a/src/transformers/models/pegasus/modeling_pegasus.py b/src/transformers/models/pegasus/modeling_pegasus.py
index 3d721d5ae883b0..64515c7a8ba733 100644
--- a/src/transformers/models/pegasus/modeling_pegasus.py
+++ b/src/transformers/models/pegasus/modeling_pegasus.py
@@ -38,7 +38,7 @@ class PegasusForConditionalGeneration(BartForConditionalGeneration):
 
         >>> model = PegasusForConditionalGeneration.from_pretrained(mname)
         >>> tok = PegasusTokenizer.from_pretrained(mname)
-        >>> batch = tok.prepare_seq2seq_batch(src_texts=[PGE_ARTICLE])  # don't need tgt_text for inference
+        >>> batch = tok.prepare_seq2seq_batch(src_texts=[PGE_ARTICLE], return_tensors="pt")  # don't need tgt_texts for inference
         >>> gen = model.generate(**batch)  # for forward pass: model(**batch)
         >>> summary: List[str] = tok.batch_decode(gen, skip_special_tokens=True)
         >>> assert summary == "California's largest electricity provider has turned off power to tens of thousands of customers."
diff --git a/src/transformers/models/pegasus/tokenization_pegasus.py b/src/transformers/models/pegasus/tokenization_pegasus.py
index 170eb37e5f9ff7..5728338276d26c 100644
--- a/src/transformers/models/pegasus/tokenization_pegasus.py
+++ b/src/transformers/models/pegasus/tokenization_pegasus.py
@@ -134,7 +134,7 @@ def prepare_seq2seq_batch(
         tgt_texts: Optional[List[str]] = None,
         max_length: Optional[int] = None,
         max_target_length: Optional[int] = None,
-        return_tensors: str = "pt",
+        return_tensors: str = None,
         truncation=True,
         padding="longest",
         **unused,
diff --git a/src/transformers/models/pegasus/tokenization_pegasus_fast.py b/src/transformers/models/pegasus/tokenization_pegasus_fast.py
index 30fb45e0be96ef..e221eb4b54b018 100644
--- a/src/transformers/models/pegasus/tokenization_pegasus_fast.py
+++ b/src/transformers/models/pegasus/tokenization_pegasus_fast.py
@@ -95,7 +95,7 @@ def prepare_seq2seq_batch(
         tgt_texts: Optional[List[str]] = None,
         max_length: Optional[int] = None,
         max_target_length: Optional[int] = None,
-        return_tensors: str = "pt",
+        return_tensors: str = None,
         truncation=True,
         padding="longest",
         **unused,
diff --git a/src/transformers/models/rag/tokenization_rag.py b/src/transformers/models/rag/tokenization_rag.py
index c3deffc98654f1..766d04662d71cd 100644
--- a/src/transformers/models/rag/tokenization_rag.py
+++ b/src/transformers/models/rag/tokenization_rag.py
@@ -71,7 +71,7 @@ def prepare_seq2seq_batch(
         max_length: Optional[int] = None,
         max_target_length: Optional[int] = None,
         padding: str = "longest",
-        return_tensors: str = "np",
+        return_tensors: str = None,
         truncation=True,
         **kwargs,
     ) -> BatchEncoding:
diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py
index 0f6f8b473bffc0..c9f63eba3bbd7c 100644
--- a/src/transformers/tokenization_utils.py
+++ b/src/transformers/tokenization_utils.py
@@ -797,7 +797,7 @@ def prepare_seq2seq_batch(
                   maximum acceptable input length for the model if that argument is not provided.
                 * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
                   different lengths).
-            return_tensors (:obj:`str` or :class:`~transformers.tokenization_utils_base.TensorType`, `optional`, defaults to "pt"):
+            return_tensors (:obj:`str` or :class:`~transformers.tokenization_utils_base.TensorType`, `optional`):
                 If set, will return tensors instead of list of python integers. Acceptable values are:
 
                 * :obj:`'tf'`: Return TensorFlow :obj:`tf.constant` objects.
diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py
index 1f492b06e92053..25bd051f827ab6 100644
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -1455,7 +1455,7 @@ def all_special_ids(self) -> List[int]:
                   maximum acceptable input length for the model if that argument is not provided.
                 * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
                   different lengths).
-            return_tensors (:obj:`str` or :class:`~transformers.tokenization_utils_base.TensorType`, `optional`, defaults to "pt"):
+            return_tensors (:obj:`str` or :class:`~transformers.tokenization_utils_base.TensorType`, `optional`):
                 If set, will return tensors instead of list of python integers. Acceptable values are:
 
                 * :obj:`'tf'`: Return TensorFlow :obj:`tf.constant` objects.
diff --git a/tests/test_modeling_marian.py b/tests/test_modeling_marian.py
index cdbb92f5047cac..dc50daa9a78406 100644
--- a/tests/test_modeling_marian.py
+++ b/tests/test_modeling_marian.py
@@ -132,9 +132,9 @@ def _assert_generated_batch_equal_expected(self, **tokenizer_kwargs):
         self.assertListEqual(self.expected_text, generated_words)
 
     def translate_src_text(self, **tokenizer_kwargs):
-        model_inputs = self.tokenizer.prepare_seq2seq_batch(src_texts=self.src_text, **tokenizer_kwargs).to(
-            torch_device
-        )
+        model_inputs = self.tokenizer.prepare_seq2seq_batch(
+            src_texts=self.src_text, return_tensors="pt", **tokenizer_kwargs
+        ).to(torch_device)
         self.assertEqual(self.model.device, model_inputs.input_ids.device)
         generated_ids = self.model.generate(
             model_inputs.input_ids, attention_mask=model_inputs.attention_mask, num_beams=2, max_length=128
@@ -151,7 +151,9 @@ def test_forward(self):
         src, tgt = ["I am a small frog"], ["Ich bin ein kleiner Frosch."]
         expected_ids = [38, 121, 14, 697, 38848, 0]
 
-        model_inputs: dict = self.tokenizer.prepare_seq2seq_batch(src, tgt_texts=tgt).to(torch_device)
+        model_inputs: dict = self.tokenizer.prepare_seq2seq_batch(src, tgt_texts=tgt, return_tensors="pt").to(
+            torch_device
+        )
 
         self.assertListEqual(expected_ids, model_inputs.input_ids[0].tolist())
 
@@ -171,12 +173,16 @@ def test_forward(self):
 
     def test_unk_support(self):
         t = self.tokenizer
-        ids = t.prepare_seq2seq_batch(["||"]).to(torch_device).input_ids[0].tolist()
+        ids = t.prepare_seq2seq_batch(["||"], return_tensors="pt").to(torch_device).input_ids[0].tolist()
         expected = [t.unk_token_id, t.unk_token_id, t.eos_token_id]
         self.assertEqual(expected, ids)
 
     def test_pad_not_split(self):
-        input_ids_w_pad = self.tokenizer.prepare_seq2seq_batch(["I am a small frog <pad>"]).input_ids[0].tolist()
+        input_ids_w_pad = (
+            self.tokenizer.prepare_seq2seq_batch(["I am a small frog <pad>"], return_tensors="pt")
+            .input_ids[0]
+            .tolist()
+        )
         expected_w_pad = [38, 121, 14, 697, 38848, self.tokenizer.pad_token_id, 0]  # pad
         self.assertListEqual(expected_w_pad, input_ids_w_pad)
 
@@ -294,7 +300,7 @@ def test_tokenizer_handles_empty(self):
         normalized = self.tokenizer.normalize("")
         self.assertIsInstance(normalized, str)
         with self.assertRaises(ValueError):
-            self.tokenizer.prepare_seq2seq_batch([""])
+            self.tokenizer.prepare_seq2seq_batch([""], return_tensors="pt")
 
     @slow
     def test_pipeline(self):
diff --git a/tests/test_modeling_mbart.py b/tests/test_modeling_mbart.py
index 2b8da23338a532..8bb874613e9d64 100644
--- a/tests/test_modeling_mbart.py
+++ b/tests/test_modeling_mbart.py
@@ -92,7 +92,7 @@ class MBartEnroIntegrationTest(AbstractSeq2SeqIntegrationTest):
     @slow
     def test_enro_generate_one(self):
         batch: BatchEncoding = self.tokenizer.prepare_seq2seq_batch(
-            ["UN Chief Says There Is No Military Solution in Syria"]
+            ["UN Chief Says There Is No Military Solution in Syria"], return_tensors="pt"
         ).to(torch_device)
         translated_tokens = self.model.generate(**batch)
         decoded = self.tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
@@ -101,7 +101,9 @@ def test_enro_generate_one(self):
 
     @slow
     def test_enro_generate_batch(self):
-        batch: BatchEncoding = self.tokenizer.prepare_seq2seq_batch(self.src_text).to(torch_device)
+        batch: BatchEncoding = self.tokenizer.prepare_seq2seq_batch(self.src_text, return_tensors="pt").to(
+            torch_device
+        )
         translated_tokens = self.model.generate(**batch)
         decoded = self.tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
         assert self.tgt_text == decoded
@@ -153,7 +155,7 @@ class MBartCC25IntegrationTest(AbstractSeq2SeqIntegrationTest):
 
     @unittest.skip("This test is broken, still generates english")
     def test_cc25_generate(self):
-        inputs = self.tokenizer.prepare_seq2seq_batch([self.src_text[0]]).to(torch_device)
+        inputs = self.tokenizer.prepare_seq2seq_batch([self.src_text[0]], return_tensors="pt").to(torch_device)
         translated_tokens = self.model.generate(
             input_ids=inputs["input_ids"].to(torch_device),
             decoder_start_token_id=self.tokenizer.lang_code_to_id["ro_RO"],
@@ -163,7 +165,9 @@ def test_cc25_generate(self):
 
     @slow
     def test_fill_mask(self):
-        inputs = self.tokenizer.prepare_seq2seq_batch(["One of the best <mask> I ever read!"]).to(torch_device)
+        inputs = self.tokenizer.prepare_seq2seq_batch(["One of the best <mask> I ever read!"], return_tensors="pt").to(
+            torch_device
+        )
         outputs = self.model.generate(
             inputs["input_ids"], decoder_start_token_id=self.tokenizer.lang_code_to_id["en_XX"], num_beams=1
         )
diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py
index 376616a0b5deaa..1bfd54c3fed885 100644
--- a/tests/test_tokenization_common.py
+++ b/tests/test_tokenization_common.py
@@ -1794,7 +1794,7 @@ def test_prepare_seq2seq_batch(self):
         self.assertEqual(batch.input_ids.shape[1], 3)
         self.assertEqual(batch.labels.shape[1], 10)
         # max_target_length will default to max_length if not specified
-        batch = tokenizer.prepare_seq2seq_batch(src_text, tgt_texts=tgt_text, max_length=3)
+        batch = tokenizer.prepare_seq2seq_batch(src_text, tgt_texts=tgt_text, max_length=3, return_tensors="pt")
         self.assertEqual(batch.input_ids.shape[1], 3)
         self.assertEqual(batch.labels.shape[1], 3)
 
diff --git a/tests/test_tokenization_mbart.py b/tests/test_tokenization_mbart.py
index dd8d6e3f4fb344..f41925e0b91de3 100644
--- a/tests/test_tokenization_mbart.py
+++ b/tests/test_tokenization_mbart.py
@@ -165,7 +165,6 @@ def test_enro_tokenizer_truncation(self):
         desired_max_length = 10
         ids = self.tokenizer.prepare_seq2seq_batch(
             src_text,
-            return_tensors=None,
             max_length=desired_max_length,
         ).input_ids[0]
         self.assertEqual(ids[-2], 2)
@@ -203,9 +202,7 @@ def test_batch_fairseq_parity(self):
     @require_torch
     def test_enro_tokenizer_prepare_seq2seq_batch(self):
         batch = self.tokenizer.prepare_seq2seq_batch(
-            self.src_text,
-            tgt_texts=self.tgt_text,
-            max_length=len(self.expected_src_tokens),
+            self.src_text, tgt_texts=self.tgt_text, max_length=len(self.expected_src_tokens), return_tensors="pt"
         )
         batch["decoder_input_ids"] = shift_tokens_right(batch.labels, self.tokenizer.pad_token_id)
         self.assertIsInstance(batch, BatchEncoding)
@@ -221,13 +218,15 @@ def test_enro_tokenizer_prepare_seq2seq_batch(self):
 
     def test_seq2seq_max_target_length(self):
         batch = self.tokenizer.prepare_seq2seq_batch(
-            self.src_text, tgt_texts=self.tgt_text, max_length=3, max_target_length=10
+            self.src_text, tgt_texts=self.tgt_text, max_length=3, max_target_length=10, return_tensors="pt"
         )
         batch["decoder_input_ids"] = shift_tokens_right(batch.labels, self.tokenizer.pad_token_id)
         self.assertEqual(batch.input_ids.shape[1], 3)
         self.assertEqual(batch.decoder_input_ids.shape[1], 10)
         # max_target_length will default to max_length if not specified
-        batch = self.tokenizer.prepare_seq2seq_batch(self.src_text, tgt_texts=self.tgt_text, max_length=3)
+        batch = self.tokenizer.prepare_seq2seq_batch(
+            self.src_text, tgt_texts=self.tgt_text, max_length=3, return_tensors="pt"
+        )
         batch["decoder_input_ids"] = shift_tokens_right(batch.labels, self.tokenizer.pad_token_id)
         self.assertEqual(batch.input_ids.shape[1], 3)
         self.assertEqual(batch.decoder_input_ids.shape[1], 3)
diff --git a/tests/test_tokenization_pegasus.py b/tests/test_tokenization_pegasus.py
index 6536220c32cabc..ad26075da69f67 100644
--- a/tests/test_tokenization_pegasus.py
+++ b/tests/test_tokenization_pegasus.py
@@ -61,7 +61,9 @@ def test_pegasus_large_tokenizer_settings(self):
     def test_pegasus_large_seq2seq_truncation(self):
         src_texts = ["This is going to be way too long." * 150, "short example"]
         tgt_texts = ["not super long but more than 5 tokens", "tiny"]
-        batch = self.pegasus_large_tokenizer.prepare_seq2seq_batch(src_texts, tgt_texts=tgt_texts, max_target_length=5)
+        batch = self.pegasus_large_tokenizer.prepare_seq2seq_batch(
+            src_texts, tgt_texts=tgt_texts, max_target_length=5, return_tensors="pt"
+        )
         assert batch.input_ids.shape == (2, 1024)
         assert batch.attention_mask.shape == (2, 1024)
         assert "labels" in batch  # because tgt_texts was specified

From 82a62ea3fd69700950ee2ebbabce8ea1c3c9fecd Mon Sep 17 00:00:00 2001
From: LysandreJik <lysandre.debut@reseau.eseo.fr>
Date: Tue, 17 Nov 2020 12:07:23 -0500
Subject: [PATCH 2/5] Run the slow tests

---
 .github/workflows/self-push.yml      | 8 ++++----
 .github/workflows/self-scheduled.yml | 9 +++++----
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml
index 4a0611fc5a2542..286dd231eff51f 100644
--- a/.github/workflows/self-push.yml
+++ b/.github/workflows/self-push.yml
@@ -16,7 +16,7 @@ on:
 
 jobs:
   run_tests_torch_gpu:
-    runs-on: [self-hosted, single-gpu]
+    runs-on: [self-hosted, gpu-tests, single-gpu]
     steps:
       - uses: actions/checkout@v2
       - name: Python version
@@ -86,7 +86,7 @@ jobs:
                   
 
   run_tests_tf_gpu:
-    runs-on: [self-hosted, single-gpu]
+    runs-on: [self-hosted, gpu-tests, single-gpu]
     steps:
       - uses: actions/checkout@v2
       - name: Python version
@@ -154,7 +154,7 @@ jobs:
           path: reports
 
   run_tests_torch_multi_gpu:
-    runs-on: [self-hosted, multi-gpu]
+    runs-on: [self-hosted, gpu-tests, multi-gpu]
     steps:
       - uses: actions/checkout@v2
       - name: Python version
@@ -213,7 +213,7 @@ jobs:
           path: reports
 
   run_tests_tf_multi_gpu:
-    runs-on: [self-hosted, multi-gpu]
+    runs-on: [self-hosted, gpu-tests, multi-gpu]
     steps:
       - uses: actions/checkout@v2
       - name: Python version
diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index 54c126f39f8283..1daf08898958c1 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -9,13 +9,14 @@ on:
   push:
     branches:
       - ci_*
+      - framework-agnostic-tokenizers
   repository_dispatch:
   schedule:
     - cron: "0 0 * * *"
 
 jobs:
   run_all_tests_torch_gpu:
-    runs-on: [self-hosted, single-gpu]
+    runs-on: [self-hosted, gpu-tests, single-gpu]
     steps:
       - uses: actions/checkout@v2
 
@@ -109,7 +110,7 @@ jobs:
 
 
   run_all_tests_tf_gpu:
-    runs-on: [self-hosted, single-gpu]
+    runs-on: [self-hosted, gpu-tests, single-gpu]
     steps:
       - uses: actions/checkout@v2
 
@@ -188,7 +189,7 @@ jobs:
           path: reports
           
   run_all_tests_torch_multi_gpu:
-    runs-on: [self-hosted, multi-gpu]
+    runs-on: [self-hosted, gpu-tests, multi-gpu]
     steps:
       - uses: actions/checkout@v2
 
@@ -279,7 +280,7 @@ jobs:
           path: reports
 
   run_all_tests_tf_multi_gpu:
-    runs-on: [self-hosted, multi-gpu]
+    runs-on: [self-hosted, gpu-tests, multi-gpu]
     steps:
       - uses: actions/checkout@v2
 

From e2d9526be880c34c745e1397368aade9746d5796 Mon Sep 17 00:00:00 2001
From: LysandreJik <lysandre.debut@reseau.eseo.fr>
Date: Tue, 17 Nov 2020 12:12:43 -0500
Subject: [PATCH 3/5] Not testing: swap gpu-tests runner label for gpu

---
 .github/workflows/self-push.yml      | 8 ++++----
 .github/workflows/self-scheduled.yml | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml
index 286dd231eff51f..0957f2f865cc75 100644
--- a/.github/workflows/self-push.yml
+++ b/.github/workflows/self-push.yml
@@ -16,7 +16,7 @@ on:
 
 jobs:
   run_tests_torch_gpu:
-    runs-on: [self-hosted, gpu-tests, single-gpu]
+    runs-on: [self-hosted, gpu, single-gpu]
     steps:
       - uses: actions/checkout@v2
       - name: Python version
@@ -86,7 +86,7 @@ jobs:
                   
 
   run_tests_tf_gpu:
-    runs-on: [self-hosted, gpu-tests, single-gpu]
+    runs-on: [self-hosted, gpu, single-gpu]
     steps:
       - uses: actions/checkout@v2
       - name: Python version
@@ -154,7 +154,7 @@ jobs:
           path: reports
 
   run_tests_torch_multi_gpu:
-    runs-on: [self-hosted, gpu-tests, multi-gpu]
+    runs-on: [self-hosted, gpu, multi-gpu]
     steps:
       - uses: actions/checkout@v2
       - name: Python version
@@ -213,7 +213,7 @@ jobs:
           path: reports
 
   run_tests_tf_multi_gpu:
-    runs-on: [self-hosted, gpu-tests, multi-gpu]
+    runs-on: [self-hosted, gpu, multi-gpu]
     steps:
       - uses: actions/checkout@v2
       - name: Python version
diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index 1daf08898958c1..592733b5ba607d 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -16,7 +16,7 @@ on:
 
 jobs:
   run_all_tests_torch_gpu:
-    runs-on: [self-hosted, gpu-tests, single-gpu]
+    runs-on: [self-hosted, gpu, single-gpu]
     steps:
       - uses: actions/checkout@v2
 
@@ -110,7 +110,7 @@ jobs:
 
 
   run_all_tests_tf_gpu:
-    runs-on: [self-hosted, gpu-tests, single-gpu]
+    runs-on: [self-hosted, gpu, single-gpu]
     steps:
       - uses: actions/checkout@v2
 
@@ -189,7 +189,7 @@ jobs:
           path: reports
           
   run_all_tests_torch_multi_gpu:
-    runs-on: [self-hosted, gpu-tests, multi-gpu]
+    runs-on: [self-hosted, gpu, multi-gpu]
     steps:
       - uses: actions/checkout@v2
 
@@ -280,7 +280,7 @@ jobs:
           path: reports
 
   run_all_tests_tf_multi_gpu:
-    runs-on: [self-hosted, gpu-tests, multi-gpu]
+    runs-on: [self-hosted, gpu, multi-gpu]
     steps:
       - uses: actions/checkout@v2
 

From 996775386d0134977dd520202d6f473a5c451e7d Mon Sep 17 00:00:00 2001
From: LysandreJik <lysandre.debut@reseau.eseo.fr>
Date: Tue, 17 Nov 2020 12:18:34 -0500
Subject: [PATCH 4/5] Fix documentation

---
 .../models/bart/tokenization_bart.py          | 65 ++----------------
 .../models/bart/tokenization_bart_fast.py     | 67 ++-----------------
 2 files changed, 8 insertions(+), 124 deletions(-)

diff --git a/src/transformers/models/bart/tokenization_bart.py b/src/transformers/models/bart/tokenization_bart.py
index 071492238ae5c0..6b46e30e9d527c 100644
--- a/src/transformers/models/bart/tokenization_bart.py
+++ b/src/transformers/models/bart/tokenization_bart.py
@@ -15,7 +15,9 @@
 
 from typing import List, Optional
 
-from ...tokenization_utils_base import BatchEncoding
+from transformers import add_start_docstrings
+
+from ...tokenization_utils_base import PREPARE_SEQ2SEQ_BATCH_DOCSTRING, BatchEncoding
 from ...utils import logging
 from ..roberta.tokenization_roberta import RobertaTokenizer
 
@@ -54,6 +56,7 @@ class BartTokenizer(RobertaTokenizer):
         "merges_file": {m: merges_url for m in _all_bart_models},
     }
 
+    @add_start_docstrings(PREPARE_SEQ2SEQ_BATCH_DOCSTRING)
     def prepare_seq2seq_batch(
         self,
         src_texts: List[str],
@@ -65,66 +68,6 @@ def prepare_seq2seq_batch(
         truncation=True,
         **kwargs,
     ) -> BatchEncoding:
-        r"""
-
-        Prepare a batch that can be passed directly to an instance of :class:`~transformers.BartModel`.
-
-        Args:
-            src_texts: (:obj:`List[str]`):
-                List of documents to summarize or source language texts.
-            tgt_texts: (:obj:`List[str]`, `optional`):
-                List of summaries or target language texts.
-            max_length (:obj:`int`, `optional`):
-                Controls the maximum length for encoder inputs (documents to summarize or source language texts). If
-                left unset or set to :obj:`None`, this will use the predefined model maximum length if a maximum length
-                is required by one of the truncation/padding parameters. If the model has no specific maximum input
-                length (like XLNet) truncation/padding to a maximum length will be deactivated.
-            max_target_length (:obj:`int`, `optional`):
-                Controls the maximum length of decoder inputs (target language texts or summaries). If left unset or
-                set to :obj:`None`, this will use the max_length value.
-            padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`False`):
-                Activates and controls padding. Accepts the following values:
-
-                * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a
-                  single sequence if provided).
-                * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the
-                  maximum acceptable input length for the model if that argument is not provided.
-                * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
-                  different lengths).
-            return_tensors (:obj:`str` or :class:`~transformers.tokenization_utils_base.TensorType`, `optional`):
-                If set, will return tensors instead of list of python integers. Acceptable values are:
-
-                * :obj:`'tf'`: Return TensorFlow :obj:`tf.constant` objects.
-                * :obj:`'pt'`: Return PyTorch :obj:`torch.Tensor` objects.
-                * :obj:`'np'`: Return Numpy :obj:`np.ndarray` objects.
-            truncation (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.TruncationStrategy`, `optional`, defaults to :obj:`True`):
-                Activates and controls truncation. Accepts the following values:
-
-                * :obj:`True` or :obj:`'longest_first'`: Truncate to a maximum length specified with the argument
-                  :obj:`max_length` or to the maximum acceptable input length for the model if that argument is not
-                  provided. This will truncate token by token, removing a token from the longest sequence in the pair
-                  if a pair of sequences (or a batch of pairs) is provided.
-                * :obj:`'only_first'`: Truncate to a maximum length specified with the argument :obj:`max_length` or to
-                  the maximum acceptable input length for the model if that argument is not provided. This will only
-                  truncate the first sequence of a pair if a pair of sequences (or a batch of pairs) is provided.
-                * :obj:`'only_second'`: Truncate to a maximum length specified with the argument :obj:`max_length` or
-                  to the maximum acceptable input length for the model if that argument is not provided. This will only
-                  truncate the second sequence of a pair if a pair of sequences (or a batch of pairs) is provided.
-                * :obj:`False` or :obj:`'do_not_truncate'` (default): No truncation (i.e., can output batch with
-                  sequence lengths greater than the model maximum admissible input size).
-            **kwargs:
-                Additional keyword arguments passed along to :obj:`self.__call__`.
-
-        Returns:
-            :class:`~transformers.BatchEncoding`: A :class:`~transformers.BatchEncoding` with the following fields:
-
-            - **input_ids** -- List of token ids to be fed to the encoder.
-            - **attention_mask** -- List of indices specifying which tokens should be attended to by the model.
-            - **labels** -- List of token ids for tgt_texts
-
-            The full set of keys ``[input_ids, attention_mask, labels]``, will only be returned if tgt_texts is passed.
-            Otherwise, input_ids, attention_mask will be the only keys.
-        """
         kwargs.pop("src_lang", None)
         kwargs.pop("tgt_lang", None)
         if max_length is None:
diff --git a/src/transformers/models/bart/tokenization_bart_fast.py b/src/transformers/models/bart/tokenization_bart_fast.py
index 43f226f3103217..21a70e51ece9dc 100644
--- a/src/transformers/models/bart/tokenization_bart_fast.py
+++ b/src/transformers/models/bart/tokenization_bart_fast.py
@@ -15,7 +15,9 @@
 
 from typing import List, Optional
 
-from ...tokenization_utils_base import BatchEncoding
+from transformers import add_start_docstrings
+
+from ...tokenization_utils_base import PREPARE_SEQ2SEQ_BATCH_DOCSTRING, BatchEncoding
 from ...utils import logging
 from ..roberta.tokenization_roberta_fast import RobertaTokenizerFast
 from .tokenization_bart import BartTokenizer
@@ -49,6 +51,7 @@ class BartTokenizerFast(RobertaTokenizerFast):
     }
     slow_tokenizer_class = BartTokenizer
 
+    @add_start_docstrings(PREPARE_SEQ2SEQ_BATCH_DOCSTRING)
     def prepare_seq2seq_batch(
         self,
         src_texts: List[str],
@@ -60,68 +63,6 @@ def prepare_seq2seq_batch(
         truncation=True,
         **kwargs,
     ) -> BatchEncoding:
-        r"""
-
-        Prepare a batch that can be passed directly to an instance of :class:`~transformers.BartModel`.
-
-        Args:
-            src_texts: (:obj:`List[str]`):
-                List of documents to summarize or source language texts.
-            tgt_texts: (:obj:`List[str]`, `optional`):
-                List of summaries or target language texts.
-            max_length (:obj:`int`, `optional`):
-                Controls the maximum length for encoder inputs (documents to summarize or source language texts). If
-                left unset or set to :obj:`None`, this will use the predefined model maximum length if a maximum length
-                is required by one of the truncation/padding parameters. If the model has no specific maximum input
-                length (like XLNet) truncation/padding to a maximum length will be deactivated.
-            max_target_length (:obj:`int`, `optional`):
-                Controls the maximum length of decoder inputs (target language texts or summaries). If left unset or
-                set to :obj:`None`, this will use the max_length value.
-            padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`False`):
-                Activates and controls padding. Accepts the following values:
-
-                * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a
-                  single sequence if provided).
-                * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the
-                  maximum acceptable input length for the model if that argument is not provided.
-                * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
-                  different lengths).
-            return_tensors (:obj:`str` or :class:`~transformers.tokenization_utils_base.TensorType`, `optional`):
-                If set, will return tensors instead of list of python integers. Acceptable values are:
-
-                * :obj:`'tf'`: Return TensorFlow :obj:`tf.constant` objects.
-                * :obj:`'pt'`: Return PyTorch :obj:`torch.Tensor` objects.
-                * :obj:`'np'`: Return Numpy :obj:`np.ndarray` objects.
-            truncation (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.TruncationStrategy`, `optional`, defaults to :obj:`True`):
-                Activates and controls truncation. Accepts the following values:
-
-                * :obj:`True` or :obj:`'longest_first'`: Truncate to a maximum length specified with the argument
-                  :obj:`max_length` or to the maximum acceptable input length for the model if that argument is not
-                  provided. This will truncate token by token, removing a token from the longest sequence in the pair
-                  if a pair of sequences (or a batch of pairs) is provided.
-                * :obj:`'only_first'`: Truncate to a maximum length specified with the argument :obj:`max_length` or to
-                  the maximum acceptable input length for the model if that argument is not provided. This will only
-                  truncate the first sequence of a pair if a pair of sequences (or a batch of pairs) is provided.
-                * :obj:`'only_second'`: Truncate to a maximum length specified with the argument :obj:`max_length` or
-                  to the maximum acceptable input length for the model if that argument is not provided. This will only
-                  truncate the second sequence of a pair if a pair of sequences (or a batch of pairs) is provided.
-                * :obj:`False` or :obj:`'do_not_truncate'` (default): No truncation (i.e., can output batch with
-                  sequence lengths greater than the model maximum admissible input size).
-            **kwargs:
-                Additional keyword arguments passed along to :obj:`self.__call__`.
-
-        Returns:
-            :class:`~transformers.BatchEncoding`: A :class:`~transformers.BatchEncoding` with the following fields:
-
-            - **input_ids** -- List of token ids to be fed to the encoder.
-            - **attention_mask** -- List of indices specifying which tokens should be attended to by the model.
-            - **decoder_input_ids** -- List of token ids to be fed to the decoder.
-            - **decoder_attention_mask** -- List of indices specifying which tokens should be attended to by the
-              decoder. This does not include causal mask, which is built by the model.
-
-            The full set of keys ``[input_ids, attention_mask, decoder_input_ids, decoder_attention_mask]``, will only
-            be returned if tgt_texts is passed. Otherwise, input_ids, attention_mask will be the only keys.
-        """
         if max_length is None:
             max_length = self.model_max_length
         model_inputs: BatchEncoding = self(

From 90555c2c8f4f5cade6d19b431443a91a31b83ea5 Mon Sep 17 00:00:00 2001
From: Lysandre Debut <lysandre@huggingface.co>
Date: Tue, 17 Nov 2020 14:02:40 -0500
Subject: [PATCH 5/5] Apply suggestions from code review

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
---
 src/transformers/models/bart/tokenization_bart_fast.py | 2 +-
 src/transformers/models/fsmt/tokenization_fsmt.py      | 2 +-
 src/transformers/models/marian/tokenization_marian.py  | 2 +-
 src/transformers/models/mbart/tokenization_mbart.py    | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/transformers/models/bart/tokenization_bart_fast.py b/src/transformers/models/bart/tokenization_bart_fast.py
index 21a70e51ece9dc..30b77275f22169 100644
--- a/src/transformers/models/bart/tokenization_bart_fast.py
+++ b/src/transformers/models/bart/tokenization_bart_fast.py
@@ -59,7 +59,7 @@ def prepare_seq2seq_batch(
         max_length: Optional[int] = None,
         max_target_length: Optional[int] = None,
         padding: str = "longest",
-        return_tensors: str = None,
+        return_tensors: Optional[str] = None,
         truncation=True,
         **kwargs,
     ) -> BatchEncoding:
diff --git a/src/transformers/models/fsmt/tokenization_fsmt.py b/src/transformers/models/fsmt/tokenization_fsmt.py
index 0c11a7a64db9a8..71bfd93000f8ce 100644
--- a/src/transformers/models/fsmt/tokenization_fsmt.py
+++ b/src/transformers/models/fsmt/tokenization_fsmt.py
@@ -491,7 +491,7 @@ def prepare_seq2seq_batch(
         tgt_texts: Optional[List[str]] = None,
         max_length: Optional[int] = None,
         max_target_length: Optional[int] = None,
-        return_tensors: str = None,
+        return_tensors: Optional[str] = None,
         truncation=True,
         padding="longest",
         **unused,
diff --git a/src/transformers/models/marian/tokenization_marian.py b/src/transformers/models/marian/tokenization_marian.py
index cb81ddfe309aae..67b289db1fd84d 100644
--- a/src/transformers/models/marian/tokenization_marian.py
+++ b/src/transformers/models/marian/tokenization_marian.py
@@ -175,7 +175,7 @@ def prepare_seq2seq_batch(
         tgt_texts: Optional[List[str]] = None,
         max_length: Optional[int] = None,
         max_target_length: Optional[int] = None,
-        return_tensors: str = None,
+        return_tensors: Optional[str] = None,
         truncation=True,
         padding="longest",
         **unused,
diff --git a/src/transformers/models/mbart/tokenization_mbart.py b/src/transformers/models/mbart/tokenization_mbart.py
index 98448fe66168ad..468d218ed37cbd 100644
--- a/src/transformers/models/mbart/tokenization_mbart.py
+++ b/src/transformers/models/mbart/tokenization_mbart.py
@@ -183,7 +183,7 @@ def prepare_seq2seq_batch(
         max_target_length: Optional[int] = None,
         truncation: bool = True,
         padding: str = "longest",
-        return_tensors: str = None,
+        return_tensors: Optional[str] = None,
         add_prefix_space: bool = False,  # ignored
         **kwargs,
     ) -> BatchEncoding: