From 60edea6d56978eae1813f456bf3c7bdcd8eea7de Mon Sep 17 00:00:00 2001
From: Steven
Date: Wed, 7 Sep 2022 10:32:35 -0700
Subject: [PATCH 1/4] skip some code examples for doctests

---
 docs/source/en/quicktour.mdx | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/docs/source/en/quicktour.mdx b/docs/source/en/quicktour.mdx
index f1b3ca5bf0f688..c81b037385b804 100644
--- a/docs/source/en/quicktour.mdx
+++ b/docs/source/en/quicktour.mdx
@@ -435,8 +435,8 @@ Depending on your task, you'll typically pass the following parameters to [`Trai
 4. Your preprocessed train and test datasets:
 
     ```py
-    >>> train_dataset = dataset["train"]
-    >>> eval_dataset = dataset["eval"]
+    >>> train_dataset = dataset["train"] # doctest: +SKIP
+    >>> eval_dataset = dataset["eval"] # doctest: +SKIP
     ```
 
 5. A [`DataCollator`] to create a batch of examples from your dataset:
@@ -459,13 +459,13 @@ Now gather all these classes in [`Trainer`]:
 ...     eval_dataset=dataset["test"],
 ...     tokenizer=tokenizer,
 ...     data_collator=data_collator,
-... )
+... ) # doctest: +SKIP
 ```
 
 When you're ready, call [`~Trainer.train`] to start training:
 
 ```py
->>> trainer.train()
+>>> trainer.train() # doctest: +SKIP
 ```
@@ -502,11 +502,9 @@ All models are a standard [`tf.keras.Model`](https://www.tensorflow.org/api_docs
 
     ```py
     >>> def tokenize_dataset(dataset):
-    ...     return tokenizer(dataset["text"])
-
-
-    >>> dataset = dataset.map(tokenize_dataset)
-    >>> tf_dataset = model.prepare_tf_dataset(dataset, batch_size=16, shuffle=True, tokenizer=tokenizer)
+    ...     return tokenizer(dataset["text"]) # doctest: +SKIP
+    >>> dataset = dataset.map(tokenize_dataset) # doctest: +SKIP
+    >>> tf_dataset = model.prepare_tf_dataset(dataset, batch_size=16, shuffle=True, tokenizer=tokenizer) # doctest: +SKIP
     ```
 
 4. When you're ready, you can call `compile` and `fit` to start training:
@@ -515,7 +513,7 @@ All models are a standard [`tf.keras.Model`](https://www.tensorflow.org/api_docs
     >>> from tensorflow.keras.optimizers import Adam
 
     >>> model.compile(optimizer=Adam(3e-5))
-    >>> model.fit(dataset)
+    >>> model.fit(dataset) # doctest: +SKIP
     ```
 
 ## What's next?
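
An aside on the directive patch 1 introduces: `# doctest: +SKIP` comes from Python's standard `doctest` module and tells the runner not to execute that single example line. That is how documentation snippets that depend on state defined elsewhere (an already-loaded `dataset`, a trained `model`) can live in a doctested file without failing. A minimal, self-contained sketch of the behavior, separate from the patches themselves:

```py
import doctest


def example():
    """
    >>> 1 + 1
    2
    >>> train_dataset = dataset["train"]  # doctest: +SKIP
    """


# Passes silently: the first example executes and matches its expected
# output, while the +SKIP line is never executed, so the undefined name
# `dataset` causes no failure.
doctest.run_docstring_examples(example, {}, verbose=False)
```

Any doctest-based runner honors the marker per-example without extra configuration, which is what lets the quicktour keep illustrative-but-unrunnable lines.
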
From b6e5e74560f1835860a1083eb02c48bcd32a2b77 Mon Sep 17 00:00:00 2001
From: Steven
Date: Wed, 7 Sep 2022 10:35:43 -0700
Subject: [PATCH 2/4] make style

---
 docs/source/en/quicktour.mdx | 20 ++++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/docs/source/en/quicktour.mdx b/docs/source/en/quicktour.mdx
index c81b037385b804..a536ab304e878b 100644
--- a/docs/source/en/quicktour.mdx
+++ b/docs/source/en/quicktour.mdx
@@ -435,8 +435,8 @@ Depending on your task, you'll typically pass the following parameters to [`Trai
 4. Your preprocessed train and test datasets:
 
     ```py
-    >>> train_dataset = dataset["train"] # doctest: +SKIP
-    >>> eval_dataset = dataset["eval"] # doctest: +SKIP
+    >>> train_dataset = dataset["train"]  # doctest: +SKIP
+    >>> eval_dataset = dataset["eval"]  # doctest: +SKIP
     ```
 
 5. A [`DataCollator`] to create a batch of examples from your dataset:
@@ -459,13 +459,13 @@ Now gather all these classes in [`Trainer`]:
 ...     eval_dataset=dataset["test"],
 ...     tokenizer=tokenizer,
 ...     data_collator=data_collator,
-... ) # doctest: +SKIP
+... )  # doctest: +SKIP
 ```
 
 When you're ready, call [`~Trainer.train`] to start training:
 
 ```py
->>> trainer.train() # doctest: +SKIP
+>>> trainer.train()  # doctest: +SKIP
 ```
@@ -502,9 +502,13 @@ All models are a standard [`tf.keras.Model`](https://www.tensorflow.org/api_docs
 
     ```py
     >>> def tokenize_dataset(dataset):
-    ...     return tokenizer(dataset["text"]) # doctest: +SKIP
-    >>> dataset = dataset.map(tokenize_dataset) # doctest: +SKIP
-    >>> tf_dataset = model.prepare_tf_dataset(dataset, batch_size=16, shuffle=True, tokenizer=tokenizer) # doctest: +SKIP
+    ...     return tokenizer(dataset["text"])  # doctest: +SKIP
+
+
+    >>> dataset = dataset.map(tokenize_dataset)  # doctest: +SKIP
+    >>> tf_dataset = model.prepare_tf_dataset(
+    ...     dataset, batch_size=16, shuffle=True, tokenizer=tokenizer
+    ... )  # doctest: +SKIP
     ```
 
 4. When you're ready, you can call `compile` and `fit` to start training:
@@ -513,7 +517,7 @@ All models are a standard [`tf.keras.Model`](https://www.tensorflow.org/api_docs
     >>> from tensorflow.keras.optimizers import Adam
 
     >>> model.compile(optimizer=Adam(3e-5))
-    >>> model.fit(dataset) # doctest: +SKIP
+    >>> model.fit(dataset)  # doctest: +SKIP
     ```
 
 ## What's next?

From 54ec4dddc66334b17035efc40cd6d8ea75c83880 Mon Sep 17 00:00:00 2001
From: Steven
Date: Wed, 7 Sep 2022 11:09:28 -0700
Subject: [PATCH 3/4] fix code snippet formatting

---
 docs/source/en/quicktour.mdx | 2 --
 1 file changed, 2 deletions(-)

diff --git a/docs/source/en/quicktour.mdx b/docs/source/en/quicktour.mdx
index a536ab304e878b..3a61723bf2d432 100644
--- a/docs/source/en/quicktour.mdx
+++ b/docs/source/en/quicktour.mdx
@@ -503,8 +503,6 @@ All models are a standard [`tf.keras.Model`](https://www.tensorflow.org/api_docs
     ```py
     >>> def tokenize_dataset(dataset):
     ...     return tokenizer(dataset["text"])  # doctest: +SKIP
-
-
     >>> dataset = dataset.map(tokenize_dataset)  # doctest: +SKIP
     >>> tf_dataset = model.prepare_tf_dataset(
     ...     dataset, batch_size=16, shuffle=True, tokenizer=tokenizer
     ... )  # doctest: +SKIP
     ```

From cb628f77cb15ed6cdac1670e03df5c54507f267e Mon Sep 17 00:00:00 2001
From: Steven
Date: Wed, 7 Sep 2022 12:23:30 -0700
Subject: [PATCH 4/4] separate code snippet into two blocks

---
 docs/source/en/quicktour.mdx | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/docs/source/en/quicktour.mdx b/docs/source/en/quicktour.mdx
index 3a61723bf2d432..3fcdb4fff22457 100644
--- a/docs/source/en/quicktour.mdx
+++ b/docs/source/en/quicktour.mdx
@@ -498,18 +498,23 @@ All models are a standard [`tf.keras.Model`](https://www.tensorflow.org/api_docs
     >>> tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
     ```
 
-3. Tokenize the dataset and pass it and the tokenizer to [`~TFPreTrainedModel.prepare_tf_dataset`]. You can also change the batch size and shuffle the dataset here if you'd like:
+3. Create a function to tokenize the dataset:
 
     ```py
     >>> def tokenize_dataset(dataset):
     ...     return tokenizer(dataset["text"])  # doctest: +SKIP
+    ```
+
+4. Apply the tokenizer over the entire dataset with [`~datasets.Dataset.map`] and then pass the dataset and tokenizer to [`~TFPreTrainedModel.prepare_tf_dataset`]. You can also change the batch size and shuffle the dataset here if you'd like:
+
+    ```py
     >>> dataset = dataset.map(tokenize_dataset)  # doctest: +SKIP
     >>> tf_dataset = model.prepare_tf_dataset(
     ...     dataset, batch_size=16, shuffle=True, tokenizer=tokenizer
     ... )  # doctest: +SKIP
     ```
 
-4. When you're ready, you can call `compile` and `fit` to start training:
+5. When you're ready, you can call `compile` and `fit` to start training:
 
     ```py
     >>> from tensorflow.keras.optimizers import Adam
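
With all four patches applied, the TensorFlow portion of the quicktour walks through tokenizing, `map`, `prepare_tf_dataset`, and `compile`/`fit` as separate skipped snippets. For orientation, a rough end-to-end sketch of that flow; the dataset (`rotten_tomatoes`), the model class, and fitting on the prepared `tf_dataset` are illustrative assumptions, since the diffs themselves only pin down the tokenizer checkpoint and the calls shown above:

```py
# Sketch under assumptions: any Hugging Face dataset with a "text" column
# works; rotten_tomatoes and TFAutoModelForSequenceClassification are
# illustrative choices, not specified by the patches.
from datasets import load_dataset
from tensorflow.keras.optimizers import Adam
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

dataset = load_dataset("rotten_tomatoes")
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = TFAutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased")


def tokenize_dataset(dataset):
    return tokenizer(dataset["text"])


# Tokenize every split, then build a shuffled, batched tf.data.Dataset from
# the training split; passing the tokenizer lets batches be padded on the fly.
dataset = dataset.map(tokenize_dataset)
tf_dataset = model.prepare_tf_dataset(
    dataset["train"], batch_size=16, shuffle=True, tokenizer=tokenizer
)

# transformers TF models pick a sensible default loss when none is given.
model.compile(optimizer=Adam(3e-5))
model.fit(tf_dataset)
```
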