
Commit cab4108

Squashed commit of the following:
commit 7a9592b
Author: Quentin Gallouédec <45557362+qgallouedec@users.noreply.github.com>
Date: Tue Nov 4 14:32:04 2025 -0700

    🐍 Drop Python 3.9 (huggingface#4183)

commit 7f15a7f
Author: Harras Mansoor <98635627+Harras3@users.noreply.github.com>
Date: Wed Nov 5 02:06:31 2025 +0500

    Removed outdated warning about batch contamination (huggingface#4423)

commit 8b0a3ce
Author: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Tue Nov 4 21:37:39 2025 +0100

    Update tokenizer apply_chat_template with return_dict=True default (huggingface#4448)

commit d9f9e2b
Author: Pramodith Ballapuram <16939722+pramodith@users.noreply.github.com>
Date: Tue Nov 4 19:56:58 2025 +0000

    Support casting to fp32 when word embeddings are tied to lm_head (huggingface#4446)

commit 4e138ab
Author: Sergio Paniego Blanco <sergiopaniegoblanco@gmail.com>
Date: Tue Nov 4 15:15:23 2025 +0100

    Upload notebook with T4 selected (huggingface#4449)
1 parent: 6d6a603

99 files changed: +2068 additions, -1928 deletions

.github/workflows/tests.yml

Lines changed: 1 addition & 1 deletion

@@ -40,7 +40,7 @@ jobs:
     name: Tests
     strategy:
       matrix:
-        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
+        python-version: ['3.10', '3.11', '3.12', '3.13']
       fail-fast: false
     runs-on:
       group: aws-g4dn-2xlarge

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.11.10
+    rev: v0.13.3
     hooks:
       - id: ruff-check
         types_or: [ python, pyi ]

CONTRIBUTING.md

Lines changed: 0 additions & 18 deletions

@@ -285,24 +285,6 @@ def replicate_str(string: str, n: int, sep: str = " ") -> str:
   * **Definite Articles:** Removed definite articles where possible to streamline language. (Eg: Changed "The string to replicate" to "String to replicate")
   * **Type Annotations:**
     * Always include type definitions, indicating if a parameter is optional and specifying the default value.
-    * Note that `Optional` means that the value can be `None`, and `*optional*` means that it is not required for the user to pass a value.
-      E.g., for arguments that can't be `None` and aren't required:
-
-      ```txt
-      foo (`int`, *optional*, defaults to `4`):
-      ```
-
-      For arguments that can be `None` and are required:
-
-      ```txt
-      foo (`Optional[int]`):
-      ```
-
-      for arguments that can be `None` and aren't required (in this case, if the default value is `None`, you can omit it):
-
-      ```txt
-      foo (`Optional[int]`, *optional*):
-      ```
 
   * **String Defaults:**
     * Ensured that default string values are wrapped in double quotes:
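The removed bullets documented the older `Optional[...]` docstring convention. A minimal sketch of how a nullable, not-required argument might be documented against the `X | None` annotations this commit moves to; `truncate_str` and its docstring wording are hypothetical, not taken from the updated guide:

```python
def truncate_str(string: str, max_length: int | None = None) -> str:
    """
    Truncate a string to at most `max_length` characters.

    Args:
        string (`str`):
            String to truncate.
        max_length (`int` or `None`, *optional*, defaults to `None`):
            Maximum number of characters to keep. If `None`, the string is returned unchanged.
    """
    return string if max_length is None else string[:max_length]
```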

docs/source/lora_without_regret.md

Lines changed: 1 addition & 1 deletion

@@ -141,7 +141,7 @@ For reinforcement learning, the blog uses a math reasoning task that we can repr
 ```python
 def strip_reasoning_accuracy_reward(
     completions: list[list[dict[str, str]]], solution: list[str], **kwargs
-) -> list[Optional[float]]:
+) -> list[float | None]:
     """Reward function that strips reasoning tags and checks mathematical accuracy.
 
     This function:
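The new `float | None` spelling uses PEP 604 union syntax, which only became valid as a runtime annotation in Python 3.10, in line with the dropped 3.9 support above. A minimal standalone sketch of the annotation in use; the `first_reward` helper is made up for illustration and is not TRL code:

```python
def first_reward(rewards: list[float | None]) -> float | None:
    """Return the first non-None reward, or None if every entry is None."""
    # `float | None` is runtime-valid on Python 3.10+; on 3.9 the same annotation
    # needed `typing.Optional[float]` or `from __future__ import annotations`.
    return next((r for r in rewards if r is not None), None)


print(first_reward([None, 0.5, 1.0]))  # 0.5
print(first_reward([None, None]))      # None
```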

docs/source/reducing_memory_usage.md

Lines changed: 0 additions & 3 deletions

@@ -90,9 +90,6 @@ from trl import SFTConfig
 training_args = SFTConfig(..., packing=True, max_length=512)
 ```
 
-> [!WARNING]
-> Packing may cause batch contamination, where adjacent sequences influence one another. This can be problematic for some applications. For more details, see [#1230](https://github.com/huggingface/trl/issues/1230).
-
 ## Liger for reducing peak memory usage
 
 > [Liger Kernel](https://github.com/linkedin/Liger-Kernel) is a collection of Triton kernels designed specifically for LLM training. It can effectively increase multi-GPU training throughput by 20% and reduce memory usage by 60%.

examples/datasets/hh-rlhf-helpful-base.py

Lines changed: 2 additions & 3 deletions

@@ -14,7 +14,6 @@
 
 import re
 from dataclasses import dataclass, field
-from typing import Optional
 
 from datasets import load_dataset
 from huggingface_hub import ModelCard
@@ -42,15 +41,15 @@ class ScriptArguments:
     repo_id: str = field(
         default="trl-lib/hh-rlhf-helpful-base", metadata={"help": "Hugging Face repository ID to push the dataset to."}
     )
-    dataset_num_proc: Optional[int] = field(
+    dataset_num_proc: int | None = field(
         default=None, metadata={"help": "Number of workers to use for dataset processing."}
     )
 
 
 def common_start(str1: str, str2: str) -> str:
     # Zip the two strings and iterate over them together
     common_chars = []
-    for c1, c2 in zip(str1, str2):
+    for c1, c2 in zip(str1, str2, strict=True):
         if c1 == c2:
             common_chars.append(c1)
         else:
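Adding `strict=True` to `zip` (available since Python 3.10) changes its failure mode: instead of silently truncating to the shorter input, it raises `ValueError` when the input lengths differ. A small sketch of the difference, separate from the TRL script itself:

```python
a, b = "chosen", "chose"

# Plain zip stops at the shorter input and never complains.
print([c1 == c2 for c1, c2 in zip(a, b)])  # [True, True, True, True, True]

# strict=True raises if the lengths differ.
try:
    list(zip(a, b, strict=True))
except ValueError as err:
    print(err)  # e.g. "zip() argument 2 is shorter than argument 1"
```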

examples/datasets/llava_instruct_mix.py

Lines changed: 1 addition & 2 deletions

@@ -14,7 +14,6 @@
 
 import ast
 from dataclasses import dataclass, field
-from typing import Optional
 
 from datasets import load_dataset
 from huggingface_hub import ModelCard
@@ -43,7 +42,7 @@ class ScriptArguments:
         default="trl-lib/llava-instruct-mix",
         metadata={"help": "Hugging Face repository ID to push the dataset to."},
     )
-    dataset_num_proc: Optional[int] = field(
+    dataset_num_proc: int | None = field(
         default=None,
         metadata={"help": "Number of workers to use for dataset processing."},
     )

examples/datasets/lm-human-preferences-descriptiveness.py

Lines changed: 1 addition & 2 deletions

@@ -13,7 +13,6 @@
 # limitations under the License.
 
 from dataclasses import dataclass, field
-from typing import Optional
 
 from datasets import load_dataset
 from huggingface_hub import ModelCard
@@ -42,7 +41,7 @@ class ScriptArguments:
         default="trl-lib/lm-human-preferences-descriptiveness",
         metadata={"help": "Hugging Face repository ID to push the dataset to."},
     )
-    dataset_num_proc: Optional[int] = field(
+    dataset_num_proc: int | None = field(
         default=None,
         metadata={"help": "Number of workers to use for dataset processing."},
     )

examples/datasets/lm-human-preferences-sentiment.py

Lines changed: 1 addition & 2 deletions

@@ -13,7 +13,6 @@
 # limitations under the License.
 
 from dataclasses import dataclass, field
-from typing import Optional
 
 from datasets import load_dataset
 from huggingface_hub import ModelCard
@@ -42,7 +41,7 @@ class ScriptArguments:
         default="trl-lib/lm-human-preferences-sentiment",
         metadata={"help": "Hugging Face repository ID to push the dataset to."},
     )
-    dataset_num_proc: Optional[int] = field(
+    dataset_num_proc: int | None = field(
         default=None,
         metadata={"help": "Number of workers to use for dataset processing."},
     )

examples/datasets/math_shepherd.py

Lines changed: 2 additions & 3 deletions

@@ -15,7 +15,6 @@
 import re
 from dataclasses import dataclass, field
 from itertools import chain
-from typing import Optional
 
 from datasets import load_dataset
 from huggingface_hub import ModelCard
@@ -44,7 +43,7 @@ class ScriptArguments:
         default="trl-lib/math_shepherd",
         metadata={"help": "Hugging Face repository ID to push the dataset to."},
     )
-    dataset_num_proc: Optional[int] = field(
+    dataset_num_proc: int | None = field(
         default=None,
         metadata={"help": "Number of workers to use for dataset processing."},
     )
@@ -64,7 +63,7 @@ def process_example(example):
     labels = [example["label"][idx] == "+" for idx in indexes]
 
     # Split the inputs into steps (caution, the first step is missing here, it is the prompt)
-    steps = [inputs[i:j] for i, j in zip(chain([0], indexes), chain(indexes, [None]))]
+    steps = [inputs[i:j] for i, j in zip(chain([0], indexes), chain(indexes, [None]), strict=True)]
 
     # Remove the last step (single ⶻ)
     steps = steps[:-1]
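In this comprehension, `chain([0], indexes)` and `chain(indexes, [None])` always yield the same number of items (`len(indexes) + 1`), so `strict=True` cannot fire here and simply guards against an accidental length mismatch. A standalone sketch of the same boundary-pairing pattern, with made-up token ids and boundary positions rather than real Math-Shepherd data:

```python
from itertools import chain

# Hypothetical stand-in for the tokenized example; step boundaries at 3 and 6 are made up.
inputs = [10, 11, 12, 99, 20, 21, 99, 30]
indexes = [3, 6]

# Pairs consecutive boundaries (0, 3), (3, 6), (6, None); both chains yield
# len(indexes) + 1 items, so strict=True never raises for well-formed input.
steps = [inputs[i:j] for i, j in zip(chain([0], indexes), chain(indexes, [None]), strict=True)]
print(steps)  # [[10, 11, 12], [99, 20, 21], [99, 30]]
```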
