[pre-commit] update pre-commit yaml #2002

Merged: 3 commits, Sep 2, 2024
.pre-commit-config.yaml: 3 additions, 1 deletion

@@ -1,10 +1,12 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.2.0
+    rev: v0.6.3
     hooks:
       - id: ruff
         types_or: [ python, pyi ]
         args: [ --fix ]
+      - id: ruff-format
+        types_or: [ python, pyi ]
 
 #  - repo: https://github.com/codespell-project/codespell
 #    rev: v2.1.0
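The newly added `ruff-format` hook is presumably what drives the mechanical changes in the rest of this PR: a blank line inserted after each module docstring, and long assignments rewrapped with a parenthesized right-hand side. The `==` to `is` type comparisons further down align with ruff's E721 rule. A minimal sketch of the docstring rule (an illustrative file, not taken from TRL):

```python
"""Illustrative module, not part of TRL.

Usage:
python illustrative_module.py
"""

# ruff-format keeps exactly one blank line between the module docstring above
# and the first import, which is the whole diff for the example scripts below.
import sys

print(sys.version.split()[0])
```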
examples/scripts/alignprop.py: 1 addition, 0 deletions

@@ -24,6 +24,7 @@
     --log_with="wandb"
 
 """
+
 from dataclasses import dataclass, field
 
 import numpy as np
examples/scripts/ddpo.py: 1 addition, 0 deletions

@@ -24,6 +24,7 @@
     --tracker_project_name="stable_diffusion_training" \
     --log_with="wandb"
 """
+
 import os
 from dataclasses import dataclass, field
 
examples/scripts/ppo.py: 1 addition, 0 deletions

@@ -15,6 +15,7 @@
 python examples/scripts/ppo.py \
     --log_with=wandb
 """
+
 from dataclasses import dataclass, field
 from typing import Optional
 
examples/scripts/reward_modeling.py: 1 addition, 0 deletions

@@ -28,6 +28,7 @@
     --eval_steps=500 \
     --max_length=512 \
 """
+
 import warnings
 
 import torch
scripts/stale.py: 1 addition, 0 deletions

@@ -15,6 +15,7 @@
 Script to close stale issue. Taken in part from the AllenNLP repository.
 https://github.com/allenai/allennlp.
 """
+
 import os
 from datetime import datetime as dt
 from datetime import timezone
setup.py: 2 additions, 1 deletion

@@ -1,4 +1,4 @@
-""" trl is an open library for RL with transformer models.
+"""trl is an open library for RL with transformer models.
 
 Note:
 
@@ -53,6 +53,7 @@
 8. Change the version in __init__.py and setup.py to X.X.X+1.dev0 (e.g. VERSION=1.18.3 -> 1.18.4.dev0).
    Then push the change with a message 'set dev version'
 """
+
 import os
 
 from setuptools import find_packages, setup
trl/extras/best_of_n_sampler.py: 2 additions, 2 deletions

@@ -92,9 +92,9 @@ def generate(
             queries = tokenized_query.unsqueeze(0)
         elif isinstance(tokenized_query, List):
             element_type = type(tokenized_query[0])
-            if element_type == int:
+            if element_type is int:
                 queries = torch.tensor(tokenized_query).unsqueeze(0)
-            elif element_type == torch.Tensor:
+            elif element_type is torch.Tensor:
                 queries = [tensor.reshape((1, -1)) for tensor in tokenized_query]
             else:
                 queries = [torch.tensor(query).reshape((1, -1)) for query in tokenized_query]
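The `==` to `is` change here (and in the trainers below) matches ruff's E721 rule: comparing types with `==` dispatches to `__eq__`, which a metaclass can override, whereas `is` compares identity and cannot be fooled. A small self-contained sketch (the classes are invented for illustration):

```python
class LooseMeta(type):
    """Metaclass whose equality always succeeds, to show how `==` can mislead."""

    def __eq__(cls, other):
        return True

    def __hash__(cls):  # keep classes hashable despite the custom __eq__
        return id(cls)


class Fake(metaclass=LooseMeta):
    pass


print(Fake == int)      # True  -- `==` is hijacked by the metaclass
print(Fake is int)      # False -- identity comparison is unambiguous
print(type(3) is int)   # True  -- the pattern now used in best_of_n_sampler.py
```

For ordinary classes the two comparisons agree, so the change is behavior-preserving; `is` simply states the intent (an exact type match) without going through `__eq__`.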
trl/models/sd_utils.py: 1 addition, 0 deletions

@@ -16,6 +16,7 @@
 File copied from diffusers to avoid import issues and make TRL compatible
 with most of diffusers versions.
 """
+
 import enum
 
 
trl/trainer/bco_trainer.py: 1 addition, 1 deletion

@@ -327,7 +327,7 @@ def __init__(
         embedding_func: Optional[Callable] = None,
         embedding_tokenizer: Optional[PreTrainedTokenizerBase] = None,
     ):
-        if type(args) == TrainingArguments:
+        if type(args) is TrainingArguments:
             raise ValueError("Please use `BCOConfig` instead TrainingArguments.")
 
         if args.model_init_kwargs is None:
trl/trainer/kto_trainer.py: 1 addition, 1 deletion

@@ -315,7 +315,7 @@ def __init__(
         model_adapter_name: Optional[str] = None,
         ref_adapter_name: Optional[str] = None,
     ):
-        if type(args) == TrainingArguments:
+        if type(args) is TrainingArguments:
             raise ValueError("Please use `KTOConfig` instead TrainingArguments.")
 
         if args.model_init_kwargs is None:
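The check in `bco_trainer.py` and `kto_trainer.py` is intentionally an exact-type test rather than `isinstance`: `BCOConfig` and `KTOConfig` subclass `TrainingArguments`, so an `isinstance` check would also reject the dedicated configs. A hedged sketch with stand-in classes (not the real `transformers`/TRL classes):

```python
from dataclasses import dataclass


@dataclass
class TrainingArguments:  # stand-in for transformers.TrainingArguments
    output_dir: str = "out"


@dataclass
class KTOConfig(TrainingArguments):  # stand-in for trl.KTOConfig
    beta: float = 0.1


args = KTOConfig()
# isinstance() would flag the dedicated config as well, which is not wanted:
print(isinstance(args, TrainingArguments))  # True
# The exact-type check only triggers for a plain TrainingArguments instance:
print(type(args) is TrainingArguments)      # False
print(type(TrainingArguments()) is TrainingArguments)  # True -> raises in the trainer
```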
trl/trainer/ppov2_trainer.py: 6 additions, 6 deletions

@@ -473,14 +473,14 @@ def repeat_generator():
                     entropy = torch.logsumexp(logits, dim=-1) - torch.sum(prob_dist * logits, dim=-1)
                     approxkl = 0.5 * (logprobs_diff**2).mean()
                     approxkl_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = approxkl
-                    pg_clipfrac_stats[
-                        ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx
-                    ] = pg_clipfrac
+                    pg_clipfrac_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = (
+                        pg_clipfrac
+                    )
                     pg_loss_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = pg_loss
                     vf_loss_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = vf_loss
-                    vf_clipfrac_stats[
-                        ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx
-                    ] = vf_clipfrac
+                    vf_clipfrac_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = (
+                        vf_clipfrac
+                    )
                     entropy_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = entropy.mean()
                     ratio_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = ratio.mean()
                     gradient_accumulation_idx += 1
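The `ppov2_trainer.py` and `rloo_trainer.py` hunks are pure reformatting: the statistics are still written into pre-allocated tensors indexed by PPO epoch, minibatch, and gradient-accumulation step; only the line wrapping changes. A simplified sketch of that bookkeeping pattern, with assumed shapes and dummy values (not the trainer's actual loop):

```python
import torch

# Assumed dimensions, for illustration only.
num_ppo_epochs, num_minibatches, grad_accum_steps = 4, 8, 2

# One scalar slot per (epoch, minibatch, accumulation step).
approxkl_stats = torch.zeros(num_ppo_epochs, num_minibatches, grad_accum_steps)
pg_clipfrac_stats = torch.zeros(num_ppo_epochs, num_minibatches, grad_accum_steps)

for ppo_epoch_idx in range(num_ppo_epochs):
    for minibatch_idx in range(num_minibatches):
        for gradient_accumulation_idx in range(grad_accum_steps):
            # Dummy stand-ins for the quantities computed during the policy update.
            approxkl = 0.01 * (ppo_epoch_idx + 1)
            pg_clipfrac = 0.1 * (minibatch_idx + 1)

            approxkl_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = approxkl
            # The wrapping style from the diff: when such a line exceeds the
            # configured length, ruff-format parenthesizes the right-hand side.
            pg_clipfrac_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = (
                pg_clipfrac
            )

# The trainers log aggregates of these buffers; here we just print the means.
print(approxkl_stats.mean().item(), pg_clipfrac_stats.mean().item())
```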
trl/trainer/reward_trainer.py: 2 additions, 2 deletions

@@ -110,7 +110,7 @@ def __init__(
             peft_config (`Dict`, defaults to `None`):
                 The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in a PEFT model.
         """
-        if type(args) == TrainingArguments:
+        if type(args) is TrainingArguments:
             warnings.warn(
                 "Using `transformers.TrainingArguments` for `args` is deprecated and will be removed in a future version. Please use `RewardConfig` instead.",
                 FutureWarning,

@@ -163,7 +163,7 @@ def __init__(
                 raise ValueError(
                     "max_length or a tokenizer must be specified when using the default RewardDataCollatorWithPadding"
                 )
-            if type(args) == TrainingArguments:
+            if type(args) is TrainingArguments:
                 if max_length is None:
                     warnings.warn(
                         "When using RewardDataCollatorWithPadding, you should set `max_length` in RewardConfig."
trl/trainer/rloo_trainer.py: 3 additions, 3 deletions

@@ -396,9 +396,9 @@ def repeat_generator():
                     entropy = torch.logsumexp(logits, dim=-1) - torch.sum(prob_dist * logits, dim=-1)
                     approxkl = 0.5 * (logprobs_diff**2).mean()
                     approxkl_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = approxkl
-                    pg_clipfrac_stats[
-                        ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx
-                    ] = pg_clipfrac
+                    pg_clipfrac_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = (
+                        pg_clipfrac
+                    )
                     pg_loss_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = pg_loss
                     entropy_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = entropy.mean()
                     ratio_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = new_ratio.mean()