huggingface · albertvillanova · Oct 1, 2025 · Oct 1, 2025
diff --git a/trl/models/utils.py b/trl/models/utils.py
@@ -90,6 +90,7 @@ def setup_chat_format(
     format: Optional[Literal["chatml"]] = "chatml",
     resize_to_multiple_of: Optional[int] = None,
 ) -> tuple[PreTrainedModel, PreTrainedTokenizer]:
+    # docstyle-ignore
     """
     Setup chat format by adding special tokens to the tokenizer, setting the correct format, and extending the
     embedding layer of the model based on the new special tokens.

diff --git a/trl/trainer/judges.py b/trl/trainer/judges.py
@@ -185,6 +185,7 @@ def judge(
 
 
 class PairRMJudge(BasePairwiseJudge):
+    # docstyle-ignore
     """
     LLM judge based on the PairRM model from AllenAI.
 

diff --git a/trl/trainer/utils.py b/trl/trainer/utils.py
@@ -217,6 +217,7 @@ def ensure_master_addr_port(addr: Optional[str] = None, port: Optional[int] = No
 
 @dataclass
 class RewardDataCollatorWithPadding:
+    # docstyle-ignore
     r"""
     Reward DataCollator class that pads the inputs to the maximum length of the batch.
 
@@ -1251,6 +1252,7 @@ def empty_cache() -> None:
 
 
 def decode_and_strip_padding(inputs: torch.Tensor, tokenizer: PreTrainedTokenizerBase) -> list[str]:
+    # docstyle-ignore
     """
     Decodes the input tensor and strips the padding tokens.
-Original file line number
+Diff line change
@@ -185,6 +185,7 @@ def judge(
     class PairRMJudge(BasePairwiseJudge):
+        # docstyle-ignore
         """
         LLM judge based on the PairRM model from AllenAI.
@@ Expand Down @@