Add experimental imports to docs (#4616)

albertvillanova · qgallouedec · web-flow · commit ddb65e8941fe · 2025-12-03T21:20:40.000+01:00
Co-authored-by: Quentin Gallouédec <45557362+qgallouedec@users.noreply.github.com>
diff --git a/docs/source/bco_trainer.md b/docs/source/bco_trainer.md
@@ -22,6 +22,8 @@ For a detailed example have a look at the `examples/scripts/bco.py` script. At a
 The `beta` refers to the hyperparameter of the implicit reward, and the dataset contains the 3 entries listed above. Note that the `model` and `ref_model` need to have the same architecture (ie decoder only or encoder-decoder).
 
 ```python
+from trl.experimental.bco import BCOConfig, BCOTrainer
+
 training_args = BCOConfig(
     beta=0.1,
 )
diff --git a/trl/experimental/papo/papo_trainer.py b/trl/experimental/papo/papo_trainer.py
@@ -40,7 +40,7 @@ class PAPOTrainer(GRPOTrainer):
 
     ```python
     from datasets import load_dataset
-    from trl import PAPOTrainer, PAPOConfig
+    from trl.experimental.papo import PAPOTrainer, PAPOConfig
 
     dataset = load_dataset("your-vlm-dataset", split="train")
 
diff --git a/trl/experimental/winrate_callback.py b/trl/experimental/winrate_callback.py
@@ -100,6 +100,10 @@ class WinRateCallback(TrainerCallback):
 
     Usage:
     ```python
+    from trl import DPOTrainer
+    from trl.experimental.judges import PairRMJudge
+    from trl.experimental.winrate_callback import WinRateCallback
+
     trainer = DPOTrainer(...)
     judge = PairRMJudge()
     win_rate_callback = WinRateCallback(judge=judge, trainer=trainer)

Original file line number	Diff line number	Diff line change
@@ -22,6 +22,8 @@ For a detailed example have a look at the `examples/scripts/bco.py` script. At a
`22`	`22`	The `beta` refers to the hyperparameter of the implicit reward, and the dataset contains the 3 entries listed above. Note that the `model` and `ref_model` need to have the same architecture (ie decoder only or encoder-decoder).
`23`	`23`
`24`	`24`	```python
	`25`	`+from trl.experimental.bco import BCOConfig, BCOTrainer`
	`26`	`+`
`25`	`27`	`training_args = BCOConfig(`
`26`	`28`	`beta=0.1,`
`27`	`29`	`)`