huggingface · behroozazarkhalili · Nov 2, 2025 · Nov 3, 2025 · Nov 3, 2025 · Nov 4, 2025
diff --git a/docs/source/clis.md b/docs/source/clis.md
@@ -53,6 +53,33 @@ trl reward \
   --dataset_name trl-lib/ultrafeedback_binarized
 ```
 
+</hfoption>
+<hfoption id="GRPO">
+
+```bash
+trl grpo \
+  --model_name_or_path Qwen/Qwen2.5-0.5B \
+  --dataset_name trl-lib/ultrafeedback-prompt
+```
+
+</hfoption>
+<hfoption id="RLOO">
+
+```bash
+trl rloo \
+  --model_name_or_path Qwen/Qwen2.5-0.5B \
+  --dataset_name AI-MO/NuminaMath-TIR
+```
+
+</hfoption>
+<hfoption id="KTO">
+
+```bash
+trl kto \
+  --model_name_or_path Qwen/Qwen2.5-0.5B \
+  --dataset_name trl-lib/kto-mix-14k
+```
+
 </hfoption>
 </hfoptions>
 
@@ -105,6 +132,51 @@ Launch with:
 trl reward --config reward_config.yaml
 ```
 
+</hfoption>
+<hfoption id="GRPO">
+
+```yaml
+# grpo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: trl-lib/ultrafeedback-prompt
+```
+
+Launch with:
+
+```bash
+trl grpo --config grpo_config.yaml
+```
+
+</hfoption>
+<hfoption id="RLOO">
+
+```yaml
+# rloo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: AI-MO/NuminaMath-TIR
+```
+
+Launch with:
+
+```bash
+trl rloo --config rloo_config.yaml
+```
+
+</hfoption>
+<hfoption id="KTO">
+
+```yaml
+# kto_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: trl-lib/kto-mix-14k
+```
+
+Launch with:
+
+```bash
+trl kto --config kto_config.yaml
+```
+
 </hfoption>
 </hfoptions>
 
@@ -192,6 +264,84 @@ Launch with:
 trl reward --config reward_config.yaml
 ```
 
+</hfoption>
+<hfoption id="GRPO inline">
+
+```bash
+trl grpo \
+  --model_name_or_path Qwen/Qwen2.5-0.5B \
+  --dataset_name trl-lib/ultrafeedback-prompt \
+  --num_processes 4
+```
+
+</hfoption>
+<hfoption id="GRPO w/ config file">
+
+```yaml
+# grpo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: trl-lib/ultrafeedback-prompt
+num_processes: 4
+```
+
+Launch with:
+
+```bash
+trl grpo --config grpo_config.yaml
+```
+
+</hfoption>
+<hfoption id="RLOO inline">
+
+```bash
+trl rloo \
+  --model_name_or_path Qwen/Qwen2.5-0.5B \
+  --dataset_name AI-MO/NuminaMath-TIR \
+  --num_processes 4
+```
+
+</hfoption>
+<hfoption id="RLOO w/ config file">
+
+```yaml
+# rloo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: AI-MO/NuminaMath-TIR
+num_processes: 4
+```
+
+Launch with:
+
+```bash
+trl rloo --config rloo_config.yaml
+```
+
+</hfoption>
+<hfoption id="KTO inline">
+
+```bash
+trl kto \
+  --model_name_or_path Qwen/Qwen2.5-0.5B \
+  --dataset_name trl-lib/kto-mix-14k \
+  --num_processes 4
+```
+
+</hfoption>
+<hfoption id="KTO w/ config file">
+
+```yaml
+# kto_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: trl-lib/kto-mix-14k
+num_processes: 4
+```
+
+Launch with:
+
+```bash
+trl kto --config kto_config.yaml
+```
+
 </hfoption>
 </hfoptions>
 
@@ -298,6 +448,84 @@ Launch with:
 trl reward --config reward_config.yaml
 ```
 
+</hfoption>
+<hfoption id="GRPO inline">
+
+```bash
+trl grpo \
+  --model_name_or_path Qwen/Qwen2.5-0.5B \
+  --dataset_name trl-lib/ultrafeedback-prompt \
+  --accelerate_config zero2  # or path/to/my/accelerate/config.yaml
+```
+
+</hfoption>
+<hfoption id="GRPO w/ config file">
+
+```yaml
+# grpo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: trl-lib/ultrafeedback-prompt
+accelerate_config: zero2  # or path/to/my/accelerate/config.yaml
+```
+
+Launch with:
+
+```bash
+trl grpo --config grpo_config.yaml
+```
+
+</hfoption>
+<hfoption id="RLOO inline">
+
+```bash
+trl rloo \
+  --model_name_or_path Qwen/Qwen2.5-0.5B \
+  --dataset_name AI-MO/NuminaMath-TIR \
+  --accelerate_config zero2  # or path/to/my/accelerate/config.yaml
+```
+
+</hfoption>
+<hfoption id="RLOO w/ config file">
+
+```yaml
+# rloo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: AI-MO/NuminaMath-TIR
+accelerate_config: zero2  # or path/to/my/accelerate/config.yaml
+```
+
+Launch with:
+
+```bash
+trl rloo --config rloo_config.yaml
+```
+
+</hfoption>
+<hfoption id="KTO inline">
+
+```bash
+trl kto \
+  --model_name_or_path Qwen/Qwen2.5-0.5B \
+  --dataset_name trl-lib/kto-mix-14k \
+  --accelerate_config zero2  # or path/to/my/accelerate/config.yaml
+```
+
+</hfoption>
+<hfoption id="KTO w/ config file">
+
+```yaml
+# kto_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: trl-lib/kto-mix-14k
+accelerate_config: zero2  # or path/to/my/accelerate/config.yaml
+```
+
+Launch with:
+
+```bash
+trl kto --config kto_config.yaml
+```
+
 </hfoption>
 </hfoptions>
 
@@ -356,6 +584,57 @@ Launch with:
 trl reward --config reward_config.yaml
 ```
 
+</hfoption>
+<hfoption id="GRPO">
+
+```yaml
+# grpo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+datasets:
+  - path: trl-lib/ultrafeedback-prompt
+  - path: BAAI/Infinity-Preference
+```
+
+Launch with:
+
+```bash
+trl grpo --config grpo_config.yaml
+```
+
+</hfoption>
+<hfoption id="RLOO">
+
+```yaml
+# rloo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+datasets:
+  - path: AI-MO/NuminaMath-TIR
+  - path: deepmind/math_dataset
+```
+
+Launch with:
+
+```bash
+trl rloo --config rloo_config.yaml
+```
+
+</hfoption>
+<hfoption id="KTO">
+
+```yaml
+# kto_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+datasets:
+  - path: trl-lib/kto-mix-14k
+  - path: argilla/ultrafeedback-binarized-preferences-cleaned
+```
+
+Launch with:
+
+```bash
+trl kto --config kto_config.yaml
+```
+
 </hfoption>
 </hfoptions>