diff --git a/docs/source/clis.md b/docs/source/clis.md
index 666584decf4..efe47e84f3c 100644
--- a/docs/source/clis.md
+++ b/docs/source/clis.md
@@ -53,6 +53,33 @@ trl reward \
--dataset_name trl-lib/ultrafeedback_binarized
```
+
+
+
+```bash
+trl grpo \
+ --model_name_or_path Qwen/Qwen2.5-0.5B \
+ --dataset_name trl-lib/ultrafeedback-prompt
+```
+
+
+
+
+```bash
+trl rloo \
+ --model_name_or_path Qwen/Qwen2.5-0.5B \
+ --dataset_name AI-MO/NuminaMath-TIR
+```
+
+
+
+
+```bash
+trl kto \
+ --model_name_or_path Qwen/Qwen2.5-0.5B \
+ --dataset_name trl-lib/kto-mix-14k
+```
+
@@ -105,6 +132,51 @@ Launch with:
trl reward --config reward_config.yaml
```
+
+
+
+```yaml
+# grpo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: trl-lib/ultrafeedback-prompt
+```
+
+Launch with:
+
+```bash
+trl grpo --config grpo_config.yaml
+```
+
+
+
+
+```yaml
+# rloo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: AI-MO/NuminaMath-TIR
+```
+
+Launch with:
+
+```bash
+trl rloo --config rloo_config.yaml
+```
+
+
+
+
+```yaml
+# kto_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: trl-lib/kto-mix-14k
+```
+
+Launch with:
+
+```bash
+trl kto --config kto_config.yaml
+```
+
@@ -192,6 +264,84 @@ Launch with:
trl reward --config reward_config.yaml
```
+
+
+
+```bash
+trl grpo \
+ --model_name_or_path Qwen/Qwen2.5-0.5B \
+ --dataset_name trl-lib/ultrafeedback-prompt \
+ --num_processes 4
+```
+
+
+
+
+```yaml
+# grpo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: trl-lib/ultrafeedback-prompt
+num_processes: 4
+```
+
+Launch with:
+
+```bash
+trl grpo --config grpo_config.yaml
+```
+
+
+
+
+```bash
+trl rloo \
+ --model_name_or_path Qwen/Qwen2.5-0.5B \
+ --dataset_name AI-MO/NuminaMath-TIR \
+ --num_processes 4
+```
+
+
+
+
+```yaml
+# rloo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: AI-MO/NuminaMath-TIR
+num_processes: 4
+```
+
+Launch with:
+
+```bash
+trl rloo --config rloo_config.yaml
+```
+
+
+
+
+```bash
+trl kto \
+ --model_name_or_path Qwen/Qwen2.5-0.5B \
+ --dataset_name trl-lib/kto-mix-14k \
+ --num_processes 4
+```
+
+
+
+
+```yaml
+# kto_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: trl-lib/kto-mix-14k
+num_processes: 4
+```
+
+Launch with:
+
+```bash
+trl kto --config kto_config.yaml
+```
+
@@ -298,6 +448,84 @@ Launch with:
trl reward --config reward_config.yaml
```
+
+
+
+```bash
+trl grpo \
+ --model_name_or_path Qwen/Qwen2.5-0.5B \
+ --dataset_name trl-lib/ultrafeedback-prompt \
+ --accelerate_config zero2 # or path/to/my/accelerate/config.yaml
+```
+
+
+
+
+```yaml
+# grpo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: trl-lib/ultrafeedback-prompt
+accelerate_config: zero2 # or path/to/my/accelerate/config.yaml
+```
+
+Launch with:
+
+```bash
+trl grpo --config grpo_config.yaml
+```
+
+
+
+
+```bash
+trl rloo \
+ --model_name_or_path Qwen/Qwen2.5-0.5B \
+ --dataset_name AI-MO/NuminaMath-TIR \
+ --accelerate_config zero2 # or path/to/my/accelerate/config.yaml
+```
+
+
+
+
+```yaml
+# rloo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: AI-MO/NuminaMath-TIR
+accelerate_config: zero2 # or path/to/my/accelerate/config.yaml
+```
+
+Launch with:
+
+```bash
+trl rloo --config rloo_config.yaml
+```
+
+
+
+
+```bash
+trl kto \
+ --model_name_or_path Qwen/Qwen2.5-0.5B \
+ --dataset_name trl-lib/kto-mix-14k \
+ --accelerate_config zero2 # or path/to/my/accelerate/config.yaml
+```
+
+
+
+
+```yaml
+# kto_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+dataset_name: trl-lib/kto-mix-14k
+accelerate_config: zero2 # or path/to/my/accelerate/config.yaml
+```
+
+Launch with:
+
+```bash
+trl kto --config kto_config.yaml
+```
+
@@ -356,6 +584,57 @@ Launch with:
trl reward --config reward_config.yaml
```
+
+
+
+```yaml
+# grpo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+datasets:
+ - path: trl-lib/ultrafeedback-prompt
+ - path: BAAI/Infinity-Preference
+```
+
+Launch with:
+
+```bash
+trl grpo --config grpo_config.yaml
+```
+
+
+
+
+```yaml
+# rloo_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+datasets:
+ - path: AI-MO/NuminaMath-TIR
+ - path: deepmind/math_dataset
+```
+
+Launch with:
+
+```bash
+trl rloo --config rloo_config.yaml
+```
+
+
+
+
+```yaml
+# kto_config.yaml
+model_name_or_path: Qwen/Qwen2.5-0.5B
+datasets:
+ - path: trl-lib/kto-mix-14k
+ - path: argilla/ultrafeedback-binarized-preferences-cleaned
+```
+
+Launch with:
+
+```bash
+trl kto --config kto_config.yaml
+```
+