diff --git a/docs/source/clis.md b/docs/source/clis.md index 666584decf4..efe47e84f3c 100644 --- a/docs/source/clis.md +++ b/docs/source/clis.md @@ -53,6 +53,33 @@ trl reward \ --dataset_name trl-lib/ultrafeedback_binarized ``` + + + +```bash +trl grpo \ + --model_name_or_path Qwen/Qwen2.5-0.5B \ + --dataset_name trl-lib/ultrafeedback-prompt +``` + + + + +```bash +trl rloo \ + --model_name_or_path Qwen/Qwen2.5-0.5B \ + --dataset_name AI-MO/NuminaMath-TIR +``` + + + + +```bash +trl kto \ + --model_name_or_path Qwen/Qwen2.5-0.5B \ + --dataset_name trl-lib/kto-mix-14k +``` + @@ -105,6 +132,51 @@ Launch with: trl reward --config reward_config.yaml ``` + + + +```yaml +# grpo_config.yaml +model_name_or_path: Qwen/Qwen2.5-0.5B +dataset_name: trl-lib/ultrafeedback-prompt +``` + +Launch with: + +```bash +trl grpo --config grpo_config.yaml +``` + + + + +```yaml +# rloo_config.yaml +model_name_or_path: Qwen/Qwen2.5-0.5B +dataset_name: AI-MO/NuminaMath-TIR +``` + +Launch with: + +```bash +trl rloo --config rloo_config.yaml +``` + + + + +```yaml +# kto_config.yaml +model_name_or_path: Qwen/Qwen2.5-0.5B +dataset_name: trl-lib/kto-mix-14k +``` + +Launch with: + +```bash +trl kto --config kto_config.yaml +``` + @@ -192,6 +264,84 @@ Launch with: trl reward --config reward_config.yaml ``` + + + +```bash +trl grpo \ + --model_name_or_path Qwen/Qwen2.5-0.5B \ + --dataset_name trl-lib/ultrafeedback-prompt \ + --num_processes 4 +``` + + + + +```yaml +# grpo_config.yaml +model_name_or_path: Qwen/Qwen2.5-0.5B +dataset_name: trl-lib/ultrafeedback-prompt +num_processes: 4 +``` + +Launch with: + +```bash +trl grpo --config grpo_config.yaml +``` + + + + +```bash +trl rloo \ + --model_name_or_path Qwen/Qwen2.5-0.5B \ + --dataset_name AI-MO/NuminaMath-TIR \ + --num_processes 4 +``` + + + + +```yaml +# rloo_config.yaml +model_name_or_path: Qwen/Qwen2.5-0.5B +dataset_name: AI-MO/NuminaMath-TIR +num_processes: 4 +``` + +Launch with: + +```bash +trl rloo --config rloo_config.yaml +``` + + + + +```bash +trl kto \ + --model_name_or_path Qwen/Qwen2.5-0.5B \ + --dataset_name trl-lib/kto-mix-14k \ + --num_processes 4 +``` + + + + +```yaml +# kto_config.yaml +model_name_or_path: Qwen/Qwen2.5-0.5B +dataset_name: trl-lib/kto-mix-14k +num_processes: 4 +``` + +Launch with: + +```bash +trl kto --config kto_config.yaml +``` + @@ -298,6 +448,84 @@ Launch with: trl reward --config reward_config.yaml ``` + + + +```bash +trl grpo \ + --model_name_or_path Qwen/Qwen2.5-0.5B \ + --dataset_name trl-lib/ultrafeedback-prompt \ + --accelerate_config zero2 # or path/to/my/accelerate/config.yaml +``` + + + + +```yaml +# grpo_config.yaml +model_name_or_path: Qwen/Qwen2.5-0.5B +dataset_name: trl-lib/ultrafeedback-prompt +accelerate_config: zero2 # or path/to/my/accelerate/config.yaml +``` + +Launch with: + +```bash +trl grpo --config grpo_config.yaml +``` + + + + +```bash +trl rloo \ + --model_name_or_path Qwen/Qwen2.5-0.5B \ + --dataset_name AI-MO/NuminaMath-TIR \ + --accelerate_config zero2 # or path/to/my/accelerate/config.yaml +``` + + + + +```yaml +# rloo_config.yaml +model_name_or_path: Qwen/Qwen2.5-0.5B +dataset_name: AI-MO/NuminaMath-TIR +accelerate_config: zero2 # or path/to/my/accelerate/config.yaml +``` + +Launch with: + +```bash +trl rloo --config rloo_config.yaml +``` + + + + +```bash +trl kto \ + --model_name_or_path Qwen/Qwen2.5-0.5B \ + --dataset_name trl-lib/kto-mix-14k \ + --accelerate_config zero2 # or path/to/my/accelerate/config.yaml +``` + + + + +```yaml +# kto_config.yaml +model_name_or_path: Qwen/Qwen2.5-0.5B +dataset_name: trl-lib/kto-mix-14k +accelerate_config: zero2 # or path/to/my/accelerate/config.yaml +``` + +Launch with: + +```bash +trl kto --config kto_config.yaml +``` + @@ -356,6 +584,57 @@ Launch with: trl reward --config reward_config.yaml ``` + + + +```yaml +# grpo_config.yaml +model_name_or_path: Qwen/Qwen2.5-0.5B +datasets: + - path: trl-lib/ultrafeedback-prompt + - path: BAAI/Infinity-Preference +``` + +Launch with: + +```bash +trl grpo --config grpo_config.yaml +``` + + + + +```yaml +# rloo_config.yaml +model_name_or_path: Qwen/Qwen2.5-0.5B +datasets: + - path: AI-MO/NuminaMath-TIR + - path: deepmind/math_dataset +``` + +Launch with: + +```bash +trl rloo --config rloo_config.yaml +``` + + + + +```yaml +# kto_config.yaml +model_name_or_path: Qwen/Qwen2.5-0.5B +datasets: + - path: trl-lib/kto-mix-14k + - path: argilla/ultrafeedback-binarized-preferences-cleaned +``` + +Launch with: + +```bash +trl kto --config kto_config.yaml +``` +