diff --git a/docs/sphinx_doc/source/tutorial/example_async_mode.md b/docs/sphinx_doc/source/tutorial/example_async_mode.md index 17db92881f..f10089c001 100644 --- a/docs/sphinx_doc/source/tutorial/example_async_mode.md +++ b/docs/sphinx_doc/source/tutorial/example_async_mode.md @@ -21,6 +21,8 @@ algorithm: repeat_times: 8 model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} + max_response_tokens: 1024 + max_model_len: 2048 cluster: node_num: 1 gpu_per_node: 4 @@ -69,6 +71,8 @@ algorithm: lr: 1e-6 model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} + max_response_tokens: 1024 + max_model_len: 2048 cluster: node_num: 1 gpu_per_node: 4 @@ -128,6 +132,8 @@ algorithm: repeat_times: 8 model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} + max_response_tokens: 1024 + max_model_len: 2048 cluster: # important node_num: 1 gpu_per_node: 8 diff --git a/docs/sphinx_doc/source/tutorial/example_dpo.md b/docs/sphinx_doc/source/tutorial/example_dpo.md index 3b50ed58c0..3376f43161 100644 --- a/docs/sphinx_doc/source/tutorial/example_dpo.md +++ b/docs/sphinx_doc/source/tutorial/example_dpo.md @@ -66,6 +66,8 @@ algorithm: checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} + max_response_tokens: 1024 + max_model_len: 1536 cluster: node_num: 1 gpu_per_node: 8 @@ -114,6 +116,8 @@ algorithm: checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} + max_response_tokens: 10240 + max_model_len: 10752 cluster: node_num: 1 gpu_per_node: 2 diff --git a/docs/sphinx_doc/source/tutorial/example_reasoning_basic.md b/docs/sphinx_doc/source/tutorial/example_reasoning_basic.md index 74a252ac93..e0d2024f4f 100644 --- a/docs/sphinx_doc/source/tutorial/example_reasoning_basic.md +++ b/docs/sphinx_doc/source/tutorial/example_reasoning_basic.md @@ -59,6 +59,8 @@ algorithm: lr: 1e-5 model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} + max_response_tokens: 1024 + max_model_len: 2048 cluster: node_num: 1 gpu_per_node: 2 diff --git a/docs/sphinx_doc/source_zh/tutorial/example_async_mode.md b/docs/sphinx_doc/source_zh/tutorial/example_async_mode.md index 804690ce67..804fa01b49 100644 --- a/docs/sphinx_doc/source_zh/tutorial/example_async_mode.md +++ b/docs/sphinx_doc/source_zh/tutorial/example_async_mode.md @@ -21,6 +21,8 @@ algorithm: repeat_times: 8 model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} + max_response_tokens: 1024 + max_model_len: 2048 cluster: node_num: 1 gpu_per_node: 4 @@ -69,6 +71,8 @@ algorithm: lr: 1e-6 model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} + max_response_tokens: 1024 + max_model_len: 2048 cluster: node_num: 1 gpu_per_node: 4 @@ -128,6 +132,8 @@ algorithm: repeat_times: 8 model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} + max_response_tokens: 1024 + max_model_len: 2048 cluster: # important node_num: 1 gpu_per_node: 8 diff --git a/docs/sphinx_doc/source_zh/tutorial/example_dpo.md b/docs/sphinx_doc/source_zh/tutorial/example_dpo.md index 6b61d004cf..4396bad063 100644 --- a/docs/sphinx_doc/source_zh/tutorial/example_dpo.md +++ b/docs/sphinx_doc/source_zh/tutorial/example_dpo.md @@ -68,6 +68,8 @@ algorithm: checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} + max_response_tokens: 1024 + max_model_len: 1536 cluster: node_num: 1 gpu_per_node: 8 @@ -116,6 +118,8 @@ algorithm: checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} + max_response_tokens: 10240 + max_model_len: 10752 cluster: node_num: 1 gpu_per_node: 2 diff --git a/docs/sphinx_doc/source_zh/tutorial/example_reasoning_basic.md b/docs/sphinx_doc/source_zh/tutorial/example_reasoning_basic.md index 0b9cdd7cae..17129a6555 100644 --- a/docs/sphinx_doc/source_zh/tutorial/example_reasoning_basic.md +++ b/docs/sphinx_doc/source_zh/tutorial/example_reasoning_basic.md @@ -59,6 +59,8 @@ algorithm: lr: 1e-5 model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} + max_response_tokens: 1024 + max_model_len: 2048 cluster: node_num: 1 gpu_per_node: 2 diff --git a/examples/asymre_gsm8k/gsm8k.yaml b/examples/asymre_gsm8k/gsm8k.yaml index 781339556c..d108ca24cd 100644 --- a/examples/asymre_gsm8k/gsm8k.yaml +++ b/examples/asymre_gsm8k/gsm8k.yaml @@ -8,7 +8,7 @@ checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 - max_model_len: 1280 + max_model_len: 2048 algorithm: algorithm_type: asymre repeat_times: 8 diff --git a/examples/asymre_math/math.yaml b/examples/asymre_math/math.yaml index c1636f5318..8ad903030a 100644 --- a/examples/asymre_math/math.yaml +++ b/examples/asymre_math/math.yaml @@ -6,8 +6,8 @@ name: asymre_math checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-3B-Instruct} # the path to your model - max_response_tokens: 1024 - max_model_len: 1280 + max_prompt_tokens: 1024 + max_response_tokens: 2048 algorithm: algorithm_type: asymre repeat_times: 8 @@ -62,8 +62,6 @@ explorer: enable_prefix_caching: false enforce_eager: true dtype: bfloat16 - max_prompt_tokens: 1024 - max_response_tokens: 2048 seed: 42 synchronizer: sync_method: 'nccl' diff --git a/examples/async_gsm8k/explorer.yaml b/examples/async_gsm8k/explorer.yaml index 1c4a457b95..9314b5340c 100644 --- a/examples/async_gsm8k/explorer.yaml +++ b/examples/async_gsm8k/explorer.yaml @@ -8,7 +8,7 @@ algorithm: model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 - max_model_len: 1280 + max_model_len: 2048 cluster: node_num: 1 gpu_per_node: 4 diff --git a/examples/async_gsm8k/trainer.yaml b/examples/async_gsm8k/trainer.yaml index 1c907bee33..a653a1fe3e 100644 --- a/examples/async_gsm8k/trainer.yaml +++ b/examples/async_gsm8k/trainer.yaml @@ -10,7 +10,7 @@ algorithm: model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 - max_model_len: 1280 + max_model_len: 2048 cluster: node_num: 1 gpu_per_node: 4 diff --git a/examples/cispo_gsm8k/gsm8k.yaml b/examples/cispo_gsm8k/gsm8k.yaml index 7875d2ac44..aff4c50f97 100644 --- a/examples/cispo_gsm8k/gsm8k.yaml +++ b/examples/cispo_gsm8k/gsm8k.yaml @@ -9,7 +9,7 @@ algorithm: model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 - max_model_len: 1280 + max_model_len: 2048 cluster: node_num: 1 gpu_per_node: 8 diff --git a/examples/grpo_gsm8k/gsm8k.yaml b/examples/grpo_gsm8k/gsm8k.yaml index 0e82b1b42d..b0640f089c 100644 --- a/examples/grpo_gsm8k/gsm8k.yaml +++ b/examples/grpo_gsm8k/gsm8k.yaml @@ -9,7 +9,7 @@ algorithm: model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 - max_model_len: 1280 + max_model_len: 2048 cluster: node_num: 1 gpu_per_node: 8 diff --git a/examples/grpo_gsm8k_experience_pipeline/gsm8k.yaml b/examples/grpo_gsm8k_experience_pipeline/gsm8k.yaml index c9c88b71e1..e633f7bbc1 100644 --- a/examples/grpo_gsm8k_experience_pipeline/gsm8k.yaml +++ b/examples/grpo_gsm8k_experience_pipeline/gsm8k.yaml @@ -29,7 +29,7 @@ algorithm: model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 - max_model_len: 1280 + max_model_len: 2048 cluster: node_num: 1 gpu_per_node: 8 diff --git a/examples/grpo_gsm8k_ruler/gsm8k_ruler.yaml b/examples/grpo_gsm8k_ruler/gsm8k_ruler.yaml index cd3fc7964d..e0297e099b 100644 --- a/examples/grpo_gsm8k_ruler/gsm8k_ruler.yaml +++ b/examples/grpo_gsm8k_ruler/gsm8k_ruler.yaml @@ -11,7 +11,7 @@ algorithm: model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 - max_model_len: 1280 + max_model_len: 2048 cluster: node_num: 1 gpu_per_node: 8 diff --git a/examples/grpo_gsm8k_task_pipeline/gsm8k.yaml b/examples/grpo_gsm8k_task_pipeline/gsm8k.yaml index 6f50110f34..e519e5c806 100644 --- a/examples/grpo_gsm8k_task_pipeline/gsm8k.yaml +++ b/examples/grpo_gsm8k_task_pipeline/gsm8k.yaml @@ -28,7 +28,7 @@ data_processor: model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 - max_model_len: 1280 + max_model_len: 2048 cluster: node_num: 1 gpu_per_node: 8 diff --git a/examples/grpo_lora_gsm8k/gsm8k.yaml b/examples/grpo_lora_gsm8k/gsm8k.yaml index 9e0198ac71..6818d82a74 100644 --- a/examples/grpo_lora_gsm8k/gsm8k.yaml +++ b/examples/grpo_lora_gsm8k/gsm8k.yaml @@ -9,7 +9,7 @@ algorithm: model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 - max_model_len: 1280 + max_model_len: 2048 lora_configs: - name: lora lora_rank: 32 diff --git a/examples/ppo_countdown/countdown.yaml b/examples/ppo_countdown/countdown.yaml index c7bdfc9774..a890c7e532 100644 --- a/examples/ppo_countdown/countdown.yaml +++ b/examples/ppo_countdown/countdown.yaml @@ -9,7 +9,7 @@ algorithm: model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 - max_model_len: 1280 + max_model_len: 2048 cluster: node_num: 1 gpu_per_node: 8 diff --git a/examples/ppo_countdown_megatron/countdown.yaml b/examples/ppo_countdown_megatron/countdown.yaml index bb592d464e..1d8ffa2b13 100644 --- a/examples/ppo_countdown_megatron/countdown.yaml +++ b/examples/ppo_countdown_megatron/countdown.yaml @@ -9,7 +9,7 @@ algorithm: model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 - max_model_len: 1280 + max_model_len: 2048 cluster: node_num: 1 gpu_per_node: 8 diff --git a/examples/rec_gsm8k/gsm8k.yaml b/examples/rec_gsm8k/gsm8k.yaml index 2af9c6e7ed..c0dd6a3f07 100644 --- a/examples/rec_gsm8k/gsm8k.yaml +++ b/examples/rec_gsm8k/gsm8k.yaml @@ -5,7 +5,7 @@ mode: both model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-3B-Instruct} max_response_tokens: 1024 - max_model_len: 1280 + max_model_len: 2048 algorithm: algorithm_type: rec repeat_times: 8 diff --git a/examples/sppo_gsm8k/gsm8k.yaml b/examples/sppo_gsm8k/gsm8k.yaml index 11c6529f6b..295c82116e 100644 --- a/examples/sppo_gsm8k/gsm8k.yaml +++ b/examples/sppo_gsm8k/gsm8k.yaml @@ -8,7 +8,7 @@ checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 - max_model_len: 1280 + max_model_len: 2048 algorithm: algorithm_type: sppo repeat_times: 8 diff --git a/examples/topr_gsm8k/gsm8k.yaml b/examples/topr_gsm8k/gsm8k.yaml index e8eeb425d1..252b5e7d7a 100644 --- a/examples/topr_gsm8k/gsm8k.yaml +++ b/examples/topr_gsm8k/gsm8k.yaml @@ -9,7 +9,7 @@ algorithm: model: model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 - max_model_len: 1280 + max_model_len: 2048 cluster: node_num: 1 gpu_per_node: 8