diff --git a/docs/examples/config.rst b/docs/examples/config.rst index 396538b7..1a4e4c72 100644 --- a/docs/examples/config.rst +++ b/docs/examples/config.rst @@ -19,7 +19,6 @@ Data max_prompt_length: 512 max_response_length: 512 train_batch_size: 1024 - val_batch_size: 1312 return_raw_input_ids: False # This should be set to true when the tokenizer between policy and rm differs return_raw_chat: False @@ -39,8 +38,6 @@ Data algorithms (e.g. PPO) generates up to this length - ``data.train_batch_size``: Batch size sampled for one training iteration of different RL algorithms. -- ``data.val_batch_size``: Batch size sampled for one validation - iteration. - ``data.return_raw_input_ids``: Whether to return the original input_ids without adding chat template. This is mainly used to accommodate situations where the reward model's chat template differs diff --git a/docs/examples/gsm8k_example.rst b/docs/examples/gsm8k_example.rst index d179c4a6..b636375c 100644 --- a/docs/examples/gsm8k_example.rst +++ b/docs/examples/gsm8k_example.rst @@ -130,7 +130,6 @@ The script of run_deepseek7b_llm.sh data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=512 \ actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ diff --git a/docs/start/quickstart.rst b/docs/start/quickstart.rst index c3be7d62..e81c63a7 100644 --- a/docs/start/quickstart.rst +++ b/docs/start/quickstart.rst @@ -85,7 +85,6 @@ Set the ``data.train_files`` ,\ ``data.val_files``, ``actor_rollout_ref.model.pa data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=256 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=256 \ actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B-Instruct \ diff --git a/examples/grpo_trainer/run_deepseek7b_llm.sh b/examples/grpo_trainer/run_deepseek7b_llm.sh index b50045c1..21562caa 100644 --- a/examples/grpo_trainer/run_deepseek7b_llm.sh +++ b/examples/grpo_trainer/run_deepseek7b_llm.sh @@ -5,7 +5,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=1024 \ actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ diff --git a/examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh b/examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh index c1562cdf..e2f83c52 100644 --- a/examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh +++ b/examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh @@ -5,7 +5,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=512 \ actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ diff --git a/examples/grpo_trainer/run_qwen2-7b.sh b/examples/grpo_trainer/run_qwen2-7b.sh index 4e299047..6028b223 100644 --- a/examples/grpo_trainer/run_qwen2-7b.sh +++ b/examples/grpo_trainer/run_qwen2-7b.sh @@ -7,7 +7,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ 
data.max_response_length=1024 \ actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \ diff --git a/examples/grpo_trainer/run_qwen2-7b_seq_balance.sh b/examples/grpo_trainer/run_qwen2-7b_seq_balance.sh index 74eb1d63..ae18603d 100644 --- a/examples/grpo_trainer/run_qwen2-7b_seq_balance.sh +++ b/examples/grpo_trainer/run_qwen2-7b_seq_balance.sh @@ -7,7 +7,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=1024 \ actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \ diff --git a/examples/ppo_trainer/run_deepseek7b_llm.sh b/examples/ppo_trainer/run_deepseek7b_llm.sh index 874f31bc..0d3a6fb3 100644 --- a/examples/ppo_trainer/run_deepseek7b_llm.sh +++ b/examples/ppo_trainer/run_deepseek7b_llm.sh @@ -4,7 +4,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=512 \ actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ diff --git a/examples/ppo_trainer/run_deepseek7b_llm_sp2.sh b/examples/ppo_trainer/run_deepseek7b_llm_sp2.sh index 8d61d906..6863b66c 100644 --- a/examples/ppo_trainer/run_deepseek7b_llm_sp2.sh +++ b/examples/ppo_trainer/run_deepseek7b_llm_sp2.sh @@ -4,7 +4,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=512 \ actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ diff --git a/examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh b/examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh index a7b16a7c..82df706b 100644 --- a/examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh +++ b/examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh @@ -7,7 +7,6 @@ python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megat data.train_files="$train_files" \ data.val_files="$test_files" \ data.train_batch_size=512 \ - data.val_batch_size=128 \ data.max_prompt_length=128 \ data.max_response_length=128 \ actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ diff --git a/examples/ppo_trainer/run_deepseek_math_gsm8k_megatron.sh b/examples/ppo_trainer/run_deepseek_math_gsm8k_megatron.sh index 17b170a1..82459115 100644 --- a/examples/ppo_trainer/run_deepseek_math_gsm8k_megatron.sh +++ b/examples/ppo_trainer/run_deepseek_math_gsm8k_megatron.sh @@ -12,7 +12,6 @@ python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megat data.train_files="$train_files" \ data.val_files="$test_files" \ data.train_batch_size=1024 \ - data.val_batch_size=6312 \ data.max_prompt_length=1024 \ data.max_response_length=512 \ actor_rollout_ref.model.path=deepseek-ai/deepseek-coder-6.7b-instruct \ diff --git a/examples/ppo_trainer/run_deepseek_megatron.sh b/examples/ppo_trainer/run_deepseek_megatron.sh index c41aae35..bb915d23 100644 --- a/examples/ppo_trainer/run_deepseek_megatron.sh +++ b/examples/ppo_trainer/run_deepseek_megatron.sh @@ -13,7 +13,6 @@ python3 -m verl.trainer.main_ppo --config-path=config \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ 
data.max_response_length=512 \ actor_rollout_ref.model.path=$HOME/models/deepseek-llm-7b-chat \ diff --git a/examples/ppo_trainer/run_gemma.sh b/examples/ppo_trainer/run_gemma.sh index a0ab2c08..1863e22b 100644 --- a/examples/ppo_trainer/run_gemma.sh +++ b/examples/ppo_trainer/run_gemma.sh @@ -4,7 +4,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=512 \ - data.val_batch_size=1312 \ data.max_prompt_length=1024 \ data.max_response_length=512 \ actor_rollout_ref.model.path=google/gemma-2-2b-it \ diff --git a/examples/ppo_trainer/run_qwen2-7b.sh b/examples/ppo_trainer/run_qwen2-7b.sh index fae4ab54..3a7ab9f1 100644 --- a/examples/ppo_trainer/run_qwen2-7b.sh +++ b/examples/ppo_trainer/run_qwen2-7b.sh @@ -12,7 +12,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files="$train_files" \ data.val_files="$test_files" \ data.train_batch_size=1024 \ - data.val_batch_size=6312 \ data.max_prompt_length=1024 \ data.max_response_length=512 \ actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \ diff --git a/examples/ppo_trainer/run_qwen2-7b_math_gsm8k_megatron.sh b/examples/ppo_trainer/run_qwen2-7b_math_gsm8k_megatron.sh index 0a66e83f..9d162c6b 100644 --- a/examples/ppo_trainer/run_qwen2-7b_math_gsm8k_megatron.sh +++ b/examples/ppo_trainer/run_qwen2-7b_math_gsm8k_megatron.sh @@ -14,7 +14,6 @@ python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megat data.train_files="$train_files" \ data.val_files="$test_files" \ data.train_batch_size=1024 \ - data.val_batch_size=6312 \ data.max_prompt_length=1024 \ data.max_response_length=512 \ actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \ diff --git a/examples/ppo_trainer/run_qwen2-7b_rm.sh b/examples/ppo_trainer/run_qwen2-7b_rm.sh index 360b8e7c..988741f6 100644 --- a/examples/ppo_trainer/run_qwen2-7b_rm.sh +++ b/examples/ppo_trainer/run_qwen2-7b_rm.sh @@ -26,7 +26,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files="$train_files" \ data.val_files="$test_files" \ data.train_batch_size=1024 \ - data.val_batch_size=6312 \ data.max_prompt_length=1024 \ data.max_response_length=512 \ data.return_raw_chat=True \ diff --git a/examples/ppo_trainer/run_qwen2-7b_rm_seq_balance.sh b/examples/ppo_trainer/run_qwen2-7b_rm_seq_balance.sh index 4e3d89de..fca90a2e 100644 --- a/examples/ppo_trainer/run_qwen2-7b_rm_seq_balance.sh +++ b/examples/ppo_trainer/run_qwen2-7b_rm_seq_balance.sh @@ -12,7 +12,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files="$train_files" \ data.val_files="$test_files" \ data.train_batch_size=4096 \ - data.val_batch_size=1312 \ data.max_prompt_length=4096 \ data.max_response_length=4096 \ data.return_raw_chat=True \ diff --git a/examples/ppo_trainer/run_qwen2-7b_seq_balance.sh b/examples/ppo_trainer/run_qwen2-7b_seq_balance.sh index 54570876..6d76c930 100644 --- a/examples/ppo_trainer/run_qwen2-7b_seq_balance.sh +++ b/examples/ppo_trainer/run_qwen2-7b_seq_balance.sh @@ -12,7 +12,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files="$train_files" \ data.val_files="$test_files" \ data.train_batch_size=4096 \ - data.val_batch_size=1312 \ data.max_prompt_length=4096 \ data.max_response_length=4096 \ actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \ diff --git a/examples/ppo_trainer/run_qwen2.5-32b.sh b/examples/ppo_trainer/run_qwen2.5-32b.sh index 6be2e6fa..bd7c42da 100644 --- a/examples/ppo_trainer/run_qwen2.5-32b.sh +++ b/examples/ppo_trainer/run_qwen2.5-32b.sh @@ -12,7 +12,6 @@ python3 -m 
verl.trainer.main_ppo \ data.train_files="$train_files" \ data.val_files="$test_files" \ data.train_batch_size=1024 \ - data.val_batch_size=6304 \ data.max_prompt_length=1024 \ data.max_response_length=1024 \ actor_rollout_ref.model.path=Qwen/Qwen2.5-32B-Instruct \ diff --git a/examples/ppo_trainer/verl_getting_started.ipynb b/examples/ppo_trainer/verl_getting_started.ipynb index 15bf5cb9..d9401132 100644 --- a/examples/ppo_trainer/verl_getting_started.ipynb +++ b/examples/ppo_trainer/verl_getting_started.ipynb @@ -314,16 +314,16 @@ "source": [ "import torch\n", "try:\n", - " assert torch.cuda.is_available() is True\n", - " torch.ones(1, dtype=torch.bfloat16).cuda()\n", + " assert torch.cuda.is_available() is True\n", + " torch.ones(1, dtype=torch.bfloat16).cuda()\n", "except AssertionError:\n", - " print(\"Please switch to an env with GPUs supporting bfloat16 (L4 RTX 5000, A5000, A100, H100, A10, etc)\")\n", + " print(\"Please switch to an env with GPUs supporting bfloat16 (L4 RTX 5000, A5000, A100, H100, A10, etc)\")\n", "\n", "try:\n", - " import verl\n", + " import verl\n", "except Exception as e:\n", - " print(\"Please install verl via pip and restart the kernel\")\n", - " raise e\n", + " print(\"Please install verl via pip and restart the kernel\")\n", + " raise e\n", "\n", "import flash_attn" ] @@ -561,6 +561,7 @@ "source": [ "import inspect\n", "from verl.utils.reward_score.gsm8k import compute_score as gsm8k_reward\n", + "\n", "print(inspect.getsource(gsm8k_reward))" ] }, @@ -1103,7 +1104,6 @@ " data.train_files=$HOME/data/gsm8k/train.parquet \\\n", " data.val_files=$HOME/data/gsm8k/test.parquet \\\n", " data.train_batch_size=256 \\\n", - " data.val_batch_size=1312 \\\n", " data.max_prompt_length=512 \\\n", " data.max_response_length=256 \\\n", " actor_rollout_ref.model.path=$HOME/models/Qwen2.5-0.5B-Instruct \\\n", diff --git a/examples/remax_trainer/run_qwen2.5-3b_seq_balance.sh b/examples/remax_trainer/run_qwen2.5-3b_seq_balance.sh index d7945cf8..4f71f3c7 100644 --- a/examples/remax_trainer/run_qwen2.5-3b_seq_balance.sh +++ b/examples/remax_trainer/run_qwen2.5-3b_seq_balance.sh @@ -10,7 +10,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/train.parquet \ data.train_batch_size=512 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=1024 \ actor_rollout_ref.model.path=Qwen/Qwen2.5-3B-Instruct \ diff --git a/examples/remax_trainer/run_qwen2.5-7b_seq_balance.sh b/examples/remax_trainer/run_qwen2.5-7b_seq_balance.sh index 204460b4..34c2d7a3 100644 --- a/examples/remax_trainer/run_qwen2.5-7b_seq_balance.sh +++ b/examples/remax_trainer/run_qwen2.5-7b_seq_balance.sh @@ -10,7 +10,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/train.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=1024 \ actor_rollout_ref.model.path=Qwen/Qwen2.5-7B-Instruct \ diff --git a/examples/rloo_trainer/run_qwen2-7b.sh b/examples/rloo_trainer/run_qwen2-7b.sh index 56192ee3..0c0a4a6d 100644 --- a/examples/rloo_trainer/run_qwen2-7b.sh +++ b/examples/rloo_trainer/run_qwen2-7b.sh @@ -7,7 +7,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=1024 \ 
actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \ diff --git a/examples/slurm/ray_on_slurm.slurm b/examples/slurm/ray_on_slurm.slurm index cb5c3683..bd588965 100644 --- a/examples/slurm/ray_on_slurm.slurm +++ b/examples/slurm/ray_on_slurm.slurm @@ -75,7 +75,6 @@ PYTHONUNBUFFERED=1 srun --overlap --nodes=1 --ntasks=1 -w "$head_node" \ data.train_files=$train_files \ data.val_files=$val_files \ data.train_batch_size=256 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=256 \ actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B-Instruct \ diff --git a/examples/split_placement/config/ppo_trainer_split.yaml b/examples/split_placement/config/ppo_trainer_split.yaml index 98d1a939..d6fd08d3 100644 --- a/examples/split_placement/config/ppo_trainer_split.yaml +++ b/examples/split_placement/config/ppo_trainer_split.yaml @@ -6,7 +6,7 @@ data: max_prompt_length: 512 max_response_length: 512 train_batch_size: 1024 - val_batch_size: 1312 + val_batch_size: null # DEPRECATED: Validation datasets are sent to the inference engines as a whole batch, and the engines schedule memory themselves return_raw_input_ids: False # This should be set to true when the tokenizer between policy and rm differs return_raw_chat: False shuffle: True diff --git a/examples/split_placement/run_deepseek7b_llm.sh b/examples/split_placement/run_deepseek7b_llm.sh index 37295655..e0a47260 100644 --- a/examples/split_placement/run_deepseek7b_llm.sh +++ b/examples/split_placement/run_deepseek7b_llm.sh @@ -4,7 +4,6 @@ python3 main_ppo_split.py \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=512 \ actor_rollout_ref.model.path=deepseek-ai/deepseek-llm-7b-chat \ diff --git a/tests/e2e/run_deepseek_megatron.sh b/tests/e2e/run_deepseek_megatron.sh index 03394f3d..f86c96b3 100644 --- a/tests/e2e/run_deepseek_megatron.sh +++ b/tests/e2e/run_deepseek_megatron.sh @@ -9,7 +9,6 @@ python3 -m verl.trainer.main_ppo --config-path=config \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=512 \ actor_rollout_ref.model.path=deepseek-ai/deepseek-coder-1.3b-instruct \ diff --git a/tests/e2e/run_qwen_gsm8k_function_rm.sh b/tests/e2e/run_qwen_gsm8k_function_rm.sh index 4f0260fe..37f041c6 100644 --- a/tests/e2e/run_qwen_gsm8k_function_rm.sh +++ b/tests/e2e/run_qwen_gsm8k_function_rm.sh @@ -6,7 +6,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=512 \ actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ diff --git a/tests/e2e/run_qwen_gsm8k_function_rm_grpo.sh b/tests/e2e/run_qwen_gsm8k_function_rm_grpo.sh index 6dc4fec6..f9eb4aca 100644 --- a/tests/e2e/run_qwen_gsm8k_function_rm_grpo.sh +++ b/tests/e2e/run_qwen_gsm8k_function_rm_grpo.sh @@ -6,7 +6,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=512 \ actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ diff --git a/tests/e2e/run_qwen_gsm8k_function_rm_no_rmpad.sh
b/tests/e2e/run_qwen_gsm8k_function_rm_no_rmpad.sh index f3443ce1..cbe4fcec 100644 --- a/tests/e2e/run_qwen_gsm8k_function_rm_no_rmpad.sh +++ b/tests/e2e/run_qwen_gsm8k_function_rm_no_rmpad.sh @@ -6,7 +6,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=512 \ actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ diff --git a/tests/e2e/run_qwen_gsm8k_function_rm_remax.sh b/tests/e2e/run_qwen_gsm8k_function_rm_remax.sh index 07c45809..1eb5f775 100644 --- a/tests/e2e/run_qwen_gsm8k_function_rm_remax.sh +++ b/tests/e2e/run_qwen_gsm8k_function_rm_remax.sh @@ -6,7 +6,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=512 \ actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ diff --git a/tests/e2e/run_qwen_gsm8k_model_rm.sh b/tests/e2e/run_qwen_gsm8k_model_rm.sh index a13c00aa..3e908d7a 100644 --- a/tests/e2e/run_qwen_gsm8k_model_rm.sh +++ b/tests/e2e/run_qwen_gsm8k_model_rm.sh @@ -6,7 +6,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=512 \ data.return_raw_chat=True \ diff --git a/tests/e2e/run_qwen_gsm8k_model_rm_liger_kernel.sh b/tests/e2e/run_qwen_gsm8k_model_rm_liger_kernel.sh index 841597b5..ca6a5d10 100644 --- a/tests/e2e/run_qwen_gsm8k_model_rm_liger_kernel.sh +++ b/tests/e2e/run_qwen_gsm8k_model_rm_liger_kernel.sh @@ -6,7 +6,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=512 \ data.return_raw_chat=True \ diff --git a/tests/e2e/run_qwen_gsm8k_model_rm_no_rmpad.sh b/tests/e2e/run_qwen_gsm8k_model_rm_no_rmpad.sh index acd4a51c..97f270c5 100644 --- a/tests/e2e/run_qwen_gsm8k_model_rm_no_rmpad.sh +++ b/tests/e2e/run_qwen_gsm8k_model_rm_no_rmpad.sh @@ -6,7 +6,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=512 \ data.return_raw_chat=True \ diff --git a/tests/e2e/run_qwen_gsm8k_model_rm_seq_balance.sh b/tests/e2e/run_qwen_gsm8k_model_rm_seq_balance.sh index bd1f632f..efe279fb 100644 --- a/tests/e2e/run_qwen_gsm8k_model_rm_seq_balance.sh +++ b/tests/e2e/run_qwen_gsm8k_model_rm_seq_balance.sh @@ -6,7 +6,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=512 \ data.return_raw_chat=True \ diff --git a/tests/e2e/run_qwen_gsm8k_model_rm_ulysses.sh b/tests/e2e/run_qwen_gsm8k_model_rm_ulysses.sh index 08161eea..626f6349 100644 --- a/tests/e2e/run_qwen_gsm8k_model_rm_ulysses.sh +++ b/tests/e2e/run_qwen_gsm8k_model_rm_ulysses.sh @@ -6,7 +6,6 @@ python3 -m verl.trainer.main_ppo \ data.train_files=$HOME/data/gsm8k/train.parquet \ 
data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=512 \ data.return_raw_chat=True \ diff --git a/tests/e2e/run_qwen_megatron.sh b/tests/e2e/run_qwen_megatron.sh index daf78a03..46e5c6d2 100644 --- a/tests/e2e/run_qwen_megatron.sh +++ b/tests/e2e/run_qwen_megatron.sh @@ -9,7 +9,6 @@ python3 -m verl.trainer.main_ppo --config-path=config \ data.train_files=$HOME/data/gsm8k/train.parquet \ data.val_files=$HOME/data/gsm8k/test.parquet \ data.train_batch_size=1024 \ - data.val_batch_size=1312 \ data.max_prompt_length=512 \ data.max_response_length=512 \ actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B \ diff --git a/tests/e2e/run_ray_trainer.sh b/tests/e2e/run_ray_trainer.sh index ff189f33..5688d849 100644 --- a/tests/e2e/run_ray_trainer.sh +++ b/tests/e2e/run_ray_trainer.sh @@ -11,7 +11,6 @@ python3 tests/e2e/arithmetic_sequence/rl/main_trainer.py \ data.train_files=tests/e2e/arithmetic_sequence/data/train.parquet \ data.val_files=tests/e2e/arithmetic_sequence/data/test.parquet \ data.train_batch_size=800 \ - data.val_batch_size=200 \ data.max_prompt_length=16 \ data.max_response_length=32 \ data.return_raw_input_ids=True \ diff --git a/verl/trainer/config/ppo_megatron_trainer.yaml b/verl/trainer/config/ppo_megatron_trainer.yaml index 96392a09..86ebc480 100644 --- a/verl/trainer/config/ppo_megatron_trainer.yaml +++ b/verl/trainer/config/ppo_megatron_trainer.yaml @@ -6,7 +6,7 @@ data: max_prompt_length: 512 max_response_length: 512 train_batch_size: 1024 - val_batch_size: 1312 + val_batch_size: null # DEPRECATED: Validation datasets are sent to the inference engines as a whole batch, and the engines schedule memory themselves return_raw_input_ids: False # This should be set to true when the tokenizer between policy and rm differs return_raw_chat: False shuffle: True diff --git a/verl/trainer/config/ppo_trainer.yaml b/verl/trainer/config/ppo_trainer.yaml index 1c67cc5c..bf919089 100644 --- a/verl/trainer/config/ppo_trainer.yaml +++ b/verl/trainer/config/ppo_trainer.yaml @@ -6,7 +6,7 @@ data: max_prompt_length: 512 max_response_length: 512 train_batch_size: 1024 - val_batch_size: 1312 + val_batch_size: null # DEPRECATED: Validation datasets are sent to the inference engines as a whole batch, and the engines schedule memory themselves return_raw_input_ids: False # This should be set to true when the tokenizer between policy and rm differs return_raw_chat: False shuffle: True diff --git a/verl/trainer/ppo/ray_trainer.py b/verl/trainer/ppo/ray_trainer.py index 00a4413f..80412d3e 100644 --- a/verl/trainer/ppo/ray_trainer.py +++ b/verl/trainer/ppo/ray_trainer.py @@ -465,6 +465,11 @@ def check_mutually_exclusive(mbs, mbs_per_gpu, name: str): assert config.critic.model.use_remove_padding, \ "When using sequence parallelism for critic, you must enable `use_remove_padding`." + if config.data.get('val_batch_size', None) is not None: + print( + "WARNING: val_batch_size is deprecated. Validation datasets are sent to the inference engines as a whole batch, and the engines schedule memory themselves."
+        ) + print("[validate_config] All configuration checks passed successfully!") def _create_dataloader(self): @@ -498,11 +503,14 @@ def _create_dataloader(self): filter_prompts=True, return_raw_chat=self.config.data.get('return_raw_chat', False), truncation='error') - self.val_dataloader = DataLoader(dataset=self.val_dataset, - batch_size=len(self.val_dataset), - shuffle=True, - drop_last=True, - collate_fn=collate_fn) + self.val_dataloader = DataLoader( + dataset=self.val_dataset, + # Validation datasets are sent to the inference engines as a whole batch, + # and the engines schedule memory themselves. + batch_size=len(self.val_dataset), + shuffle=True, + drop_last=False, + collate_fn=collate_fn) assert len(self.train_dataloader) >= 1 assert len(self.val_dataloader) >= 1
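For reviewers who want to exercise the new validation-loading behavior outside the trainer, here is a minimal standalone sketch of the DataLoader construction introduced in _create_dataloader above: the whole validation set is materialized as a single batch, so no val_batch_size is needed. ToyDataset and the trivial collate_fn below are illustrative stand-ins, not part of verl's API.

import torch
from torch.utils.data import DataLoader, Dataset


class ToyDataset(Dataset):
    """Illustrative stand-in for verl's validation dataset (not verl API)."""

    def __init__(self, num_samples):
        self.samples = list(range(num_samples))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        return self.samples[idx]


def collate_fn(batch):
    # Trivial collation: pack the toy samples into one tensor.
    return torch.tensor(batch)


val_dataset = ToyDataset(num_samples=10)
val_dataloader = DataLoader(
    dataset=val_dataset,
    # The entire validation set goes out as a single batch; the inference
    # engines schedule memory themselves.
    batch_size=len(val_dataset),
    shuffle=True,
    # drop_last=False ensures no validation sample is silently discarded
    # (with batch_size=len(val_dataset), drop_last=True would drop nothing
    # only when the sizes match exactly; False makes this unconditional).
    drop_last=False,
    collate_fn=collate_fn)

# Exactly one validation batch, which also satisfies the trainer's
# `assert len(self.val_dataloader) >= 1` check above.
assert len(val_dataloader) == 1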