Skip to content

Commit

Permalink
[fix] Improve the params template for generation (#351)
Browse files Browse the repository at this point in the history
Fixes issue [#331](#331).
  • Loading branch information
BearBiscuit05 authored Feb 24, 2025
1 parent 4011f40 commit e53dcdb
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 2 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/vllm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,8 @@ jobs:
pip3 install --upgrade vllm
cd tests/rollout
torchrun --standalone --nnodes=1 --nproc_per_node=4 $(which pytest) -s test_vllm_spmd.py
- name: Run QWen 0.5B generation test
run: |
cd tests/generation
bash ./run_gen_qwen05.sh 4 $HOME/data/gen/qwen_05_gen_test.parquet
rm -rf $HOME/data/gen/qwen_05_gen_test.parquet
30 changes: 30 additions & 0 deletions tests/generation/run_gen_qwen05.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Tested with 1 & 4 GPUs
#
# Runs verl.trainer.main_generation with Qwen/Qwen2.5-0.5B-Instruct on
# $HOME/data/gsm8k/test.parquet and writes the result to <save_path>.
#
# Usage: run_gen_qwen05.sh <nproc_per_node> <save_path> [other_configs...]
#   nproc_per_node  passed as trainer.n_gpus_per_node
#   save_path       passed as data.output_path
#   other_configs   any further arguments, forwarded verbatim to
#                   verl.trainer.main_generation after the settings below
set -x

if [ "$#" -lt 2 ]; then
    echo "Usage: run_gen_qwen05.sh <nproc_per_node> <save_path> [other_configs...]"
    exit 1
fi

nproc_per_node=$1
save_path=$2

# Shift the arguments so "$@" refers to the rest
shift 2

# Expansions are quoted so paths containing spaces or glob characters reach
# the program as a single argument. "$@" comes last so the caller-supplied
# [other_configs...] promised by the usage text are actually passed through;
# it expands to nothing when no extra arguments were given.
python3 -m verl.trainer.main_generation \
    trainer.nnodes=1 \
    trainer.n_gpus_per_node="$nproc_per_node" \
    data.path="$HOME/data/gsm8k/test.parquet" \
    data.prompt_key=prompt \
    data.n_samples=1 \
    data.output_path="$save_path" \
    model.path=Qwen/Qwen2.5-0.5B-Instruct \
    +model.trust_remote_code=True \
    rollout.temperature=1.0 \
    rollout.top_k=50 \
    rollout.top_p=0.7 \
    rollout.prompt_length=2048 \
    rollout.response_length=1024 \
    rollout.tensor_model_parallel_size=2 \
    rollout.gpu_memory_utilization=0.8 \
    "$@"
33 changes: 32 additions & 1 deletion verl/trainer/config/generation.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,35 @@ rollout:
log_prob_micro_batch_size: null # will be deprecated, use log_prob_micro_batch_size_per_gpu
log_prob_micro_batch_size_per_gpu: 8
# for hf rollout
do_sample: True
do_sample: True
disable_log_stats: True
enable_chunked_prefill: True
n: 1
actor:
strategy: fsdp # This is for backward-compatibility
ppo_mini_batch_size: 256
ppo_micro_batch_size: null # will be deprecated, use ppo_micro_batch_size_per_gpu
ppo_micro_batch_size_per_gpu: null
use_dynamic_bsz: False
ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}
grad_clip: 1.0
clip_ratio: 0.2
entropy_coeff: 0.001
use_kl_loss: False # True for GRPO
kl_loss_coef: 0.001 # for grpo
kl_loss_type: low_var_kl # for grpo
ppo_epochs: 1
shuffle: False
ulysses_sequence_parallel_size: 1 # sp size
optim:
lr: 1e-6
lr_warmup_steps_ratio: 0. # the total steps will be injected during runtime
min_lr_ratio: null # only useful for warmup with cosine
warmup_style: constant # select from constant/cosine
total_training_steps: -1 # must be overridden by the program
fsdp_config:
wrap_policy:
min_num_params: 0
param_offload: False
optimizer_offload: False
fsdp_size: -1
2 changes: 1 addition & 1 deletion verl/trainer/main_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def main(config):
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token

ray_cls_with_init = RayClassWithInitArgs(cls=ray.remote(ActorRolloutRefWorker), config=config, role='rollout')
ray_cls_with_init = RayClassWithInitArgs(cls=ray.remote(ActorRolloutRefWorker), config=config, role='actor_rollout')
resource_pool = RayResourcePool(process_on_nodes=[config.trainer.n_gpus_per_node] * config.trainer.nnodes)
wg = RayWorkerGroup(resource_pool=resource_pool, ray_cls_with_init=ray_cls_with_init)
wg.init_model()
Expand Down

0 comments on commit e53dcdb

Please sign in to comment.