truncation left for reward tokenizer
qgallouedec committed Oct 28, 2024
1 parent 68bd4b2 commit 1834770
Showing 1 changed file with 10 additions and 5 deletions.

examples/scripts/dpo_online.py

@@ -96,6 +96,8 @@
         reward_tokenizer = AutoTokenizer.from_pretrained(
             training_args.reward_model_path,
             trust_remote_code=model_config.trust_remote_code,
+            truncation=True,
+            truncation_side="left",  # since we judge the completion, truncating left is more appropriate
         )
     else:
         reward_model = None
@@ -131,11 +133,14 @@
         reward_processing_class=reward_tokenizer,
         peft_config=get_peft_config(model_config),
     )
-    generation_config = GenerationConfig(
-        max_new_tokens=training_args.max_new_tokens, do_sample=True, temperature=training_args.temperature
-    )
-    completions_callback = LogCompletionsCallback(trainer, generation_config, num_prompts=8)
-    trainer.add_callback(completions_callback)
+
+    if training_args.eval_strategy != "no":
+        generation_config = GenerationConfig(
+            max_new_tokens=training_args.max_new_tokens, do_sample=True, temperature=training_args.temperature
+        )
+        completions_callback = LogCompletionsCallback(trainer, generation_config, num_prompts=8)
+        trainer.add_callback(completions_callback)
+
     trainer.train()
 
     # Save and push to hub
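
The first hunk sets truncation_side="left" on the reward tokenizer. The reward model scores the completion, which sits at the end of the prompt-plus-completion sequence, so the default right-side truncation would drop exactly the tokens being judged; left-side truncation drops prompt tokens instead. A minimal sketch of the difference, assuming a placeholder checkpoint ("gpt2") and an artificially small max_length, neither of which comes from the commit:

from transformers import AutoTokenizer

# "gpt2" is a stand-in checkpoint; any tokenizer shows the same effect.
tok_right = AutoTokenizer.from_pretrained("gpt2")  # default: truncation_side="right"
tok_left = AutoTokenizer.from_pretrained("gpt2", truncation_side="left")

text = "a long prompt with many filler words before the short completion to judge"

# max_length=8 is artificially small to force truncation.
# Right truncation keeps the start (the prompt) and drops the completion;
# left truncation drops the start and keeps the completion being scored.
print(tok_right(text, truncation=True, max_length=8)["input_ids"])
print(tok_left(text, truncation=True, max_length=8)["input_ids"])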

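The second hunk registers LogCompletionsCallback only when evaluation is enabled, presumably because the callback emits its completion logs as part of evaluation and would never fire with eval_strategy="no". A sketch of a config that keeps the callback active; the field values are illustrative and not from the commit:

from trl import OnlineDPOConfig

# Illustrative values only; eval_strategy is the field the new guard checks.
training_args = OnlineDPOConfig(
    output_dir="online-dpo-model",
    eval_strategy="steps",  # anything other than "no" passes the guard above
    eval_steps=100,
)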