run_orpo.py
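
# ORPO fine-tuning of meta-llama/Llama-3.2-1B-Instruct on trl-lib/tldr-preference
# using Liger Kernel's LigerORPOTrainer. Runs as a plain script
# (`python run_orpo.py`); requires torch, transformers, datasets, trl, and
# liger-kernel to be installed.
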
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import ORPOConfig
from liger_kernel.transformers.trainer import LigerORPOTrainer

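# Policy model in bfloat16; ORPO is reference-model-free, so only one model is loaded.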
model = AutoModelForCausalLM.from_pretrained(
"meta-llama/Llama-3.2-1B-Instruct",
torch_dtype=torch.bfloat16,
)
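
# Tokenizer for the same checkpoint. The max_length/padding kwargs below are
# stored on the tokenizer; actual truncation during training is governed by
# ORPOConfig.max_length further down.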
tokenizer = AutoTokenizer.from_pretrained(
"meta-llama/Llama-3.2-1B-Instruct",
max_length=512,
padding="max_length",
)
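# Llama 3.2 ships without a dedicated pad token, so reuse EOS for padding.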
tokenizer.pad_token = tokenizer.eos_token
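
# TL;DR preference dataset with prompt/chosen/rejected columns.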
train_dataset = load_dataset("trl-lib/tldr-preference", split="train")
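
# beta weights the odds-ratio loss term against the NLL term (the lambda of the
# ORPO paper); prompt+completion pairs are truncated to max_length tokens.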
training_args = ORPOConfig(
output_dir="Llama3.2_1B_Instruct",
beta=0.1,
max_length=128,
per_device_train_batch_size=32,
max_steps=100,
save_strategy="no",
)
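
# LigerORPOTrainer subclasses trl's ORPOTrainer and swaps in Liger Kernel's
# fused linear + ORPO loss to cut peak memory; the training loop is unchanged.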
trainer = LigerORPOTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
)

trainer.train()