Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions examples/bots/bots.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# BOTS difficulty-based task-selection experiment: GRPO training of
# Qwen2.5-1.5B-Instruct on math tasks with boxed answers.
project: "BOTS-Selector"
name: "qwen2.5-1.5B-instruct-bots"
# Overridable via the TRINITY_CHECKPOINT_ROOT_DIR env var (OmegaConf resolver).
checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
data_processor:
  experience_pipeline:
    operators:
      # Computes per-task pass rates; presumably feeds the difficulty_based
      # task selector below — TODO confirm against the operator implementation.
      - name: pass_rate_calculator
algorithm:
  algorithm_type: grpo
  # 16 rollouts per task (GRPO group size).
  repeat_times: 16
  optimizer:
    lr: 1e-6
model:
  # Overridable via the TRINITY_MODEL_PATH env var.
  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
  max_prompt_tokens: 4096
  max_response_tokens: 8192
cluster:
  node_num: 1
  gpu_per_node: 8
buffer:
  total_epochs: 1
  batch_size: 32
  explorer_input:
    taskset:
      name: math-train
      storage_type: file
      # Replace <DATA_ROOT> with the local dataset root before running.
      path: '<DATA_ROOT>/LLM360/guru-RL-92k/train/math__combined_54.4k.parquet'
      split: 'train'
      format:
        prompt_key: 'prompt'
        response_key: 'reward_model.ground_truth'
      rollout_args:
        temperature: 1.0
      task_selector:
        selector_type: difficulty_based
        # Pre-computed pass-rate columns used as difficulty features.
        feature_keys: [ "qwen2.5_7b_pass_rate", "qwen3_30b_pass_rate" ]
        # Selector hyper-parameters; semantics are defined by the
        # difficulty_based selector implementation — see its docs.
        kwargs:
          m: 16
          lamb: 0.1
          rho: 0.1
          target_reward: 0.5
          tau: 0
          do_sample: true
    eval_tasksets:
      - name: math-eval
        storage_type: file
        path: '<DATA_ROOT>/LLM360/guru-RL-92k/online_eval/math__math_500.parquet'
        format:
          prompt_key: 'prompt'
          response_key: 'reward_model.ground_truth'
        rollout_args:
          temperature: 1.0
    # Registered in examples/bots/plugins/bots_math_boxed_workflow.py.
    default_workflow_type: 'bots_math_boxed_workflow'
  trainer_input:
    experience_buffer:
      name: exp_buffer
      storage_type: queue
      path: 'sqlite:///bots_trainer_buffer.db'
explorer:
  # Run evaluation every 40 exploration steps.
  eval_interval: 40
  runner_per_model: 8
  rollout_model:
    engine_num: 4
    tensor_parallel_size: 1
    enable_prefix_caching: false
    enforce_eager: true
    dtype: bfloat16
    seed: 42
synchronizer:
  # NCCL weight sync between trainer and rollout engines every 8 steps.
  sync_method: 'nccl'
  sync_interval: 8
  sync_timeout: 1200
trainer:
  trainer_type: 'verl'
  save_interval: 800
  grad_clip: 1.0
  use_dynamic_bsz: true
  max_token_len_per_gpu: 24576
  ulysses_sequence_parallel_size: 1
32 changes: 32 additions & 0 deletions examples/bots/plugins/bots_math_boxed_reward.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from typing import Optional

from trinity.common.rewards.reward_fn import REWARD_FUNCTIONS, RewardFn
from trinity.utils.eval_utils import validate_think_pattern

from .bots_reward import compute_score

@REWARD_FUNCTIONS.register_module("bots_math_boxed_reward")
class BOTSMathBoxedRewardFn(RewardFn):
    """Reward function for BOTS math tasks with boxed answers.

    Produces an accuracy score (via ``compute_score``) plus an optional
    penalty when a required ``<think>`` pattern is missing from the response.
    """

    def __init__(
        self,
        **kwargs,
    ) -> None:
        # Stateless; kwargs accepted only for interface compatibility.
        pass

    def __call__(  # type: ignore
        self,
        response: str,
        truth: Optional[str] = None,
        with_think: Optional[bool] = False,
        format_score_coef: Optional[float] = 0.1,
        **kwargs,
    ) -> dict[str, float]:
        accuracy = compute_score(response, truth)

        # Penalize only when thinking is required but the pattern is absent;
        # a falsy coefficient falls back to the default 0.1.
        think_ok = (not with_think) or validate_think_pattern(response)
        penalty = 0.0 if think_ok else -(format_score_coef or 0.1)

        return {"accuracy": accuracy, "format_score": penalty}
16 changes: 16 additions & 0 deletions examples/bots/plugins/bots_math_boxed_workflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from trinity.common.workflows.customized_math_workflows import MathBoxedWorkflow, Task
from trinity.common.workflows.workflow import WORKFLOWS

from .bots_math_boxed_reward import BOTSMathBoxedRewardFn

@WORKFLOWS.register_module("bots_math_boxed_workflow")
class BOTSMathBoxedWorkflow(MathBoxedWorkflow):
    """Boxed-answer math workflow variant that plugs in the BOTS reward."""

    def reset(self, task: Task):
        """Re-initialize for *task*, then swap in the BOTS reward function."""
        super().reset(task)
        # Override whatever reward fn the base reset installed.
        self.reward_fn = BOTSMathBoxedRewardFn(**self.reward_fn_args)

    def format_messages(self):
        """Pass the task description through unchanged.

        The prompts are already in message format, so no conversion is needed.
        """
        return self.task_desc
Loading