diff --git a/examples/bots/README.md b/examples/bots/README.md
index 63fd2345ca..d3e5ec19dd 100644
--- a/examples/bots/README.md
+++ b/examples/bots/README.md
@@ -33,6 +33,25 @@ Also refer to the [Data Preparation Guide](https://github.com/LLM360/Reasoning36
 Remember to modify the model/data path in `bots.yaml` and `random.yaml` accordingly.
 
+##### (Optional) Customize Reference Evaluation Results
+
+Modify `ref_eval_collect.yaml` to set the reference model you want to evaluate, e.g., Qwen2.5-1.5B-Instruct.
+
+Launch evaluation by executing:
+```bash
+BOTS_REF_EVAL_LOG_FILE="path/to/save/eval/logs" trinity run --config examples/bots/ref_eval_collect.yaml --plugin-dir examples/bots/workflow
+```
+
+The evaluation logs will be saved to the specified location. Then merge the evaluation results into the original dataset as a new column:
+
+```bash
+python examples/bots/ref_eval_collect.py \
+--data-path <path-to-original-dataset> \
+--ref-eval-path <path-to-eval-logs> \
+--ref-eval-key <new-column-name>
+```
+Remember to update `task_selector.feature_keys` in `bots.yaml` accordingly.
+
 ##### Step 3: Training
 
 Launch training by executing:
 ```bash
diff --git a/examples/bots/README_zh.md b/examples/bots/README_zh.md
index 9f20ba0e4b..292336e334 100644
--- a/examples/bots/README_zh.md
+++ b/examples/bots/README_zh.md
@@ -30,6 +30,26 @@ BOTS 以任务选择、模型训练和后验概率更新的连续循环运行。
 请参考LLM360提供的[数据准备指南](https://github.com/LLM360/Reasoning360?tab=readme-ov-file#data-preparation)和[技术报告](https://www.arxiv.org/pdf/2506.14965)。
 请修改`bots.yaml`和`random.yaml`中相应的模型/数据路径。
+
+##### (可选)自定义参考评估结果
+
+修改 `ref_eval_collect.yaml` 以设置你想要评估的参考模型,例如 Qwen2.5-1.5B-Instruct。
+
+执行以下命令启动评估:
+```bash
+BOTS_REF_EVAL_LOG_FILE="path/to/save/eval/logs" trinity run --config examples/bots/ref_eval_collect.yaml --plugin-dir examples/bots/workflow
+```
+
+评估日志会保存在指定的路径下。接下来将评估结果作为新列合并到原数据集:
+
+```bash
+python examples/bots/ref_eval_collect.py \
+--data-path <原始数据集路径> \
+--ref-eval-path <评估日志路径> \
+--ref-eval-key <新列名>
+```
+记得相应更新`bots.yaml`中的`task_selector.feature_keys`字段。
+
 ##### 第三步:训练
 
 执行以下命令启动训练:
 ```bash
diff --git a/examples/bots/bots.yaml b/examples/bots/bots.yaml
index e3a948fee3..a794b03e85 100644
--- a/examples/bots/bots.yaml
+++ b/examples/bots/bots.yaml
@@ -24,7 +24,7 @@ buffer:
     taskset:
       name: math-train
       storage_type: file
-      path: '/LLM360/guru-RL-92k/train/math__combined_54.4k.parquet'
+      path: 'your/data/path/containing/math__combined_54.4k.parquet' # you need to set it manually
       split: 'train'
       format:
         prompt_key: 'prompt'
@@ -44,7 +44,7 @@ buffer:
     eval_tasksets:
     - name: math-eval
       storage_type: file
-      path: '/LLM360/guru-RL-92k/online_eval/math__math_500.parquet'
+      path: 'your/data/path/containing/math__math_500.parquet' # you need to set it manually
       format:
         prompt_key: 'prompt'
         response_key: 'reward_model.ground_truth'
diff --git a/examples/bots/random.yaml b/examples/bots/random.yaml
index 4fa2e3978a..cb6958004c 100644
--- a/examples/bots/random.yaml
+++ b/examples/bots/random.yaml
@@ -20,7 +20,7 @@ buffer:
     taskset:
      name: math-train
       storage_type: file
-      path: '/LLM360/guru-RL-92k/train/math__combined_54.4k.parquet'
+      path: 'your/data/path/containing/math__combined_54.4k.parquet' # you need to set it manually
       split: 'train'
       format:
         prompt_key: 'prompt'
@@ -32,7 +32,7 @@ buffer:
     eval_tasksets:
     - name: math-eval
       storage_type: file
-      path: '/LLM360/guru-RL-92k/online_eval/math__math_500.parquet'
+      path: 'your/data/path/containing/math__math_500.parquet' # you need to set it manually
       format:
         prompt_key: 'prompt'
         response_key: 'reward_model.ground_truth'
diff --git a/examples/bots/ref_eval_collect.py b/examples/bots/ref_eval_collect.py
new file mode 100644
index 0000000000..f2d8b550e7
--- /dev/null
+++ b/examples/bots/ref_eval_collect.py
@@ -0,0 +1,33 @@
+import argparse
+import json
+
+import numpy as np
+import pandas as pd
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--data-path", type=str, required=True)
+    parser.add_argument("--ref-eval-path", type=str, required=True)
+    parser.add_argument("--ref-eval-key", type=str, required=True)
+    args = parser.parse_args()
+
+    print(f"Loading original dataset from {args.data_path}...")
+    original_data = pd.read_parquet(args.data_path)
+    prompt2linenum = {}
+    for i, d in enumerate(original_data["prompt"]):
+        prompt2linenum[d[0]["content"]] = i
+    eval_results = [0.0 for _ in range(len(original_data))]
+    print(f"Loading reference evaluation results from {args.ref_eval_path}...")
+    print(f"Results will be written to the original dataset at a new column {args.ref_eval_key}...")
+    with open(args.ref_eval_path, "r") as f:
+        for line in f:
+            item = json.loads(line)
+            eval_results[prompt2linenum[item["question"][0]["content"]]] = np.mean(item["rewards"])
+    original_data[args.ref_eval_key] = eval_results
+    print(f"Dataset overwritten at {args.data_path}...")
+    original_data.to_parquet(args.data_path)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/bots/ref_eval_collect.yaml b/examples/bots/ref_eval_collect.yaml
new file mode 100644
index 0000000000..66777064f8
--- /dev/null
+++ b/examples/bots/ref_eval_collect.yaml
@@ -0,0 +1,59 @@
+project: "bots_ref_eval_collect_demo_1.5B"
+name: "run-1"
+mode: explore
+checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
+algorithm:
+  algorithm_type: grpo
+  repeat_times: 16
+  optimizer:
+    lr: 1e-6
+model:
+  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
+  max_prompt_tokens: 4096
+  max_response_tokens: 8192
+cluster:
+  node_num: 1
+  gpu_per_node: 8
+buffer:
+  total_epochs: 1
+  batch_size: 100
+  explorer_input:
+    taskset:
+      name: math-train
+      storage_type: file
+      path: 'your/data/path/containing/math__combined_54.4k.parquet' # you need to set it manually
+      split: 'train'
+      format:
+        prompt_key: 'prompt'
+        response_key: 'reward_model.ground_truth'
+      rollout_args:
+        temperature: 1.0
+      task_selector:
+        selector_type: sequential
+    default_workflow_type: 'bots_ref_eval_collect_math_boxed_workflow'
+  trainer_input:
+    experience_buffer:
+      name: exp_buffer
+      storage_type: queue
+      path: 'sqlite:///bots_ref_eval_collect_buffer.db'
+explorer:
+  eval_interval: 100
+  runner_per_model: 16
+  rollout_model:
+    engine_num: 8
+    tensor_parallel_size: 1
+    enable_prefix_caching: false
+    enforce_eager: true
+    dtype: bfloat16
+    seed: 42
+synchronizer:
+  sync_method: 'nccl'
+  sync_interval: 10
+  sync_timeout: 1200
+trainer:
+  trainer_type: 'verl'
+  save_interval: 1000
+  grad_clip: 1.0
+  use_dynamic_bsz: true
+  max_token_len_per_gpu: 24576
+  ulysses_sequence_parallel_size: 1
diff --git a/examples/bots/workflow/bots_math_boxed_workflow.py b/examples/bots/workflow/bots_math_boxed_workflow.py
index 8ca8929412..90ce2a3f2b 100644
--- a/examples/bots/workflow/bots_math_boxed_workflow.py
+++ b/examples/bots/workflow/bots_math_boxed_workflow.py
@@ -1,5 +1,9 @@
-from typing import Union
+import fcntl
+import json
+import os
+from typing import List, Union
 
+from trinity.common.experience import Experience
 from trinity.common.workflows.customized_math_workflows import MathBoxedWorkflow, Task
 from trinity.common.workflows.workflow import WORKFLOWS
 
@@ -21,6 +25,48 @@ def format_messages(self):
         return self.task_desc
 
 
+@WORKFLOWS.register_module("bots_ref_eval_collect_math_boxed_workflow")
+class BOTSRefEvalCollectMathBoxedWorkflow(MathBoxedWorkflow):
+    """A reference evaluation collection workflow for math tasks that give answers in boxed format for BOTS."""
+
+    def reset(self, task: Task):
+        super().reset(task)
+        from trinity.plugins.bots_math_boxed_reward import BOTSMathBoxedRewardFn
+
+        self.reward_fn = BOTSMathBoxedRewardFn(**self.reward_fn_args)
+        self.task_desc = nested_query(self.format_args.prompt_key, self.raw_task)
+        self.truth = nested_query(self.format_args.response_key, self.raw_task)
+
+    def format_messages(self):
+        # the prompts are already in message format
+        return self.task_desc
+
+    def run(self) -> List[Experience]:
+        responses = super().run()
+
+        rewards = [response.reward for response in responses]
+
+        log_entry = {
+            "model_version": self.model.model_version,
+            "rewards": rewards,
+            "question": self.task_desc,
+            "truth": self.truth,
+        }
+
+        log_file_path = os.environ.get("BOTS_REF_EVAL_LOG_FILE", "./bots_ref_eval_log.jsonl")
+        os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
+
+        with open(log_file_path, "a") as f:
+            fcntl.flock(f, fcntl.LOCK_EX)
+            try:
+                json.dump(log_entry, f)
+                f.write("\n")
+            finally:
+                fcntl.flock(f, fcntl.LOCK_UN)
+
+        return responses
+
+
 def nested_query(query_key: str, query_obj: Union[dict, None]):
     # support nested query for a dict given query_keys split by '.'
     if query_obj is None:
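
As a quick illustration of the data flow this patch introduces: the `bots_ref_eval_collect_math_boxed_workflow` appends one JSON line per task (keys `model_version`, `rewards`, `question`, `truth`) to the file named by `BOTS_REF_EVAL_LOG_FILE`, and `ref_eval_collect.py` later joins those lines back onto the parquet rows by matching the first prompt message's `content`. Below is a minimal, self-contained sketch of that join on toy data; the rows, reward values, and the `ref_model_accuracy` column name are illustrative only (in practice the column name is whatever you pass as `--ref-eval-key`).

```python
# Illustrative sketch of the join performed by examples/bots/ref_eval_collect.py,
# run on made-up data. Not part of the patch.
import json
import tempfile

import numpy as np
import pandas as pd

# Toy stand-in for the training parquet: the 'prompt' column holds chat-style
# messages, the same shape ref_eval_collect.py indexes via d[0]["content"].
data = pd.DataFrame(
    {
        "prompt": [
            [{"role": "user", "content": "What is 2 + 2?"}],
            [{"role": "user", "content": "What is 3 * 5?"}],
        ],
        "reward_model.ground_truth": ["4", "15"],
    }
)

# Toy eval log: one JSON line per task, mirroring the log_entry written by
# BOTSRefEvalCollectMathBoxedWorkflow.run().
log_entries = [
    {"model_version": 0, "rewards": [1.0, 0.0, 1.0, 1.0],
     "question": [{"role": "user", "content": "What is 2 + 2?"}], "truth": "4"},
    {"model_version": 0, "rewards": [0.0, 0.0, 1.0, 0.0],
     "question": [{"role": "user", "content": "What is 3 * 5?"}], "truth": "15"},
]
with tempfile.NamedTemporaryFile("w", suffix=".jsonl", delete=False) as f:
    for entry in log_entries:
        f.write(json.dumps(entry) + "\n")
    log_path = f.name

# Same join as ref_eval_collect.py: map prompt content -> row index, then
# store the reference model's mean reward as a new feature column.
prompt2row = {msgs[0]["content"]: i for i, msgs in enumerate(data["prompt"])}
ref_eval = [0.0] * len(data)
with open(log_path) as f:
    for line in f:
        item = json.loads(line)
        ref_eval[prompt2row[item["question"][0]["content"]]] = float(np.mean(item["rewards"]))

# 'ref_model_accuracy' is a hypothetical column name; use your --ref-eval-key value.
data["ref_model_accuracy"] = ref_eval
print(data[["reward_model.ground_truth", "ref_model_accuracy"]])
```

With the real files, the new column written back into the parquet can then be listed under `task_selector.feature_keys` in `bots.yaml` so the task selector can use the reference model's score as a feature.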