diff --git a/.github/workflows/docker/docker-compose.yaml b/.github/workflows/docker/docker-compose.yaml index 5fe4571b57..3ccb526b75 100644 --- a/.github/workflows/docker/docker-compose.yaml +++ b/.github/workflows/docker/docker-compose.yaml @@ -6,12 +6,12 @@ services: environment: - HF_ENDPOINT=https://hf-mirror.com - RAY_ADDRESS=auto - - CHECKPOINT_ROOT_DIR=/mnt/checkpoints - - DATA_ROOT_DIR=/mnt/data - - MODEL_PATH=/mnt/models/Qwen3-0.6B - - API_MODEL_PATH=/mnt/models/Qwen3-1.7B - - VLM_MODEL_PATH=/mnt/models/Qwen2.5-VL-3B - - CHECKPOINT_PATH=/mnt/checkpoints + - TRINITY_CHECKPOINT_ROOT_DIR=/mnt/checkpoints + - TRINITY_TASKSET_PATH=/mnt/data + - TRINITY_SFT_DATASET_PATH=/mnt/data + - TRINITY_MODEL_PATH=/mnt/models/Qwen3-0.6B + - TRINITY_API_MODEL_PATH=/mnt/models/Qwen3-1.7B + - TRINITY_VLM_MODEL_PATH=/mnt/models/Qwen2.5-VL-3B working_dir: /workspace networks: - trinity-network @@ -33,10 +33,9 @@ services: command: sh -c "pip install -e .[dev] && ray start --address=trinity-node-1:6379 --block" environment: - HF_ENDPOINT=https://hf-mirror.com - - CHECKPOINT_ROOT_DIR=/mnt/checkpoints - - DATA_ROOT_DIR=/mnt/data - - MODEL_PATH=/mnt/models/Qwen3-1.7B - - CHECKPOINT_PATH=/mnt/checkpoints + - TRINITY_CHECKPOINT_ROOT_DIR=/mnt/checkpoints + - TRINITY_TASKSET_PATH=/mnt/data + - TRINITY_MODEL_PATH=/mnt/models/Qwen3-1.7B working_dir: /workspace volumes: - trinity-volume:/mnt diff --git a/benchmark/bench.py b/benchmark/bench.py index 1aafd2b55c..9d7c634527 100644 --- a/benchmark/bench.py +++ b/benchmark/bench.py @@ -8,6 +8,7 @@ import yaml from trinity.algorithm.algorithm import ALGORITHM_TYPE +from trinity.common.constants import MODEL_PATH_ENV_VAR from trinity.utils.dlc_utils import get_dlc_env_vars @@ -77,9 +78,8 @@ def prepare_configs(args, rank, current_time): set_engine_num(config, args) config["model"]["model_path"] = ( args.model_path - or os.environ.get("MODEL_PATH") or config["model"]["model_path"] - or "Qwen/Qwen2.5-1.5B-Instruct" + or 
os.environ.get(MODEL_PATH_ENV_VAR, "Qwen/Qwen2.5-1.5B-Instruct") ) if ALGORITHM_TYPE.get(config["algorithm"]["algorithm_type"]).use_critic: config["model"]["critic_model_path"] = ( diff --git a/docs/sphinx_doc/source/tutorial/example_async_mode.md b/docs/sphinx_doc/source/tutorial/example_async_mode.md index 3669271207..bf1015db29 100644 --- a/docs/sphinx_doc/source/tutorial/example_async_mode.md +++ b/docs/sphinx_doc/source/tutorial/example_async_mode.md @@ -15,12 +15,12 @@ Assuming we have a node with 8 GPUs, we allocate 4 GPUs for the trainer and 4 GP project: name: mode: explore -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: grpo repeat_times: 8 model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} cluster: node_num: 1 gpu_per_node: 4 @@ -31,7 +31,8 @@ buffer: taskset: name: gsm8k storage_type: file - path: /PATH/TO/DATASET/ + path: 'openai/gsm8k' + subset_name: 'main' split: train format: prompt_key: 'question' @@ -61,12 +62,12 @@ Key configurations in `trainer.yaml` are as follows: project: name: mode: train -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: grpo repeat_times: 8 model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} cluster: node_num: 1 gpu_per_node: 4 @@ -77,7 +78,8 @@ buffer: taskset: name: gsm8k storage_type: file - path: /PATH/TO/DATASET/ + path: 'openai/gsm8k' + subset_name: 'main' format: prompt_key: 'question' response_key: 'answer' @@ -129,12 +131,12 @@ Trinity-RFT also supports dynamic scaling in asynchronous mode. 
Continuing with project: name: mode: explore -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: grpo repeat_times: 8 model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} cluster: # important node_num: 1 gpu_per_node: 8 @@ -151,7 +153,8 @@ buffer: taskset: # important name: gsm8k storage_type: file - path: /PATH/TO/DATASET/ + path: 'openai/gsm8k' + subset_name: 'main' format: prompt_key: 'question' response_key: 'answer' diff --git a/docs/sphinx_doc/source/tutorial/example_data_functionalities.md b/docs/sphinx_doc/source/tutorial/example_data_functionalities.md index 73fe56c729..7f62add78a 100644 --- a/docs/sphinx_doc/source/tutorial/example_data_functionalities.md +++ b/docs/sphinx_doc/source/tutorial/example_data_functionalities.md @@ -53,7 +53,7 @@ data_processor: input_keys: ["question", "answer"] field_names: ["Question", "Answer"] inputs: # the output will be set to the explorer input automatically - - /PATH/TO/GSM8K/DATA/FILE + - ${oc.env:TRINITY_TASKSET_PATH} target_fields: ["question", "answer"] service: data_juicer: diff --git a/docs/sphinx_doc/source/tutorial/example_dpo.md b/docs/sphinx_doc/source/tutorial/example_dpo.md index 2841690415..3b50ed58c0 100644 --- a/docs/sphinx_doc/source/tutorial/example_dpo.md +++ b/docs/sphinx_doc/source/tutorial/example_dpo.md @@ -63,9 +63,9 @@ algorithm: kl_loss_fn: k1 kl_loss_fn_args: kl_coef: 0.1 # value of beta in DPO -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} model: - model_path: $MODEL_PATH/Qwen2.5-1.5B-Instruct + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} cluster: node_num: 1 gpu_per_node: 8 @@ -111,9 +111,9 @@ name: mode: train algorithm: algorithm_type: sft -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: 
${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} cluster: node_num: 1 gpu_per_node: 2 diff --git a/docs/sphinx_doc/source/tutorial/example_reasoning_basic.md b/docs/sphinx_doc/source/tutorial/example_reasoning_basic.md index 01c69eb1e0..048842bd50 100644 --- a/docs/sphinx_doc/source/tutorial/example_reasoning_basic.md +++ b/docs/sphinx_doc/source/tutorial/example_reasoning_basic.md @@ -106,12 +106,12 @@ We use the configurations in [`gsm8k.yaml`](https://github.com/modelscope/Trinit ```yaml project: name: -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: grpo repeat_times: 8 model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} cluster: node_num: 1 gpu_per_node: 2 @@ -122,7 +122,7 @@ buffer: taskset: name: gsm8k storage_type: file - path: <$DATASET_PATH/gsm8k> + path: 'openai/gsm8k' subset_name: 'main' split: 'train' format: @@ -133,7 +133,7 @@ buffer: eval_tasksets: - name: gsm8k-eval storage_type: file - path: <$DATASET_PATH/gsm8k> + path: 'openai/gsm8k' subset_name: 'main' split: 'test' format: diff --git a/docs/sphinx_doc/source/tutorial/example_step_wise.md b/docs/sphinx_doc/source/tutorial/example_step_wise.md index cbf45dfe09..73d6042de2 100644 --- a/docs/sphinx_doc/source/tutorial/example_step_wise.md +++ b/docs/sphinx_doc/source/tutorial/example_step_wise.md @@ -91,13 +91,13 @@ The example configuration is shown as: ```yaml project: "ALFWORLD" name: "Step_Wise_Alfworld" -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ALFWORLD_RFT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: grpo repeat_times: 16 advantage_fn: step_wise_grpo model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-7B-Instruct} max_response_tokens: 16384 
max_model_len: 20480 cluster: @@ -141,7 +141,7 @@ explorer: gpu_memory_utilization: 0.7 enable_chunked_prefill: true env_vars: - TMPDIR: /PATH/TO/ALFWORLD_TMP_DIR + TMPDIR: ${oc.env:TMPDIR,/tmp} synchronizer: sync_style: dynamic_by_explorer sync_method: 'nccl' diff --git a/docs/sphinx_doc/source/tutorial/trinity_configs.md b/docs/sphinx_doc/source/tutorial/trinity_configs.md index d2ac4a10d1..0bafd44be7 100644 --- a/docs/sphinx_doc/source/tutorial/trinity_configs.md +++ b/docs/sphinx_doc/source/tutorial/trinity_configs.md @@ -11,7 +11,7 @@ The configuration for **Trinity-RFT** is defined in a `YAML` file and organized project: Trinity-RFT name: example mode: both -checkpoint_root_dir: /PATH/TO/CHECKPOINT +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} continue_from_checkpoint: true algorithm: @@ -67,7 +67,7 @@ These are general settings that apply to the entire experiment. project: Trinity-RFT name: example mode: both -checkpoint_root_dir: ${oc.env:CHECKPOINT_ROOT_DIR} # CHECKPOINT_ROOT_DIR is an environment variable set in advance +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} # TRINITY_CHECKPOINT_ROOT_DIR is an environment variable set in advance ``` - `project`: The name of the project. 
@@ -214,7 +214,7 @@ buffer: taskset: name: countdown_train storage_type: file - path: /PATH/TO/DATA + path: ${oc.env:TRINITY_TASKSET_PATH} split: train format: prompt_key: 'question' @@ -227,7 +227,7 @@ buffer: eval_tasksets: - name: countdown_eval storage_type: file - path: /PATH/TO/DATA + path: ${oc.env:TRINITY_TASKSET_PATH} split: test repeat_times: 1 format: @@ -281,7 +281,7 @@ buffer: sft_warmup_dataset: name: warmup_data storage_type: file - path: /PATH/TO/WARMUP_DATA + path: ${oc.env:TRINITY_SFT_DATASET_PATH} format: prompt_key: 'question' response_key: 'answer' @@ -338,7 +338,7 @@ explorer: tensor_parallel_size: 1 enable_history: False auxiliary_models: - - model_path: /PATH/TO/MODEL + - model_path: Qwen/Qwen2.5-7B-Instruct tensor_parallel_size: 1 eval_interval: 100 eval_on_startup: True @@ -438,7 +438,7 @@ data_processor: input_keys: ["question", "answer"] field_names: ["Question", "Answer"] inputs: # the output will be set to the explorer input automatically - - /PATH/TO/GSM8K/DATA/FILE + - ${oc.env:TRINITY_TASKSET_PATH} target_fields: ["question", "answer"] experience_pipeline: operators: diff --git a/docs/sphinx_doc/source/tutorial/trinity_programming_guide.md b/docs/sphinx_doc/source/tutorial/trinity_programming_guide.md index 516b802084..c87ec8779f 100644 --- a/docs/sphinx_doc/source/tutorial/trinity_programming_guide.md +++ b/docs/sphinx_doc/source/tutorial/trinity_programming_guide.md @@ -83,7 +83,7 @@ buffer: explorer_input: taskset: default_workflow: "math_workflow" - path: "/PATH/TO/FILE/DIR" + path: ${oc.env:TRINITY_TASKSET_PATH} format: prompt_key: "question" response_key: "answer" diff --git a/examples/RAFT_alfworld/RAFT_alfworld_7B.yaml b/examples/RAFT_alfworld/RAFT_alfworld_7B.yaml index 78d5793d4f..3dbf42645f 100644 --- a/examples/RAFT_alfworld/RAFT_alfworld_7B.yaml +++ b/examples/RAFT_alfworld/RAFT_alfworld_7B.yaml @@ -1,12 +1,12 @@ project: "Trinity-RFT-RAFT-alfworld" name: "qwen2.5-7B-RAFT-alfworld" mode: both -checkpoint_root_dir: 
/PATH/TO/CHECKPOINT/RAFT_ALFWORLD/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: raft repeat_times: 1 model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-7B-Instruct} max_response_tokens: 4096 max_model_len: 20480 cluster: @@ -19,7 +19,7 @@ buffer: taskset: name: alfworld-train storage_type: file - path: '/PATH/TO/ALFWORLD_DATA/' + path: ${oc.env:TRINITY_TASKSET_PATH} split: 'train' format: prompt_key: 'game_file' @@ -31,7 +31,7 @@ buffer: eval_tasksets: - name: alfworld-eval storage_type: file - path: '/PATH/TO/ALFWORLD_DATA/' + path: ${oc.env:TRINITY_TASKSET_PATH} split: 'test' format: prompt_key: 'game_file' diff --git a/examples/RAFT_alfworld/RAFT_reflect_alfworld_7B.yaml b/examples/RAFT_alfworld/RAFT_reflect_alfworld_7B.yaml index fc721b68f7..ed32dab857 100644 --- a/examples/RAFT_alfworld/RAFT_reflect_alfworld_7B.yaml +++ b/examples/RAFT_alfworld/RAFT_reflect_alfworld_7B.yaml @@ -1,12 +1,12 @@ project: "Trinity-RFT-RAFT-reflect-alfworld" name: "qwen2.5-7B-RAFT-reflect-alfworld" mode: both -checkpoint_root_dir: /PATH/TO/CHECKPOINT/RAFT_REFLECT_ALFWORLD/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: raft repeat_times: 1 model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-7B-Instruct} max_response_tokens: 4096 max_model_len: 20480 cluster: @@ -19,7 +19,7 @@ buffer: taskset: name: alfworld-train storage_type: file - path: '/PATH/TO/ALFWORLD_DATA/' + path: ${oc.env:TRINITY_TASKSET_PATH} split: 'train' format: prompt_key: 'game_file' @@ -31,7 +31,7 @@ buffer: eval_tasksets: - name: alfworld-eval storage_type: file - path: '/PATH/TO/ALFWORLD_DATA/' + path: ${oc.env:TRINITY_TASKSET_PATH} split: 'test' format: prompt_key: 'game_file' diff --git a/examples/RAFT_alfworld/README.md b/examples/RAFT_alfworld/README.md index 291a150005..1f0835fdb9 100644 --- 
a/examples/RAFT_alfworld/README.md +++ b/examples/RAFT_alfworld/README.md @@ -22,7 +22,7 @@ python examples/grpo_alfworld/get_alfworld_data.py ### Configuration Before running, make sure to update the following paths in the YAML files: -- `model.model_path`: Replace with your model path (e.g., `/PATH/TO/MODEL/`) +- `model.model_path`: Replace with your model path (e.g., `Qwen/Qwen2.5-7B-Instruct`) - `buffer.explorer_input.taskset.path`: Replace with your alfworld dataset path - `buffer.explorer_input.eval_tasksets[0].path`: Replace with your alfworld dataset path - `checkpoint_root_dir`: Replace with your desired checkpoint directory diff --git a/examples/agentscope_tool_react/agentscope_tool_react_dapo.yaml b/examples/agentscope_tool_react/agentscope_tool_react_dapo.yaml index 0ff1792fce..cfc6e80152 100644 --- a/examples/agentscope_tool_react/agentscope_tool_react_dapo.yaml +++ b/examples/agentscope_tool_react/agentscope_tool_react_dapo.yaml @@ -1,12 +1,12 @@ project: "Trinity-RFT-dapo-reactv2" name: "Qwen3-8B-dapo-reactv2" -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: grpo repeat_times: 8 advantage_fn: step_wise_grpo model: - model_path: /PATH/TO/MODEL/Qwen3-8B + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen3-8B} max_response_tokens: 16384 max_model_len: 24576 cluster: diff --git a/examples/agentscope_tool_react/agentscope_tool_react_gsm8k.yaml b/examples/agentscope_tool_react/agentscope_tool_react_gsm8k.yaml index 07fe2a2756..a40e96fd8e 100644 --- a/examples/agentscope_tool_react/agentscope_tool_react_gsm8k.yaml +++ b/examples/agentscope_tool_react/agentscope_tool_react_gsm8k.yaml @@ -1,12 +1,12 @@ project: "Trinity-RFT-gsm8k-reactv2" name: "Qwen3-4B-gsm8k-reactv2" -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: grpo repeat_times: 8 advantage_fn: step_wise_grpo model: 
- model_path: /PATH/TO/MODEL/Qwen3-4B + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen3-4B} max_response_tokens: 16384 max_model_len: 24576 cluster: diff --git a/examples/asymre_gsm8k/gsm8k.yaml b/examples/asymre_gsm8k/gsm8k.yaml index cde75174e8..7977a0957c 100644 --- a/examples/asymre_gsm8k/gsm8k.yaml +++ b/examples/asymre_gsm8k/gsm8k.yaml @@ -4,9 +4,9 @@ project: "Trinity-RFT-GSM8K" name: asymre_gsm8k -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 max_model_len: 1280 algorithm: @@ -26,7 +26,8 @@ buffer: taskset: name: gsm8k storage_type: file - path: /PATH/TO/DATASET/ + path: 'openai/gsm8k' + subset_name: 'main' split: train format: prompt_key: question @@ -36,7 +37,8 @@ buffer: eval_tasksets: - name: gsm8k-eval storage_type: file - path: /PATH/TO/DATASET/ + path: 'openai/gsm8k' + subset_name: 'main' split: test format: prompt_key: question diff --git a/examples/asymre_math/math.yaml b/examples/asymre_math/math.yaml index 888ef61bf0..b7ca4f5e61 100644 --- a/examples/asymre_math/math.yaml +++ b/examples/asymre_math/math.yaml @@ -3,9 +3,9 @@ # https://arxiv.org/abs/2506.20520. 
project: "Trinity-RFT-MATH" name: asymre_math -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} model: - model_path: /PATH/TO/MODEL/ # the path to your model + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-3B-Instruct} # the path to your model max_response_tokens: 1024 max_model_len: 1280 algorithm: @@ -26,7 +26,7 @@ buffer: taskset: name: math storage_type: file - path: /PATH/TO/DATASET/ + path: ${oc.env:TRINITY_TASKSET_PATH} format: prompt_key: 'problem' response_key: 'solution' @@ -37,7 +37,7 @@ buffer: eval_tasksets: - name: math storage_type: file - path: /PATH/TO/DATASET/ + path: ${oc.env:TRINITY_TASKSET_PATH} split: 'test' format: prompt_key: 'problem' diff --git a/examples/async_gsm8k/explorer.yaml b/examples/async_gsm8k/explorer.yaml index 45e2f74f6a..07c21ef043 100644 --- a/examples/async_gsm8k/explorer.yaml +++ b/examples/async_gsm8k/explorer.yaml @@ -1,12 +1,12 @@ project: "Trinity-RFT-gsm8k" name: "async-qwen2.5-1.5B-gsm8k" mode: explore -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: grpo repeat_times: 8 model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 max_model_len: 1280 cluster: @@ -19,7 +19,8 @@ buffer: taskset: name: gsm8k storage_type: file - path: /PATH/TO/DATASET/ + path: 'openai/gsm8k' + subset_name: 'main' split: train format: prompt_key: 'question' diff --git a/examples/async_gsm8k/trainer.yaml b/examples/async_gsm8k/trainer.yaml index 90e4663f98..ae60168ad7 100644 --- a/examples/async_gsm8k/trainer.yaml +++ b/examples/async_gsm8k/trainer.yaml @@ -1,12 +1,12 @@ project: "Trinity-RFT-gsm8k" name: "async-qwen2.5-1.5B-gsm8k" mode: train -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: grpo 
repeat_times: 8 model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 max_model_len: 1280 cluster: @@ -19,7 +19,9 @@ buffer: taskset: name: gsm8k storage_type: file - path: /PATH/TO/DATASET/ + path: 'openai/gsm8k' + subset_name: 'main' + split: 'train' format: prompt_key: 'question' response_key: 'answer' diff --git a/examples/cispo_gsm8k/gsm8k.yaml b/examples/cispo_gsm8k/gsm8k.yaml index 8e01e321c5..02d787ef9b 100644 --- a/examples/cispo_gsm8k/gsm8k.yaml +++ b/examples/cispo_gsm8k/gsm8k.yaml @@ -1,11 +1,11 @@ project: "Trinity-RFT-gsm8k" name: "qwen2.5-1.5B-gsm8k-cispo" -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: cispo repeat_times: 8 model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 max_model_len: 1280 cluster: @@ -45,7 +45,7 @@ buffer: # sft_warmup_dataset: # Uncomment these to enable sft warmup # name: warmup_data # storage_type: file - # path: '/PATH/TO/WARMUP_DATA/' + # path: ${oc.env:TRINITY_SFT_DATASET_PATH} explorer: eval_interval: 50 runner_per_model: 8 diff --git a/examples/dapo_math/dapo.yaml b/examples/dapo_math/dapo.yaml index 333ccf9c03..1bd7b1d7ed 100644 --- a/examples/dapo_math/dapo.yaml +++ b/examples/dapo_math/dapo.yaml @@ -1,8 +1,8 @@ project: Trinity-RFT-example name: dapo -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-32B-Instruct} max_response_tokens: 20480 max_model_len: 21504 algorithm: @@ -40,7 +40,7 @@ buffer: eval_tasksets: - name: AIME2024 storage_type: file - path: /PATH/TO/AIME2024/ + path: ${oc.env:TRINITY_TASKSET_PATH} # e.g. 
path to AIME2024 split: 'test' repeat_times: 32 format: diff --git a/examples/dpo_human_in_the_loop/dpo.yaml b/examples/dpo_human_in_the_loop/dpo.yaml index bd3ab3a6cb..b202c5e211 100644 --- a/examples/dpo_human_in_the_loop/dpo.yaml +++ b/examples/dpo_human_in_the_loop/dpo.yaml @@ -41,9 +41,9 @@ algorithm: kl_loss_fn: k1 kl_loss_fn_args: kl_coef: 0.1 -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} model: - model_path: /PATH/TO/MODEL + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-7B-Instruct} max_response_tokens: 1024 max_model_len: 1536 cluster: diff --git a/examples/dpo_humanlike/dpo.yaml b/examples/dpo_humanlike/dpo.yaml index 81042ff431..c932fa0a43 100644 --- a/examples/dpo_humanlike/dpo.yaml +++ b/examples/dpo_humanlike/dpo.yaml @@ -6,9 +6,9 @@ algorithm: kl_loss_fn: k1 kl_loss_fn_args: kl_coef: 0.1 -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} model: - model_path: /PATH/TO/MODEL + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-7B-Instruct} max_response_tokens: 1024 max_model_len: 1536 cluster: @@ -22,7 +22,7 @@ buffer: name: dpo_buffer storage_type: file enable_progress_bar: True - path: /PATH/TO/DATASET/ + path: ${oc.env:TRINITY_TASKSET_PATH} format: prompt_type: plaintext # plaintext/messages prompt_key: prompt diff --git a/examples/grpo_alfworld/alfworld.yaml b/examples/grpo_alfworld/alfworld.yaml index ca19267ad6..ed993790d4 100644 --- a/examples/grpo_alfworld/alfworld.yaml +++ b/examples/grpo_alfworld/alfworld.yaml @@ -1,11 +1,11 @@ project: "ALFWORLD" name: "ALFWORLD_RFT" -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ALFWORLD_RFT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: grpo repeat_times: 8 model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-7B-Instruct} max_response_tokens: 16384 max_model_len: 20480 
cluster: @@ -43,7 +43,7 @@ explorer: gpu_memory_utilization: 0.7 enable_chunked_prefill: true env_vars: - TMPDIR: /PATH/TO/ALFWORLD_TMP_DIR + TMPDIR: ${oc.env:TMPDIR,/tmp} synchronizer: sync_method: 'nccl' sync_interval: 5 diff --git a/examples/grpo_alfworld_general_multi_step/alfworld.yaml b/examples/grpo_alfworld_general_multi_step/alfworld.yaml index 8829b3b77f..f93f028420 100644 --- a/examples/grpo_alfworld_general_multi_step/alfworld.yaml +++ b/examples/grpo_alfworld_general_multi_step/alfworld.yaml @@ -1,12 +1,12 @@ project: "ALFWORLD" name: "Step_Wise_Alfworld" -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ALFWORLD_RFT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: grpo repeat_times: 16 advantage_fn: step_wise_grpo model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-7B-Instruct} max_response_tokens: 16384 max_model_len: 20480 cluster: @@ -50,7 +50,7 @@ explorer: gpu_memory_utilization: 0.7 enable_chunked_prefill: true env_vars: - TMPDIR: /PATH/TO/ALFWORLD_TMP_DIR + TMPDIR: ${oc.env:TMPDIR,/tmp} synchronizer: sync_style: dynamic_by_explorer sync_method: 'nccl' diff --git a/examples/grpo_email_search/email_search.yaml b/examples/grpo_email_search/email_search.yaml index 6b861ae96c..67f0cb098e 100644 --- a/examples/grpo_email_search/email_search.yaml +++ b/examples/grpo_email_search/email_search.yaml @@ -1,12 +1,12 @@ project: "Trinity_Multi_Step" name: "Email_Example" -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: grpo repeat_times: 8 advantage_fn: grpo model: - model_path: /PATH/TO/Qwen3-4B-Instruct-2507 + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen3-4B-Instruct-2507} max_response_tokens: 4096 max_model_len: 20480 cluster: @@ -20,7 +20,7 @@ buffer: taskset: name: enron_train storage_type: file - path: '/PATH/TO/ENRON_EMAIL_QA_PAIRS/' + path: 
${oc.env:TRINITY_TASKSET_PATH} # e.g. path to ENRON_EMAIL_QA_PAIRS split: train format: prompt_key: 'question' @@ -35,7 +35,7 @@ buffer: eval_tasksets: - name: enron_test storage_type: file - path: '/PATH/TO/ENRON_EMAIL_QA_PAIRS/' + path: ${oc.env:TRINITY_TASKSET_PATH} # e.g. path to ENRON_EMAIL_QA_PAIRS split: test format: prompt_key: 'question' @@ -73,7 +73,7 @@ explorer: gpu_memory_utilization: 0.7 enable_chunked_prefill: true auxiliary_models: - - model_path: /PATH/TO/Qwen3-30B-A3B-Instruct-2507 + - model_path: Qwen/Qwen3-30B-A3B-Instruct-2507 engine_num: 1 tensor_parallel_size: 2 enable_thinking: false diff --git a/examples/grpo_gsm8k/gsm8k.yaml b/examples/grpo_gsm8k/gsm8k.yaml index 327d6fb10b..78d9a2d85b 100644 --- a/examples/grpo_gsm8k/gsm8k.yaml +++ b/examples/grpo_gsm8k/gsm8k.yaml @@ -1,11 +1,11 @@ project: "Trinity-RFT-gsm8k" name: "qwen2.5-1.5B-gsm8k" -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: grpo repeat_times: 8 model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 max_model_len: 1280 cluster: @@ -45,7 +45,7 @@ buffer: # sft_warmup_dataset: # Uncomment these to enable sft warmup # name: warmup_data # storage_type: file - # path: '/PATH/TO/WARMUP_DATA/' + # path: ${oc.env:TRINITY_SFT_DATASET_PATH} explorer: eval_interval: 50 runner_per_model: 8 diff --git a/examples/grpo_gsm8k_experience_pipeline/gsm8k.yaml b/examples/grpo_gsm8k_experience_pipeline/gsm8k.yaml index 18aa511ac8..e78f7abd9f 100644 --- a/examples/grpo_gsm8k_experience_pipeline/gsm8k.yaml +++ b/examples/grpo_gsm8k_experience_pipeline/gsm8k.yaml @@ -1,6 +1,6 @@ project: "Trinity-RFT-gsm8k-exp-pipe" name: "qwen2.5-1.5B-gsm8k-exp-pipe" -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} service: data_juicer: @@ -25,7 +25,7 @@ algorithm: algorithm_type: 
grpo repeat_times: 8 model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 max_model_len: 1280 cluster: diff --git a/examples/grpo_gsm8k_ruler/gsm8k_ruler.yaml b/examples/grpo_gsm8k_ruler/gsm8k_ruler.yaml index f578f4df67..2adddd0b6e 100644 --- a/examples/grpo_gsm8k_ruler/gsm8k_ruler.yaml +++ b/examples/grpo_gsm8k_ruler/gsm8k_ruler.yaml @@ -1,13 +1,13 @@ project: "Trinity-RFT-gsm8k-ruler" name: "qwen2.5-1.5B-gsm8k-ruler" -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: grpo advantage_fn_args: std_threshold: 0.0001 # effectively zero repeat_times: 8 model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 max_model_len: 1280 cluster: @@ -53,7 +53,7 @@ explorer: dtype: bfloat16 seed: 42 auxiliary_models: - - model_path: /PATH/TO/Qwen2.5-32B-Instruct + - model_path: Qwen/Qwen2.5-32B-Instruct engine_num: 1 tensor_parallel_size: 2 enable_thinking: false diff --git a/examples/grpo_gsm8k_task_pipeline/gsm8k.yaml b/examples/grpo_gsm8k_task_pipeline/gsm8k.yaml index 0dd9397efb..bf7d996feb 100644 --- a/examples/grpo_gsm8k_task_pipeline/gsm8k.yaml +++ b/examples/grpo_gsm8k_task_pipeline/gsm8k.yaml @@ -1,6 +1,6 @@ project: "Trinity-RFT-gsm8k-task-pipeline" name: "qwen2.5-1.5B-gsm8k-task-pipeline" -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: grpo repeat_times: 8 @@ -20,11 +20,11 @@ data_processor: input_keys: ["question", "answer"] field_names: ["Question", "Answer"] inputs: # the output will be set to the explorer input automatically - - /PATH/TO/GSM8K/DATA/FILE + - ${oc.env:TRINITY_TASKSET_PATH} target_fields: ["question", "answer"] model: - model_path: /PATH/TO/MODEL/ + model_path: 
${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 max_model_len: 1280 cluster: diff --git a/examples/grpo_math/math.yaml b/examples/grpo_math/math.yaml index ead393ffc0..601e3dea29 100644 --- a/examples/grpo_math/math.yaml +++ b/examples/grpo_math/math.yaml @@ -1,8 +1,8 @@ project: grpo_math name: grpo_math_example -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-7B-Instruct} max_response_tokens: 3072 max_model_len: 4096 algorithm: @@ -18,7 +18,7 @@ buffer: taskset: name: math storage_type: file - path: /PATH/TO/DATASET/ + path: ${oc.env:TRINITY_TASKSET_PATH} format: prompt_key: 'question' response_key: 'gt_answer' diff --git a/examples/grpo_sciworld/sciworld.yaml b/examples/grpo_sciworld/sciworld.yaml index 8d7bc08ecd..8b2c3703d3 100644 --- a/examples/grpo_sciworld/sciworld.yaml +++ b/examples/grpo_sciworld/sciworld.yaml @@ -1,11 +1,11 @@ project: "sciworld" name: "sciworld_RFT" -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: grpo repeat_times: 8 model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-7B-Instruct} max_response_tokens: 16384 max_model_len: 20480 cluster: diff --git a/examples/grpo_toolcall/toolace.yaml b/examples/grpo_toolcall/toolace.yaml index e6d324387f..1bce1884c4 100644 --- a/examples/grpo_toolcall/toolace.yaml +++ b/examples/grpo_toolcall/toolace.yaml @@ -1,11 +1,11 @@ project: "Trinity-RFT-toolace" name: "qwen2.5-7B-toolace" -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: grpo repeat_times: 8 model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-7B-Instruct} max_response_tokens: 8192
max_model_len: 12288 cluster: diff --git a/examples/grpo_vlm/vlm.yaml b/examples/grpo_vlm/vlm.yaml index 7ce58841a1..890fed3349 100644 --- a/examples/grpo_vlm/vlm.yaml +++ b/examples/grpo_vlm/vlm.yaml @@ -1,11 +1,11 @@ project: "Trinity-RFT" name: "grpo_vlm" -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: grpo repeat_times: 8 model: - model_path: /PATH/TO/Qwen2.5-VL-3B-Instruct + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-VL-3B-Instruct} max_response_tokens: 1024 max_model_len: 20480 cluster: diff --git a/examples/grpo_webshop/webshop.yaml b/examples/grpo_webshop/webshop.yaml index 4ce466aa29..7b7b7f0611 100644 --- a/examples/grpo_webshop/webshop.yaml +++ b/examples/grpo_webshop/webshop.yaml @@ -1,11 +1,11 @@ project: "WEBSHOP" name: "WEBSHOP_RFT" -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: grpo repeat_times: 8 model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-7B-Instruct} max_response_tokens: 16384 max_model_len: 20480 cluster: diff --git a/examples/mix_chord/mix_chord.yaml b/examples/mix_chord/mix_chord.yaml index 46d0b8caff..aaa3f6eeee 100644 --- a/examples/mix_chord/mix_chord.yaml +++ b/examples/mix_chord/mix_chord.yaml @@ -1,6 +1,6 @@ project: "mix_chord" name: "test_mix_chord" -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: mix_chord repeat_times: 8 # or 16 for better performance in math related tasks @@ -23,7 +23,7 @@ algorithm: train_batch_size_expert: 64 train_batch_size_usual: 256 # 32 batchsize * 8 repeat times model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 10240 max_model_len: 11264 cluster: @@ -37,7 +37,7 @@ buffer: taskset: name: 
openr1_data_filtered_int storage_type: file - path: /PATH/TO/RL_DATASET + path: ${oc.env:TRINITY_TASKSET_PATH} format: prompt_key: 'problem' response_key: 'answer' @@ -57,7 +57,7 @@ buffer: total_epochs: 25 name: SFT_data storage_type: file - path: /PATH/TO/SFT_DATASET + path: ${oc.env:TRINITY_SFT_DATASET_PATH,open-r1/Mixture-of-Thoughts} split: 'train' format: prompt_type: messages diff --git a/examples/mix_math/mix_math.yaml b/examples/mix_math/mix_math.yaml index 043c57d55b..68d45d9696 100644 --- a/examples/mix_math/mix_math.yaml +++ b/examples/mix_math/mix_math.yaml @@ -1,6 +1,6 @@ project: "mix_math" name: "expert0.20_mu0.1" -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: mix repeat_times: 8 @@ -17,7 +17,7 @@ algorithm: train_batch_size_expert: 64 train_batch_size_usual: 256 model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 10240 max_model_len: 11264 cluster: @@ -31,7 +31,7 @@ buffer: taskset: name: math_train storage_type: file - path: /PATH/TO/DATASET/ + path: ${oc.env:TRINITY_TASKSET_PATH} split: 'train' format: prompt_key: 'problem' @@ -42,7 +42,7 @@ buffer: eval_tasksets: - name: math_eval storage_type: file - path: /PATH/TO/DATASET/ + path: ${oc.env:TRINITY_TASKSET_PATH} split: 'test' format: prompt_key: 'problem' @@ -52,12 +52,12 @@ buffer: experience_buffer: name: math_buffer storage_type: queue - path: /PATH/TO/BUFFER/ + path: ${oc.env:TRINITY_BUFFER_PATH,./exp_buffer} sft_warmup_dataset: total_epochs: 10 name: math_sft storage_type: file - path: /PATH/TO/EXPERT_DATA/ + path: ${oc.env:TRINITY_SFT_DATASET_PATH,open-r1/Mixture-of-Thoughts} split: 'train' format: prompt_type: messages diff --git a/examples/opmd_gsm8k/opmd_gsm8k.yaml b/examples/opmd_gsm8k/opmd_gsm8k.yaml index 81d1ae50c9..cdce17f58d 100644 --- a/examples/opmd_gsm8k/opmd_gsm8k.yaml +++ 
b/examples/opmd_gsm8k/opmd_gsm8k.yaml @@ -1,11 +1,11 @@ project: "Trinity-RFT-gsm8k-test-opmd" name: "opmd_test" -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: opmd repeat_times: 8 model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 512 max_model_len: 1024 cluster: diff --git a/examples/ppo_countdown/countdown.yaml b/examples/ppo_countdown/countdown.yaml index 7ce9a604d4..9df019ca61 100644 --- a/examples/ppo_countdown/countdown.yaml +++ b/examples/ppo_countdown/countdown.yaml @@ -1,11 +1,11 @@ project: "Trinity-RFT-countdown" name: "qwen2.5-1.5B-countdown" -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: ppo repeat_times: 5 model: - model_path: '/PATH/TO/MODEL/CHECKPOINT/' + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 max_model_len: 1280 cluster: diff --git a/examples/ppo_countdown_megatron/countdown.yaml b/examples/ppo_countdown_megatron/countdown.yaml index d0996bcedd..f0aa2724b2 100644 --- a/examples/ppo_countdown_megatron/countdown.yaml +++ b/examples/ppo_countdown_megatron/countdown.yaml @@ -1,11 +1,11 @@ project: "Trinity-RFT-countdown" name: "qwen2.5-1.5B-countdown" -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: ppo repeat_times: 5 model: - model_path: '/PATH/TO/MODEL/CHECKPOINT/' + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 max_model_len: 1280 cluster: diff --git a/examples/sft_mot/sft.yaml b/examples/sft_mot/sft.yaml index 88d78cb06f..4542310e56 100644 --- a/examples/sft_mot/sft.yaml +++ b/examples/sft_mot/sft.yaml @@ -1,11 +1,11 @@ mode: train project: "Trinity-RFT-example" name: "sft_mot" 
-checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: sft model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-7B-Instruct} max_response_tokens: 10240 max_model_len: 10752 cluster: diff --git a/examples/sppo_gsm8k/gsm8k.yaml b/examples/sppo_gsm8k/gsm8k.yaml index 49c886fde4..700f89029d 100644 --- a/examples/sppo_gsm8k/gsm8k.yaml +++ b/examples/sppo_gsm8k/gsm8k.yaml @@ -4,9 +4,9 @@ project: "Trinity-RFT-GSM8K" name: sppo_gsm8k -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 max_model_len: 1280 algorithm: @@ -24,7 +24,8 @@ buffer: taskset: name: gsm8k storage_type: file - path: /PATH/TO/DATASET/ + path: 'openai/gsm8k' + subset_name: 'main' split: train format: prompt_key: question @@ -34,7 +35,8 @@ buffer: eval_tasksets: - name: gsm8k-eval storage_type: file - path: /PATH/TO/DATASET/ + path: 'openai/gsm8k' + subset_name: 'main' split: test format: prompt_key: question diff --git a/examples/topr_gsm8k/gsm8k.yaml b/examples/topr_gsm8k/gsm8k.yaml index 98aec1e7d7..bb7cc094e2 100644 --- a/examples/topr_gsm8k/gsm8k.yaml +++ b/examples/topr_gsm8k/gsm8k.yaml @@ -1,11 +1,11 @@ project: "Trinity-RFT-gsm8k" name: "qwen2.5-1.5B-gsm8k-topr" -checkpoint_root_dir: /PATH/TO/CHECKPOINT/ +checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} algorithm: algorithm_type: topr repeat_times: 8 model: - model_path: /PATH/TO/MODEL/ + model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct} max_response_tokens: 1024 max_model_len: 1280 cluster: diff --git a/tests/common/vllm_test.py b/tests/common/vllm_test.py index c442f60f6e..d9c54deda5 100644 --- a/tests/common/vllm_test.py +++ b/tests/common/vllm_test.py @@ -1,11 +1,16 @@ -import os 
import unittest import torch from parameterized import parameterized_class from transformers import AutoTokenizer -from tests.tools import RayUnittestBase, RayUnittestBaseAysnc, get_template_config +from tests.tools import ( + RayUnittestBase, + RayUnittestBaseAysnc, + get_api_model_path, + get_model_path, + get_template_config, +) from trinity.common.models import create_inference_models from trinity.common.models.model import ModelWrapper from trinity.common.models.utils import ( @@ -13,25 +18,6 @@ tokenize_and_mask_messages_hf, ) - -def get_model_path() -> str: - path = os.environ.get("MODEL_PATH") - if not path: - raise EnvironmentError( - "Please set `export MODEL_PATH=` before running this test." - ) - return path - - -def get_api_model_path() -> str: - path = os.environ.get("API_MODEL_PATH") - if not path: - raise EnvironmentError( - "Please set `export API_MODEL_PATH=` before running this test." - ) - return path - - DEBUG = False diff --git a/tests/tools.py b/tests/tools.py index 02c3cd5f2c..7dc94bf881 100644 --- a/tests/tools.py +++ b/tests/tools.py @@ -7,7 +7,15 @@ from tensorboard.backend.event_processing.event_accumulator import EventAccumulator from trinity.common.config import Config, FormatConfig, StorageConfig, load_config -from trinity.common.constants import PromptType +from trinity.common.constants import ( + CHECKPOINT_ROOT_DIR_ENV_VAR, + MODEL_PATH_ENV_VAR, + PromptType, +) + +API_MODEL_PATH_ENV_VAR = "TRINITY_API_MODEL_PATH" +VLM_MODEL_PATH_ENV_VAR = "TRINITY_VLM_MODEL_PATH" +SFT_DATASET_PATH_ENV_VAR = "TRINITY_SFT_DATASET_PATH" def get_template_config() -> Config: @@ -21,28 +29,37 @@ def get_template_config() -> Config: def get_model_path() -> str: - path = os.environ.get("MODEL_PATH") + path = os.environ.get(MODEL_PATH_ENV_VAR) + if not path: + raise EnvironmentError( + f"Please set `export {MODEL_PATH_ENV_VAR}=` before running this test." 
+ ) + return path + + +def get_api_model_path() -> str: + path = os.environ.get(API_MODEL_PATH_ENV_VAR) if not path: raise EnvironmentError( - "Please set `export MODEL_PATH=` before running this test." + f"Please set `export {API_MODEL_PATH_ENV_VAR}=` before running this test." ) return path def get_checkpoint_path() -> str: - path = os.environ.get("CHECKPOINT_PATH") + path = os.environ.get(CHECKPOINT_ROOT_DIR_ENV_VAR) if not path: raise EnvironmentError( - "Please set `export CHECKPOINT_PATH=` before running this test." + f"Please set `export {CHECKPOINT_ROOT_DIR_ENV_VAR}=` before running this test." ) return path def get_vision_languge_model_path() -> str: - path = os.environ.get("VLM_MODEL_PATH") + path = os.environ.get(VLM_MODEL_PATH_ENV_VAR) if not path: raise EnvironmentError( - "Please set `export VLM_MODEL_PATH=` before running this test." + f"Please set `export {VLM_MODEL_PATH_ENV_VAR}=` before running this test." ) return path diff --git a/trinity/common/constants.py b/trinity/common/constants.py index aa35472c5b..10fff98d99 100644 --- a/trinity/common/constants.py +++ b/trinity/common/constants.py @@ -10,6 +10,10 @@ ROLLOUT_WEIGHT_SYNC_GROUP_NAME = "rollout_weight_sync" # trinity env var names +CHECKPOINT_ROOT_DIR_ENV_VAR = "TRINITY_CHECKPOINT_ROOT_DIR" +MODEL_PATH_ENV_VAR = "TRINITY_MODEL_PATH" +TASKSET_PATH_ENV_VAR = "TRINITY_TASKSET_PATH" +BUFFER_PATH_ENV_VAR = "TRINITY_BUFFER_PATH" PLUGIN_DIRS_ENV_VAR = "TRINITY_PLUGIN_DIRS" LOG_DIR_ENV_VAR = "TRINITY_LOG_DIR" # log dir LOG_LEVEL_ENV_VAR = "TRINITY_LOG_LEVEL" # global log level