Merged
19 changes: 9 additions & 10 deletions .github/workflows/docker/docker-compose.yaml
@@ -6,12 +6,12 @@ services:
     environment:
       - HF_ENDPOINT=https://hf-mirror.com
       - RAY_ADDRESS=auto
-      - CHECKPOINT_ROOT_DIR=/mnt/checkpoints
-      - DATA_ROOT_DIR=/mnt/data
-      - MODEL_PATH=/mnt/models/Qwen3-0.6B
-      - API_MODEL_PATH=/mnt/models/Qwen3-1.7B
-      - VLM_MODEL_PATH=/mnt/models/Qwen2.5-VL-3B
-      - CHECKPOINT_PATH=/mnt/checkpoints
+      - TRINITY_CHECKPOINT_ROOT_DIR=/mnt/checkpoints
+      - TRINITY_TASKSET_PATH=/mnt/data
+      - TRINITY_SFT_DATASET_PATH=/mnt/data
+      - TRINITY_MODEL_PATH=/mnt/models/Qwen3-0.6B
+      - TRINITY_API_MODEL_PATH=/mnt/models/Qwen3-1.7B
+      - TRINITY_VLM_MODEL_PATH=/mnt/models/Qwen2.5-VL-3B
     working_dir: /workspace
     networks:
       - trinity-network
@@ -33,10 +33,9 @@ services:
     command: sh -c "pip install -e .[dev] && ray start --address=trinity-node-1:6379 --block"
     environment:
       - HF_ENDPOINT=https://hf-mirror.com
-      - CHECKPOINT_ROOT_DIR=/mnt/checkpoints
-      - DATA_ROOT_DIR=/mnt/data
-      - MODEL_PATH=/mnt/models/Qwen3-1.7B
-      - CHECKPOINT_PATH=/mnt/checkpoints
+      - TRINITY_CHECKPOINT_ROOT_DIR=/mnt/checkpoints
+      - TRINITY_TASKSET_PATH=/mnt/data
+      - TRINITY_MODEL_PATH=/mnt/models/Qwen3-1.7B
     working_dir: /workspace
     volumes:
       - trinity-volume:/mnt
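The rename drops the old unprefixed names entirely (`CHECKPOINT_PATH` disappears, and the single `DATA_ROOT_DIR` is split into `TRINITY_TASKSET_PATH` and `TRINITY_SFT_DATASET_PATH`). For scripts that still export the legacy names, a hypothetical migration helper might look like this — the old→new mapping is read off the compose diff above, but the helper itself is not part of the repository:

```python
import os

# Old -> new variable names, per the compose changes above.
# Note: in the compose file DATA_ROOT_DIR feeds both TRINITY_TASKSET_PATH
# and TRINITY_SFT_DATASET_PATH; we map it to the taskset path here.
RENAMES = {
    "CHECKPOINT_ROOT_DIR": "TRINITY_CHECKPOINT_ROOT_DIR",
    "DATA_ROOT_DIR": "TRINITY_TASKSET_PATH",
    "MODEL_PATH": "TRINITY_MODEL_PATH",
    "API_MODEL_PATH": "TRINITY_API_MODEL_PATH",
    "VLM_MODEL_PATH": "TRINITY_VLM_MODEL_PATH",
}

def migrate_env(env: dict) -> dict:
    """Return a copy of `env` with legacy names re-keyed to the new ones."""
    out = dict(env)
    for old, new in RENAMES.items():
        if old in out and new not in out:
            out[new] = out.pop(old)
    return out

legacy = {"MODEL_PATH": "/mnt/models/Qwen3-0.6B", "HF_ENDPOINT": "https://hf-mirror.com"}
migrated = migrate_env(legacy)
print(migrated["TRINITY_MODEL_PATH"])  # /mnt/models/Qwen3-0.6B
```

Existing keys that already use a new name are left untouched, so the helper is safe to run on an environment that mixes old and new names.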
4 changes: 2 additions & 2 deletions benchmark/bench.py
@@ -8,6 +8,7 @@
 import yaml

 from trinity.algorithm.algorithm import ALGORITHM_TYPE
+from trinity.common.constants import MODEL_PATH_ENV_VAR
 from trinity.utils.dlc_utils import get_dlc_env_vars

@@ -77,9 +78,8 @@ def prepare_configs(args, rank, current_time):
     set_engine_num(config, args)
     config["model"]["model_path"] = (
         args.model_path
-        or os.environ.get("MODEL_PATH")
         or config["model"]["model_path"]
-        or "Qwen/Qwen2.5-1.5B-Instruct"
+        or os.environ.get(MODEL_PATH_ENV_VAR, "Qwen/Qwen2.5-1.5B-Instruct")
     )
     if ALGORITHM_TYPE.get(config["algorithm"]["algorithm_type"]).use_critic:
         config["model"]["critic_model_path"] = (
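The reordered fallback chain now resolves the model path as CLI argument → config value → environment variable → hard-coded default, with the env-var name centralized in `trinity.common.constants.MODEL_PATH_ENV_VAR`. A standalone sketch of that precedence — the constant's actual value is assumed here, not taken from the source:

```python
import os

# Assumed value of the constant; the real one lives in trinity.common.constants.
MODEL_PATH_ENV_VAR = "TRINITY_MODEL_PATH"

def pick_model_path(cli_value, config_value):
    """CLI flag wins, then the YAML config value, then the env var, then the default."""
    return (
        cli_value
        or config_value
        or os.environ.get(MODEL_PATH_ENV_VAR, "Qwen/Qwen2.5-1.5B-Instruct")
    )

os.environ.pop(MODEL_PATH_ENV_VAR, None)
print(pick_model_path(None, None))          # Qwen/Qwen2.5-1.5B-Instruct
os.environ[MODEL_PATH_ENV_VAR] = "/mnt/models/Qwen3-0.6B"
print(pick_model_path(None, None))          # /mnt/models/Qwen3-0.6B
print(pick_model_path(None, "cfg/model"))   # cfg/model (config beats env var)
```

One subtlety of `or`-chains: an empty string in the config counts as falsy and falls through to the env var, which is usually the desired behavior for unset placeholder fields.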
21 changes: 12 additions & 9 deletions docs/sphinx_doc/source/tutorial/example_async_mode.md
@@ -15,12 +15,12 @@ Assuming we have a node with 8 GPUs, we allocate 4 GPUs for the trainer and 4 GP
 project: <project_name>
 name: <experiment_name>
 mode: explore
-checkpoint_root_dir: /PATH/TO/CHECKPOINT/
+checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
 algorithm:
   algorithm_type: grpo
   repeat_times: 8
 model:
-  model_path: /PATH/TO/MODEL/
+  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
 cluster:
   node_num: 1
   gpu_per_node: 4
@@ -31,7 +31,8 @@ buffer:
     taskset:
       name: gsm8k
       storage_type: file
-      path: /PATH/TO/DATASET/
+      path: 'openai/gsm8k'
+      subset_name: 'main'
       split: train
       format:
         prompt_key: 'question'
@@ -61,12 +62,12 @@ Key configurations in `trainer.yaml` are as follows:
 project: <project_name>
 name: <experiment_name>
 mode: train
-checkpoint_root_dir: /PATH/TO/CHECKPOINT/
+checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
 algorithm:
   algorithm_type: grpo
   repeat_times: 8
 model:
-  model_path: /PATH/TO/MODEL/
+  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
 cluster:
   node_num: 1
   gpu_per_node: 4
@@ -77,7 +78,8 @@ buffer:
     taskset:
       name: gsm8k
       storage_type: file
-      path: /PATH/TO/DATASET/
+      path: 'openai/gsm8k'
+      subset_name: 'main'
       format:
         prompt_key: 'question'
         response_key: 'answer'
@@ -129,12 +131,12 @@ Trinity-RFT also supports dynamic scaling in asynchronous mode. Continuing with
 project: <project_name>
 name: <experiment_name>
 mode: explore
-checkpoint_root_dir: /PATH/TO/CHECKPOINT/
+checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
 algorithm:
   algorithm_type: grpo
   repeat_times: 8
 model:
-  model_path: /PATH/TO/MODEL/
+  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
 cluster: # important
   node_num: 1
   gpu_per_node: 8
@@ -151,7 +153,8 @@ buffer:
     taskset: # important
       name: gsm8k
       storage_type: file
-      path: /PATH/TO/DATASET/
+      path: 'openai/gsm8k'
+      subset_name: 'main'
       format:
         prompt_key: 'question'
         response_key: 'answer'
@@ -53,7 +53,7 @@ data_processor:
       input_keys: ["question", "answer"]
       field_names: ["Question", "Answer"]
   inputs: # the output will be set to the explorer input automatically
-    - /PATH/TO/GSM8K/DATA/FILE
+    - ${oc.env:TRINITY_TASKSET_PATH}
   target_fields: ["question", "answer"]
 service:
   data_juicer:
8 changes: 4 additions & 4 deletions docs/sphinx_doc/source/tutorial/example_dpo.md
@@ -63,9 +63,9 @@ algorithm:
   kl_loss_fn: k1
   kl_loss_fn_args:
     kl_coef: 0.1 # value of beta in DPO
-checkpoint_root_dir: /PATH/TO/CHECKPOINT/
+checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
 model:
-  model_path: $MODEL_PATH/Qwen2.5-1.5B-Instruct
+  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
 cluster:
   node_num: 1
   gpu_per_node: 8
@@ -111,9 +111,9 @@ name: <experiment_name>
 mode: train
 algorithm:
   algorithm_type: sft
-checkpoint_root_dir: /PATH/TO/CHECKPOINT/
+checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
 model:
-  model_path: /PATH/TO/MODEL/
+  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
 cluster:
   node_num: 1
   gpu_per_node: 2
8 changes: 4 additions & 4 deletions docs/sphinx_doc/source/tutorial/example_reasoning_basic.md
@@ -106,12 +106,12 @@ We use the configurations in [`gsm8k.yaml`](https://github.com/modelscope/Trinit
 ```yaml
 project: <project_name>
 name: <experiment_name>
-checkpoint_root_dir: /PATH/TO/CHECKPOINT/
+checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
 algorithm:
   algorithm_type: grpo
   repeat_times: 8
 model:
-  model_path: /PATH/TO/MODEL/
+  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-1.5B-Instruct}
 cluster:
   node_num: 1
   gpu_per_node: 2
@@ -122,7 +122,7 @@ buffer:
     taskset:
       name: gsm8k
       storage_type: file
-      path: <$DATASET_PATH/gsm8k>
+      path: 'openai/gsm8k'
       subset_name: 'main'
       split: 'train'
       format:
@@ -133,7 +133,7 @@ buffer:
     eval_tasksets:
       - name: gsm8k-eval
         storage_type: file
-        path: <$DATASET_PATH/gsm8k>
+        path: 'openai/gsm8k'
         subset_name: 'main'
         split: 'test'
         format:
6 changes: 3 additions & 3 deletions docs/sphinx_doc/source/tutorial/example_step_wise.md
@@ -91,13 +91,13 @@ The example configuration is shown as:
 ```yaml
 project: "ALFWORLD"
 name: "Step_Wise_Alfworld"
-checkpoint_root_dir: /PATH/TO/CHECKPOINT/ALFWORLD_RFT/
+checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
 algorithm:
   algorithm_type: grpo
   repeat_times: 16
   advantage_fn: step_wise_grpo
 model:
-  model_path: /PATH/TO/MODEL/
+  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-7B-Instruct}
   max_response_tokens: 16384
   max_model_len: 20480
 cluster:
@@ -141,7 +141,7 @@ explorer:
     gpu_memory_utilization: 0.7
     enable_chunked_prefill: true
   env_vars:
-    TMPDIR: /PATH/TO/ALFWORLD_TMP_DIR
+    TMPDIR: ${oc.env:TMPDIR,/tmp}
 synchronizer:
   sync_style: dynamic_by_explorer
   sync_method: 'nccl'
14 changes: 7 additions & 7 deletions docs/sphinx_doc/source/tutorial/trinity_configs.md
@@ -11,7 +11,7 @@ The configuration for **Trinity-RFT** is defined in a `YAML` file and organized
 project: Trinity-RFT
 name: example
 mode: both
-checkpoint_root_dir: /PATH/TO/CHECKPOINT
+checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
 continue_from_checkpoint: true

 algorithm:
@@ -67,7 +67,7 @@ These are general settings that apply to the entire experiment.
 project: Trinity-RFT
 name: example
 mode: both
-checkpoint_root_dir: ${oc.env:CHECKPOINT_ROOT_DIR} # CHECKPOINT_ROOT_DIR is an environment variable set in advance
+checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} # TRINITY_CHECKPOINT_ROOT_DIR is an environment variable set in advance
 ```

 - `project`: The name of the project.
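The `${oc.env:NAME,default}` syntax used throughout these configs is OmegaConf's environment-variable resolver: it substitutes the variable's value when set and falls back to the literal default otherwise (the old `${oc.env:NAME}` form with no default raises an error when the variable is unset). A toy re-implementation of just that lookup, for illustration only — real configs should rely on OmegaConf itself:

```python
import os
import re

# Matches ${oc.env:NAME} and ${oc.env:NAME,default}.
_PATTERN = re.compile(r"\$\{oc\.env:([A-Za-z_][A-Za-z0-9_]*)(?:,([^}]*))?\}")

def resolve_env(value: str) -> str:
    """Resolve oc.env-style interpolations in a string, mimicking OmegaConf."""
    def _sub(match):
        name, default = match.group(1), match.group(2)
        if name in os.environ:
            return os.environ[name]
        if default is None:
            raise KeyError(f"environment variable {name} is not set and no default given")
        return default
    return _PATTERN.sub(_sub, value)

os.environ.pop("TRINITY_CHECKPOINT_ROOT_DIR", None)
print(resolve_env("${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}"))  # ./checkpoints

os.environ["TRINITY_CHECKPOINT_ROOT_DIR"] = "/mnt/checkpoints"
print(resolve_env("${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}"))  # /mnt/checkpoints
```

Adding a default to every interpolation is what lets these configs run out of the box while still honoring the `TRINITY_*` variables set by the docker-compose file.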
@@ -214,7 +214,7 @@ buffer:
     taskset:
       name: countdown_train
       storage_type: file
-      path: /PATH/TO/DATA
+      path: ${oc.env:TRINITY_TASKSET_PATH}
       split: train
       format:
         prompt_key: 'question'
@@ -227,7 +227,7 @@ buffer:
     eval_tasksets:
       - name: countdown_eval
         storage_type: file
-        path: /PATH/TO/DATA
+        path: ${oc.env:TRINITY_TASKSET_PATH}
         split: test
         repeat_times: 1
         format:
@@ -281,7 +281,7 @@ buffer:
   sft_warmup_dataset:
     name: warmup_data
     storage_type: file
-    path: /PATH/TO/WARMUP_DATA
+    path: ${oc.env:TRINITY_SFT_DATASET_PATH}
     format:
       prompt_key: 'question'
       response_key: 'answer'
@@ -338,7 +338,7 @@ explorer:
     tensor_parallel_size: 1
     enable_history: False
   auxiliary_models:
-    - model_path: /PATH/TO/MODEL
+    - model_path: Qwen/Qwen2.5-7B-Instruct
       tensor_parallel_size: 1
   eval_interval: 100
   eval_on_startup: True
@@ -438,7 +438,7 @@ data_processor:
       input_keys: ["question", "answer"]
       field_names: ["Question", "Answer"]
   inputs: # the output will be set to the explorer input automatically
-    - /PATH/TO/GSM8K/DATA/FILE
+    - ${oc.env:TRINITY_TASKSET_PATH}
   target_fields: ["question", "answer"]
 experience_pipeline:
   operators:
@@ -83,7 +83,7 @@ buffer:
   explorer_input:
     taskset:
       default_workflow: "math_workflow"
-      path: "/PATH/TO/FILE/DIR"
+      path: ${oc.env:TRINITY_TASKSET_PATH}
       format:
         prompt_key: "question"
         response_key: "answer"
8 changes: 4 additions & 4 deletions examples/RAFT_alfworld/RAFT_alfworld_7B.yaml
@@ -1,12 +1,12 @@
 project: "Trinity-RFT-RAFT-alfworld"
 name: "qwen2.5-7B-RAFT-alfworld"
 mode: both
-checkpoint_root_dir: /PATH/TO/CHECKPOINT/RAFT_ALFWORLD/
+checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
 algorithm:
   algorithm_type: raft
   repeat_times: 1
 model:
-  model_path: /PATH/TO/MODEL/
+  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-7B-Instruct}
   max_response_tokens: 4096
   max_model_len: 20480
 cluster:
@@ -19,7 +19,7 @@ buffer:
     taskset:
       name: alfworld-train
       storage_type: file
-      path: '/PATH/TO/ALFWORLD_DATA/'
+      path: ${oc.env:TRINITY_TASKSET_PATH}
       split: 'train'
       format:
         prompt_key: 'game_file'
@@ -31,7 +31,7 @@ buffer:
     eval_tasksets:
       - name: alfworld-eval
         storage_type: file
-        path: '/PATH/TO/ALFWORLD_DATA/'
+        path: ${oc.env:TRINITY_TASKSET_PATH}
         split: 'test'
         format:
           prompt_key: 'game_file'
8 changes: 4 additions & 4 deletions examples/RAFT_alfworld/RAFT_reflect_alfworld_7B.yaml
@@ -1,12 +1,12 @@
 project: "Trinity-RFT-RAFT-reflect-alfworld"
 name: "qwen2.5-7B-RAFT-reflect-alfworld"
 mode: both
-checkpoint_root_dir: /PATH/TO/CHECKPOINT/RAFT_REFLECT_ALFWORLD/
+checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
 algorithm:
   algorithm_type: raft
   repeat_times: 1
 model:
-  model_path: /PATH/TO/MODEL/
+  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-7B-Instruct}
   max_response_tokens: 4096
   max_model_len: 20480
 cluster:
@@ -19,7 +19,7 @@ buffer:
     taskset:
       name: alfworld-train
       storage_type: file
-      path: '/PATH/TO/ALFWORLD_DATA/'
+      path: ${oc.env:TRINITY_TASKSET_PATH}
       split: 'train'
       format:
         prompt_key: 'game_file'
@@ -31,7 +31,7 @@ buffer:
     eval_tasksets:
       - name: alfworld-eval
         storage_type: file
-        path: '/PATH/TO/ALFWORLD_DATA/'
+        path: ${oc.env:TRINITY_TASKSET_PATH}
         split: 'test'
         format:
           prompt_key: 'game_file'
2 changes: 1 addition & 1 deletion examples/RAFT_alfworld/README.md
@@ -22,7 +22,7 @@ python examples/grpo_alfworld/get_alfworld_data.py

 ### Configuration
 Before running, make sure to update the following paths in the YAML files:
-- `model.model_path`: Replace with your model path (e.g., `/PATH/TO/MODEL/`)
+- `model.model_path`: Replace with your model path (e.g., `Qwen/Qwen2.5-7B-Instruct`)
 - `buffer.explorer_input.taskset.path`: Replace with your alfworld dataset path
 - `buffer.explorer_input.eval_tasksets[0].path`: Replace with your alfworld dataset path
 - `checkpoint_root_dir`: Replace with your desired checkpoint directory
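Alternatively, since the YAML files in this PR now default these fields through `${oc.env:...}` interpolations, the paths can be supplied once via environment variables instead of editing each file. The values below are placeholders:

```shell
# Placeholder values -- substitute your own dataset and checkpoint paths.
export TRINITY_MODEL_PATH=Qwen/Qwen2.5-7B-Instruct
export TRINITY_TASKSET_PATH=/mnt/data/alfworld
export TRINITY_CHECKPOINT_ROOT_DIR=/mnt/checkpoints

echo "$TRINITY_MODEL_PATH"   # Qwen/Qwen2.5-7B-Instruct
```

Variables left unset simply fall through to the defaults baked into the interpolations.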
@@ -1,12 +1,12 @@
 project: "Trinity-RFT-dapo-reactv2"
 name: "Qwen3-8B-dapo-reactv2"
-checkpoint_root_dir: /PATH/TO/CHECKPOINT/
+checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
 algorithm:
   algorithm_type: grpo
   repeat_times: 8
   advantage_fn: step_wise_grpo
 model:
-  model_path: /PATH/TO/MODEL/Qwen3-8B
+  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen3-8B}
   max_response_tokens: 16384
   max_model_len: 24576
 cluster:
@@ -1,12 +1,12 @@
 project: "Trinity-RFT-gsm8k-reactv2"
 name: "Qwen3-4B-gsm8k-reactv2"
-checkpoint_root_dir: /PATH/TO/CHECKPOINT/
+checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
 algorithm:
   algorithm_type: grpo
   repeat_times: 8
   advantage_fn: step_wise_grpo
 model:
-  model_path: /PATH/TO/MODEL/Qwen3-4B
+  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen3-4B}
   max_response_tokens: 16384
   max_model_len: 24576
 cluster: