diff --git a/benchmark/config/countdown-template.yaml b/benchmark/config/countdown-template.yaml index edd231cc4e..d10eb33573 100644 --- a/benchmark/config/countdown-template.yaml +++ b/benchmark/config/countdown-template.yaml @@ -35,11 +35,9 @@ buffer: rollout_args: temperature: 1.0 logprobs: 0 + default_workflow_type: math_workflow + default_reward_fn_type: countdown_reward eval_tasksets: [] - default_workflow_type: math_workflow - default_reward_fn_type: countdown_reward - system_prompt: null - reply_prefix: null trainer_input: experience_buffer: name: experience_buffer diff --git a/benchmark/config/gsm8k-template.yaml b/benchmark/config/gsm8k-template.yaml index 7a76de2e8a..93f42166a6 100644 --- a/benchmark/config/gsm8k-template.yaml +++ b/benchmark/config/gsm8k-template.yaml @@ -40,11 +40,9 @@ buffer: rollout_args: temperature: 1.0 logprobs: 0 + default_workflow_type: math_workflow + default_reward_fn_type: math_reward eval_tasksets: [] - default_workflow_type: math_workflow - default_reward_fn_type: math_reward - system_prompt: null - reply_prefix: null trainer_input: experience_buffer: name: experience_buffer @@ -79,7 +77,7 @@ trainer: enable_preview: true grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 10240 + max_token_len_per_gpu: 10240 ulysses_sequence_parallel_size: 1 monitor: monitor_type: wandb diff --git a/docs/sphinx_doc/source/tutorial/example_async_mode.md b/docs/sphinx_doc/source/tutorial/example_async_mode.md index 7cf42f3621..64e692fb86 100644 --- a/docs/sphinx_doc/source/tutorial/example_async_mode.md +++ b/docs/sphinx_doc/source/tutorial/example_async_mode.md @@ -39,14 +39,14 @@ buffer: response_key: 'answer' rollout_args: temperature: 1.0 - default_workflow_type: 'math_workflow' + default_workflow_type: 'math_workflow' trainer_input: experience_buffer: name: gsm8k_buffer storage_type: queue path: 'sqlite:///gsm8k.db' explorer: - runner_num: 32 + runner_per_model: 8 rollout_model: engine_num: 4 synchronizer: @@ -86,7 +86,7 @@ buffer: response_key: 'answer' rollout_args: temperature: 1.0 - default_workflow_type: 'math_workflow' + default_workflow_type: 'math_workflow' trainer_input: experience_buffer: name: gsm8k_buffer @@ -98,7 +98,7 @@ synchronizer: trainer: grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 ``` @@ -133,7 +133,7 @@ cluster: # important gpu_per_node: 8 explorer: name: 'explorer_new' # important - runner_num: 64 + runner_per_model: 8 rollout_model: engine_num: 8 buffer: @@ -150,7 +150,7 @@ buffer: response_key: 'answer' rollout_args: temperature: 1.0 - default_workflow_type: 'math_workflow' + default_workflow_type: 'math_workflow' trainer_input: experience_buffer: name: gsm8k_buffer diff --git a/docs/sphinx_doc/source/tutorial/example_reasoning_basic.md b/docs/sphinx_doc/source/tutorial/example_reasoning_basic.md index d20ed8bddc..cfa5d078d2 100644 --- a/docs/sphinx_doc/source/tutorial/example_reasoning_basic.md +++ b/docs/sphinx_doc/source/tutorial/example_reasoning_basic.md @@ -77,6 +77,7 @@ buffer: response_key: 'answer' rollout_args: temperature: 1.0 + default_workflow_type: 'math_workflow' eval_tasksets: - name: gsm8k-eval storage_type: file @@ -86,7 +87,7 @@ buffer: format: prompt_key: 'question' response_key: 'answer' - default_workflow_type: 'math_workflow' + default_workflow_type: 'math_workflow' trainer_input: experience_buffer: name: gsm8k_buffer @@ -94,7 +95,7 @@ buffer: path: 'sqlite:///gsm8k.db' explorer: eval_interval: 50 - 
runner_num: 16 + runner_per_model: 16 rollout_model: engine_num: 1 synchronizer: @@ -117,7 +118,7 @@ trinity run --config examples/grpo_gsm8k/gsm8k.yaml ## Optional: RFT with SFT Warmup -Before RFT, we may use SFT as a warmup step. Trinity-RFT supports adding SFT warmup stage before RFT by setting `stages` in the config file. The `sft_warmup_dataset` specifies the dataset used for SFT warmup, and `sft_warmup_steps` specifies the number of training steps for SFT warmup. +Before RFT, we may use SFT as a warmup step. Trinity-RFT supports adding SFT warmup stage before RFT by setting `stages` in the config file. The `experience_buffer` specifies the dataset used for SFT warmup, and `total_steps` specifies the number of training steps for SFT warmup. ```yaml # Properly add the following configs in gsm8k.yaml diff --git a/docs/sphinx_doc/source/tutorial/example_step_wise.md b/docs/sphinx_doc/source/tutorial/example_step_wise.md index 9c073168e4..3239bbaf70 100644 --- a/docs/sphinx_doc/source/tutorial/example_step_wise.md +++ b/docs/sphinx_doc/source/tutorial/example_step_wise.md @@ -121,7 +121,7 @@ buffer: workflow_args: max_env_steps: 30 enable_progress_bar: false - default_workflow_type: 'step_wise_alfworld_workflow' + default_workflow_type: 'step_wise_alfworld_workflow' trainer_input: experience_buffer: name: alfworld_buffer @@ -129,7 +129,7 @@ buffer: use_priority_queue: true explorer: max_repeat_times_per_runner: 1 - runner_num: 32 + runner_per_model: 32 max_timeout: 3600 rollout_model: enable_history: true @@ -152,7 +152,7 @@ trainer: save_interval: 50 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 ``` diff --git a/docs/sphinx_doc/source/tutorial/trinity_configs.md b/docs/sphinx_doc/source/tutorial/trinity_configs.md index feec65a947..c1af4871b7 100644 --- a/docs/sphinx_doc/source/tutorial/trinity_configs.md +++ b/docs/sphinx_doc/source/tutorial/trinity_configs.md @@ -200,6 +200,7 @@ buffer: batch_size: 32 train_batch_size: 256 total_epochs: 100 + total_steps: null explorer_input: taskset: @@ -214,9 +215,6 @@ buffer: ... buffer_2: ... - - default_workflow_type: 'math_workflow' - default_reward_fn_type: 'countdown_reward' ``` - `batch_size`: Number of tasks used per training step. *Please do not multiply this value by the `algorithm.repeat_times` manually*. @@ -231,6 +229,9 @@ Defines the dataset(s) used by the explorer for training and evaluation. ```yaml buffer: explorer_input: + default_workflow_type: 'math_workflow' + default_eval_workflow_type: 'math_workflow' + default_reward_fn_type: 'countdown_reward' taskset: name: countdown_train storage_type: file @@ -262,7 +263,10 @@ buffer: ``` - `buffer.explorer_input.taskset`: Task dataset used for training exploration policies. -- `buffer.explorer_input.eval_taskset`: List of task datasets used for evaluation. +- `buffer.explorer_input.eval_tasksets`: List of task datasets used for evaluation. +- `buffer.explorer_input.default_workflow_type`: Default workflow type for all task datasets under `explorer_input` if not specified at the dataset level. +- `buffer.explorer_input.default_eval_workflow_type`: Default evaluation workflow type for all eval task datasets under `explorer_input` if not specified at the dataset level. +- `buffer.explorer_input.default_reward_fn_type`: Default reward function type for all task datasets under `explorer_input` if not specified at the dataset level. 
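As a rough, abridged sketch of how these `explorer_input`-level defaults resolve (dataset names below are only illustrative, and unrelated fields are omitted): a value set at the dataset level always wins; otherwise eval task datasets fall back to `default_eval_workflow_type` first and then `default_workflow_type`, while the training taskset falls back to `default_workflow_type`; `default_reward_fn_type` fills in the reward function in the same way.

```yaml
buffer:
  explorer_input:
    default_workflow_type: 'math_workflow'
    default_eval_workflow_type: 'math_boxed_workflow'
    default_reward_fn_type: 'countdown_reward'
    taskset:
      name: countdown_train
      # no workflow set here -> resolves to default_workflow_type: 'math_workflow'
      # no reward set here   -> resolves to default_reward_fn_type: 'countdown_reward'
    eval_tasksets:
      - name: gsm8k-eval
        default_workflow_type: 'math_workflow'   # dataset-level value wins
      - name: countdown-eval
        # no workflow set here -> resolves to default_eval_workflow_type: 'math_boxed_workflow'
```

In this sketch, `gsm8k-eval` keeps its own workflow, `countdown-eval` picks up `math_boxed_workflow`, and `countdown_train` picks up `math_workflow`.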
The configuration for each task dataset is defined as follows: @@ -413,7 +417,7 @@ trainer: save_strategy: "unrestricted" grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 trainer_config: null ``` @@ -429,7 +433,7 @@ trainer: - `unrestricted`: No restrictions on saving operations; multiple nodes, processes, or threads are allowed to save the model simultaneously. - `grad_clip`: Gradient clipping for updates. - `use_dynamic_bsz`: Whether to use dynamic batch size. -- `ppo_max_token_len_per_gpu`: The maximum number of tokens to be processed in forward and backward when updating the policy. Effective when `use_dynamic_bsz=true`. +- `max_token_len_per_gpu`: The maximum number of tokens to be processed in forward and backward when updating the policy. Effective when `use_dynamic_bsz=true`. - `ulysses_sequence_parallel_size`: Sequence parallel size. - `trainer_config`: The trainer configuration provided inline. --- diff --git a/docs/sphinx_doc/source_zh/tutorial/example_async_mode.md b/docs/sphinx_doc/source_zh/tutorial/example_async_mode.md index 7f1cb27301..00139883f3 100644 --- a/docs/sphinx_doc/source_zh/tutorial/example_async_mode.md +++ b/docs/sphinx_doc/source_zh/tutorial/example_async_mode.md @@ -39,14 +39,14 @@ buffer: response_key: 'answer' rollout_args: temperature: 1.0 - default_workflow_type: 'math_workflow' + default_workflow_type: 'math_workflow' trainer_input: experience_buffer: name: gsm8k_buffer storage_type: queue path: 'sqlite:///gsm8k.db' explorer: - runner_num: 32 + runner_per_model: 16 rollout_model: engine_num: 4 synchronizer: @@ -86,7 +86,7 @@ buffer: response_key: 'answer' rollout_args: temperature: 1.0 - default_workflow_type: 'math_workflow' + default_workflow_type: 'math_workflow' trainer_input: experience_buffer: name: gsm8k_buffer @@ -133,7 +133,7 @@ cluster: # important gpu_per_node: 8 explorer: name: 'explorer_new' # important - runner_num: 64 + runner_per_model: 8 rollout_model: engine_num: 8 buffer: @@ -150,7 +150,7 @@ buffer: response_key: 'answer' rollout_args: temperature: 1.0 - default_workflow_type: 'math_workflow' + default_workflow_type: 'math_workflow' trainer_input: experience_buffer: name: gsm8k_buffer diff --git a/docs/sphinx_doc/source_zh/tutorial/example_reasoning_basic.md b/docs/sphinx_doc/source_zh/tutorial/example_reasoning_basic.md index 639d07d89f..3235930063 100644 --- a/docs/sphinx_doc/source_zh/tutorial/example_reasoning_basic.md +++ b/docs/sphinx_doc/source_zh/tutorial/example_reasoning_basic.md @@ -77,6 +77,7 @@ buffer: response_key: 'answer' rollout_args: temperature: 1.0 + default_workflow_type: 'math_workflow' eval_tasksets: - name: gsm8k-eval storage_type: file @@ -86,7 +87,7 @@ buffer: format: prompt_key: 'question' response_key: 'answer' - default_workflow_type: 'math_workflow' + default_workflow_type: 'math_workflow' trainer_input: experience_buffer: name: gsm8k_buffer @@ -94,7 +95,7 @@ buffer: path: 'sqlite:///gsm8k.db' explorer: eval_interval: 50 - runner_num: 16 + runner_per_model: 16 rollout_model: engine_num: 1 synchronizer: @@ -117,7 +118,7 @@ trinity run --config examples/grpo_gsm8k/gsm8k.yaml ## 进阶选项:带 SFT warmup 的 RFT -在进行 RFT 之前,我们可以先使用 SFT 作为预热步骤。Trinity-RFT 支持通过在配置文件中设置 `stages` 来添加 SFT 预热阶段。`sft_warmup_dataset` 指定用于 SFT warmup 的数据集,`sft_warmup_steps` 指定 SFT warmup 的训练步数。 +在进行 RFT 之前,我们可以先使用 SFT 作为预热步骤。Trinity-RFT 支持通过在配置文件中设置 `stages` 来添加 SFT 预热阶段。`experience_buffer` 指定用于 SFT warmup 的数据集,`total_steps` 指定 SFT warmup 的训练步数。 ```yaml # 在 
gsm8k.yaml 中正确添加以下配置 diff --git a/docs/sphinx_doc/source_zh/tutorial/example_step_wise.md b/docs/sphinx_doc/source_zh/tutorial/example_step_wise.md index a0cc1b231e..f40250f5e3 100644 --- a/docs/sphinx_doc/source_zh/tutorial/example_step_wise.md +++ b/docs/sphinx_doc/source_zh/tutorial/example_step_wise.md @@ -119,7 +119,7 @@ buffer: workflow_args: max_env_steps: 30 enable_progress_bar: false - default_workflow_type: 'step_wise_alfworld_workflow' + default_workflow_type: 'step_wise_alfworld_workflow' trainer_input: experience_buffer: name: alfworld_buffer @@ -127,7 +127,7 @@ buffer: use_priority_queue: true explorer: max_repeat_times_per_runner: 1 - runner_num: 32 + runner_per_model: 16 max_timeout: 3600 rollout_model: enable_history: true @@ -150,7 +150,7 @@ trainer: save_interval: 50 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 ``` diff --git a/docs/sphinx_doc/source_zh/tutorial/trinity_configs.md b/docs/sphinx_doc/source_zh/tutorial/trinity_configs.md index b2b2dde92b..998fe939e8 100644 --- a/docs/sphinx_doc/source_zh/tutorial/trinity_configs.md +++ b/docs/sphinx_doc/source_zh/tutorial/trinity_configs.md @@ -214,9 +214,6 @@ buffer: ... buffer_2: ... - - default_workflow_type: 'math_workflow' - default_reward_fn_type: 'countdown_reward' ``` - `batch_size`: 每个训练步骤使用的任务数。*请勿手动将此值乘以 `algorithm.repeat_times`*。 @@ -231,6 +228,9 @@ buffer: ```yaml buffer: explorer_input: + default_workflow_type: 'math_workflow' + default_eval_workflow_type: 'math_workflow' + default_reward_fn_type: 'countdown_reward' taskset: name: countdown_train storage_type: file @@ -256,13 +256,14 @@ buffer: response_key: 'answer' rollout_args: temperature: 0.1 - default_workflow_type: 'math_workflow' - default_reward_fn_type: 'countdown_reward' ... 
``` - `buffer.explorer_input.taskset`: 用于训练探索策略的任务数据集。 -- `buffer.explorer_input.eval_taskset`: 用于评估的任务数据集列表。 +- `buffer.explorer_input.eval_tasksets`: 用于评测的任务数据集列表。 +- `buffer.explorer_input.default_workflow_type`: 若未在数据集级别指定,则为所有任务数据集设置默认的工作流类型。 +- `buffer.explorer_input.default_eval_workflow_type`: 若未在数据集级别指定,则为所有评测任务数据集设置默认的工作流类型。 +- `buffer.explorer_input.default_reward_fn_type`: 若未在数据集级别指定,则为所有任务数据集设置默认的奖励类型。 每个任务数据集的配置定义如下: @@ -413,7 +414,7 @@ trainer: save_strategy: "unrestricted" grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 trainer_config: null ``` @@ -429,7 +430,7 @@ trainer: - `unrestricted`:不限制保存操作,允许多个节点、进程或线程同时保存模型。 - `grad_clip`: 梯度裁剪阈值。 - `use_dynamic_bsz`: 是否使用动态批量大小。 -- `ppo_max_token_len_per_gpu`: 训练过程中,每个 GPU 最大 token 长度; 当 `use_dynamic_bsz=true` 时生效。 +- `max_token_len_per_gpu`: 训练过程中,每个 GPU 最大 token 长度; 当 `use_dynamic_bsz=true` 时生效。 - `ulysses_sequence_parallel_size`: 序列并行的并行度,即用于分割单个序列的 GPU 数量。 - `trainer_config`: 内联提供的 trainer 配置。 diff --git a/examples/RAFT_alfworld/RAFT_alfworld_7B.yaml b/examples/RAFT_alfworld/RAFT_alfworld_7B.yaml index 646de28a0a..7fc2445eaf 100644 --- a/examples/RAFT_alfworld/RAFT_alfworld_7B.yaml +++ b/examples/RAFT_alfworld/RAFT_alfworld_7B.yaml @@ -70,7 +70,7 @@ trainer: save_interval: 100000 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 20000 # Adjusted for alfworld longer sequences + max_token_len_per_gpu: 20000 # Adjusted for alfworld longer sequences ulysses_sequence_parallel_size: 1 monitor: monitor_type: wandb diff --git a/examples/RAFT_alfworld/RAFT_reflect_alfworld_7B.yaml b/examples/RAFT_alfworld/RAFT_reflect_alfworld_7B.yaml index 1a98e4f58e..30a115cada 100644 --- a/examples/RAFT_alfworld/RAFT_reflect_alfworld_7B.yaml +++ b/examples/RAFT_alfworld/RAFT_reflect_alfworld_7B.yaml @@ -70,7 +70,7 @@ trainer: save_interval: 100000 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 20000 # Adjusted for alfworld longer sequences + max_token_len_per_gpu: 20000 # Adjusted for alfworld longer sequences ulysses_sequence_parallel_size: 1 monitor: monitor_type: wandb diff --git a/examples/agentscope_react/gsm8k.yaml b/examples/agentscope_react/gsm8k.yaml index 1cb61b3b89..a70da2c5e1 100644 --- a/examples/agentscope_react/gsm8k.yaml +++ b/examples/agentscope_react/gsm8k.yaml @@ -29,8 +29,8 @@ buffer: response_key: 'answer' rollout_args: temperature: 1.0 + default_workflow_type: 'as_react_workflow' eval_tasksets: [] - default_workflow_type: 'as_react_workflow' trainer_input: experience_buffer: name: agentscope_gsm8k_buffer @@ -61,7 +61,7 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 24576 + max_token_len_per_gpu: 24576 ulysses_sequence_parallel_size: 2 monitor: monitor_type: tensorboard diff --git a/examples/agentscope_tool_react/agentscopev0_tool_react_dapo.yaml b/examples/agentscope_tool_react/agentscopev0_tool_react_dapo.yaml index 23efc27c12..9463de670b 100644 --- a/examples/agentscope_tool_react/agentscopev0_tool_react_dapo.yaml +++ b/examples/agentscope_tool_react/agentscopev0_tool_react_dapo.yaml @@ -29,8 +29,8 @@ buffer: response_key: 'solution' rollout_args: temperature: 1.0 + default_workflow_type: 'agentscopev0_react_math_workflow' eval_tasksets: [] - default_workflow_type: 'agentscopev0_react_math_workflow' trainer_input: experience_buffer: name: agentscope_dapo_buffer @@ -62,7 +62,7 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - 
ppo_max_token_len_per_gpu: 24576 + max_token_len_per_gpu: 24576 ulysses_sequence_parallel_size: 2 monitor: monitor_type: wandb diff --git a/examples/agentscope_tool_react/agentscopev0_tool_react_gsm8k.yaml b/examples/agentscope_tool_react/agentscopev0_tool_react_gsm8k.yaml index 76c946edfe..e0f73db05b 100644 --- a/examples/agentscope_tool_react/agentscopev0_tool_react_gsm8k.yaml +++ b/examples/agentscope_tool_react/agentscopev0_tool_react_gsm8k.yaml @@ -29,8 +29,8 @@ buffer: response_key: 'answer' rollout_args: temperature: 1.0 + default_workflow_type: 'agentscopev0_react_math_workflow' eval_tasksets: [] - default_workflow_type: 'agentscopev0_react_math_workflow' trainer_input: experience_buffer: name: agentscope_gsm8k_buffer @@ -62,7 +62,7 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 24576 + max_token_len_per_gpu: 24576 ulysses_sequence_parallel_size: 2 monitor: monitor_type: wandb diff --git a/examples/agentscope_tool_react/agentscopev1_tool_react_dapo.yaml b/examples/agentscope_tool_react/agentscopev1_tool_react_dapo.yaml index e0bff08b66..3a951a66d4 100644 --- a/examples/agentscope_tool_react/agentscopev1_tool_react_dapo.yaml +++ b/examples/agentscope_tool_react/agentscopev1_tool_react_dapo.yaml @@ -29,8 +29,8 @@ buffer: response_key: 'solution' rollout_args: temperature: 1.0 + default_workflow_type: 'agentscope_react_math_workflow' eval_tasksets: [] - default_workflow_type: 'agentscope_react_math_workflow' trainer_input: experience_buffer: name: agentscope_dapo_buffer @@ -60,7 +60,7 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 24576 + max_token_len_per_gpu: 24576 ulysses_sequence_parallel_size: 2 monitor: monitor_type: wandb diff --git a/examples/agentscope_websearch/agentscopev1_websearch_agent.yaml b/examples/agentscope_websearch/agentscopev1_websearch_agent.yaml index bb6d9b34fe..cd9a87594a 100644 --- a/examples/agentscope_websearch/agentscopev1_websearch_agent.yaml +++ b/examples/agentscope_websearch/agentscopev1_websearch_agent.yaml @@ -33,6 +33,7 @@ buffer: temperature: 1.0 max_tokens: 4096 enable_progress_bar: false + default_workflow_type: 'agentscope_v1_react_search_workflow' eval_tasksets: - name: webqa_test storage_type: file @@ -47,7 +48,7 @@ buffer: rollout_args: temperature: 0.6 max_tokens: 4096 - default_workflow_type: 'agentscope_v1_react_search_workflow' + default_workflow_type: 'agentscope_v1_react_search_workflow' trainer_input: experience_buffer: name: experience_buffer @@ -88,5 +89,5 @@ trainer: save_interval: 20 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 2 diff --git a/examples/asymre_gsm8k/gsm8k.yaml b/examples/asymre_gsm8k/gsm8k.yaml index 16ac2b31bb..0374dd6fcb 100644 --- a/examples/asymre_gsm8k/gsm8k.yaml +++ b/examples/asymre_gsm8k/gsm8k.yaml @@ -36,6 +36,7 @@ buffer: response_key: answer rollout_args: temperature: 1.0 + default_workflow_type: math_workflow eval_tasksets: - name: gsm8k-eval storage_type: file @@ -45,7 +46,7 @@ buffer: format: prompt_key: question response_key: answer - default_workflow_type: math_workflow + default_workflow_type: math_workflow trainer_input: experience_buffer: name: gsm8k_buffer @@ -69,5 +70,5 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 diff --git a/examples/asymre_math/math.yaml 
b/examples/asymre_math/math.yaml index 97df16cebb..c1636f5318 100644 --- a/examples/asymre_math/math.yaml +++ b/examples/asymre_math/math.yaml @@ -73,5 +73,5 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 diff --git a/examples/async_gsm8k/explorer.yaml b/examples/async_gsm8k/explorer.yaml index 07c21ef043..ab49d2ffab 100644 --- a/examples/async_gsm8k/explorer.yaml +++ b/examples/async_gsm8k/explorer.yaml @@ -28,7 +28,7 @@ buffer: rollout_args: temperature: 1.0 logprobs: 0 - default_workflow_type: 'math_workflow' + default_workflow_type: 'math_workflow' trainer_input: experience_buffer: name: gsm8k_buffer diff --git a/examples/async_gsm8k/trainer.yaml b/examples/async_gsm8k/trainer.yaml index 05430aebe6..9791eeeeec 100644 --- a/examples/async_gsm8k/trainer.yaml +++ b/examples/async_gsm8k/trainer.yaml @@ -30,7 +30,7 @@ buffer: rollout_args: temperature: 1.0 logprobs: 0 - default_workflow_type: 'math_workflow' + default_workflow_type: 'math_workflow' trainer_input: experience_buffer: name: gsm8k_buffer @@ -42,5 +42,5 @@ synchronizer: trainer: grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 diff --git a/examples/cispo_gsm8k/gsm8k.yaml b/examples/cispo_gsm8k/gsm8k.yaml index 2a038dbeec..6d6dffaaa2 100644 --- a/examples/cispo_gsm8k/gsm8k.yaml +++ b/examples/cispo_gsm8k/gsm8k.yaml @@ -61,5 +61,5 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 diff --git a/examples/dapo_math/dapo.yaml b/examples/dapo_math/dapo.yaml index 07df6557d7..3acf4d817e 100644 --- a/examples/dapo_math/dapo.yaml +++ b/examples/dapo_math/dapo.yaml @@ -77,5 +77,5 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 22000 + max_token_len_per_gpu: 22000 ulysses_sequence_parallel_size: 1 diff --git a/examples/dpo_human_in_the_loop/dpo.yaml b/examples/dpo_human_in_the_loop/dpo.yaml index bc4a015261..f13dfc539e 100644 --- a/examples/dpo_human_in_the_loop/dpo.yaml +++ b/examples/dpo_human_in_the_loop/dpo.yaml @@ -77,7 +77,7 @@ trainer: save_interval: 30 total_steps: 200 use_dynamic_bsz: false - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 trainer_config: actor_rollout_ref: diff --git a/examples/dpo_humanlike/dpo.yaml b/examples/dpo_humanlike/dpo.yaml index bd5bb7f118..7f07d1a226 100644 --- a/examples/dpo_humanlike/dpo.yaml +++ b/examples/dpo_humanlike/dpo.yaml @@ -42,7 +42,7 @@ trainer: save_interval: 30 total_steps: 200 use_dynamic_bsz: false - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 trainer_config: actor_rollout_ref: diff --git a/examples/grpo_alfworld/alfworld.yaml b/examples/grpo_alfworld/alfworld.yaml index bb6733d391..77ba65d555 100644 --- a/examples/grpo_alfworld/alfworld.yaml +++ b/examples/grpo_alfworld/alfworld.yaml @@ -26,7 +26,7 @@ buffer: rollout_args: temperature: 1.0 logprobs: 0 - default_workflow_type: 'alfworld_workflow' + default_workflow_type: 'alfworld_workflow' trainer_input: experience_buffer: name: alfworld_buffer @@ -54,5 +54,5 @@ trainer: save_interval: 10 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 diff --git 
a/examples/grpo_alfworld_general_multi_step/alfworld.yaml b/examples/grpo_alfworld_general_multi_step/alfworld.yaml index 806f515ba4..e36016a3b2 100644 --- a/examples/grpo_alfworld_general_multi_step/alfworld.yaml +++ b/examples/grpo_alfworld_general_multi_step/alfworld.yaml @@ -30,7 +30,7 @@ buffer: workflow_args: max_env_steps: 30 enable_progress_bar: false - default_workflow_type: 'step_wise_alfworld_workflow' + default_workflow_type: 'step_wise_alfworld_workflow' trainer_input: experience_buffer: name: alfworld_buffer @@ -61,7 +61,7 @@ trainer: save_interval: 50 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 monitor: monitor_type: 'wandb' diff --git a/examples/grpo_email_search/email_search.yaml b/examples/grpo_email_search/email_search.yaml index 710316616e..fa3b96f2a5 100644 --- a/examples/grpo_email_search/email_search.yaml +++ b/examples/grpo_email_search/email_search.yaml @@ -90,5 +90,5 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 diff --git a/examples/grpo_gsm8k/gsm8k.yaml b/examples/grpo_gsm8k/gsm8k.yaml index 0fa3e2601e..bc3e533d5f 100644 --- a/examples/grpo_gsm8k/gsm8k.yaml +++ b/examples/grpo_gsm8k/gsm8k.yaml @@ -62,7 +62,7 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 # stages: # Uncomment to add a SFT warmup stage before RFT # - stage_name: sft_warmup diff --git a/examples/grpo_gsm8k_experience_pipeline/gsm8k.yaml b/examples/grpo_gsm8k_experience_pipeline/gsm8k.yaml index 3fccaaac28..62d380220a 100644 --- a/examples/grpo_gsm8k_experience_pipeline/gsm8k.yaml +++ b/examples/grpo_gsm8k_experience_pipeline/gsm8k.yaml @@ -81,5 +81,5 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 diff --git a/examples/grpo_gsm8k_ruler/gsm8k_ruler.yaml b/examples/grpo_gsm8k_ruler/gsm8k_ruler.yaml index fa45d91913..d8b9227c32 100644 --- a/examples/grpo_gsm8k_ruler/gsm8k_ruler.yaml +++ b/examples/grpo_gsm8k_ruler/gsm8k_ruler.yaml @@ -71,5 +71,5 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 diff --git a/examples/grpo_gsm8k_task_pipeline/gsm8k.yaml b/examples/grpo_gsm8k_task_pipeline/gsm8k.yaml index 76247d527f..fb95c0dc62 100644 --- a/examples/grpo_gsm8k_task_pipeline/gsm8k.yaml +++ b/examples/grpo_gsm8k_task_pipeline/gsm8k.yaml @@ -79,5 +79,5 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 diff --git a/examples/grpo_gsm8k_trainable_ruler/gsm8k_ruler.yaml b/examples/grpo_gsm8k_trainable_ruler/gsm8k_ruler.yaml index 785191dcc6..fa332954cc 100644 --- a/examples/grpo_gsm8k_trainable_ruler/gsm8k_ruler.yaml +++ b/examples/grpo_gsm8k_trainable_ruler/gsm8k_ruler.yaml @@ -48,7 +48,7 @@ buffer: storage_type: queue explorer: eval_interval: 10 - runner_num: 32 + runner_per_model: 8 rollout_model: engine_num: 4 tensor_parallel_size: 1 @@ -65,5 +65,5 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 diff --git 
a/examples/grpo_lora_gsm8k/gsm8k.yaml b/examples/grpo_lora_gsm8k/gsm8k.yaml index bad54c22d2..e445824415 100644 --- a/examples/grpo_lora_gsm8k/gsm8k.yaml +++ b/examples/grpo_lora_gsm8k/gsm8k.yaml @@ -65,7 +65,7 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 trainer_config: actor_rollout_ref: diff --git a/examples/grpo_math/math.yaml b/examples/grpo_math/math.yaml index 39d5cafa5b..1ec35ce86c 100644 --- a/examples/grpo_math/math.yaml +++ b/examples/grpo_math/math.yaml @@ -29,8 +29,8 @@ buffer: logprobs: 0 reward_fn_args: reward_name: math_verify_reward - default_workflow_type: 'math_rm_workflow' - default_reward_fn_type: 'rm_gallery_reward' + default_workflow_type: 'math_rm_workflow' + default_reward_fn_type: 'rm_gallery_reward' trainer_input: experience_buffer: name: math_buffer @@ -54,5 +54,5 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 diff --git a/examples/grpo_rubric_as_reward/rubric.yaml b/examples/grpo_rubric_as_reward/rubric.yaml index 4e76a9c9cd..6e66dc348f 100644 --- a/examples/grpo_rubric_as_reward/rubric.yaml +++ b/examples/grpo_rubric_as_reward/rubric.yaml @@ -31,7 +31,7 @@ buffer: rollout_args: temperature: 1.0 enable_progress_bar: false - default_workflow_type: 'rubric_judge_workflow' + default_workflow_type: 'rubric_judge_workflow' trainer_input: experience_buffer: name: experience_buffer @@ -64,5 +64,5 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 diff --git a/examples/grpo_sciworld/sciworld.yaml b/examples/grpo_sciworld/sciworld.yaml index 8fb044781f..09bf683132 100644 --- a/examples/grpo_sciworld/sciworld.yaml +++ b/examples/grpo_sciworld/sciworld.yaml @@ -26,7 +26,7 @@ buffer: rollout_args: temperature: 1.0 logprobs: 0 - default_workflow_type: 'sciworld_workflow' + default_workflow_type: 'sciworld_workflow' trainer_input: experience_buffer: name: sciworld_buffer @@ -51,5 +51,5 @@ trainer: save_interval: 10 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 diff --git a/examples/grpo_toolcall/toolace.yaml b/examples/grpo_toolcall/toolace.yaml index 7d699b4bc6..05e8a7e7e4 100644 --- a/examples/grpo_toolcall/toolace.yaml +++ b/examples/grpo_toolcall/toolace.yaml @@ -25,8 +25,7 @@ buffer: rollout_args: temperature: 1.0 logprobs: 0 - eval_tasksets: [] - default_workflow_type: 'toolcall_workflow' + default_workflow_type: 'toolcall_workflow' trainer_input: experience_buffer: name: toolace_buffer @@ -50,5 +49,5 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 2 diff --git a/examples/grpo_vlm/vlm.yaml b/examples/grpo_vlm/vlm.yaml index 75f1aa0b42..159f0177a7 100644 --- a/examples/grpo_vlm/vlm.yaml +++ b/examples/grpo_vlm/vlm.yaml @@ -30,8 +30,8 @@ buffer: image_key: 'images' rollout_args: temperature: 1.0 - default_workflow_type: 'simple_mm_workflow' - default_reward_fn_type: 'math_boxed_reward' + default_workflow_type: 'simple_mm_workflow' + default_reward_fn_type: 'math_boxed_reward' trainer_input: experience_buffer: name: experience_buffer @@ -53,5 +53,5 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: 
true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 diff --git a/examples/grpo_webshop/webshop.yaml b/examples/grpo_webshop/webshop.yaml index 2183772fb9..7357002bcb 100644 --- a/examples/grpo_webshop/webshop.yaml +++ b/examples/grpo_webshop/webshop.yaml @@ -26,7 +26,7 @@ buffer: rollout_args: temperature: 1.0 logprobs: 0 - default_workflow_type: 'webshop_workflow' + default_workflow_type: 'webshop_workflow' trainer_input: experience_buffer: name: webshop_buffer @@ -51,5 +51,5 @@ trainer: save_interval: 10 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 diff --git a/examples/mix_chord/mix_chord.yaml b/examples/mix_chord/mix_chord.yaml index 77e54adb3a..7d62c559b1 100644 --- a/examples/mix_chord/mix_chord.yaml +++ b/examples/mix_chord/mix_chord.yaml @@ -85,7 +85,7 @@ trainer: save_interval: 50 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 25600 + max_token_len_per_gpu: 25600 ulysses_sequence_parallel_size: 2 monitor: monitor_type: wandb diff --git a/examples/mix_chord/mix_chord_toolace.yaml b/examples/mix_chord/mix_chord_toolace.yaml index fd8875266a..9380c82c36 100644 --- a/examples/mix_chord/mix_chord_toolace.yaml +++ b/examples/mix_chord/mix_chord_toolace.yaml @@ -80,7 +80,7 @@ trainer: save_interval: 50 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 25600 + max_token_len_per_gpu: 25600 ulysses_sequence_parallel_size: 2 monitor: monitor_type: wandb diff --git a/examples/mix_math/mix_math.yaml b/examples/mix_math/mix_math.yaml index ad6133f2c8..07b43a990f 100644 --- a/examples/mix_math/mix_math.yaml +++ b/examples/mix_math/mix_math.yaml @@ -84,7 +84,7 @@ trainer: save_interval: 50 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 25600 + max_token_len_per_gpu: 25600 ulysses_sequence_parallel_size: 1 monitor: monitor_type: wandb diff --git a/examples/opmd_gsm8k/opmd_gsm8k.yaml b/examples/opmd_gsm8k/opmd_gsm8k.yaml index acb65a86cd..6367e01a7c 100644 --- a/examples/opmd_gsm8k/opmd_gsm8k.yaml +++ b/examples/opmd_gsm8k/opmd_gsm8k.yaml @@ -28,7 +28,7 @@ buffer: rollout_args: temperature: 1.0 logprobs: 0 - default_workflow_type: 'math_workflow' + default_workflow_type: 'math_workflow' trainer_input: experience_buffer: name: gsm8k_buffer @@ -51,5 +51,5 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 diff --git a/examples/ppo_countdown/countdown.yaml b/examples/ppo_countdown/countdown.yaml index 9d792fd96c..c7bdfc9774 100644 --- a/examples/ppo_countdown/countdown.yaml +++ b/examples/ppo_countdown/countdown.yaml @@ -27,8 +27,8 @@ buffer: rollout_args: temperature: 1.0 logprobs: 0 - default_workflow_type: 'math_workflow' - default_reward_fn_type: 'countdown_reward' + default_workflow_type: 'math_workflow' + default_reward_fn_type: 'countdown_reward' trainer_input: experience_buffer: name: countdown_buffer @@ -52,7 +52,7 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 trainer_config: actor_rollout_ref: diff --git a/examples/ppo_countdown_megatron/countdown.yaml b/examples/ppo_countdown_megatron/countdown.yaml index bb47a43537..bb592d464e 100644 --- a/examples/ppo_countdown_megatron/countdown.yaml +++ b/examples/ppo_countdown_megatron/countdown.yaml @@ -26,8 +26,8 @@ 
buffer: response_key: 'answer' rollout_args: temperature: 1.0 - default_workflow_type: 'math_workflow' - default_reward_fn_type: 'countdown_reward' + default_workflow_type: 'math_workflow' + default_reward_fn_type: 'countdown_reward' trainer_input: experience_buffer: name: countdown_buffer @@ -51,7 +51,7 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 trainer_config: actor_rollout_ref: diff --git a/examples/rec_gsm8k/gsm8k.yaml b/examples/rec_gsm8k/gsm8k.yaml index 4c45270d13..3be850e401 100644 --- a/examples/rec_gsm8k/gsm8k.yaml +++ b/examples/rec_gsm8k/gsm8k.yaml @@ -54,9 +54,8 @@ buffer: storage_type: queue explorer: eval_interval: 20 - runner_num: 64 + runner_per_model: 16 rollout_model: - engine_type: vllm_async engine_num: 4 tensor_parallel_size: 1 enable_prefix_caching: false @@ -73,5 +72,5 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 diff --git a/examples/rec_math/math.yaml b/examples/rec_math/math.yaml index 1996d9814e..226fbe7d75 100644 --- a/examples/rec_math/math.yaml +++ b/examples/rec_math/math.yaml @@ -57,7 +57,7 @@ buffer: storage_type: queue explorer: eval_interval: 500 - runner_num: 64 + runner_per_model: 16 rollout_model: engine_type: vllm_async engine_num: 4 @@ -78,5 +78,5 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 diff --git a/examples/sft_mot/sft.yaml b/examples/sft_mot/sft.yaml index d408b3da25..159a8657e1 100644 --- a/examples/sft_mot/sft.yaml +++ b/examples/sft_mot/sft.yaml @@ -32,5 +32,5 @@ trainer: save_interval: 10 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 22000 + max_token_len_per_gpu: 22000 ulysses_sequence_parallel_size: 1 diff --git a/examples/sppo_gsm8k/gsm8k.yaml b/examples/sppo_gsm8k/gsm8k.yaml index 790c2623b1..0fb4cabcba 100644 --- a/examples/sppo_gsm8k/gsm8k.yaml +++ b/examples/sppo_gsm8k/gsm8k.yaml @@ -67,5 +67,5 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 22000 + max_token_len_per_gpu: 22000 ulysses_sequence_parallel_size: 1 diff --git a/examples/topr_gsm8k/gsm8k.yaml b/examples/topr_gsm8k/gsm8k.yaml index 12ba9a7817..2ae39139a7 100644 --- a/examples/topr_gsm8k/gsm8k.yaml +++ b/examples/topr_gsm8k/gsm8k.yaml @@ -61,5 +61,5 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 diff --git a/tests/buffer/formatter_test.py b/tests/buffer/formatter_test.py index 1e8a682b4e..92b6616555 100644 --- a/tests/buffer/formatter_test.py +++ b/tests/buffer/formatter_test.py @@ -263,8 +263,7 @@ def test_task_formatter(self): } config = StorageConfig( is_eval=True, - default_workflow_type="math_workflow", - default_eval_workflow_type="math_boxed_workflow", + default_workflow_type="math_boxed_workflow", workflow_args={"use_base": True, "with_think": True}, ) formatter = FORMATTER.get("task")(config=config) @@ -279,7 +278,6 @@ def test_task_formatter(self): config = StorageConfig( is_eval=False, default_workflow_type="math_workflow", - default_eval_workflow_type="math_boxed_workflow", default_reward_fn_type="math_reward", workflow_args={"use_base": False, "with_think": True}, ) @@ -296,7 +294,7 @@ def 
test_task_formatter(self): config = StorageConfig( is_eval=False, - default_eval_workflow_type="math_workflow", + default_workflow_type="math_workflow", workflow_args={"use_base": True, "with_think": False}, format=FormatConfig( workflow_key="workflow", diff --git a/tests/common/config_test.py b/tests/common/config_test.py index e51832f3a6..0a6a5557b0 100644 --- a/tests/common/config_test.py +++ b/tests/common/config_test.py @@ -1,11 +1,12 @@ # -*- coding: utf-8 -*- """Test cases for Config modules.""" import datetime +import math import os import shutil import unittest -from tests.tools import get_template_config +from tests.tools import get_template_config, get_unittest_dataset_config from trinity.common.config import InferenceModelConfig, load_config CHECKPOINT_ROOT_DIR = os.path.join(os.path.dirname(__file__), "temp_checkpoint_dir") @@ -91,6 +92,51 @@ def test_update_config_from_ray_cluster(self): self.assertEqual(config.cluster.node_num, 2) self.assertEqual(config.cluster.gpu_per_node, 2) + def test_default_workflow(self): + config = get_template_config() + config.buffer.explorer_input.default_workflow_type = "simple_workflow" + config.buffer.explorer_input.default_eval_workflow_type = "math_boxed_workflow" + config.buffer.explorer_input.eval_tasksets.append(get_unittest_dataset_config("gsm8k")) + st = get_unittest_dataset_config("countdown") + st.default_workflow_type = None + config.buffer.explorer_input.eval_tasksets.append(st) + config.check_and_update() + self.assertEqual( + config.buffer.explorer_input.eval_tasksets[0].default_workflow_type, + "math_workflow", + ) + self.assertEqual( + config.buffer.explorer_input.eval_tasksets[1].default_workflow_type, + "math_boxed_workflow", + ) + self.assertEqual( + config.buffer.explorer_input.taskset.default_workflow_type, + "simple_workflow", + ) + + def test_max_token_len_per_gpu_set_correctly(self): + config = get_template_config() + config.model.max_model_len = 8192 + config.trainer.ulysses_sequence_parallel_size = 2 + config.trainer.max_token_len_per_gpu = None + config.check_and_update() + self.assertIsNotNone(config.trainer.trainer_config) + expected_max_token_len = math.ceil( + (2 * config.model.max_model_len) / config.trainer.ulysses_sequence_parallel_size + ) + self.assertEqual( + config.trainer.trainer_config.actor_rollout_ref.actor.ppo_max_token_len_per_gpu, + expected_max_token_len, + ) + self.assertEqual( + config.trainer.trainer_config.actor_rollout_ref.ref.log_prob_max_token_len_per_gpu, + expected_max_token_len, + ) + self.assertEqual( + config.trainer.trainer_config.critic.ppo_max_token_len_per_gpu, + expected_max_token_len, + ) + def tearDown(self): if os.path.exists(CHECKPOINT_ROOT_DIR): shutil.rmtree(CHECKPOINT_ROOT_DIR) diff --git a/tests/template/config.yaml b/tests/template/config.yaml index 74587f98dd..13b2ad081f 100644 --- a/tests/template/config.yaml +++ b/tests/template/config.yaml @@ -32,9 +32,6 @@ buffer: path: 'placeholder' split: 'train' enable_progress_bar: false - default_workflow_type: '' - default_eval_workflow_type: '' - default_reward_fn_type: '' explorer: eval_interval: 100 runner_per_model: 8 @@ -51,7 +48,7 @@ trainer: save_interval: 100 grad_clip: 1.0 use_dynamic_bsz: true - ppo_max_token_len_per_gpu: 16384 + max_token_len_per_gpu: 16384 ulysses_sequence_parallel_size: 1 trainer_config: actor_rollout_ref: @@ -64,8 +61,6 @@ trainer: lr: 1e-5 model: use_remove_padding: false - ppo_max_token_len_per_gpu: 32768 # (${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}) * 2 - forward_max_token_len_per_gpu: 
${trainer.trainer_config.critic.ppo_max_token_len_per_gpu} cliprange_value: 0.5 checkpoint: load_contents: ['model', 'optimizer', 'extra'] diff --git a/trinity/buffer/schema/formatter.py b/trinity/buffer/schema/formatter.py index e562df7d0b..4284a321ad 100644 --- a/trinity/buffer/schema/formatter.py +++ b/trinity/buffer/schema/formatter.py @@ -40,8 +40,6 @@ def __init__(self, config: StorageConfig): self.config = config self.is_eval = config.is_eval self.default_workflow_cls = WORKFLOWS.get(config.default_workflow_type) # type: ignore - if self.is_eval and config.default_eval_workflow_type: - self.default_workflow_cls = WORKFLOWS.get(config.default_eval_workflow_type) self.default_reward_fn_cls = REWARD_FUNCTIONS.get(config.default_reward_fn_type) # type: ignore self.workflow_key = config.format.workflow_key self.reward_fn_key = config.format.reward_fn_key diff --git a/trinity/buffer/storage/sql.py b/trinity/buffer/storage/sql.py index de4e34f6ff..3254cd663e 100644 --- a/trinity/buffer/storage/sql.py +++ b/trinity/buffer/storage/sql.py @@ -221,8 +221,6 @@ def __init__(self, storage_config: StorageConfig, config: BufferConfig) -> None: self.batch_size = config.batch_size self.is_eval = storage_config.is_eval self.default_workflow_cls = WORKFLOWS.get(storage_config.default_workflow_type) # type: ignore - if self.is_eval and storage_config.default_eval_workflow_type: - self.default_workflow_cls = WORKFLOWS.get(storage_config.default_eval_workflow_type) self.default_reward_fn_cls = REWARD_FUNCTIONS.get(storage_config.default_reward_fn_type) # type: ignore self.formatter = TaskFormatter(storage_config) self.offset = storage_config.index diff --git a/trinity/common/config.py b/trinity/common/config.py index fc6de7d3bd..f96077baf3 100644 --- a/trinity/common/config.py +++ b/trinity/common/config.py @@ -2,6 +2,7 @@ """Configs for RFT.""" from __future__ import annotations +import math import os from copy import deepcopy from dataclasses import dataclass, field @@ -123,9 +124,6 @@ class StorageConfig: # For continuing training index: int = 0 - # used for multi-modal data - mm_data_kwargs: dict = field(default_factory=dict) - # used for StorageType.FILE split: str = "train" subset_name: Optional[str] = None @@ -146,7 +144,6 @@ class StorageConfig: # used for rollout tasks default_workflow_type: Optional[str] = None - default_eval_workflow_type: Optional[str] = None default_reward_fn_type: Optional[str] = None rollout_args: GenerationConfig = field(default_factory=GenerationConfig) workflow_args: dict = field(default_factory=dict) @@ -390,8 +387,6 @@ class ExplorerInput: default_workflow_type: Optional[str] = None default_eval_workflow_type: Optional[str] = None default_reward_fn_type: Optional[str] = None - system_prompt: Optional[str] = None - reply_prefix: Optional[str] = None @dataclass @@ -405,10 +400,6 @@ class TrainerInput: # Some auxiliary buffers to facilitate training (e.g., data mixing) auxiliary_buffers: Dict[str, StorageConfig] = field(default_factory=dict) - # ! 
Deprecated, keep for backward compatibility, do not use it in new code - sft_warmup_dataset: Optional[StorageConfig] = None - sft_warmup_steps: Optional[int] = None - @dataclass class BufferConfig: @@ -485,7 +476,8 @@ class TrainerConfig: # trainer configs grad_clip: float = 1.0 use_dynamic_bsz: bool = True - ppo_max_token_len_per_gpu: int = 16384 + # if None, automatically set to 2 * model.max_model_len / ulysses_sequence_parallel_size + max_token_len_per_gpu: Optional[int] = None ulysses_sequence_parallel_size: int = 1 # sp size # TODO: extract more train-related params from underlying trainer engine @@ -615,14 +607,6 @@ def save(self, config_path: str) -> None: OmegaConf.save(self, f) def _check_deprecated(self) -> None: - if self.buffer.trainer_input.sft_warmup_steps is not None: - logger.warning( - "`buffer.trainer_input.sft_warmup_steps` is deprecated, SFT warmup related settings are moved to `stages`." - ) - if self.buffer.trainer_input.sft_warmup_dataset is not None: - logger.warning( - "`buffer.trainer_input.sft_warmup_dataset` is deprecated, SFT warmup related settings are moved to `stages`." - ) if self.explorer.runner_num is not None: logger.warning( "`explorer.runner_num` is deprecated, please use `explorer.runner_per_model` instead." @@ -706,17 +690,11 @@ def _check_buffer(self) -> None: # noqa: C901 experience_buffer.total_epochs = self.buffer.total_epochs experience_buffer.total_steps = self.buffer.total_steps else: - taskset.is_eval = False taskset.total_epochs = self.buffer.total_epochs taskset.total_steps = self.buffer.total_steps set_if_none(taskset, "default_workflow_type", explorer_input.default_workflow_type) - set_if_none( - taskset, "default_eval_workflow_type", explorer_input.default_eval_workflow_type - ) set_if_none(taskset, "default_reward_fn_type", explorer_input.default_reward_fn_type) - set_if_none(taskset.format, "system_prompt", explorer_input.system_prompt) - set_if_none(taskset.format, "reply_prefix", explorer_input.reply_prefix) set_if_none(taskset, "ray_namespace", self.ray_namespace) set_if_none(taskset.rollout_args, "max_tokens", self.model.max_response_tokens) @@ -729,13 +707,10 @@ def _check_buffer(self) -> None: # noqa: C901 if not dataset.name: dataset.name = f"eval_taskset_{idx}" set_if_none(dataset, "repeat_times", 1) + # eval_workflow has higher priority than workflow in eval tasksets, so we set it first + set_if_none(dataset, "default_workflow_type", explorer_input.default_eval_workflow_type) set_if_none(dataset, "default_workflow_type", explorer_input.default_workflow_type) - set_if_none( - dataset, "default_eval_workflow_type", explorer_input.default_eval_workflow_type - ) set_if_none(dataset, "default_reward_fn_type", explorer_input.default_reward_fn_type) - set_if_none(dataset.format, "system_prompt", explorer_input.system_prompt) - set_if_none(dataset.format, "reply_prefix", explorer_input.reply_prefix) set_if_none(dataset, "ray_namespace", self.ray_namespace) set_if_none(dataset.rollout_args, "max_tokens", self.model.max_response_tokens) remained_tasksets.append(dataset) @@ -1095,22 +1070,18 @@ def check_and_update(self) -> Config: # noqa: C901 ) self.trainer.trainer_config = OmegaConf.to_object(trainer_config) elif self.trainer.trainer_config_path: - logger.warning( + raise ValueError( "`trainer_config_path` is deprecated; please use `trainer_config` instead." 
) - if os.path.isfile(self.trainer.trainer_config_path): - from trinity.common.verl_config import load_config - - self.trainer.trainer_config = load_config(self.trainer.trainer_config_path) - else: - raise ValueError( - f"Invalid trainer config path: {self.trainer.trainer_config_path}" - ) else: from trinity.common.verl_config import veRLConfig logger.info("`trainer_config` is not provided, using default trainer config.") self.trainer.trainer_config = veRLConfig() + if self.trainer.max_token_len_per_gpu is None: + self.trainer.max_token_len_per_gpu = math.ceil( + 2 * self.model.max_model_len / self.trainer.ulysses_sequence_parallel_size # type: ignore [operator] + ) else: raise ValueError(f"Invalid trainer type: {self.trainer_type}") self.trainer.trainer_config.synchronize_config(self) diff --git a/trinity/common/verl_config.py b/trinity/common/verl_config.py index 7a483b8a9b..05c26bfea4 100644 --- a/trinity/common/verl_config.py +++ b/trinity/common/verl_config.py @@ -235,7 +235,7 @@ class Critic: forward_micro_batch_size_per_gpu: Optional[int] = None use_dynamic_bsz: Optional[bool] = None ppo_max_token_len_per_gpu: Optional[int] = None - forward_max_token_len_per_gpu: int = 0 + forward_max_token_len_per_gpu: Optional[int] = None ulysses_sequence_parallel_size: Optional[int] = None ppo_epochs: int = 0 shuffle: bool = False @@ -423,7 +423,7 @@ def synchronize_config(self, config: Config) -> None: # noqa: C901 self.actor_rollout_ref.actor.use_dynamic_bsz = config.trainer.use_dynamic_bsz if self.actor_rollout_ref.actor.ppo_max_token_len_per_gpu is None: self.actor_rollout_ref.actor.ppo_max_token_len_per_gpu = ( - config.trainer.ppo_max_token_len_per_gpu + config.trainer.max_token_len_per_gpu ) if self.actor_rollout_ref.actor.ulysses_sequence_parallel_size is None: self.actor_rollout_ref.actor.ulysses_sequence_parallel_size = ( @@ -432,14 +432,17 @@ def synchronize_config(self, config: Config) -> None: # noqa: C901 if ( self.actor_rollout_ref.actor.ppo_max_token_len_per_gpu # type: ignore [operator] * self.actor_rollout_ref.actor.ulysses_sequence_parallel_size - < config.model.max_model_len + < config.model.max_model_len * 2 # type: ignore [operator] ): self.actor_rollout_ref.actor.ppo_max_token_len_per_gpu = math.ceil( config.model.max_model_len # type: ignore [operator] - / self.actor_rollout_ref.actor.ulysses_sequence_parallel_size + * 2 + / self.actor_rollout_ref.actor.ulysses_sequence_parallel_size # type: ignore [operator] ) logger.warning( - f"Warning: actor.ppo_max_token_len_per_gpu is automatically set to {self.actor_rollout_ref.actor.ppo_max_token_len_per_gpu} to match model.max_model_len ({config.model.max_model_len})" + f"actor.ppo_max_token_len_per_gpu is automatically set to {self.actor_rollout_ref.actor.ppo_max_token_len_per_gpu} " + f"to match model.max_model_len ({config.model.max_model_len}). If you face OOM issues, " + "please set `model.max_model_len` to a smaller value." 
) # Ref Config @@ -447,12 +450,27 @@ def synchronize_config(self, config: Config) -> None: # noqa: C901 self.actor_rollout_ref.ref.log_prob_use_dynamic_bsz = config.trainer.use_dynamic_bsz if self.actor_rollout_ref.ref.log_prob_max_token_len_per_gpu is None: self.actor_rollout_ref.ref.log_prob_max_token_len_per_gpu = ( - config.trainer.ppo_max_token_len_per_gpu + self.actor_rollout_ref.actor.ppo_max_token_len_per_gpu ) if self.actor_rollout_ref.ref.ulysses_sequence_parallel_size is None: self.actor_rollout_ref.ref.ulysses_sequence_parallel_size = ( config.trainer.ulysses_sequence_parallel_size ) + if ( + self.actor_rollout_ref.ref.log_prob_max_token_len_per_gpu # type: ignore [operator] + * self.actor_rollout_ref.ref.ulysses_sequence_parallel_size + < config.model.max_model_len * 2 # type: ignore [operator] + ): + self.actor_rollout_ref.ref.log_prob_max_token_len_per_gpu = math.ceil( + config.model.max_model_len # type: ignore [operator] + * 2 + / self.actor_rollout_ref.ref.ulysses_sequence_parallel_size # type: ignore [operator] + ) + logger.warning( + f"ref.log_prob_max_token_len_per_gpu is automatically set to {self.actor_rollout_ref.ref.log_prob_max_token_len_per_gpu} " + f"to match model.max_model_len ({config.model.max_model_len}). If you face OOM issues, " + "please set `model.max_model_len` to a smaller value." + ) # Critic config self.critic.strategy = self.actor_rollout_ref.actor.strategy @@ -466,21 +484,30 @@ def synchronize_config(self, config: Config) -> None: # noqa: C901 if self.critic.use_dynamic_bsz is None: self.critic.use_dynamic_bsz = config.trainer.use_dynamic_bsz if self.critic.ppo_max_token_len_per_gpu is None: - self.critic.ppo_max_token_len_per_gpu = config.trainer.ppo_max_token_len_per_gpu + self.critic.ppo_max_token_len_per_gpu = ( + self.actor_rollout_ref.actor.ppo_max_token_len_per_gpu + ) if self.critic.ulysses_sequence_parallel_size is None: self.critic.ulysses_sequence_parallel_size = ( config.trainer.ulysses_sequence_parallel_size ) + if ( self.critic.ppo_max_token_len_per_gpu * self.critic.ulysses_sequence_parallel_size # type: ignore [operator] - < config.model.max_model_len + < config.model.max_model_len * 2 # type: ignore [operator] ): self.critic.ppo_max_token_len_per_gpu = math.ceil( - config.model.max_model_len / self.critic.ulysses_sequence_parallel_size # type: ignore [operator] + config.model.max_model_len # type: ignore [operator] + * 2 + / self.critic.ulysses_sequence_parallel_size # type: ignore [operator] ) logger.warning( - f"Warning: critic.ppo_max_token_len_per_gpu is automatically set to {self.critic.ppo_max_token_len_per_gpu} to match model.max_model_len ({config.model.max_model_len})" + f"critic.ppo_max_token_len_per_gpu is automatically set to {self.critic.ppo_max_token_len_per_gpu} " + f"to match model.max_model_len ({config.model.max_model_len}). If you face OOM issues, " + "please set `model.max_model_len` to a smaller value." ) + if self.critic.forward_max_token_len_per_gpu is None: + self.critic.forward_max_token_len_per_gpu = self.critic.ppo_max_token_len_per_gpu # LoRA related config if config.model.lora_configs is not None: