8 changes: 7 additions & 1 deletion trinity/cli/launcher.py
@@ -20,6 +20,7 @@
from trinity.trainer.trainer import Trainer
from trinity.utils.dlc_utils import setup_ray_cluster
from trinity.utils.log import get_logger
from trinity.utils.plugin_loader import load_plugins

logger = get_logger(__name__)

@@ -124,12 +125,17 @@ def both(config: Config) -> None:


def run(config_path: str, dlc: bool = False, plugin_dir: str = None):
if plugin_dir:
os.environ[PLUGIN_DIRS_ENV_VAR] = os.pathsep.join(
[os.environ.get(PLUGIN_DIRS_ENV_VAR, ""), plugin_dir]
)
load_plugins()
config = load_config(config_path)
config.check_and_update()
pprint(config)

envs = {
PLUGIN_DIRS_ENV_VAR: plugin_dir or "",
PLUGIN_DIRS_ENV_VAR: os.environ.get(PLUGIN_DIRS_ENV_VAR, ""),
LOG_DIR_ENV_VAR: config.log.save_dir,
LOG_LEVEL_ENV_VAR: config.log.level,
LOG_NODE_IP_ENV_VAR: "1" if config.log.group_by_node else "0",
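Context for the launcher change above: a `--plugin-dir` passed on the command line is merged into the plugin-directory environment variable and forwarded to the Ray workers through `envs`, so the driver and the remote actors load the same plugins. The sketch below illustrates one way such a loader could consume that variable; the environment-variable value and the body of `load_plugins` are assumptions for illustration, not the actual `trinity.utils.plugin_loader` implementation.

import importlib.util
import os

PLUGIN_DIRS_ENV_VAR = "TRINITY_PLUGIN_DIRS"  # assumed value, for illustration only


def load_plugins() -> None:
    """Import every .py file found in the os.pathsep-separated plugin dirs (sketch)."""
    raw = os.environ.get(PLUGIN_DIRS_ENV_VAR, "")
    for plugin_dir in filter(None, raw.split(os.pathsep)):  # skip empty entries
        if not os.path.isdir(plugin_dir):
            continue
        for fname in sorted(os.listdir(plugin_dir)):
            if not fname.endswith(".py"):
                continue
            spec = importlib.util.spec_from_file_location(
                fname[:-3], os.path.join(plugin_dir, fname)
            )
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)  # importing registers decorated plugin classes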
8 changes: 0 additions & 8 deletions trinity/common/constants.py
@@ -56,14 +56,6 @@ class StorageType(CaseInsensitiveEnum):
FILE = "file"


class MonitorType(CaseInsensitiveEnum):
"""Monitor Type."""

WANDB = "wandb"
TENSORBOARD = "tensorboard"
MLFLOW = "mlflow"


class SyncMethodEnumMeta(CaseInsensitiveEnumMeta):
def __call__(cls, value, *args, **kwargs):
if value == "online":
145 changes: 110 additions & 35 deletions trinity/manager/config_manager.py
@@ -14,8 +14,10 @@
from trinity.algorithm.policy_loss_fn.policy_loss_fn import POLICY_LOSS_FN
from trinity.algorithm.sample_strategy.sample_strategy import SAMPLE_STRATEGY
from trinity.common.constants import StorageType
from trinity.manager.config_registry.buffer_config_manager import get_train_batch_size
from trinity.manager.config_registry.config_registry import CONFIG_GENERATORS
from trinity.manager.config_registry.trainer_config_manager import use_critic
from trinity.utils.plugin_loader import load_plugins

register_map = {
"sample_strategy": SAMPLE_STRATEGY,
@@ -29,6 +31,7 @@

class ConfigManager:
def __init__(self):
load_plugins()
self.unfinished_fields = set()
CONFIG_GENERATORS.set_unfinished_fields(self.unfinished_fields)
st.set_page_config(page_title="Trinity-RFT Config Generator", page_icon=":robot:")
@@ -131,7 +134,7 @@ def beginner_mode(self):
st.header("Important Configs")
self.get_configs("node_num", "gpu_per_node", "engine_num", "tensor_parallel_size")

self.get_configs("total_epochs", "explore_batch_size", "train_batch_size", "repeat_times")
self.get_configs("total_epochs", "explore_batch_size", "repeat_times", "train_batch_size")

self.get_configs("storage_type", "max_response_tokens", "max_model_len", "ppo_epochs")

@@ -201,9 +204,10 @@ def _expert_buffer_part(self):
self.get_configs("use_priority_queue")
self.get_configs("reuse_cooldown_time", "priority_fn", "priority_decay")

self.buffer_advanced_tab = st.expander("Advanced Config")
with self.buffer_advanced_tab:
self.get_configs("buffer_max_retry_times", "max_retry_interval")
# TODO: used for SQL storage
# self.buffer_advanced_tab = st.expander("Advanced Config")
# with self.buffer_advanced_tab:
# self.get_configs("buffer_max_retry_times", "max_retry_interval")

def _expert_explorer_part(self):
self.get_configs("sync_method", "sync_interval", "sync_timeout")
@@ -265,15 +269,43 @@ def _expert_verl_training_part(self):

self.get_configs("ppo_epochs", "training_strategy", "resume_mode", "impl_backend")

self.get_configs("param_offload", "optimizer_offload", "forward_prefetch")
self.get_configs("resume_from_path")

if st.session_state["training_strategy"] == "fsdp":
self.get_configs("param_offload", "optimizer_offload", "forward_prefetch")
elif st.session_state["training_strategy"] == "fsdp2":
self.get_configs("offload_policy", "reshard_after_forward")
elif st.session_state["training_strategy"] == "megatron":
with st.expander("Megatron Config"):
self.get_configs("param_offload", "grad_offload", "optimizer_offload")
self.get_configs(
"tensor_model_parallel_size",
"pipeline_model_parallel_size",
"virtual_pipeline_model_parallel_size",
)
self.get_configs(
"expert_model_parallel_size",
"expert_tensor_parallel_size",
"context_parallel_size",
)
self.get_configs(
"sequence_parallel",
"use_distributed_optimizer",
"use_dist_checkpointing",
"use_mbridge",
)
self.get_configs("dist_checkpointing_path")
self.get_configs(
"recompute_granularity", "recompute_method", "recompute_num_layers"
)
self.get_configs("recompute_modules")

with st.expander("Advanced Config"):
self.get_configs("critic_warmup", "total_training_steps")

self.get_configs("default_hdfs_dir")

self.get_configs("remove_previous_ckpt_in_save", "del_local_ckpt_after_load")
self.get_configs("del_local_ckpt_after_load")

self.get_configs("max_actor_ckpt_to_keep", "max_critic_ckpt_to_keep")

@@ -341,15 +373,56 @@ def _generate_verl_config(self):
use_fused_kernels = "use_fused_kernels" in st.session_state["training_args"]

if st.session_state["training_strategy"] == "fsdp":
fsdp_config = {
"wrap_policy": {"min_num_params": 0},
"param_offload": st.session_state["param_offload"],
"optimizer_offload": st.session_state["optimizer_offload"],
"fsdp_size": -1,
"forward_prefetch": st.session_state["forward_prefetch"],
distribution_config = {
"fsdp_config": {
"fsdp_size": -1,
"wrap_policy": {"min_num_params": 0},
"param_offload": st.session_state["param_offload"],
"optimizer_offload": st.session_state["optimizer_offload"],
"forward_prefetch": st.session_state["forward_prefetch"],
}
}
elif st.session_state["training_strategy"] == "fsdp2":
distribution_config = {
"fsdp_config": {
"fsdp_size": -1,
"offload_policy": st.session_state["offload_policy"],
"reshard_after_forward": st.session_state["reshard_after_forward"],
}
}
elif st.session_state["training_strategy"] == "megatron":
distribution_config = {
"megatron": {
"param_offload": st.session_state["param_offload"],
"grad_offload": st.session_state["grad_offload"],
"optimizer_offload": st.session_state["optimizer_offload"],
"tensor_model_parallel_size": st.session_state["tensor_model_parallel_size"],
"pipeline_model_parallel_size": st.session_state[
"pipeline_model_parallel_size"
],
"virtual_pipeline_model_parallel_size": st.session_state[
"virtual_pipeline_model_parallel_size"
],
"expert_model_parallel_size": st.session_state["expert_model_parallel_size"],
"expert_tensor_parallel_size": st.session_state["expert_tensor_parallel_size"],
"context_parallel_size": st.session_state["context_parallel_size"],
"sequence_parallel": st.session_state["sequence_parallel"],
"use_distributed_optimizer": st.session_state["use_distributed_optimizer"],
"use_dist_checkpointing": st.session_state["use_dist_checkpointing"],
"dist_checkpointing_path": st.session_state["dist_checkpointing_path"],
"seed": st.session_state["seed"],
# TODO: override_ddp_config
"override_transformer_config": {
"recompute_granularity": st.session_state["recompute_granularity"],
"recompute_modules": st.session_state["recompute_modules"],
"recompute_method": st.session_state["recompute_method"],
"recompute_num_layers": st.session_state["recompute_num_layers"],
},
"use_mbridge": st.session_state["use_mbridge"],
}
}
else:
fsdp_config = {}
distribution_config = {}

ppo_max_token_len_per_gpu = (
st.session_state["repeat_times"] * st.session_state["max_model_len"]
@@ -372,29 +445,25 @@ def _generate_verl_config(self):
"use_dynamic_bsz": use_dynamic_bsz,
"ppo_max_token_len_per_gpu": ppo_max_token_len_per_gpu,
"ppo_epochs": st.session_state["ppo_epochs"],
"shuffle": False,
"ulysses_sequence_parallel_size": st.session_state[
"actor_ulysses_sequence_parallel_size"
],
"entropy_from_logits_with_chunking": st.session_state[
"actor_entropy_from_logits_with_chunking"
],
"entropy_checkpointing": st.session_state["actor_entropy_checkpointing"],
"checkpoint": {"contents": st.session_state["actor_checkpoint"]},
"checkpoint": {
"load_contents": st.session_state["actor_checkpoint"],
"save_contents": st.session_state["actor_checkpoint"],
},
"optim": {
"lr": st.session_state["actor_lr"],
"lr_warmup_steps_ratio": st.session_state["actor_lr_warmup_steps_ratio"],
"warmup_style": st.session_state["actor_warmup_style"],
"total_training_steps": (
-1
if st.session_state["total_training_steps"] is None
else st.session_state["total_training_steps"]
),
"total_training_steps": (st.session_state["total_training_steps"] or -1),
},
"fsdp_config": copy.deepcopy(fsdp_config),
},
"ref": {
"fsdp_config": copy.deepcopy(fsdp_config),
"log_prob_micro_batch_size_per_gpu": st.session_state[
"ref_log_prob_micro_batch_size_per_gpu"
],
@@ -415,14 +484,15 @@ def _generate_verl_config(self):
"resume_mode": st.session_state["resume_mode"],
"resume_from_path": st.session_state["resume_from_path"],
"default_hdfs_dir": st.session_state["default_hdfs_dir"],
"remove_previous_ckpt_in_save": st.session_state["remove_previous_ckpt_in_save"],
"del_local_ckpt_after_load": st.session_state["del_local_ckpt_after_load"],
"val_before_train": False,
"max_actor_ckpt_to_keep": st.session_state["max_actor_ckpt_to_keep"],
"max_critic_ckpt_to_keep": st.session_state["max_critic_ckpt_to_keep"],
},
}

trainer_config["actor_rollout_ref"]["actor"].update(copy.deepcopy(distribution_config))
trainer_config["actor_rollout_ref"]["ref"].update(copy.deepcopy(distribution_config))

if use_fused_kernels:
trainer_config["actor_rollout_ref"]["model"]["fused_kernel_options"] = {
"impl_backend": st.session_state["impl_backend"],
@@ -436,20 +506,15 @@ def _generate_verl_config(self):
"lr": st.session_state["critic_lr"],
"lr_warmup_steps_ratio": st.session_state["critic_lr_warmup_steps_ratio"],
"warmup_style": st.session_state["critic_warmup_style"],
"total_training_steps": (
-1
if st.session_state["total_training_steps"] is None
else st.session_state["total_training_steps"]
),
"total_training_steps": (st.session_state["total_training_steps"] or -1),
},
"model": {
"override_config": {},
"external_lib": None,
"enable_gradient_checkpointing": enable_gradient_checkpointing,
"use_remove_padding": use_remove_padding,
"fsdp_config": copy.deepcopy(fsdp_config),
},
"ppo_mini_batch_size": st.session_state["train_batch_size"],
"ppo_mini_batch_size": get_train_batch_size(),
"ppo_micro_batch_size_per_gpu": st.session_state[
"critic_ppo_micro_batch_size_per_gpu"
],
@@ -463,11 +528,17 @@ def _generate_verl_config(self):
"critic_ulysses_sequence_parallel_size"
],
"ppo_epochs": st.session_state["ppo_epochs"],
"shuffle": False,
"grad_clip": st.session_state["critic_grad_clip"],
"cliprange_value": st.session_state["critic_cliprange_value"],
"checkpoint": {"contents": st.session_state["critic_checkpoint"]},
"checkpoint": {
"load_contents": st.session_state["critic_checkpoint"],
"save_contents": st.session_state["critic_checkpoint"],
},
}
if st.session_state["training_strategy"] in {"fsdp", "fsdp2"}:
trainer_config["critic"]["model"].update(copy.deepcopy(distribution_config))
elif st.session_state["training_strategy"] == "megatron":
trainer_config["critic"].update(copy.deepcopy(distribution_config))
else:
del trainer_config["critic"]
return trainer_config
@@ -519,12 +590,14 @@ def _gen_buffer_config(self):
"name": "experience_buffer",
"storage_type": st.session_state["storage_type"],
"path": experience_buffer_path,
"max_retry_interval": st.session_state["max_retry_interval"],
"max_retry_times": st.session_state["buffer_max_retry_times"],
# "max_retry_interval": st.session_state["max_retry_interval"],
# "max_retry_times": st.session_state["buffer_max_retry_times"],
},
"sft_warmup_steps": st.session_state["sft_warmup_steps"],
},
}
if st.session_state["train_batch_size"] is None:
del buffer_config["train_batch_size"]
if st.session_state["algorithm_type"] != "dpo":
experience_buffer = buffer_config["trainer_input"]["experience_buffer"]
experience_buffer["use_priority_queue"] = st.session_state["use_priority_queue"]
@@ -665,6 +738,8 @@ def generate_config(self):
"data_processor": {}, # TODO: Add data processor config
"model": {
"model_path": st.session_state["model_path"],
"max_prompt_tokens": st.session_state["max_prompt_tokens"],
"min_response_tokens": st.session_state["min_response_tokens"],
"max_response_tokens": st.session_state["max_response_tokens"],
"max_model_len": st.session_state["max_model_len"],
},
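A note on the merge pattern introduced in `_generate_verl_config`: the per-strategy `distribution_config` is built once and then applied with `copy.deepcopy` to the actor, ref, and critic sections, so later in-place edits to one role's config cannot leak into another. The snippet below is a minimal, self-contained illustration of that pattern with the Streamlit `session_state` lookups replaced by a plain dict; the keys and defaults are placeholders, not the real config schema.

import copy


def build_distribution_config(training_strategy: str, state: dict) -> dict:
    # Placeholder stand-in for the session_state-driven branches in the PR.
    if training_strategy in ("fsdp", "fsdp2"):
        return {"fsdp_config": {"fsdp_size": -1, "param_offload": state.get("param_offload", False)}}
    if training_strategy == "megatron":
        return {"megatron": {"tensor_model_parallel_size": state.get("tensor_model_parallel_size", 1)}}
    return {}


state = {"param_offload": True}
dist = build_distribution_config("fsdp", state)
actor = {"ppo_epochs": 1}
ref = {"log_prob_micro_batch_size_per_gpu": 4}
actor.update(copy.deepcopy(dist))
ref.update(copy.deepcopy(dist))

actor["fsdp_config"]["param_offload"] = False  # mutate the actor's copy only
assert ref["fsdp_config"]["param_offload"] is True  # ref's copy is unaffected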