diff --git a/trl/trainer/base_trainer.py b/trl/trainer/base_trainer.py
index bb88cbfc934..4e4d2ba64e2 100644
--- a/trl/trainer/base_trainer.py
+++ b/trl/trainer/base_trainer.py
@@ -17,7 +17,7 @@
 
 from transformers import Trainer, is_wandb_available
 
-from .utils import generate_model_card, get_comet_experiment_url
+from .utils import generate_model_card, get_comet_experiment_url, get_config_model_id
 
 
 if is_wandb_available():
@@ -50,8 +50,9 @@ def create_model_card(
         if not self.is_world_process_zero():
             return
 
-        if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path):
-            base_model = self.model.config._name_or_path
+        model_name_or_path = get_config_model_id(self.model.config)
+        if model_name_or_path and not os.path.isdir(model_name_or_path):
+            base_model = model_name_or_path
         else:
             base_model = None
 
diff --git a/trl/trainer/callbacks.py b/trl/trainer/callbacks.py
index 3976279e85f..49cf4ab77b0 100644
--- a/trl/trainer/callbacks.py
+++ b/trl/trainer/callbacks.py
@@ -40,7 +40,7 @@
 from ..mergekit_utils import MergeConfig, merge_models, upload_model_to_hf
 from ..models.utils import unwrap_model_for_generation
 from .judges import BasePairwiseJudge
-from .utils import log_table_to_comet_experiment
+from .utils import get_config_model_id, log_table_to_comet_experiment
 
 
 if is_rich_available():
@@ -821,7 +821,7 @@ def _merge_and_maybe_push(self, output_dir, global_step, model):
         checkpoint_path = os.path.join(output_dir, f"checkpoint-{global_step}")
         self.merge_config.policy_model_path = checkpoint_path
         if self.merge_config.target_model_path is None:
-            self.merge_config.target_model_path = model.config._name_or_path
+            self.merge_config.target_model_path = get_config_model_id(model.config)
         merge_path = os.path.join(checkpoint_path, "merged")
         merge_models(self.merge_config.create(), merge_path)
 
diff --git a/trl/trainer/dpo_trainer.py b/trl/trainer/dpo_trainer.py
index f296e99c75c..aa4deb04f98 100644
--- a/trl/trainer/dpo_trainer.py
+++ b/trl/trainer/dpo_trainer.py
@@ -65,6 +65,7 @@
     empty_cache,
     flush_left,
     flush_right,
+    get_config_model_id,
     log_table_to_comet_experiment,
     pad,
     pad_to_length,
@@ -286,7 +287,7 @@ def __init__(
     ):
         # Args
        if args is None:
-            model_name = model if isinstance(model, str) else model.config._name_or_path
+            model_name = model if isinstance(model, str) else get_config_model_id(model.config)
             model_name = model_name.split("/")[-1]
             args = DPOConfig(f"{model_name}-DPO")
 
@@ -299,7 +300,7 @@ def __init__(
                     "You passed `model_init_kwargs` to the `DPOConfig`, but your model is already instantiated. "
                     "The `model_init_kwargs` will be ignored."
                )
-        model_id = model.config._name_or_path
+        model_id = get_config_model_id(model.config)
         if isinstance(ref_model, str):
             ref_model = create_model_from_path(ref_model, **args.ref_model_init_kwargs or {})
         else:
diff --git a/trl/trainer/grpo_trainer.py b/trl/trainer/grpo_trainer.py
index 875d1873092..0d573a89770 100644
--- a/trl/trainer/grpo_trainer.py
+++ b/trl/trainer/grpo_trainer.py
@@ -65,6 +65,7 @@
     disable_dropout_in_model,
     ensure_master_addr_port,
     entropy_from_logits,
+    get_config_model_id,
     identity,
     nanmax,
     nanmin,
@@ -233,7 +234,7 @@ def __init__(
    ):
         # Args
         if args is None:
-            model_name = model if isinstance(model, str) else model.config._name_or_path
+            model_name = model if isinstance(model, str) else get_config_model_id(model.config)
             model_name = model_name.split("/")[-1]
             args = GRPOConfig(f"{model_name}-GRPO")
 
@@ -258,7 +259,7 @@ def __init__(
             architecture = getattr(transformers, config.architectures[0])
             model = architecture.from_pretrained(model_id, **model_init_kwargs)
         else:
-            model_id = model.config._name_or_path
+            model_id = get_config_model_id(model.config)
             if args.model_init_kwargs is not None:
                 logger.warning(
                     "You passed `model_init_kwargs` to the `GRPOConfig`, but your model is already instantiated. "
@@ -278,7 +279,7 @@ def __init__(
 
         # Processing class
         if processing_class is None:
-            processing_class = AutoProcessor.from_pretrained(model.config._name_or_path, truncation_side="left")
+            processing_class = AutoProcessor.from_pretrained(get_config_model_id(model.config), truncation_side="left")
 
         # Handle pad token for processors or tokenizers
         if isinstance(processing_class, ProcessorMixin):
@@ -305,7 +306,7 @@ def __init__(
                     reward_func, num_labels=1, **model_init_kwargs
                 )
             if isinstance(reward_funcs[i], nn.Module):  # Use Module over PretrainedModel for compat w/ compiled models
-                self.reward_func_names.append(reward_funcs[i].config._name_or_path.split("/")[-1])
+                self.reward_func_names.append(get_config_model_id(reward_funcs[i].config).split("/")[-1])
             else:
                 self.reward_func_names.append(reward_funcs[i].__name__)
         self.reward_funcs = reward_funcs
@@ -335,7 +336,7 @@ def __init__(
         for i, (reward_processing_class, reward_func) in enumerate(zip(reward_processing_classes, reward_funcs)):
             if isinstance(reward_func, PreTrainedModel):
                 if reward_processing_class is None:
-                    reward_processing_class = AutoTokenizer.from_pretrained(reward_func.config._name_or_path)
+                    reward_processing_class = AutoTokenizer.from_pretrained(get_config_model_id(reward_func.config))
                 if reward_processing_class.pad_token_id is None:
                     reward_processing_class.pad_token = reward_processing_class.eos_token
                 # The reward model computes the reward for the latest non-padded token in the input sequence.
diff --git a/trl/trainer/online_dpo_trainer.py b/trl/trainer/online_dpo_trainer.py
index f74abf2c27e..4268f6be508 100644
--- a/trl/trainer/online_dpo_trainer.py
+++ b/trl/trainer/online_dpo_trainer.py
@@ -74,6 +74,7 @@
     disable_dropout_in_model,
     empty_cache,
    ensure_master_addr_port,
+    get_config_model_id,
     pad,
     truncate_right,
 )
@@ -243,7 +244,7 @@ def __init__(
                     reward_func, num_labels=1, **model_init_kwargs
                 )
             if isinstance(reward_funcs[i], nn.Module):
-                self.reward_func_names.append(reward_funcs[i].config._name_or_path.split("/")[-1])
+                self.reward_func_names.append(get_config_model_id(reward_funcs[i].config).split("/")[-1])
             else:
                 self.reward_func_names.append(reward_funcs[i].__name__)
         self.reward_funcs = reward_funcs
diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py
index 5408db49967..56a216526ca 100644
--- a/trl/trainer/reward_trainer.py
+++ b/trl/trainer/reward_trainer.py
@@ -44,7 +44,7 @@
 from ..models import clone_chat_template, get_act_offloading_ctx_manager, prepare_peft_model
 from .base_trainer import BaseTrainer
 from .reward_config import RewardConfig
-from .utils import disable_dropout_in_model, pad, remove_none_values
+from .utils import disable_dropout_in_model, get_config_model_id, pad, remove_none_values
 
 
 if is_peft_available():
@@ -273,7 +273,7 @@ def __init__(
     ):
         # Args
         if args is None:
-            model_name = model if isinstance(model, str) else model.config._name_or_path
+            model_name = model if isinstance(model, str) else get_config_model_id(model.config)
             model_name = model_name.split("/")[-1]
             args = RewardConfig(f"{model_name}-Reward")
 
@@ -294,7 +294,7 @@ def __init__(
             with suppress_from_pretrained_warning(transformers.modeling_utils.logger):
                 model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=1, **model_init_kwargs)
         else:
-            model_id = model.config._name_or_path
+            model_id = get_config_model_id(model.config)
             if args.model_init_kwargs is not None:
                 logger.warning(
                     "You passed `model_init_kwargs` to the `RewardConfig`, but your model is already instantiated. "
diff --git a/trl/trainer/rloo_trainer.py b/trl/trainer/rloo_trainer.py
index f433d886c00..633f489fea8 100644
--- a/trl/trainer/rloo_trainer.py
+++ b/trl/trainer/rloo_trainer.py
@@ -65,6 +65,7 @@
     disable_dropout_in_model,
     ensure_master_addr_port,
     entropy_from_logits,
+    get_config_model_id,
     identity,
     nanmax,
     nanmin,
@@ -240,7 +241,7 @@ def __init__(
 
         # Args
         if args is None:
-            model_name = model if isinstance(model, str) else model.config._name_or_path
+            model_name = model if isinstance(model, str) else get_config_model_id(model.config)
             model_name = model_name.split("/")[-1]
             args = RLOOConfig(f"{model_name}-RLOO")
 
@@ -265,7 +266,7 @@ def __init__(
             architecture = getattr(transformers, config.architectures[0])
             model = architecture.from_pretrained(model_id, **model_init_kwargs)
         else:
-            model_id = model.config._name_or_path
+            model_id = get_config_model_id(model.config)
             if args.model_init_kwargs is not None:
                 logger.warning(
                     "You passed `model_init_kwargs` to the `RLOOConfig`, but your model is already instantiated. "
" @@ -285,7 +286,7 @@ def __init__( # Processing class if processing_class is None: - processing_class = AutoProcessor.from_pretrained(model.config._name_or_path, truncation_side="left") + processing_class = AutoProcessor.from_pretrained(get_config_model_id(model.config), truncation_side="left") # Handle pad token for processors or tokenizers if isinstance(processing_class, ProcessorMixin): @@ -312,7 +313,7 @@ def __init__( reward_func, num_labels=1, **model_init_kwargs ) if isinstance(reward_funcs[i], nn.Module): # Use Module over PretrainedModel for compat w/ compiled models - self.reward_func_names.append(reward_funcs[i].config._name_or_path.split("/")[-1]) + self.reward_func_names.append(get_config_model_id(reward_funcs[i].config).split("/")[-1]) else: self.reward_func_names.append(reward_funcs[i].__name__) self.reward_funcs = reward_funcs @@ -342,7 +343,7 @@ def __init__( for i, (reward_processing_class, reward_func) in enumerate(zip(reward_processing_classes, reward_funcs)): if isinstance(reward_func, PreTrainedModel): if reward_processing_class is None: - reward_processing_class = AutoTokenizer.from_pretrained(reward_func.config._name_or_path) + reward_processing_class = AutoTokenizer.from_pretrained(get_config_model_id(reward_func.config)) if reward_processing_class.pad_token_id is None: reward_processing_class.pad_token = reward_processing_class.eos_token # The reward model computes the reward for the latest non-padded token in the input sequence. diff --git a/trl/trainer/sft_trainer.py b/trl/trainer/sft_trainer.py index 6ffe70e324a..3b5ad9b0cf9 100644 --- a/trl/trainer/sft_trainer.py +++ b/trl/trainer/sft_trainer.py @@ -54,6 +54,7 @@ create_model_from_path, entropy_from_logits, flush_left, + get_config_model_id, pad, remove_none_values, selective_log_softmax, @@ -590,7 +591,7 @@ def __init__( ): # Args if args is None: - model_name = model if isinstance(model, str) else model.config._name_or_path + model_name = model if isinstance(model, str) else get_config_model_id(model.config) model_name = model_name.split("/")[-1] args = SFTConfig(f"{model_name}-SFT") elif isinstance(args, TrainingArguments) and not isinstance(args, SFTConfig): @@ -608,11 +609,10 @@ def __init__( "You passed `model_init_kwargs` to the `SFTConfig`, but your model is already instantiated. " "The `model_init_kwargs` will be ignored." ) - model_id = model.config._name_or_path # Processing class if processing_class is None: - processing_class = AutoProcessor.from_pretrained(model_id) + processing_class = AutoProcessor.from_pretrained(get_config_model_id(model.config)) # Handle pad token for processors or tokenizers if isinstance(processing_class, ProcessorMixin): diff --git a/trl/trainer/utils.py b/trl/trainer/utils.py index f335166c527..ab83b1bc75b 100644 --- a/trl/trainer/utils.py +++ b/trl/trainer/utils.py @@ -41,6 +41,7 @@ BitsAndBytesConfig, EvalPrediction, GenerationConfig, + PretrainedConfig, PreTrainedModel, PreTrainedTokenizerBase, TrainerState, @@ -1962,3 +1963,19 @@ def create_model_from_path(model_id: str, **kwargs) -> PreTrainedModel: architecture = getattr(transformers, config.architectures[0]) model = architecture.from_pretrained(model_id, **kwargs) return model + + +def get_config_model_id(config: PretrainedConfig) -> str: + """ + Retrieve the model identifier from a given model configuration. + + Args: + config ([`~transformers.PreTrainedConfig`]): + Configuration from which to extract the model identifier. + + Returns: + `str`: + The model identifier associated with the model configuration. 
+ """ + # Fall back to `config.text_config._name_or_path` if `config._name_or_path` is missing: Qwen2-VL and Qwen2.5-VL. See GH-4323 + return getattr(config, "_name_or_path", "") or getattr(getattr(config, "text_config", None), "_name_or_path", "")