diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index fd3ab04d6ca..70009f37cac 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -95,8 +95,6 @@ title: Model Classes - local: model_utils title: Model Utilities - - local: best_of_n - title: Best of N Sampling - local: judges title: Judges - local: callbacks diff --git a/docs/source/best_of_n.md b/docs/source/best_of_n.md deleted file mode 100644 index 9280a9e2008..00000000000 --- a/docs/source/best_of_n.md +++ /dev/null @@ -1,68 +0,0 @@ -# Best of N sampling: Alternative ways to get better model output without RL based fine-tuning - -> [!WARNING] -> Best-of-N sampling is deprecated and will be removed in TRL 0.25.0. - -Within the extras module is the `best-of-n` sampler class that serves as an alternative method of generating better model output. -As to how it fares against the RL based fine-tuning, please look in the `examples` directory for a comparison example - -## Usage - -To get started quickly, instantiate an instance of the class with a model, a length sampler, a tokenizer and a callable that serves as a proxy reward pipeline that outputs reward scores for input queries - -```python -from transformers import pipeline, AutoTokenizer -from trl import AutoModelForCausalLMWithValueHead -from trl.core import LengthSampler -from trl.extras import BestOfNSampler - -ref_model = AutoModelForCausalLMWithValueHead.from_pretrained(ref_model_name) -reward_pipe = pipeline("sentiment-analysis", model=reward_model, device=device) -tokenizer = AutoTokenizer.from_pretrained(ref_model_name) -tokenizer.pad_token = tokenizer.eos_token - -# callable that takes a list of raw text and returns a list of corresponding reward scores -def queries_to_scores(list_of_strings): - return [output["score"] for output in reward_pipe(list_of_strings)] - -best_of_n = BestOfNSampler(model, tokenizer, queries_to_scores, length_sampler=output_length_sampler) -``` - -And assuming you have a list/tensor of tokenized queries, you can generate better output by calling the `generate` method - -```python -best_of_n.generate(query_tensors, device=device, **gen_kwargs) -``` - -The default sample size is 4, but you can change it at the time of instance initialization like so - -```python -best_of_n = BestOfNSampler(model, tokenizer, queries_to_scores, length_sampler=output_length_sampler, sample_size=8) -``` - -The default output is the result of taking the top scored output for each query, but you can change it to top 2 and so on by passing the `n_candidates` argument at the time of instance initialization - -```python -best_of_n = BestOfNSampler(model, tokenizer, queries_to_scores, length_sampler=output_length_sampler, n_candidates=2) -``` - -There is the option of setting the generation settings (like `temperature`, `pad_token_id`) at the time of instance creation as opposed to when calling the `generate` method. -This is done by passing a [`~transformers.GenerationConfig`] from the `transformers` library at the time of initialization - -```python - -from transformers import GenerationConfig - -generation_config = GenerationConfig(min_length= -1, top_k=0.0, top_p= 1.0, do_sample= True, pad_token_id=tokenizer.eos_token_id) - -best_of_n = BestOfNSampler(model, tokenizer, queries_to_scores, length_sampler=output_length_sampler, generation_config=generation_config) - -best_of_n.generate(query_tensors, device=device) - -``` - -Furthermore, at the time of initialization you can set the seed to control the repeatability of the generation process and the number of samples to generate for each query - -## BestOfNSampler - -[[autodoc]] BestOfNSampler diff --git a/tests/test_best_of_n_sampler.py b/tests/test_best_of_n_sampler.py deleted file mode 100644 index d52538c71d0..00000000000 --- a/tests/test_best_of_n_sampler.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright 2020-2025 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch -from transformers import AutoTokenizer, GenerationConfig - -from trl import AutoModelForCausalLMWithValueHead -from trl.core import LengthSampler -from trl.extras import BestOfNSampler - -from .testing_utils import TrlTestCase - - -def queries_to_scores(list_of_strings): - return [torch.rand(1).item() for _ in list_of_strings] - - -class TestBestOfNSampler(TrlTestCase): - """ - Tests the BestOfNSampler class - """ - - ref_model_name = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" - output_length_sampler = LengthSampler(2, 6) - model = AutoModelForCausalLMWithValueHead.from_pretrained(ref_model_name) - tokenizer = AutoTokenizer.from_pretrained(ref_model_name) - tokenizer.pad_token = tokenizer.eos_token - output_length_sampler = LengthSampler(2, 6) - - def test_different_input_types(self): - r""" - Tests if the different input types normalizer works - """ - - generation_config = GenerationConfig( - min_length=-1, - top_k=0.0, - top_p=1.0, - do_sample=True, - pad_token_id=self.tokenizer.eos_token_id, - ) - - output_length_sampler = LengthSampler(2, 6) - - best_of_n = BestOfNSampler( - self.model, - self.tokenizer, - queries_to_scores, - length_sampler=output_length_sampler, - generation_config=generation_config, - ) - - queries = ["hello world", "goodbye world"] - tokenized_queries = [self.tokenizer.encode(query) for query in queries] - - various_queries_formats = [ - (tokenized_queries[0], 1), - (tokenized_queries, 2), - (torch.tensor(tokenized_queries[1]), 1), - ([torch.tensor(query) for query in tokenized_queries], 2), - ] - - for q, expected_length in various_queries_formats: - results = best_of_n.generate(q) - assert isinstance(results, list) - assert len(results) == expected_length - - def test_different_sample_sizes_and_n_candidates_values(self): - r""" - Tests different sample sizes and n_candidates values - """ - generation_config = GenerationConfig( - min_length=-1, - top_k=0.0, - top_p=1.0, - do_sample=True, - pad_token_id=self.tokenizer.eos_token_id, - ) - - output_length_sampler = LengthSampler(6, 10) - - for sample_value, n_candidates_values, expected in [ - (4, 2, 2), - (10, 3, 3), - (6, 4, 4), - ]: - best_of_n = BestOfNSampler( - self.model, - self.tokenizer, - queries_to_scores, - length_sampler=output_length_sampler, - generation_config=generation_config, - sample_size=sample_value, - n_candidates=n_candidates_values, - ) - - queries = ["hello world", "troll the world"] - tokenized_queries = [self.tokenizer.encode(query) for query in queries] - results = best_of_n.generate(tokenized_queries) - for result in results: - assert len(result) == expected diff --git a/tests/test_utils.py b/tests/test_utils.py index 973d4105433..60d9b9dcefb 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -30,7 +30,6 @@ DataCollatorForChatML, RepeatSampler, batch_generation, - decode_and_strip_padding, entropy_from_logits, flush_left, flush_right, @@ -170,21 +169,6 @@ def test_create_peft_config_use_peft_true(self): assert getattr(peft_config, arg) == value -class TestDecodeAndStripPadding(TrlTestCase): - def setup_method(self): - self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") - - def test_example_with_padding(self): - inputs = self.tokenizer(["Hello world", "Hello"], padding=True, return_tensors="pt") - decoded = decode_and_strip_padding(inputs["input_ids"], self.tokenizer) - assert decoded == ["Hello world", "Hello"] - - def test_example_without_padding(self): - inputs = self.tokenizer(["Hello", "Hello"], padding=False, return_tensors="pt") - decoded = decode_and_strip_padding(inputs["input_ids"], self.tokenizer) - assert decoded == ["Hello", "Hello"] - - class TestGenerateModelCard(TrlTestCase): def test_full(self): model_card = generate_model_card( diff --git a/trl/__init__.py b/trl/__init__.py index 6eabf828078..8babb49039e 100644 --- a/trl/__init__.py +++ b/trl/__init__.py @@ -53,7 +53,6 @@ "truncate_dataset", "unpair_preference_dataset", ], - "extras": ["BestOfNSampler"], "models": [ "SUPPORTED_ARCHITECTURES", "AutoModelForCausalLMWithValueHead", @@ -133,7 +132,6 @@ truncate_dataset, unpair_preference_dataset, ) - from .extras import BestOfNSampler from .models import ( SUPPORTED_ARCHITECTURES, AutoModelForCausalLMWithValueHead, diff --git a/trl/extras/__init__.py b/trl/extras/__init__.py index fddcdf1af1a..a3170185781 100644 --- a/trl/extras/__init__.py +++ b/trl/extras/__init__.py @@ -11,19 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -from typing import TYPE_CHECKING - -from ..import_utils import _LazyModule - - -_import_structure = { - "best_of_n_sampler": ["BestOfNSampler"], -} - -if TYPE_CHECKING: - from .best_of_n_sampler import BestOfNSampler -else: - import sys - - sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) diff --git a/trl/extras/best_of_n_sampler.py b/trl/extras/best_of_n_sampler.py deleted file mode 100644 index f7505042404..00000000000 --- a/trl/extras/best_of_n_sampler.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright 2020-2025 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import warnings -from typing import Any, Callable, Optional, Union - -import torch -from transformers import GenerationConfig, PreTrainedTokenizer, PreTrainedTokenizerFast, set_seed - -from ..models import SUPPORTED_ARCHITECTURES, PreTrainedModelWrapper - - -class BestOfNSampler: - """ - Sampler for best-of-n generation. - - Args: - model ([`PreTrainedModelWrapper`]): - The pretrained model to use for generation. - tokenizer ([`~transformers.PreTrainedTokenizer`] or [`~transformers.PreTrainedTokenizerFast`]): - Tokenizer associated with the pretrained model. - queries_to_scores (`Callable[[list[str]], list[float]]`): - Callable that takes a list of generated texts and returns the associated reward scores. - length_sampler (`Any`): - Sampler used to sample the length of the generated text. - sample_size (`int`, *optional*, defaults to `4`): - Number of samples to generate for each query. - seed (`int`, *optional*): - Random seed used to control generation. - n_candidates (`int`, *optional*, defaults to `1`): - Number of candidates to return for each query. - generation_config ([`~transformers.GenerationConfig`], *optional*): - Generation config passed to the underlying model's `generate` method. See - [`~transformers.GenerationConfig`] for more details. - - - - `BestOfNSampler` is deprecated and will be removed in version 0.25. - - - """ - - warnings.warn("`BestOfNSampler` is deprecated and will be removed in TRL 0.25.", FutureWarning, stacklevel=2) - - def __init__( - self, - model: PreTrainedModelWrapper, - tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast], - queries_to_scores: Callable[[list[str]], list[float]], - length_sampler: Any, - sample_size: int = 4, - seed: Optional[int] = None, - n_candidates: int = 1, - generation_config: Optional[GenerationConfig] = None, - ) -> None: - if seed is not None: - set_seed(seed) - - if not isinstance(tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)): - raise ValueError( - f"tokenizer must be a PreTrainedTokenizer or PreTrainedTokenizerFast, got {type(tokenizer)}" - ) - if not isinstance(model, (SUPPORTED_ARCHITECTURES)): - raise ValueError( - f"model must be a PreTrainedModelWrapper, got {type(model)} - supported architectures are: {SUPPORTED_ARCHITECTURES}" - ) - - self.model = model - self.tokenizer = tokenizer - - self.queries_to_scores = queries_to_scores - self.length_sampler = length_sampler - self.gen_config = generation_config - self.sample_size = sample_size - self.n_candidates = n_candidates - - def generate( - self, - tokenized_query: Union[list[int], torch.Tensor, list[torch.Tensor], list[list[int]]], - skip_special_tokens: bool = True, - device: Optional[Union[str, torch.device]] = None, - **generation_kwargs, - ) -> list[list[str]]: - """ - Generate the best of n samples for input queries. - - Args: - tokenized_query (`list[int]` or `torch.Tensor` or `list[torch.Tensor]` or `list[list[int]]`): - Either a single tokenized query (a single tensor or a list of integers) or a batch of tokenized queries - (a list of tensors or a list of lists of integers). - skip_special_tokens (`bool`, *optional*, defaults to `True`): - Whether to remove the special tokens from the output. - device (`str` or `torch.device`, *optional*): - The device on which the model will be loaded. - **generation_kwargs: - Additional keyword arguments passed along to the underlying model's `generate` method. This is used to - override generation config. - - Returns: - `list[list[str]]`: A list of lists of generated texts. - """ - queries = None - - if isinstance(tokenized_query, torch.Tensor) and tokenized_query.ndim == 1: - queries = tokenized_query.unsqueeze(0) - elif isinstance(tokenized_query, list): - element_type = type(tokenized_query[0]) - if element_type is int: - queries = torch.tensor(tokenized_query).unsqueeze(0) - elif element_type is torch.Tensor: - queries = [tensor.reshape((1, -1)) for tensor in tokenized_query] - else: - queries = [torch.tensor(query).reshape((1, -1)) for query in tokenized_query] - - result = [] - - for query in queries: - queries = query.repeat((self.sample_size, 1)) - output = self.model.generate( - queries.to(device), - max_new_tokens=self.length_sampler(), - generation_config=self.gen_config, - **generation_kwargs, - ).squeeze() - output = self.tokenizer.batch_decode(output, skip_special_tokens=skip_special_tokens) - scores = torch.tensor(self.queries_to_scores(output)) - output = [output[i] for i in scores.topk(self.n_candidates).indices] - result.append(output) - - return result diff --git a/trl/trainer/dpo_config.py b/trl/trainer/dpo_config.py index b9e73a78ff4..740fdb6353a 100644 --- a/trl/trainer/dpo_config.py +++ b/trl/trainer/dpo_config.py @@ -219,7 +219,7 @@ class DPOConfig(TrainingArguments): - This parameter is deprecated and will be removed in version 0.25.0. Use `pad_token` (`str`) instead. + This parameter is deprecated and will be removed in version 0.26.0. Use `pad_token` (`str`) instead. """ diff --git a/trl/trainer/nash_md_trainer.py b/trl/trainer/nash_md_trainer.py index ad2c20be7a7..d8cc1ca8a2e 100644 --- a/trl/trainer/nash_md_trainer.py +++ b/trl/trainer/nash_md_trainer.py @@ -94,14 +94,6 @@ class NashMDTrainer(OnlineDPOTrainer): The optimizer and scheduler to use for training. preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`): The function to use to preprocess the logits before computing the metrics. - - reward_model: - - - - This parameter is deprecated and will be removed in version 0.25.0. Use `reward_funcs` instead. - - """ _tag_names = ["trl", "nash-md"] @@ -139,8 +131,6 @@ def __init__( callbacks: Optional[list[TrainerCallback]] = None, optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None), preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, - # Deprecated parameters - reward_model: Optional[Union[PreTrainedModel, nn.Module]] = None, ) -> None: super().__init__( model=model, @@ -158,7 +148,6 @@ def __init__( callbacks=callbacks, optimizers=optimizers, preprocess_logits_for_metrics=preprocess_logits_for_metrics, - reward_model=reward_model, ) self._mixture_coef = self.args.mixture_coef diff --git a/trl/trainer/online_dpo_config.py b/trl/trainer/online_dpo_config.py index 08ed1a6700d..2e28ec13e81 100644 --- a/trl/trainer/online_dpo_config.py +++ b/trl/trainer/online_dpo_config.py @@ -59,17 +59,6 @@ class may differ from those in [`~transformers.TrainingArguments`]. - `"sigmoid"`: sigmoid loss from the original [DPO](https://huggingface.co/papers/2305.18290) paper. - `"ipo"`: IPO loss from the [IPO](https://huggingface.co/papers/2310.12036) paper. - - dataset_num_proc (`int`, *optional*): - Number of processes to use for processing the dataset. - - - - This parameter is deprecated and will be removed in version 0.25.0. Since OnlineDPO does not involve - dataset preparation, you can safely remove it. - - - disable_dropout (`bool`, *optional*, defaults to `True`): Whether to disable dropout in the model and reference model. @@ -380,36 +369,11 @@ class may differ from those in [`~transformers.TrainingArguments`]. }, ) - # Deprecated parameters - dataset_num_proc: Optional[int] = field( - default=None, - metadata={"help": "Number of processes to use for processing the dataset."}, - ) - gpu_memory_utilization: Optional[float] = field( - default=None, - metadata={ - "help": "This parameter is deprecated and will be removed in version 0.25.0. Please use " - "`vllm_gpu_memory_utilization` instead.", - }, - ) - def __post_init__(self): self.bf16 = not (self.fp16) if self.bf16 is None else self.bf16 super().__post_init__() - if self.dataset_num_proc is not None: - warnings.warn( - "The parameter `dataset_num_proc` is deprecated and will be removed in version 0.25.0. " - "Since OnlineDPO does not involve dataset preparation, you can safely remove it.", - ) - if self.gpu_memory_utilization is not None: - warnings.warn( - "The parameter `gpu_memory_utilization` is deprecated and will be removed in version 0.25.0. " - "Please use `vllm_gpu_memory_utilization` instead.", - ) - self.vllm_gpu_memory_utilization = self.gpu_memory_utilization - if hasattr(self.beta, "__len__") and len(self.beta) == 1: self.beta = self.beta[0] diff --git a/trl/trainer/online_dpo_trainer.py b/trl/trainer/online_dpo_trainer.py index 70c649bdf6e..a35955980aa 100644 --- a/trl/trainer/online_dpo_trainer.py +++ b/trl/trainer/online_dpo_trainer.py @@ -164,14 +164,6 @@ class OnlineDPOTrainer(BaseTrainer): The optimizer and scheduler to use for training. preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`): The function to use to preprocess the logits before computing the metrics. - - reward_model: - - - - This parameter is deprecated and will be removed in version 0.25.0. Use `reward_funcs` instead. - - """ _tag_names = ["trl", "online-dpo"] @@ -206,9 +198,6 @@ def __init__( callbacks: Optional[list[TrainerCallback]] = None, optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None), preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, - # Deprecated parameters - reward_model: Optional[Union[PreTrainedModel, nn.Module]] = None, - reward_processing_class: Optional[PreTrainedTokenizerBase] = None, ) -> None: if not os.environ.get("TRL_EXPERIMENTAL_SILENCE"): warnings.warn( @@ -225,36 +214,6 @@ def __init__( self.ref_model = ref_model - # Handle deprecated parameters for backward compatibility - if reward_model is not None: - warnings.warn( - "The `reward_model` parameter is deprecated and will be removed in version 0.25.0. " - "Please use `reward_funcs` instead. For example, change `reward_model=model` to `reward_funcs=model`.", - ) - # Convert old reward_model to new reward_funcs format - if reward_funcs is None: - reward_funcs = reward_model - else: - warnings.warn( - "Both `reward_model` and `reward_funcs` are provided. Using `reward_funcs` and ignoring " - "`reward_model`.", - ) - - if reward_processing_class is not None: - warnings.warn( - "The `reward_processing_class` parameter is deprecated and will be removed in version 0.25.0. " - "Please use `reward_processing_classes` instead. For example, change " - "`reward_processing_class=tokenizer` to `reward_processing_classes=tokenizer`.", - ) - # Convert old reward_processing_class to new reward_processing_classes format - if reward_processing_classes is None: - reward_processing_classes = reward_processing_class - else: - warnings.warn( - "Both `reward_processing_class` and `reward_processing_classes` are provided. Using " - "`reward_processing_classes` and ignoring `reward_processing_class`.", - ) - # Validate reward configuration - must have exactly one of: judge, or reward_funcs reward_configs = sum(x is not None for x in [judge, reward_funcs]) if reward_configs == 0: @@ -329,16 +288,7 @@ def __init__( self.reward_weights = None if args.missing_eos_penalty is not None and reward_funcs is None and judge is None: - # Check if this is the old reward_model case - if reward_model is not None: - logger.warning( - "The `missing_eos_penalty` parameter is deprecated when used with the deprecated `reward_model` parameter. " - "Please use `reward_funcs` instead of `reward_model` to continue using this feature.", - FutureWarning, - stacklevel=2, - ) - else: - raise ValueError("`missing_eos_penalty` is only supported when `reward_funcs` is provided.") + raise ValueError("`missing_eos_penalty` is only supported when `reward_funcs` is provided.") if args is None: raise ValueError("`args` must be provided.") @@ -1330,7 +1280,7 @@ def training_step( if is_conversational({"prompt": prompts[0]}): completions = [[{"role": "assistant", "content": completion}] for completion in completions] - # Get the reward from reward functions, judge, or deprecated reward_model + # Get the reward from reward functions or judge if self.reward_funcs is not None: # First create completion_ids_list for custom reward functions completion_ids_list = [completion_ids[i].tolist() for i in range(completion_ids.shape[0])] diff --git a/trl/trainer/rloo_config.py b/trl/trainer/rloo_config.py index eb27ca1f9a7..335cc72fd09 100644 --- a/trl/trainer/rloo_config.py +++ b/trl/trainer/rloo_config.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import warnings from dataclasses import dataclass, field from typing import Optional, Union @@ -193,142 +192,6 @@ class RLOOConfig(TrainingArguments): wandb_log_unique_prompts (`bool`, *optional*, defaults to `False`): Whether to log unique prompts in wandb. If `True`, only unique prompts are logged. If `False`, all prompts are logged. - - > Deprecated parameters - - rloo_k: - - - - This parameter is deprecated and will be removed in version 0.25.0. Use `num_generations` instead. - - - - cliprange: - - - - This parameter is deprecated and will be removed in version 0.25.0. Use `epsilon` instead. - - - - kl_coef: - - - - This parameter is deprecated and will be removed in version 0.25.0. Use `beta` instead. - - - - exp_name: - - - - This parameter is deprecated and will be removed in version 0.25.0. Use `run_name` instead. - - - - normalize_reward: - - - - This parameter is deprecated and will be removed in version 0.25.0. Use `normalize_advantages` instead. - - - - num_ppo_epochs: - - - - This parameter is deprecated and will be removed in version 0.25.0. Use `num_iterations` instead. - - - - num_mini_batches: - - - - This parameter is deprecated and will be removed in version 0.25.0. Use `steps_per_generation` instead. - - - - total_episodes: - - - - This parameter is deprecated and will be removed in version 0.25.0. Use `max_steps` instead. - - - - response_length: - - - - This parameter is deprecated and will be removed in version 0.25.0. Use `max_completion_length` instead. - - - - token_level_kl: - - - - This parameter is deprecated and will be removed in version 0.25.0. KL is now computed only at the sequence - level. - - - - dataset_num_proc: - - - - This parameter is deprecated and will be removed in version 0.25.0. This parameter was unused, you can - safely remove it from your scripts. - - - - local_rollout_forward_batch_size: - - - - This parameter is deprecated and will be removed in version 0.25.0. Now it is automatically set to - `per_device_train_batch_size` (or `per_device_eval_batch_size` during evaluation). - - - - num_sample_generations: - - - - This parameter is deprecated and will be removed in version 0.25.0. Use `logging_steps` to control - generation logging frequency. - - - - stop_token: - - - - This parameter is deprecated and will be removed in version 0.25.0. - - - - stop_token_id: - - - - This parameter is deprecated and will be removed in version 0.25.0. Use `processing_class.eos_token_id` - instead. - - - - missing_eos_penalty: - - - - This parameter is deprecated and will be removed in version 0.25.0. Replicate with a custom reward function - checking if `eos_token_id` is in `completion_ids`. - - """ _VALID_DICT_FIELDS = TrainingArguments._VALID_DICT_FIELDS + ["model_init_kwargs"] @@ -656,125 +519,9 @@ class RLOOConfig(TrainingArguments): }, ) - # Deprecated params - rloo_k: Optional[int] = field( - default=None, - metadata={"help": "Deprecated: use `num_generations` instead."}, - ) - cliprange: Optional[float] = field( - default=None, - metadata={"help": "Deprecated: use `epsilon` instead."}, - ) - kl_coef: Optional[float] = field( - default=None, - metadata={"help": "Deprecated: use `beta` instead."}, - ) - exp_name: Optional[str] = field( - default=None, - metadata={"help": "Deprecated: use `run_name` instead."}, - ) - normalize_reward: Optional[bool] = field( - default=None, - metadata={"help": "Deprecated: use `normalize_advantages` instead."}, - ) - num_ppo_epochs: Optional[int] = field( - default=None, - metadata={"help": "Deprecated: use `num_iterations` instead."}, - ) - num_mini_batches: Optional[int] = field( - default=None, - metadata={"help": "Deprecated: use `steps_per_generation` instead."}, - ) - total_episodes: Optional[int] = field( - default=None, - metadata={"help": "Deprecated: use `max_steps=total_episodes/(gradient_accumulation_steps*rloo_k)` instead."}, - ) - response_length: Optional[int] = field( - default=None, - metadata={"help": "Deprecated: use `max_completion_length` instead."}, - ) - token_level_kl: Optional[bool] = field( - default=None, - metadata={"help": "Removed: KL is now computed only at the sequence level."}, - ) - dataset_num_proc: Optional[int] = field( - default=None, - metadata={"help": "Removed: this parameter was unused, you can safely remove it from your scripts."}, - ) - local_rollout_forward_batch_size: Optional[int] = field( - default=None, - metadata={ - "help": "Removed: now automatically set to `per_device_train_batch_size` (or `per_device_eval_batch_size` " - "during evaluation)." - }, - ) - num_sample_generations: Optional[int] = field( - default=None, - metadata={"help": "Removed: use `logging_steps` to control generation logging frequency."}, - ) - stop_token: Optional[str] = field( - default=None, - metadata={"help": "Removed."}, - ) - stop_token_id: Optional[int] = field( - default=None, - metadata={"help": "Removed: use `processing_class.eos_token_id` instead."}, - ) - missing_eos_penalty: Optional[float] = field( - default=None, - metadata={ - "help": "Removed: replicate with a custom reward function checking if `eos_token_id` is in " - "`completion_ids`." - }, - ) - def __post_init__(self): self.bf16 = not (self.fp16) if self.bf16 is None else self.bf16 - _DEPRECATED_PARAMS = { - "rloo_k": "num_generations", - "cliprange": "epsilon", - "kl_coef": "beta", - "exp_name": "run_name", - "normalize_reward": "normalize_advantages", - "num_ppo_epochs": "num_iterations", - "num_mini_batches": "steps_per_generation", - "total_episodes": "max_steps", - "response_length": "max_completion_length", - } - - _REMOVED_PARAMS = { - "token_level_kl", - "dataset_num_proc", - "local_rollout_forward_batch_size", - "num_sample_generations", - "stop_token", - "stop_token_id", - "missing_eos_penalty", - } - - # Check for deprecated parameters and issue warnings - for old_param, new_param in _DEPRECATED_PARAMS.items(): - if getattr(self, old_param) is not None: - old_value = getattr(self, old_param) - if old_param == "total_episodes": - old_value = old_value // self.gradient_accumulation_steps - warnings.warn( - f"Parameter '{old_param}' is deprecated and will be removed in version 0.25.0. Please use " - f"'{new_param}' instead. We are setting {new_param}={old_value}" - ) - # Set the new parameter with the old value - setattr(self, new_param, old_value) - # Clear the deprecated parameter - setattr(self, old_param, None) - - for removed_param in _REMOVED_PARAMS: - if hasattr(self, removed_param) and getattr(self, removed_param) is not None: - warnings.warn( - f"Parameter '{removed_param}' is deprecated and will be removed in version 0.25.0. Please refer " - "to the migration guide: https://huggingface.co/docs/trl/en/rloo_trainer##migration-guide-from-the-old-implementation-021-and-below" - ) - super().__post_init__() num_processes = self.world_size diff --git a/trl/trainer/rloo_trainer.py b/trl/trainer/rloo_trainer.py index 59fc9c285e0..a6ff57aa8f2 100644 --- a/trl/trainer/rloo_trainer.py +++ b/trl/trainer/rloo_trainer.py @@ -192,47 +192,6 @@ def reward_func(completions, **kwargs): model and a scheduler given by [`get_linear_schedule_with_warmup`] controlled by `args`. peft_config ([`~peft.PeftConfig`], *optional*): PEFT configuration used to wrap the model. If `None`, the model is not wrapped. - - config: - - - - This parameter is deprecated and will be removed in version 0.25.0. Use `args` instead. - - - - reward_model: - - - This parameter is deprecated and will be removed in version 0.25.0. Use `reward_funcs` instead. - - - - policy: - - - - This parameter is deprecated and will be removed in version 0.25.0. Use `model` instead. - - - - ref_policy: - - - - This parameter is deprecated and will be removed in version 0.25.0. To use the initial model as the - reference model, simply omit this parameter. The parameter is ignored. - - - - data_collator: - - - - This parameter is deprecated and will be removed in version 0.25.0. The RLOOTrainer does not use a data - collator, so this parameter is ignored. - - """ _tag_names = ["trl", "rloo"] @@ -255,9 +214,8 @@ def reward_func(completions, **kwargs): def __init__( self, - # Note for dev: we can remove the default None when we remove the deprecated model parameter in version 0.25.0 - model: Union[str, PreTrainedModel] = None, - reward_funcs: Union[RewardFunc, list[RewardFunc]] = None, + model: Union[str, PreTrainedModel], + reward_funcs: Union[RewardFunc, list[RewardFunc]], args: Optional[RLOOConfig] = None, train_dataset: Optional[Union[Dataset, IterableDataset]] = None, eval_dataset: Optional[Union[Dataset, IterableDataset, dict[str, Union[Dataset, IterableDataset]]]] = None, @@ -266,12 +224,6 @@ def __init__( callbacks: Optional[list[TrainerCallback]] = None, optimizers: tuple[Optional[torch.optim.Optimizer], Optional[torch.optim.lr_scheduler.LambdaLR]] = (None, None), peft_config: Optional["PeftConfig"] = None, - # Deprecated parameters - config=None, - reward_model=None, - policy=None, - ref_policy=None, - data_collator=None, ): if not os.environ.get("TRL_EXPERIMENTAL_SILENCE"): warnings.warn( @@ -280,70 +232,6 @@ def __init__( "https://github.com/huggingface/trl/issues/4223. Silence this warning by setting environment variable " "TRL_EXPERIMENTAL_SILENCE=1." ) - # Handle deprecated parameters - if config is not None: - warnings.warn( - "Parameter 'config' is deprecated and will be removed in version 0.25.0. Please use 'args' instead. " - "We are setting args=config" - ) - if args is None: - args = config - else: - raise ValueError("Cannot specify both 'config' (deprecated) and 'args'. Please use 'args' only.") - - if reward_model is not None: - warnings.warn( - "Parameter 'reward_model' is deprecated and will be removed in version 0.25.0. Please use " - "'reward_funcs' instead. We are setting reward_funcs=reward_model" - ) - if reward_funcs is None: - reward_funcs = reward_model - else: - raise ValueError( - "Cannot specify both 'reward_model' (deprecated) and 'reward_funcs'. Please use 'reward_funcs' " - "only." - ) - if policy is not None: - warnings.warn( - "Parameter 'policy' is deprecated and will be removed in version 0.25.0. Please use 'model' instead. " - "We are setting model=policy" - ) - if model is None: - model = policy - else: - raise ValueError("Cannot specify both 'policy' (deprecated) and 'model'. Please use 'model' only.") - if ref_policy is not None: - warnings.warn( - "Parameter 'ref_policy' is deprecated and will be removed in version 0.25.0. To use the initial model " - "as the reference model, simply omit this parameter. The parameter is ignored." - ) - if data_collator is not None: - warnings.warn( - "Parameter 'data_collator' is deprecated and will be removed in version 0.25.0. The RLOOTrainer does " - "not use a data collator, so this parameter is ignored." - ) - if "input_ids" in train_dataset.column_names: - warnings.warn( - "The training dataset contains a column named 'input_ids', indicating that it is pre-tokenized. " - "Support for pre-tokenized datasets is deprecated and will be removed in version 0.25. Please provide " - "the raw dataset (conversational or standard) with a 'prompt' column instead." - ) - - def decode(example, tokenizer): - return {"prompt": tokenizer.decode(example["input_ids"])} - - train_dataset = train_dataset.map(decode, fn_kwargs={"tokenizer": processing_class}) - if eval_dataset is not None and "input_ids" in eval_dataset.column_names: - warnings.warn( - "The evaluation dataset contains a column named 'input_ids', indicating that it is pre-tokenized. " - "Support for pre-tokenized datasets is deprecated and will be removed in version 0.25. Please provide " - "the raw dataset (conversational or standard) with a 'prompt' column instead." - ) - - def decode(example, tokenizer): - return {"prompt": tokenizer.decode(example["input_ids"])} - - eval_dataset = eval_dataset.map(decode, fn_kwargs={"tokenizer": processing_class}) # Args if args is None: diff --git a/trl/trainer/utils.py b/trl/trainer/utils.py index 7c547687dcc..f335166c527 100644 --- a/trl/trainer/utils.py +++ b/trl/trainer/utils.py @@ -1251,34 +1251,6 @@ def empty_cache() -> None: torch.cuda.empty_cache() -def decode_and_strip_padding(inputs: torch.Tensor, tokenizer: PreTrainedTokenizerBase) -> list[str]: - # docstyle-ignore - """ - Decodes the input tensor and strips the padding tokens. - - > [!WARNING] - > This function is deprecated and will be removed in a version 0.25.0. If you want to keep using it, please copy - > the code into your codebase and use it from there. - - Args: - inputs (`torch.Tensor`): - The input tensor to be decoded. - tokenizer ([`~transformers.PreTrainedTokenizerBase`]): - The tokenizer used to decode the input tensor. - - Returns: - `list[str]`: - The list of decoded strings with padding tokens stripped. - """ - warnings.warn( - "The function `decode_and_strip_padding` is deprecated and will be removed in a version 0.25.0. If you want " - "to keep using it, please copy the code into your codebase and use it from there.", - FutureWarning, - ) - decoded = tokenizer.batch_decode(inputs, skip_special_tokens=False) - return [d.replace(tokenizer.pad_token, "") for d in decoded] - - def generate_model_card( base_model: Optional[str], model_name: str, diff --git a/trl/trainer/xpo_trainer.py b/trl/trainer/xpo_trainer.py index e289bce5bb9..0dfdddad550 100644 --- a/trl/trainer/xpo_trainer.py +++ b/trl/trainer/xpo_trainer.py @@ -93,14 +93,6 @@ class XPOTrainer(OnlineDPOTrainer): The optimizer and scheduler to use for training. preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`): The function to use to preprocess the logits before computing the metrics. - - reward_model: - - - - This parameter is deprecated and will be removed in version 0.25.0. Use `reward_funcs` instead. - - """ _tag_names = ["trl", "xpo"] @@ -137,15 +129,12 @@ def __init__( callbacks: Optional[list[TrainerCallback]] = None, optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None), preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, - # Deprecated parameters - reward_model: Optional[Union[PreTrainedModel, nn.Module]] = None, ) -> None: super().__init__( model=model, ref_model=ref_model, judge=judge, reward_funcs=reward_funcs, - reward_model=reward_model, args=args, data_collator=data_collator, train_dataset=train_dataset,