4 changes: 2 additions & 2 deletions docs/source/_toctree.yml

@@ -66,8 +66,6 @@
   title: GKD
 - local: grpo_trainer
   title: GRPO
-- local: kto_trainer
-  title: KTO
 - local: nash_md_trainer
   title: Nash-MD
 - local: orpo_trainer
@@ -115,6 +113,8 @@
   title: GSPO-token
 - local: judges
   title: Judges
+- local: kto_trainer
+  title: KTO
 - local: papo_trainer
   title: PAPO
 - local: xpo_trainer
2 changes: 1 addition & 1 deletion docs/source/dataset_formats.md

@@ -392,7 +392,7 @@ Choosing the right dataset type depends on the task you are working on and the s
 | [`DPOTrainer`] | [Preference (explicit prompt recommended)](#preference) |
 | [`GKDTrainer`] | [Prompt-completion](#prompt-completion) |
 | [`GRPOTrainer`] | [Prompt-only](#prompt-only) |
-| [`KTOTrainer`] | [Unpaired preference](#unpaired-preference) or [Preference (explicit prompt recommended)](#preference) |
+| [`experimental.kto.KTOTrainer`] | [Unpaired preference](#unpaired-preference) or [Preference (explicit prompt recommended)](#preference) |
 | [`NashMDTrainer`] | [Prompt-only](#prompt-only) |
 | [`OnlineDPOTrainer`] | [Prompt-only](#prompt-only) |
 | [`ORPOTrainer`] | [Preference (explicit prompt recommended)](#preference) |
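Note: the unpaired preference format this row points to pairs each prompt and completion with a boolean desirability label, rather than ranking two completions against each other. A minimal sketch (field names follow TRL's documented unpaired preference type; the example rows themselves are illustrative):

```python
from datasets import Dataset

# Unpaired preference data: each completion stands alone and carries a
# boolean label marking it as desirable (True) or undesirable (False).
unpaired_dataset = Dataset.from_dict({
    "prompt": ["The sky is", "The sky is"],
    "completion": [" blue.", " green."],
    "label": [True, False],
})
```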
4 changes: 2 additions & 2 deletions docs/source/example_overview.md

@@ -39,7 +39,7 @@ Scripts are maintained in the [`trl/scripts`](https://github.com/huggingface/trl

 | File | Description |
 | --- | --- |
-| [`examples/scripts/bco.py`](https://github.com/huggingface/trl/blob/main/examples/scripts/bco.py) | This script shows how to use the [`KTOTrainer`] with the BCO loss to fine-tune a model to increase instruction-following, truthfulness, honesty, and helpfulness using the [openbmb/UltraFeedback](https://huggingface.co/datasets/openbmb/UltraFeedback) dataset. |
+| [`examples/scripts/bco.py`](https://github.com/huggingface/trl/blob/main/examples/scripts/bco.py) | This script shows how to use the [`experimental.kto.KTOTrainer`] with the BCO loss to fine-tune a model to increase instruction-following, truthfulness, honesty, and helpfulness using the [openbmb/UltraFeedback](https://huggingface.co/datasets/openbmb/UltraFeedback) dataset. |
 | [`examples/scripts/cpo.py`](https://github.com/huggingface/trl/blob/main/examples/scripts/cpo.py) | This script shows how to use the [`CPOTrainer`] to fine-tune a model to increase helpfulness and harmlessness using the [Anthropic/hh-rlhf](https://huggingface.co/datasets/Anthropic/hh-rlhf) dataset. |
 | [`trl/scripts/dpo.py`](https://github.com/huggingface/trl/blob/main/trl/scripts/dpo.py) | This script shows how to use the [`DPOTrainer`] to fine-tune a model. |
 | [`examples/scripts/dpo_vlm.py`](https://github.com/huggingface/trl/blob/main/examples/scripts/dpo_vlm.py) | This script shows how to use the [`DPOTrainer`] to fine-tune a Vision Language Model to reduce hallucinations using the [openbmb/RLAIF-V-Dataset](https://huggingface.co/datasets/openbmb/RLAIF-V-Dataset) dataset. |
@@ -49,7 +49,7 @@ Scripts are maintained in the [`trl/scripts`](https://github.com/huggingface/trl
 | [`examples/scripts/grpo_vlm.py`](https://github.com/huggingface/trl/blob/main/examples/scripts/grpo_vlm.py) | This script shows how to use the [`GRPOTrainer`] to fine-tune a multimodal model for reasoning using the [lmms-lab/multimodal-open-r1-8k-verified](https://huggingface.co/datasets/lmms-lab/multimodal-open-r1-8k-verified) dataset. |
 | [`examples/scripts/gspo.py`](https://github.com/huggingface/trl/blob/main/examples/scripts/gspo.py) | This script shows how to use GSPO via the [`GRPOTrainer`] to fine-tune model for reasoning using the [AI-MO/NuminaMath-TIR](https://huggingface.co/datasets/AI-MO/NuminaMath-TIR) dataset. |
 | [`examples/scripts/gspo_vlm.py`](https://github.com/huggingface/trl/blob/main/examples/scripts/gspo_vlm.py) | This script shows how to use GSPO via the [`GRPOTrainer`] to fine-tune a multimodal model for reasoning using the [lmms-lab/multimodal-open-r1-8k-verified](https://huggingface.co/datasets/lmms-lab/multimodal-open-r1-8k-verified) dataset. |
-| [`examples/scripts/kto.py`](https://github.com/huggingface/trl/blob/main/examples/scripts/kto.py) | This script shows how to use the [`KTOTrainer`] to fine-tune a model. |
+| [`examples/scripts/kto.py`](https://github.com/huggingface/trl/blob/main/examples/scripts/kto.py) | This script shows how to use the [`experimental.kto.KTOTrainer`] to fine-tune a model. |
 | [`examples/scripts/mpo_vlm.py`](https://github.com/huggingface/trl/blob/main/examples/scripts/mpo_vlm.py) | This script shows how to use MPO via the [`DPOTrainer`] to align a model based on preferences using the [HuggingFaceH4/rlaif-v_formatted](https://huggingface.co/datasets/HuggingFaceH4/rlaif-v_formatted) dataset and a set of loss weights with weights. |
 | [`examples/scripts/nash_md.py`](https://github.com/huggingface/trl/blob/main/examples/scripts/nash_md.py) | This script shows how to use the [`NashMDTrainer`] to fine-tune a model. |
 | [`examples/scripts/online_dpo.py`](https://github.com/huggingface/trl/blob/main/examples/scripts/online_dpo.py) | This script shows how to use the [`OnlineDPOTrainer`] to fine-tune a model. |
2 changes: 1 addition & 1 deletion docs/source/index.md

@@ -44,7 +44,7 @@ Below is the current list of TRL trainers, organized by method type (⚡️ = vL
 - [`ORPOTrainer`]
 - [`experimental.bco.BCOTrainer`] 🧪
 - [`CPOTrainer`]
-- [`KTOTrainer`]
+- [`experimental.kto.KTOTrainer`] 🧪

 ### Knowledge distillation

2 changes: 1 addition & 1 deletion docs/source/kto_trainer.md

@@ -30,7 +30,7 @@ Below is the script to train the model:
 ```python
 # train_kto.py
 from datasets import load_dataset
-from trl import KTOConfig, KTOTrainer
+from trl.experimental.kto import KTOConfig, KTOTrainer
 from transformers import AutoModelForCausalLM, AutoTokenizer

 model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
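Note: the hunk above shows only the top of the doc's training script. A minimal sketch of how the full snippet reads under the new import path; the trainer arguments and the `trl-lib/kto-mix-14k` dataset follow the quickstart pattern this page already uses, so treat them as assumptions rather than the exact doc text:

```python
# train_kto.py: minimal sketch, assuming the doc's usual quickstart shape.
from datasets import load_dataset
from trl.experimental.kto import KTOConfig, KTOTrainer
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
train_dataset = load_dataset("trl-lib/kto-mix-14k", split="train")  # assumed dataset

training_args = KTOConfig(output_dir="Qwen2-0.5B-KTO")
trainer = KTOTrainer(
    model=model,
    args=training_args,
    processing_class=tokenizer,  # recent TRL versions take the tokenizer here
    train_dataset=train_dataset,
)
trainer.train()
```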
4 changes: 2 additions & 2 deletions docs/source/liger_kernel_integration.md

@@ -58,7 +58,7 @@ training_args = GRPOConfig(..., use_liger_kernel=True)
 <hfoption id="KTO">

 ```python
-from trl import KTOConfig
+from trl.experimental.kto import KTOConfig

 training_args = KTOConfig(..., use_liger_kernel=True)
 ```
@@ -67,7 +67,7 @@ training_args = KTOConfig(..., use_liger_kernel=True)
 <hfoption id="GKD">

 ```python
-from trl import GKDConfig
+from trl.experimental.gkd import GKDConfig

 training_args = GKDConfig(..., use_liger_kernel=True)
 ```
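Note: in both `hfoption` blocks only the import line changes; enabling the kernel remains a single flag on the config. A runnable sketch with the relocated import, where `output_dir` is an illustrative placeholder:

```python
from trl.experimental.kto import KTOConfig

# use_liger_kernel=True swaps in Liger's fused kernels to reduce peak memory;
# every other KTOConfig field keeps its usual meaning.
training_args = KTOConfig(output_dir="kto-liger", use_liger_kernel=True)
```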
4 changes: 2 additions & 2 deletions docs/source/reducing_memory_usage.md

@@ -156,7 +156,7 @@ training_args = GRPOConfig(..., use_liger_kernel=True)
 <hfoption id="KTO">

 ```python
-from trl import KTOConfig
+from trl.experimental.kto import KTOConfig

 training_args = KTOConfig(..., use_liger_kernel=True)
 ```
@@ -165,7 +165,7 @@ training_args = KTOConfig(..., use_liger_kernel=True)
 <hfoption id="GKD">

 ```python
-from trl import GKDConfig
+from trl.experimental.gkd import GKDConfig

 training_args = GKDConfig(..., use_liger_kernel=True)
 ```
3 changes: 2 additions & 1 deletion examples/scripts/kto.py

@@ -63,7 +63,8 @@
 from datasets import load_dataset
 from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser

-from trl import KTOConfig, KTOTrainer, ModelConfig, ScriptArguments, get_peft_config
+from trl import ModelConfig, ScriptArguments, get_peft_config
+from trl.experimental.kto import KTOConfig, KTOTrainer


 # Enable logging in a Hugging Face Space
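Note: only the script's imports move; the argument-parsing pattern around them is unchanged. A condensed sketch of that wiring, a simplification of the real script body rather than a verbatim excerpt:

```python
from transformers import HfArgumentParser
from trl import ModelConfig, ScriptArguments, get_peft_config
from trl.experimental.kto import KTOConfig, KTOTrainer

# The example scripts parse three dataclass groups from the command line.
parser = HfArgumentParser((ScriptArguments, KTOConfig, ModelConfig))
script_args, training_args, model_args = parser.parse_args_into_dataclasses()
peft_config = get_peft_config(model_args)  # None when no PEFT flags are passed
```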
tests/experimental/test_kto_trainer.py

@@ -17,10 +17,10 @@
 from datasets import load_dataset
 from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer

-from trl import KTOConfig, KTOTrainer
-from trl.trainer.kto_trainer import _get_kl_dataset, _process_tokens, _tokenize
+from trl.experimental.kto import KTOConfig, KTOTrainer
+from trl.experimental.kto.kto_trainer import _get_kl_dataset, _process_tokens, _tokenize

-from .testing_utils import TrlTestCase, require_liger_kernel, require_no_wandb, require_peft
+from ..testing_utils import TrlTestCase, require_liger_kernel, require_no_wandb, require_peft


 class TestKTOTrainer(TrlTestCase):
19 changes: 19 additions & 0 deletions trl/experimental/kto/__init__.py

@@ -0,0 +1,19 @@
+# Copyright 2020-2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .kto_config import KTOConfig
+from .kto_trainer import KTOTrainer
+
+
+__all__ = ["KTOConfig", "KTOTrainer"]
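Note: with the new subpackage in place, the canonical import path becomes `trl.experimental.kto`. A short smoke test of the layout (illustrative; the expected module names follow from the imports in the `__init__.py` above):

```python
# Verify the relocated classes resolve from the experimental subpackage.
from trl.experimental.kto import KTOConfig, KTOTrainer

print(KTOConfig.__module__)   # expected: trl.experimental.kto.kto_config
print(KTOTrainer.__module__)  # expected: trl.experimental.kto.kto_trainer
```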