diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index 32393ec11e..5c928049f0 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -50,6 +50,7 @@ jobs:
           tags: |
             type=match,pattern=\d.\d.\d,enable=${{ github.event_name == 'release' }}
             type=raw,value=${{ github.event.inputs.version }},enable=${{ github.event_name == 'workflow_dispatch' }}
+            type=raw,value=latest,enable={{is_default_branch}}
       # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages.
       # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see [Usage](https://github.com/docker/build-push-action#usage) in the README of the `docker/build-push-action` repository.
       # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step.
diff --git a/.github/workflows/docker/docker-compose.yaml b/.github/workflows/docker/docker-compose.yaml
index a923b461eb..0889e57ffd 100644
--- a/.github/workflows/docker/docker-compose.yaml
+++ b/.github/workflows/docker/docker-compose.yaml
@@ -1,6 +1,6 @@
 services:
   trinity-node-1:
-    image: trinity-rft-unittest:20260126
+    image: trinity-rft-unittest:20260205
     cap_add:
       - SYS_PTRACE
     pull_policy: never
@@ -32,7 +32,7 @@ services:
         capabilities: [gpu]
 
   trinity-node-2:
-    image: trinity-rft-unittest:20260126
+    image: trinity-rft-unittest:20260205
     cap_add:
       - SYS_PTRACE
     pull_policy: never
diff --git a/README.md b/README.md
index 9ac0c04fed..afe36c6969 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,7 @@ Trinity-RFT provides functionalities for users with different backgrounds and ob
 
 ## 🚀 News
 
+* [2026-02] [[Release Notes]](https://github.com/agentscope-ai/Trinity-RFT/releases/tag/v0.5.0) Trinity-RFT v0.5.0 released: colocate mode for single-GPU scenarios, trainer-driven weight synchronization, automatic parallelism setting suggestions, and more.
 * [2026-01] 🎉 Three papers accepted by ICLR 2026: [CHORD](https://github.com/agentscope-ai/Trinity-RFT/tree/main/examples/mix_chord), [BOTS](https://github.com/agentscope-ai/Trinity-RFT/tree/main/examples/bots), and [Group-relative REINFORCE variants](https://github.com/agentscope-ai/Trinity-RFT/tree/main/examples/rec_gsm8k). Try out these new algorithms in Trinity-RFT!
 * [2026-01] [[Release Notes]](https://github.com/agentscope-ai/Trinity-RFT/releases/tag/v0.4.1) Trinity-RFT v0.4.1 released: upgraded verl to v0.7.0, Tinker backend supports OpenAI API, bug fixes.
 * [2026-01] Introducing [R3L](https://github.com/shiweijiezero/R3L): a systematic reflect-then-retry RL mechanism with efficient language-guided exploration and stable off-policy learning ([paper](https://arxiv.org/abs/2601.03715)).
@@ -165,7 +166,7 @@ Before installing, make sure your system meets the following requirements:
 
 - Python: version 3.10 to 3.12 (inclusive)
 - CUDA: version >= 12.8
-- GPUs: at least 2 GPUs
+- GPUs: At least one NVIDIA GPU with [compute capability](https://developer.nvidia.com/cuda/gpus) 8.0 or higher (e.g., RTX 30 series, A100, H100)
 
 **Recommended for first-time users:**
 
diff --git a/README_zh.md b/README_zh.md
index 09688b0f43..15a4ee26f7 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -41,6 +41,7 @@ Trinity-RFT 面向不同背景和目标的用户提供相应功能:
 
 ## 🚀 新闻
 
+* [2026-02] [[发布说明]](https://github.com/agentscope-ai/Trinity-RFT/releases/tag/v0.5.0) Trinity-RFT v0.5.0 发布:单 GPU 场景下的 colocate 模式、trainer 驱动的权重同步、自动并行设置建议等新功能。
 * [2026-01] 🎉 三篇论文被 ICLR 2026 接收:[CHORD](https://github.com/agentscope-ai/Trinity-RFT/tree/main/examples/mix_chord)、[BOTS](https://github.com/agentscope-ai/Trinity-RFT/tree/main/examples/bots) 和 [Group-relative REINFORCE 系列变种](https://github.com/agentscope-ai/Trinity-RFT/tree/main/examples/rec_gsm8k)。在 Trinity-RFT 中尝试这些新算法吧!
 * [2026-01] [[发布说明]](https://github.com/agentscope-ai/Trinity-RFT/releases/tag/v0.4.1) Trinity-RFT v0.4.1 发布:升级 verl 至 v0.7.0,Tinker 后端支持 OpenAI API,修复若干 Bug。
 * [2026-01] 推出 [R3L](https://github.com/shiweijiezero/R3L):基于反思-重试的强化学习机制,由自然语言反馈引导高效探索,并达成稳定的 off-policy 学习([论文](https://arxiv.org/abs/2601.03715))。
@@ -171,7 +172,7 @@ Trinity-RFT 面向不同背景和目标的用户提供相应功能:
 
 - **Python**:版本 3.10 至 3.12(含)
 - **CUDA**:版本 >= 12.8
-- **GPU**:至少 2 块 GPU
+- **GPU**:至少一块 [compute capability](https://developer.nvidia.com/cuda/gpus) 为 8.0 或更高的 NVIDIA GPU(例如 RTX 30 系列、A100、H100)
 
 ## 源码安装(推荐)
 
diff --git a/docs/sphinx_doc/source/tutorial/trinity_installation.md b/docs/sphinx_doc/source/tutorial/trinity_installation.md
index 96b2b9a0b2..2a32dbf76f 100644
--- a/docs/sphinx_doc/source/tutorial/trinity_installation.md
+++ b/docs/sphinx_doc/source/tutorial/trinity_installation.md
@@ -11,7 +11,7 @@ Make sure your system meets these requirements:
 
 - **Python**: 3.10 – 3.12
 - **CUDA**: 12.8 or higher
-- **GPUs**: At least 2 available
+- **GPUs**: At least one NVIDIA GPU with [compute capability](https://developer.nvidia.com/cuda/gpus) 8.0 or higher (e.g., RTX 30 series, A100, H100)
 
 ### If you don’t have GPUs or prefer not to use them
 
@@ -84,24 +84,6 @@ uv sync --extra vllm --extra dev --extra flash_attn
 
 ---
 
-## Via PyPI
-
-If you just want to use the package without modifying the code:
-
-```bash
-pip install trinity-rft
-pip install flash-attn==2.8.1
-```
-
-Or with `uv`:
-
-```bash
-uv pip install trinity-rft
-uv pip install flash-attn==2.8.1
-```
-
----
-
 ## Using Docker
 
 You can download the Trinity-RFT Docker image from Github Container Registry or build it locally.
@@ -150,6 +132,24 @@ docker run -it \
 
 ---
 
+## Via PyPI
+
+If you just want to use the package without modifying the code:
+
+```bash
+pip install trinity-rft
+pip install flash-attn==2.8.1 --no-build-isolation
+```
+
+Or with `uv`:
+
+```bash
+uv pip install trinity-rft
+uv pip install flash-attn==2.8.1 --no-build-isolation
+```
+
+---
+
 ```{note}
 For training with **Megatron-LM**, please refer to {ref}`Megatron-LM Backend `.
 ```
diff --git a/docs/sphinx_doc/source_zh/tutorial/trinity_installation.md b/docs/sphinx_doc/source_zh/tutorial/trinity_installation.md
index 6304ba35cf..5787a72649 100644
--- a/docs/sphinx_doc/source_zh/tutorial/trinity_installation.md
+++ b/docs/sphinx_doc/source_zh/tutorial/trinity_installation.md
@@ -11,7 +11,7 @@
 
 - **Python**:3.10 – 3.12
 - **CUDA**:12.8 或更高版本
-- **GPU**:至少 2 块可用
+- **GPU**:至少一块 [compute capability](https://developer.nvidia.com/cuda/gpus) 为 8.0 或更高的 NVIDIA GPU(例如 RTX 30 系列、A100、H100)
 
 ### 如果您没有 GPU 或不希望使用 GPU
 
@@ -139,14 +139,14 @@ docker run -it \
 
 ```bash
 pip install trinity-rft
-pip install flash-attn==2.8.1
+pip install flash-attn==2.8.1 --no-build-isolation
 ```
 
 或使用 `uv`:
 
 ```bash
 uv pip install trinity-rft
-uv pip install flash-attn==2.8.1
+uv pip install flash-attn==2.8.1 --no-build-isolation
 ```
 
 ---
diff --git a/pyproject.toml b/pyproject.toml
index 411864a793..b3d2f237c8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "trinity-rft"
-version = "0.5.0.dev0"
+version = "0.5.0"
 authors = [
     {name="Trinity-RFT Team", email="trinity-rft@outlook.com"},
 ]
@@ -50,9 +50,9 @@ trinity = "trinity.cli.launcher:main"
 
 [project.optional-dependencies]
 vllm = [
-    "vllm>=0.10.2,<=0.14.1,!=0.12.0",
+    "vllm>=0.10.2,<=0.15.1,!=0.12.0",
     # v0.12.0 has a huge performance regression so we exclude it
-    # v0.10.2 is the most stable version, but we allow up to 0.14.1 for new features
+    # v0.10.2 is the most stable version, but we allow up to 0.15.1 for new features
 ]
 data = [
     "py-data-juicer>=1.4.3"
diff --git a/trinity/__init__.py b/trinity/__init__.py
index b29fbc4891..f9c4d8f061 100644
--- a/trinity/__init__.py
+++ b/trinity/__init__.py
@@ -1,4 +1,4 @@
 # -*- coding: utf-8 -*-
 """Trinity-RFT (Reinforcement Fine-Tuning)"""
 
-__version__ = "0.5.0.dev0"
+__version__ = "0.5.0"
diff --git a/trinity/common/models/vllm_model.py b/trinity/common/models/vllm_model.py
index fffc4e17e0..2372db6cc3 100644
--- a/trinity/common/models/vllm_model.py
+++ b/trinity/common/models/vllm_model.py
@@ -149,7 +149,10 @@ def __init__(
 
     async def _initialize_tokenizer(self):
         if self.tokenizer is None:
-            self.tokenizer = await self.async_llm.get_tokenizer()
+            if self.vllm_version >= parse_version("0.15.0"):
+                self.tokenizer = self.async_llm.get_tokenizer()
+            else:
+                self.tokenizer = await self.async_llm.get_tokenizer()
         self.tokenizer.truncation_side = "left"
 
     def _initialize_processor(self):
diff --git a/trinity/common/models/vllm_patch/worker_patch.py b/trinity/common/models/vllm_patch/worker_patch.py
index a0d6f37647..4d6412cc29 100644
--- a/trinity/common/models/vllm_patch/worker_patch.py
+++ b/trinity/common/models/vllm_patch/worker_patch.py
@@ -13,10 +13,10 @@ def patch_vllm_prompt_logprobs(model_runner: GPUModelRunner):  # noqa: C901
     """Patch vLLM model runner to support prompt logprobs extraction."""
     version = get_vllm_version()
-    if version < parse_version("0.10.2") or version > parse_version("0.14.1"):
+    if version < parse_version("0.10.2") or version > parse_version("0.15.1"):
         raise ValueError(
             f"Unsupported vllm version: {vllm.__version__}. "
-            "This patch requires vllm version >= 0.10.2, <= 0.14.1."
+            "This patch requires vllm version >= 0.10.2, <= 0.15.1."
         )
 
     is_v0102 = version == parse_version("0.10.2")
 
@@ -150,7 +150,7 @@ def _get_prompt_logprobs_dict_v12(
 
     This is a monkey-patched version of `_get_prompt_logprobs_dict` from
     `vllm.v1.worker.gpu_model_runner.GPUModelRunner` (vLLM versions
-    0.12.0 to 0.14.1).
+    0.12.0 to 0.15.1).
 
     The original function does not apply temperature scaling to logits when
     calculating prompt logprobs, which can lead to incorrect logprob values.
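
A note on the temperature issue described in the docstring above: the sketch below illustrates the behavior the patch restores. It is a minimal, hypothetical example; the function name, argument layout, and the per-request `temperature` scalar are illustrative assumptions, not the actual vLLM internals, which operate on batched per-request state inside `GPUModelRunner`.

```python
# Hedged sketch: apply temperature scaling before computing prompt logprobs.
# All names here are illustrative; this is not the vLLM implementation.
import torch


def prompt_logprobs_with_temperature(
    logits: torch.Tensor,     # [num_prompt_tokens, vocab_size] raw model logits
    token_ids: torch.Tensor,  # [num_prompt_tokens] observed prompt token ids
    temperature: float,       # sampling temperature of the request
) -> torch.Tensor:
    """Return the logprob assigned to each prompt token, temperature-scaled."""
    if temperature > 0.0:
        # This division is the step the unpatched code skips: without it, the
        # reported prompt logprobs always correspond to temperature 1.0,
        # regardless of the request's sampling parameters.
        logits = logits / temperature
    logprobs = torch.log_softmax(logits, dim=-1)
    # Gather the logprob of each actual prompt token from the full vocabulary.
    return logprobs.gather(-1, token_ids.unsqueeze(-1)).squeeze(-1)
```

Scaling before `log_softmax` keeps the recorded logprobs consistent with the distribution the sampler actually drew from, which matters for RL objectives that consume these values, such as importance ratios.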