huggingface
diff --git a/‎docker/transformers-intel-cpu/Dockerfile‎
Lines changed: 71 additions & 0 deletions b/‎docker/transformers-intel-cpu/Dockerfile‎
Lines changed: 71 additions & 0 deletions
diff --git a/‎docs/source/en/_toctree.yml‎
Lines changed: 2 additions & 0 deletions b/‎docs/source/en/_toctree.yml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎docs/source/en/model_doc/apertus.md‎
Lines changed: 4 additions & 1 deletion b/‎docs/source/en/model_doc/apertus.md‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎docs/source/en/model_doc/florence2.md‎
Lines changed: 4 additions & 1 deletion b/‎docs/source/en/model_doc/florence2.md‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎docs/source/en/model_doc/longcat_flash.md‎
Lines changed: 128 additions & 0 deletions b/‎docs/source/en/model_doc/longcat_flash.md‎
Lines changed: 128 additions & 0 deletions
diff --git a/‎docs/source/en/model_doc/nllb.md‎
Lines changed: 9 additions & 9 deletions b/‎docs/source/en/model_doc/nllb.md‎
Lines changed: 9 additions & 9 deletions
diff --git a/‎docs/source/en/model_doc/sam2.md‎
Lines changed: 4 additions & 2 deletions b/‎docs/source/en/model_doc/sam2.md‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎docs/source/en/model_doc/sam2_video.md‎
Lines changed: 4 additions & 2 deletions b/‎docs/source/en/model_doc/sam2_video.md‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎i18n/README_zh-hans.md‎
Lines changed: 1 addition & 1 deletion b/‎i18n/README_zh-hans.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎setup.py‎
Lines changed: 1 addition & 1 deletion b/‎setup.py‎
Lines changed: 1 addition & 1 deletion
@@ -0,0 +1,71 @@
+FROM intel/deep-learning-essentials:2025.1.3-0-devel-ubuntu24.04 AS base
+LABEL maintainer="Hugging Face"
+# Run every RUN step under bash with pipefail, so that a failure on the left
+# side of a pipeline (e.g. `curl ... | sh`) aborts the build instead of being
+# masked by the exit status of the last command in the pipe.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+# Python version passed to `uv venv` below; override with --build-arg.
+ARG PYTHON_VERSION=3.12
+# Keep apt/dpkg from prompting during the build.
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Register the deadsnakes PPA. The package lists are removed in the same layer
+# that downloaded them: the next step runs its own `apt-get update`, and a
+# cleanup in a later layer would not shrink this one.
+RUN apt-get update && \
+    apt-get install -y software-properties-common && \
+    add-apt-repository -y ppa:deadsnakes/ppa && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# System packages: build toolchain, download/VCS tools, media and OCR
+# libraries, plus the allocator packages (google-perftools, libjemalloc-dev).
+# The list is kept in alphabetical order to make diffs easy to read.
+# Lists are refreshed, used and removed within this single layer.
+RUN apt-get update \
+    && apt-get install -y \
+        apt-utils \
+        build-essential \
+        ca-certificates \
+        clinfo \
+        curl \
+        espeak-ng \
+        ffmpeg \
+        git \
+        git-lfs \
+        gnupg2 \
+        google-perftools \
+        gpg-agent \
+        libjemalloc-dev \
+        ncurses-term \
+        numactl \
+        python3-dev \
+        python3-opencv \
+        tesseract-ocr \
+        unzip \
+        vim \
+        wget \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Use a virtual environment because Ubuntu 24.04 does not allow pip to install
+# packages into the system Python.
+# `set -o pipefail` makes this step fail when the installer download fails;
+# without it the exit status of `sh` alone decides, and an empty or truncated
+# script would let the build carry on without uv.
+RUN set -o pipefail && curl -LsSf https://astral.sh/uv/install.sh | sh
+# Make the uv binary reachable for the following steps.
+ENV PATH="/root/.local/bin:$PATH"
+ENV VIRTUAL_ENV="/opt/venv"
+# Directory where uv stores the Python interpreters it downloads.
+ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
+# --seed installs pip into the new environment so the plain `pip install`
+# steps below work against it.
+RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
+# Put the environment first on PATH so `python` and `pip` resolve to it.
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+
+# Python dependencies. Every step passes --no-cache-dir so downloaded wheels
+# and sdists are not stored in the image layers. The steps are kept separate
+# so a change to one group does not invalidate the cached layers before it.
+RUN pip install --no-cache-dir --upgrade pip wheel
+# CPU-only PyTorch wheels come from the dedicated PyTorch index.
+RUN pip install --no-cache-dir torch torchvision torchaudio torchcodec --index-url https://download.pytorch.org/whl/cpu
+RUN pip install --no-cache-dir av pyctcdecode pytesseract decord galore-torch fire scipy scikit-learn sentencepiece sentence_transformers sacremoses nltk rouge_score librosa soundfile mpi4py pytorch_msssim
+RUN pip install --no-cache-dir onnx optimum onnxruntime
+RUN pip install --no-cache-dir autoawq
+# --no-build-isolation builds gptqmodel against the packages already installed
+# in the environment rather than in a fresh, isolated build environment.
+RUN pip install --no-cache-dir gptqmodel --no-build-isolation
+RUN pip install --no-cache-dir -U datasets timm transformers accelerate peft diffusers opencv-python kenlm evaluate
+RUN pip install --no-cache-dir -U intel-openmp
+
+# Build and install bitsandbytes from source with the CPU backend.
+# The checkout lives under /tmp and is deleted in the same layer, so neither
+# the git history nor the build tree ends up in the image; the installed
+# package in the virtual environment is all that is kept. Explicit paths are
+# used instead of `cd` so the step does not depend on the working directory.
+RUN git clone https://github.com/bitsandbytes-foundation/bitsandbytes.git /tmp/bitsandbytes && \
+    cmake -DCOMPUTE_BACKEND=cpu -S /tmp/bitsandbytes -B /tmp/bitsandbytes && \
+    make -C /tmp/bitsandbytes && \
+    pip install --no-cache-dir /tmp/bitsandbytes && \
+    rm -rf /tmp/bitsandbytes
+
+# Triton is not needed on CPU
+RUN pip uninstall -y triton
+
+# Preload Intel OpenMP (from the virtual environment) and tcmalloc in every
+# process started in the container. `${LD_PRELOAD:+...}` keeps any value
+# inherited from the base image and adds the ':' separator only when there is
+# one, so the list never starts with an empty entry.
+ENV LD_PRELOAD=${LD_PRELOAD:+${LD_PRELOAD}:}${VIRTUAL_ENV}/lib/libiomp5.so:/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4
+# Thread affinity setting read by the Intel OpenMP runtime.
+ENV KMP_AFFINITY=granularity=fine,compact,1,0
+
+# Create the entrypoint script in a single layer. The script replaces itself
+# with bash via `exec`, so bash runs as the container's main process and
+# receives signals directly instead of sitting behind a wrapper shell.
+# As before, arguments passed to the container are not forwarded.
+RUN printf '%s\n' '#!/bin/bash' 'exec /bin/bash' > /entrypoint.sh && \
+    chmod +x /entrypoint.sh
+
+ENTRYPOINT ["/entrypoint.sh"]
@@ -559,6 +559,8 @@
         title: Llama2
       - local: model_doc/llama3
         title: Llama3
+      - local: model_doc/longcat_flash
+        title: LongCatFlash
       - local: model_doc/longformer
         title: Longformer
       - local: model_doc/longt5
 
@@ -13,6 +13,9 @@ specific language governing permissions and limitations under the License.
 rendered properly in your Markdown viewer.
 
 -->
+*This model was released on 2025-09-02 and added to Hugging Face Transformers on 2025-08-28.*
+
+# Apertus
 
 <div style="float: right;">
     <div class="flex flex-wrap space-x-1">
@@ -23,7 +26,7 @@ rendered properly in your Markdown viewer.
     </div>
 </div>
 
-# Apertus
+## Overview
 
 [Apertus](https://www.swiss-ai.org) is a family of large language models from the Swiss AI Initiative.
 
 
@@ -13,6 +13,9 @@ specific language governing permissions and limitations under the License.
 rendered properly in your Markdown viewer.
 
 -->
+*This model was released on 2024-06-16 and added to Hugging Face Transformers on 2025-08-20.*
+
+# Florence-2
 
 <div style="float: right;">
     <div class="flex flex-wrap space-x-1">
@@ -21,7 +24,7 @@ rendered properly in your Markdown viewer.
     </div>
 </div>
 
-# Florence-2
+## Overview
 
 [Florence-2](https://huggingface.co/papers/2311.06242) is an advanced vision foundation model that uses a prompt-based approach to handle a wide range of vision and vision-language tasks. Florence-2 can interpret simple text prompts to perform tasks like captioning, object detection, and segmentation. It leverages the FLD-5B dataset, containing 5.4 billion annotations across 126 million images, to master multi-task learning. The model's sequence-to-sequence architecture enables it to excel in both zero-shot and fine-tuned settings, proving to be a competitive vision foundation model.
 
 
@@ -0,0 +1,128 @@
+<!--Copyright 2025 the HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+
+⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not be rendered properly in your Markdown viewer.
+
+-->
+*This model was released on 2025-09-01 and added to Hugging Face Transformers on 2025-09-15.*
+
+
+# LongCatFlash
+
+## Overview
+
+The LongCatFlash model was proposed in [LongCat-Flash Technical Report](https://huggingface.co/papers/2509.01322) by the Meituan LongCat Team.
+LongCat-Flash is a 560B parameter Mixture-of-Experts (MoE) model that activates 18.6B-31.3B parameters dynamically (average ~27B). The model features a shortcut-connected architecture enabling high inference speed (>100 tokens/second) and advanced reasoning capabilities.
+
+The abstract from the paper is the following:
+
+*We present LongCat-Flash, a 560 billion parameter Mixture-of-Experts (MoE) language model featuring a dynamic computation mechanism that activates 18.6B-31.3B parameters based on context (average ~27B). The model incorporates a shortcut-connected architecture enabling high inference speed (>100 tokens/second) and demonstrates strong performance across multiple benchmarks including 89.71% accuracy on MMLU and exceptional agentic tool use capabilities.*
+
+Tips:
+
+- LongCat-Flash uses a unique shortcut-connected MoE architecture that enables faster inference compared to traditional MoE models
+- The model supports up to 128k context length for long-form tasks
+- Dynamic parameter activation makes it computationally efficient while maintaining high performance
+- Best suited for applications requiring strong reasoning, coding, and tool-calling capabilities
+- The MoE architecture includes zero experts (nn.Identity modules) which act as skip connections, allowing tokens to bypass expert computation when appropriate
+
+This model was contributed by [Molbap](https://huggingface.co/Molbap).
+The original code can be found [here](https://huggingface.co/meituan-longcat/LongCat-Flash-Chat).
+
+## Usage examples
+
+The model is large: you will need 2 nodes with 8 H100 GPUs each (16 GPUs in total) to run inference.
+```python
+# launch_longcat.py
+from transformers import LongcatFlashForCausalLM, AutoTokenizer
+import torch
+
+model_id = "meituan-longcat/LongCat-Flash-Chat"
+
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+chat = [
+      {"role": "user", "content": "Hello! What is the capital of France? What can you tell me about it?"},
+]
+
+model = LongcatFlashForCausalLM.from_pretrained(
+      model_id,
+      tp_plan="auto",
+      dtype=torch.bfloat16,
+      )
+
+inputs = tokenizer.apply_chat_template(
+      chat, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(model.device)
+
+outputs = model.generate(inputs, max_new_tokens=30)
+print(tokenizer.batch_decode(outputs))
+```
+
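+To run with TP, you will need torchrun. Launch the command below on each of the two nodes, setting `--node_rank` to `0` on the first node and `1` on the second:
+
+```bash
+torchrun --nproc_per_node=8 --nnodes=2 --node_rank=<0_or_1> --rdzv-id <an_id> --rdzv-backend c10d --rdzv-endpoint $NODE_ID:$NODE_PORT --log-dir ./logs_longcat launch_longcat.py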
+```
+
+And you'll get a nice generation:
+```text
+[Round 0] USER:Hello! What is the capital of France? What can you tell me about it? ASSISTANT:Hello! 😊 The capital of France is Paris, one of the most famous and beloved cities in the world. Here’s a quick overview of what makes Paris special:
+1. Iconic Landmarks
+
+    Eiffel Tower – The global symbol of France, built in 1889 for the World's Fair.
+    Notre-Dame Cathedral – A masterpiece of Gothic architecture (currently under restoration after the 2019 fire).
+    Louvre Museum – The world’s largest art museum, home to the Mona Lisa and Venus de Milo.
+    Sacré-Cœur Basilica – A stunning white church atop Montmartre with panoramic views.
+    Arc de Triomphe – Honors French military victories, with the Tomb of the Unknown Soldier beneath it.
+    Champs-Élysées – A glamorous avenue leading to the Arc de Triomphe, lined with shops and cafés.
+
+2. Culture & Arts
+
+    Paris is the "City of Light" (La Ville Lumière), a nickname from its early adoption of street lighting and its role as a center of enlightenment.
+    It’s a global hub for fashion (haute couture, Paris Fashion Week) and art (Impressionism, Picasso, Dali).
+    Famous literary figures like Hemingway, Fitzgerald, and Sartre lived and wrote here.
+
+3. Food & Cuisine
+
+    Croissants, baguettes, macarons, and crème brûlée are just a few of its culinary delights.
+    Paris has over 100 Michelin-starred restaurants and countless cozy bistros.
+    The Marché d’Aligre and Rue Mouffetard are great for fresh produce and local flavors.
+
+4. History & Politics
+
+    Founded in the 3rd century BC by the Parisii tribe, it became a major European city under the Romans.
+    The French Revolution (1789–1799) began here, leading to the fall of the monarchy.
+    Today, it’s the political and economic heart of France, housing the French President’s residence (Élysée Palace) and the National Assembly.
+
+**
+```
+
+## LongcatFlashConfig
+
+[[autodoc]] LongcatFlashConfig
+
+## LongcatFlashPreTrainedModel
+
+[[autodoc]] LongcatFlashPreTrainedModel
+    - forward
+
+## LongcatFlashModel
+
+[[autodoc]] LongcatFlashModel
+    - forward
+
+## LongcatFlashForCausalLM
+
+[[autodoc]] LongcatFlashForCausalLM
@@ -13,6 +13,9 @@ specific language governing permissions and limitations under the License.
 rendered properly in your Markdown viewer.
 
 -->
+*This model was released on 2022-07-11 and added to Hugging Face Transformers on 2022-07-18.*
+
+# NLLB
 
 <div style="float: right;">
     <div class="flex flex-wrap space-x-1">
@@ -22,18 +25,15 @@ rendered properly in your Markdown viewer.
     </div>
 </div>
 
-*This model was released on 2022-07-11 and added to Hugging Face Transformers on 2022-07-18.*
-
-
-# NLLB
+## Overview
 
 [NLLB: No Language Left Behind](https://huggingface.co/papers/2207.04672) is a multilingual translation model. It's trained on data using data mining techniques tailored for low-resource languages and supports over 200 languages. NLLB features a conditional compute architecture using a Sparsely Gated Mixture of Experts.
 
 
 You can find all the original NLLB checkpoints under the [AI at Meta](https://huggingface.co/facebook/models?search=nllb) organization.
 
 > [!TIP]
-> This model was contributed by [Lysandre](https://huggingface.co/lysandre).  
+> This model was contributed by [Lysandre](https://huggingface.co/lysandre).
 > Click on the NLLB models in the right sidebar for more examples of how to apply NLLB to different translation tasks.
 
 The example below demonstrates how to translate text with [`Pipeline`] or the [`AutoModel`] class.
@@ -120,17 +120,17 @@ visualizer("UN Chief says there is no military solution in Syria")
    >>> tokenizer("How was your day?").input_ids
    [256047, 13374, 1398, 4260, 4039, 248130, 2]
    ```
-   
+
    To revert to the legacy behavior, use the code example below.
-   
+
    ```python
    >>> from transformers import NllbTokenizer
 
    >>> tokenizer = NllbTokenizer.from_pretrained("facebook/nllb-200-distilled-600M", legacy_behaviour=True)
    ```
-   
+
  - For non-English languages, specify the language's [BCP-47](https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200) code with the `src_lang` keyword as shown below.
- 
+
  - See example below for a translation from Romanian to German.
     ```python
     >>> from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 
@@ -13,6 +13,10 @@ specific language governing permissions and limitations under the License.
 rendered properly in your Markdown viewer.
 
 -->
+*This model was released on 2024-07-29 and added to Hugging Face Transformers on 2025-08-14.*
+
+# SAM2
+
 <div style="float: right;">
     <div class="flex flex-wrap space-x-1">
         <img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
@@ -21,8 +25,6 @@ rendered properly in your Markdown viewer.
     </div>
 </div>
 
-# SAM2
-
 ## Overview
 
 SAM2 (Segment Anything Model 2) was proposed in [Segment Anything in Images and Videos](https://ai.meta.com/research/publications/sam-2-segment-anything-in-images-and-videos/) by Nikhila Ravi, Valentin Gabeur, Yuan-Ting Hu, Ronghang Hu, Chaitanya Ryali, Tengyu Ma, Haitham Khedr, Roman Rädle, Chloe Rolland, Laura Gustafson, Eric Mintun, Junting Pan, Kalyan Vasudev Alwala, Nicolas Carion, Chao-Yuan Wu, Ross Girshick, Piotr Dollár, Christoph Feichtenhofer.
 
@@ -13,6 +13,10 @@ specific language governing permissions and limitations under the License.
 rendered properly in your Markdown viewer.
 
 -->
+*This model was released on 2024-07-29 and added to Hugging Face Transformers on 2025-08-14.*
+
+# SAM2 Video
+
 <div style="float: right;">
     <div class="flex flex-wrap space-x-1">
         <img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
@@ -21,8 +25,6 @@ rendered properly in your Markdown viewer.
     </div>
 </div>
 
-# SAM2 Video
-
 ## Overview
 
 SAM2 (Segment Anything Model 2) was proposed in [Segment Anything in Images and Videos](https://ai.meta.com/research/publications/sam-2-segment-anything-in-images-and-videos/) by Nikhila Ravi, Valentin Gabeur, Yuan-Ting Hu, Ronghang Hu, Chaitanya Ryali, Tengyu Ma, Haitham Khedr, Roman Rädle, Chloe Rolland, Laura Gustafson, Eric Mintun, Junting Pan, Kalyan Vasudev Alwala, Nicolas Carion, Chao-Yuan Wu, Ross Girshick, Piotr Dollár, Christoph Feichtenhofer.
 
@@ -79,7 +79,7 @@ checkpoint: 检查点
 </h4>
 
 <h3 align="center">
-    <p>为 Jax、PyTorch 和 TensorFlow 打造的先进的自然语言处理</p>
+    <p>为 Jax、PyTorch 和 TensorFlow 打造的先进的自然语言处理函数库</p>
 </h3>
 
 <h3 align="center">
 
@@ -308,7 +308,7 @@ def run(self):
 extras["sigopt"] = deps_list("sigopt")
 extras["hub-kernels"] = deps_list("kernels")
 
-extras["integrations"] = extras["hub-kernels"] + extras["optuna"] + extras["ray"] + extras["sigopt"]
+extras["integrations"] = extras["hub-kernels"] + extras["optuna"] + extras["ray"]
 
 extras["serving"] = deps_list("openai", "pydantic", "uvicorn", "fastapi", "starlette") + extras["torch"]
 extras["audio"] = deps_list(