Update training #4

Merged: 76 commits, Apr 2, 2024

Commits (76)
e68ff30
[`quality`] update quality check to make sure we check imports 😈 (#2…
ArthurZucker Mar 22, 2024
3479161
Fix type hint for train_dataset param of Trainer.__init__() to allow …
stevemadere Mar 22, 2024
aa17cf9
Enable AMD docker build CI (#29803)
IlyasMoutawwakil Mar 22, 2024
13b2370
Correct llava mask & fix missing setter for `vocab_size` (#29389)
fxmarty Mar 22, 2024
e85654f
rm input dtype change in CPU (#28631)
jiqing-feng Mar 22, 2024
34e07f4
Generate: remove unused attributes in `AssistedCandidateGenerator` (#…
gante Mar 22, 2024
884b221
replaced concatenation to f-strings to improve readability and unify …
igeni Mar 22, 2024
2e7cb46
[`cleanup`] vestiges of causal mask (#29806)
ArthurZucker Mar 22, 2024
7e1413d
Complete security policy with mentions of remote code (#29707)
LysandreJik Mar 22, 2024
c5f0288
[`SuperPoint`] Fix doc example (#29816)
amyeroberts Mar 22, 2024
dafe370
[DOCS] Fix typo for llava next docs (#29829)
aliencaocao Mar 23, 2024
76a33a1
model_summary.md - Restore link to Harvard's Annotated Transformer. (…
gamepad-coder Mar 24, 2024
39114c0
Remove static pretrained maps from the library's internals (#29112)
LysandreJik Mar 25, 2024
afe73ae
Fix the behavior of collecting 'num_input_tokens_seen' (#29099)
YouliangHUANG Mar 25, 2024
8e9a220
Populate torch_dtype from model to pipeline (#28940)
B-Step62 Mar 25, 2024
00a09ed
fix 😭
ArthurZucker Mar 25, 2024
e3e16dd
[`revert commit`] revert 00a09ed448082da3d6d35fb23a37b7d04f7b4dcd
ArthurZucker Mar 25, 2024
7eb3ba8
remove quotes in code example (#29812)
johko Mar 25, 2024
b5a6d6e
Add warnings if training args differ from checkpoint trainer state (#…
jonflynng Mar 26, 2024
b32bf85
Replace 'decord' with 'av' in VideoClassificationPipeline (#29747)
Tyx-main Mar 26, 2024
de81a67
Fix header in IFE task guide (#29859)
merveenoyan Mar 26, 2024
b9ceb03
[docs] Indent ordered list in add_new_model.md (#29796)
windsonsea Mar 26, 2024
998b5bb
Allow `bos_token_id is None` during the generation with `inputs_embed…
LZHgrla Mar 26, 2024
ef60995
Add `cosine_with_min_lr` scheduler in Trainer (#29341)
liuyanyi Mar 26, 2024
07d7952
Disable AMD memory benchmarks (#29871)
IlyasMoutawwakil Mar 26, 2024
f01e160
Set custom_container in build docs workflows (#29855)
Wauplin Mar 26, 2024
8e08aca
Support `num_attention_heads` != `num_key_value_heads` in Flax Llama …
bminixhofer Mar 27, 2024
1c39974
Add Qwen2MoE (#29377)
bozheng-hit Mar 27, 2024
cefb819
Mamba `slow_forward` gradient fix (#29563)
vasqu Mar 27, 2024
a81cf9e
Fix 29807, sinusoidal positional encodings overwritten by post_init()…
hovnatan Mar 27, 2024
4d8427f
Reimplement "Automatic safetensors conversion when lacking these file…
LysandreJik Mar 27, 2024
31c575b
fix fuyu device_map compatibility (#29880)
SunMarc Mar 27, 2024
0efcf32
Move `eos_token_id` to stopping criteria (#29459)
zucchini-nlp Mar 27, 2024
7576974
add Cambricon MLUs support (#29627)
huismiling Mar 27, 2024
a25037b
MixtralSparseMoeBlock: add gate jitter (#29865)
lorenzoverardo Mar 27, 2024
d9dc993
Fix typo in T5Block error message (#29881)
Mingosnake Mar 28, 2024
b256516
[`make fix-copies`] update and help (#29924)
ArthurZucker Mar 28, 2024
543889f
[`GptNeox`] don't gather on pkv when using the trainer (#29892)
ArthurZucker Mar 28, 2024
3a7e683
[`pipeline`]. Zero shot add doc warning (#29845)
ArthurZucker Mar 28, 2024
22d159d
Adding Flash Attention 2 Support for GPT2 (#29226)
EduardoPach Mar 28, 2024
7c19faf
[doc] fix some typos and add `xpu` to the testing documentation (#29894)
faaany Mar 28, 2024
248d5d2
Tests: replace `torch.testing.assert_allclose` by `torch.testing.asse…
gante Mar 28, 2024
c9d2e85
Add beam search visualizer to the doc (#29876)
aymeric-roucher Mar 28, 2024
855b95c
Safe import of LRScheduler (#29919)
amyeroberts Mar 28, 2024
aac7099
add functions to inspect model and optimizer status to trainer.py (#2…
CKeibel Mar 28, 2024
441de62
RoPE models: add numerical sanity-check test for RoPE scaling (#29808)
gante Mar 28, 2024
e677479
[`Mamba`] from pretrained issue with `self.embeddings` (#29851)
ArthurZucker Mar 28, 2024
a2a7f71
[ `TokenizationLlama`] fix the way we convert tokens to strings to ke…
ArthurZucker Mar 28, 2024
4df5b9b
Allow GradientAccumulationPlugin to be configured from AcceleratorCon…
fabianlim Mar 28, 2024
2bbbf1b
[`BC`] Fix BC for other libraries (#29934)
ArthurZucker Mar 28, 2024
e203646
Fix doc issue #29758 in DebertaV2Config class (#29842)
vinayakkgarg Mar 28, 2024
536ea2a
[`LlamaSlowConverter`] Slow to Fast better support (#29797)
ArthurZucker Mar 28, 2024
ba56ed0
Update installs in image classification doc (#29947)
MariaHei Mar 28, 2024
43d17c1
Mark `test_eager_matches_sdpa_generate` flaky for some models (#29479)
ydshieh Mar 29, 2024
5ad7f17
Super tiny fix 12 typos about "with with" (#29926)
fzyzcjy Mar 29, 2024
6fd93fe
Fix rope theta for OpenLlama (#29893)
jla524 Mar 30, 2024
156d30d
Add warning message for `run_qa.py` (#29867)
jla524 Mar 30, 2024
e644b60
fix: get mlflow version from mlflow-skinny (#29918)
Mar 30, 2024
f6701bc
Reset alarm signal when the function is ended (#29706)
coldnight Mar 30, 2024
46d6368
Update model card and link of blog post. (#29928)
bozheng-hit Mar 30, 2024
6e58407
[`BC`] Fix BC for AWQ quant (#29965)
TechxGenus Mar 30, 2024
3b8e293
Rework tests to compare trainer checkpoint args (#29883)
muellerzr Mar 31, 2024
569f6c7
Fix FA2 tests (#29909)
ylacombe Apr 1, 2024
fa2c49b
Fix copies main ci (#29979)
ArthurZucker Apr 1, 2024
e4f5b57
[tests] fix the wrong output in `ImageToTextPipelineTests.test_condit…
faaany Apr 1, 2024
c9f6e5e
Generate: move misplaced test (#29902)
gante Apr 1, 2024
096f304
[docs] Big model loading (#29920)
stevhliu Apr 2, 2024
83b26dd
[`generate`] fix breaking change for patch (#29976)
ArthurZucker Apr 2, 2024
416711c
Fix 29807 sinusoidal positional encodings in Flaubert, Informer and X…
hovnatan Apr 2, 2024
33288ff
[bnb] Fix bug in `_replace_with_bnb_linear` (#29958)
SunMarc Apr 2, 2024
fed27ff
Adding FlaxNoRepeatNGramLogitsProcessor (#29677)
giganttheo Apr 2, 2024
0d04b1e
Add Flash Attention 2 support to Musicgen and Musicgen Melody (#29939)
ylacombe Apr 2, 2024
cb5927c
[Docs] Make an ordered list prettier in add_tensorflow_model.md (#29949)
windsonsea Apr 2, 2024
15cd687
Fix `skip_special_tokens` for `Wav2Vec2CTCTokenizer._decode` (#29311)
msublee Apr 2, 2024
9b0a8ea
Hard error when ignoring tensors. (#27484) (#29906)
Narsil Apr 2, 2024
5080ab1
Generate: fix logits processors doctests (#29718)
gante Apr 2, 2024
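
One of the commits above, "Add `cosine_with_min_lr` scheduler in Trainer (#29341)", exposes a new learning-rate schedule through the usual `TrainingArguments` fields. Below is a minimal sketch of how it would be selected; the kwarg name `min_lr_rate` is an assumption based on the commit title, so check the Trainer docs before relying on it:

```python
from transformers import TrainingArguments

# Hedged sketch: select the cosine-with-floor schedule added in #29341.
# The schedule decays from `learning_rate` toward a floor instead of 0;
# the exact kwarg name ("min_lr_rate" vs "min_lr") is an assumption here.
args = TrainingArguments(
    output_dir="out",
    learning_rate=5e-5,
    lr_scheduler_type="cosine_with_min_lr",
    lr_scheduler_kwargs={"min_lr_rate": 0.1},  # floor at 10% of the peak LR
)
```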
1 change: 1 addition & 0 deletions .circleci/config.yml
@@ -157,6 +157,7 @@ jobs:
command: pip freeze | tee installed.txt
- store_artifacts:
path: ~/transformers/installed.txt
- run: python -c "from transformers import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
- run: ruff check examples tests src utils
- run: ruff format tests src utils --check
- run: python utils/custom_init_isort.py --check_only
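The new step fails the job as soon as `from transformers import *` raises, which is exactly what happens when a module imports an optional backend at the top level instead of guarding it. Here is a minimal sketch of the guarded-import pattern the check enforces, using a hypothetical mini-module rather than transformers' actual lazy-import machinery:

```python
# Hypothetical module illustrating a "protected" optional import.
# If torch is absent, `from mymodule import *` still succeeds;
# only the torch-backed symbol is left out of the public namespace.
import importlib.util

def is_torch_available() -> bool:
    # find_spec returns None when the package cannot be located
    return importlib.util.find_spec("torch") is not None

__all__ = ["is_torch_available"]

if is_torch_available():
    import torch

    def detach_to_cpu(t: "torch.Tensor") -> "torch.Tensor":
        # Only defined (and exported) when torch is installed
        return t.detach().cpu()

    __all__.append("detach_to_cpu")
```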
146 changes: 76 additions & 70 deletions .github/workflows/build-docker-images.yml
@@ -198,41 +198,44 @@ jobs:
push: true
tags: huggingface/transformers-pytorch-gpu

# Need to be fixed with the help from Guillaume.
# latest-pytorch-amd:
# name: "Latest PyTorch (AMD) [dev]"
# runs-on: [self-hosted, docker-gpu, amd-gpu, single-gpu, mi210]
# steps:
# - name: Set up Docker Buildx
# uses: docker/setup-buildx-action@v3
# - name: Check out code
# uses: actions/checkout@v3
# - name: Login to DockerHub
# uses: docker/login-action@v3
# with:
# username: ${{ secrets.DOCKERHUB_USERNAME }}
# password: ${{ secrets.DOCKERHUB_PASSWORD }}
# - name: Build and push
# uses: docker/build-push-action@v5
# with:
# context: ./docker/transformers-pytorch-amd-gpu
# build-args: |
# REF=main
# push: true
# tags: huggingface/transformers-pytorch-amd-gpu${{ inputs.image_postfix }}
# # Push CI images still need to be re-built daily
# -
# name: Build and push (for Push CI) on a daily basis
# # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
# # The latter case is useful for manual image building for debugging purposes. Use another tag in this case!
# if: inputs.image_postfix != '-push-ci'
# uses: docker/build-push-action@v5
# with:
# context: ./docker/transformers-pytorch-amd-gpu
# build-args: |
# REF=main
# push: true
# tags: huggingface/transformers-pytorch-amd-gpu-push-ci
latest-pytorch-amd:
name: "Latest PyTorch (AMD) [dev]"
runs-on: [intel-cpu, 8-cpu, ci]
steps:
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
-
name: Check out code
uses: actions/checkout@v3
-
name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
-
name: Build and push
uses: docker/build-push-action@v5
with:
context: ./docker/transformers-pytorch-amd-gpu
build-args: |
REF=main
push: true
tags: huggingface/transformers-pytorch-amd-gpu${{ inputs.image_postfix }}
# Push CI images still need to be re-built daily
-
name: Build and push (for Push CI) on a daily basis
# This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
# The latter case is useful for manual image building for debugging purposes. Use another tag in this case!
if: inputs.image_postfix != '-push-ci'
uses: docker/build-push-action@v5
with:
context: ./docker/transformers-pytorch-amd-gpu
build-args: |
REF=main
push: true
tags: huggingface/transformers-pytorch-amd-gpu-push-ci

latest-tensorflow:
name: "Latest TensorFlow [dev]"
@@ -262,41 +265,44 @@ jobs:
push: true
tags: huggingface/transformers-tensorflow-gpu

# latest-pytorch-deepspeed-amd:
# name: "PyTorch + DeepSpeed (AMD) [dev]"

# runs-on: [self-hosted, docker-gpu, amd-gpu, single-gpu, mi210]
# steps:
# - name: Set up Docker Buildx
# uses: docker/setup-buildx-action@v3
# - name: Check out code
# uses: actions/checkout@v3
# - name: Login to DockerHub
# uses: docker/login-action@v3
# with:
# username: ${{ secrets.DOCKERHUB_USERNAME }}
# password: ${{ secrets.DOCKERHUB_PASSWORD }}
# - name: Build and push
# uses: docker/build-push-action@v5
# with:
# context: ./docker/transformers-pytorch-deepspeed-amd-gpu
# build-args: |
# REF=main
# push: true
# tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }}
# # Push CI images still need to be re-built daily
# -
# name: Build and push (for Push CI) on a daily basis
# # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
# # The latter case is useful for manual image building for debugging purposes. Use another tag in this case!
# if: inputs.image_postfix != '-push-ci'
# uses: docker/build-push-action@v5
# with:
# context: ./docker/transformers-pytorch-deepspeed-amd-gpu
# build-args: |
# REF=main
# push: true
# tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci
latest-pytorch-deepspeed-amd:
name: "PyTorch + DeepSpeed (AMD) [dev]"
runs-on: [intel-cpu, 8-cpu, ci]
steps:
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
-
name: Check out code
uses: actions/checkout@v3
-
name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
-
name: Build and push
uses: docker/build-push-action@v5
with:
context: ./docker/transformers-pytorch-deepspeed-amd-gpu
build-args: |
REF=main
push: true
tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }}
# Push CI images still need to be re-built daily
-
name: Build and push (for Push CI) on a daily basis
# This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
# The latter case is useful for manual image building for debugging purposes. Use another tag in this case!
if: inputs.image_postfix != '-push-ci'
uses: docker/build-push-action@v5
with:
context: ./docker/transformers-pytorch-deepspeed-amd-gpu
build-args: |
REF=main
push: true
tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci

latest-quantization-torch-docker:
name: "Latest Pytorch + Quantization [dev]"
1 change: 1 addition & 0 deletions .github/workflows/build_documentation.yml
@@ -16,6 +16,7 @@ jobs:
package: transformers
notebook_folder: transformers_doc
languages: de en es fr hi it ko pt tr zh ja te
custom_container: huggingface/transformers-doc-builder
secrets:
token: ${{ secrets.HUGGINGFACE_PUSH }}
hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
1 change: 1 addition & 0 deletions .github/workflows/build_pr_documentation.yml
@@ -15,3 +15,4 @@ jobs:
pr_number: ${{ github.event.number }}
package: transformers
languages: de en es fr hi it ko pt tr zh ja te
custom_container: huggingface/transformers-doc-builder
2 changes: 2 additions & 0 deletions Makefile
@@ -51,12 +51,14 @@ repo-consistency:
# this target runs checks on all files

quality:
@python -c "from transformers import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
ruff check $(check_dirs) setup.py conftest.py
ruff format --check $(check_dirs) setup.py conftest.py
python utils/custom_init_isort.py --check_only
python utils/sort_auto_mappings.py --check_only
python utils/check_doc_toc.py


# Format source code automatically and check if there are any problems left that need manual fixing

extra_style_checks:
1 change: 1 addition & 0 deletions README.md
@@ -473,6 +473,7 @@ Current number of checkpoints: ![](https://img.shields.io/endpoint?url=https://h
1. **[PVTv2](https://huggingface.co/docs/transformers/model_doc/pvt_v2)** (from Shanghai AI Laboratory, Nanjing University, The University of Hong Kong etc.) released with the paper [PVT v2: Improved Baselines with Pyramid Vision Transformer](https://arxiv.org/abs/2106.13797) by Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao.
1. **[QDQBert](https://huggingface.co/docs/transformers/model_doc/qdqbert)** (from NVIDIA) released with the paper [Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation](https://arxiv.org/abs/2004.09602) by Hao Wu, Patrick Judd, Xiaojie Zhang, Mikhail Isaev and Paulius Micikevicius.
1. **[Qwen2](https://huggingface.co/docs/transformers/model_doc/qwen2)** (from the Qwen team, Alibaba Group) released with the paper [Qwen Technical Report](https://arxiv.org/abs/2309.16609) by Jinze Bai, Shuai Bai, Yunfei Chu, Zeyu Cui, Kai Dang, Xiaodong Deng, Yang Fan, Wenbin Ge, Yu Han, Fei Huang, Binyuan Hui, Luo Ji, Mei Li, Junyang Lin, Runji Lin, Dayiheng Liu, Gao Liu, Chengqiang Lu, Keming Lu, Jianxin Ma, Rui Men, Xingzhang Ren, Xuancheng Ren, Chuanqi Tan, Sinan Tan, Jianhong Tu, Peng Wang, Shijie Wang, Wei Wang, Shengguang Wu, Benfeng Xu, Jin Xu, An Yang, Hao Yang, Jian Yang, Shusheng Yang, Yang Yao, Bowen Yu, Hongyi Yuan, Zheng Yuan, Jianwei Zhang, Xingxuan Zhang, Yichang Zhang, Zhenru Zhang, Chang Zhou, Jingren Zhou, Xiaohuan Zhou and Tianhang Zhu.
1. **[Qwen2MoE](https://huggingface.co/docs/transformers/main/model_doc/qwen2_moe)** (from the Qwen team, Alibaba Group) released with [blog post](https://qwenlm.github.io/blog/qwen-moe/) by Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
1. **[RAG](https://huggingface.co/docs/transformers/model_doc/rag)** (from Facebook) released with the paper [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://arxiv.org/abs/2005.11401) by Patrick Lewis, Ethan Perez, Aleksandara Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
1. **[REALM](https://huggingface.co/docs/transformers/model_doc/realm.html)** (from Google Research) released with the paper [REALM: Retrieval-Augmented Language Model Pre-Training](https://arxiv.org/abs/2002.08909) by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang.
1. **[Reformer](https://huggingface.co/docs/transformers/model_doc/reformer)** (from Google Research) released with the paper [Reformer: The Efficient Transformer](https://arxiv.org/abs/2001.04451) by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
1 change: 1 addition & 0 deletions README_de.md
@@ -469,6 +469,7 @@ Aktuelle Anzahl der Checkpoints: ![](https://img.shields.io/endpoint?url=https:/
1. **[PVTv2](https://huggingface.co/docs/transformers/model_doc/pvt_v2)** (from Shanghai AI Laboratory, Nanjing University, The University of Hong Kong etc.) released with the paper [PVT v2: Improved Baselines with Pyramid Vision Transformer](https://arxiv.org/abs/2106.13797) by Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao.
1. **[QDQBert](https://huggingface.co/docs/transformers/model_doc/qdqbert)** (from NVIDIA) released with the paper [Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation](https://arxiv.org/abs/2004.09602) by Hao Wu, Patrick Judd, Xiaojie Zhang, Mikhail Isaev and Paulius Micikevicius.
1. **[Qwen2](https://huggingface.co/docs/transformers/model_doc/qwen2)** (from the Qwen team, Alibaba Group) released with the paper [Qwen Technical Report](https://arxiv.org/abs/2309.16609) by Jinze Bai, Shuai Bai, Yunfei Chu, Zeyu Cui, Kai Dang, Xiaodong Deng, Yang Fan, Wenbin Ge, Yu Han, Fei Huang, Binyuan Hui, Luo Ji, Mei Li, Junyang Lin, Runji Lin, Dayiheng Liu, Gao Liu, Chengqiang Lu, Keming Lu, Jianxin Ma, Rui Men, Xingzhang Ren, Xuancheng Ren, Chuanqi Tan, Sinan Tan, Jianhong Tu, Peng Wang, Shijie Wang, Wei Wang, Shengguang Wu, Benfeng Xu, Jin Xu, An Yang, Hao Yang, Jian Yang, Shusheng Yang, Yang Yao, Bowen Yu, Hongyi Yuan, Zheng Yuan, Jianwei Zhang, Xingxuan Zhang, Yichang Zhang, Zhenru Zhang, Chang Zhou, Jingren Zhou, Xiaohuan Zhou and Tianhang Zhu.
1. **[Qwen2MoE](https://huggingface.co/docs/transformers/main/model_doc/qwen2_moe)** (from the Qwen team, Alibaba Group) released with the [blog post](https://qwenlm.github.io/blog/qwen-moe/) by Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
1. **[RAG](https://huggingface.co/docs/transformers/model_doc/rag)** (from Facebook) released with the paper [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://arxiv.org/abs/2005.11401) by Patrick Lewis, Ethan Perez, Aleksandara Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
1. **[REALM](https://huggingface.co/docs/transformers/model_doc/realm.html)** (from Google Research) released with the paper [REALM: Retrieval-Augmented Language Model Pre-Training](https://arxiv.org/abs/2002.08909) by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang.
1. **[Reformer](https://huggingface.co/docs/transformers/model_doc/reformer)** (from Google Research) released with the paper [Reformer: The Efficient Transformer](https://arxiv.org/abs/2001.04451) by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
1 change: 1 addition & 0 deletions README_es.md
@@ -446,6 +446,7 @@ Número actual de puntos de control: ![](https://img.shields.io/endpoint?url=htt
1. **[PVTv2](https://huggingface.co/docs/transformers/model_doc/pvt_v2)** (from Shanghai AI Laboratory, Nanjing University, The University of Hong Kong etc.) released with the paper [PVT v2: Improved Baselines with Pyramid Vision Transformer](https://arxiv.org/abs/2106.13797) by Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao.
1. **[QDQBert](https://huggingface.co/docs/transformers/model_doc/qdqbert)** (from NVIDIA) released with the paper [Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation](https://arxiv.org/abs/2004.09602) by Hao Wu, Patrick Judd, Xiaojie Zhang, Mikhail Isaev and Paulius Micikevicius.
1. **[Qwen2](https://huggingface.co/docs/transformers/model_doc/qwen2)** (from the Qwen team, Alibaba Group) released with the paper [Qwen Technical Report](https://arxiv.org/abs/2309.16609) by Jinze Bai, Shuai Bai, Yunfei Chu, Zeyu Cui, Kai Dang, Xiaodong Deng, Yang Fan, Wenbin Ge, Yu Han, Fei Huang, Binyuan Hui, Luo Ji, Mei Li, Junyang Lin, Runji Lin, Dayiheng Liu, Gao Liu, Chengqiang Lu, Keming Lu, Jianxin Ma, Rui Men, Xingzhang Ren, Xuancheng Ren, Chuanqi Tan, Sinan Tan, Jianhong Tu, Peng Wang, Shijie Wang, Wei Wang, Shengguang Wu, Benfeng Xu, Jin Xu, An Yang, Hao Yang, Jian Yang, Shusheng Yang, Yang Yao, Bowen Yu, Hongyi Yuan, Zheng Yuan, Jianwei Zhang, Xingxuan Zhang, Yichang Zhang, Zhenru Zhang, Chang Zhou, Jingren Zhou, Xiaohuan Zhou and Tianhang Zhu.
1. **[Qwen2MoE](https://huggingface.co/docs/transformers/main/model_doc/qwen2_moe)** (from the Qwen team, Alibaba Group) released with the [blog post](https://qwenlm.github.io/blog/qwen-moe/) by Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
1. **[RAG](https://huggingface.co/docs/transformers/model_doc/rag)** (from Facebook) released with the paper [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://arxiv.org/abs/2005.11401) by Patrick Lewis, Ethan Perez, Aleksandara Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
1. **[REALM](https://huggingface.co/docs/transformers/model_doc/realm.html)** (from Google Research) released with the paper [REALM: Retrieval-Augmented Language Model Pre-Training](https://arxiv.org/abs/2002.08909) by Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat and Ming-Wei Chang.
1. **[Reformer](https://huggingface.co/docs/transformers/model_doc/reformer)** (from Google Research) released with the paper [Reformer: The Efficient Transformer](https://arxiv.org/abs/2001.04451) by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
1 change: 1 addition & 0 deletions README_fr.md
@@ -467,6 +467,7 @@ Nombre actuel de points de contrôle : ![](https://img.shields.io/endpoint?url=h
1. **[PVTv2](https://huggingface.co/docs/transformers/model_doc/pvt_v2)** (de Shanghai AI Laboratory, Nanjing University, The University of Hong Kong etc.) publié dans l'article [PVT v2: Improved Baselines with Pyramid Vision Transformer](https://arxiv.org/abs/2106.13797) parWenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao.
1. **[QDQBert](https://huggingface.co/docs/transformers/model_doc/qdqbert)** (de NVIDIA) a été publié dans l'article [Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation](https://arxiv.org/abs/2004.09602) par Hao Wu, Patrick Judd, Xiaojie Zhang, Mikhail Isaev et Paulius Micikevicius.
1. **[Qwen2](https://huggingface.co/docs/transformers/model_doc/qwen2)** (de l'équipe Qwen, Alibaba Group) a été publié avec le rapport technique [Qwen Technical Report](https://arxiv.org/abs/2309.16609) par Jinze Bai, Shuai Bai, Yunfei Chu, Zeyu Cui, Kai Dang, Xiaodong Deng, Yang Fan, Wenbin Ge, Yu Han, Fei Huang, Binyuan Hui, Luo Ji, Mei Li, Junyang Lin, Runji Lin, Dayiheng Liu, Gao Liu, Chengqiang Lu, Keming Lu, Jianxin Ma, Rui Men, Xingzhang Ren, Xuancheng Ren, Chuanqi Tan, Sinan Tan, Jianhong Tu, Peng Wang, Shijie Wang, Wei Wang, Shengguang Wu, Benfeng Xu, Jin Xu, An Yang, Hao Yang, Jian Yang, Shusheng Yang, Yang Yao, Bowen Yu, Hongyi Yuan, Zheng Yuan, Jianwei Zhang, Xingxuan Zhang, Yichang Zhang, Zhenru Zhang, Chang Zhou, Jingren Zhou, Xiaohuan Zhou et Tianhang Zhu.
1. **[Qwen2MoE](https://huggingface.co/docs/transformers/main/model_doc/qwen2_moe)** (de l'équipe Qwen, Alibaba Group) a été publié avec le [blog post](https://qwenlm.github.io/blog/qwen-moe/) par Bo Zheng, Dayiheng Liu, Rui Men, Junyang Lin, Zhou San, Bowen Yu, An Yang, Mingfeng Xue, Fei Huang, Binyuan Hui, Mei Li, Tianyu Liu, Xingzhang Ren, Xuancheng Ren, Kexin Yang, Chang Zhou, Jingren Zhou.
1. **[RAG](https://huggingface.co/docs/transformers/model_doc/rag)** (de Facebook) a été publié dans l'article [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](https://arxiv.org/abs/2005.11401) par Patrick Lewis, Ethan Perez, Aleksandara Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich Küttler, Mike Lewis, Wen-tau Yih, Tim Rocktäschel, Sebastian Riedel, Douwe Kiela.
1. **[REALM](https://huggingface.co/docs/transformers/model_doc/realm.html)** (de Google Research) a été publié dans l'article [REALM: Retrieval-Augmented Language Model Pre-Training](https://arxiv.org/abs/2002.08909) par Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat et Ming-Wei Chang.
1. **[Reformer](https://huggingface.co/docs/transformers/model_doc/reformer)** (de Google Research) a été publié dans l'article [Reformer: The Efficient Transformer](https://arxiv.org/abs/2001.04451) par Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.