@@ -104,7 +104,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, DynamicCache, infer_device
device = f"{infer_device()}:0"

model_id = "meta-llama/Llama-2-7b-chat-hf"
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map=device)
+model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.bfloat16, device_map=device)
tokenizer = AutoTokenizer.from_pretrained(model_id)

past_key_values = DynamicCache()
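For context on this first hunk: the `DynamicCache` created here is meant to be handed to `generate()` and reused across calls. A minimal sketch follows; the prompt string and `max_new_tokens` value are illustrative assumptions, not part of the change.

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, DynamicCache, infer_device

device = f"{infer_device()}:0"

model_id = "meta-llama/Llama-2-7b-chat-hf"
# `dtype` is the argument name this commit switches to (formerly `torch_dtype`).
model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.bfloat16, device_map=device)
tokenizer = AutoTokenizer.from_pretrained(model_id)

past_key_values = DynamicCache()

# Illustrative prompt; generate() fills the cache object in place, so the same
# object can be passed again on a follow-up call instead of being recomputed.
inputs = tokenizer("Hello, my name is", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, past_key_values=past_key_values, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```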
@@ -150,7 +150,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, DynamicCache, infer_device
device = f"{infer_device()}:0"

model_id = "meta-llama/Llama-2-7b-chat-hf"
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map=device)
+model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.bfloat16, device_map=device)
tokenizer = AutoTokenizer.from_pretrained(model_id)

messages = [{"role": "user", "content": "You are a helpful assistant."}]
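The second hunk sets up a chat-style `messages` list. A hedged sketch of the usual next steps (rendering the chat template and generating) is below; `add_generation_prompt` and the generation length are assumptions for illustration.

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, infer_device

device = f"{infer_device()}:0"
model_id = "meta-llama/Llama-2-7b-chat-hf"
model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.bfloat16, device_map=device)
tokenizer = AutoTokenizer.from_pretrained(model_id)

messages = [{"role": "user", "content": "You are a helpful assistant."}]

# Render the chat messages into token ids; add_generation_prompt appends the
# assistant prefix so the model continues as the assistant.
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# max_new_tokens is an illustrative choice, not taken from the diff.
outputs = model.generate(inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```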
@@ -176,7 +176,7 @@ import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, DynamicCache

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
-model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", torch_dtype=torch.float16, device_map="auto")
+model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", dtype=torch.float16, device_map="auto")
inputs = tokenizer("Hello, my name is", return_tensors="pt").to(model.device)

# `return_dict_in_generate=True` is required to return the cache, and `return_legacy_cache` converts the returned cache
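The comment that closes this hunk continues past the hunk boundary. A minimal sketch of the call it describes is below, continuing from the `model` and `inputs` defined above; `max_new_tokens` is an illustrative value.

```python
# Return the cache as part of the generate() output and force it into the
# legacy tuple-of-(key, value) format.
generation_outputs = model.generate(
    **inputs,
    return_dict_in_generate=True,  # required so past_key_values is returned
    return_legacy_cache=True,      # convert the returned cache to the legacy format
    max_new_tokens=5,              # illustrative value, not from the diff
)

# With return_legacy_cache=True this is a tuple of per-layer (key, value) pairs;
# it can be re-wrapped with DynamicCache.from_legacy_cache() if needed.
past_key_values = generation_outputs.past_key_values
print(type(past_key_values))
```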