1 change: 0 additions & 1 deletion src/transformers/__init__.py
@@ -770,7 +770,6 @@
from .utils import is_torch_npu_available as is_torch_npu_available
from .utils import is_torch_xla_available as is_torch_xla_available
from .utils import is_torch_xpu_available as is_torch_xpu_available
-from .utils import logging as logging

# bitsandbytes config
from .utils.quantization_config import AqlmConfig as AqlmConfig
4 changes: 1 addition & 3 deletions src/transformers/commands/chat.py
@@ -59,9 +59,7 @@

from transformers import (
AutoModelForCausalLM,
-AutoTokenizer,
BitsAndBytesConfig,
-GenerationConfig,
)

ALLOWED_KEY_CHARS = set(string.ascii_letters + string.whitespace)
@@ -534,7 +532,7 @@ def parse_eos_tokens(
# -----------------------------------------------------------------------------------------------------------------
# Model loading and performance automation methods
@staticmethod
-def get_quantization_config(model_args: ChatArguments) -> Optional["BitsAndBytesConfig"]:
+def get_quantization_config(model_args: ChatArguments) -> Optional[BitsAndBytesConfig]:
if model_args.load_in_4bit:
quantization_config = BitsAndBytesConfig(
load_in_4bit=True,
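A minimal sketch of the annotation change above (the signature and body here are illustrative, not the command's actual code): once BitsAndBytesConfig is imported at module level, the quoted forward reference in the return annotation is no longer needed and the class can be named directly.

from typing import Optional

from transformers import BitsAndBytesConfig


def get_quantization_config(load_in_4bit: bool) -> Optional[BitsAndBytesConfig]:
    # Illustrative body: build a 4-bit config only when it was requested.
    if load_in_4bit:
        return BitsAndBytesConfig(load_in_4bit=True)
    return None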
2 changes: 1 addition & 1 deletion src/transformers/data/metrics/squad_metrics.py
@@ -148,7 +148,7 @@ def find_best_thresh_v2(preds, scores, na_probs, qid_to_has_ans):
best_score = cur_score
best_thresh = 0.0
qid_list = sorted(na_probs, key=lambda k: na_probs[k])
-for i, qid in enumerate(qid_list):
+for qid in qid_list:
if qid not in scores:
continue
if qid_to_has_ans[qid]:
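The same cleanup pattern recurs in question_answering.py further down: when the loop index is never read, iterating the sequence directly replaces enumerate. A trivial sketch with made-up values:

qid_list = ["q1", "q2", "q3"]  # illustrative ids
for qid in qid_list:  # previously: for i, qid in enumerate(qid_list)
    print(qid)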
1 change: 0 additions & 1 deletion src/transformers/generation/logits_process.py
@@ -369,7 +369,6 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> to

if scores.dim() == 3:
if self.logits_indices is not None and self.cu_seq_lens_q is not None:
-batch_size, seq_len, vocab_size = scores.shape
last_positions = self.logits_indices
last_scores = scores[0, last_positions, :]

9 changes: 2 additions & 7 deletions src/transformers/generation/watermarking.py
@@ -24,14 +24,9 @@
from torch.nn import BCELoss

from ..modeling_utils import PreTrainedModel
-from ..utils import ModelOutput, is_torch_available, logging
+from ..utils import ModelOutput, logging
from .configuration_utils import PretrainedConfig, WatermarkingConfig


-if is_torch_available():
-import torch
-
-from .logits_process import SynthIDTextWatermarkLogitsProcessor, WatermarkLogitsProcessor
+from .logits_process import SynthIDTextWatermarkLogitsProcessor, WatermarkLogitsProcessor


logger = logging.get_logger(__name__)
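For context, a rough sketch of the guard pattern that was dropped (illustrative only, not the module's remaining code): an is_torch_available() check is useful in modules that must import cleanly without torch installed, but watermarking.py already imports torch.nn.BCELoss unconditionally a few lines above, so guarding the remaining imports added nothing.

from transformers.utils import is_torch_available

if is_torch_available():
    # Only reached when torch is installed; redundant in a module that already
    # hard-requires torch at import time.
    import torch
    print(torch.__version__)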
2 changes: 1 addition & 1 deletion src/transformers/integrations/tensor_parallel.py
@@ -1009,7 +1009,7 @@ def add_tensor_parallel_hooks_to_module(


def shard_and_distribute_module(
-model, param, empty_param, parameter_name, param_casting_dtype, is_contiguous, rank, device_mesh, set_param=True
+model, param, empty_param, parameter_name, param_casting_dtype, is_contiguous, rank, device_mesh
): # TODO: rename to shard_and_distribute_param
r"""
This function is called in `from_pretrained` when loading a model's checkpoints.
2 changes: 0 additions & 2 deletions src/transformers/modeling_utils.py
@@ -2103,8 +2103,6 @@ def tp_plan(self, plan: dict[str, str]):
if hasattr(self, "named_parameters"):
model_param_names = [name for name, _ in self.named_parameters()]
if model_param_names: # Only validate if model has parameters
-import re

for layer_pattern in plan.keys():
# Convert pattern to regex (replace * with .*)
regex_pattern = layer_pattern.replace("*", r"\d+")
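A small standalone sketch of the validation step shown above (the plan key and parameter names are made up): the glob-style tp_plan key is turned into a regex and checked against the model's parameter names, and the module-level re import already covers this, so the local import re inside the setter was redundant.

import re

layer_pattern = "model.layers.*.self_attn.q_proj"  # hypothetical tp_plan key
model_param_names = ["model.layers.0.self_attn.q_proj.weight", "lm_head.weight"]

# Convert the pattern to a regex (replace * with \d+), as in the snippet above.
regex_pattern = layer_pattern.replace("*", r"\d+")
matching = [name for name in model_param_names if re.match(regex_pattern, name)]
print(matching)  # ['model.layers.0.self_attn.q_proj.weight']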
1 change: 0 additions & 1 deletion src/transformers/pipelines/__init__.py
@@ -814,7 +814,6 @@ def pipeline(

# Retrieve the task
if task in custom_tasks:
-normalized_task = task
targeted_task, task_options = clean_custom_task(custom_tasks[task])
if pipeline_class is None:
if not trust_remote_code:
2 changes: 1 addition & 1 deletion src/transformers/pipelines/question_answering.py
@@ -658,7 +658,7 @@ def span_to_answer(self, text: str, start: int, end: int) -> dict[str, Union[str
words = []
token_idx = char_start_idx = char_end_idx = chars_idx = 0

-for i, word in enumerate(text.split(" ")):
+for word in text.split(" "):
token = self.tokenizer.tokenize(word)

# Append words if they are in the span
2 changes: 1 addition & 1 deletion src/transformers/tokenization_utils_base.py
@@ -2138,7 +2138,7 @@ def _from_pretrained(
if template_file is None:
continue # I think this should never happen, but just in case
template_name = extra_chat_template.removeprefix("chat_template_")
-with open(template_file) as chat_template_handle:
+with open(template_file, encoding="utf8") as chat_template_handle:
chat_templates[template_name] = chat_template_handle.read()
if len(chat_templates) == 1 and "default" in chat_templates:
init_kwargs["chat_template"] = chat_templates["default"]
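A minimal sketch of why the explicit encoding matters (the template file name here is hypothetical): without an encoding argument, open() falls back to the platform's locale encoding, which is not UTF-8 on every system, so a chat template containing non-ASCII characters could raise UnicodeDecodeError when loaded.

# Hypothetical extra chat template saved as UTF-8 alongside the tokenizer files.
with open("chat_template_tools.jinja", encoding="utf8") as chat_template_handle:
    chat_template = chat_template_handle.read()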