huggingface
diff --git a/src/transformers/audio_utils.py b/src/transformers/audio_utils.py
Lines changed: 5 additions & 2 deletions
diff --git a/src/transformers/commands/serving.py b/src/transformers/commands/serving.py
Lines changed: 3 additions & 3 deletions
diff --git a/src/transformers/feature_extraction_utils.py b/src/transformers/feature_extraction_utils.py
Lines changed: 7 additions & 6 deletions
diff --git a/src/transformers/generation/utils.py b/src/transformers/generation/utils.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/transformers/generation/watermarking.py b/src/transformers/generation/watermarking.py
Lines changed: 18 additions & 18 deletions
diff --git a/src/transformers/image_processing_utils_fast.py b/src/transformers/image_processing_utils_fast.py
Lines changed: 2 additions & 2 deletions
diff --git a/src/transformers/image_transforms.py b/src/transformers/image_transforms.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/transformers/image_utils.py b/src/transformers/image_utils.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/transformers/models/aria/image_processing_aria.py b/src/transformers/models/aria/image_processing_aria.py
Lines changed: 10 additions & 10 deletions
diff --git a/src/transformers/models/aria/modular_aria.py b/src/transformers/models/aria/modular_aria.py
Lines changed: 7 additions & 7 deletions
@@ -23,8 +23,11 @@
 import warnings
 from collections.abc import Sequence
 from io import BytesIO
-from typing import Any, Optional, Union
+from typing import TYPE_CHECKING, Any, Optional, Union
 
+
+if TYPE_CHECKING:
+    import torch
 import numpy as np
 import requests
 from packaging import version
@@ -51,7 +54,7 @@
 if is_torchcodec_available():
     TORCHCODEC_VERSION = version.parse(importlib.metadata.version("torchcodec"))
 
-AudioInput = Union[np.ndarray, "torch.Tensor", Sequence[np.ndarray], Sequence["torch.Tensor"]]  # noqa: F821
+AudioInput = Union[np.ndarray, "torch.Tensor", Sequence[np.ndarray], Sequence["torch.Tensor"]]
 
 
 def load_audio(audio: Union[str, np.ndarray], sampling_rate=16000, timeout=None) -> np.ndarray:
 
@@ -31,7 +31,7 @@
 from dataclasses import dataclass, field
 from io import BytesIO
 from threading import Thread
-from typing import Optional, Union
+from typing import Optional, TypedDict, Union
 
 from huggingface_hub import model_info
 from huggingface_hub.constants import HF_HUB_OFFLINE
@@ -528,7 +528,7 @@ def __init__(self, args: ServeArguments):
     def _validate_request(
         self,
         request: dict,
-        schema: "_TypedDictMeta",  # noqa: F821
+        schema: TypedDict,
         validator: "TypeAdapter",
         unused_fields: set,
     ):
@@ -538,7 +538,7 @@ def _validate_request(
         Args:
             request (`dict`):
                 The request to validate.
-            schema (`_TypedDictMeta`):
+            schema (`TypedDict`):
                 The schema of the request to validate. It is a `TypedDict` definition.
             validator (`TypeAdapter`):
                 The validator to use to validate the request. Built from `schema`.
 
@@ -48,13 +48,12 @@
 
 
 if TYPE_CHECKING:
-    if is_torch_available():
-        import torch  # noqa
+    from .feature_extraction_sequence_utils import SequenceFeatureExtractor
 
 
 logger = logging.get_logger(__name__)
 
-PreTrainedFeatureExtractor = Union["SequenceFeatureExtractor"]  # noqa: F821
+PreTrainedFeatureExtractor = Union["SequenceFeatureExtractor"]
 
 # type hinting: specifying the type of feature extractor class that inherits from FeatureExtractionMixin
 SpecificFeatureExtractorType = TypeVar("SpecificFeatureExtractorType", bound="FeatureExtractionMixin")
@@ -127,7 +126,7 @@ def _get_is_as_tensor_fns(self, tensor_type: Optional[Union[str, TensorType]] =
         elif tensor_type == TensorType.PYTORCH:
             if not is_torch_available():
                 raise ImportError("Unable to convert output to PyTorch tensors format, PyTorch is not installed.")
-            import torch  # noqa
+            import torch
 
             def as_tensor(value):
                 if isinstance(value, (list, tuple)) and len(value) > 0:
@@ -563,7 +562,9 @@ def get_feature_extractor_dict(
         return feature_extractor_dict, kwargs
 
     @classmethod
-    def from_dict(cls, feature_extractor_dict: dict[str, Any], **kwargs) -> PreTrainedFeatureExtractor:
+    def from_dict(
+        cls, feature_extractor_dict: dict[str, Any], **kwargs
+    ) -> Union["FeatureExtractionMixin", tuple["FeatureExtractionMixin", dict[str, Any]]]:
         """
         Instantiates a type of [`~feature_extraction_utils.FeatureExtractionMixin`] from a Python dictionary of
         parameters.
@@ -613,7 +614,7 @@ def to_dict(self) -> dict[str, Any]:
         return output
 
     @classmethod
-    def from_json_file(cls, json_file: Union[str, os.PathLike]) -> PreTrainedFeatureExtractor:
+    def from_json_file(cls, json_file: Union[str, os.PathLike]) -> "FeatureExtractionMixin":
         """
         Instantiates a feature extractor of type [`~feature_extraction_utils.FeatureExtractionMixin`] from the path to
         a JSON file of parameters.
 
@@ -3454,7 +3454,7 @@ def _assisted_decoding(
         generation_config: GenerationConfig,
         synced_gpus: bool = False,
         streamer: Optional["BaseStreamer"] = None,
-        inputs_tensor: torch.FloatTensor = None,
+        inputs_tensor: Optional[torch.FloatTensor] = None,
         assistant_model: Optional["PreTrainedModel"] = None,
         assistant_tokenizer: Optional["PreTrainedTokenizerBase"] = None,
         tokenizer: Optional["PreTrainedTokenizerBase"] = None,
 
@@ -43,31 +43,31 @@ class WatermarkDetectorOutput:
     Outputs of a watermark detector.
 
     Args:
-        num_tokens_scored (np.array of shape (batch_size)):
+        num_tokens_scored (np.ndarray of shape (batch_size)):
             Array containing the number of tokens scored for each element in the batch.
-        num_green_tokens (np.array of shape (batch_size)):
+        num_green_tokens (np.ndarray of shape (batch_size)):
             Array containing the number of green tokens for each element in the batch.
-        green_fraction (np.array of shape (batch_size)):
+        green_fraction (np.ndarray of shape (batch_size)):
             Array containing the fraction of green tokens for each element in the batch.
-        z_score (np.array of shape (batch_size)):
+        z_score (np.ndarray of shape (batch_size)):
             Array containing the z-score for each element in the batch. Z-score here shows
             how many standard deviations away is the green token count in the input text
             from the expected green token count for machine-generated text.
-        p_value (np.array of shape (batch_size)):
+        p_value (np.ndarray of shape (batch_size)):
             Array containing the p-value for each batch obtained from z-scores.
-        prediction (np.array of shape (batch_size)), *optional*:
+        prediction (np.ndarray of shape (batch_size)), *optional*:
             Array containing boolean predictions whether a text is machine-generated for each element in the batch.
-        confidence (np.array of shape (batch_size)), *optional*:
+        confidence (np.ndarray of shape (batch_size)), *optional*:
             Array containing confidence scores of a text being machine-generated for each element in the batch.
     """
 
-    num_tokens_scored: Optional[np.array] = None
-    num_green_tokens: Optional[np.array] = None
-    green_fraction: Optional[np.array] = None
-    z_score: Optional[np.array] = None
-    p_value: Optional[np.array] = None
-    prediction: Optional[np.array] = None
-    confidence: Optional[np.array] = None
+    num_tokens_scored: Optional[np.ndarray] = None
+    num_green_tokens: Optional[np.ndarray] = None
+    green_fraction: Optional[np.ndarray] = None
+    z_score: Optional[np.ndarray] = None
+    p_value: Optional[np.ndarray] = None
+    prediction: Optional[np.ndarray] = None
+    confidence: Optional[np.ndarray] = None
 
 
 class WatermarkDetector:
@@ -179,7 +179,7 @@ def _score_ngrams_in_passage(self, input_ids: torch.LongTensor):
                 )
         return num_tokens_scored_batch, green_token_count_batch
 
-    def _compute_z_score(self, green_token_count: np.ndarray, total_num_tokens: np.ndarray) -> np.array:
+    def _compute_z_score(self, green_token_count: np.ndarray, total_num_tokens: np.ndarray) -> np.ndarray:
         expected_count = self.greenlist_ratio
         numer = green_token_count - expected_count * total_num_tokens
         denom = np.sqrt(total_num_tokens * expected_count * (1 - expected_count))
@@ -195,7 +195,7 @@ def __call__(
         input_ids: torch.LongTensor,
         z_threshold: float = 3.0,
         return_dict: bool = False,
-    ) -> Union[WatermarkDetectorOutput, np.array]:
+    ) -> Union[WatermarkDetectorOutput, np.ndarray]:
         """
                 Args:
                 input_ids (`torch.LongTensor`):
@@ -207,8 +207,8 @@ def __call__(
                     Whether to return `~generation.WatermarkDetectorOutput` or not. If not it will return boolean predictions,
         ma
                 Return:
-                    [`~generation.WatermarkDetectorOutput`] or `np.array`: A [`~generation.WatermarkDetectorOutput`]
-                    if `return_dict=True` otherwise a `np.array`.
+                    [`~generation.WatermarkDetectorOutput`] or `np.ndarray`: A [`~generation.WatermarkDetectorOutput`]
+                    if `return_dict=True` otherwise a `np.ndarray`.
 
         """
 
 
@@ -142,8 +142,8 @@ def get_max_height_width(images: list["torch.Tensor"]) -> tuple[int, ...]:
 
 
 def divide_to_patches(
-    image: Union[np.array, "torch.Tensor"], patch_size: int
-) -> list[Union[np.array, "torch.Tensor"]]:
+    image: Union[np.ndarray, "torch.Tensor"], patch_size: int
+) -> list[Union[np.ndarray, "torch.Tensor"]]:
     """
     Divides an image into patches of a specified size.
 
 
@@ -323,7 +323,7 @@ def get_resize_output_image_size(
 def resize(
     image: np.ndarray,
     size: tuple[int, int],
-    resample: "PILImageResampling" = None,
+    resample: Optional["PILImageResampling"] = None,
     reducing_gap: Optional[int] = None,
     data_format: Optional[ChannelDimension] = None,
     return_numpy: bool = True,
 
@@ -579,7 +579,7 @@ class ImageFeatureExtractionMixin:
     def _ensure_format_supported(self, image):
         if not isinstance(image, (PIL.Image.Image, np.ndarray)) and not is_torch_tensor(image):
             raise ValueError(
-                f"Got type {type(image)} which is not supported, only `PIL.Image.Image`, `np.array` and "
+                f"Got type {type(image)} which is not supported, only `PIL.Image.Image`, `np.ndarray` and "
                 "`torch.Tensor` are."
             )
 
 
@@ -43,20 +43,20 @@
 logger = logging.get_logger(__name__)
 
 
-def divide_to_patches(image: np.ndarray, patch_size: int, input_data_format) -> list[np.array]:
+def divide_to_patches(image: np.ndarray, patch_size: int, input_data_format) -> list[np.ndarray]:
     """
     Divides an image into patches of a specified size.
 
     Args:
-        image (`np.array`):
+        image (`np.ndarray`):
             The input image.
         patch_size (`int`):
             The size of each patch.
         input_data_format (`ChannelDimension` or `str`):
             The channel dimension format of the input image.
 
     Returns:
-        list: A list of np.array representing the patches.
+        list: A list of np.ndarray representing the patches.
     """
     patches = []
     height, width = get_image_size(image, channel_dim=input_data_format)
@@ -342,12 +342,12 @@ def preprocess(
 
     def _resize_for_patching(
         self, image: np.ndarray, target_resolution: tuple, resample, input_data_format: ChannelDimension
-    ) -> np.array:
+    ) -> np.ndarray:
         """
         Resizes an image to a target resolution while maintaining aspect ratio.
 
         Args:
-            image (np.array):
+            image (np.ndarray):
                 The input image.
             target_resolution (tuple):
                 The target resolution (height, width) of the image.
@@ -357,7 +357,7 @@ def _resize_for_patching(
                 The channel dimension format of the input image.
 
         Returns:
-            np.array: The resized and padded image.
+            np.ndarray: The resized and padded image.
         """
         new_height, new_width = get_patch_output_size(image, target_resolution, input_data_format)
 
@@ -375,7 +375,7 @@ def _get_padding_size(self, original_resolution: tuple, target_resolution: tuple
 
     def _pad_for_patching(
         self, image: np.ndarray, target_resolution: tuple, input_data_format: ChannelDimension
-    ) -> np.array:
+    ) -> np.ndarray:
         """
         Pad an image to a target resolution while maintaining aspect ratio.
         """
@@ -460,12 +460,12 @@ def get_image_patches(
         resample: PILImageResampling,
         data_format: ChannelDimension,
         input_data_format: ChannelDimension,
-    ) -> list[np.array]:
+    ) -> list[np.ndarray]:
         """
         Process an image with variable resolutions by dividing it into patches.
 
         Args:
-            image (`np.array`):
+            image (`np.ndarray`):
                 The input image to be processed.
             grid_pinpoints (list[tuple[int, int]]):
                 A list of possible resolutions as tuples.
@@ -479,7 +479,7 @@ def get_image_patches(
                 The channel dimension format of the input image.
 
         Returns:
-            `list[np.array]`: A list of NumPy arrays containing the processed image patches.
+            `list[np.ndarray]`: A list of NumPy arrays containing the processed image patches.
         """
         if not isinstance(grid_pinpoints, list):
             raise TypeError("grid_pinpoints must be a list of possible resolutions.")
 
@@ -725,12 +725,12 @@ def preprocess(
 
     def _resize_for_patching(
         self, image: np.ndarray, target_resolution: tuple, resample, input_data_format: ChannelDimension
-    ) -> np.array:
+    ) -> np.ndarray:
         """
         Resizes an image to a target resolution while maintaining aspect ratio.
 
         Args:
-            image (np.array):
+            image (np.ndarray):
                 The input image.
             target_resolution (tuple):
                 The target resolution (height, width) of the image.
@@ -740,7 +740,7 @@ def _resize_for_patching(
                 The channel dimension format of the input image.
 
         Returns:
-            np.array: The resized and padded image.
+            np.ndarray: The resized and padded image.
         """
         new_height, new_width = get_patch_output_size(image, target_resolution, input_data_format)
 
@@ -758,7 +758,7 @@ def _get_padding_size(self, original_resolution: tuple, target_resolution: tuple
 
     def _pad_for_patching(
         self, image: np.ndarray, target_resolution: tuple, input_data_format: ChannelDimension
-    ) -> np.array:
+    ) -> np.ndarray:
         """
         Pad an image to a target resolution while maintaining aspect ratio.
         """
@@ -843,12 +843,12 @@ def get_image_patches(
         resample: PILImageResampling,
         data_format: ChannelDimension,
         input_data_format: ChannelDimension,
-    ) -> list[np.array]:
+    ) -> list[np.ndarray]:
         """
         Process an image with variable resolutions by dividing it into patches.
 
         Args:
-            image (`np.array`):
+            image (`np.ndarray`):
                 The input image to be processed.
             grid_pinpoints (list[tuple[int, int]]):
                 A list of possible resolutions as tuples.
@@ -862,7 +862,7 @@ def get_image_patches(
                 The channel dimension format of the input image.
 
         Returns:
-            `list[np.array]`: A list of NumPy arrays containing the processed image patches.
+            `list[np.ndarray]`: A list of NumPy arrays containing the processed image patches.
         """
         if not isinstance(grid_pinpoints, list):
             raise TypeError("grid_pinpoints must be a list of possible resolutions.")
Original file line number	Diff line number	Diff line change
`@@ -579,7 +579,7 @@ class ImageFeatureExtractionMixin:`
`579`	`579`	`def _ensure_format_supported(self, image):`
`580`	`580`	`if not isinstance(image, (PIL.Image.Image, np.ndarray)) and not is_torch_tensor(image):`
`581`	`581`	`raise ValueError(`
`582`		- f"Got type {type(image)} which is not supported, only `PIL.Image.Image`, `np.array` and "
	`582`	+ f"Got type {type(image)} which is not supported, only `PIL.Image.Image`, `np.ndarray` and "
`583`	`583`	"`torch.Tensor` are."
`584`	`584`	`)`
`585`	`585`