Skip to content

Commit 281b8b6

Browse files
cyyever authored and ArthurZucker committed
Fix typing (#40788)
* Fix optional typing Signed-off-by: Yuanyuan Chen <cyyever@outlook.com> * Fix optional typing Signed-off-by: Yuanyuan Chen <cyyever@outlook.com> * Fix schema typing Signed-off-by: Yuanyuan Chen <cyyever@outlook.com> * Fix typing * Fix typing * Fix typing * Fix typing * Use np.ndarray Signed-off-by: Yuanyuan Chen <cyyever@outlook.com> * Fix typing Signed-off-by: Yuanyuan Chen <cyyever@outlook.com> * Format code Signed-off-by: Yuanyuan Chen <cyyever@outlook.com> * Use np.ndarray Signed-off-by: Yuanyuan Chen <cyyever@outlook.com> * Improve typing Signed-off-by: Yuanyuan Chen <cyyever@outlook.com> * Fix quote string of np.ndarray Signed-off-by: Yuanyuan Chen <cyyever@outlook.com> * More fixes Signed-off-by: Yuanyuan Chen <cyyever@outlook.com> * Fix code * Format Signed-off-by: Yuanyuan Chen <cyyever@outlook.com> --------- Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
1 parent 9957b44 commit 281b8b6

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

43 files changed

+140
-134
lines changed

src/transformers/audio_utils.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,8 +23,11 @@
2323
import warnings
2424
from collections.abc import Sequence
2525
from io import BytesIO
26-
from typing import Any, Optional, Union
26+
from typing import TYPE_CHECKING, Any, Optional, Union
2727

28+
29+
if TYPE_CHECKING:
30+
import torch
2831
import numpy as np
2932
import requests
3033
from packaging import version
@@ -51,7 +54,7 @@
5154
if is_torchcodec_available():
5255
TORCHCODEC_VERSION = version.parse(importlib.metadata.version("torchcodec"))
5356

54-
AudioInput = Union[np.ndarray, "torch.Tensor", Sequence[np.ndarray], Sequence["torch.Tensor"]] # noqa: F821
57+
AudioInput = Union[np.ndarray, "torch.Tensor", Sequence[np.ndarray], Sequence["torch.Tensor"]]
5558

5659

5760
def load_audio(audio: Union[str, np.ndarray], sampling_rate=16000, timeout=None) -> np.ndarray:

src/transformers/commands/serving.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@
3131
from dataclasses import dataclass, field
3232
from io import BytesIO
3333
from threading import Thread
34-
from typing import Optional, Union
34+
from typing import Optional, TypedDict, Union
3535

3636
from huggingface_hub import model_info
3737
from huggingface_hub.constants import HF_HUB_OFFLINE
@@ -528,7 +528,7 @@ def __init__(self, args: ServeArguments):
528528
def _validate_request(
529529
self,
530530
request: dict,
531-
schema: "_TypedDictMeta", # noqa: F821
531+
schema: TypedDict,
532532
validator: "TypeAdapter",
533533
unused_fields: set,
534534
):
@@ -538,7 +538,7 @@ def _validate_request(
538538
Args:
539539
request (`dict`):
540540
The request to validate.
541-
schema (`_TypedDictMeta`):
541+
schema (`TypedDict`):
542542
The schema of the request to validate. It is a `TypedDict` definition.
543543
validator (`TypeAdapter`):
544544
The validator to use to validate the request. Built from `schema`.

src/transformers/feature_extraction_utils.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -48,13 +48,12 @@
4848

4949

5050
if TYPE_CHECKING:
51-
if is_torch_available():
52-
import torch # noqa
51+
from .feature_extraction_sequence_utils import SequenceFeatureExtractor
5352

5453

5554
logger = logging.get_logger(__name__)
5655

57-
PreTrainedFeatureExtractor = Union["SequenceFeatureExtractor"] # noqa: F821
56+
PreTrainedFeatureExtractor = Union["SequenceFeatureExtractor"]
5857

5958
# type hinting: specifying the type of feature extractor class that inherits from FeatureExtractionMixin
6059
SpecificFeatureExtractorType = TypeVar("SpecificFeatureExtractorType", bound="FeatureExtractionMixin")
@@ -127,7 +126,7 @@ def _get_is_as_tensor_fns(self, tensor_type: Optional[Union[str, TensorType]] =
127126
elif tensor_type == TensorType.PYTORCH:
128127
if not is_torch_available():
129128
raise ImportError("Unable to convert output to PyTorch tensors format, PyTorch is not installed.")
130-
import torch # noqa
129+
import torch
131130

132131
def as_tensor(value):
133132
if isinstance(value, (list, tuple)) and len(value) > 0:
@@ -563,7 +562,9 @@ def get_feature_extractor_dict(
563562
return feature_extractor_dict, kwargs
564563

565564
@classmethod
566-
def from_dict(cls, feature_extractor_dict: dict[str, Any], **kwargs) -> PreTrainedFeatureExtractor:
565+
def from_dict(
566+
cls, feature_extractor_dict: dict[str, Any], **kwargs
567+
) -> Union["FeatureExtractionMixin", tuple["FeatureExtractionMixin", dict[str, Any]]]:
567568
"""
568569
Instantiates a type of [`~feature_extraction_utils.FeatureExtractionMixin`] from a Python dictionary of
569570
parameters.
@@ -613,7 +614,7 @@ def to_dict(self) -> dict[str, Any]:
613614
return output
614615

615616
@classmethod
616-
def from_json_file(cls, json_file: Union[str, os.PathLike]) -> PreTrainedFeatureExtractor:
617+
def from_json_file(cls, json_file: Union[str, os.PathLike]) -> "FeatureExtractionMixin":
617618
"""
618619
Instantiates a feature extractor of type [`~feature_extraction_utils.FeatureExtractionMixin`] from the path to
619620
a JSON file of parameters.

src/transformers/generation/utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3454,7 +3454,7 @@ def _assisted_decoding(
34543454
generation_config: GenerationConfig,
34553455
synced_gpus: bool = False,
34563456
streamer: Optional["BaseStreamer"] = None,
3457-
inputs_tensor: torch.FloatTensor = None,
3457+
inputs_tensor: Optional[torch.FloatTensor] = None,
34583458
assistant_model: Optional["PreTrainedModel"] = None,
34593459
assistant_tokenizer: Optional["PreTrainedTokenizerBase"] = None,
34603460
tokenizer: Optional["PreTrainedTokenizerBase"] = None,

src/transformers/generation/watermarking.py

Lines changed: 18 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -43,31 +43,31 @@ class WatermarkDetectorOutput:
4343
Outputs of a watermark detector.
4444
4545
Args:
46-
num_tokens_scored (np.array of shape (batch_size)):
46+
num_tokens_scored (np.ndarray of shape (batch_size)):
4747
Array containing the number of tokens scored for each element in the batch.
48-
num_green_tokens (np.array of shape (batch_size)):
48+
num_green_tokens (np.ndarray of shape (batch_size)):
4949
Array containing the number of green tokens for each element in the batch.
50-
green_fraction (np.array of shape (batch_size)):
50+
green_fraction (np.ndarray of shape (batch_size)):
5151
Array containing the fraction of green tokens for each element in the batch.
52-
z_score (np.array of shape (batch_size)):
52+
z_score (np.ndarray of shape (batch_size)):
5353
Array containing the z-score for each element in the batch. Z-score here shows
5454
how many standard deviations away is the green token count in the input text
5555
from the expected green token count for machine-generated text.
56-
p_value (np.array of shape (batch_size)):
56+
p_value (np.ndarray of shape (batch_size)):
5757
Array containing the p-value for each batch obtained from z-scores.
58-
prediction (np.array of shape (batch_size)), *optional*:
58+
prediction (np.ndarray of shape (batch_size)), *optional*:
5959
Array containing boolean predictions whether a text is machine-generated for each element in the batch.
60-
confidence (np.array of shape (batch_size)), *optional*:
60+
confidence (np.ndarray of shape (batch_size)), *optional*:
6161
Array containing confidence scores of a text being machine-generated for each element in the batch.
6262
"""
6363

64-
num_tokens_scored: Optional[np.array] = None
65-
num_green_tokens: Optional[np.array] = None
66-
green_fraction: Optional[np.array] = None
67-
z_score: Optional[np.array] = None
68-
p_value: Optional[np.array] = None
69-
prediction: Optional[np.array] = None
70-
confidence: Optional[np.array] = None
64+
num_tokens_scored: Optional[np.ndarray] = None
65+
num_green_tokens: Optional[np.ndarray] = None
66+
green_fraction: Optional[np.ndarray] = None
67+
z_score: Optional[np.ndarray] = None
68+
p_value: Optional[np.ndarray] = None
69+
prediction: Optional[np.ndarray] = None
70+
confidence: Optional[np.ndarray] = None
7171

7272

7373
class WatermarkDetector:
@@ -179,7 +179,7 @@ def _score_ngrams_in_passage(self, input_ids: torch.LongTensor):
179179
)
180180
return num_tokens_scored_batch, green_token_count_batch
181181

182-
def _compute_z_score(self, green_token_count: np.ndarray, total_num_tokens: np.ndarray) -> np.array:
182+
def _compute_z_score(self, green_token_count: np.ndarray, total_num_tokens: np.ndarray) -> np.ndarray:
183183
expected_count = self.greenlist_ratio
184184
numer = green_token_count - expected_count * total_num_tokens
185185
denom = np.sqrt(total_num_tokens * expected_count * (1 - expected_count))
@@ -195,7 +195,7 @@ def __call__(
195195
input_ids: torch.LongTensor,
196196
z_threshold: float = 3.0,
197197
return_dict: bool = False,
198-
) -> Union[WatermarkDetectorOutput, np.array]:
198+
) -> Union[WatermarkDetectorOutput, np.ndarray]:
199199
"""
200200
Args:
201201
input_ids (`torch.LongTensor`):
@@ -207,8 +207,8 @@ def __call__(
207207
Whether to return `~generation.WatermarkDetectorOutput` or not. If not it will return boolean predictions,
208208
ma
209209
Return:
210-
[`~generation.WatermarkDetectorOutput`] or `np.array`: A [`~generation.WatermarkDetectorOutput`]
211-
if `return_dict=True` otherwise a `np.array`.
210+
[`~generation.WatermarkDetectorOutput`] or `np.ndarray`: A [`~generation.WatermarkDetectorOutput`]
211+
if `return_dict=True` otherwise a `np.ndarray`.
212212
213213
"""
214214

src/transformers/image_processing_utils_fast.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -142,8 +142,8 @@ def get_max_height_width(images: list["torch.Tensor"]) -> tuple[int, ...]:
142142

143143

144144
def divide_to_patches(
145-
image: Union[np.array, "torch.Tensor"], patch_size: int
146-
) -> list[Union[np.array, "torch.Tensor"]]:
145+
image: Union[np.ndarray, "torch.Tensor"], patch_size: int
146+
) -> list[Union[np.ndarray, "torch.Tensor"]]:
147147
"""
148148
Divides an image into patches of a specified size.
149149

src/transformers/image_transforms.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -323,7 +323,7 @@ def get_resize_output_image_size(
323323
def resize(
324324
image: np.ndarray,
325325
size: tuple[int, int],
326-
resample: "PILImageResampling" = None,
326+
resample: Optional["PILImageResampling"] = None,
327327
reducing_gap: Optional[int] = None,
328328
data_format: Optional[ChannelDimension] = None,
329329
return_numpy: bool = True,

src/transformers/image_utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -579,7 +579,7 @@ class ImageFeatureExtractionMixin:
579579
def _ensure_format_supported(self, image):
580580
if not isinstance(image, (PIL.Image.Image, np.ndarray)) and not is_torch_tensor(image):
581581
raise ValueError(
582-
f"Got type {type(image)} which is not supported, only `PIL.Image.Image`, `np.array` and "
582+
f"Got type {type(image)} which is not supported, only `PIL.Image.Image`, `np.ndarray` and "
583583
"`torch.Tensor` are."
584584
)
585585

src/transformers/models/aria/image_processing_aria.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -43,20 +43,20 @@
4343
logger = logging.get_logger(__name__)
4444

4545

46-
def divide_to_patches(image: np.ndarray, patch_size: int, input_data_format) -> list[np.array]:
46+
def divide_to_patches(image: np.ndarray, patch_size: int, input_data_format) -> list[np.ndarray]:
4747
"""
4848
Divides an image into patches of a specified size.
4949
5050
Args:
51-
image (`np.array`):
51+
image (`np.ndarray`):
5252
The input image.
5353
patch_size (`int`):
5454
The size of each patch.
5555
input_data_format (`ChannelDimension` or `str`):
5656
The channel dimension format of the input image.
5757
5858
Returns:
59-
list: A list of np.array representing the patches.
59+
list: A list of np.ndarray representing the patches.
6060
"""
6161
patches = []
6262
height, width = get_image_size(image, channel_dim=input_data_format)
@@ -342,12 +342,12 @@ def preprocess(
342342

343343
def _resize_for_patching(
344344
self, image: np.ndarray, target_resolution: tuple, resample, input_data_format: ChannelDimension
345-
) -> np.array:
345+
) -> np.ndarray:
346346
"""
347347
Resizes an image to a target resolution while maintaining aspect ratio.
348348
349349
Args:
350-
image (np.array):
350+
image (np.ndarray):
351351
The input image.
352352
target_resolution (tuple):
353353
The target resolution (height, width) of the image.
@@ -357,7 +357,7 @@ def _resize_for_patching(
357357
The channel dimension format of the input image.
358358
359359
Returns:
360-
np.array: The resized and padded image.
360+
np.ndarray: The resized and padded image.
361361
"""
362362
new_height, new_width = get_patch_output_size(image, target_resolution, input_data_format)
363363

@@ -375,7 +375,7 @@ def _get_padding_size(self, original_resolution: tuple, target_resolution: tuple
375375

376376
def _pad_for_patching(
377377
self, image: np.ndarray, target_resolution: tuple, input_data_format: ChannelDimension
378-
) -> np.array:
378+
) -> np.ndarray:
379379
"""
380380
Pad an image to a target resolution while maintaining aspect ratio.
381381
"""
@@ -460,12 +460,12 @@ def get_image_patches(
460460
resample: PILImageResampling,
461461
data_format: ChannelDimension,
462462
input_data_format: ChannelDimension,
463-
) -> list[np.array]:
463+
) -> list[np.ndarray]:
464464
"""
465465
Process an image with variable resolutions by dividing it into patches.
466466
467467
Args:
468-
image (`np.array`):
468+
image (`np.ndarray`):
469469
The input image to be processed.
470470
grid_pinpoints (list[tuple[int, int]]):
471471
A list of possible resolutions as tuples.
@@ -479,7 +479,7 @@ def get_image_patches(
479479
The channel dimension format of the input image.
480480
481481
Returns:
482-
`list[np.array]`: A list of NumPy arrays containing the processed image patches.
482+
`list[np.ndarray]`: A list of NumPy arrays containing the processed image patches.
483483
"""
484484
if not isinstance(grid_pinpoints, list):
485485
raise TypeError("grid_pinpoints must be a list of possible resolutions.")

src/transformers/models/aria/modular_aria.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -725,12 +725,12 @@ def preprocess(
725725

726726
def _resize_for_patching(
727727
self, image: np.ndarray, target_resolution: tuple, resample, input_data_format: ChannelDimension
728-
) -> np.array:
728+
) -> np.ndarray:
729729
"""
730730
Resizes an image to a target resolution while maintaining aspect ratio.
731731
732732
Args:
733-
image (np.array):
733+
image (np.ndarray):
734734
The input image.
735735
target_resolution (tuple):
736736
The target resolution (height, width) of the image.
@@ -740,7 +740,7 @@ def _resize_for_patching(
740740
The channel dimension format of the input image.
741741
742742
Returns:
743-
np.array: The resized and padded image.
743+
np.ndarray: The resized and padded image.
744744
"""
745745
new_height, new_width = get_patch_output_size(image, target_resolution, input_data_format)
746746

@@ -758,7 +758,7 @@ def _get_padding_size(self, original_resolution: tuple, target_resolution: tuple
758758

759759
def _pad_for_patching(
760760
self, image: np.ndarray, target_resolution: tuple, input_data_format: ChannelDimension
761-
) -> np.array:
761+
) -> np.ndarray:
762762
"""
763763
Pad an image to a target resolution while maintaining aspect ratio.
764764
"""
@@ -843,12 +843,12 @@ def get_image_patches(
843843
resample: PILImageResampling,
844844
data_format: ChannelDimension,
845845
input_data_format: ChannelDimension,
846-
) -> list[np.array]:
846+
) -> list[np.ndarray]:
847847
"""
848848
Process an image with variable resolutions by dividing it into patches.
849849
850850
Args:
851-
image (`np.array`):
851+
image (`np.ndarray`):
852852
The input image to be processed.
853853
grid_pinpoints (list[tuple[int, int]]):
854854
A list of possible resolutions as tuples.
@@ -862,7 +862,7 @@ def get_image_patches(
862862
The channel dimension format of the input image.
863863
864864
Returns:
865-
`list[np.array]`: A list of NumPy arrays containing the processed image patches.
865+
`list[np.ndarray]`: A list of NumPy arrays containing the processed image patches.
866866
"""
867867
if not isinstance(grid_pinpoints, list):
868868
raise TypeError("grid_pinpoints must be a list of possible resolutions.")

0 commit comments

Comments
 (0)