Skip to content

Commit 01c98a5

Browse files
DEV: Update code style related libraries (#3414)
This commit also includes the code changes required to keep CI passing cleanly with the updated libraries.
1 parent 6e617af commit 01c98a5

40 files changed

+484
-514
lines changed

.pre-commit-config.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,16 @@ repos:
1919
args: ['--maxkb=1000']
2020

2121
- repo: https://github.com/charliermarsh/ruff-pre-commit
22-
rev: v0.12.0
22+
rev: v0.12.7
2323
hooks:
24-
- id: ruff
24+
- id: ruff-check
2525
args: ['--fix']
2626

2727
- repo: https://github.com/asottile/pyupgrade
28-
rev: v3.19.1
28+
rev: v3.20.0
2929
hooks:
3030
- id: pyupgrade
31-
args: [--py38-plus]
31+
args: [--py39-plus]
3232

3333
- repo: https://github.com/pre-commit/mirrors-mypy
3434
rev: 'v1.17.0'

make_release.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import urllib.request
66
from dataclasses import dataclass
77
from datetime import datetime, timezone
8-
from typing import Dict, List, Tuple
98

109
GH_ORG = "py-pdf"
1110
GH_PROJECT = "pypdf"
@@ -185,7 +184,7 @@ def write_changelog(new_changelog: str, changelog_path: str) -> None:
185184
fh.write(new_changelog)
186185

187186

188-
def get_formatted_changes(git_tag: str) -> Tuple[str, str]:
187+
def get_formatted_changes(git_tag: str) -> tuple[str, str]:
189188
"""
190189
Format the changes done since the last tag.
191190
@@ -277,7 +276,7 @@ def get_most_recent_git_tag() -> str:
277276
).strip()
278277

279278

280-
def get_author_mapping(line_count: int) -> Dict[str, str]:
279+
def get_author_mapping(line_count: int) -> dict[str, str]:
281280
"""
282281
Get the authors for each commit.
283282
@@ -291,7 +290,7 @@ def get_author_mapping(line_count: int) -> Dict[str, str]:
291290
"""
292291
per_page = min(line_count, 100)
293292
page = 1
294-
mapping: Dict[str, str] = {}
293+
mapping: dict[str, str] = {}
295294
for _ in range(0, line_count, per_page):
296295
with urllib.request.urlopen(
297296
f"https://api.github.com/repos/{GH_ORG}/{GH_PROJECT}/commits?per_page={per_page}&page={page}"
@@ -303,7 +302,7 @@ def get_author_mapping(line_count: int) -> Dict[str, str]:
303302
return mapping
304303

305304

306-
def get_git_commits_since_tag(git_tag: str) -> List[Change]:
305+
def get_git_commits_since_tag(git_tag: str) -> list[Change]:
307306
"""
308307
Get all commits since the last tag.
309308
@@ -334,7 +333,7 @@ def get_git_commits_since_tag(git_tag: str) -> List[Change]:
334333
return [parse_commit_line(line, authors) for line in lines if line != ""]
335334

336335

337-
def parse_commit_line(line: str, authors: Dict[str, str]) -> Change:
336+
def parse_commit_line(line: str, authors: dict[str, str]) -> Change:
338337
"""
339338
Parse the first line of a git commit message.
340339

pypdf/_cmap.py

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from binascii import Error as BinasciiError
33
from binascii import unhexlify
44
from math import ceil
5-
from typing import Any, Dict, List, Tuple, Union, cast
5+
from typing import Any, Union, cast
66

77
from ._codecs import adobe_glyphs, charset_encoding
88
from ._utils import logger_error, logger_warning
@@ -19,7 +19,7 @@
1919
# code freely inspired from @twiggy ; see #711
2020
def build_char_map(
2121
font_name: str, space_width: float, obj: DictionaryObject
22-
) -> Tuple[str, float, Union[str, Dict[int, str]], Dict[Any, Any], DictionaryObject]:
22+
) -> tuple[str, float, Union[str, dict[int, str]], dict[Any, Any], DictionaryObject]:
2323
"""
2424
Determine information about a font.
2525
@@ -42,7 +42,7 @@ def build_char_map(
4242

4343
def build_char_map_from_dict(
4444
space_width: float, ft: DictionaryObject
45-
) -> Tuple[str, float, Union[str, Dict[int, str]], Dict[Any, Any]]:
45+
) -> tuple[str, float, Union[str, dict[int, str]], dict[Any, Any]]:
4646
"""
4747
Determine information about a font.
4848
@@ -73,15 +73,15 @@ def build_char_map_from_dict(
7373

7474

7575
# used when missing data, e.g. font def missing
76-
unknown_char_map: Tuple[str, float, Union[str, Dict[int, str]], Dict[Any, Any]] = (
76+
unknown_char_map: tuple[str, float, Union[str, dict[int, str]], dict[Any, Any]] = (
7777
"Unknown",
7878
9999,
7979
dict.fromkeys(range(256), "�"),
8080
{},
8181
)
8282

8383

84-
_predefined_cmap: Dict[str, str] = {
84+
_predefined_cmap: dict[str, str] = {
8585
"/Identity-H": "utf-16-be",
8686
"/Identity-V": "utf-16-be",
8787
"/GB-EUC-H": "gbk",
@@ -104,7 +104,7 @@ def build_char_map_from_dict(
104104
}
105105

106106
# manually extracted from http://mirrors.ctan.org/fonts/adobe/afm/Adobe-Core35_AFMs-229.tar.gz
107-
_default_fonts_space_width: Dict[str, int] = {
107+
_default_fonts_space_width: dict[str, int] = {
108108
"/Courier": 600,
109109
"/Courier-Bold": 600,
110110
"/Courier-BoldOblique": 600,
@@ -128,7 +128,7 @@ def build_char_map_from_dict(
128128

129129
def get_encoding(
130130
ft: DictionaryObject
131-
) -> Tuple[Union[str, Dict[int, str]], Dict[Any, Any]]:
131+
) -> tuple[Union[str, dict[int, str]], dict[Any, Any]]:
132132
encoding = _parse_encoding(ft)
133133
map_dict, int_entry = _parse_to_unicode(ft)
134134

@@ -146,8 +146,8 @@ def get_encoding(
146146

147147
def _parse_encoding(
148148
ft: DictionaryObject
149-
) -> Union[str, Dict[int, str]]:
150-
encoding: Union[str, List[str], Dict[int, str]] = []
149+
) -> Union[str, dict[int, str]]:
150+
encoding: Union[str, list[str], dict[int, str]] = []
151151
if "/Encoding" not in ft:
152152
if "/BaseFont" in ft and cast(str, ft["/BaseFont"]) in charset_encoding:
153153
encoding = dict(
@@ -205,13 +205,13 @@ def _parse_encoding(
205205

206206
def _parse_to_unicode(
207207
ft: DictionaryObject
208-
) -> Tuple[Dict[Any, Any], List[int]]:
208+
) -> tuple[dict[Any, Any], list[int]]:
209209
# will store all translation code
210210
# and map_dict[-1] we will have the number of bytes to convert
211-
map_dict: Dict[Any, Any] = {}
211+
map_dict: dict[Any, Any] = {}
212212

213213
# will provide the list of cmap keys as int to correct encoding
214-
int_entry: List[int] = []
214+
int_entry: list[int] = []
215215

216216
if "/ToUnicode" not in ft:
217217
if ft.get("/Subtype", "") == "/Type1":
@@ -220,7 +220,7 @@ def _parse_to_unicode(
220220
process_rg: bool = False
221221
process_char: bool = False
222222
multiline_rg: Union[
223-
None, Tuple[int, int]
223+
None, tuple[int, int]
224224
] = None # tuple = (current_char, remaining size) ; cf #1285 for example of file
225225
cm = prepare_cm(ft)
226226
for line in cm.split(b"\n"):
@@ -237,7 +237,7 @@ def _parse_to_unicode(
237237

238238

239239
def get_actual_str_key(
240-
value_char: str, encoding: Union[str, Dict[int, str]], map_dict: Dict[Any, Any]
240+
value_char: str, encoding: Union[str, dict[int, str]], map_dict: dict[Any, Any]
241241
) -> str:
242242
key_dict = {}
243243
if isinstance(encoding, dict):
@@ -292,10 +292,10 @@ def process_cm_line(
292292
line: bytes,
293293
process_rg: bool,
294294
process_char: bool,
295-
multiline_rg: Union[None, Tuple[int, int]],
296-
map_dict: Dict[Any, Any],
297-
int_entry: List[int],
298-
) -> Tuple[bool, bool, Union[None, Tuple[int, int]]]:
295+
multiline_rg: Union[None, tuple[int, int]],
296+
map_dict: dict[Any, Any],
297+
int_entry: list[int],
298+
) -> tuple[bool, bool, Union[None, tuple[int, int]]]:
299299
if line == b"" or line[0] == 37: # 37 = %
300300
return process_rg, process_char, multiline_rg
301301
line = line.replace(b"\t", b" ")
@@ -319,10 +319,10 @@ def process_cm_line(
319319

320320
def parse_bfrange(
321321
line: bytes,
322-
map_dict: Dict[Any, Any],
323-
int_entry: List[int],
324-
multiline_rg: Union[None, Tuple[int, int]],
325-
) -> Union[None, Tuple[int, int]]:
322+
map_dict: dict[Any, Any],
323+
int_entry: list[int],
324+
multiline_rg: Union[None, tuple[int, int]],
325+
) -> Union[None, tuple[int, int]]:
326326
lst = [x for x in line.split(b" ") if x]
327327
closure_found = False
328328
if multiline_rg is not None:
@@ -377,7 +377,7 @@ def parse_bfrange(
377377
return None if closure_found else (a, b)
378378

379379

380-
def parse_bfchar(line: bytes, map_dict: Dict[Any, Any], int_entry: List[int]) -> None:
380+
def parse_bfchar(line: bytes, map_dict: dict[Any, Any], int_entry: list[int]) -> None:
381381
lst = [x for x in line.split(b" ") if x]
382382
map_dict[-1] = len(lst[0]) // 2
383383
while len(lst) > 1:
@@ -401,8 +401,8 @@ def parse_bfchar(line: bytes, map_dict: Dict[Any, Any], int_entry: List[int]) ->
401401

402402
def build_font_width_map(
403403
ft: DictionaryObject, default_font_width: float
404-
) -> Dict[Any, float]:
405-
font_width_map: Dict[Any, float] = {}
404+
) -> dict[Any, float]:
405+
font_width_map: dict[Any, float] = {}
406406
st: int = 0
407407
en: int = 0
408408
try:
@@ -482,7 +482,7 @@ def build_font_width_map(
482482

483483

484484
def compute_space_width(
485-
font_width_map: Dict[Any, float], space_char: str
485+
font_width_map: dict[Any, float], space_char: str
486486
) -> float:
487487
try:
488488
sp_width = font_width_map[space_char]
@@ -497,7 +497,7 @@ def compute_space_width(
497497

498498

499499
def compute_font_width(
500-
font_width_map: Dict[Any, float],
500+
font_width_map: dict[Any, float],
501501
char: str
502502
) -> float:
503503
char_width: float = 0.0
@@ -513,9 +513,9 @@ def compute_font_width(
513513

514514
def _type1_alternative(
515515
ft: DictionaryObject,
516-
map_dict: Dict[Any, Any],
517-
int_entry: List[int],
518-
) -> Tuple[Dict[Any, Any], List[int]]:
516+
map_dict: dict[Any, Any],
517+
int_entry: list[int],
518+
) -> tuple[dict[Any, Any], list[int]]:
519519
if "/FontDescriptor" not in ft:
520520
return map_dict, int_entry
521521
ft_desc = cast(DictionaryObject, ft["/FontDescriptor"]).get("/FontFile")

pypdf/_codecs/__init__.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
1-
from typing import Dict, List
2-
31
from .adobe_glyphs import adobe_glyphs
42
from .pdfdoc import _pdfdoc_encoding
53
from .std import _std_encoding
64
from .symbol import _symbol_encoding
75
from .zapfding import _zapfding_encoding
86

97

10-
def fill_from_encoding(enc: str) -> List[str]:
11-
lst: List[str] = []
8+
def fill_from_encoding(enc: str) -> list[str]:
9+
lst: list[str] = []
1210
for x in range(256):
1311
try:
1412
lst += (bytes((x,)).decode(enc),)
@@ -17,8 +15,8 @@ def fill_from_encoding(enc: str) -> List[str]:
1715
return lst
1816

1917

20-
def rev_encoding(enc: List[str]) -> Dict[str, int]:
21-
rev: Dict[str, int] = {}
18+
def rev_encoding(enc: list[str]) -> dict[str, int]:
19+
rev: dict[str, int] = {}
2220
for i in range(256):
2321
char = enc[i]
2422
if char == "\u0000":
@@ -32,14 +30,14 @@ def rev_encoding(enc: List[str]) -> Dict[str, int]:
3230
_mac_encoding = fill_from_encoding("mac_roman")
3331

3432

35-
_win_encoding_rev: Dict[str, int] = rev_encoding(_win_encoding)
36-
_mac_encoding_rev: Dict[str, int] = rev_encoding(_mac_encoding)
37-
_symbol_encoding_rev: Dict[str, int] = rev_encoding(_symbol_encoding)
38-
_zapfding_encoding_rev: Dict[str, int] = rev_encoding(_zapfding_encoding)
39-
_pdfdoc_encoding_rev: Dict[str, int] = rev_encoding(_pdfdoc_encoding)
33+
_win_encoding_rev: dict[str, int] = rev_encoding(_win_encoding)
34+
_mac_encoding_rev: dict[str, int] = rev_encoding(_mac_encoding)
35+
_symbol_encoding_rev: dict[str, int] = rev_encoding(_symbol_encoding)
36+
_zapfding_encoding_rev: dict[str, int] = rev_encoding(_zapfding_encoding)
37+
_pdfdoc_encoding_rev: dict[str, int] = rev_encoding(_pdfdoc_encoding)
4038

4139

42-
charset_encoding: Dict[str, List[str]] = {
40+
charset_encoding: dict[str, list[str]] = {
4341
"/StandardEncoding": _std_encoding,
4442
"/WinAnsiEncoding": _win_encoding,
4543
"/MacRomanEncoding": _mac_encoding,

pypdf/_codecs/_codecs.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
import io
99
from abc import ABC, abstractmethod
10-
from typing import Dict, List
1110

1211
from pypdf._utils import logger_warning
1312

@@ -52,7 +51,7 @@ class LzwCodec(Codec):
5251

5352
def _initialize_encoding_table(self) -> None:
5453
"""Initialize the encoding table and state to initial conditions."""
55-
self.encoding_table: Dict[bytes, int] = {bytes([i]): i for i in range(256)}
54+
self.encoding_table: dict[bytes, int] = {bytes([i]): i for i in range(256)}
5655
self.next_code = self.EOD_MARKER + 1
5756
self.bits_per_code = self.INITIAL_BITS_PER_CODE
5857
self.max_code_value = (1 << self.bits_per_code) - 1
@@ -73,7 +72,7 @@ def encode(self, data: bytes) -> bytes:
7372
7473
Taken from PDF 1.7 specs, "7.4.4.2 Details of LZW Encoding".
7574
"""
76-
result_codes: List[int] = []
75+
result_codes: list[int] = []
7776

7877
# The encoder shall begin by issuing a clear-table code
7978
result_codes.append(self.CLEAR_TABLE_MARKER)
@@ -109,7 +108,7 @@ def encode(self, data: bytes) -> bytes:
109108

110109
return self._pack_codes_into_bytes(result_codes)
111110

112-
def _pack_codes_into_bytes(self, codes: List[int]) -> bytes:
111+
def _pack_codes_into_bytes(self, codes: list[int]) -> bytes:
113112
"""
114113
Convert the list of result codes into a continuous byte stream, with codes packed as per the code bit-width.
115114
The bit-width starts at 9 bits and expands as needed.

0 commit comments

Comments
 (0)