From 01dceba13e872e9ca24b8e00a2b75db3d0d6c1a3 Mon Sep 17 00:00:00 2001 From: Zachary Ware Date: Mon, 5 Feb 2024 17:10:55 -0600 Subject: [PATCH 001/126] gh-109991: Update Windows build to use OpenSSL 3.0.13 (#115043) --- .../Windows/2024-02-05-16-53-12.gh-issue-109991.YqjnDz.rst | 1 + PCbuild/get_externals.bat | 4 ++-- PCbuild/python.props | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2024-02-05-16-53-12.gh-issue-109991.YqjnDz.rst diff --git a/Misc/NEWS.d/next/Windows/2024-02-05-16-53-12.gh-issue-109991.YqjnDz.rst b/Misc/NEWS.d/next/Windows/2024-02-05-16-53-12.gh-issue-109991.YqjnDz.rst new file mode 100644 index 00000000000000..d9923c35c2726e --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2024-02-05-16-53-12.gh-issue-109991.YqjnDz.rst @@ -0,0 +1 @@ +Update Windows build to use OpenSSL 3.0.13. diff --git a/PCbuild/get_externals.bat b/PCbuild/get_externals.bat index de73d923d8f4df..0989bd46a580f7 100644 --- a/PCbuild/get_externals.bat +++ b/PCbuild/get_externals.bat @@ -53,7 +53,7 @@ echo.Fetching external libraries... set libraries= set libraries=%libraries% bzip2-1.0.8 if NOT "%IncludeLibffiSrc%"=="false" set libraries=%libraries% libffi-3.4.4 -if NOT "%IncludeSSLSrc%"=="false" set libraries=%libraries% openssl-3.0.11 +if NOT "%IncludeSSLSrc%"=="false" set libraries=%libraries% openssl-3.0.13 set libraries=%libraries% sqlite-3.44.2.0 if NOT "%IncludeTkinterSrc%"=="false" set libraries=%libraries% tcl-core-8.6.13.1 if NOT "%IncludeTkinterSrc%"=="false" set libraries=%libraries% tk-8.6.13.1 @@ -76,7 +76,7 @@ echo.Fetching external binaries... 
set binaries= if NOT "%IncludeLibffi%"=="false" set binaries=%binaries% libffi-3.4.4 -if NOT "%IncludeSSL%"=="false" set binaries=%binaries% openssl-bin-3.0.11 +if NOT "%IncludeSSL%"=="false" set binaries=%binaries% openssl-bin-3.0.13 if NOT "%IncludeTkinter%"=="false" set binaries=%binaries% tcltk-8.6.13.1 if NOT "%IncludeSSLSrc%"=="false" set binaries=%binaries% nasm-2.11.06 diff --git a/PCbuild/python.props b/PCbuild/python.props index 2cb16693e546b1..54553db4057288 100644 --- a/PCbuild/python.props +++ b/PCbuild/python.props @@ -74,8 +74,8 @@ $(ExternalsDir)libffi-3.4.4\ $(libffiDir)$(ArchName)\ $(libffiOutDir)include - $(ExternalsDir)openssl-3.0.11\ - $(ExternalsDir)openssl-bin-3.0.11\$(ArchName)\ + $(ExternalsDir)openssl-3.0.13\ + $(ExternalsDir)openssl-bin-3.0.13\$(ArchName)\ $(opensslOutDir)include $(ExternalsDir)\nasm-2.11.06\ $(ExternalsDir)\zlib-1.3.1\ From 638e811a3c54a81d8af5a4c08b9497d210823f78 Mon Sep 17 00:00:00 2001 From: Ned Deily Date: Mon, 5 Feb 2024 20:59:25 -0500 Subject: [PATCH 002/126] gh-109991: Update macOS installer to use OpenSSL 3.0.13. 
(GH-115052) --- Mac/BuildScript/build-installer.py | 6 +++--- .../macOS/2024-02-05-18-30-27.gh-issue-109991.tun6Yu.rst | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/macOS/2024-02-05-18-30-27.gh-issue-109991.tun6Yu.rst diff --git a/Mac/BuildScript/build-installer.py b/Mac/BuildScript/build-installer.py index 32de56bcf13086..9000fb8973659d 100755 --- a/Mac/BuildScript/build-installer.py +++ b/Mac/BuildScript/build-installer.py @@ -246,9 +246,9 @@ def library_recipes(): result.extend([ dict( - name="OpenSSL 3.0.11", - url="https://www.openssl.org/source/openssl-3.0.11.tar.gz", - checksum='b3425d3bb4a2218d0697eb41f7fc0cdede016ed19ca49d168b78e8d947887f55', + name="OpenSSL 3.0.13", + url="https://www.openssl.org/source/openssl-3.0.13.tar.gz", + checksum='88525753f79d3bec27d2fa7c66aa0b92b3aa9498dafd93d7cfa4b3780cdae313', buildrecipe=build_universal_openssl, configure=None, install=None, diff --git a/Misc/NEWS.d/next/macOS/2024-02-05-18-30-27.gh-issue-109991.tun6Yu.rst b/Misc/NEWS.d/next/macOS/2024-02-05-18-30-27.gh-issue-109991.tun6Yu.rst new file mode 100644 index 00000000000000..79b45e7d51da3f --- /dev/null +++ b/Misc/NEWS.d/next/macOS/2024-02-05-18-30-27.gh-issue-109991.tun6Yu.rst @@ -0,0 +1 @@ +Update macOS installer to use OpenSSL 3.0.13. From 299e16ca0f303a1e00bd0e04679862a5d4db5ab2 Mon Sep 17 00:00:00 2001 From: Ned Deily Date: Mon, 5 Feb 2024 21:10:11 -0500 Subject: [PATCH 003/126] gh-109991: Update GitHub CI workflows to use OpenSSL 3.0.13. (#115050) Also update multissltests to use 1.1.1w, 3.0.13, 3.1.5, and 3.2.1. 
--- .github/workflows/build.yml | 6 +++--- .github/workflows/reusable-ubuntu.yml | 2 +- .../2024-02-05-19-00-32.gh-issue-109991.yJSEkw.rst | 2 ++ Tools/ssl/multissltests.py | 5 +++-- 4 files changed, 9 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Tools-Demos/2024-02-05-19-00-32.gh-issue-109991.yJSEkw.rst diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 949c4ae95da07f..0a2f6da50ed8a0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -250,7 +250,7 @@ jobs: strategy: fail-fast: false matrix: - openssl_ver: [1.1.1w, 3.0.11, 3.1.3] + openssl_ver: [1.1.1w, 3.0.13, 3.1.5, 3.2.1] env: OPENSSL_VER: ${{ matrix.openssl_ver }} MULTISSL_DIR: ${{ github.workspace }}/multissl @@ -304,7 +304,7 @@ jobs: needs: check_source if: needs.check_source.outputs.run_tests == 'true' && needs.check_source.outputs.run_hypothesis == 'true' env: - OPENSSL_VER: 3.0.11 + OPENSSL_VER: 3.0.13 PYTHONSTRICTEXTENSIONBUILD: 1 steps: - uses: actions/checkout@v4 @@ -415,7 +415,7 @@ jobs: needs: check_source if: needs.check_source.outputs.run_tests == 'true' env: - OPENSSL_VER: 3.0.11 + OPENSSL_VER: 3.0.13 PYTHONSTRICTEXTENSIONBUILD: 1 ASAN_OPTIONS: detect_leaks=0:allocator_may_return_null=1:handle_segv=0 steps: diff --git a/.github/workflows/reusable-ubuntu.yml b/.github/workflows/reusable-ubuntu.yml index ef52d99c15191b..0cbad57f0c6572 100644 --- a/.github/workflows/reusable-ubuntu.yml +++ b/.github/workflows/reusable-ubuntu.yml @@ -14,7 +14,7 @@ jobs: timeout-minutes: 60 runs-on: ubuntu-20.04 env: - OPENSSL_VER: 3.0.11 + OPENSSL_VER: 3.0.13 PYTHONSTRICTEXTENSIONBUILD: 1 steps: - uses: actions/checkout@v4 diff --git a/Misc/NEWS.d/next/Tools-Demos/2024-02-05-19-00-32.gh-issue-109991.yJSEkw.rst b/Misc/NEWS.d/next/Tools-Demos/2024-02-05-19-00-32.gh-issue-109991.yJSEkw.rst new file mode 100644 index 00000000000000..4eb4d39629b9bc --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2024-02-05-19-00-32.gh-issue-109991.yJSEkw.rst @@ -0,0 
+1,2 @@ +Update GitHub CI workflows to use OpenSSL 3.0.13 and multissltests to use +1.1.1w, 3.0.13, 3.1.5, and 3.2.1. diff --git a/Tools/ssl/multissltests.py b/Tools/ssl/multissltests.py index 120e3883adc795..baa16102068aa0 100755 --- a/Tools/ssl/multissltests.py +++ b/Tools/ssl/multissltests.py @@ -47,8 +47,9 @@ OPENSSL_RECENT_VERSIONS = [ "1.1.1w", - "3.0.11", - "3.1.3", + "3.0.13", + "3.1.5", + "3.2.1", ] LIBRESSL_OLD_VERSIONS = [ From 1b1f8398d0ffe3c8ba2cca79d0c0f19a6a34e72a Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Tue, 6 Feb 2024 02:48:18 +0000 Subject: [PATCH 004/126] GH-106747: Make pathlib ABC globbing more consistent with `glob.glob()` (#115056) When expanding `**` wildcards, ensure we add a trailing slash to the topmost directory path. This matches `glob.glob()` behaviour: >>> glob.glob('dirA/**', recursive=True) ['dirA/', 'dirA/dirB', 'dirA/dirB/dirC'] This does not affect `pathlib.Path.glob()`, because trailing slashes aren't supported in pathlib proper. --- Lib/pathlib/_abc.py | 2 +- Lib/test/test_pathlib/test_pathlib_abc.py | 34 +++++++++++------------ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 91f5cd6c01e9d0..e4b1201a3703c3 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -95,7 +95,7 @@ def _select_recursive(parent_paths, dir_only, follow_symlinks): if follow_symlinks is None: follow_symlinks = False for parent_path in parent_paths: - paths = [parent_path] + paths = [parent_path._make_child_relpath('')] while paths: path = paths.pop() yield path diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 207579ccbf443b..1d30deca8f7a1b 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1791,25 +1791,25 @@ def _check(path, glob, expected): _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirE/", "linkB/"]) _check(p, "dir*/*/..", ["dirC/dirD/..", "dirA/linkC/..", 
"dirB/linkD/.."]) _check(p, "dir*/**", [ - "dirA", "dirA/linkC", "dirA/linkC/fileB", "dirA/linkC/linkD", "dirA/linkC/linkD/fileB", - "dirB", "dirB/fileB", "dirB/linkD", "dirB/linkD/fileB", - "dirC", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt", - "dirE"]) + "dirA/", "dirA/linkC", "dirA/linkC/fileB", "dirA/linkC/linkD", "dirA/linkC/linkD/fileB", + "dirB/", "dirB/fileB", "dirB/linkD", "dirB/linkD/fileB", + "dirC/", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt", + "dirE/"]) _check(p, "dir*/**/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", "dirC/", "dirC/dirD/", "dirE/"]) _check(p, "dir*/**/..", ["dirA/..", "dirA/linkC/..", "dirB/..", "dirB/linkD/..", "dirA/linkC/linkD/..", "dirC/..", "dirC/dirD/..", "dirE/.."]) _check(p, "dir*/*/**", [ - "dirA/linkC", "dirA/linkC/linkD", "dirA/linkC/fileB", "dirA/linkC/linkD/fileB", - "dirB/linkD", "dirB/linkD/fileB", - "dirC/dirD", "dirC/dirD/fileD"]) + "dirA/linkC/", "dirA/linkC/linkD", "dirA/linkC/fileB", "dirA/linkC/linkD/fileB", + "dirB/linkD/", "dirB/linkD/fileB", + "dirC/dirD/", "dirC/dirD/fileD"]) _check(p, "dir*/*/**/", ["dirA/linkC/", "dirA/linkC/linkD/", "dirB/linkD/", "dirC/dirD/"]) _check(p, "dir*/*/**/..", ["dirA/linkC/..", "dirA/linkC/linkD/..", "dirB/linkD/..", "dirC/dirD/.."]) _check(p, "dir*/**/fileC", ["dirC/fileC"]) _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"]) - _check(p, "*/dirD/**", ["dirC/dirD", "dirC/dirD/fileD"]) + _check(p, "*/dirD/**", ["dirC/dirD/", "dirC/dirD/fileD"]) _check(p, "*/dirD/**/", ["dirC/dirD/"]) @needs_symlinks @@ -1827,19 +1827,19 @@ def _check(path, glob, expected): _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirE/"]) _check(p, "dir*/*/..", ["dirC/dirD/.."]) _check(p, "dir*/**", [ - "dirA", "dirA/linkC", - "dirB", "dirB/fileB", "dirB/linkD", - "dirC", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt", - "dirE"]) + "dirA/", "dirA/linkC", + "dirB/", "dirB/fileB", "dirB/linkD", + "dirC/", "dirC/fileC", 
"dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt", + "dirE/"]) _check(p, "dir*/**/", ["dirA/", "dirB/", "dirC/", "dirC/dirD/", "dirE/"]) _check(p, "dir*/**/..", ["dirA/..", "dirB/..", "dirC/..", "dirC/dirD/..", "dirE/.."]) - _check(p, "dir*/*/**", ["dirC/dirD", "dirC/dirD/fileD"]) + _check(p, "dir*/*/**", ["dirC/dirD/", "dirC/dirD/fileD"]) _check(p, "dir*/*/**/", ["dirC/dirD/"]) _check(p, "dir*/*/**/..", ["dirC/dirD/.."]) _check(p, "dir*/**/fileC", ["dirC/fileC"]) - _check(p, "dir*/*/../dirD/**", ["dirC/dirD/../dirD", "dirC/dirD/../dirD/fileD"]) + _check(p, "dir*/*/../dirD/**", ["dirC/dirD/../dirD/", "dirC/dirD/../dirD/fileD"]) _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"]) - _check(p, "*/dirD/**", ["dirC/dirD", "dirC/dirD/fileD"]) + _check(p, "*/dirD/**", ["dirC/dirD/", "dirC/dirD/fileD"]) _check(p, "*/dirD/**/", ["dirC/dirD/"]) def test_rglob_common(self): @@ -1876,13 +1876,13 @@ def _check(glob, expected): "dirC/dirD", "dirC/dirD/fileD"]) _check(p.rglob("file*"), ["dirC/fileC", "dirC/dirD/fileD"]) _check(p.rglob("**/file*"), ["dirC/fileC", "dirC/dirD/fileD"]) - _check(p.rglob("dir*/**"), ["dirC/dirD", "dirC/dirD/fileD"]) + _check(p.rglob("dir*/**"), ["dirC/dirD/", "dirC/dirD/fileD"]) _check(p.rglob("dir*/**/"), ["dirC/dirD/"]) _check(p.rglob("*/*"), ["dirC/dirD/fileD"]) _check(p.rglob("*/"), ["dirC/dirD/"]) _check(p.rglob(""), ["dirC/", "dirC/dirD/"]) _check(p.rglob("**"), [ - "dirC", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt"]) + "dirC/", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt"]) _check(p.rglob("**/"), ["dirC/", "dirC/dirD/"]) # gh-91616, a re module regression _check(p.rglob("*.txt"), ["dirC/novel.txt"]) From 13eb5215c9de9dd302f116ef0bca4ae23b02842b Mon Sep 17 00:00:00 2001 From: "Erlend E. 
Aasland" Date: Tue, 6 Feb 2024 11:04:35 +0100 Subject: [PATCH 005/126] gh-115009: Update macOS installer to use SQLite 3.45.1 (#115066) Co-authored-by: Ned Deily --- Mac/BuildScript/build-installer.py | 6 +++--- .../macOS/2024-02-06-09-01-10.gh-issue-115009.ysau7e.rst | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/macOS/2024-02-06-09-01-10.gh-issue-115009.ysau7e.rst diff --git a/Mac/BuildScript/build-installer.py b/Mac/BuildScript/build-installer.py index 9000fb8973659d..0af90563cbbb2b 100755 --- a/Mac/BuildScript/build-installer.py +++ b/Mac/BuildScript/build-installer.py @@ -359,9 +359,9 @@ def library_recipes(): ), ), dict( - name="SQLite 3.44.2", - url="https://sqlite.org/2023/sqlite-autoconf-3440200.tar.gz", - checksum="c02f40fd4f809ced95096250adc5764a", + name="SQLite 3.45.1", + url="https://sqlite.org/2024/sqlite-autoconf-3450100.tar.gz", + checksum="cd9c27841b7a5932c9897651e20b86c701dd740556989b01ca596fcfa3d49a0a", extra_cflags=('-Os ' '-DSQLITE_ENABLE_FTS5 ' '-DSQLITE_ENABLE_FTS4 ' diff --git a/Misc/NEWS.d/next/macOS/2024-02-06-09-01-10.gh-issue-115009.ysau7e.rst b/Misc/NEWS.d/next/macOS/2024-02-06-09-01-10.gh-issue-115009.ysau7e.rst new file mode 100644 index 00000000000000..47ec488c3cced2 --- /dev/null +++ b/Misc/NEWS.d/next/macOS/2024-02-06-09-01-10.gh-issue-115009.ysau7e.rst @@ -0,0 +1 @@ +Update macOS installer to use SQLite 3.45.1. 
From 4bf41879d03b1da3c6d38c39a04331e3ae2e7545 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Tue, 6 Feb 2024 04:25:58 -0600 Subject: [PATCH 006/126] gh-112302: Change 'licenseConcluded' field to 'NOASSERTION' (#115038) --- Misc/sbom.spdx.json | 60 ++++++++++++++++++------------------ Tools/build/generate_sbom.py | 12 +++++--- 2 files changed, 37 insertions(+), 35 deletions(-) diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json index e94dcb83dd4e40..d783d14255e66f 100644 --- a/Misc/sbom.spdx.json +++ b/Misc/sbom.spdx.json @@ -1601,7 +1601,7 @@ "referenceType": "cpe23Type" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "expat", "originator": "Organization: Expat development team", "primaryPackagePurpose": "SOURCE", @@ -1623,7 +1623,7 @@ "referenceType": "cpe23Type" } ], - "licenseConcluded": "Apache-2.0", + "licenseConcluded": "NOASSERTION", "name": "hacl-star", "originator": "Organization: HACL* Developers", "primaryPackagePurpose": "SOURCE", @@ -1645,7 +1645,7 @@ "referenceType": "cpe23Type" } ], - "licenseConcluded": "CC0-1.0", + "licenseConcluded": "NOASSERTION", "name": "libb2", "originator": "Organization: BLAKE2 - fast secure hashing", "primaryPackagePurpose": "SOURCE", @@ -1667,7 +1667,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "macholib", "originator": "Person: Ronald Oussoren (ronaldoussoren@mac.com)", "primaryPackagePurpose": "SOURCE", @@ -1689,7 +1689,7 @@ "referenceType": "cpe23Type" } ], - "licenseConcluded": "BSD-2-Clause", + "licenseConcluded": "NOASSERTION", "name": "mpdecimal", "originator": "Organization: bytereef.org", "primaryPackagePurpose": "SOURCE", @@ -1711,7 +1711,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "cachecontrol", "primaryPackagePurpose": "SOURCE", "versionInfo": "0.13.1" @@ -1732,7 +1732,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + 
"licenseConcluded": "NOASSERTION", "name": "colorama", "primaryPackagePurpose": "SOURCE", "versionInfo": "0.4.6" @@ -1753,7 +1753,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "distlib", "primaryPackagePurpose": "SOURCE", "versionInfo": "0.3.8" @@ -1774,7 +1774,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "distro", "primaryPackagePurpose": "SOURCE", "versionInfo": "1.8.0" @@ -1795,7 +1795,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "msgpack", "primaryPackagePurpose": "SOURCE", "versionInfo": "1.0.5" @@ -1816,7 +1816,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "packaging", "primaryPackagePurpose": "SOURCE", "versionInfo": "21.3" @@ -1837,7 +1837,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "platformdirs", "primaryPackagePurpose": "SOURCE", "versionInfo": "3.8.1" @@ -1858,7 +1858,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "pyparsing", "primaryPackagePurpose": "SOURCE", "versionInfo": "3.1.0" @@ -1879,7 +1879,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "pyproject-hooks", "primaryPackagePurpose": "SOURCE", "versionInfo": "1.0.0" @@ -1900,7 +1900,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "requests", "primaryPackagePurpose": "SOURCE", "versionInfo": "2.31.0" @@ -1921,7 +1921,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "certifi", "primaryPackagePurpose": "SOURCE", "versionInfo": "2023.7.22" @@ -1942,7 +1942,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", 
"name": "chardet", "primaryPackagePurpose": "SOURCE", "versionInfo": "5.1.0" @@ -1963,7 +1963,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "idna", "primaryPackagePurpose": "SOURCE", "versionInfo": "3.4" @@ -1984,7 +1984,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "rich", "primaryPackagePurpose": "SOURCE", "versionInfo": "13.4.2" @@ -2005,7 +2005,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "pygments", "primaryPackagePurpose": "SOURCE", "versionInfo": "2.15.1" @@ -2026,7 +2026,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "typing_extensions", "primaryPackagePurpose": "SOURCE", "versionInfo": "4.7.1" @@ -2047,7 +2047,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "resolvelib", "primaryPackagePurpose": "SOURCE", "versionInfo": "1.0.1" @@ -2068,7 +2068,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "setuptools", "primaryPackagePurpose": "SOURCE", "versionInfo": "68.0.0" @@ -2089,7 +2089,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "six", "primaryPackagePurpose": "SOURCE", "versionInfo": "1.16.0" @@ -2110,7 +2110,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "tenacity", "primaryPackagePurpose": "SOURCE", "versionInfo": "8.2.2" @@ -2131,7 +2131,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "tomli", "primaryPackagePurpose": "SOURCE", "versionInfo": "2.0.1" @@ -2152,7 +2152,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "truststore", "primaryPackagePurpose": 
"SOURCE", "versionInfo": "0.8.0" @@ -2173,7 +2173,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "webencodings", "primaryPackagePurpose": "SOURCE", "versionInfo": "0.5.1" @@ -2194,7 +2194,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "urllib3", "primaryPackagePurpose": "SOURCE", "versionInfo": "1.26.17" @@ -2220,7 +2220,7 @@ "referenceType": "purl" } ], - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "name": "pip", "originator": "Organization: Python Packaging Authority", "primaryPackagePurpose": "SOURCE", diff --git a/Tools/build/generate_sbom.py b/Tools/build/generate_sbom.py index aceb13f141cba4..442487f2d2546b 100644 --- a/Tools/build/generate_sbom.py +++ b/Tools/build/generate_sbom.py @@ -338,7 +338,7 @@ def discover_pip_sbom_package(sbom_data: dict[str, typing.Any]) -> None: "name": "pip", "versionInfo": pip_version, "originator": "Organization: Python Packaging Authority", - "licenseConcluded": "MIT", + "licenseConcluded": "NOASSERTION", "downloadLocation": pip_download_url, "checksums": [ {"algorithm": "SHA256", "checksumValue": pip_checksum_sha256} @@ -383,9 +383,11 @@ def main() -> None: discover_pip_sbom_package(sbom_data) # Ensure all packages in this tool are represented also in the SBOM file. + actual_names = {package["name"] for package in sbom_data["packages"]} + expected_names = set(PACKAGE_TO_FILES) error_if( - {package["name"] for package in sbom_data["packages"]} != set(PACKAGE_TO_FILES), - "Packages defined in SBOM tool don't match those defined in SBOM file.", + actual_names != expected_names, + f"Packages defined in SBOM tool don't match those defined in SBOM file: {actual_names}, {expected_names}", ) # Make a bunch of assertions about the SBOM data to ensure it's consistent. @@ -422,8 +424,8 @@ def main() -> None: # License must be on the approved list for SPDX. 
license_concluded = package["licenseConcluded"] error_if( - license_concluded not in ALLOWED_LICENSE_EXPRESSIONS, - f"License identifier '{license_concluded}' not in SBOM tool allowlist" + license_concluded != "NOASSERTION", + f"License identifier must be 'NOASSERTION'" ) # We call 'sorted()' here a lot to avoid filesystem scan order issues. From 1a10437a14b13100bdf41cbdab819c33258deb65 Mon Sep 17 00:00:00 2001 From: Mariusz Felisiak Date: Tue, 6 Feb 2024 12:34:56 +0100 Subject: [PATCH 007/126] gh-91602: Add iterdump() support for filtering database objects (#114501) Add optional 'filter' parameter to iterdump() that allows a "LIKE" pattern for filtering database objects to dump. Co-authored-by: Erlend E. Aasland --- Doc/library/sqlite3.rst | 11 ++- Doc/whatsnew/3.13.rst | 4 ++ .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 3 + Lib/sqlite3/dump.py | 19 +++-- Lib/test/test_sqlite3/test_dump.py | 70 +++++++++++++++++++ ...4-01-24-20-51-49.gh-issue-91602.8fOH8l.rst | 3 + Modules/_sqlite/clinic/connection.c.h | 60 ++++++++++++++-- Modules/_sqlite/connection.c | 20 ++++-- 11 files changed, 176 insertions(+), 17 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-24-20-51-49.gh-issue-91602.8fOH8l.rst diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst index c3406b166c3d89..87d5ef1e42ca3a 100644 --- a/Doc/library/sqlite3.rst +++ b/Doc/library/sqlite3.rst @@ -1137,12 +1137,19 @@ Connection objects .. _Loading an Extension: https://www.sqlite.org/loadext.html#loading_an_extension_ - .. method:: iterdump + .. method:: iterdump(*, filter=None) Return an :term:`iterator` to dump the database as SQL source code. Useful when saving an in-memory database for later restoration. Similar to the ``.dump`` command in the :program:`sqlite3` shell. 
+ :param filter: + + An optional ``LIKE`` pattern for database objects to dump, e.g. ``prefix_%``. + If ``None`` (the default), all database objects will be included. + + :type filter: str | None + Example: .. testcode:: @@ -1158,6 +1165,8 @@ Connection objects :ref:`sqlite3-howto-encoding` + .. versionchanged:: 3.13 + Added the *filter* parameter. .. method:: backup(target, *, pages=-1, progress=None, name="main", sleep=0.250) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 5e5f1e295f4d70..372757759b986f 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -438,6 +438,10 @@ sqlite3 object is not :meth:`closed ` explicitly. (Contributed by Erlend E. Aasland in :gh:`105539`.) +* Add *filter* keyword-only parameter to :meth:`sqlite3.Connection.iterdump` + for filtering database objects to dump. + (Contributed by Mariusz Felisiak in :gh:`91602`.) + subprocess ---------- diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index dd09ff40f39fe6..932738c3049882 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -940,6 +940,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fileno)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(filepath)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fillvalue)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(filter)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(filters)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(final)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(find_class)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 79d6509abcdfd9..da62b4f0a951ff 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -429,6 +429,7 @@ struct _Py_global_strings { 
STRUCT_FOR_ID(fileno) STRUCT_FOR_ID(filepath) STRUCT_FOR_ID(fillvalue) + STRUCT_FOR_ID(filter) STRUCT_FOR_ID(filters) STRUCT_FOR_ID(final) STRUCT_FOR_ID(find_class) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index f3c55acfb3c282..68fbbcb4378e17 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -938,6 +938,7 @@ extern "C" { INIT_ID(fileno), \ INIT_ID(filepath), \ INIT_ID(fillvalue), \ + INIT_ID(filter), \ INIT_ID(filters), \ INIT_ID(final), \ INIT_ID(find_class), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 2e9572382fe033..c8458b4e36ccc9 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1128,6 +1128,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(fillvalue); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(filter); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(filters); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); diff --git a/Lib/sqlite3/dump.py b/Lib/sqlite3/dump.py index 719dfc8947697d..9dcce7dc76ced4 100644 --- a/Lib/sqlite3/dump.py +++ b/Lib/sqlite3/dump.py @@ -15,7 +15,7 @@ def _quote_value(value): return "'{0}'".format(value.replace("'", "''")) -def _iterdump(connection): +def _iterdump(connection, *, filter=None): """ Returns an iterator to the dump of the database in an SQL text format. @@ -32,15 +32,23 @@ def _iterdump(connection): yield('PRAGMA foreign_keys=OFF;') yield('BEGIN TRANSACTION;') + if filter: + # Return database objects which match the filter pattern. + filter_name_clause = 'AND "name" LIKE ?' 
+ params = [filter] + else: + filter_name_clause = "" + params = [] # sqlite_master table contains the SQL CREATE statements for the database. - q = """ + q = f""" SELECT "name", "type", "sql" FROM "sqlite_master" WHERE "sql" NOT NULL AND "type" == 'table' + {filter_name_clause} ORDER BY "name" """ - schema_res = cu.execute(q) + schema_res = cu.execute(q, params) sqlite_sequence = [] for table_name, type, sql in schema_res.fetchall(): if table_name == 'sqlite_sequence': @@ -82,13 +90,14 @@ def _iterdump(connection): yield("{0};".format(row[0])) # Now when the type is 'index', 'trigger', or 'view' - q = """ + q = f""" SELECT "name", "type", "sql" FROM "sqlite_master" WHERE "sql" NOT NULL AND "type" IN ('index', 'trigger', 'view') + {filter_name_clause} """ - schema_res = cu.execute(q) + schema_res = cu.execute(q, params) for name, type, sql in schema_res.fetchall(): yield('{0};'.format(sql)) diff --git a/Lib/test/test_sqlite3/test_dump.py b/Lib/test/test_sqlite3/test_dump.py index 2e1f0b80c10f46..7261b7f0dc93d0 100644 --- a/Lib/test/test_sqlite3/test_dump.py +++ b/Lib/test/test_sqlite3/test_dump.py @@ -54,6 +54,76 @@ def test_table_dump(self): [self.assertEqual(expected_sqls[i], actual_sqls[i]) for i in range(len(expected_sqls))] + def test_table_dump_filter(self): + all_table_sqls = [ + """CREATE TABLE "some_table_2" ("id_1" INTEGER);""", + """INSERT INTO "some_table_2" VALUES(3);""", + """INSERT INTO "some_table_2" VALUES(4);""", + """CREATE TABLE "test_table_1" ("id_2" INTEGER);""", + """INSERT INTO "test_table_1" VALUES(1);""", + """INSERT INTO "test_table_1" VALUES(2);""", + ] + all_views_sqls = [ + """CREATE VIEW "view_1" AS SELECT * FROM "some_table_2";""", + """CREATE VIEW "view_2" AS SELECT * FROM "test_table_1";""", + ] + # Create database structure. + for sql in [*all_table_sqls, *all_views_sqls]: + self.cu.execute(sql) + # %_table_% matches all tables. 
+ dump_sqls = list(self.cx.iterdump(filter="%_table_%")) + self.assertEqual( + dump_sqls, + ["BEGIN TRANSACTION;", *all_table_sqls, "COMMIT;"], + ) + # view_% matches all views. + dump_sqls = list(self.cx.iterdump(filter="view_%")) + self.assertEqual( + dump_sqls, + ["BEGIN TRANSACTION;", *all_views_sqls, "COMMIT;"], + ) + # %_1 matches tables and views with the _1 suffix. + dump_sqls = list(self.cx.iterdump(filter="%_1")) + self.assertEqual( + dump_sqls, + [ + "BEGIN TRANSACTION;", + """CREATE TABLE "test_table_1" ("id_2" INTEGER);""", + """INSERT INTO "test_table_1" VALUES(1);""", + """INSERT INTO "test_table_1" VALUES(2);""", + """CREATE VIEW "view_1" AS SELECT * FROM "some_table_2";""", + "COMMIT;" + ], + ) + # some_% matches some_table_2. + dump_sqls = list(self.cx.iterdump(filter="some_%")) + self.assertEqual( + dump_sqls, + [ + "BEGIN TRANSACTION;", + """CREATE TABLE "some_table_2" ("id_1" INTEGER);""", + """INSERT INTO "some_table_2" VALUES(3);""", + """INSERT INTO "some_table_2" VALUES(4);""", + "COMMIT;" + ], + ) + # Only single object. + dump_sqls = list(self.cx.iterdump(filter="view_2")) + self.assertEqual( + dump_sqls, + [ + "BEGIN TRANSACTION;", + """CREATE VIEW "view_2" AS SELECT * FROM "test_table_1";""", + "COMMIT;" + ], + ) + # % matches all objects. + dump_sqls = list(self.cx.iterdump(filter="%")) + self.assertEqual( + dump_sqls, + ["BEGIN TRANSACTION;", *all_table_sqls, *all_views_sqls, "COMMIT;"], + ) + def test_dump_autoincrement(self): expected = [ 'CREATE TABLE "t1" (id integer primary key autoincrement);', diff --git a/Misc/NEWS.d/next/Library/2024-01-24-20-51-49.gh-issue-91602.8fOH8l.rst b/Misc/NEWS.d/next/Library/2024-01-24-20-51-49.gh-issue-91602.8fOH8l.rst new file mode 100644 index 00000000000000..21d39df43e035b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-24-20-51-49.gh-issue-91602.8fOH8l.rst @@ -0,0 +1,3 @@ +Add *filter* keyword-only parameter to +:meth:`sqlite3.Connection.iterdump` for filtering database objects to dump. 
+Patch by Mariusz Felisiak. diff --git a/Modules/_sqlite/clinic/connection.c.h b/Modules/_sqlite/clinic/connection.c.h index f2cff6a7b421f3..811314b5cd8aed 100644 --- a/Modules/_sqlite/clinic/connection.c.h +++ b/Modules/_sqlite/clinic/connection.c.h @@ -1204,21 +1204,67 @@ pysqlite_connection_interrupt(pysqlite_Connection *self, PyObject *Py_UNUSED(ign } PyDoc_STRVAR(pysqlite_connection_iterdump__doc__, -"iterdump($self, /)\n" +"iterdump($self, /, *, filter=None)\n" "--\n" "\n" -"Returns iterator to the dump of the database in an SQL text format."); +"Returns iterator to the dump of the database in an SQL text format.\n" +"\n" +" filter\n" +" An optional LIKE pattern for database objects to dump"); #define PYSQLITE_CONNECTION_ITERDUMP_METHODDEF \ - {"iterdump", (PyCFunction)pysqlite_connection_iterdump, METH_NOARGS, pysqlite_connection_iterdump__doc__}, + {"iterdump", _PyCFunction_CAST(pysqlite_connection_iterdump), METH_FASTCALL|METH_KEYWORDS, pysqlite_connection_iterdump__doc__}, static PyObject * -pysqlite_connection_iterdump_impl(pysqlite_Connection *self); +pysqlite_connection_iterdump_impl(pysqlite_Connection *self, + PyObject *filter); static PyObject * -pysqlite_connection_iterdump(pysqlite_Connection *self, PyObject *Py_UNUSED(ignored)) +pysqlite_connection_iterdump(pysqlite_Connection *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { - return pysqlite_connection_iterdump_impl(self); + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(filter), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"filter", NULL}; + 
static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "iterdump", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; + PyObject *filter = Py_None; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 0, 0, argsbuf); + if (!args) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + filter = args[0]; +skip_optional_kwonly: + return_value = pysqlite_connection_iterdump_impl(self, filter); + +exit: + return return_value; } PyDoc_STRVAR(pysqlite_connection_backup__doc__, @@ -1820,4 +1866,4 @@ getconfig(pysqlite_Connection *self, PyObject *arg) #ifndef DESERIALIZE_METHODDEF #define DESERIALIZE_METHODDEF #endif /* !defined(DESERIALIZE_METHODDEF) */ -/*[clinic end generated code: output=99299d3ee2c247ab input=a9049054013a1b77]*/ +/*[clinic end generated code: output=3c6d0b748fac016f input=a9049054013a1b77]*/ diff --git a/Modules/_sqlite/connection.c b/Modules/_sqlite/connection.c index 0a6633972cc5ef..f97afcf5fcf16e 100644 --- a/Modules/_sqlite/connection.c +++ b/Modules/_sqlite/connection.c @@ -1979,12 +1979,17 @@ pysqlite_connection_interrupt_impl(pysqlite_Connection *self) /*[clinic input] _sqlite3.Connection.iterdump as pysqlite_connection_iterdump + * + filter: object = None + An optional LIKE pattern for database objects to dump + Returns iterator to the dump of the database in an SQL text format. 
[clinic start generated code]*/ static PyObject * -pysqlite_connection_iterdump_impl(pysqlite_Connection *self) -/*[clinic end generated code: output=586997aaf9808768 input=1911ca756066da89]*/ +pysqlite_connection_iterdump_impl(pysqlite_Connection *self, + PyObject *filter) +/*[clinic end generated code: output=fd81069c4bdeb6b0 input=4ae6d9a898f108df]*/ { if (!pysqlite_check_connection(self)) { return NULL; @@ -1998,9 +2003,16 @@ pysqlite_connection_iterdump_impl(pysqlite_Connection *self) } return NULL; } - - PyObject *retval = PyObject_CallOneArg(iterdump, (PyObject *)self); + PyObject *args[3] = {NULL, (PyObject *)self, filter}; + PyObject *kwnames = Py_BuildValue("(s)", "filter"); + if (!kwnames) { + Py_DECREF(iterdump); + return NULL; + } + Py_ssize_t nargsf = 1 | PY_VECTORCALL_ARGUMENTS_OFFSET; + PyObject *retval = PyObject_Vectorcall(iterdump, args + 1, nargsf, kwnames); Py_DECREF(iterdump); + Py_DECREF(kwnames); return retval; } From d7334e2c2012defaf7aae920d6a56689464509d1 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Tue, 6 Feb 2024 16:08:56 +0300 Subject: [PATCH 008/126] gh-106233: Fix stacklevel in zoneinfo.InvalidTZPathWarning (GH-106234) --- Lib/test/test_zoneinfo/test_zoneinfo.py | 17 +++++++++++-- Lib/zoneinfo/_tzpath.py | 24 ++++++++++++------- ...-06-29-14-26-56.gh-issue-106233.Aqw2HI.rst | 2 ++ 3 files changed, 32 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-06-29-14-26-56.gh-issue-106233.Aqw2HI.rst diff --git a/Lib/test/test_zoneinfo/test_zoneinfo.py b/Lib/test/test_zoneinfo/test_zoneinfo.py index 18eab5b33540c9..8414721555731e 100644 --- a/Lib/test/test_zoneinfo/test_zoneinfo.py +++ b/Lib/test/test_zoneinfo/test_zoneinfo.py @@ -20,7 +20,7 @@ from test.support import MISSING_C_DOCSTRINGS from test.test_zoneinfo import _support as test_support from test.test_zoneinfo._support import OS_ENV_LOCK, TZPATH_TEST_LOCK, ZoneInfoTestBase -from test.support.import_helper import import_module +from 
test.support.import_helper import import_module, CleanImport lzma = import_module('lzma') py_zoneinfo, c_zoneinfo = test_support.get_modules() @@ -1720,13 +1720,26 @@ def test_env_variable_relative_paths(self): with self.subTest("warning", path_var=path_var): # Note: Per PEP 615 the warning is implementation-defined # behavior, other implementations need not warn. - with self.assertWarns(self.module.InvalidTZPathWarning): + with self.assertWarns(self.module.InvalidTZPathWarning) as w: self.module.reset_tzpath() + self.assertEqual(w.warnings[0].filename, __file__) tzpath = self.module.TZPATH with self.subTest("filtered", path_var=path_var): self.assertSequenceEqual(tzpath, expected_paths) + def test_env_variable_relative_paths_warning_location(self): + path_var = "path/to/somewhere" + + with self.python_tzpath_context(path_var): + with CleanImport("zoneinfo", "zoneinfo._tzpath"): + with self.assertWarns(RuntimeWarning) as w: + import zoneinfo + InvalidTZPathWarning = zoneinfo.InvalidTZPathWarning + self.assertIsInstance(w.warnings[0].message, InvalidTZPathWarning) + # It should represent the current file: + self.assertEqual(w.warnings[0].filename, __file__) + def test_reset_tzpath_kwarg(self): self.module.reset_tzpath(to=[f"{DRIVE}/a/b/c"]) diff --git a/Lib/zoneinfo/_tzpath.py b/Lib/zoneinfo/_tzpath.py index 4985dce2dc36d0..5db17bea045d8c 100644 --- a/Lib/zoneinfo/_tzpath.py +++ b/Lib/zoneinfo/_tzpath.py @@ -2,7 +2,7 @@ import sysconfig -def reset_tzpath(to=None): +def _reset_tzpath(to=None, stacklevel=4): global TZPATH tzpaths = to @@ -18,17 +18,22 @@ def reset_tzpath(to=None): base_tzpath = tzpaths else: env_var = os.environ.get("PYTHONTZPATH", None) - if env_var is not None: - base_tzpath = _parse_python_tzpath(env_var) - else: - base_tzpath = _parse_python_tzpath( - sysconfig.get_config_var("TZPATH") - ) + if env_var is None: + env_var = sysconfig.get_config_var("TZPATH") + base_tzpath = _parse_python_tzpath(env_var, stacklevel) TZPATH = tuple(base_tzpath) -def 
_parse_python_tzpath(env_var): +def reset_tzpath(to=None): + """Reset global TZPATH.""" + # We need `_reset_tzpath` helper function because it produces a warning, + # it is used as both a module-level call and a public API. + # This is how we equalize the stacklevel for both calls. + _reset_tzpath(to) + + +def _parse_python_tzpath(env_var, stacklevel): if not env_var: return () @@ -45,6 +50,7 @@ def _parse_python_tzpath(env_var): "Invalid paths specified in PYTHONTZPATH environment variable. " + msg, InvalidTZPathWarning, + stacklevel=stacklevel, ) return new_tzpath @@ -172,4 +178,4 @@ class InvalidTZPathWarning(RuntimeWarning): TZPATH = () -reset_tzpath() +_reset_tzpath(stacklevel=5) diff --git a/Misc/NEWS.d/next/Library/2023-06-29-14-26-56.gh-issue-106233.Aqw2HI.rst b/Misc/NEWS.d/next/Library/2023-06-29-14-26-56.gh-issue-106233.Aqw2HI.rst new file mode 100644 index 00000000000000..345c8b20815c95 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-06-29-14-26-56.gh-issue-106233.Aqw2HI.rst @@ -0,0 +1,2 @@ +Fix stacklevel in ``InvalidTZPathWarning`` during :mod:`zoneinfo` module +import. From 0e2ab73dc31e0b8ea1827ec24bae93ae2644c617 Mon Sep 17 00:00:00 2001 From: da-woods Date: Tue, 6 Feb 2024 15:55:44 +0000 Subject: [PATCH 009/126] gh-114756: Update FAQ section on removing the GIL (#114957) Update FAQ section on removing the GIL to reflect recent progress on PEP 703 and PEP 684. Co-authored-by: AN Long --- Doc/faq/library.rst | 52 +++++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/Doc/faq/library.rst b/Doc/faq/library.rst index 476a43d9c288f1..e2f8004c7e3aea 100644 --- a/Doc/faq/library.rst +++ b/Doc/faq/library.rst @@ -405,22 +405,37 @@ lists. When in doubt, use a mutex! Can't we get rid of the Global Interpreter Lock? ------------------------------------------------ -.. XXX link to dbeazley's talk about GIL? 
- The :term:`global interpreter lock` (GIL) is often seen as a hindrance to Python's deployment on high-end multiprocessor server machines, because a multi-threaded Python program effectively only uses one CPU, due to the insistence that (almost) all Python code can only run while the GIL is held. -Back in the days of Python 1.5, Greg Stein actually implemented a comprehensive +With the approval of :pep:`703` work is now underway to remove the GIL from the +CPython implementation of Python. Initially it will be implemented as an +optional compiler flag when building the interpreter, and so separate +builds will be available with and without the GIL. Long-term, the hope is +to settle on a single build, once the performance implications of removing the +GIL are fully understood. Python 3.13 is likely to be the first release +containing this work, although it may not be completely functional in this +release. + +The current work to remove the GIL is based on a +`fork of Python 3.9 with the GIL removed `_ +by Sam Gross. +Prior to that, +in the days of Python 1.5, Greg Stein actually implemented a comprehensive patch set (the "free threading" patches) that removed the GIL and replaced it -with fine-grained locking. Adam Olsen recently did a similar experiment +with fine-grained locking. Adam Olsen did a similar experiment in his `python-safethread `_ -project. Unfortunately, both experiments exhibited a sharp drop in single-thread +project. Unfortunately, both of these earlier experiments exhibited a sharp +drop in single-thread performance (at least 30% slower), due to the amount of fine-grained locking -necessary to compensate for the removal of the GIL. +necessary to compensate for the removal of the GIL. The Python 3.9 fork +is the first attempt at removing the GIL with an acceptable performance +impact. -This doesn't mean that you can't make good use of Python on multi-CPU machines! 
+The presence of the GIL in current Python releases +doesn't mean that you can't make good use of Python on multi-CPU machines! You just have to be creative with dividing the work up between multiple *processes* rather than multiple *threads*. The :class:`~concurrent.futures.ProcessPoolExecutor` class in the new @@ -434,22 +449,13 @@ thread of execution is in the C code and allow other threads to get some work done. Some standard library modules such as :mod:`zlib` and :mod:`hashlib` already do this. -It has been suggested that the GIL should be a per-interpreter-state lock rather -than truly global; interpreters then wouldn't be able to share objects. -Unfortunately, this isn't likely to happen either. It would be a tremendous -amount of work, because many object implementations currently have global state. -For example, small integers and short strings are cached; these caches would -have to be moved to the interpreter state. Other object types have their own -free list; these free lists would have to be moved to the interpreter state. -And so on. - -And I doubt that it can even be done in finite time, because the same problem -exists for 3rd party extensions. It is likely that 3rd party extensions are -being written at a faster rate than you can convert them to store all their -global state in the interpreter state. - -And finally, once you have multiple interpreters not sharing any state, what -have you gained over running each interpreter in a separate process? +An alternative approach to reducing the impact of the GIL is +to make the GIL a per-interpreter-state lock rather than truly global. +This was :ref:`first implemented in Python 3.12 ` and is +available in the C API. A Python interface to it is expected in Python 3.13. +The main limitation to it at the moment is likely to be 3rd party extension +modules, since these must be written with multiple interpreters in mind in +order to be usable, so many older extension modules will not be usable. 
Input and Output From de61d4bd4db868ce49a729a283763b94f2fda961 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 6 Feb 2024 11:36:23 -0500 Subject: [PATCH 010/126] gh-112066: Add `PyDict_SetDefaultRef` function. (#112123) The `PyDict_SetDefaultRef` function is similar to `PyDict_SetDefault`, but returns a strong reference through the optional `**result` pointer instead of a borrowed reference. Co-authored-by: Petr Viktorin --- Doc/c-api/dict.rst | 20 ++++ Doc/whatsnew/3.13.rst | 6 ++ Include/cpython/dictobject.h | 10 ++ Lib/test/test_capi/test_dict.py | 22 +++++ ...-11-15-13-47-48.gh-issue-112066.22WsqR.rst | 5 + Modules/_testcapi/dict.c | 26 ++++++ Objects/dictobject.c | 91 +++++++++++++++---- 7 files changed, 160 insertions(+), 20 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2023-11-15-13-47-48.gh-issue-112066.22WsqR.rst diff --git a/Doc/c-api/dict.rst b/Doc/c-api/dict.rst index 8471c98d044872..03f3d28187bfe9 100644 --- a/Doc/c-api/dict.rst +++ b/Doc/c-api/dict.rst @@ -174,6 +174,26 @@ Dictionary Objects .. versionadded:: 3.4 +.. c:function:: int PyDict_SetDefaultRef(PyObject *p, PyObject *key, PyObject *default_value, PyObject **result) + + Inserts *default_value* into the dictionary *p* with a key of *key* if the + key is not already present in the dictionary. If *result* is not ``NULL``, + then *\*result* is set to a :term:`strong reference` to either + *default_value*, if the key was not present, or the existing value, if *key* + was already present in the dictionary. + Returns ``1`` if the key was present and *default_value* was not inserted, + or ``0`` if the key was not present and *default_value* was inserted. + On failure, returns ``-1``, sets an exception, and sets ``*result`` + to ``NULL``. + + For clarity: if you have a strong reference to *default_value* before + calling this function, then after it returns, you hold a strong reference + to both *default_value* and *\*result* (if it's not ``NULL``). 
+ These may refer to the same object: in that case you hold two separate + references to it. + .. versionadded:: 3.13 + + .. c:function:: int PyDict_Pop(PyObject *p, PyObject *key, PyObject **result) Remove *key* from dictionary *p* and optionally return the removed value. diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 372757759b986f..e034d34c5fb5ab 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1440,6 +1440,12 @@ New Features not needed. (Contributed by Victor Stinner in :gh:`106004`.) +* Added :c:func:`PyDict_SetDefaultRef`, which is similar to + :c:func:`PyDict_SetDefault` but returns a :term:`strong reference` instead of + a :term:`borrowed reference`. This function returns ``-1`` on error, ``0`` on + insertion, and ``1`` if the key was already present in the dictionary. + (Contributed by Sam Gross in :gh:`112066`.) + * Add :c:func:`PyDict_ContainsString` function: same as :c:func:`PyDict_Contains`, but *key* is specified as a :c:expr:`const char*` UTF-8 encoded bytes string, rather than a :c:expr:`PyObject*`. diff --git a/Include/cpython/dictobject.h b/Include/cpython/dictobject.h index 1720fe6f01ea37..35b6a822a0dfff 100644 --- a/Include/cpython/dictobject.h +++ b/Include/cpython/dictobject.h @@ -41,6 +41,16 @@ PyAPI_FUNC(PyObject *) _PyDict_GetItemStringWithError(PyObject *, const char *); PyAPI_FUNC(PyObject *) PyDict_SetDefault( PyObject *mp, PyObject *key, PyObject *defaultobj); +// Inserts `key` with a value `default_value`, if `key` is not already present +// in the dictionary. If `result` is not NULL, then the value associated +// with `key` is returned in `*result` (either the existing value, or the now +// inserted `default_value`). 
+// Returns: +// -1 on error +// 0 if `key` was not present and `default_value` was inserted +// 1 if `key` was present and `default_value` was not inserted +PyAPI_FUNC(int) PyDict_SetDefaultRef(PyObject *mp, PyObject *key, PyObject *default_value, PyObject **result); + /* Get the number of items of a dictionary. */ static inline Py_ssize_t PyDict_GET_SIZE(PyObject *op) { PyDictObject *mp; diff --git a/Lib/test/test_capi/test_dict.py b/Lib/test/test_capi/test_dict.py index 57a7238588eae0..cca6145bc90c04 100644 --- a/Lib/test/test_capi/test_dict.py +++ b/Lib/test/test_capi/test_dict.py @@ -339,6 +339,28 @@ def test_dict_setdefault(self): # CRASHES setdefault({}, 'a', NULL) # CRASHES setdefault(NULL, 'a', 5) + def test_dict_setdefaultref(self): + setdefault = _testcapi.dict_setdefaultref + dct = {} + self.assertEqual(setdefault(dct, 'a', 5), 5) + self.assertEqual(dct, {'a': 5}) + self.assertEqual(setdefault(dct, 'a', 8), 5) + self.assertEqual(dct, {'a': 5}) + + dct2 = DictSubclass() + self.assertEqual(setdefault(dct2, 'a', 5), 5) + self.assertEqual(dct2, {'a': 5}) + self.assertEqual(setdefault(dct2, 'a', 8), 5) + self.assertEqual(dct2, {'a': 5}) + + self.assertRaises(TypeError, setdefault, {}, [], 5) # unhashable + self.assertRaises(SystemError, setdefault, UserDict(), 'a', 5) + self.assertRaises(SystemError, setdefault, [1], 0, 5) + self.assertRaises(SystemError, setdefault, 42, 'a', 5) + # CRASHES setdefault({}, NULL, 5) + # CRASHES setdefault({}, 'a', NULL) + # CRASHES setdefault(NULL, 'a', 5) + def test_mapping_keys_valuesitems(self): class BadMapping(dict): def keys(self): diff --git a/Misc/NEWS.d/next/C API/2023-11-15-13-47-48.gh-issue-112066.22WsqR.rst b/Misc/NEWS.d/next/C API/2023-11-15-13-47-48.gh-issue-112066.22WsqR.rst new file mode 100644 index 00000000000000..ae2b8b2444de97 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2023-11-15-13-47-48.gh-issue-112066.22WsqR.rst @@ -0,0 +1,5 @@ +Add :c:func:`PyDict_SetDefaultRef`: insert a key and value into a dictionary 
+if the key is not already present. This is similar to +:meth:`dict.setdefault`, but returns an integer value indicating if the key +was already present. It is also similar to :c:func:`PyDict_SetDefault`, but +returns a strong reference instead of a borrowed reference. diff --git a/Modules/_testcapi/dict.c b/Modules/_testcapi/dict.c index 42e056b7d07a31..fe03c24f75e196 100644 --- a/Modules/_testcapi/dict.c +++ b/Modules/_testcapi/dict.c @@ -225,6 +225,31 @@ dict_setdefault(PyObject *self, PyObject *args) return PyDict_SetDefault(mapping, key, defaultobj); } +static PyObject * +dict_setdefaultref(PyObject *self, PyObject *args) +{ + PyObject *obj, *key, *default_value, *result = UNINITIALIZED_PTR; + if (!PyArg_ParseTuple(args, "OOO", &obj, &key, &default_value)) { + return NULL; + } + NULLABLE(obj); + NULLABLE(key); + NULLABLE(default_value); + switch (PyDict_SetDefaultRef(obj, key, default_value, &result)) { + case -1: + assert(result == NULL); + return NULL; + case 0: + assert(result == default_value); + return result; + case 1: + return result; + default: + Py_FatalError("PyDict_SetDefaultRef() returned invalid code"); + Py_UNREACHABLE(); + } +} + static PyObject * dict_delitem(PyObject *self, PyObject *args) { @@ -433,6 +458,7 @@ static PyMethodDef test_methods[] = { {"dict_delitem", dict_delitem, METH_VARARGS}, {"dict_delitemstring", dict_delitemstring, METH_VARARGS}, {"dict_setdefault", dict_setdefault, METH_VARARGS}, + {"dict_setdefaultref", dict_setdefaultref, METH_VARARGS}, {"dict_keys", dict_keys, METH_O}, {"dict_values", dict_values, METH_O}, {"dict_items", dict_items, METH_O}, diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 4bb818b90a4a72..11b388d9f4adb0 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -3355,8 +3355,9 @@ dict_get_impl(PyDictObject *self, PyObject *key, PyObject *default_value) return Py_NewRef(val); } -PyObject * -PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj) +static int 
+dict_setdefault_ref(PyObject *d, PyObject *key, PyObject *default_value, + PyObject **result, int incref_result) { PyDictObject *mp = (PyDictObject *)d; PyObject *value; @@ -3365,41 +3366,64 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj) if (!PyDict_Check(d)) { PyErr_BadInternalCall(); - return NULL; + if (result) { + *result = NULL; + } + return -1; } if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) { hash = PyObject_Hash(key); - if (hash == -1) - return NULL; + if (hash == -1) { + if (result) { + *result = NULL; + } + return -1; + } } if (mp->ma_keys == Py_EMPTY_KEYS) { if (insert_to_emptydict(interp, mp, Py_NewRef(key), hash, - Py_NewRef(defaultobj)) < 0) { - return NULL; + Py_NewRef(default_value)) < 0) { + if (result) { + *result = NULL; + } + return -1; + } + if (result) { + *result = incref_result ? Py_NewRef(default_value) : default_value; } - return defaultobj; + return 0; } if (!PyUnicode_CheckExact(key) && DK_IS_UNICODE(mp->ma_keys)) { if (insertion_resize(interp, mp, 0) < 0) { - return NULL; + if (result) { + *result = NULL; + } + return -1; } } Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &value); - if (ix == DKIX_ERROR) - return NULL; + if (ix == DKIX_ERROR) { + if (result) { + *result = NULL; + } + return -1; + } if (ix == DKIX_EMPTY) { uint64_t new_version = _PyDict_NotifyEvent( - interp, PyDict_EVENT_ADDED, mp, key, defaultobj); + interp, PyDict_EVENT_ADDED, mp, key, default_value); mp->ma_keys->dk_version = 0; - value = defaultobj; + value = default_value; if (mp->ma_keys->dk_usable <= 0) { if (insertion_resize(interp, mp, 1) < 0) { - return NULL; + if (result) { + *result = NULL; + } + return -1; } } Py_ssize_t hashpos = find_empty_slot(mp->ma_keys, hash); @@ -3431,11 +3455,16 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj) mp->ma_keys->dk_usable--; mp->ma_keys->dk_nentries++; assert(mp->ma_keys->dk_usable >= 0); + ASSERT_CONSISTENT(mp); + if (result) { + *result = 
incref_result ? Py_NewRef(value) : value; + } + return 0; } else if (value == NULL) { uint64_t new_version = _PyDict_NotifyEvent( - interp, PyDict_EVENT_ADDED, mp, key, defaultobj); - value = defaultobj; + interp, PyDict_EVENT_ADDED, mp, key, default_value); + value = default_value; assert(_PyDict_HasSplitTable(mp)); assert(mp->ma_values->values[ix] == NULL); MAINTAIN_TRACKING(mp, key, value); @@ -3443,10 +3472,33 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj) _PyDictValues_AddToInsertionOrder(mp->ma_values, ix); mp->ma_used++; mp->ma_version_tag = new_version; + ASSERT_CONSISTENT(mp); + if (result) { + *result = incref_result ? Py_NewRef(value) : value; + } + return 0; } ASSERT_CONSISTENT(mp); - return value; + if (result) { + *result = incref_result ? Py_NewRef(value) : value; + } + return 1; +} + +int +PyDict_SetDefaultRef(PyObject *d, PyObject *key, PyObject *default_value, + PyObject **result) +{ + return dict_setdefault_ref(d, key, default_value, result, 1); +} + +PyObject * +PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj) +{ + PyObject *result; + dict_setdefault_ref(d, key, defaultobj, &result, 0); + return result; } /*[clinic input] @@ -3467,9 +3519,8 @@ dict_setdefault_impl(PyDictObject *self, PyObject *key, /*[clinic end generated code: output=f8c1101ebf69e220 input=0f063756e815fd9d]*/ { PyObject *val; - - val = PyDict_SetDefault((PyObject *)self, key, default_value); - return Py_XNewRef(val); + PyDict_SetDefaultRef((PyObject *)self, key, default_value, &val); + return val; } From f7a22a7055d97c05406512577bdfcb6d3f134b91 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 7 Feb 2024 01:41:18 +0900 Subject: [PATCH 011/126] gh-112087: Make list_{count, index, contains} to be thread-safe. 
(gh-114916) --- Objects/listobject.c | 52 ++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 82a4ba952de07d..307b8f1bd76cac 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -272,6 +272,15 @@ PyList_GetItemRef(PyObject *op, Py_ssize_t i) return Py_NewRef(PyList_GET_ITEM(op, i)); } +static inline PyObject* +list_get_item_ref(PyListObject *op, Py_ssize_t i) +{ + if (!valid_index(i, Py_SIZE(op))) { + return NULL; + } + return Py_NewRef(PyList_GET_ITEM(op, i)); +} + int PyList_SetItem(PyObject *op, Py_ssize_t i, PyObject *newitem) @@ -478,18 +487,20 @@ list_length(PyObject *a) static int list_contains(PyObject *aa, PyObject *el) { - PyListObject *a = (PyListObject *)aa; - PyObject *item; - Py_ssize_t i; - int cmp; - for (i = 0, cmp = 0 ; cmp == 0 && i < Py_SIZE(a); ++i) { - item = PyList_GET_ITEM(a, i); - Py_INCREF(item); - cmp = PyObject_RichCompareBool(item, el, Py_EQ); + for (Py_ssize_t i = 0; ; i++) { + PyObject *item = list_get_item_ref((PyListObject *)aa, i); + if (item == NULL) { + // out-of-bounds + return 0; + } + int cmp = PyObject_RichCompareBool(item, el, Py_EQ); Py_DECREF(item); + if (cmp != 0) { + return cmp; + } } - return cmp; + return 0; } static PyObject * @@ -2724,8 +2735,6 @@ list_index_impl(PyListObject *self, PyObject *value, Py_ssize_t start, Py_ssize_t stop) /*[clinic end generated code: output=ec51b88787e4e481 input=40ec5826303a0eb1]*/ { - Py_ssize_t i; - if (start < 0) { start += Py_SIZE(self); if (start < 0) @@ -2736,9 +2745,12 @@ list_index_impl(PyListObject *self, PyObject *value, Py_ssize_t start, if (stop < 0) stop = 0; } - for (i = start; i < stop && i < Py_SIZE(self); i++) { - PyObject *obj = self->ob_item[i]; - Py_INCREF(obj); + for (Py_ssize_t i = start; i < stop; i++) { + PyObject *obj = list_get_item_ref(self, i); + if (obj == NULL) { + // out-of-bounds + break; + } int cmp = PyObject_RichCompareBool(obj, value, 
Py_EQ); Py_DECREF(obj); if (cmp > 0) @@ -2764,15 +2776,17 @@ list_count(PyListObject *self, PyObject *value) /*[clinic end generated code: output=b1f5d284205ae714 input=3bdc3a5e6f749565]*/ { Py_ssize_t count = 0; - Py_ssize_t i; - - for (i = 0; i < Py_SIZE(self); i++) { - PyObject *obj = self->ob_item[i]; + for (Py_ssize_t i = 0; ; i++) { + PyObject *obj = list_get_item_ref(self, i); + if (obj == NULL) { + // out-of-bounds + break; + } if (obj == value) { count++; + Py_DECREF(obj); continue; } - Py_INCREF(obj); int cmp = PyObject_RichCompareBool(obj, value, Py_EQ); Py_DECREF(obj); if (cmp > 0) From 7fdd4235d790559372bbb1bf0c2384191a9bb5f3 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 6 Feb 2024 11:45:42 -0500 Subject: [PATCH 012/126] gh-112529: Stop the world around gc.get_referents (#114823) We do not want to add locking in `tp_traverse` slot implementations. Instead, stop the world when calling `gc.get_referents`. Note that the stop the world call is a no-op in the default build.
Co-authored-by: Pablo Galindo Salgado --- Modules/gcmodule.c | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 3b63dd7a9a8353..3a42654b41b2ac 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -230,6 +230,26 @@ referentsvisit(PyObject *obj, void *arg) return PyList_Append(list, obj) < 0; } +static int +append_referrents(PyObject *result, PyObject *args) +{ + for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(args); i++) { + PyObject *obj = PyTuple_GET_ITEM(args, i); + if (!_PyObject_IS_GC(obj)) { + continue; + } + + traverseproc traverse = Py_TYPE(obj)->tp_traverse; + if (!traverse) { + continue; + } + if (traverse(obj, referentsvisit, result)) { + return -1; + } + } + return 0; +} + /*[clinic input] gc.get_referents @@ -242,29 +262,24 @@ static PyObject * gc_get_referents_impl(PyObject *module, PyObject *args) /*[clinic end generated code: output=d47dc02cefd06fe8 input=b3ceab0c34038cbf]*/ { - Py_ssize_t i; if (PySys_Audit("gc.get_referents", "(O)", args) < 0) { return NULL; } + PyInterpreterState *interp = _PyInterpreterState_GET(); PyObject *result = PyList_New(0); if (result == NULL) return NULL; - for (i = 0; i < PyTuple_GET_SIZE(args); i++) { - traverseproc traverse; - PyObject *obj = PyTuple_GET_ITEM(args, i); + // NOTE: stop the world is a no-op in default build + _PyEval_StopTheWorld(interp); + int err = append_referrents(result, args); + _PyEval_StartTheWorld(interp); - if (!_PyObject_IS_GC(obj)) - continue; - traverse = Py_TYPE(obj)->tp_traverse; - if (! 
traverse) - continue; - if (traverse(obj, referentsvisit, result)) { - Py_DECREF(result); - return NULL; - } + if (err < 0) { + Py_CLEAR(result); } + return result; } From 76108b8b05040fc49a6bc50eb2e990576595c57c Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Tue, 6 Feb 2024 19:44:12 +0100 Subject: [PATCH 013/126] #gh-75705: Set unixfrom envelope in mailbox._mboxMMDF (GH-107117) --- Lib/mailbox.py | 5 +++-- Lib/test/test_mailbox.py | 12 +++++++++++- .../2023-07-23-12-28-26.gh-issue-75705.aB2-Ww.rst | 1 + 3 files changed, 15 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-07-23-12-28-26.gh-issue-75705.aB2-Ww.rst diff --git a/Lib/mailbox.py b/Lib/mailbox.py index 81ea210cf815a4..746811bd559412 100644 --- a/Lib/mailbox.py +++ b/Lib/mailbox.py @@ -830,10 +830,11 @@ def get_message(self, key): """Return a Message representation or raise a KeyError.""" start, stop = self._lookup(key) self._file.seek(start) - from_line = self._file.readline().replace(linesep, b'') + from_line = self._file.readline().replace(linesep, b'').decode('ascii') string = self._file.read(stop - self._file.tell()) msg = self._message_factory(string.replace(linesep, b'\n')) - msg.set_from(from_line[5:].decode('ascii')) + msg.set_unixfrom(from_line) + msg.set_from(from_line[5:]) return msg def get_string(self, key, from_=False): diff --git a/Lib/test/test_mailbox.py b/Lib/test/test_mailbox.py index d84faad0eb3406..c52c014185bec7 100644 --- a/Lib/test/test_mailbox.py +++ b/Lib/test/test_mailbox.py @@ -1127,12 +1127,14 @@ def test_add_from_string(self): # Add a string starting with 'From ' to the mailbox key = self._box.add('From foo@bar blah\nFrom: foo\n\n0\n') self.assertEqual(self._box[key].get_from(), 'foo@bar blah') + self.assertEqual(self._box[key].get_unixfrom(), 'From foo@bar blah') self.assertEqual(self._box[key].get_payload(), '0\n') def test_add_from_bytes(self): # Add a byte string starting with 'From ' to the mailbox key = self._box.add(b'From foo@bar 
blah\nFrom: foo\n\n0\n') self.assertEqual(self._box[key].get_from(), 'foo@bar blah') + self.assertEqual(self._box[key].get_unixfrom(), 'From foo@bar blah') self.assertEqual(self._box[key].get_payload(), '0\n') def test_add_mbox_or_mmdf_message(self): @@ -1667,18 +1669,23 @@ def test_initialize_with_unixfrom(self): msg = mailbox.Message(_sample_message) msg.set_unixfrom('From foo@bar blah') msg = mailbox.mboxMessage(msg) - self.assertEqual(msg.get_from(), 'foo@bar blah', msg.get_from()) + self.assertEqual(msg.get_from(), 'foo@bar blah') + self.assertEqual(msg.get_unixfrom(), 'From foo@bar blah') def test_from(self): # Get and set "From " line msg = mailbox.mboxMessage(_sample_message) self._check_from(msg) + self.assertIsNone(msg.get_unixfrom()) msg.set_from('foo bar') self.assertEqual(msg.get_from(), 'foo bar') + self.assertIsNone(msg.get_unixfrom()) msg.set_from('foo@bar', True) self._check_from(msg, 'foo@bar') + self.assertIsNone(msg.get_unixfrom()) msg.set_from('blah@temp', time.localtime()) self._check_from(msg, 'blah@temp') + self.assertIsNone(msg.get_unixfrom()) def test_flags(self): # Use get_flags(), set_flags(), add_flag(), remove_flag() @@ -1866,6 +1873,7 @@ def test_maildir_to_mboxmmdf(self): self.assertEqual(msg.get_flags(), result) self.assertEqual(msg.get_from(), 'MAILER-DAEMON %s' % time.asctime(time.gmtime(0.0))) + self.assertIsNone(msg.get_unixfrom()) msg_maildir.set_subdir('cur') self.assertEqual(class_(msg_maildir).get_flags(), 'RODFA') @@ -1914,10 +1922,12 @@ def test_mboxmmdf_to_mboxmmdf(self): msg_mboxMMDF = class_(_sample_message) msg_mboxMMDF.set_flags('RODFA') msg_mboxMMDF.set_from('foo@bar') + self.assertIsNone(msg_mboxMMDF.get_unixfrom()) for class2_ in (mailbox.mboxMessage, mailbox.MMDFMessage): msg2 = class2_(msg_mboxMMDF) self.assertEqual(msg2.get_flags(), 'RODFA') self.assertEqual(msg2.get_from(), 'foo@bar') + self.assertIsNone(msg2.get_unixfrom()) def test_mboxmmdf_to_mh(self): # Convert mboxMessage and MMDFMessage to MHMessage diff 
--git a/Misc/NEWS.d/next/Library/2023-07-23-12-28-26.gh-issue-75705.aB2-Ww.rst b/Misc/NEWS.d/next/Library/2023-07-23-12-28-26.gh-issue-75705.aB2-Ww.rst new file mode 100644 index 00000000000000..272e31d64cfbd9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-07-23-12-28-26.gh-issue-75705.aB2-Ww.rst @@ -0,0 +1 @@ +Set unixfrom envelope in :class:`mailbox.mbox` and :class:`mailbox.MMDF`. From 71239d50b54c90afd3fdde260848e0c6d73a5c27 Mon Sep 17 00:00:00 2001 From: Artem Mukhin Date: Tue, 6 Feb 2024 20:32:07 +0100 Subject: [PATCH 014/126] gh-103224: Resolve paths properly in test_sysconfig (GH-103292) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To pass tests when executed through a Python symlink. Co-authored-by: Miro Hrončok --- Lib/test/test_sysconfig.py | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/Lib/test/test_sysconfig.py b/Lib/test/test_sysconfig.py index be609a0abd29c8..bb87bf00dc2d1a 100644 --- a/Lib/test/test_sysconfig.py +++ b/Lib/test/test_sysconfig.py @@ -154,17 +154,21 @@ def test_posix_venv_scheme(self): 'python%d.%d' % sys.version_info[:2], 'site-packages') - # Resolve the paths in prefix - binpath = os.path.join(sys.prefix, binpath) - incpath = os.path.join(sys.prefix, incpath) - libpath = os.path.join(sys.prefix, libpath) + # Resolve the paths in an imaginary venv/ directory + binpath = os.path.join('venv', binpath) + incpath = os.path.join('venv', incpath) + libpath = os.path.join('venv', libpath) - self.assertEqual(binpath, sysconfig.get_path('scripts', scheme='posix_venv')) - self.assertEqual(libpath, sysconfig.get_path('purelib', scheme='posix_venv')) + # Mimic the venv module, set all bases to the venv directory + bases = ('base', 'platbase', 'installed_base', 'installed_platbase') + vars = {base: 'venv' for base in bases} + + self.assertEqual(binpath, sysconfig.get_path('scripts', scheme='posix_venv', vars=vars)) + self.assertEqual(libpath, 
sysconfig.get_path('purelib', scheme='posix_venv', vars=vars)) # The include directory on POSIX isn't exactly the same as before, # but it is "within" - sysconfig_includedir = sysconfig.get_path('include', scheme='posix_venv') + sysconfig_includedir = sysconfig.get_path('include', scheme='posix_venv', vars=vars) self.assertTrue(sysconfig_includedir.startswith(incpath + os.sep)) def test_nt_venv_scheme(self): @@ -174,14 +178,19 @@ def test_nt_venv_scheme(self): incpath = 'Include' libpath = os.path.join('Lib', 'site-packages') - # Resolve the paths in prefix - binpath = os.path.join(sys.prefix, binpath) - incpath = os.path.join(sys.prefix, incpath) - libpath = os.path.join(sys.prefix, libpath) + # Resolve the paths in an imaginary venv\ directory + venv = 'venv' + binpath = os.path.join(venv, binpath) + incpath = os.path.join(venv, incpath) + libpath = os.path.join(venv, libpath) + + # Mimic the venv module, set all bases to the venv directory + bases = ('base', 'platbase', 'installed_base', 'installed_platbase') + vars = {base: 'venv' for base in bases} - self.assertEqual(binpath, sysconfig.get_path('scripts', scheme='nt_venv')) - self.assertEqual(incpath, sysconfig.get_path('include', scheme='nt_venv')) - self.assertEqual(libpath, sysconfig.get_path('purelib', scheme='nt_venv')) + self.assertEqual(binpath, sysconfig.get_path('scripts', scheme='nt_venv', vars=vars)) + self.assertEqual(incpath, sysconfig.get_path('include', scheme='nt_venv', vars=vars)) + self.assertEqual(libpath, sysconfig.get_path('purelib', scheme='nt_venv', vars=vars)) def test_venv_scheme(self): if sys.platform == 'win32': From b6228b521b4692b2de1c1c12f4aa5623f8319084 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 6 Feb 2024 14:45:04 -0500 Subject: [PATCH 015/126] gh-115035: Mark ThreadHandles as non-joinable earlier after forking (#115042) This marks dead ThreadHandles as non-joinable earlier in `PyOS_AfterFork_Child()` before we execute any Python code. 
The handles are stored in a global linked list in `_PyRuntimeState` because `fork()` affects the entire process. --- Include/internal/pycore_pythread.h | 15 +++++--- Include/internal/pycore_runtime_init.h | 2 + Lib/threading.py | 5 +-- Modules/_threadmodule.c | 53 +++++++++++++++++--------- Python/pystate.c | 2 + Python/thread_nt.h | 4 -- Python/thread_pthread.h | 10 ----- 7 files changed, 50 insertions(+), 41 deletions(-) diff --git a/Include/internal/pycore_pythread.h b/Include/internal/pycore_pythread.h index 9c9a09f60f3441..265299d7574838 100644 --- a/Include/internal/pycore_pythread.h +++ b/Include/internal/pycore_pythread.h @@ -9,6 +9,7 @@ extern "C" { #endif #include "dynamic_annotations.h" // _Py_ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX +#include "pycore_llist.h" // struct llist_node // Get _POSIX_THREADS and _POSIX_SEMAPHORES macros if available #if (defined(HAVE_UNISTD_H) && !defined(_POSIX_THREADS) \ @@ -75,14 +76,22 @@ struct _pythread_runtime_state { struct py_stub_tls_entry tls_entries[PTHREAD_KEYS_MAX]; } stubs; #endif + + // Linked list of ThreadHandleObjects + struct llist_node handles; }; +#define _pythread_RUNTIME_INIT(pythread) \ + { \ + .handles = LLIST_INIT(pythread.handles), \ + } #ifdef HAVE_FORK /* Private function to reinitialize a lock at fork in the child process. Reset the lock to the unlocked state. Return 0 on success, return -1 on error. */ extern int _PyThread_at_fork_reinit(PyThread_type_lock *lock); +extern void _PyThread_AfterFork(struct _pythread_runtime_state *state); #endif /* HAVE_FORK */ @@ -143,12 +152,6 @@ PyAPI_FUNC(int) PyThread_join_thread(PyThread_handle_t); */ PyAPI_FUNC(int) PyThread_detach_thread(PyThread_handle_t); -/* - * Obtain the new thread ident and handle in a forked child process. 
- */ -PyAPI_FUNC(void) PyThread_update_thread_after_fork(PyThread_ident_t* ident, - PyThread_handle_t* handle); - #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 4370ad05bdc058..2ad1347ad48a59 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -16,6 +16,7 @@ extern "C" { #include "pycore_parser.h" // _parser_runtime_state_INIT #include "pycore_pyhash.h" // pyhash_state_INIT #include "pycore_pymem_init.h" // _pymem_allocators_standard_INIT +#include "pycore_pythread.h" // _pythread_RUNTIME_INIT #include "pycore_runtime_init_generated.h" // _Py_bytes_characters_INIT #include "pycore_signal.h" // _signals_RUNTIME_INIT #include "pycore_tracemalloc.h" // _tracemalloc_runtime_state_INIT @@ -90,6 +91,7 @@ extern PyTypeObject _PyExc_MemoryError; }, \ .obmalloc = _obmalloc_global_state_INIT, \ .pyhash_state = pyhash_state_INIT, \ + .threads = _pythread_RUNTIME_INIT(runtime.threads), \ .signals = _signals_RUNTIME_INIT, \ .interpreters = { \ /* This prevents interpreters from getting created \ diff --git a/Lib/threading.py b/Lib/threading.py index 75a08e5aac97d6..b6ff00acadd58f 100644 --- a/Lib/threading.py +++ b/Lib/threading.py @@ -949,7 +949,6 @@ def _after_fork(self, new_ident=None): # This thread is alive. 
self._ident = new_ident if self._handle is not None: - self._handle.after_fork_alive() assert self._handle.ident == new_ident # bpo-42350: If the fork happens when the thread is already stopped # (ex: after threading._shutdown() has been called), _tstate_lock @@ -965,9 +964,7 @@ def _after_fork(self, new_ident=None): self._is_stopped = True self._tstate_lock = None self._join_lock = None - if self._handle is not None: - self._handle.after_fork_dead() - self._handle = None + self._handle = None def __repr__(self): assert self._initialized, "Thread.__init__() was not called" diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 5cceb84658deb7..df02b023012fbd 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -44,6 +44,7 @@ get_thread_state(PyObject *module) typedef struct { PyObject_HEAD + struct llist_node node; // linked list node (see _pythread_runtime_state) PyThread_ident_t ident; PyThread_handle_t handle; char joinable; @@ -59,6 +60,11 @@ new_thread_handle(thread_module_state* state) self->ident = 0; self->handle = 0; self->joinable = 0; + + HEAD_LOCK(&_PyRuntime); + llist_insert_tail(&_PyRuntime.threads.handles, &self->node); + HEAD_UNLOCK(&_PyRuntime); + return self; } @@ -66,6 +72,14 @@ static void ThreadHandle_dealloc(ThreadHandleObject *self) { PyObject *tp = (PyObject *) Py_TYPE(self); + + // Remove ourself from the global list of handles + HEAD_LOCK(&_PyRuntime); + if (self->node.next != NULL) { + llist_remove(&self->node); + } + HEAD_UNLOCK(&_PyRuntime); + if (self->joinable) { int ret = PyThread_detach_thread(self->handle); if (ret) { @@ -77,6 +91,28 @@ ThreadHandle_dealloc(ThreadHandleObject *self) Py_DECREF(tp); } +void +_PyThread_AfterFork(struct _pythread_runtime_state *state) +{ + // gh-115035: We mark ThreadHandles as not joinable early in the child's + // after-fork handler. 
We do this before calling any Python code to ensure + // that it happens before any ThreadHandles are deallocated, such as by a + // GC cycle. + PyThread_ident_t current = PyThread_get_thread_ident_ex(); + + struct llist_node *node; + llist_for_each_safe(node, &state->handles) { + ThreadHandleObject *hobj = llist_data(node, ThreadHandleObject, node); + if (hobj->ident == current) { + continue; + } + + // Disallow calls to detach() and join() as they could crash. + hobj->joinable = 0; + llist_remove(node); + } +} + static PyObject * ThreadHandle_repr(ThreadHandleObject *self) { @@ -91,21 +127,6 @@ ThreadHandle_get_ident(ThreadHandleObject *self, void *ignored) } -static PyObject * -ThreadHandle_after_fork_alive(ThreadHandleObject *self, void* ignored) -{ - PyThread_update_thread_after_fork(&self->ident, &self->handle); - Py_RETURN_NONE; -} - -static PyObject * -ThreadHandle_after_fork_dead(ThreadHandleObject *self, void* ignored) -{ - // Disallow calls to detach() and join() as they could crash. 
- self->joinable = 0; - Py_RETURN_NONE; -} - static PyObject * ThreadHandle_detach(ThreadHandleObject *self, void* ignored) { @@ -157,8 +178,6 @@ static PyGetSetDef ThreadHandle_getsetlist[] = { static PyMethodDef ThreadHandle_methods[] = { - {"after_fork_alive", (PyCFunction)ThreadHandle_after_fork_alive, METH_NOARGS}, - {"after_fork_dead", (PyCFunction)ThreadHandle_after_fork_dead, METH_NOARGS}, {"detach", (PyCFunction)ThreadHandle_detach, METH_NOARGS}, {"join", (PyCFunction)ThreadHandle_join, METH_NOARGS}, {0, 0} diff --git a/Python/pystate.c b/Python/pystate.c index 7836c172bbfb61..e77e5bfa7e2df8 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -517,6 +517,8 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) return _PyStatus_NO_MEMORY(); } + _PyThread_AfterFork(&runtime->threads); + return _PyStatus_OK(); } #endif diff --git a/Python/thread_nt.h b/Python/thread_nt.h index 044e9fa111e979..ad467e0e7840e7 100644 --- a/Python/thread_nt.h +++ b/Python/thread_nt.h @@ -242,10 +242,6 @@ PyThread_detach_thread(PyThread_handle_t handle) { return (CloseHandle(hThread) == 0); } -void -PyThread_update_thread_after_fork(PyThread_ident_t* ident, PyThread_handle_t* handle) { -} - /* * Return the thread Id instead of a handle. 
The Id is said to uniquely identify the * thread in the system diff --git a/Python/thread_pthread.h b/Python/thread_pthread.h index fb3b79fc160502..556e3de0b071f8 100644 --- a/Python/thread_pthread.h +++ b/Python/thread_pthread.h @@ -339,16 +339,6 @@ PyThread_detach_thread(PyThread_handle_t th) { return pthread_detach((pthread_t) th); } -void -PyThread_update_thread_after_fork(PyThread_ident_t* ident, PyThread_handle_t* handle) { - // The thread id might have been updated in the forked child - pthread_t th = pthread_self(); - *ident = (PyThread_ident_t) th; - *handle = (PyThread_handle_t) th; - assert(th == (pthread_t) *ident); - assert(th == (pthread_t) *handle); -} - /* XXX This implementation is considered (to quote Tim Peters) "inherently hosed" because: - It does not guarantee the promise that a non-zero integer is returned. From 92abb0124037e5bc938fa870461a26f64c56095b Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Tue, 6 Feb 2024 14:03:43 -0800 Subject: [PATCH 016/126] gh-112075: Add critical sections for most dict APIs (#114508) Starts adding thread safety to dict objects. 
Use @critical_section for APIs which are exposed via argument clinic and don't directly correlate with a public C API which needs to acquire the lock. Use a _lock_held suffix for keeping changes to complicated functions simple and just wrapping them with a critical section. Acquire and release the lock in an existing function where it won't be overly disruptive to the existing logic. --- Include/internal/pycore_critical_section.h | 46 ++ Modules/_sre/sre.c | 27 +- Objects/clinic/dictobject.c.h | 30 +- Objects/dictobject.c | 866 +++++++++++++++------ Objects/odictobject.c | 19 +- Objects/setobject.c | 78 +- 6 files changed, 782 insertions(+), 284 deletions(-) diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h index bf2bbfffc38bd0..38ed8cd69804ba 100644 --- a/Include/internal/pycore_critical_section.h +++ b/Include/internal/pycore_critical_section.h @@ -104,12 +104,37 @@ extern "C" { # define Py_END_CRITICAL_SECTION2() \ _PyCriticalSection2_End(&_cs2); \ } + +// Asserts that the mutex is locked. The mutex must be held by the +// top-most critical section otherwise there's the possibility +// that the mutex would be swapped out in some code paths. +#define _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(mutex) \ + _PyCriticalSection_AssertHeld(mutex) + +// Asserts that the mutex for the given object is locked. The mutex must +// be held by the top-most critical section otherwise there's the +// possibility that the mutex would be swapped out in some code paths. +#ifdef Py_DEBUG + +#define _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op) \ + if (Py_REFCNT(op) != 1) { \ + _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(&_PyObject_CAST(op)->ob_mutex); \ + } + +#else /* Py_DEBUG */ + +#define _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op) + +#endif /* Py_DEBUG */ + #else /* !Py_GIL_DISABLED */ // The critical section APIs are no-ops with the GIL. 
# define Py_BEGIN_CRITICAL_SECTION(op) # define Py_END_CRITICAL_SECTION() # define Py_BEGIN_CRITICAL_SECTION2(a, b) # define Py_END_CRITICAL_SECTION2() +# define _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(mutex) +# define _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op) #endif /* !Py_GIL_DISABLED */ typedef struct { @@ -236,6 +261,27 @@ _PyCriticalSection2_End(_PyCriticalSection2 *c) PyAPI_FUNC(void) _PyCriticalSection_SuspendAll(PyThreadState *tstate); +#ifdef Py_GIL_DISABLED + +static inline void +_PyCriticalSection_AssertHeld(PyMutex *mutex) { +#ifdef Py_DEBUG + PyThreadState *tstate = _PyThreadState_GET(); + uintptr_t prev = tstate->critical_section; + if (prev & _Py_CRITICAL_SECTION_TWO_MUTEXES) { + _PyCriticalSection2 *cs = (_PyCriticalSection2 *)(prev & ~_Py_CRITICAL_SECTION_MASK); + assert(cs != NULL && (cs->base.mutex == mutex || cs->mutex2 == mutex)); + } + else { + _PyCriticalSection *cs = (_PyCriticalSection *)(tstate->critical_section & ~_Py_CRITICAL_SECTION_MASK); + assert(cs != NULL && cs->mutex == mutex); + } + +#endif +} + +#endif + #ifdef __cplusplus } #endif diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index d451974b9cf81e..00fbd9674b8cdd 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -39,13 +39,14 @@ static const char copyright[] = " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB "; #include "Python.h" -#include "pycore_dict.h" // _PyDict_Next() -#include "pycore_long.h" // _PyLong_GetZero() -#include "pycore_moduleobject.h" // _PyModule_GetState() +#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION +#include "pycore_dict.h" // _PyDict_Next() +#include "pycore_long.h" // _PyLong_GetZero() +#include "pycore_moduleobject.h" // _PyModule_GetState() -#include "sre.h" // SRE_CODE +#include "sre.h" // SRE_CODE -#include // tolower(), toupper(), isalnum() +#include // tolower(), toupper(), isalnum() #define SRE_CODE_BITS (8 * sizeof(SRE_CODE)) @@ -2349,26 +2350,28 @@ _sre_SRE_Match_groupdict_impl(MatchObject *self, 
PyObject *default_value) if (!result || !self->pattern->groupindex) return result; + Py_BEGIN_CRITICAL_SECTION(self->pattern->groupindex); while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) { int status; Py_INCREF(key); value = match_getslice(self, key, default_value); if (!value) { Py_DECREF(key); - goto failed; + Py_CLEAR(result); + goto exit; } status = _PyDict_SetItem_KnownHash(result, key, value, hash); Py_DECREF(value); Py_DECREF(key); - if (status < 0) - goto failed; + if (status < 0) { + Py_CLEAR(result); + goto exit; + } } +exit: + Py_END_CRITICAL_SECTION(); return result; - -failed: - Py_DECREF(result); - return NULL; } /*[clinic input] diff --git a/Objects/clinic/dictobject.c.h b/Objects/clinic/dictobject.c.h index 8f532f454156de..daaef211b1db49 100644 --- a/Objects/clinic/dictobject.c.h +++ b/Objects/clinic/dictobject.c.h @@ -2,6 +2,7 @@ preserve [clinic start generated code]*/ +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_CheckPositional() PyDoc_STRVAR(dict_fromkeys__doc__, @@ -65,6 +66,21 @@ PyDoc_STRVAR(dict___contains____doc__, #define DICT___CONTAINS___METHODDEF \ {"__contains__", (PyCFunction)dict___contains__, METH_O|METH_COEXIST, dict___contains____doc__}, +static PyObject * +dict___contains___impl(PyDictObject *self, PyObject *key); + +static PyObject * +dict___contains__(PyDictObject *self, PyObject *key) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = dict___contains___impl(self, key); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + PyDoc_STRVAR(dict_get__doc__, "get($self, key, default=None, /)\n" "--\n" @@ -93,7 +109,9 @@ dict_get(PyDictObject *self, PyObject *const *args, Py_ssize_t nargs) } default_value = args[1]; skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); return_value = dict_get_impl(self, key, default_value); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -130,7 +148,9 @@ 
dict_setdefault(PyDictObject *self, PyObject *const *args, Py_ssize_t nargs) } default_value = args[1]; skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); return_value = dict_setdefault_impl(self, key, default_value); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -209,7 +229,13 @@ dict_popitem_impl(PyDictObject *self); static PyObject * dict_popitem(PyDictObject *self, PyObject *Py_UNUSED(ignored)) { - return dict_popitem_impl(self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = dict_popitem_impl(self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(dict___sizeof____doc__, @@ -301,4 +327,4 @@ dict_values(PyDictObject *self, PyObject *Py_UNUSED(ignored)) { return dict_values_impl(self); } -/*[clinic end generated code: output=f3ac47dfbf341b23 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=c8fda06bac5b05f3 input=a9049054013a1b77]*/ diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 11b388d9f4adb0..2df95e977a180f 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -113,18 +113,19 @@ As a consequence of this, split keys have a maximum size of 16. 
#define PyDict_MINSIZE 8 #include "Python.h" -#include "pycore_bitutils.h" // _Py_bit_length -#include "pycore_call.h" // _PyObject_CallNoArgs() -#include "pycore_ceval.h" // _PyEval_GetBuiltin() -#include "pycore_code.h" // stats -#include "pycore_dict.h" // export _PyDict_SizeOf() -#include "pycore_freelist.h" // _PyFreeListState_GET() -#include "pycore_gc.h" // _PyObject_GC_IS_TRACKED() -#include "pycore_object.h" // _PyObject_GC_TRACK(), _PyDebugAllocatorStats() -#include "pycore_pyerrors.h" // _PyErr_GetRaisedException() -#include "pycore_pystate.h" // _PyThreadState_GET() -#include "pycore_setobject.h" // _PySet_NextEntry() -#include "stringlib/eq.h" // unicode_eq() +#include "pycore_bitutils.h" // _Py_bit_length +#include "pycore_call.h" // _PyObject_CallNoArgs() +#include "pycore_ceval.h" // _PyEval_GetBuiltin() +#include "pycore_code.h" // stats +#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION, Py_END_CRITICAL_SECTION +#include "pycore_dict.h" // export _PyDict_SizeOf() +#include "pycore_freelist.h" // _PyFreeListState_GET() +#include "pycore_gc.h" // _PyObject_GC_IS_TRACKED() +#include "pycore_object.h" // _PyObject_GC_TRACK(), _PyDebugAllocatorStats() +#include "pycore_pyerrors.h" // _PyErr_GetRaisedException() +#include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_setobject.h" // _PySet_NextEntry() +#include "stringlib/eq.h" // unicode_eq() #include @@ -141,6 +142,21 @@ To avoid slowing down lookups on a near-full table, we resize the table when it's USABLE_FRACTION (currently two-thirds) full. 
*/ +#ifdef Py_GIL_DISABLED + +static inline void +ASSERT_DICT_LOCKED(PyObject *op) +{ + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op); +} +#define ASSERT_DICT_LOCKED(op) ASSERT_DICT_LOCKED(_Py_CAST(PyObject*, op)) + +#else + +#define ASSERT_DICT_LOCKED(op) + +#endif + #define PERTURB_SHIFT 5 /* @@ -240,6 +256,16 @@ static int dictresize(PyInterpreterState *interp, PyDictObject *mp, static PyObject* dict_iter(PyObject *dict); +static int +contains_lock_held(PyDictObject *mp, PyObject *key); +static int +contains_known_hash_lock_held(PyDictObject *mp, PyObject *key, Py_ssize_t hash); +static int +setitem_lock_held(PyDictObject *mp, PyObject *key, PyObject *value); +static int +dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_value, + PyObject **result, int incref_result); + #include "clinic/dictobject.c.h" @@ -789,6 +815,8 @@ clone_combined_dict_keys(PyDictObject *orig) assert(orig->ma_keys != Py_EMPTY_KEYS); assert(orig->ma_keys->dk_refcnt == 1); + ASSERT_DICT_LOCKED(orig); + size_t keys_size = _PyDict_KeysSize(orig->ma_keys); PyDictKeysObject *keys = PyMem_Malloc(keys_size); if (keys == NULL) { @@ -1230,6 +1258,8 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, { PyObject *old_value; + ASSERT_DICT_LOCKED(mp); + if (DK_IS_UNICODE(mp->ma_keys) && !PyUnicode_CheckExact(key)) { if (insertion_resize(interp, mp, 0) < 0) goto Fail; @@ -1326,6 +1356,7 @@ insert_to_emptydict(PyInterpreterState *interp, PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value) { assert(mp->ma_keys == Py_EMPTY_KEYS); + ASSERT_DICT_LOCKED(mp); uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_ADDED, mp, key, value); @@ -1419,6 +1450,8 @@ dictresize(PyInterpreterState *interp, PyDictObject *mp, PyDictKeysObject *oldkeys; PyDictValues *oldvalues; + ASSERT_DICT_LOCKED(mp); + if (log2_newsize >= SIZEOF_SIZE_T*8) { PyErr_NoMemory(); return -1; @@ -1613,7 +1646,7 @@ _PyDict_FromItems(PyObject *const *keys, Py_ssize_t keys_offset, for 
(Py_ssize_t i = 0; i < length; i++) { PyObject *key = *ks; PyObject *value = *vs; - if (PyDict_SetItem(dict, key, value) < 0) { + if (setitem_lock_held((PyDictObject *)dict, key, value) < 0) { Py_DECREF(dict); return NULL; } @@ -1688,6 +1721,7 @@ PyDict_GetItem(PyObject *op, PyObject *key) Py_ssize_t _PyDict_LookupIndex(PyDictObject *mp, PyObject *key) { + // TODO: Thread safety PyObject *value; assert(PyDict_CheckExact((PyObject*)mp)); assert(PyUnicode_CheckExact(key)); @@ -1864,9 +1898,11 @@ _PyDict_LoadGlobal(PyDictObject *globals, PyDictObject *builtins, PyObject *key) } /* Consumes references to key and value */ -int -_PyDict_SetItem_Take2(PyDictObject *mp, PyObject *key, PyObject *value) +static int +setitem_take2_lock_held(PyDictObject *mp, PyObject *key, PyObject *value) { + ASSERT_DICT_LOCKED(mp); + assert(key); assert(value); assert(PyDict_Check(mp)); @@ -1879,7 +1915,9 @@ _PyDict_SetItem_Take2(PyDictObject *mp, PyObject *key, PyObject *value) return -1; } } + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (mp->ma_keys == Py_EMPTY_KEYS) { return insert_to_emptydict(interp, mp, key, hash, value); } @@ -1887,6 +1925,16 @@ _PyDict_SetItem_Take2(PyDictObject *mp, PyObject *key, PyObject *value) return insertdict(interp, mp, key, hash, value); } +int +_PyDict_SetItem_Take2(PyDictObject *mp, PyObject *key, PyObject *value) +{ + int res; + Py_BEGIN_CRITICAL_SECTION(mp); + res = setitem_take2_lock_held(mp, key, value); + Py_END_CRITICAL_SECTION(); + return res; +} + /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the * dictionary if it's merely replacing the value for an existing key. 
* This means that it's safe to loop over a dictionary with PyDict_Next() @@ -1906,6 +1954,16 @@ PyDict_SetItem(PyObject *op, PyObject *key, PyObject *value) Py_NewRef(key), Py_NewRef(value)); } +static int +setitem_lock_held(PyDictObject *mp, PyObject *key, PyObject *value) +{ + assert(key); + assert(value); + return setitem_take2_lock_held(mp, + Py_NewRef(key), Py_NewRef(value)); +} + + int _PyDict_SetItem_KnownHash(PyObject *op, PyObject *key, PyObject *value, Py_hash_t hash) @@ -1921,12 +1979,21 @@ _PyDict_SetItem_KnownHash(PyObject *op, PyObject *key, PyObject *value, assert(hash != -1); mp = (PyDictObject *)op; + int res; PyInterpreterState *interp = _PyInterpreterState_GET(); + + Py_BEGIN_CRITICAL_SECTION(mp); + if (mp->ma_keys == Py_EMPTY_KEYS) { - return insert_to_emptydict(interp, mp, Py_NewRef(key), hash, Py_NewRef(value)); + res = insert_to_emptydict(interp, mp, Py_NewRef(key), hash, Py_NewRef(value)); } - /* insertdict() handles any resizing that might be necessary */ - return insertdict(interp, mp, Py_NewRef(key), hash, Py_NewRef(value)); + else { + /* insertdict() handles any resizing that might be necessary */ + res = insertdict(interp, mp, Py_NewRef(key), hash, Py_NewRef(value)); + } + + Py_END_CRITICAL_SECTION(); + return res; } static void @@ -1951,6 +2018,8 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix, { PyObject *old_key; + ASSERT_DICT_LOCKED(mp); + Py_ssize_t hashpos = lookdict_index(mp->ma_keys, hash, ix); assert(hashpos >= 0); @@ -2002,8 +2071,8 @@ PyDict_DelItem(PyObject *op, PyObject *key) return _PyDict_DelItem_KnownHash(op, key, hash); } -int -_PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash) +static int +delitem_knownhash_lock_held(PyObject *op, PyObject *key, Py_hash_t hash) { Py_ssize_t ix; PyDictObject *mp; @@ -2013,6 +2082,9 @@ _PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash) PyErr_BadInternalCall(); return -1; } + + ASSERT_DICT_LOCKED(op); + assert(key); assert(hash != 
-1); mp = (PyDictObject *)op; @@ -2030,13 +2102,19 @@ _PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash) return delitem_common(mp, hash, ix, old_value, new_version); } -/* This function promises that the predicate -> deletion sequence is atomic - * (i.e. protected by the GIL), assuming the predicate itself doesn't - * release the GIL. - */ int -_PyDict_DelItemIf(PyObject *op, PyObject *key, - int (*predicate)(PyObject *value)) +_PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash) +{ + int res; + Py_BEGIN_CRITICAL_SECTION(op); + res = delitem_knownhash_lock_held(op, key, hash); + Py_END_CRITICAL_SECTION(); + return res; +} + +static int +delitemif_lock_held(PyObject *op, PyObject *key, + int (*predicate)(PyObject *value)) { Py_ssize_t hashpos, ix; PyDictObject *mp; @@ -2044,6 +2122,8 @@ _PyDict_DelItemIf(PyObject *op, PyObject *key, PyObject *old_value; int res; + ASSERT_DICT_LOCKED(op); + if (!PyDict_Check(op)) { PyErr_BadInternalCall(); return -1; @@ -2077,16 +2157,32 @@ _PyDict_DelItemIf(PyObject *op, PyObject *key, return 0; } } +/* This function promises that the predicate -> deletion sequence is atomic + * (i.e. 
protected by the GIL or the per-dict mutex in free threaded builds), + * assuming the predicate itself doesn't release the GIL (or cause re-entrancy + * which would release the per-dict mutex) + */ +int +_PyDict_DelItemIf(PyObject *op, PyObject *key, + int (*predicate)(PyObject *value)) +{ + int res; + Py_BEGIN_CRITICAL_SECTION(op); + res = delitemif_lock_held(op, key, predicate); + Py_END_CRITICAL_SECTION(); + return res; +} - -void -PyDict_Clear(PyObject *op) +static void +clear_lock_held(PyObject *op) { PyDictObject *mp; PyDictKeysObject *oldkeys; PyDictValues *oldvalues; Py_ssize_t i, n; + ASSERT_DICT_LOCKED(op); + if (!PyDict_Check(op)) return; mp = ((PyDictObject *)op); @@ -2119,6 +2215,14 @@ PyDict_Clear(PyObject *op) ASSERT_CONSISTENT(mp); } +void +PyDict_Clear(PyObject *op) +{ + Py_BEGIN_CRITICAL_SECTION(op); + clear_lock_held(op); + Py_END_CRITICAL_SECTION(); +} + /* Internal version of PyDict_Next that returns a hash value in addition * to the key and value. * Return 1 on success, return 0 when the reached the end of the dictionary @@ -2135,6 +2239,9 @@ _PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, if (!PyDict_Check(op)) return 0; + + ASSERT_DICT_LOCKED(op); + mp = (PyDictObject *)op; i = *ppos; if (mp->ma_values) { @@ -2208,7 +2315,11 @@ _PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, int PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue) { - return _PyDict_Next(op, ppos, pkey, pvalue, NULL); + int res; + Py_BEGIN_CRITICAL_SECTION(op); + res = _PyDict_Next(op, ppos, pkey, pvalue, NULL); + Py_END_CRITICAL_SECTION(); + return res; } @@ -2219,6 +2330,8 @@ _PyDict_Pop_KnownHash(PyDictObject *mp, PyObject *key, Py_hash_t hash, { assert(PyDict_Check(mp)); + ASSERT_DICT_LOCKED(mp); + if (mp->ma_used == 0) { if (result) { *result = NULL; @@ -2258,10 +2371,11 @@ _PyDict_Pop_KnownHash(PyDictObject *mp, PyObject *key, Py_hash_t hash, return 1; } - -int -PyDict_Pop(PyObject *op, PyObject *key, PyObject 
**result) +static int +pop_lock_held(PyObject *op, PyObject *key, PyObject **result) { + ASSERT_DICT_LOCKED(op); + if (!PyDict_Check(op)) { if (result) { *result = NULL; @@ -2291,6 +2405,17 @@ PyDict_Pop(PyObject *op, PyObject *key, PyObject **result) return _PyDict_Pop_KnownHash(dict, key, hash, result); } +int +PyDict_Pop(PyObject *op, PyObject *key, PyObject **result) +{ + int err; + Py_BEGIN_CRITICAL_SECTION(op); + err = pop_lock_held(op, key, result); + Py_END_CRITICAL_SECTION(); + + return err; +} + int PyDict_PopString(PyObject *op, const char *key, PyObject **result) @@ -2323,6 +2448,55 @@ _PyDict_Pop(PyObject *dict, PyObject *key, PyObject *default_value) return result; } +static PyDictObject * +dict_dict_fromkeys(PyInterpreterState *interp, PyDictObject *mp, + PyObject *iterable, PyObject *value) +{ + PyObject *oldvalue; + Py_ssize_t pos = 0; + PyObject *key; + Py_hash_t hash; + int unicode = DK_IS_UNICODE(((PyDictObject*)iterable)->ma_keys); + uint8_t new_size = Py_MAX( + estimate_log2_keysize(PyDict_GET_SIZE(iterable)), + DK_LOG_SIZE(mp->ma_keys)); + if (dictresize(interp, mp, new_size, unicode)) { + Py_DECREF(mp); + return NULL; + } + + while (_PyDict_Next(iterable, &pos, &key, &oldvalue, &hash)) { + if (insertdict(interp, mp, + Py_NewRef(key), hash, Py_NewRef(value))) { + Py_DECREF(mp); + return NULL; + } + } + return mp; +} + +static PyDictObject * +dict_set_fromkeys(PyInterpreterState *interp, PyDictObject *mp, + PyObject *iterable, PyObject *value) +{ + Py_ssize_t pos = 0; + PyObject *key; + Py_hash_t hash; + + if (dictresize(interp, mp, + estimate_log2_keysize(PySet_GET_SIZE(iterable)), 0)) { + Py_DECREF(mp); + return NULL; + } + + while (_PySet_NextEntry(iterable, &pos, &key, &hash)) { + if (insertdict(interp, mp, Py_NewRef(key), hash, Py_NewRef(value))) { + Py_DECREF(mp); + return NULL; + } + } + return mp; +} /* Internal version of dict.from_keys(). It is subclass-friendly. 
*/ PyObject * @@ -2338,49 +2512,22 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value) if (d == NULL) return NULL; - if (PyDict_CheckExact(d) && ((PyDictObject *)d)->ma_used == 0) { + + if (PyDict_CheckExact(d)) { if (PyDict_CheckExact(iterable)) { PyDictObject *mp = (PyDictObject *)d; - PyObject *oldvalue; - Py_ssize_t pos = 0; - PyObject *key; - Py_hash_t hash; - - int unicode = DK_IS_UNICODE(((PyDictObject*)iterable)->ma_keys); - if (dictresize(interp, mp, - estimate_log2_keysize(PyDict_GET_SIZE(iterable)), - unicode)) { - Py_DECREF(d); - return NULL; - } - while (_PyDict_Next(iterable, &pos, &key, &oldvalue, &hash)) { - if (insertdict(interp, mp, - Py_NewRef(key), hash, Py_NewRef(value))) { - Py_DECREF(d); - return NULL; - } - } + Py_BEGIN_CRITICAL_SECTION2(d, iterable); + d = (PyObject *)dict_dict_fromkeys(interp, mp, iterable, value); + Py_END_CRITICAL_SECTION2(); return d; } - if (PyAnySet_CheckExact(iterable)) { + else if (PyAnySet_CheckExact(iterable)) { PyDictObject *mp = (PyDictObject *)d; - Py_ssize_t pos = 0; - PyObject *key; - Py_hash_t hash; - - if (dictresize(interp, mp, - estimate_log2_keysize(PySet_GET_SIZE(iterable)), 0)) { - Py_DECREF(d); - return NULL; - } - while (_PySet_NextEntry(iterable, &pos, &key, &hash)) { - if (insertdict(interp, mp, Py_NewRef(key), hash, Py_NewRef(value))) { - Py_DECREF(d); - return NULL; - } - } + Py_BEGIN_CRITICAL_SECTION2(d, iterable); + d = (PyObject *)dict_set_fromkeys(interp, mp, iterable, value); + Py_END_CRITICAL_SECTION2(); return d; } } @@ -2392,12 +2539,17 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value) } if (PyDict_CheckExact(d)) { + Py_BEGIN_CRITICAL_SECTION(d); while ((key = PyIter_Next(it)) != NULL) { - status = PyDict_SetItem(d, key, value); + status = setitem_lock_held((PyDictObject *)d, key, value); Py_DECREF(key); - if (status < 0) - goto Fail; + if (status < 0) { + assert(PyErr_Occurred()); + goto dict_iter_exit; + } } +dict_iter_exit: + 
Py_END_CRITICAL_SECTION(); } else { while ((key = PyIter_Next(it)) != NULL) { status = PyObject_SetItem(d, key, value); @@ -2468,7 +2620,7 @@ dict_dealloc(PyObject *self) static PyObject * -dict_repr(PyObject *self) +dict_repr_lock_held(PyObject *self) { PyDictObject *mp = (PyDictObject *)self; Py_ssize_t i; @@ -2498,7 +2650,7 @@ dict_repr(PyObject *self) Note that repr may mutate the dict. */ i = 0; first = 1; - while (PyDict_Next((PyObject *)mp, &i, &key, &value)) { + while (_PyDict_Next((PyObject *)mp, &i, &key, &value, NULL)) { PyObject *s; int res; @@ -2551,15 +2703,25 @@ dict_repr(PyObject *self) return NULL; } +static PyObject * +dict_repr(PyObject *self) +{ + PyObject *res; + Py_BEGIN_CRITICAL_SECTION(self); + res = dict_repr_lock_held(self); + Py_END_CRITICAL_SECTION(); + return res; +} + static Py_ssize_t dict_length(PyObject *self) { PyDictObject *mp = (PyDictObject *)self; - return mp->ma_used; + return _Py_atomic_load_ssize_relaxed(&mp->ma_used); } static PyObject * -dict_subscript(PyObject *self, PyObject *key) +dict_subscript_lock_held(PyObject *self, PyObject *key) { PyDictObject *mp = (PyDictObject *)self; Py_ssize_t ix; @@ -2594,6 +2756,16 @@ dict_subscript(PyObject *self, PyObject *key) return Py_NewRef(value); } +static PyObject * +dict_subscript(PyObject *self, PyObject *key) +{ + PyObject *res; + Py_BEGIN_CRITICAL_SECTION(self); + res = dict_subscript_lock_held(self, key); + Py_END_CRITICAL_SECTION(); + return res; +} + static int dict_ass_sub(PyObject *mp, PyObject *v, PyObject *w) { @@ -2609,9 +2781,11 @@ static PyMappingMethods dict_as_mapping = { dict_ass_sub, /*mp_ass_subscript*/ }; -PyObject * -PyDict_Keys(PyObject *dict) +static PyObject * +keys_lock_held(PyObject *dict) { + ASSERT_DICT_LOCKED(dict); + if (dict == NULL || !PyDict_Check(dict)) { PyErr_BadInternalCall(); return NULL; @@ -2646,8 +2820,21 @@ PyDict_Keys(PyObject *dict) } PyObject * -PyDict_Values(PyObject *dict) +PyDict_Keys(PyObject *dict) +{ + PyObject *res; + 
Py_BEGIN_CRITICAL_SECTION(dict); + res = keys_lock_held(dict); + Py_END_CRITICAL_SECTION(); + + return res; +} + +static PyObject * +values_lock_held(PyObject *dict) { + ASSERT_DICT_LOCKED(dict); + if (dict == NULL || !PyDict_Check(dict)) { PyErr_BadInternalCall(); return NULL; @@ -2682,8 +2869,20 @@ PyDict_Values(PyObject *dict) } PyObject * -PyDict_Items(PyObject *dict) +PyDict_Values(PyObject *dict) +{ + PyObject *res; + Py_BEGIN_CRITICAL_SECTION(dict); + res = values_lock_held(dict); + Py_END_CRITICAL_SECTION(); + return res; +} + +static PyObject * +items_lock_held(PyObject *dict) { + ASSERT_DICT_LOCKED(dict); + if (dict == NULL || !PyDict_Check(dict)) { PyErr_BadInternalCall(); return NULL; @@ -2732,6 +2931,17 @@ PyDict_Items(PyObject *dict) return v; } +PyObject * +PyDict_Items(PyObject *dict) +{ + PyObject *res; + Py_BEGIN_CRITICAL_SECTION(dict); + res = items_lock_held(dict); + Py_END_CRITICAL_SECTION(); + + return res; +} + /*[clinic input] @classmethod dict.fromkeys @@ -2810,8 +3020,8 @@ dict_update(PyObject *self, PyObject *args, PyObject *kwds) producing iterable objects of length 2. 
*/ -int -PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override) +static int +merge_from_seq2_lock_held(PyObject *d, PyObject *seq2, int override) { PyObject *it; /* iter(seq2) */ Py_ssize_t i; /* index into seq2 of current element */ @@ -2863,14 +3073,14 @@ PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override) Py_INCREF(key); Py_INCREF(value); if (override) { - if (PyDict_SetItem(d, key, value) < 0) { + if (setitem_lock_held((PyDictObject *)d, key, value) < 0) { Py_DECREF(key); Py_DECREF(value); goto Fail; } } else { - if (PyDict_SetDefault(d, key, value) == NULL) { + if (dict_setdefault_ref_lock_held(d, key, value, NULL, 0) < 0) { Py_DECREF(key); Py_DECREF(value); goto Fail; @@ -2895,6 +3105,117 @@ PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override) return Py_SAFE_DOWNCAST(i, Py_ssize_t, int); } +int +PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override) +{ + int res; + Py_BEGIN_CRITICAL_SECTION(d); + res = merge_from_seq2_lock_held(d, seq2, override); + Py_END_CRITICAL_SECTION(); + + return res; +} + +static int +dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *other, int override) +{ + if (other == mp || other->ma_used == 0) + /* a.update(a) or a.update({}); nothing to do */ + return 0; + if (mp->ma_used == 0) { + /* Since the target dict is empty, PyDict_GetItem() + * always returns NULL. Setting override to 1 + * skips the unnecessary test. + */ + override = 1; + PyDictKeysObject *okeys = other->ma_keys; + + // If other is clean, combined, and just allocated, just clone it. 
+ if (other->ma_values == NULL && + other->ma_used == okeys->dk_nentries && + (DK_LOG_SIZE(okeys) == PyDict_LOG_MINSIZE || + USABLE_FRACTION(DK_SIZE(okeys)/2) < other->ma_used)) { + uint64_t new_version = _PyDict_NotifyEvent( + interp, PyDict_EVENT_CLONED, mp, (PyObject *)other, NULL); + PyDictKeysObject *keys = clone_combined_dict_keys(other); + if (keys == NULL) + return -1; + + dictkeys_decref(interp, mp->ma_keys); + mp->ma_keys = keys; + if (mp->ma_values != NULL) { + free_values(mp->ma_values); + mp->ma_values = NULL; + } + + mp->ma_used = other->ma_used; + mp->ma_version_tag = new_version; + ASSERT_CONSISTENT(mp); + + if (_PyObject_GC_IS_TRACKED(other) && !_PyObject_GC_IS_TRACKED(mp)) { + /* Maintain tracking. */ + _PyObject_GC_TRACK(mp); + } + + return 0; + } + } + /* Do one big resize at the start, rather than + * incrementally resizing as we insert new items. Expect + * that there will be no (or few) overlapping keys. + */ + if (USABLE_FRACTION(DK_SIZE(mp->ma_keys)) < other->ma_used) { + int unicode = DK_IS_UNICODE(other->ma_keys); + if (dictresize(interp, mp, + estimate_log2_keysize(mp->ma_used + other->ma_used), + unicode)) { + return -1; + } + } + + Py_ssize_t orig_size = other->ma_keys->dk_nentries; + Py_ssize_t pos = 0; + Py_hash_t hash; + PyObject *key, *value; + + while (_PyDict_Next((PyObject*)other, &pos, &key, &value, &hash)) { + int err = 0; + Py_INCREF(key); + Py_INCREF(value); + if (override == 1) { + err = insertdict(interp, mp, + Py_NewRef(key), hash, Py_NewRef(value)); + } + else { + err = contains_known_hash_lock_held(mp, key, hash); + if (err == 0) { + err = insertdict(interp, mp, + Py_NewRef(key), hash, Py_NewRef(value)); + } + else if (err > 0) { + if (override != 0) { + _PyErr_SetKeyError(key); + Py_DECREF(value); + Py_DECREF(key); + return -1; + } + err = 0; + } + } + Py_DECREF(value); + Py_DECREF(key); + if (err != 0) + return -1; + + if (orig_size != other->ma_keys->dk_nentries) { + PyErr_SetString(PyExc_RuntimeError, + "dict 
mutated during update"); + return -1; + } + } + return 0; +} + static int dict_merge(PyInterpreterState *interp, PyObject *a, PyObject *b, int override) { @@ -2912,127 +3233,44 @@ dict_merge(PyInterpreterState *interp, PyObject *a, PyObject *b, int override) return -1; } mp = (PyDictObject*)a; + int res = 0; if (PyDict_Check(b) && (Py_TYPE(b)->tp_iter == dict_iter)) { other = (PyDictObject*)b; - if (other == mp || other->ma_used == 0) - /* a.update(a) or a.update({}); nothing to do */ - return 0; - if (mp->ma_used == 0) { - /* Since the target dict is empty, PyDict_GetItem() - * always returns NULL. Setting override to 1 - * skips the unnecessary test. - */ - override = 1; - PyDictKeysObject *okeys = other->ma_keys; - - // If other is clean, combined, and just allocated, just clone it. - if (other->ma_values == NULL && - other->ma_used == okeys->dk_nentries && - (DK_LOG_SIZE(okeys) == PyDict_LOG_MINSIZE || - USABLE_FRACTION(DK_SIZE(okeys)/2) < other->ma_used)) { - uint64_t new_version = _PyDict_NotifyEvent( - interp, PyDict_EVENT_CLONED, mp, b, NULL); - PyDictKeysObject *keys = clone_combined_dict_keys(other); - if (keys == NULL) { - return -1; - } - - dictkeys_decref(interp, mp->ma_keys); - mp->ma_keys = keys; - if (mp->ma_values != NULL) { - free_values(mp->ma_values); - mp->ma_values = NULL; - } - - mp->ma_used = other->ma_used; - mp->ma_version_tag = new_version; - ASSERT_CONSISTENT(mp); - - if (_PyObject_GC_IS_TRACKED(other) && !_PyObject_GC_IS_TRACKED(mp)) { - /* Maintain tracking. */ - _PyObject_GC_TRACK(mp); - } - - return 0; - } - } - /* Do one big resize at the start, rather than - * incrementally resizing as we insert new items. Expect - * that there will be no (or few) overlapping keys. 
- */ - if (USABLE_FRACTION(DK_SIZE(mp->ma_keys)) < other->ma_used) { - int unicode = DK_IS_UNICODE(other->ma_keys); - if (dictresize(interp, mp, - estimate_log2_keysize(mp->ma_used + other->ma_used), - unicode)) { - return -1; - } - } - - Py_ssize_t orig_size = other->ma_keys->dk_nentries; - Py_ssize_t pos = 0; - Py_hash_t hash; - PyObject *key, *value; - - while (_PyDict_Next((PyObject*)other, &pos, &key, &value, &hash)) { - int err = 0; - Py_INCREF(key); - Py_INCREF(value); - if (override == 1) { - err = insertdict(interp, mp, - Py_NewRef(key), hash, Py_NewRef(value)); - } - else { - err = _PyDict_Contains_KnownHash(a, key, hash); - if (err == 0) { - err = insertdict(interp, mp, - Py_NewRef(key), hash, Py_NewRef(value)); - } - else if (err > 0) { - if (override != 0) { - _PyErr_SetKeyError(key); - Py_DECREF(value); - Py_DECREF(key); - return -1; - } - err = 0; - } - } - Py_DECREF(value); - Py_DECREF(key); - if (err != 0) - return -1; - - if (orig_size != other->ma_keys->dk_nentries) { - PyErr_SetString(PyExc_RuntimeError, - "dict mutated during update"); - return -1; - } - } + int res; + Py_BEGIN_CRITICAL_SECTION2(a, b); + res = dict_dict_merge(interp, (PyDictObject *)a, other, override); + ASSERT_CONSISTENT(a); + Py_END_CRITICAL_SECTION2(); + return res; } else { /* Do it the generic, slower way */ + Py_BEGIN_CRITICAL_SECTION(a); PyObject *keys = PyMapping_Keys(b); PyObject *iter; PyObject *key, *value; int status; - if (keys == NULL) + if (keys == NULL) { /* Docstring says this is equivalent to E.keys() so * if E doesn't have a .keys() method we want * AttributeError to percolate up. Might as well * do the same for any other error. 
*/ - return -1; + res = -1; + goto slow_exit; + } iter = PyObject_GetIter(keys); Py_DECREF(keys); - if (iter == NULL) - return -1; + if (iter == NULL) { + res = -1; + goto slow_exit; + } for (key = PyIter_Next(iter); key; key = PyIter_Next(iter)) { if (override != 1) { - status = PyDict_Contains(a, key); + status = contains_lock_held(mp, key); if (status != 0) { if (status > 0) { if (override == 0) { @@ -3043,30 +3281,39 @@ dict_merge(PyInterpreterState *interp, PyObject *a, PyObject *b, int override) } Py_DECREF(key); Py_DECREF(iter); - return -1; + res = -1; + goto slow_exit; } } value = PyObject_GetItem(b, key); if (value == NULL) { Py_DECREF(iter); Py_DECREF(key); - return -1; + res = -1; + goto slow_exit; } - status = PyDict_SetItem(a, key, value); + status = setitem_lock_held(mp, key, value); Py_DECREF(key); Py_DECREF(value); if (status < 0) { Py_DECREF(iter); + res = -1; + goto slow_exit; return -1; } } Py_DECREF(iter); - if (PyErr_Occurred()) + if (PyErr_Occurred()) { /* Iterator completed, via error */ - return -1; + res = -1; + goto slow_exit; + } + +slow_exit: + ASSERT_CONSISTENT(a); + Py_END_CRITICAL_SECTION(); + return res; } - ASSERT_CONSISTENT(a); - return 0; } int @@ -3104,17 +3351,14 @@ dict_copy_impl(PyDictObject *self) return PyDict_Copy((PyObject *)self); } -PyObject * -PyDict_Copy(PyObject *o) +static PyObject * +copy_lock_held(PyObject *o) { PyObject *copy; PyDictObject *mp; PyInterpreterState *interp = _PyInterpreterState_GET(); - if (o == NULL || !PyDict_Check(o)) { - PyErr_BadInternalCall(); - return NULL; - } + ASSERT_DICT_LOCKED(o); mp = (PyDictObject *)o; if (mp->ma_used == 0) { @@ -3197,6 +3441,23 @@ PyDict_Copy(PyObject *o) return NULL; } +PyObject * +PyDict_Copy(PyObject *o) +{ + if (o == NULL || !PyDict_Check(o)) { + PyErr_BadInternalCall(); + return NULL; + } + + PyObject *res; + Py_BEGIN_CRITICAL_SECTION(o); + + res = copy_lock_held(o); + + Py_END_CRITICAL_SECTION(); + return res; +} + Py_ssize_t PyDict_Size(PyObject *mp) { @@ 
-3212,10 +3473,13 @@ PyDict_Size(PyObject *mp) * Uses only Py_EQ comparison. */ static int -dict_equal(PyDictObject *a, PyDictObject *b) +dict_equal_lock_held(PyDictObject *a, PyDictObject *b) { Py_ssize_t i; + ASSERT_DICT_LOCKED(a); + ASSERT_DICT_LOCKED(b); + if (a->ma_used != b->ma_used) /* can't be equal if # of entries differ */ return 0; @@ -3270,6 +3534,17 @@ dict_equal(PyDictObject *a, PyDictObject *b) return 1; } +static int +dict_equal(PyDictObject *a, PyDictObject *b) +{ + int res; + Py_BEGIN_CRITICAL_SECTION2(a, b); + res = dict_equal_lock_held(a, b); + Py_END_CRITICAL_SECTION2(); + + return res; +} + static PyObject * dict_richcompare(PyObject *v, PyObject *w, int op) { @@ -3293,6 +3568,7 @@ dict_richcompare(PyObject *v, PyObject *w, int op) /*[clinic input] @coexist +@critical_section dict.__contains__ key: object @@ -3302,8 +3578,8 @@ True if the dictionary has the specified key, else False. [clinic start generated code]*/ static PyObject * -dict___contains__(PyDictObject *self, PyObject *key) -/*[clinic end generated code: output=a3d03db709ed6e6b input=fe1cb42ad831e820]*/ +dict___contains___impl(PyDictObject *self, PyObject *key) +/*[clinic end generated code: output=1b314e6da7687dae input=bc76ec9c157cb81b]*/ { register PyDictObject *mp = self; Py_hash_t hash; @@ -3324,6 +3600,7 @@ dict___contains__(PyDictObject *self, PyObject *key) } /*[clinic input] +@critical_section dict.get key: object @@ -3335,7 +3612,7 @@ Return the value for key if key is in the dictionary, else default. 
static PyObject * dict_get_impl(PyDictObject *self, PyObject *key, PyObject *default_value) -/*[clinic end generated code: output=bba707729dee05bf input=279ddb5790b6b107]*/ +/*[clinic end generated code: output=bba707729dee05bf input=a631d3f18f584c60]*/ { PyObject *val = NULL; Py_hash_t hash; @@ -3356,7 +3633,7 @@ dict_get_impl(PyDictObject *self, PyObject *key, PyObject *default_value) } static int -dict_setdefault_ref(PyObject *d, PyObject *key, PyObject *default_value, +dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_value, PyObject **result, int incref_result) { PyDictObject *mp = (PyDictObject *)d; @@ -3364,6 +3641,8 @@ dict_setdefault_ref(PyObject *d, PyObject *key, PyObject *default_value, Py_hash_t hash; PyInterpreterState *interp = _PyInterpreterState_GET(); + ASSERT_DICT_LOCKED(d); + if (!PyDict_Check(d)) { PyErr_BadInternalCall(); if (result) { @@ -3490,18 +3769,25 @@ int PyDict_SetDefaultRef(PyObject *d, PyObject *key, PyObject *default_value, PyObject **result) { - return dict_setdefault_ref(d, key, default_value, result, 1); + int res; + Py_BEGIN_CRITICAL_SECTION(d); + res = dict_setdefault_ref_lock_held(d, key, default_value, result, 1); + Py_END_CRITICAL_SECTION(); + return res; } PyObject * PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj) { PyObject *result; - dict_setdefault_ref(d, key, defaultobj, &result, 0); + Py_BEGIN_CRITICAL_SECTION(d); + dict_setdefault_ref_lock_held(d, key, defaultobj, &result, 0); + Py_END_CRITICAL_SECTION(); return result; } /*[clinic input] +@critical_section dict.setdefault key: object @@ -3516,10 +3802,10 @@ Return the value for key if key is in the dictionary, else default. 
static PyObject * dict_setdefault_impl(PyDictObject *self, PyObject *key, PyObject *default_value) -/*[clinic end generated code: output=f8c1101ebf69e220 input=0f063756e815fd9d]*/ +/*[clinic end generated code: output=f8c1101ebf69e220 input=9237af9a0a224302]*/ { PyObject *val; - PyDict_SetDefaultRef((PyObject *)self, key, default_value, &val); + dict_setdefault_ref_lock_held((PyObject *)self, key, default_value, &val, 1); return val; } @@ -3559,6 +3845,7 @@ dict_pop_impl(PyDictObject *self, PyObject *key, PyObject *default_value) } /*[clinic input] +@critical_section dict.popitem Remove and return a (key, value) pair as a 2-tuple. @@ -3569,7 +3856,7 @@ Raises KeyError if the dict is empty. static PyObject * dict_popitem_impl(PyDictObject *self) -/*[clinic end generated code: output=e65fcb04420d230d input=1c38a49f21f64941]*/ +/*[clinic end generated code: output=e65fcb04420d230d input=ef28b4da5f0f762e]*/ { Py_ssize_t i, j; PyObject *res; @@ -3695,8 +3982,8 @@ dict_tp_clear(PyObject *op) static PyObject *dictiter_new(PyDictObject *, PyTypeObject *); -Py_ssize_t -_PyDict_SizeOf(PyDictObject *mp) +static Py_ssize_t +sizeof_lock_held(PyDictObject *mp) { size_t res = _PyObject_SIZE(Py_TYPE(mp)); if (mp->ma_values) { @@ -3711,6 +3998,17 @@ _PyDict_SizeOf(PyDictObject *mp) return (Py_ssize_t)res; } +Py_ssize_t +_PyDict_SizeOf(PyDictObject *mp) +{ + Py_ssize_t res; + Py_BEGIN_CRITICAL_SECTION(mp); + res = sizeof_lock_held(mp); + Py_END_CRITICAL_SECTION(); + + return res; +} + size_t _PyDict_KeysSize(PyDictKeysObject *keys) { @@ -3794,15 +4092,29 @@ static PyMethodDef mapp_methods[] = { {NULL, NULL} /* sentinel */ }; -/* Return 1 if `key` is in dict `op`, 0 if not, and -1 on error. 
*/ -int -PyDict_Contains(PyObject *op, PyObject *key) +static int +contains_known_hash_lock_held(PyDictObject *mp, PyObject *key, Py_ssize_t hash) +{ + Py_ssize_t ix; + PyObject *value; + + ASSERT_DICT_LOCKED(mp); + + ix = _Py_dict_lookup(mp, key, hash, &value); + if (ix == DKIX_ERROR) + return -1; + return (ix != DKIX_EMPTY && value != NULL); +} + +static int +contains_lock_held(PyDictObject *mp, PyObject *key) { Py_hash_t hash; Py_ssize_t ix; - PyDictObject *mp = (PyDictObject *)op; PyObject *value; + ASSERT_DICT_LOCKED(mp); + if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) { hash = PyObject_Hash(key); if (hash == -1) @@ -3814,6 +4126,17 @@ PyDict_Contains(PyObject *op, PyObject *key) return (ix != DKIX_EMPTY && value != NULL); } +/* Return 1 if `key` is in dict `op`, 0 if not, and -1 on error. */ +int +PyDict_Contains(PyObject *op, PyObject *key) +{ + int res; + Py_BEGIN_CRITICAL_SECTION(op); + res = contains_lock_held((PyDictObject *)op, key); + Py_END_CRITICAL_SECTION(); + return res; +} + int PyDict_ContainsString(PyObject *op, const char *key) { @@ -4180,17 +4503,15 @@ static PyMethodDef dictiter_methods[] = { }; static PyObject* -dictiter_iternextkey(PyObject *self) +dictiter_iternextkey_lock_held(PyDictObject *d, PyObject *self) { dictiterobject *di = (dictiterobject *)self; PyObject *key; Py_ssize_t i; PyDictKeysObject *k; - PyDictObject *d = di->di_dict; - if (d == NULL) - return NULL; assert (PyDict_Check(d)); + ASSERT_DICT_LOCKED(d); if (di->di_used != d->ma_used) { PyErr_SetString(PyExc_RuntimeError, @@ -4248,6 +4569,23 @@ dictiter_iternextkey(PyObject *self) return NULL; } +static PyObject* +dictiter_iternextkey(PyObject *self) +{ + dictiterobject *di = (dictiterobject *)self; + PyDictObject *d = di->di_dict; + + if (d == NULL) + return NULL; + + PyObject *value; + Py_BEGIN_CRITICAL_SECTION(d); + value = dictiter_iternextkey_lock_held(d, self); + Py_END_CRITICAL_SECTION(); + + return value; +} + PyTypeObject PyDictIterKey_Type 
= { PyVarObject_HEAD_INIT(&PyType_Type, 0) "dict_keyiterator", /* tp_name */ @@ -4282,16 +4620,14 @@ PyTypeObject PyDictIterKey_Type = { }; static PyObject * -dictiter_iternextvalue(PyObject *self) +dictiter_iternextvalue_lock_held(PyDictObject *d, PyObject *self) { dictiterobject *di = (dictiterobject *)self; PyObject *value; Py_ssize_t i; - PyDictObject *d = di->di_dict; - if (d == NULL) - return NULL; assert (PyDict_Check(d)); + ASSERT_DICT_LOCKED(d); if (di->di_used != d->ma_used) { PyErr_SetString(PyExc_RuntimeError, @@ -4348,6 +4684,23 @@ dictiter_iternextvalue(PyObject *self) return NULL; } +static PyObject * +dictiter_iternextvalue(PyObject *self) +{ + dictiterobject *di = (dictiterobject *)self; + PyDictObject *d = di->di_dict; + + if (d == NULL) + return NULL; + + PyObject *value; + Py_BEGIN_CRITICAL_SECTION(d); + value = dictiter_iternextvalue_lock_held(d, self); + Py_END_CRITICAL_SECTION(); + + return value; +} + PyTypeObject PyDictIterValue_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) "dict_valueiterator", /* tp_name */ @@ -4382,15 +4735,12 @@ PyTypeObject PyDictIterValue_Type = { }; static PyObject * -dictiter_iternextitem(PyObject *self) +dictiter_iternextitem_lock_held(PyDictObject *d, PyObject *self) { dictiterobject *di = (dictiterobject *)self; PyObject *key, *value, *result; Py_ssize_t i; - PyDictObject *d = di->di_dict; - if (d == NULL) - return NULL; assert (PyDict_Check(d)); if (di->di_used != d->ma_used) { @@ -4473,6 +4823,22 @@ dictiter_iternextitem(PyObject *self) return NULL; } +static PyObject * +dictiter_iternextitem(PyObject *self) +{ + dictiterobject *di = (dictiterobject *)self; + PyDictObject *d = di->di_dict; + + if (d == NULL) + return NULL; + + PyObject *item; + Py_BEGIN_CRITICAL_SECTION(d); + item = dictiter_iternextitem_lock_held(d, self); + Py_END_CRITICAL_SECTION(); + return item; +} + PyTypeObject PyDictIterItem_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) "dict_itemiterator", /* tp_name */ @@ -4510,15 +4876,12 @@ 
PyTypeObject PyDictIterItem_Type = { /* dictreviter */ static PyObject * -dictreviter_iternext(PyObject *self) +dictreviter_iter_PyDict_Next(PyDictObject *d, PyObject *self) { dictiterobject *di = (dictiterobject *)self; - PyDictObject *d = di->di_dict; - if (d == NULL) { - return NULL; - } assert (PyDict_Check(d)); + ASSERT_DICT_LOCKED(d); if (di->di_used != d->ma_used) { PyErr_SetString(PyExc_RuntimeError, @@ -4609,6 +4972,23 @@ dictreviter_iternext(PyObject *self) return NULL; } +static PyObject * +dictreviter_iternext(PyObject *self) +{ + dictiterobject *di = (dictiterobject *)self; + PyDictObject *d = di->di_dict; + + if (d == NULL) + return NULL; + + PyObject *value; + Py_BEGIN_CRITICAL_SECTION(d); + value = dictreviter_iter_PyDict_Next(d, self); + Py_END_CRITICAL_SECTION(); + + return value; +} + PyTypeObject PyDictRevIterKey_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) "dict_reversekeyiterator", @@ -5037,14 +5417,12 @@ dictviews_or(PyObject* self, PyObject *other) } static PyObject * -dictitems_xor(PyObject *self, PyObject *other) +dictitems_xor_lock_held(PyObject *d1, PyObject *d2) { - assert(PyDictItems_Check(self)); - assert(PyDictItems_Check(other)); - PyObject *d1 = (PyObject *)((_PyDictViewObject *)self)->dv_dict; - PyObject *d2 = (PyObject *)((_PyDictViewObject *)other)->dv_dict; + ASSERT_DICT_LOCKED(d1); + ASSERT_DICT_LOCKED(d2); - PyObject *temp_dict = PyDict_Copy(d1); + PyObject *temp_dict = copy_lock_held(d1); if (temp_dict == NULL) { return NULL; } @@ -5122,6 +5500,22 @@ dictitems_xor(PyObject *self, PyObject *other) return NULL; } +static PyObject * +dictitems_xor(PyObject *self, PyObject *other) +{ + assert(PyDictItems_Check(self)); + assert(PyDictItems_Check(other)); + PyObject *d1 = (PyObject *)((_PyDictViewObject *)self)->dv_dict; + PyObject *d2 = (PyObject *)((_PyDictViewObject *)other)->dv_dict; + + PyObject *res; + Py_BEGIN_CRITICAL_SECTION2(d1, d2); + res = dictitems_xor_lock_held(d1, d2); + Py_END_CRITICAL_SECTION2(); + + return 
res; +} + static PyObject* dictviews_xor(PyObject* self, PyObject *other) { diff --git a/Objects/odictobject.c b/Objects/odictobject.c index b5280c39e1be54..421bc52992d735 100644 --- a/Objects/odictobject.c +++ b/Objects/odictobject.c @@ -465,12 +465,13 @@ Potential Optimizations */ #include "Python.h" -#include "pycore_call.h" // _PyObject_CallNoArgs() -#include "pycore_ceval.h" // _PyEval_GetBuiltin() -#include "pycore_dict.h" // _Py_dict_lookup() -#include "pycore_object.h" // _PyObject_GC_UNTRACK() -#include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() -#include // offsetof() +#include "pycore_call.h" // _PyObject_CallNoArgs() +#include "pycore_ceval.h" // _PyEval_GetBuiltin() +#include "pycore_critical_section.h" //_Py_BEGIN_CRITICAL_SECTION +#include "pycore_dict.h" // _Py_dict_lookup() +#include "pycore_object.h" // _PyObject_GC_UNTRACK() +#include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() +#include // offsetof() #include "clinic/odictobject.c.h" @@ -1039,6 +1040,8 @@ _odict_popkey_hash(PyObject *od, PyObject *key, PyObject *failobj, { PyObject *value = NULL; + Py_BEGIN_CRITICAL_SECTION(od); + _ODictNode *node = _odict_find_node_hash((PyODictObject *)od, key, hash); if (node != NULL) { /* Pop the node first to avoid a possible dict resize (due to @@ -1046,7 +1049,7 @@ _odict_popkey_hash(PyObject *od, PyObject *key, PyObject *failobj, resolution. */ int res = _odict_clear_node((PyODictObject *)od, node, key, hash); if (res < 0) { - return NULL; + goto done; } /* Now delete the value from the dict. 
*/ if (_PyDict_Pop_KnownHash((PyDictObject *)od, key, hash, @@ -1063,6 +1066,8 @@ _odict_popkey_hash(PyObject *od, PyObject *key, PyObject *failobj, PyErr_SetObject(PyExc_KeyError, key); } } + Py_END_CRITICAL_SECTION(); +done: return value; } diff --git a/Objects/setobject.c b/Objects/setobject.c index 93de8e84f2ddf9..3acf2a7a74890b 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -32,13 +32,14 @@ */ #include "Python.h" -#include "pycore_ceval.h" // _PyEval_GetBuiltin() -#include "pycore_dict.h" // _PyDict_Contains_KnownHash() -#include "pycore_modsupport.h" // _PyArg_NoKwnames() -#include "pycore_object.h" // _PyObject_GC_UNTRACK() -#include "pycore_pyerrors.h" // _PyErr_SetKeyError() -#include "pycore_setobject.h" // _PySet_NextEntry() definition -#include // offsetof() +#include "pycore_ceval.h" // _PyEval_GetBuiltin() +#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION, Py_END_CRITICAL_SECTION +#include "pycore_dict.h" // _PyDict_Contains_KnownHash() +#include "pycore_modsupport.h" // _PyArg_NoKwnames() +#include "pycore_object.h" // _PyObject_GC_UNTRACK() +#include "pycore_pyerrors.h" // _PyErr_SetKeyError() +#include "pycore_setobject.h" // _PySet_NextEntry() definition +#include // offsetof() /* Object used as dummy key to fill deleted entries */ static PyObject _dummy_struct; @@ -903,11 +904,17 @@ set_update_internal(PySetObject *so, PyObject *other) if (set_table_resize(so, (so->used + dictsize)*2) != 0) return -1; } + int err = 0; + Py_BEGIN_CRITICAL_SECTION(other); while (_PyDict_Next(other, &pos, &key, &value, &hash)) { - if (set_add_entry(so, key, hash)) - return -1; + if (set_add_entry(so, key, hash)) { + err = -1; + goto exit; + } } - return 0; +exit: + Py_END_CRITICAL_SECTION(); + return err; } it = PyObject_GetIter(other); @@ -1620,6 +1627,33 @@ set_isub(PySetObject *so, PyObject *other) return Py_NewRef(so); } +static PyObject * +set_symmetric_difference_update_dict(PySetObject *so, PyObject *other) +{ + PyObject *key; 
+ Py_ssize_t pos = 0; + Py_hash_t hash; + PyObject *value; + int rv; + + while (_PyDict_Next(other, &pos, &key, &value, &hash)) { + Py_INCREF(key); + rv = set_discard_entry(so, key, hash); + if (rv < 0) { + Py_DECREF(key); + return NULL; + } + if (rv == DISCARD_NOTFOUND) { + if (set_add_entry(so, key, hash)) { + Py_DECREF(key); + return NULL; + } + } + Py_DECREF(key); + } + Py_RETURN_NONE; +} + static PyObject * set_symmetric_difference_update(PySetObject *so, PyObject *other) { @@ -1634,23 +1668,13 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other) return set_clear(so, NULL); if (PyDict_CheckExact(other)) { - PyObject *value; - while (_PyDict_Next(other, &pos, &key, &value, &hash)) { - Py_INCREF(key); - rv = set_discard_entry(so, key, hash); - if (rv < 0) { - Py_DECREF(key); - return NULL; - } - if (rv == DISCARD_NOTFOUND) { - if (set_add_entry(so, key, hash)) { - Py_DECREF(key); - return NULL; - } - } - Py_DECREF(key); - } - Py_RETURN_NONE; + PyObject *res; + + Py_BEGIN_CRITICAL_SECTION(other); + res = set_symmetric_difference_update_dict(so, other); + Py_END_CRITICAL_SECTION(); + + return res; } if (PyAnySet_Check(other)) { From 11ac6f5354ec7a4da2a7e052d27d636b5a41c714 Mon Sep 17 00:00:00 2001 From: "Erlend E. 
Aasland" Date: Tue, 6 Feb 2024 23:44:14 +0100 Subject: [PATCH 017/126] gh-115009: Update Windows installer to use SQLite 3.45.1 (#115065) --- .../next/Windows/2024-02-06-09-05-13.gh-issue-115009.ShMjZs.rst | 1 + PCbuild/get_externals.bat | 2 +- PCbuild/python.props | 2 +- PCbuild/readme.txt | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2024-02-06-09-05-13.gh-issue-115009.ShMjZs.rst diff --git a/Misc/NEWS.d/next/Windows/2024-02-06-09-05-13.gh-issue-115009.ShMjZs.rst b/Misc/NEWS.d/next/Windows/2024-02-06-09-05-13.gh-issue-115009.ShMjZs.rst new file mode 100644 index 00000000000000..5bdb6963a24311 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2024-02-06-09-05-13.gh-issue-115009.ShMjZs.rst @@ -0,0 +1 @@ +Update Windows installer to use SQLite 3.45.1. diff --git a/PCbuild/get_externals.bat b/PCbuild/get_externals.bat index 0989bd46a580f7..60ce12b725e233 100644 --- a/PCbuild/get_externals.bat +++ b/PCbuild/get_externals.bat @@ -54,7 +54,7 @@ set libraries= set libraries=%libraries% bzip2-1.0.8 if NOT "%IncludeLibffiSrc%"=="false" set libraries=%libraries% libffi-3.4.4 if NOT "%IncludeSSLSrc%"=="false" set libraries=%libraries% openssl-3.0.13 -set libraries=%libraries% sqlite-3.44.2.0 +set libraries=%libraries% sqlite-3.45.1.0 if NOT "%IncludeTkinterSrc%"=="false" set libraries=%libraries% tcl-core-8.6.13.1 if NOT "%IncludeTkinterSrc%"=="false" set libraries=%libraries% tk-8.6.13.1 set libraries=%libraries% xz-5.2.5 diff --git a/PCbuild/python.props b/PCbuild/python.props index 54553db4057288..e21f1f60464bc8 100644 --- a/PCbuild/python.props +++ b/PCbuild/python.props @@ -68,7 +68,7 @@ - $(ExternalsDir)sqlite-3.44.2.0\ + $(ExternalsDir)sqlite-3.45.1.0\ $(ExternalsDir)bzip2-1.0.8\ $(ExternalsDir)xz-5.2.5\ $(ExternalsDir)libffi-3.4.4\ diff --git a/PCbuild/readme.txt b/PCbuild/readme.txt index b9d76515c383f7..387565515fa0b0 100644 --- a/PCbuild/readme.txt +++ b/PCbuild/readme.txt @@ -189,7 +189,7 @@ _ssl again when 
building. _sqlite3 - Wraps SQLite 3.44.2, which is itself built by sqlite3.vcxproj + Wraps SQLite 3.45.1, which is itself built by sqlite3.vcxproj Homepage: https://www.sqlite.org/ _tkinter From 3f71c416c085cfaed49ef325f70eb374a4966256 Mon Sep 17 00:00:00 2001 From: Finite State Machine <38001514+finite-state-machine@users.noreply.github.com> Date: Tue, 6 Feb 2024 20:28:01 -0500 Subject: [PATCH 018/126] gh-115106 docs: 'enum.Flag.__iter__()' did not exist prior to Python 3.11 (GH-115107) change versionchanged to versionadded --- Doc/library/enum.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Doc/library/enum.rst b/Doc/library/enum.rst index f31e6ea848f3b2..534939943d3326 100644 --- a/Doc/library/enum.rst +++ b/Doc/library/enum.rst @@ -534,9 +534,7 @@ Data Types >>> list(purple) [, ] - .. versionchanged:: 3.11 - - Aliases are no longer returned during iteration. + .. versionadded:: 3.11 .. method:: __len__(self): From 60375a38092b4d4dec9a826818a20adc5d4ff2f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= <16805946+edgarrmondragon@users.noreply.github.com> Date: Tue, 6 Feb 2024 23:22:47 -0600 Subject: [PATCH 019/126] gh-115114: Add missing slash to file URI prefix `file:/` (#115115) Add missing slash to file URI prefix `file:/` --- Doc/whatsnew/3.13.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index e034d34c5fb5ab..c75d4406531394 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -391,7 +391,7 @@ pathlib (Contributed by Barney Gale in :gh:`89812`.) * Add :meth:`pathlib.Path.from_uri`, a new constructor to create a :class:`pathlib.Path` - object from a 'file' URI (``file:/``). + object from a 'file' URI (``file://``). (Contributed by Barney Gale in :gh:`107465`.) 
* Add :meth:`pathlib.PurePath.full_match` for matching paths with From 2afc7182e66635b3ec7efb59d2a6c18a7ad1f215 Mon Sep 17 00:00:00 2001 From: Skip Montanaro Date: Wed, 7 Feb 2024 02:50:24 -0600 Subject: [PATCH 020/126] gh-114505: Add missing header file dependencies (#114513) Also move PYTHON_HEADERS up and make _testembed.o depend on it. --- Makefile.pre.in | 500 +++++++++++++++++++++++++----------------------- 1 file changed, 259 insertions(+), 241 deletions(-) diff --git a/Makefile.pre.in b/Makefile.pre.in index aad637876ead80..07b2ec7adde78a 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -936,6 +936,261 @@ python.html: $(srcdir)/Tools/wasm/python.html python.worker.js python.worker.js: $(srcdir)/Tools/wasm/python.worker.js @cp $(srcdir)/Tools/wasm/python.worker.js $@ +############################################################################ +# Header files + +PYTHON_HEADERS= \ + $(srcdir)/Include/Python.h \ + $(srcdir)/Include/abstract.h \ + $(srcdir)/Include/bltinmodule.h \ + $(srcdir)/Include/boolobject.h \ + $(srcdir)/Include/bytearrayobject.h \ + $(srcdir)/Include/bytesobject.h \ + $(srcdir)/Include/ceval.h \ + $(srcdir)/Include/codecs.h \ + $(srcdir)/Include/compile.h \ + $(srcdir)/Include/complexobject.h \ + $(srcdir)/Include/descrobject.h \ + $(srcdir)/Include/dictobject.h \ + $(srcdir)/Include/dynamic_annotations.h \ + $(srcdir)/Include/enumobject.h \ + $(srcdir)/Include/errcode.h \ + $(srcdir)/Include/exports.h \ + $(srcdir)/Include/fileobject.h \ + $(srcdir)/Include/fileutils.h \ + $(srcdir)/Include/floatobject.h \ + $(srcdir)/Include/frameobject.h \ + $(srcdir)/Include/genericaliasobject.h \ + $(srcdir)/Include/import.h \ + $(srcdir)/Include/interpreteridobject.h \ + $(srcdir)/Include/intrcheck.h \ + $(srcdir)/Include/iterobject.h \ + $(srcdir)/Include/listobject.h \ + $(srcdir)/Include/longobject.h \ + $(srcdir)/Include/marshal.h \ + $(srcdir)/Include/memoryobject.h \ + $(srcdir)/Include/methodobject.h \ + $(srcdir)/Include/modsupport.h \ 
+ $(srcdir)/Include/moduleobject.h \ + $(srcdir)/Include/object.h \ + $(srcdir)/Include/objimpl.h \ + $(srcdir)/Include/opcode.h \ + $(srcdir)/Include/opcode_ids.h \ + $(srcdir)/Include/osdefs.h \ + $(srcdir)/Include/osmodule.h \ + $(srcdir)/Include/patchlevel.h \ + $(srcdir)/Include/pyatomic.h \ + $(srcdir)/Include/pybuffer.h \ + $(srcdir)/Include/pycapsule.h \ + $(srcdir)/Include/pydtrace.h \ + $(srcdir)/Include/pyerrors.h \ + $(srcdir)/Include/pyexpat.h \ + $(srcdir)/Include/pyframe.h \ + $(srcdir)/Include/pyhash.h \ + $(srcdir)/Include/pylifecycle.h \ + $(srcdir)/Include/pymacconfig.h \ + $(srcdir)/Include/pymacro.h \ + $(srcdir)/Include/pymath.h \ + $(srcdir)/Include/pymem.h \ + $(srcdir)/Include/pyport.h \ + $(srcdir)/Include/pystate.h \ + $(srcdir)/Include/pystats.h \ + $(srcdir)/Include/pystrcmp.h \ + $(srcdir)/Include/pystrtod.h \ + $(srcdir)/Include/pythonrun.h \ + $(srcdir)/Include/pythread.h \ + $(srcdir)/Include/pytypedefs.h \ + $(srcdir)/Include/rangeobject.h \ + $(srcdir)/Include/setobject.h \ + $(srcdir)/Include/sliceobject.h \ + $(srcdir)/Include/structmember.h \ + $(srcdir)/Include/structseq.h \ + $(srcdir)/Include/sysmodule.h \ + $(srcdir)/Include/traceback.h \ + $(srcdir)/Include/tupleobject.h \ + $(srcdir)/Include/typeslots.h \ + $(srcdir)/Include/unicodeobject.h \ + $(srcdir)/Include/warnings.h \ + $(srcdir)/Include/weakrefobject.h \ + \ + pyconfig.h \ + $(PARSER_HEADERS) \ + \ + $(srcdir)/Include/cpython/abstract.h \ + $(srcdir)/Include/cpython/bytearrayobject.h \ + $(srcdir)/Include/cpython/bytesobject.h \ + $(srcdir)/Include/cpython/cellobject.h \ + $(srcdir)/Include/cpython/ceval.h \ + $(srcdir)/Include/cpython/classobject.h \ + $(srcdir)/Include/cpython/code.h \ + $(srcdir)/Include/cpython/compile.h \ + $(srcdir)/Include/cpython/complexobject.h \ + $(srcdir)/Include/cpython/context.h \ + $(srcdir)/Include/cpython/descrobject.h \ + $(srcdir)/Include/cpython/dictobject.h \ + $(srcdir)/Include/cpython/fileobject.h \ + 
$(srcdir)/Include/cpython/fileutils.h \ + $(srcdir)/Include/cpython/floatobject.h \ + $(srcdir)/Include/cpython/frameobject.h \ + $(srcdir)/Include/cpython/funcobject.h \ + $(srcdir)/Include/cpython/genobject.h \ + $(srcdir)/Include/cpython/import.h \ + $(srcdir)/Include/cpython/initconfig.h \ + $(srcdir)/Include/cpython/interpreteridobject.h \ + $(srcdir)/Include/cpython/listobject.h \ + $(srcdir)/Include/cpython/longintrepr.h \ + $(srcdir)/Include/cpython/longobject.h \ + $(srcdir)/Include/cpython/memoryobject.h \ + $(srcdir)/Include/cpython/methodobject.h \ + $(srcdir)/Include/cpython/object.h \ + $(srcdir)/Include/cpython/objimpl.h \ + $(srcdir)/Include/cpython/odictobject.h \ + $(srcdir)/Include/cpython/optimizer.h \ + $(srcdir)/Include/cpython/picklebufobject.h \ + $(srcdir)/Include/cpython/pthread_stubs.h \ + $(srcdir)/Include/cpython/pyatomic.h \ + $(srcdir)/Include/cpython/pyatomic_gcc.h \ + $(srcdir)/Include/cpython/pyatomic_std.h \ + $(srcdir)/Include/cpython/pyctype.h \ + $(srcdir)/Include/cpython/pydebug.h \ + $(srcdir)/Include/cpython/pyerrors.h \ + $(srcdir)/Include/cpython/pyfpe.h \ + $(srcdir)/Include/cpython/pyframe.h \ + $(srcdir)/Include/cpython/pyhash.h \ + $(srcdir)/Include/cpython/pylifecycle.h \ + $(srcdir)/Include/cpython/pymem.h \ + $(srcdir)/Include/cpython/pystate.h \ + $(srcdir)/Include/cpython/pystats.h \ + $(srcdir)/Include/cpython/pythonrun.h \ + $(srcdir)/Include/cpython/pythread.h \ + $(srcdir)/Include/cpython/setobject.h \ + $(srcdir)/Include/cpython/sysmodule.h \ + $(srcdir)/Include/cpython/traceback.h \ + $(srcdir)/Include/cpython/tracemalloc.h \ + $(srcdir)/Include/cpython/tupleobject.h \ + $(srcdir)/Include/cpython/unicodeobject.h \ + $(srcdir)/Include/cpython/warnings.h \ + $(srcdir)/Include/cpython/weakrefobject.h \ + \ + $(MIMALLOC_HEADERS) \ + \ + $(srcdir)/Include/internal/pycore_abstract.h \ + $(srcdir)/Include/internal/pycore_asdl.h \ + $(srcdir)/Include/internal/pycore_ast.h \ + 
$(srcdir)/Include/internal/pycore_ast_state.h \ + $(srcdir)/Include/internal/pycore_atexit.h \ + $(srcdir)/Include/internal/pycore_bitutils.h \ + $(srcdir)/Include/internal/pycore_blocks_output_buffer.h \ + $(srcdir)/Include/internal/pycore_bytes_methods.h \ + $(srcdir)/Include/internal/pycore_bytesobject.h \ + $(srcdir)/Include/internal/pycore_call.h \ + $(srcdir)/Include/internal/pycore_capsule.h \ + $(srcdir)/Include/internal/pycore_ceval.h \ + $(srcdir)/Include/internal/pycore_ceval_state.h \ + $(srcdir)/Include/internal/pycore_code.h \ + $(srcdir)/Include/internal/pycore_codecs.h \ + $(srcdir)/Include/internal/pycore_compile.h \ + $(srcdir)/Include/internal/pycore_complexobject.h \ + $(srcdir)/Include/internal/pycore_condvar.h \ + $(srcdir)/Include/internal/pycore_context.h \ + $(srcdir)/Include/internal/pycore_critical_section.h \ + $(srcdir)/Include/internal/pycore_crossinterp.h \ + $(srcdir)/Include/internal/pycore_descrobject.h \ + $(srcdir)/Include/internal/pycore_dict.h \ + $(srcdir)/Include/internal/pycore_dict_state.h \ + $(srcdir)/Include/internal/pycore_dtoa.h \ + $(srcdir)/Include/internal/pycore_exceptions.h \ + $(srcdir)/Include/internal/pycore_faulthandler.h \ + $(srcdir)/Include/internal/pycore_fileutils.h \ + $(srcdir)/Include/internal/pycore_floatobject.h \ + $(srcdir)/Include/internal/pycore_flowgraph.h \ + $(srcdir)/Include/internal/pycore_format.h \ + $(srcdir)/Include/internal/pycore_frame.h \ + $(srcdir)/Include/internal/pycore_freelist.h \ + $(srcdir)/Include/internal/pycore_function.h \ + $(srcdir)/Include/internal/pycore_gc.h \ + $(srcdir)/Include/internal/pycore_genobject.h \ + $(srcdir)/Include/internal/pycore_getopt.h \ + $(srcdir)/Include/internal/pycore_gil.h \ + $(srcdir)/Include/internal/pycore_global_objects.h \ + $(srcdir)/Include/internal/pycore_global_objects_fini_generated.h \ + $(srcdir)/Include/internal/pycore_global_strings.h \ + $(srcdir)/Include/internal/pycore_hamt.h \ + $(srcdir)/Include/internal/pycore_hashtable.h \ 
+ $(srcdir)/Include/internal/pycore_identifier.h \ + $(srcdir)/Include/internal/pycore_import.h \ + $(srcdir)/Include/internal/pycore_importdl.h \ + $(srcdir)/Include/internal/pycore_initconfig.h \ + $(srcdir)/Include/internal/pycore_instruments.h \ + $(srcdir)/Include/internal/pycore_interp.h \ + $(srcdir)/Include/internal/pycore_intrinsics.h \ + $(srcdir)/Include/internal/pycore_jit.h \ + $(srcdir)/Include/internal/pycore_list.h \ + $(srcdir)/Include/internal/pycore_llist.h \ + $(srcdir)/Include/internal/pycore_lock.h \ + $(srcdir)/Include/internal/pycore_long.h \ + $(srcdir)/Include/internal/pycore_memoryobject.h \ + $(srcdir)/Include/internal/pycore_mimalloc.h \ + $(srcdir)/Include/internal/pycore_modsupport.h \ + $(srcdir)/Include/internal/pycore_moduleobject.h \ + $(srcdir)/Include/internal/pycore_namespace.h \ + $(srcdir)/Include/internal/pycore_object.h \ + $(srcdir)/Include/internal/pycore_object_alloc.h \ + $(srcdir)/Include/internal/pycore_object_stack.h \ + $(srcdir)/Include/internal/pycore_object_state.h \ + $(srcdir)/Include/internal/pycore_obmalloc.h \ + $(srcdir)/Include/internal/pycore_obmalloc_init.h \ + $(srcdir)/Include/internal/pycore_opcode_metadata.h \ + $(srcdir)/Include/internal/pycore_opcode_utils.h \ + $(srcdir)/Include/internal/pycore_optimizer.h \ + $(srcdir)/Include/internal/pycore_parking_lot.h \ + $(srcdir)/Include/internal/pycore_parser.h \ + $(srcdir)/Include/internal/pycore_pathconfig.h \ + $(srcdir)/Include/internal/pycore_pyarena.h \ + $(srcdir)/Include/internal/pycore_pybuffer.h \ + $(srcdir)/Include/internal/pycore_pyerrors.h \ + $(srcdir)/Include/internal/pycore_pyhash.h \ + $(srcdir)/Include/internal/pycore_pylifecycle.h \ + $(srcdir)/Include/internal/pycore_pymath.h \ + $(srcdir)/Include/internal/pycore_pymem.h \ + $(srcdir)/Include/internal/pycore_pymem_init.h \ + $(srcdir)/Include/internal/pycore_pystate.h \ + $(srcdir)/Include/internal/pycore_pystats.h \ + $(srcdir)/Include/internal/pycore_pythonrun.h \ + 
$(srcdir)/Include/internal/pycore_pythread.h \ + $(srcdir)/Include/internal/pycore_range.h \ + $(srcdir)/Include/internal/pycore_runtime.h \ + $(srcdir)/Include/internal/pycore_runtime_init.h \ + $(srcdir)/Include/internal/pycore_runtime_init_generated.h \ + $(srcdir)/Include/internal/pycore_semaphore.h \ + $(srcdir)/Include/internal/pycore_setobject.h \ + $(srcdir)/Include/internal/pycore_signal.h \ + $(srcdir)/Include/internal/pycore_sliceobject.h \ + $(srcdir)/Include/internal/pycore_strhex.h \ + $(srcdir)/Include/internal/pycore_structseq.h \ + $(srcdir)/Include/internal/pycore_symtable.h \ + $(srcdir)/Include/internal/pycore_sysmodule.h \ + $(srcdir)/Include/internal/pycore_time.h \ + $(srcdir)/Include/internal/pycore_token.h \ + $(srcdir)/Include/internal/pycore_traceback.h \ + $(srcdir)/Include/internal/pycore_tracemalloc.h \ + $(srcdir)/Include/internal/pycore_tstate.h \ + $(srcdir)/Include/internal/pycore_tuple.h \ + $(srcdir)/Include/internal/pycore_typeobject.h \ + $(srcdir)/Include/internal/pycore_typevarobject.h \ + $(srcdir)/Include/internal/pycore_ucnhash.h \ + $(srcdir)/Include/internal/pycore_unicodeobject.h \ + $(srcdir)/Include/internal/pycore_unicodeobject_generated.h \ + $(srcdir)/Include/internal/pycore_unionobject.h \ + $(srcdir)/Include/internal/pycore_uop_ids.h \ + $(srcdir)/Include/internal/pycore_uop_metadata.h \ + $(srcdir)/Include/internal/pycore_warnings.h \ + $(srcdir)/Include/internal/pycore_weakref.h \ + $(DTRACE_HEADERS) \ + @PLATFORM_HEADERS@ \ + \ + $(srcdir)/Python/stdlib_module_names.h + ########################################################################## # Build static libmpdec.a LIBMPDEC_CFLAGS=@LIBMPDEC_CFLAGS@ $(PY_STDMODULE_CFLAGS) $(CCSHARED) @@ -1400,7 +1655,7 @@ Modules/getpath.o: $(srcdir)/Modules/getpath.c Python/frozen_modules/getpath.h M Programs/python.o: $(srcdir)/Programs/python.c $(CC) -c $(PY_CORE_CFLAGS) -o $@ $(srcdir)/Programs/python.c -Programs/_testembed.o: $(srcdir)/Programs/_testembed.c 
Programs/test_frozenmain.h +Programs/_testembed.o: $(srcdir)/Programs/_testembed.c Programs/test_frozenmain.h $(PYTHON_HEADERS) $(CC) -c $(PY_CORE_CFLAGS) -o $@ $(srcdir)/Programs/_testembed.c Modules/_sre/sre.o: $(srcdir)/Modules/_sre/sre.c $(srcdir)/Modules/_sre/sre.h $(srcdir)/Modules/_sre/sre_constants.h $(srcdir)/Modules/_sre/sre_lib.h @@ -1669,246 +1924,6 @@ regen-typeslots: $(srcdir)/Objects/typeslots.inc.new $(UPDATE_FILE) $(srcdir)/Objects/typeslots.inc $(srcdir)/Objects/typeslots.inc.new -############################################################################ -# Header files - -PYTHON_HEADERS= \ - $(srcdir)/Include/Python.h \ - $(srcdir)/Include/abstract.h \ - $(srcdir)/Include/bltinmodule.h \ - $(srcdir)/Include/boolobject.h \ - $(srcdir)/Include/bytearrayobject.h \ - $(srcdir)/Include/bytesobject.h \ - $(srcdir)/Include/ceval.h \ - $(srcdir)/Include/codecs.h \ - $(srcdir)/Include/compile.h \ - $(srcdir)/Include/complexobject.h \ - $(srcdir)/Include/descrobject.h \ - $(srcdir)/Include/dictobject.h \ - $(srcdir)/Include/dynamic_annotations.h \ - $(srcdir)/Include/enumobject.h \ - $(srcdir)/Include/errcode.h \ - $(srcdir)/Include/fileobject.h \ - $(srcdir)/Include/fileutils.h \ - $(srcdir)/Include/floatobject.h \ - $(srcdir)/Include/frameobject.h \ - $(srcdir)/Include/import.h \ - $(srcdir)/Include/interpreteridobject.h \ - $(srcdir)/Include/intrcheck.h \ - $(srcdir)/Include/iterobject.h \ - $(srcdir)/Include/listobject.h \ - $(srcdir)/Include/longobject.h \ - $(srcdir)/Include/marshal.h \ - $(srcdir)/Include/memoryobject.h \ - $(srcdir)/Include/methodobject.h \ - $(srcdir)/Include/modsupport.h \ - $(srcdir)/Include/moduleobject.h \ - $(srcdir)/Include/object.h \ - $(srcdir)/Include/objimpl.h \ - $(srcdir)/Include/opcode.h \ - $(srcdir)/Include/opcode_ids.h \ - $(srcdir)/Include/osdefs.h \ - $(srcdir)/Include/osmodule.h \ - $(srcdir)/Include/patchlevel.h \ - $(srcdir)/Include/pybuffer.h \ - $(srcdir)/Include/pycapsule.h \ - 
$(srcdir)/Include/pydtrace.h \ - $(srcdir)/Include/pyerrors.h \ - $(srcdir)/Include/pyframe.h \ - $(srcdir)/Include/pyhash.h \ - $(srcdir)/Include/pylifecycle.h \ - $(srcdir)/Include/pymacconfig.h \ - $(srcdir)/Include/pymacro.h \ - $(srcdir)/Include/pymath.h \ - $(srcdir)/Include/pymem.h \ - $(srcdir)/Include/pyport.h \ - $(srcdir)/Include/pystate.h \ - $(srcdir)/Include/pystats.h \ - $(srcdir)/Include/pystrcmp.h \ - $(srcdir)/Include/pystrtod.h \ - $(srcdir)/Include/pythonrun.h \ - $(srcdir)/Include/pythread.h \ - $(srcdir)/Include/pytypedefs.h \ - $(srcdir)/Include/rangeobject.h \ - $(srcdir)/Include/setobject.h \ - $(srcdir)/Include/sliceobject.h \ - $(srcdir)/Include/structmember.h \ - $(srcdir)/Include/structseq.h \ - $(srcdir)/Include/sysmodule.h \ - $(srcdir)/Include/traceback.h \ - $(srcdir)/Include/tupleobject.h \ - $(srcdir)/Include/unicodeobject.h \ - $(srcdir)/Include/warnings.h \ - $(srcdir)/Include/weakrefobject.h \ - \ - pyconfig.h \ - $(PARSER_HEADERS) \ - \ - $(srcdir)/Include/cpython/abstract.h \ - $(srcdir)/Include/cpython/bytearrayobject.h \ - $(srcdir)/Include/cpython/bytesobject.h \ - $(srcdir)/Include/cpython/cellobject.h \ - $(srcdir)/Include/cpython/ceval.h \ - $(srcdir)/Include/cpython/classobject.h \ - $(srcdir)/Include/cpython/code.h \ - $(srcdir)/Include/cpython/compile.h \ - $(srcdir)/Include/cpython/complexobject.h \ - $(srcdir)/Include/cpython/context.h \ - $(srcdir)/Include/cpython/descrobject.h \ - $(srcdir)/Include/cpython/dictobject.h \ - $(srcdir)/Include/cpython/fileobject.h \ - $(srcdir)/Include/cpython/fileutils.h \ - $(srcdir)/Include/cpython/floatobject.h \ - $(srcdir)/Include/cpython/frameobject.h \ - $(srcdir)/Include/cpython/funcobject.h \ - $(srcdir)/Include/cpython/genobject.h \ - $(srcdir)/Include/cpython/import.h \ - $(srcdir)/Include/cpython/initconfig.h \ - $(srcdir)/Include/cpython/interpreteridobject.h \ - $(srcdir)/Include/cpython/listobject.h \ - $(srcdir)/Include/cpython/longintrepr.h \ - 
$(srcdir)/Include/cpython/longobject.h \ - $(srcdir)/Include/cpython/memoryobject.h \ - $(srcdir)/Include/cpython/methodobject.h \ - $(srcdir)/Include/cpython/object.h \ - $(srcdir)/Include/cpython/objimpl.h \ - $(srcdir)/Include/cpython/odictobject.h \ - $(srcdir)/Include/cpython/optimizer.h \ - $(srcdir)/Include/cpython/picklebufobject.h \ - $(srcdir)/Include/cpython/pthread_stubs.h \ - $(srcdir)/Include/cpython/pyatomic.h \ - $(srcdir)/Include/cpython/pyatomic_gcc.h \ - $(srcdir)/Include/cpython/pyatomic_std.h \ - $(srcdir)/Include/cpython/pyctype.h \ - $(srcdir)/Include/cpython/pydebug.h \ - $(srcdir)/Include/cpython/pyerrors.h \ - $(srcdir)/Include/cpython/pyfpe.h \ - $(srcdir)/Include/cpython/pyframe.h \ - $(srcdir)/Include/cpython/pyhash.h \ - $(srcdir)/Include/cpython/pylifecycle.h \ - $(srcdir)/Include/cpython/pymem.h \ - $(srcdir)/Include/cpython/pystate.h \ - $(srcdir)/Include/cpython/pystats.h \ - $(srcdir)/Include/cpython/pythonrun.h \ - $(srcdir)/Include/cpython/pythread.h \ - $(srcdir)/Include/cpython/setobject.h \ - $(srcdir)/Include/cpython/sysmodule.h \ - $(srcdir)/Include/cpython/traceback.h \ - $(srcdir)/Include/cpython/tracemalloc.h \ - $(srcdir)/Include/cpython/tupleobject.h \ - $(srcdir)/Include/cpython/unicodeobject.h \ - $(srcdir)/Include/cpython/warnings.h \ - $(srcdir)/Include/cpython/weakrefobject.h \ - \ - $(MIMALLOC_HEADERS) \ - \ - $(srcdir)/Include/internal/pycore_abstract.h \ - $(srcdir)/Include/internal/pycore_asdl.h \ - $(srcdir)/Include/internal/pycore_ast.h \ - $(srcdir)/Include/internal/pycore_ast_state.h \ - $(srcdir)/Include/internal/pycore_atexit.h \ - $(srcdir)/Include/internal/pycore_bitutils.h \ - $(srcdir)/Include/internal/pycore_bytes_methods.h \ - $(srcdir)/Include/internal/pycore_bytesobject.h \ - $(srcdir)/Include/internal/pycore_call.h \ - $(srcdir)/Include/internal/pycore_capsule.h \ - $(srcdir)/Include/internal/pycore_ceval.h \ - $(srcdir)/Include/internal/pycore_ceval_state.h \ - 
$(srcdir)/Include/internal/pycore_code.h \ - $(srcdir)/Include/internal/pycore_codecs.h \ - $(srcdir)/Include/internal/pycore_compile.h \ - $(srcdir)/Include/internal/pycore_complexobject.h \ - $(srcdir)/Include/internal/pycore_condvar.h \ - $(srcdir)/Include/internal/pycore_context.h \ - $(srcdir)/Include/internal/pycore_critical_section.h \ - $(srcdir)/Include/internal/pycore_crossinterp.h \ - $(srcdir)/Include/internal/pycore_dict.h \ - $(srcdir)/Include/internal/pycore_dict_state.h \ - $(srcdir)/Include/internal/pycore_descrobject.h \ - $(srcdir)/Include/internal/pycore_dtoa.h \ - $(srcdir)/Include/internal/pycore_exceptions.h \ - $(srcdir)/Include/internal/pycore_faulthandler.h \ - $(srcdir)/Include/internal/pycore_fileutils.h \ - $(srcdir)/Include/internal/pycore_floatobject.h \ - $(srcdir)/Include/internal/pycore_format.h \ - $(srcdir)/Include/internal/pycore_frame.h \ - $(srcdir)/Include/internal/pycore_freelist.h \ - $(srcdir)/Include/internal/pycore_function.h \ - $(srcdir)/Include/internal/pycore_gc.h \ - $(srcdir)/Include/internal/pycore_genobject.h \ - $(srcdir)/Include/internal/pycore_getopt.h \ - $(srcdir)/Include/internal/pycore_gil.h \ - $(srcdir)/Include/internal/pycore_global_objects.h \ - $(srcdir)/Include/internal/pycore_global_objects_fini_generated.h \ - $(srcdir)/Include/internal/pycore_hamt.h \ - $(srcdir)/Include/internal/pycore_hashtable.h \ - $(srcdir)/Include/internal/pycore_identifier.h \ - $(srcdir)/Include/internal/pycore_import.h \ - $(srcdir)/Include/internal/pycore_initconfig.h \ - $(srcdir)/Include/internal/pycore_interp.h \ - $(srcdir)/Include/internal/pycore_intrinsics.h \ - $(srcdir)/Include/internal/pycore_jit.h \ - $(srcdir)/Include/internal/pycore_list.h \ - $(srcdir)/Include/internal/pycore_llist.h \ - $(srcdir)/Include/internal/pycore_lock.h \ - $(srcdir)/Include/internal/pycore_long.h \ - $(srcdir)/Include/internal/pycore_modsupport.h \ - $(srcdir)/Include/internal/pycore_moduleobject.h \ - 
$(srcdir)/Include/internal/pycore_namespace.h \ - $(srcdir)/Include/internal/pycore_object.h \ - $(srcdir)/Include/internal/pycore_object_alloc.h \ - $(srcdir)/Include/internal/pycore_object_stack.h \ - $(srcdir)/Include/internal/pycore_object_state.h \ - $(srcdir)/Include/internal/pycore_obmalloc.h \ - $(srcdir)/Include/internal/pycore_obmalloc_init.h \ - $(srcdir)/Include/internal/pycore_opcode_metadata.h \ - $(srcdir)/Include/internal/pycore_opcode_utils.h \ - $(srcdir)/Include/internal/pycore_optimizer.h \ - $(srcdir)/Include/internal/pycore_parking_lot.h \ - $(srcdir)/Include/internal/pycore_pathconfig.h \ - $(srcdir)/Include/internal/pycore_pyarena.h \ - $(srcdir)/Include/internal/pycore_pybuffer.h \ - $(srcdir)/Include/internal/pycore_pyerrors.h \ - $(srcdir)/Include/internal/pycore_pyhash.h \ - $(srcdir)/Include/internal/pycore_pylifecycle.h \ - $(srcdir)/Include/internal/pycore_pymem.h \ - $(srcdir)/Include/internal/pycore_pymem_init.h \ - $(srcdir)/Include/internal/pycore_pystate.h \ - $(srcdir)/Include/internal/pycore_pystats.h \ - $(srcdir)/Include/internal/pycore_pythonrun.h \ - $(srcdir)/Include/internal/pycore_pythread.h \ - $(srcdir)/Include/internal/pycore_range.h \ - $(srcdir)/Include/internal/pycore_runtime.h \ - $(srcdir)/Include/internal/pycore_runtime_init_generated.h \ - $(srcdir)/Include/internal/pycore_runtime_init.h \ - $(srcdir)/Include/internal/pycore_semaphore.h \ - $(srcdir)/Include/internal/pycore_setobject.h \ - $(srcdir)/Include/internal/pycore_signal.h \ - $(srcdir)/Include/internal/pycore_sliceobject.h \ - $(srcdir)/Include/internal/pycore_strhex.h \ - $(srcdir)/Include/internal/pycore_structseq.h \ - $(srcdir)/Include/internal/pycore_symtable.h \ - $(srcdir)/Include/internal/pycore_sysmodule.h \ - $(srcdir)/Include/internal/pycore_time.h \ - $(srcdir)/Include/internal/pycore_token.h \ - $(srcdir)/Include/internal/pycore_traceback.h \ - $(srcdir)/Include/internal/pycore_tracemalloc.h \ - $(srcdir)/Include/internal/pycore_tstate.h 
\ - $(srcdir)/Include/internal/pycore_tuple.h \ - $(srcdir)/Include/internal/pycore_typeobject.h \ - $(srcdir)/Include/internal/pycore_typevarobject.h \ - $(srcdir)/Include/internal/pycore_ucnhash.h \ - $(srcdir)/Include/internal/pycore_unionobject.h \ - $(srcdir)/Include/internal/pycore_unicodeobject.h \ - $(srcdir)/Include/internal/pycore_unicodeobject_generated.h \ - $(srcdir)/Include/internal/pycore_uop_metadata.h \ - $(srcdir)/Include/internal/pycore_warnings.h \ - $(srcdir)/Include/internal/pycore_weakref.h \ - $(DTRACE_HEADERS) \ - @PLATFORM_HEADERS@ \ - \ - $(srcdir)/Python/stdlib_module_names.h - $(LIBRARY_OBJS) $(MODOBJS) Programs/python.o: $(PYTHON_HEADERS) @@ -2877,6 +2892,9 @@ Python/thread.o: @THREADHEADERS@ $(srcdir)/Python/condvar.h MODULE_DEPS_STATIC=Modules/config.c MODULE_DEPS_SHARED=$(MODULE_DEPS_STATIC) $(EXPORTSYMS) +MODULE__CURSES_DEPS=$(srcdir)/Include/py_curses.h +MODULE__CURSES_PANEL_DEPS=$(srcdir)/Include/py_curses.h +MODULE__DATETIME_DEPS=$(srcdir)/Include/datetime.h MODULE_CMATH_DEPS=$(srcdir)/Modules/_math.h MODULE_MATH_DEPS=$(srcdir)/Modules/_math.h MODULE_PYEXPAT_DEPS=@LIBEXPAT_INTERNAL@ From d0322fdf2c1a7292a43959fe5a572d783b88a1c4 Mon Sep 17 00:00:00 2001 From: Skip Montanaro Date: Wed, 7 Feb 2024 04:48:42 -0600 Subject: [PATCH 021/126] gh-101100: Fix Py_DEBUG dangling Sphinx references (#115003) --- Doc/c-api/intro.rst | 11 ++++++----- Doc/library/test.rst | 6 +++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/Doc/c-api/intro.rst b/Doc/c-api/intro.rst index 4dbca92b18b5cd..dcda1071a58f35 100644 --- a/Doc/c-api/intro.rst +++ b/Doc/c-api/intro.rst @@ -148,7 +148,7 @@ complete listing. worse performances (due to increased code size for example). The compiler is usually smarter than the developer for the cost/benefit analysis. 
- If Python is :ref:`built in debug mode <debug-build>` (if the ``Py_DEBUG`` + If Python is :ref:`built in debug mode <debug-build>` (if the :c:macro:`Py_DEBUG` macro is defined), the :c:macro:`Py_ALWAYS_INLINE` macro does nothing. It must be specified before the function return type. Usage:: @@ -812,12 +812,14 @@ available that support tracing of reference counts, debugging the memory allocator, or low-level profiling of the main interpreter loop. Only the most frequently used builds will be described in the remainder of this section. -Compiling the interpreter with the :c:macro:`Py_DEBUG` macro defined produces +.. c:macro:: Py_DEBUG + +Compiling the interpreter with the :c:macro:`!Py_DEBUG` macro defined produces what is generally meant by :ref:`a debug build of Python <debug-build>`. -:c:macro:`Py_DEBUG` is enabled in the Unix build by adding +:c:macro:`!Py_DEBUG` is enabled in the Unix build by adding :option:`--with-pydebug` to the :file:`./configure` command. It is also implied by the presence of the -not-Python-specific :c:macro:`_DEBUG` macro. When :c:macro:`Py_DEBUG` is enabled +not-Python-specific :c:macro:`!_DEBUG` macro. When :c:macro:`!Py_DEBUG` is enabled in the Unix build, compiler optimization is disabled. In addition to the reference count debugging described below, extra checks are @@ -832,4 +834,3 @@ after every statement run by the interpreter.) Please refer to :file:`Misc/SpecialBuilds.txt` in the Python source distribution for more detailed information. - diff --git a/Doc/library/test.rst b/Doc/library/test.rst index cad1023021a512..7d28f625345726 100644 --- a/Doc/library/test.rst +++ b/Doc/library/test.rst @@ -324,9 +324,9 @@ The :mod:`test.support` module defines the following constants: .. data:: Py_DEBUG - True if Python is built with the :c:macro:`Py_DEBUG` macro defined: if - Python is :ref:`built in debug mode <debug-build>` - (:option:`./configure --with-pydebug <--with-pydebug>`). 
+ True if Python was built with the :c:macro:`Py_DEBUG` macro + defined, that is, if + Python was :ref:`built in debug mode <debug-build>`. .. versionadded:: 3.12 From 8a3c499ffe7e15297dd4c0b446a0b97b4d32108a Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 7 Feb 2024 12:38:34 +0000 Subject: [PATCH 022/126] GH-108362: Revert "GH-108362: Incremental GC implementation (GH-108038)" (#115132) Revert "GH-108362: Incremental GC implementation (GH-108038)" This reverts commit 36518e69d74607e5f094ce55286188e4545a947d. --- Doc/whatsnew/3.13.rst | 34 - Include/internal/pycore_gc.h | 42 +- Include/internal/pycore_object.h | 17 +- Include/internal/pycore_runtime_init.h | 8 +- Lib/test/test_gc.py | 22 +- ...-01-07-04-22-51.gh-issue-108362.oB9Gcf.rst | 13 - Modules/gcmodule.c | 23 +- Objects/object.c | 15 - Objects/structseq.c | 5 +- Python/gc.c | 824 +++++++----------- Python/gc_free_threading.c | 27 +- Python/import.c | 2 +- Tools/gdb/libpython.py | 7 +- 13 files changed, 392 insertions(+), 647 deletions(-) delete mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-07-04-22-51.gh-issue-108362.oB9Gcf.rst diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index c75d4406531394..2ac5afa8ce601c 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -92,10 +92,6 @@ Interpreter improvements: New Features ============ -* The cyclic garbage collector is now incremental. - This means that maximum pause times are reduced, - by an order of magnitude or more for larger heaps. - Improved Error Messages ----------------------- @@ -105,13 +101,6 @@ Improved Error Messages variables. See also :ref:`using-on-controlling-color`. (Contributed by Pablo Galindo Salgado in :gh:`112730`.) -Incremental Garbage Collection ------------------------------- - -* The cycle garbage collector is now incremental. - This means that maximum pause times are reduced - by an order of magnitude or more for larger heaps. 
- Other Language Changes ====================== @@ -257,29 +246,6 @@ fractions sign handling, minimum width and grouping. (Contributed by Mark Dickinson in :gh:`111320`.) -gc --- -* The cyclic garbage collector is now incremental, which changes the meanings - of the results of :meth:`gc.get_threshold` and :meth:`gc.get_threshold` as - well as :meth:`gc.get_count` and :meth:`gc.get_stats`. -* :meth:`gc.get_threshold` returns a three-tuple for backwards compatibility, - the first value is the threshold for young collections, as before, the second - value determines the rate at which the old collection is scanned; the - default is 10 and higher values mean that the old collection is scanned more slowly. - The third value is meangless and is always zero. -* :meth:`gc.set_threshold` ignores any items after the second. -* :meth:`gc.get_count` and :meth:`gc.get_stats`. - These functions return the same format of results as before. - The only difference is that instead of the results refering to - the young, aging and old generations, the results refer to the - young generation and the aging and collecting spaces of the old generation. - -In summary, code that attempted to manipulate the behavior of the cycle GC may -not work as well as intended, but it is very unlikely to harmful. -All other code will work just fine. -Uses should avoid calling :meth:`gc.collect` unless their workload is episodic, -but that has always been the case to some extent. - glob ---- diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index aeb07238fc8345..8d0bc2a218e48d 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -88,15 +88,11 @@ static inline void _PyObject_GC_SET_SHARED(PyObject *op) { /* Bit flags for _gc_prev */ /* Bit 0 is set when tp_finalize is called */ -#define _PyGC_PREV_MASK_FINALIZED 1 +#define _PyGC_PREV_MASK_FINALIZED (1) /* Bit 1 is set when the object is in generation which is GCed currently. 
*/ -#define _PyGC_PREV_MASK_COLLECTING 2 - -/* Bit 0 is set if the object belongs to old space 1 */ -#define _PyGC_NEXT_MASK_OLD_SPACE_1 1 - +#define _PyGC_PREV_MASK_COLLECTING (2) /* The (N-2) most significant bits contain the real address. */ -#define _PyGC_PREV_SHIFT 2 +#define _PyGC_PREV_SHIFT (2) #define _PyGC_PREV_MASK (((uintptr_t) -1) << _PyGC_PREV_SHIFT) /* set for debugging information */ @@ -122,13 +118,11 @@ typedef enum { // Lowest bit of _gc_next is used for flags only in GC. // But it is always 0 for normal code. static inline PyGC_Head* _PyGCHead_NEXT(PyGC_Head *gc) { - uintptr_t next = gc->_gc_next & _PyGC_PREV_MASK; + uintptr_t next = gc->_gc_next; return (PyGC_Head*)next; } static inline void _PyGCHead_SET_NEXT(PyGC_Head *gc, PyGC_Head *next) { - uintptr_t unext = (uintptr_t)next; - assert((unext & ~_PyGC_PREV_MASK) == 0); - gc->_gc_next = (gc->_gc_next & ~_PyGC_PREV_MASK) | unext; + gc->_gc_next = (uintptr_t)next; } // Lowest two bits of _gc_prev is used for _PyGC_PREV_MASK_* flags. 
@@ -136,7 +130,6 @@ static inline PyGC_Head* _PyGCHead_PREV(PyGC_Head *gc) { uintptr_t prev = (gc->_gc_prev & _PyGC_PREV_MASK); return (PyGC_Head*)prev; } - static inline void _PyGCHead_SET_PREV(PyGC_Head *gc, PyGC_Head *prev) { uintptr_t uprev = (uintptr_t)prev; assert((uprev & ~_PyGC_PREV_MASK) == 0); @@ -222,13 +215,6 @@ struct gc_generation { generations */ }; -struct gc_collection_stats { - /* number of collected objects */ - Py_ssize_t collected; - /* total number of uncollectable objects (put into gc.garbage) */ - Py_ssize_t uncollectable; -}; - /* Running stats per generation */ struct gc_generation_stats { /* total number of collections */ @@ -250,8 +236,8 @@ struct _gc_runtime_state { int enabled; int debug; /* linked lists of container objects */ - struct gc_generation young; - struct gc_generation old[2]; + struct gc_generation generations[NUM_GENERATIONS]; + PyGC_Head *generation0; /* a permanent generation which won't be collected */ struct gc_generation permanent_generation; struct gc_generation_stats generation_stats[NUM_GENERATIONS]; @@ -264,20 +250,22 @@ struct _gc_runtime_state { /* This is the number of objects that survived the last full collection. It approximates the number of long lived objects tracked by the GC. + (by "full collection", we mean a collection of the oldest generation). */ Py_ssize_t long_lived_total; - - Py_ssize_t work_to_do; - /* Which of the old spaces is the visited space */ - int visited_space; + /* This is the number of objects that survived all "non-full" + collections, and are awaiting to undergo a full collection for + the first time. 
*/ + Py_ssize_t long_lived_pending; }; extern void _PyGC_InitState(struct _gc_runtime_state *); -extern Py_ssize_t _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason); -extern void _PyGC_CollectNoFail(PyThreadState *tstate); +extern Py_ssize_t _PyGC_Collect(PyThreadState *tstate, int generation, + _PyGC_Reason reason); +extern Py_ssize_t _PyGC_CollectNoFail(PyThreadState *tstate); /* Freeze objects tracked by the GC and ignore them in future collections. */ extern void _PyGC_Freeze(PyInterpreterState *interp); diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index efa712c4a0b458..34a83ea228e8b1 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -125,7 +125,19 @@ static inline void _Py_RefcntAdd(PyObject* op, Py_ssize_t n) } #define _Py_RefcntAdd(op, n) _Py_RefcntAdd(_PyObject_CAST(op), n) -extern void _Py_SetImmortal(PyObject *op); +static inline void _Py_SetImmortal(PyObject *op) +{ + if (op) { +#ifdef Py_GIL_DISABLED + op->ob_tid = _Py_UNOWNED_TID; + op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL; + op->ob_ref_shared = 0; +#else + op->ob_refcnt = _Py_IMMORTAL_REFCNT; +#endif + } +} +#define _Py_SetImmortal(op) _Py_SetImmortal(_PyObject_CAST(op)) // Makes an immortal object mortal again with the specified refcnt. Should only // be used during runtime finalization. 
@@ -313,12 +325,11 @@ static inline void _PyObject_GC_TRACK( filename, lineno, __func__); PyInterpreterState *interp = _PyInterpreterState_GET(); - PyGC_Head *generation0 = &interp->gc.young.head; + PyGC_Head *generation0 = interp->gc.generation0; PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev); _PyGCHead_SET_NEXT(last, gc); _PyGCHead_SET_PREV(gc, last); _PyGCHead_SET_NEXT(gc, generation0); - assert((gc->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1) == 0); generation0->_gc_prev = (uintptr_t)gc; #endif } diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 2ad1347ad48a59..571a7d612c94e2 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -162,12 +162,12 @@ extern PyTypeObject _PyExc_MemoryError; }, \ .gc = { \ .enabled = 1, \ - .young = { .threshold = 2000, }, \ - .old = { \ + .generations = { \ + /* .head is set in _PyGC_InitState(). */ \ + { .threshold = 700, }, \ + { .threshold = 10, }, \ { .threshold = 10, }, \ - { .threshold = 0, }, \ }, \ - .work_to_do = -5000, \ }, \ .object_state = _py_object_state_INIT(INTERP), \ .dtoa = _dtoa_state_INIT(&(INTERP)), \ diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 0002852fce9643..b01f344cb14a1a 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -383,11 +383,19 @@ def test_collect_generations(self): # each call to collect(N) x = [] gc.collect(0) - # x is now in the old gen + # x is now in gen 1 a, b, c = gc.get_count() - # We don't check a since its exact values depends on + gc.collect(1) + # x is now in gen 2 + d, e, f = gc.get_count() + gc.collect(2) + # x is now in gen 3 + g, h, i = gc.get_count() + # We don't check a, d, g since their exact values depends on # internal implementation details of the interpreter. 
self.assertEqual((b, c), (1, 0)) + self.assertEqual((e, f), (0, 1)) + self.assertEqual((h, i), (0, 0)) def test_trashcan(self): class Ouch: @@ -838,6 +846,16 @@ def test_get_objects_generations(self): self.assertFalse( any(l is element for element in gc.get_objects(generation=2)) ) + gc.collect(generation=1) + self.assertFalse( + any(l is element for element in gc.get_objects(generation=0)) + ) + self.assertFalse( + any(l is element for element in gc.get_objects(generation=1)) + ) + self.assertTrue( + any(l is element for element in gc.get_objects(generation=2)) + ) gc.collect(generation=2) self.assertFalse( any(l is element for element in gc.get_objects(generation=0)) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-07-04-22-51.gh-issue-108362.oB9Gcf.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-07-04-22-51.gh-issue-108362.oB9Gcf.rst deleted file mode 100644 index 1fe4e0f41e1295..00000000000000 --- a/Misc/NEWS.d/next/Core and Builtins/2024-01-07-04-22-51.gh-issue-108362.oB9Gcf.rst +++ /dev/null @@ -1,13 +0,0 @@ -Implements an incremental cyclic garbage collector. By collecting the old -generation in increments, there is no need for a full heap scan. This can -hugely reduce maximum pause time for programs with large heaps. - -Reduces the number of generations from three to two. The old generation is -split into two spaces, "aging" and "collecting". - -Collection happens in two steps:: * First, the young generation is scanned -and the survivors moved to the end of the aging space. * Then objects are -taken from the collecting space, at such a rate that all cycles are -collected eventually. Those objects are then scanned and the survivors -moved to the end of the aging space. When the collecting space becomes -empty, the two spaces are swapped. 
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 3a42654b41b2ac..a2b66b9b78c169 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -158,12 +158,17 @@ gc_set_threshold_impl(PyObject *module, int threshold0, int group_right_1, { GCState *gcstate = get_gc_state(); - gcstate->young.threshold = threshold0; + gcstate->generations[0].threshold = threshold0; if (group_right_1) { - gcstate->old[0].threshold = threshold1; + gcstate->generations[1].threshold = threshold1; } if (group_right_2) { - gcstate->old[1].threshold = threshold2; + gcstate->generations[2].threshold = threshold2; + + /* generations higher than 2 get the same threshold */ + for (int i = 3; i < NUM_GENERATIONS; i++) { + gcstate->generations[i].threshold = gcstate->generations[2].threshold; + } } Py_RETURN_NONE; } @@ -180,9 +185,9 @@ gc_get_threshold_impl(PyObject *module) { GCState *gcstate = get_gc_state(); return Py_BuildValue("(iii)", - gcstate->young.threshold, - gcstate->old[0].threshold, - 0); + gcstate->generations[0].threshold, + gcstate->generations[1].threshold, + gcstate->generations[2].threshold); } /*[clinic input] @@ -197,9 +202,9 @@ gc_get_count_impl(PyObject *module) { GCState *gcstate = get_gc_state(); return Py_BuildValue("(iii)", - gcstate->young.count, - gcstate->old[gcstate->visited_space].count, - gcstate->old[gcstate->visited_space^1].count); + gcstate->generations[0].count, + gcstate->generations[1].count, + gcstate->generations[2].count); } /*[clinic input] diff --git a/Objects/object.c b/Objects/object.c index 7247eb21df6b6e..bbf7f98ae3daf9 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2387,21 +2387,6 @@ _Py_NewReferenceNoTotal(PyObject *op) new_reference(op); } -void -_Py_SetImmortal(PyObject *op) -{ - if (PyObject_IS_GC(op) && _PyObject_GC_IS_TRACKED(op)) { - _PyObject_GC_UNTRACK(op); - } -#ifdef Py_GIL_DISABLED - op->ob_tid = _Py_UNOWNED_TID; - op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL; - op->ob_ref_shared = 0; -#else - op->ob_refcnt = 
_Py_IMMORTAL_REFCNT; -#endif -} - void _Py_ResurrectReference(PyObject *op) { diff --git a/Objects/structseq.c b/Objects/structseq.c index 661d96a968fb80..581d6ad240885a 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -603,9 +603,6 @@ _PyStructSequence_InitBuiltinWithFlags(PyInterpreterState *interp, PyStructSequence_Desc *desc, unsigned long tp_flags) { - if (Py_TYPE(type) == NULL) { - Py_SET_TYPE(type, &PyType_Type); - } Py_ssize_t n_unnamed_members; Py_ssize_t n_members = count_members(desc, &n_unnamed_members); PyMemberDef *members = NULL; @@ -621,7 +618,7 @@ _PyStructSequence_InitBuiltinWithFlags(PyInterpreterState *interp, } initialize_static_fields(type, desc, members, tp_flags); - _Py_SetImmortal((PyObject *)type); + _Py_SetImmortal(type); } #ifndef NDEBUG else { diff --git a/Python/gc.c b/Python/gc.c index cda12ff7fbc982..46646760291526 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -45,7 +45,7 @@ typedef struct _gc_runtime_state GCState; // move_legacy_finalizers() removes this flag instead. // Between them, unreachable list is not normal list and we can not use // most gc_list_* functions for it. 
-#define NEXT_MASK_UNREACHABLE 2 +#define NEXT_MASK_UNREACHABLE (1) #define AS_GC(op) _Py_AS_GC(op) #define FROM_GC(gc) _Py_FROM_GC(gc) @@ -95,48 +95,9 @@ gc_decref(PyGC_Head *g) g->_gc_prev -= 1 << _PyGC_PREV_SHIFT; } -static inline int -gc_old_space(PyGC_Head *g) -{ - return g->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1; -} -static inline int -flip_old_space(int space) -{ - assert(space == 0 || space == 1); - return space ^ _PyGC_NEXT_MASK_OLD_SPACE_1; -} +#define GEN_HEAD(gcstate, n) (&(gcstate)->generations[n].head) -static inline void -gc_flip_old_space(PyGC_Head *g) -{ - g->_gc_next ^= _PyGC_NEXT_MASK_OLD_SPACE_1; -} - -static inline void -gc_set_old_space(PyGC_Head *g, int space) -{ - assert(space == 0 || space == _PyGC_NEXT_MASK_OLD_SPACE_1); - g->_gc_next &= ~_PyGC_NEXT_MASK_OLD_SPACE_1; - g->_gc_next |= space; -} - -static PyGC_Head * -GEN_HEAD(GCState *gcstate, int n) -{ - assert((gcstate->visited_space & (~1)) == 0); - switch(n) { - case 0: - return &gcstate->young.head; - case 1: - return &gcstate->old[gcstate->visited_space].head; - case 2: - return &gcstate->old[gcstate->visited_space^1].head; - default: - Py_UNREACHABLE(); - } -} static GCState * get_gc_state(void) @@ -155,12 +116,11 @@ _PyGC_InitState(GCState *gcstate) GEN.head._gc_prev = (uintptr_t)&GEN.head; \ } while (0) - assert(gcstate->young.count == 0); - assert(gcstate->old[0].count == 0); - assert(gcstate->old[1].count == 0); - INIT_HEAD(gcstate->young); - INIT_HEAD(gcstate->old[0]); - INIT_HEAD(gcstate->old[1]); + for (int i = 0; i < NUM_GENERATIONS; i++) { + assert(gcstate->generations[i].count == 0); + INIT_HEAD(gcstate->generations[i]); + }; + gcstate->generation0 = GEN_HEAD(gcstate, 0); INIT_HEAD(gcstate->permanent_generation); #undef INIT_HEAD @@ -258,7 +218,6 @@ gc_list_is_empty(PyGC_Head *list) static inline void gc_list_append(PyGC_Head *node, PyGC_Head *list) { - assert((list->_gc_prev & ~_PyGC_PREV_MASK) == 0); PyGC_Head *last = (PyGC_Head *)list->_gc_prev; // last <-> node @@ 
-316,8 +275,6 @@ gc_list_merge(PyGC_Head *from, PyGC_Head *to) PyGC_Head *from_tail = GC_PREV(from); assert(from_head != from); assert(from_tail != from); - assert(gc_list_is_empty(to) || - gc_old_space(to_tail) == gc_old_space(from_tail)); _PyGCHead_SET_NEXT(to_tail, from_head); _PyGCHead_SET_PREV(from_head, to_tail); @@ -386,8 +343,8 @@ enum flagstates {collecting_clear_unreachable_clear, static void validate_list(PyGC_Head *head, enum flagstates flags) { - assert((head->_gc_prev & ~_PyGC_PREV_MASK) == 0); - assert((head->_gc_next & ~_PyGC_PREV_MASK) == 0); + assert((head->_gc_prev & PREV_MASK_COLLECTING) == 0); + assert((head->_gc_next & NEXT_MASK_UNREACHABLE) == 0); uintptr_t prev_value = 0, next_value = 0; switch (flags) { case collecting_clear_unreachable_clear: @@ -409,7 +366,7 @@ validate_list(PyGC_Head *head, enum flagstates flags) PyGC_Head *gc = GC_NEXT(head); while (gc != head) { PyGC_Head *trueprev = GC_PREV(gc); - PyGC_Head *truenext = GC_NEXT(gc); + PyGC_Head *truenext = (PyGC_Head *)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE); assert(truenext != NULL); assert(trueprev == prev); assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value); @@ -419,44 +376,8 @@ validate_list(PyGC_Head *head, enum flagstates flags) } assert(prev == GC_PREV(head)); } - -static void -validate_old(GCState *gcstate) -{ - for (int space = 0; space < 2; space++) { - PyGC_Head *head = &gcstate->old[space].head; - PyGC_Head *gc = GC_NEXT(head); - while (gc != head) { - PyGC_Head *next = GC_NEXT(gc); - assert(gc_old_space(gc) == space); - gc = next; - } - } -} - -static void -validate_consistent_old_space(PyGC_Head *head) -{ - PyGC_Head *prev = head; - PyGC_Head *gc = GC_NEXT(head); - if (gc == head) { - return; - } - int old_space = gc_old_space(gc); - while (gc != head) { - PyGC_Head *truenext = GC_NEXT(gc); - assert(truenext != NULL); - assert(gc_old_space(gc) == old_space); - prev = gc; - gc = truenext; - } - assert(prev == GC_PREV(head)); -} - #else #define validate_list(x, y) 
do{}while(0) -#define validate_old(g) do{}while(0) -#define validate_consistent_old_space(l) do{}while(0) #endif /*** end of list stuff ***/ @@ -473,7 +394,15 @@ update_refs(PyGC_Head *containers) while (gc != containers) { next = GC_NEXT(gc); - assert(!_Py_IsImmortal(FROM_GC(gc))); + /* Move any object that might have become immortal to the + * permanent generation as the reference count is not accurately + * reflecting the actual number of live references to this object + */ + if (_Py_IsImmortal(FROM_GC(gc))) { + gc_list_move(gc, &get_gc_state()->permanent_generation.head); + gc = next; + continue; + } gc_reset_refs(gc, Py_REFCNT(FROM_GC(gc))); /* Python's cyclic gc should never see an incoming refcount * of 0: if something decref'ed to 0, it should have been @@ -571,13 +500,12 @@ visit_reachable(PyObject *op, void *arg) // Manually unlink gc from unreachable list because the list functions // don't work right in the presence of NEXT_MASK_UNREACHABLE flags. PyGC_Head *prev = GC_PREV(gc); - PyGC_Head *next = GC_NEXT(gc); + PyGC_Head *next = (PyGC_Head*)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE); _PyObject_ASSERT(FROM_GC(prev), prev->_gc_next & NEXT_MASK_UNREACHABLE); _PyObject_ASSERT(FROM_GC(next), next->_gc_next & NEXT_MASK_UNREACHABLE); - prev->_gc_next = gc->_gc_next; // copy flag bits - gc->_gc_next &= ~NEXT_MASK_UNREACHABLE; + prev->_gc_next = gc->_gc_next; // copy NEXT_MASK_UNREACHABLE _PyGCHead_SET_PREV(next, prev); gc_list_append(gc, reachable); @@ -629,9 +557,6 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) * or to the right have been scanned yet. 
*/ - validate_consistent_old_space(young); - /* Record which old space we are in, and set NEXT_MASK_UNREACHABLE bit for convenience */ - uintptr_t flags = NEXT_MASK_UNREACHABLE | (gc->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1); while (gc != young) { if (gc_get_refs(gc)) { /* gc is definitely reachable from outside the @@ -677,18 +602,17 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) // But this may pollute the unreachable list head's 'next' pointer // too. That's semantically senseless but expedient here - the // damage is repaired when this function ends. - last->_gc_next = flags | (uintptr_t)gc; + last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc); _PyGCHead_SET_PREV(gc, last); - gc->_gc_next = flags | (uintptr_t)unreachable; + gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable); unreachable->_gc_prev = (uintptr_t)gc; } - gc = _PyGCHead_NEXT(prev); + gc = (PyGC_Head*)prev->_gc_next; } // young->_gc_prev must be last element remained in the list. young->_gc_prev = (uintptr_t)prev; - young->_gc_next &= _PyGC_PREV_MASK; // don't let the pollution of the list head's next pointer leak - unreachable->_gc_next &= _PyGC_PREV_MASK; + unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE; } static void @@ -745,8 +669,8 @@ move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers) PyObject *op = FROM_GC(gc); _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE); - next = GC_NEXT(gc); gc->_gc_next &= ~NEXT_MASK_UNREACHABLE; + next = (PyGC_Head*)gc->_gc_next; if (has_legacy_finalizer(op)) { gc_clear_collecting(gc); @@ -765,8 +689,8 @@ clear_unreachable_mask(PyGC_Head *unreachable) assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0); for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE); - next = GC_NEXT(gc); gc->_gc_next &= ~NEXT_MASK_UNREACHABLE; + next = (PyGC_Head*)gc->_gc_next; } validate_list(unreachable, 
collecting_set_unreachable_clear); } @@ -1099,6 +1023,25 @@ delete_garbage(PyThreadState *tstate, GCState *gcstate, } +// Show stats for objects in each generations +static void +show_stats_each_generations(GCState *gcstate) +{ + char buf[100]; + size_t pos = 0; + + for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) { + pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos, + " %zd", + gc_list_size(GEN_HEAD(gcstate, i))); + } + + PySys_FormatStderr( + "gc: objects in each generation:%s\n" + "gc: objects in permanent generation: %zd\n", + buf, gc_list_size(&gcstate->permanent_generation.head)); +} + /* Deduce which objects among "base" are unreachable from outside the list and move them to 'unreachable'. The process consist in the following steps: @@ -1172,6 +1115,7 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { * the reachable objects instead. But this is a one-time cost, probably not * worth complicating the code to speed just a little. */ + gc_list_init(unreachable); move_unreachable(base, unreachable); // gc_prev is pointer again validate_list(base, collecting_clear_unreachable_clear); validate_list(unreachable, collecting_set_unreachable_set); @@ -1210,272 +1154,219 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable, } -#define UNTRACK_TUPLES 1 -#define UNTRACK_DICTS 2 - -static void -gc_collect_region(PyThreadState *tstate, - PyGC_Head *from, - PyGC_Head *to, - int untrack, - struct gc_collection_stats *stats); - -static inline Py_ssize_t -gc_list_set_space(PyGC_Head *list, int space) -{ - Py_ssize_t size = 0; - PyGC_Head *gc; - for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) { - gc_set_old_space(gc, space); - size++; - } - return size; -} - - +/* Invoke progress callbacks to notify clients that garbage collection + * is starting or stopping + */ static void -add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats) +invoke_gc_callback(PyThreadState *tstate, const char *phase, + int 
generation, Py_ssize_t collected, + Py_ssize_t uncollectable) { - gcstate->generation_stats[gen].collected += stats->collected; - gcstate->generation_stats[gen].uncollectable += stats->uncollectable; - gcstate->generation_stats[gen].collections += 1; -} - - -/* Multiply by 4 so that the default incremental threshold of 10 - * scans objects at 40% the rate that the young gen tenures them. */ -#define SCAN_RATE_MULTIPLIER 4 - + assert(!_PyErr_Occurred(tstate)); -static void -gc_collect_young(PyThreadState *tstate, - struct gc_collection_stats *stats) -{ + /* we may get called very early */ GCState *gcstate = &tstate->interp->gc; - PyGC_Head *young = &gcstate->young.head; - PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; -#ifdef Py_STATS - { - Py_ssize_t count = 0; - PyGC_Head *gc; - for (gc = GC_NEXT(young); gc != young; gc = GC_NEXT(gc)) { - count++; - } + if (gcstate->callbacks == NULL) { + return; } -#endif - PyGC_Head survivors; - gc_list_init(&survivors); - gc_collect_region(tstate, young, &survivors, UNTRACK_TUPLES, stats); - Py_ssize_t survivor_count = 0; - if (gcstate->visited_space) { - /* objects in visited space have bit set, so we set it here */ - survivor_count = gc_list_set_space(&survivors, 1); - } - else { - PyGC_Head *gc; - for (gc = GC_NEXT(&survivors); gc != &survivors; gc = GC_NEXT(gc)) { -#ifdef GC_DEBUG - assert(gc_old_space(gc) == 0); -#endif - survivor_count++; + /* The local variable cannot be rebound, check it for sanity */ + assert(PyList_CheckExact(gcstate->callbacks)); + PyObject *info = NULL; + if (PyList_GET_SIZE(gcstate->callbacks) != 0) { + info = Py_BuildValue("{sisnsn}", + "generation", generation, + "collected", collected, + "uncollectable", uncollectable); + if (info == NULL) { + PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks"); + return; } } - gc_list_merge(&survivors, visited); - validate_old(gcstate); - gcstate->young.count = 0; - gcstate->old[gcstate->visited_space].count++; - Py_ssize_t 
scale_factor = gcstate->old[0].threshold; - if (scale_factor < 1) { - scale_factor = 1; - } - gcstate->work_to_do += survivor_count + survivor_count * SCAN_RATE_MULTIPLIER / scale_factor; - add_stats(gcstate, 0, stats); -} -static inline int -is_in_visited(PyGC_Head *gc, int visited_space) -{ - assert(visited_space == 0 || flip_old_space(visited_space) == 0); - return gc_old_space(gc) == visited_space; -} - -struct container_and_flag { - PyGC_Head *container; - int visited_space; -}; + PyObject *phase_obj = PyUnicode_FromString(phase); + if (phase_obj == NULL) { + Py_XDECREF(info); + PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks"); + return; + } -/* A traversal callback for adding to container) */ -static int -visit_add_to_container(PyObject *op, void *arg) -{ - OBJECT_STAT_INC(object_visits); - struct container_and_flag *cf = (struct container_and_flag *)arg; - int visited = cf->visited_space; - assert(visited == get_gc_state()->visited_space); - if (_PyObject_IS_GC(op)) { - PyGC_Head *gc = AS_GC(op); - if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited) { - assert(!_Py_IsImmortal(op)); - gc_flip_old_space(gc); - gc_list_move(gc, cf->container); + PyObject *stack[] = {phase_obj, info}; + for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) { + PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i); + Py_INCREF(cb); /* make sure cb doesn't go away */ + r = PyObject_Vectorcall(cb, stack, 2, NULL); + if (r == NULL) { + PyErr_WriteUnraisable(cb); } + else { + Py_DECREF(r); + } + Py_DECREF(cb); } - return 0; + Py_DECREF(phase_obj); + Py_XDECREF(info); + assert(!_PyErr_Occurred(tstate)); } -static uintptr_t -expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate) -{ - validate_list(container, collecting_clear_unreachable_clear); - struct container_and_flag arg = { - .container = container, - .visited_space = gcstate->visited_space, - }; - uintptr_t size = 0; - assert(GC_NEXT(gc) == container); - while (gc != 
container) { - /* Survivors will be moved to visited space, so they should - * have been marked as visited */ - assert(is_in_visited(gc, gcstate->visited_space)); - PyObject *op = FROM_GC(gc); - if (_Py_IsImmortal(op)) { - PyGC_Head *next = GC_NEXT(gc); - gc_list_move(gc, &get_gc_state()->permanent_generation.head); - gc = next; - continue; + +/* Find the oldest generation (highest numbered) where the count + * exceeds the threshold. Objects in the that generation and + * generations younger than it will be collected. */ +static int +gc_select_generation(GCState *gcstate) +{ + for (int i = NUM_GENERATIONS-1; i >= 0; i--) { + if (gcstate->generations[i].count > gcstate->generations[i].threshold) { + /* Avoid quadratic performance degradation in number + of tracked objects (see also issue #4074): + + To limit the cost of garbage collection, there are two strategies; + - make each collection faster, e.g. by scanning fewer objects + - do less collections + This heuristic is about the latter strategy. + + In addition to the various configurable thresholds, we only trigger a + full collection if the ratio + + long_lived_pending / long_lived_total + + is above a given value (hardwired to 25%). + + The reason is that, while "non-full" collections (i.e., collections of + the young and middle generations) will always examine roughly the same + number of objects -- determined by the aforementioned thresholds --, + the cost of a full collection is proportional to the total number of + long-lived objects, which is virtually unbounded. + + Indeed, it has been remarked that doing a full collection every + <constant number> of object creations entails a dramatic performance + degradation in workloads which consist in creating and storing lots of + long-lived objects (e.g. building a large list of GC-tracked objects would + show quadratic performance, instead of linear as expected: see issue #4074). 
+ + Using the above ratio, instead, yields amortized linear performance in + the total number of objects (the effect of which can be summarized + thusly: "each full garbage collection is more and more costly as the + number of objects grows, but we do fewer and fewer of them"). + + This heuristic was suggested by Martin von Löwis on python-dev in + June 2008. His original analysis and proposal can be found at: + http://mail.python.org/pipermail/python-dev/2008-June/080579.html + */ + if (i == NUM_GENERATIONS - 1 + && gcstate->long_lived_pending < gcstate->long_lived_total / 4) + { + continue; + } + return i; } - traverseproc traverse = Py_TYPE(op)->tp_traverse; - (void) traverse(op, - visit_add_to_container, - &arg); - gc = GC_NEXT(gc); - size++; } - return size; + return -1; } -/* Do bookkeeping for a completed GC cycle */ -static void -completed_cycle(GCState *gcstate) -{ - assert(gc_list_is_empty(&gcstate->old[gcstate->visited_space^1].head)); - assert(gc_list_is_empty(&gcstate->young.head)); - gcstate->visited_space = flip_old_space(gcstate->visited_space); - if (gcstate->work_to_do > 0) { - gcstate->work_to_do = 0; - } -} -static void -gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) +/* This is the main function. Read this to understand how the + * collection process works. 
*/ +static Py_ssize_t +gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) { + int i; + Py_ssize_t m = 0; /* # objects collected */ + Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */ + PyGC_Head *young; /* the generation we are examining */ + PyGC_Head *old; /* next older generation */ + PyGC_Head unreachable; /* non-problematic unreachable trash */ + PyGC_Head finalizers; /* objects with, & reachable from, __del__ */ + PyGC_Head *gc; + _PyTime_t t1 = 0; /* initialize to prevent a compiler warning */ GCState *gcstate = &tstate->interp->gc; - if (gcstate->work_to_do <= 0) { - /* No work to do */ - return; - } - PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; - PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; - PyGC_Head increment; - gc_list_init(&increment); - if (gc_list_is_empty(not_visited)) { - completed_cycle(gcstate); - return; + + // gc_collect_main() must not be called before _PyGC_Init + // or after _PyGC_Fini() + assert(gcstate->garbage != NULL); + assert(!_PyErr_Occurred(tstate)); + + int expected = 0; + if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) { + // Don't start a garbage collection if one is already in progress. + return 0; } - Py_ssize_t region_size = 0; - while (region_size < gcstate->work_to_do) { - if (gc_list_is_empty(not_visited)) { - break; + + if (generation == GENERATION_AUTO) { + // Select the oldest generation that needs collecting. We will collect + // objects from that generation and all generations younger than it. + generation = gc_select_generation(gcstate); + if (generation < 0) { + // No generation needs to be collected. 
+ _Py_atomic_store_int(&gcstate->collecting, 0); + return 0; } - PyGC_Head *gc = _PyGCHead_NEXT(not_visited); - gc_list_move(gc, &increment); - gc_set_old_space(gc, gcstate->visited_space); - region_size += expand_region_transitively_reachable(&increment, gc, gcstate); - } - assert(region_size == gc_list_size(&increment)); - PyGC_Head survivors; - gc_list_init(&survivors); - gc_collect_region(tstate, &increment, &survivors, UNTRACK_TUPLES, stats); - gc_list_merge(&survivors, visited); - assert(gc_list_is_empty(&increment)); - gcstate->work_to_do -= region_size; - validate_old(gcstate); - add_stats(gcstate, 1, stats); - if (gc_list_is_empty(not_visited)) { - completed_cycle(gcstate); } -} + assert(generation >= 0 && generation < NUM_GENERATIONS); -static void -gc_collect_full(PyThreadState *tstate, - struct gc_collection_stats *stats) -{ - GCState *gcstate = &tstate->interp->gc; - validate_old(gcstate); - PyGC_Head *young = &gcstate->young.head; - PyGC_Head *old0 = &gcstate->old[0].head; - PyGC_Head *old1 = &gcstate->old[1].head; - /* merge all generations into old0 */ - gc_list_merge(young, old0); - gcstate->young.count = 0; - PyGC_Head *gc = GC_NEXT(old1); - while (gc != old1) { - PyGC_Head *next = GC_NEXT(gc); - gc_set_old_space(gc, 0); - gc = next; +#ifdef Py_STATS + if (_Py_stats) { + _Py_stats->object_stats.object_visits = 0; } - gc_list_merge(old1, old0); - - gc_collect_region(tstate, old0, old0, - UNTRACK_TUPLES | UNTRACK_DICTS, - stats); - gcstate->visited_space = 1; - gcstate->young.count = 0; - gcstate->old[0].count = 0; - gcstate->old[1].count = 0; +#endif + GC_STAT_ADD(generation, collections, 1); - gcstate->work_to_do = - gcstate->young.threshold * 2; + if (reason != _Py_GC_REASON_SHUTDOWN) { + invoke_gc_callback(tstate, "start", generation, 0, 0); + } - _PyGC_ClearAllFreeLists(tstate->interp); - validate_old(gcstate); - add_stats(gcstate, 2, stats); -} + if (gcstate->debug & _PyGC_DEBUG_STATS) { + PySys_WriteStderr("gc: collecting generation %d...\n", 
generation); + show_stats_each_generations(gcstate); + t1 = _PyTime_GetPerfCounter(); + } -/* This is the main function. Read this to understand how the - * collection process works. */ -static void -gc_collect_region(PyThreadState *tstate, - PyGC_Head *from, - PyGC_Head *to, - int untrack, - struct gc_collection_stats *stats) -{ - PyGC_Head unreachable; /* non-problematic unreachable trash */ - PyGC_Head finalizers; /* objects with, & reachable from, __del__ */ - PyGC_Head *gc; /* initialize to prevent a compiler warning */ - GCState *gcstate = &tstate->interp->gc; + if (PyDTrace_GC_START_ENABLED()) { + PyDTrace_GC_START(generation); + } - assert(gcstate->garbage != NULL); - assert(!_PyErr_Occurred(tstate)); + /* update collection and allocation counters */ + if (generation+1 < NUM_GENERATIONS) { + gcstate->generations[generation+1].count += 1; + } + for (i = 0; i <= generation; i++) { + gcstate->generations[i].count = 0; + } - gc_list_init(&unreachable); - deduce_unreachable(from, &unreachable); - validate_consistent_old_space(from); - if (untrack & UNTRACK_TUPLES) { - untrack_tuples(from); + /* merge younger generations with one we are currently collecting */ + for (i = 0; i < generation; i++) { + gc_list_merge(GEN_HEAD(gcstate, i), GEN_HEAD(gcstate, generation)); } - if (untrack & UNTRACK_DICTS) { - untrack_dicts(from); + + /* handy references */ + young = GEN_HEAD(gcstate, generation); + if (generation < NUM_GENERATIONS-1) { + old = GEN_HEAD(gcstate, generation+1); } - validate_consistent_old_space(to); - if (from != to) { - gc_list_merge(from, to); + else { + old = young; } - validate_consistent_old_space(to); + validate_list(old, collecting_clear_unreachable_clear); + + deduce_unreachable(young, &unreachable); + + untrack_tuples(young); /* Move reachable objects to next generation. 
*/ + if (young != old) { + if (generation == NUM_GENERATIONS - 2) { + gcstate->long_lived_pending += gc_list_size(young); + } + gc_list_merge(young, old); + } + else { + /* We only un-track dicts in full collections, to avoid quadratic + dict build-up. See issue #14775. */ + untrack_dicts(young); + gcstate->long_lived_pending = 0; + gcstate->long_lived_total = gc_list_size(young); + } /* All objects in unreachable are trash, but objects reachable from * legacy finalizers (e.g. tp_del) can't safely be deleted. @@ -1489,8 +1380,10 @@ gc_collect_region(PyThreadState *tstate, * and we move those into the finalizers list too. */ move_legacy_finalizer_reachable(&finalizers); + validate_list(&finalizers, collecting_clear_unreachable_clear); validate_list(&unreachable, collecting_set_unreachable_clear); + /* Print debugging information. */ if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) { for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) { @@ -1499,99 +1392,89 @@ gc_collect_region(PyThreadState *tstate, } /* Clear weakrefs and invoke callbacks as necessary. */ - stats->collected += handle_weakrefs(&unreachable, to); - validate_list(to, collecting_clear_unreachable_clear); + m += handle_weakrefs(&unreachable, old); + + validate_list(old, collecting_clear_unreachable_clear); validate_list(&unreachable, collecting_set_unreachable_clear); /* Call tp_finalize on objects which have one. */ finalize_garbage(tstate, &unreachable); + /* Handle any objects that may have resurrected after the call * to 'finalize_garbage' and continue the collection with the * objects that are still unreachable */ PyGC_Head final_unreachable; - gc_list_init(&final_unreachable); - handle_resurrected_objects(&unreachable, &final_unreachable, to); + handle_resurrected_objects(&unreachable, &final_unreachable, old); /* Call tp_clear on objects in the final_unreachable set. This will cause * the reference cycles to be broken. It may also cause some objects * in finalizers to be freed. 
*/ - stats->collected += gc_list_size(&final_unreachable); - delete_garbage(tstate, gcstate, &final_unreachable, to); + m += gc_list_size(&final_unreachable); + delete_garbage(tstate, gcstate, &final_unreachable, old); /* Collect statistics on uncollectable objects found and print * debugging information. */ - Py_ssize_t n = 0; for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) { n++; if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) debug_cycle("uncollectable", FROM_GC(gc)); } - stats->uncollectable = n; + if (gcstate->debug & _PyGC_DEBUG_STATS) { + double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1); + PySys_WriteStderr( + "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n", + n+m, n, d); + } + /* Append instances in the uncollectable set to a Python * reachable list of garbage. The programmer has to deal with * this if they insist on creating this type of structure. */ - handle_legacy_finalizers(tstate, gcstate, &finalizers, to); - validate_list(to, collecting_clear_unreachable_clear); -} + handle_legacy_finalizers(tstate, gcstate, &finalizers, old); + validate_list(old, collecting_clear_unreachable_clear); -/* Invoke progress callbacks to notify clients that garbage collection - * is starting or stopping - */ -static void -do_gc_callback(GCState *gcstate, const char *phase, - int generation, struct gc_collection_stats *stats) -{ - assert(!PyErr_Occurred()); + /* Clear free list only during the collection of the highest + * generation */ + if (generation == NUM_GENERATIONS-1) { + _PyGC_ClearAllFreeLists(tstate->interp); + } - /* The local variable cannot be rebound, check it for sanity */ - assert(PyList_CheckExact(gcstate->callbacks)); - PyObject *info = NULL; - if (PyList_GET_SIZE(gcstate->callbacks) != 0) { - info = Py_BuildValue("{sisnsn}", - "generation", generation, - "collected", stats->collected, - "uncollectable", stats->uncollectable); - if (info == NULL) { - PyErr_FormatUnraisable("Exception ignored on 
invoking gc callbacks"); - return; + if (_PyErr_Occurred(tstate)) { + if (reason == _Py_GC_REASON_SHUTDOWN) { + _PyErr_Clear(tstate); + } + else { + PyErr_FormatUnraisable("Exception ignored in garbage collection"); } } - PyObject *phase_obj = PyUnicode_FromString(phase); - if (phase_obj == NULL) { - Py_XDECREF(info); - PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks"); - return; + /* Update stats */ + struct gc_generation_stats *stats = &gcstate->generation_stats[generation]; + stats->collections++; + stats->collected += m; + stats->uncollectable += n; + + GC_STAT_ADD(generation, objects_collected, m); +#ifdef Py_STATS + if (_Py_stats) { + GC_STAT_ADD(generation, object_visits, + _Py_stats->object_stats.object_visits); + _Py_stats->object_stats.object_visits = 0; } +#endif - PyObject *stack[] = {phase_obj, info}; - for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) { - PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i); - Py_INCREF(cb); /* make sure cb doesn't go away */ - r = PyObject_Vectorcall(cb, stack, 2, NULL); - if (r == NULL) { - PyErr_WriteUnraisable(cb); - } - else { - Py_DECREF(r); - } - Py_DECREF(cb); + if (PyDTrace_GC_DONE_ENABLED()) { + PyDTrace_GC_DONE(n + m); } - Py_DECREF(phase_obj); - Py_XDECREF(info); - assert(!PyErr_Occurred()); -} -static void -invoke_gc_callback(GCState *gcstate, const char *phase, - int generation, struct gc_collection_stats *stats) -{ - if (gcstate->callbacks == NULL) { - return; + if (reason != _Py_GC_REASON_SHUTDOWN) { + invoke_gc_callback(tstate, "stop", generation, m, n); } - do_gc_callback(gcstate, phase, generation, stats); + + assert(!_PyErr_Occurred(tstate)); + _Py_atomic_store_int(&gcstate->collecting, 0); + return n + m; } static int @@ -1666,7 +1549,7 @@ _PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation) } } else { - if (append_objects(result, GEN_HEAD(gcstate, (int)generation))) { + if (append_objects(result, GEN_HEAD(gcstate, generation))) { goto error; } } @@ -1681,16 +1564,10 @@ 
void _PyGC_Freeze(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; - gc_list_merge(&gcstate->young.head, &gcstate->permanent_generation.head); - gcstate->young.count = 0; - PyGC_Head*old0 = &gcstate->old[0].head; - PyGC_Head*old1 = &gcstate->old[1].head; - gc_list_merge(old0, &gcstate->permanent_generation.head); - gcstate->old[0].count = 0; - gc_list_set_space(old1, 0); - gc_list_merge(old1, &gcstate->permanent_generation.head); - gcstate->old[1].count = 0; - validate_old(gcstate); + for (int i = 0; i < NUM_GENERATIONS; ++i) { + gc_list_merge(GEN_HEAD(gcstate, i), &gcstate->permanent_generation.head); + gcstate->generations[i].count = 0; + } } void @@ -1698,8 +1575,7 @@ _PyGC_Unfreeze(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; gc_list_merge(&gcstate->permanent_generation.head, - &gcstate->old[0].head); - validate_old(gcstate); + GEN_HEAD(gcstate, NUM_GENERATIONS-1)); } Py_ssize_t @@ -1735,100 +1611,32 @@ PyGC_IsEnabled(void) return gcstate->enabled; } -// Show stats for objects in each generations -static void -show_stats_each_generations(GCState *gcstate) -{ - char buf[100]; - size_t pos = 0; - - for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) { - pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos, - " %zd", - gc_list_size(GEN_HEAD(gcstate, i))); - } - - PySys_FormatStderr( - "gc: objects in each generation:%s\n" - "gc: objects in permanent generation: %zd\n", - buf, gc_list_size(&gcstate->permanent_generation.head)); -} - +/* Public API to invoke gc.collect() from C */ Py_ssize_t -_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) +PyGC_Collect(void) { + PyThreadState *tstate = _PyThreadState_GET(); GCState *gcstate = &tstate->interp->gc; - int expected = 0; - if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) { - // Don't start a garbage collection if one is already in progress. 
+ if (!gcstate->enabled) { return 0; } - struct gc_collection_stats stats = { 0 }; - if (reason != _Py_GC_REASON_SHUTDOWN) { - invoke_gc_callback(gcstate, "start", generation, &stats); - } - _PyTime_t t1 = 0; /* initialize to prevent a compiler warning */ - if (gcstate->debug & _PyGC_DEBUG_STATS) { - PySys_WriteStderr("gc: collecting generation %d...\n", generation); - show_stats_each_generations(gcstate); - t1 = _PyTime_GetPerfCounter(); - } - if (PyDTrace_GC_START_ENABLED()) { - PyDTrace_GC_START(generation); - } - GC_STAT_ADD(generation, collections, 1); + Py_ssize_t n; PyObject *exc = _PyErr_GetRaisedException(tstate); - switch(generation) { - case 0: - gc_collect_young(tstate, &stats); - break; - case 1: - gc_collect_young(tstate, &stats); - gc_collect_increment(tstate, &stats); - break; - case 2: - gc_collect_full(tstate, &stats); - break; - default: - Py_UNREACHABLE(); - } - if (PyDTrace_GC_DONE_ENABLED()) { - PyDTrace_GC_DONE(stats.uncollectable + stats.collected); - } - if (reason != _Py_GC_REASON_SHUTDOWN) { - invoke_gc_callback(gcstate, "stop", generation, &stats); - } + n = gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_MANUAL); _PyErr_SetRaisedException(tstate, exc); - GC_STAT_ADD(generation, objects_collected, stats.collected); -#ifdef Py_STATS - if (_Py_stats) { - GC_STAT_ADD(generation, object_visits, - _Py_stats->object_stats.object_visits); - _Py_stats->object_stats.object_visits = 0; - } -#endif - validate_old(gcstate); - if (gcstate->debug & _PyGC_DEBUG_STATS) { - double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1); - PySys_WriteStderr( - "gc: done, %zd collected, %zd uncollectable, %.4fs elapsed\n", - stats.collected, stats.uncollectable, d); - } - _Py_atomic_store_int(&gcstate->collecting, 0); - return stats.uncollectable + stats.collected; + return n; } -/* Public API to invoke gc.collect() from C */ Py_ssize_t -PyGC_Collect(void) +_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) { - return 
_PyGC_Collect(_PyThreadState_GET(), 2, _Py_GC_REASON_MANUAL); + return gc_collect_main(tstate, generation, reason); } -void +Py_ssize_t _PyGC_CollectNoFail(PyThreadState *tstate) { /* Ideally, this function is only called on interpreter shutdown, @@ -1837,7 +1645,7 @@ _PyGC_CollectNoFail(PyThreadState *tstate) during interpreter shutdown (and then never finish it). See http://bugs.python.org/issue8713#msg195178 for an example. */ - _PyGC_Collect(_PyThreadState_GET(), 2, _Py_GC_REASON_SHUTDOWN); + return gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN); } void @@ -1972,10 +1780,10 @@ _PyObject_GC_Link(PyObject *op) GCState *gcstate = &tstate->interp->gc; g->_gc_next = 0; g->_gc_prev = 0; - gcstate->young.count++; /* number of allocated GC objects */ - if (gcstate->young.count > gcstate->young.threshold && + gcstate->generations[0].count++; /* number of allocated GC objects */ + if (gcstate->generations[0].count > gcstate->generations[0].threshold && gcstate->enabled && - gcstate->young.threshold && + gcstate->generations[0].threshold && !_Py_atomic_load_int_relaxed(&gcstate->collecting) && !_PyErr_Occurred(tstate)) { @@ -1986,9 +1794,7 @@ _PyObject_GC_Link(PyObject *op) void _Py_RunGC(PyThreadState *tstate) { - if (tstate->interp->gc.enabled) { - _PyGC_Collect(tstate, 1, _Py_GC_REASON_HEAP); - } + gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP); } static PyObject * @@ -2091,8 +1897,8 @@ PyObject_GC_Del(void *op) #endif } GCState *gcstate = get_gc_state(); - if (gcstate->young.count > 0) { - gcstate->young.count--; + if (gcstate->generations[0].count > 0) { + gcstate->generations[0].count--; } PyObject_Free(((char *)op)-presize); } @@ -2115,36 +1921,26 @@ PyObject_GC_IsFinalized(PyObject *obj) return 0; } -static int -visit_generation(gcvisitobjects_t callback, void *arg, struct gc_generation *gen) -{ - PyGC_Head *gc_list, *gc; - gc_list = &gen->head; - for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) { - PyObject *op = 
FROM_GC(gc); - Py_INCREF(op); - int res = callback(op, arg); - Py_DECREF(op); - if (!res) { - return -1; - } - } - return 0; -} - void PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg) { + size_t i; GCState *gcstate = get_gc_state(); int origenstate = gcstate->enabled; gcstate->enabled = 0; - if (visit_generation(callback, arg, &gcstate->young)) { - goto done; - } - if (visit_generation(callback, arg, &gcstate->old[0])) { - goto done; + for (i = 0; i < NUM_GENERATIONS; i++) { + PyGC_Head *gc_list, *gc; + gc_list = GEN_HEAD(gcstate, i); + for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) { + PyObject *op = FROM_GC(gc); + Py_INCREF(op); + int res = callback(op, arg); + Py_DECREF(op); + if (!res) { + goto done; + } + } } - visit_generation(callback, arg, &gcstate->old[1]); done: gcstate->enabled = origenstate; } diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 1c4da726866e4e..8fbcdb15109b76 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -616,7 +616,7 @@ void _PyGC_InitState(GCState *gcstate) { // TODO: move to pycore_runtime_init.h once the incremental GC lands. - gcstate->young.threshold = 2000; + gcstate->generations[0].threshold = 2000; } @@ -911,8 +911,8 @@ cleanup_worklist(struct worklist *worklist) static bool gc_should_collect(GCState *gcstate) { - int count = _Py_atomic_load_int_relaxed(&gcstate->young.count); - int threshold = gcstate->young.threshold; + int count = _Py_atomic_load_int_relaxed(&gcstate->generations[0].count); + int threshold = gcstate->generations[0].threshold; if (count <= threshold || threshold == 0 || !gcstate->enabled) { return false; } @@ -920,7 +920,7 @@ gc_should_collect(GCState *gcstate) // objects. A few tests rely on immediate scheduling of the GC so we ignore // the scaled threshold if generations[1].threshold is set to zero. 
return (count > gcstate->long_lived_total / 4 || - gcstate->old[0].threshold == 0); + gcstate->generations[1].threshold == 0); } static void @@ -1031,15 +1031,10 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) /* update collection and allocation counters */ if (generation+1 < NUM_GENERATIONS) { - gcstate->old[generation].count += 1; + gcstate->generations[generation+1].count += 1; } for (i = 0; i <= generation; i++) { - if (i == 0) { - gcstate->young.count = 0; - } - else { - gcstate->old[i-1].count = 0; - } + gcstate->generations[i].count = 0; } PyInterpreterState *interp = tstate->interp; @@ -1362,7 +1357,7 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) return gc_collect_main(tstate, generation, reason); } -void +Py_ssize_t _PyGC_CollectNoFail(PyThreadState *tstate) { /* Ideally, this function is only called on interpreter shutdown, @@ -1371,7 +1366,7 @@ _PyGC_CollectNoFail(PyThreadState *tstate) during interpreter shutdown (and then never finish it). See http://bugs.python.org/issue8713#msg195178 for an example. 
*/ - gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN); + return gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN); } void @@ -1495,7 +1490,7 @@ _PyObject_GC_Link(PyObject *op) { PyThreadState *tstate = _PyThreadState_GET(); GCState *gcstate = &tstate->interp->gc; - gcstate->young.count++; + gcstate->generations[0].count++; if (gc_should_collect(gcstate) && !_Py_atomic_load_int_relaxed(&gcstate->collecting)) @@ -1610,8 +1605,8 @@ PyObject_GC_Del(void *op) #endif } GCState *gcstate = get_gc_state(); - if (gcstate->young.count > 0) { - gcstate->young.count--; + if (gcstate->generations[0].count > 0) { + gcstate->generations[0].count--; } PyObject_Free(((char *)op)-presize); } diff --git a/Python/import.c b/Python/import.c index dfc5ec1f2f2927..2fd0c08a6bb5ae 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1030,7 +1030,7 @@ _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) if (!already_set) { /* We assume that all module defs are statically allocated and will never be freed. Otherwise, we would incref here. 
*/ - _Py_SetImmortal((PyObject *)def); + _Py_SetImmortal(def); } res = 0; diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py index 96b891481d9f46..483f28b46dfec7 100755 --- a/Tools/gdb/libpython.py +++ b/Tools/gdb/libpython.py @@ -1753,11 +1753,8 @@ def is_waiting_for_gil(self): return (name == 'take_gil') def is_gc_collect(self): - '''Is this frame a collector within the garbage-collector?''' - return self._gdbframe.name() in ( - 'collect', 'gc_collect_full', 'gc_collect_main', - 'gc_collect_young', 'gc_collect_increment' - ) + '''Is this frame gc_collect_main() within the garbage-collector?''' + return self._gdbframe.name() in ('collect', 'gc_collect_main') def get_pyop(self): try: From fedbf77191ea9d6515b39f958cc9e588d23517c9 Mon Sep 17 00:00:00 2001 From: Carl Meyer Date: Wed, 7 Feb 2024 11:56:16 -0500 Subject: [PATCH 023/126] gh-114828: Fix __class__ in class-scope inlined comprehensions (#115139) --- Lib/test/test_listcomps.py | 12 ++++++++++++ ...-02-07-07-50-12.gh-issue-114828.nSXwMi.rst | 2 ++ Python/symtable.c | 19 +++++++++++++++++++ 3 files changed, 33 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-02-07-07-50-12.gh-issue-114828.nSXwMi.rst diff --git a/Lib/test/test_listcomps.py b/Lib/test/test_listcomps.py index f95a78aff0c711..2868dd01545b95 100644 --- a/Lib/test/test_listcomps.py +++ b/Lib/test/test_listcomps.py @@ -156,6 +156,18 @@ def method(self): self.assertEqual(C.y, [4, 4, 4, 4, 4]) self.assertIs(C().method(), C) + def test_references_super(self): + code = """ + res = [super for x in [1]] + """ + self._check_in_scopes(code, outputs={"res": [super]}) + + def test_references___class__(self): + code = """ + res = [__class__ for x in [1]] + """ + self._check_in_scopes(code, raises=NameError) + def test_inner_cell_shadows_outer(self): code = """ items = [(lambda: i) for i in range(5)] diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-02-07-07-50-12.gh-issue-114828.nSXwMi.rst b/Misc/NEWS.d/next/Core and 
Builtins/2024-02-07-07-50-12.gh-issue-114828.nSXwMi.rst new file mode 100644 index 00000000000000..b1c63e0a1518fd --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-02-07-07-50-12.gh-issue-114828.nSXwMi.rst @@ -0,0 +1,2 @@ +Fix compilation crashes in uncommon code examples using :func:`super` inside +a comprehension in a class body. diff --git a/Python/symtable.c b/Python/symtable.c index 743029956e32fa..d69516351efba2 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -758,6 +758,8 @@ inline_comprehension(PySTEntryObject *ste, PySTEntryObject *comp, { PyObject *k, *v; Py_ssize_t pos = 0; + int remove_dunder_class = 0; + while (PyDict_Next(comp->ste_symbols, &pos, &k, &v)) { // skip comprehension parameter long comp_flags = PyLong_AS_LONG(v); @@ -779,6 +781,19 @@ inline_comprehension(PySTEntryObject *ste, PySTEntryObject *comp, if (!existing) { // name does not exist in scope, copy from comprehension assert(scope != FREE || PySet_Contains(comp_free, k) == 1); + if (scope == FREE && ste->ste_type == ClassBlock && + _PyUnicode_EqualToASCIIString(k, "__class__")) { + // if __class__ is unbound in the enclosing class scope and free + // in the comprehension scope, it needs special handling; just + // letting it be marked as free in class scope will break due to + // drop_class_free + scope = GLOBAL_IMPLICIT; + only_flags &= ~DEF_FREE; + if (PySet_Discard(comp_free, k) < 0) { + return 0; + } + remove_dunder_class = 1; + } PyObject *v_flags = PyLong_FromLong(only_flags); if (v_flags == NULL) { return 0; @@ -803,6 +818,10 @@ inline_comprehension(PySTEntryObject *ste, PySTEntryObject *comp, } } } + comp->ste_free = PySet_Size(comp_free) > 0; + if (remove_dunder_class && PyDict_DelItemString(comp->ste_symbols, "__class__") < 0) { + return 0; + } return 1; } From ef3ceab09d2d0959c343c662461123d5b0e0b64b Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Wed, 7 Feb 2024 13:43:18 -0500 Subject: [PATCH 024/126] gh-112066: Use `PyDict_SetDefaultRef` in place of 
`PyDict_SetDefault`. (#112211) This changes a number of internal usages of `PyDict_SetDefault` to use `PyDict_SetDefaultRef`. Co-authored-by: Erlend E. Aasland --- Modules/_json.c | 5 ++--- Modules/posixmodule.c | 2 +- Modules/pyexpat.c | 3 ++- Objects/typeobject.c | 6 +++--- Objects/unicodeobject.c | 12 +++++++----- Python/compile.c | 29 +++++++++++++++++------------ 6 files changed, 32 insertions(+), 25 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index 24b292ce70e5eb..c55299899e77fe 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -691,11 +691,10 @@ _parse_object_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ss key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx); if (key == NULL) goto bail; - memokey = PyDict_SetDefault(memo, key, key); - if (memokey == NULL) { + if (PyDict_SetDefaultRef(memo, key, key, &memokey) < 0) { goto bail; } - Py_SETREF(key, Py_NewRef(memokey)); + Py_SETREF(key, memokey); idx = next_idx; /* skip whitespace between key and : delimiter, read :, skip whitespace */ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 22891135bde0af..e26265fc874ebb 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -1627,7 +1627,7 @@ convertenviron(void) Py_DECREF(d); return NULL; } - if (PyDict_SetDefault(d, k, v) == NULL) { + if (PyDict_SetDefaultRef(d, k, v, NULL) < 0) { Py_DECREF(v); Py_DECREF(k); Py_DECREF(d); diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 7c08eda83e66b2..62cd262a7885e9 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1615,7 +1615,8 @@ static int init_handler_descrs(pyexpat_state *state) if (descr == NULL) return -1; - if (PyDict_SetDefault(state->xml_parse_type->tp_dict, PyDescr_NAME(descr), descr) == NULL) { + if (PyDict_SetDefaultRef(state->xml_parse_type->tp_dict, + PyDescr_NAME(descr), descr, NULL) < 0) { Py_DECREF(descr); return -1; } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index e220d10ce563c2..c65d0ec2acae52 100644 --- 
a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -6683,7 +6683,7 @@ type_add_method(PyTypeObject *type, PyMethodDef *meth) int err; PyObject *dict = lookup_tp_dict(type); if (!(meth->ml_flags & METH_COEXIST)) { - err = PyDict_SetDefault(dict, name, descr) == NULL; + err = PyDict_SetDefaultRef(dict, name, descr, NULL) < 0; } else { err = PyDict_SetItem(dict, name, descr) < 0; @@ -6731,7 +6731,7 @@ type_add_members(PyTypeObject *type) if (descr == NULL) return -1; - if (PyDict_SetDefault(dict, PyDescr_NAME(descr), descr) == NULL) { + if (PyDict_SetDefaultRef(dict, PyDescr_NAME(descr), descr, NULL) < 0) { Py_DECREF(descr); return -1; } @@ -6756,7 +6756,7 @@ type_add_getset(PyTypeObject *type) return -1; } - if (PyDict_SetDefault(dict, PyDescr_NAME(descr), descr) == NULL) { + if (PyDict_SetDefaultRef(dict, PyDescr_NAME(descr), descr, NULL) < 0) { Py_DECREF(descr); return -1; } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b236ddba9cdc69..0a569a950e88e2 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14894,16 +14894,18 @@ _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p) PyObject *interned = get_interned_dict(interp); assert(interned != NULL); - PyObject *t = PyDict_SetDefault(interned, s, s); - if (t == NULL) { + PyObject *t; + int res = PyDict_SetDefaultRef(interned, s, s, &t); + if (res < 0) { PyErr_Clear(); return; } - - if (t != s) { - Py_SETREF(*p, Py_NewRef(t)); + else if (res == 1) { + // value was already present (not inserted) + Py_SETREF(*p, t); return; } + Py_DECREF(t); if (_Py_IsImmortal(s)) { // XXX Restrict this to the main interpreter? 
diff --git a/Python/compile.c b/Python/compile.c index 4c1d3bb2d2b475..15e5cf38a37b97 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -958,14 +958,15 @@ merge_consts_recursive(PyObject *const_cache, PyObject *o) return NULL; } - // t is borrowed reference - PyObject *t = PyDict_SetDefault(const_cache, key, key); - if (t != key) { - // o is registered in const_cache. Just use it. - Py_XINCREF(t); + PyObject *t; + int res = PyDict_SetDefaultRef(const_cache, key, key, &t); + if (res != 0) { + // o was not inserted into const_cache. t is either the existing value + // or NULL (on error). Py_DECREF(key); return t; } + Py_DECREF(t); // We registered o in const_cache. // When o is a tuple or frozenset, we want to merge its @@ -7527,22 +7528,26 @@ _PyCompile_ConstCacheMergeOne(PyObject *const_cache, PyObject **obj) return ERROR; } - // t is borrowed reference - PyObject *t = PyDict_SetDefault(const_cache, key, key); + PyObject *t; + int res = PyDict_SetDefaultRef(const_cache, key, key, &t); Py_DECREF(key); - if (t == NULL) { + if (res < 0) { return ERROR; } - if (t == key) { // obj is new constant. + if (res == 0) { // inserted: obj is new constant. 
+ Py_DECREF(t); return SUCCESS; } if (PyTuple_CheckExact(t)) { - // t is still borrowed reference - t = PyTuple_GET_ITEM(t, 1); + PyObject *item = PyTuple_GET_ITEM(t, 1); + Py_SETREF(*obj, Py_NewRef(item)); + Py_DECREF(t); + } + else { + Py_SETREF(*obj, t); } - Py_SETREF(*obj, Py_NewRef(t)); return SUCCESS; } From 8f0998e844c2fd8c0c94681d0a6331c34ee31562 Mon Sep 17 00:00:00 2001 From: Carl Meyer Date: Wed, 7 Feb 2024 15:19:47 -0500 Subject: [PATCH 025/126] gh-114828: parenthesize non-atomic macro definitions in pycore_symtable.h (#115143) --- Include/internal/pycore_symtable.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Include/internal/pycore_symtable.h b/Include/internal/pycore_symtable.h index 1d782ca2c96e05..b44393b5644673 100644 --- a/Include/internal/pycore_symtable.h +++ b/Include/internal/pycore_symtable.h @@ -109,18 +109,18 @@ extern PyObject* _Py_Mangle(PyObject *p, PyObject *name); /* Flags for def-use information */ -#define DEF_GLOBAL 1 /* global stmt */ -#define DEF_LOCAL 2 /* assignment in code block */ -#define DEF_PARAM 2<<1 /* formal parameter */ -#define DEF_NONLOCAL 2<<2 /* nonlocal stmt */ -#define USE 2<<3 /* name is used */ -#define DEF_FREE 2<<4 /* name used but not defined in nested block */ -#define DEF_FREE_CLASS 2<<5 /* free variable from class's method */ -#define DEF_IMPORT 2<<6 /* assignment occurred via import */ -#define DEF_ANNOT 2<<7 /* this name is annotated */ -#define DEF_COMP_ITER 2<<8 /* this name is a comprehension iteration variable */ -#define DEF_TYPE_PARAM 2<<9 /* this name is a type parameter */ -#define DEF_COMP_CELL 2<<10 /* this name is a cell in an inlined comprehension */ +#define DEF_GLOBAL 1 /* global stmt */ +#define DEF_LOCAL 2 /* assignment in code block */ +#define DEF_PARAM (2<<1) /* formal parameter */ +#define DEF_NONLOCAL (2<<2) /* nonlocal stmt */ +#define USE (2<<3) /* name is used */ +#define DEF_FREE (2<<4) /* name used but not defined in nested block 
*/ +#define DEF_FREE_CLASS (2<<5) /* free variable from class's method */ +#define DEF_IMPORT (2<<6) /* assignment occurred via import */ +#define DEF_ANNOT (2<<7) /* this name is annotated */ +#define DEF_COMP_ITER (2<<8) /* this name is a comprehension iteration variable */ +#define DEF_TYPE_PARAM (2<<9) /* this name is a type parameter */ +#define DEF_COMP_CELL (2<<10) /* this name is a cell in an inlined comprehension */ #define DEF_BOUND (DEF_LOCAL | DEF_PARAM | DEF_IMPORT) From 38b970dfcc3cdebc87a456f17ef1e0f06dde7375 Mon Sep 17 00:00:00 2001 From: Alex Gaynor Date: Wed, 7 Feb 2024 17:21:33 -0500 Subject: [PATCH 026/126] When the Py_CompileStringExFlags fuzzer encounters a SystemError, abort (#115147) This allows us to catch bugs beyond memory corruption and assertions. --- Modules/_xxtestfuzz/fuzzer.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c index e133b4d3c44480..6ea9f64d628530 100644 --- a/Modules/_xxtestfuzz/fuzzer.c +++ b/Modules/_xxtestfuzz/fuzzer.c @@ -502,7 +502,6 @@ static int fuzz_elementtree_parsewhole(const char* data, size_t size) { } #define MAX_PYCOMPILE_TEST_SIZE 16384 -static char pycompile_scratch[MAX_PYCOMPILE_TEST_SIZE]; static const int start_vals[] = {Py_eval_input, Py_single_input, Py_file_input}; const size_t NUM_START_VALS = sizeof(start_vals) / sizeof(start_vals[0]); @@ -531,6 +530,8 @@ static int fuzz_pycompile(const char* data, size_t size) { unsigned char optimize_idx = (unsigned char) data[1]; int optimize = optimize_vals[optimize_idx % NUM_OPTIMIZE_VALS]; + char pycompile_scratch[MAX_PYCOMPILE_TEST_SIZE]; + // Create a NUL-terminated C string from the remaining input memcpy(pycompile_scratch, data + 2, size - 2); // Put a NUL terminator just after the copied data. (Space was reserved already.) 
@@ -549,7 +550,13 @@ static int fuzz_pycompile(const char* data, size_t size) { PyObject *result = Py_CompileStringExFlags(pycompile_scratch, "", start, flags, optimize); if (result == NULL) { - /* compilation failed, most likely from a syntax error */ + /* Compilation failed, most likely from a syntax error. If it was a + SystemError we abort. There's no non-bug reason to raise a + SystemError. */ + if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_SystemError)) { + PyErr_Print(); + abort(); + } PyErr_Clear(); } else { Py_DECREF(result); From 4a7f63869aa61b24a7cc2d33f8a5e5a7fd0d76a4 Mon Sep 17 00:00:00 2001 From: Justin Applegate <70449145+Legoclones@users.noreply.github.com> Date: Thu, 8 Feb 2024 01:12:58 -0700 Subject: [PATCH 027/126] gh-115146: Fix typo in pickletools.py documentation (GH-115148) --- Lib/pickletools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/pickletools.py b/Lib/pickletools.py index 95a77aeb2afe2a..51ee4a7a2632ac 100644 --- a/Lib/pickletools.py +++ b/Lib/pickletools.py @@ -1253,7 +1253,7 @@ def __init__(self, name, code, arg, stack_before=[], stack_after=[pyint], proto=2, - doc="""Long integer using found-byte length. + doc="""Long integer using four-byte length. 
A more efficient encoding of a Python long; the long4 encoding says it all."""), From 9e90313320a2af2d9ff7049ed3842344ed236630 Mon Sep 17 00:00:00 2001 From: Artem Chernyshev <62871052+dTenebrae@users.noreply.github.com> Date: Thu, 8 Feb 2024 11:40:38 +0300 Subject: [PATCH 028/126] gh-115136: Fix possible NULL deref in getpath_joinpath() (GH-115137) Signed-off-by: Artem Chernyshev --- Modules/getpath.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Modules/getpath.c b/Modules/getpath.c index a3c8fc269d1c3c..abed139028244a 100644 --- a/Modules/getpath.c +++ b/Modules/getpath.c @@ -262,6 +262,10 @@ getpath_joinpath(PyObject *Py_UNUSED(self), PyObject *args) } /* Convert all parts to wchar and accumulate max final length */ wchar_t **parts = (wchar_t **)PyMem_Malloc(n * sizeof(wchar_t *)); + if (parts == NULL) { + PyErr_NoMemory(); + return NULL; + } memset(parts, 0, n * sizeof(wchar_t *)); Py_ssize_t cchFinal = 0; Py_ssize_t first = 0; From 17689e3c41d9f61bcd1928b24d3c50c37ceaf3f2 Mon Sep 17 00:00:00 2001 From: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Date: Thu, 8 Feb 2024 01:04:41 -0800 Subject: [PATCH 029/126] gh-107944: Improve error message for getargs with bad keyword arguments (#114792) --- Doc/whatsnew/3.13.rst | 11 +++ Lib/test/test_call.py | 32 ++++++++- Lib/test/test_capi/test_getargs.py | 26 +++---- Lib/test/test_exceptions.py | 2 +- ...-01-31-09-10-10.gh-issue-107944.XWm1B-.rst | 1 + Python/getargs.c | 70 +++++++++++++++---- 6 files changed, 113 insertions(+), 29 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-31-09-10-10.gh-issue-107944.XWm1B-.rst diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 2ac5afa8ce601c..50a2a69c75ac70 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -101,6 +101,17 @@ Improved Error Messages variables. See also :ref:`using-on-controlling-color`. (Contributed by Pablo Galindo Salgado in :gh:`112730`.) 
+* When an incorrect keyword argument is passed to a function, the error message + now potentially suggests the correct keyword argument. + (Contributed by Pablo Galindo Salgado and Shantanu Jain in :gh:`107944`.) + + >>> "better error messages!".split(max_split=1) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + "better error messages!".split(max_split=1) + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^ + TypeError: split() got an unexpected keyword argument 'max_split'. Did you mean 'maxsplit'? + Other Language Changes ====================== diff --git a/Lib/test/test_call.py b/Lib/test/test_call.py index 3c8fc35e3c116d..2a6a5d287b04ee 100644 --- a/Lib/test/test_call.py +++ b/Lib/test/test_call.py @@ -155,7 +155,7 @@ def test_varargs16_kw(self): min, 0, default=1, key=2, foo=3) def test_varargs17_kw(self): - msg = r"'foo' is an invalid keyword argument for print\(\)$" + msg = r"print\(\) got an unexpected keyword argument 'foo'$" self.assertRaisesRegex(TypeError, msg, print, 0, sep=1, end=2, file=3, flush=4, foo=5) @@ -928,7 +928,7 @@ def check_suggestion_includes(self, message): self.assertIn(f"Did you mean '{message}'?", str(cm.exception)) @contextlib.contextmanager - def check_suggestion_not_pressent(self): + def check_suggestion_not_present(self): with self.assertRaises(TypeError) as cm: yield self.assertNotIn("Did you mean", str(cm.exception)) @@ -946,7 +946,7 @@ def foo(blech=None, /, aaa=None, *args, late1=None): for keyword, suggestion in cases: with self.subTest(keyword): - ctx = self.check_suggestion_includes(suggestion) if suggestion else self.check_suggestion_not_pressent() + ctx = self.check_suggestion_includes(suggestion) if suggestion else self.check_suggestion_not_present() with ctx: foo(**{keyword:None}) @@ -987,6 +987,32 @@ def case_change_over_substitution(BLuch=None, Luch = None, fluch = None): with self.check_suggestion_includes(suggestion): func(bluch=None) + def test_unexpected_keyword_suggestion_via_getargs(self): + with
self.check_suggestion_includes("maxsplit"): + "foo".split(maxsplt=1) + + self.assertRaisesRegex( + TypeError, r"split\(\) got an unexpected keyword argument 'blech'$", + "foo".split, blech=1 + ) + with self.check_suggestion_not_present(): + "foo".split(blech=1) + with self.check_suggestion_not_present(): + "foo".split(more_noise=1, maxsplt=1) + + # Also test the vgetargskeywords path + with self.check_suggestion_includes("name"): + ImportError(namez="oops") + + self.assertRaisesRegex( + TypeError, r"ImportError\(\) got an unexpected keyword argument 'blech'$", + ImportError, blech=1 + ) + with self.check_suggestion_not_present(): + ImportError(blech=1) + with self.check_suggestion_not_present(): + ImportError(blech=1, namez="oops") + @cpython_only class TestRecursion(unittest.TestCase): diff --git a/Lib/test/test_capi/test_getargs.py b/Lib/test/test_capi/test_getargs.py index 9b6aef27625ad0..12039803ba543e 100644 --- a/Lib/test/test_capi/test_getargs.py +++ b/Lib/test/test_capi/test_getargs.py @@ -667,7 +667,7 @@ def test_invalid_keyword(self): try: getargs_keywords((1,2),3,arg5=10,arg666=666) except TypeError as err: - self.assertEqual(str(err), "'arg666' is an invalid keyword argument for this function") + self.assertEqual(str(err), "this function got an unexpected keyword argument 'arg666'") else: self.fail('TypeError should have been raised') @@ -675,7 +675,7 @@ def test_surrogate_keyword(self): try: getargs_keywords((1,2), 3, (4,(5,6)), (7,8,9), **{'\uDC80': 10}) except TypeError as err: - self.assertEqual(str(err), "'\udc80' is an invalid keyword argument for this function") + self.assertEqual(str(err), "this function got an unexpected keyword argument '\udc80'") else: self.fail('TypeError should have been raised') @@ -742,12 +742,12 @@ def test_too_many_args(self): def test_invalid_keyword(self): # extraneous keyword arg with self.assertRaisesRegex(TypeError, - "'monster' is an invalid keyword argument for this function"): + "this function got an unexpected 
keyword argument 'monster'"): getargs_keyword_only(1, 2, monster=666) def test_surrogate_keyword(self): with self.assertRaisesRegex(TypeError, - "'\udc80' is an invalid keyword argument for this function"): + "this function got an unexpected keyword argument '\udc80'"): getargs_keyword_only(1, 2, **{'\uDC80': 10}) def test_weird_str_subclass(self): @@ -761,7 +761,7 @@ def __hash__(self): "invalid keyword argument for this function"): getargs_keyword_only(1, 2, **{BadStr("keyword_only"): 3}) with self.assertRaisesRegex(TypeError, - "invalid keyword argument for this function"): + "this function got an unexpected keyword argument"): getargs_keyword_only(1, 2, **{BadStr("monster"): 666}) def test_weird_str_subclass2(self): @@ -774,7 +774,7 @@ def __hash__(self): "invalid keyword argument for this function"): getargs_keyword_only(1, 2, **{BadStr("keyword_only"): 3}) with self.assertRaisesRegex(TypeError, - "invalid keyword argument for this function"): + "this function got an unexpected keyword argument"): getargs_keyword_only(1, 2, **{BadStr("monster"): 666}) @@ -807,7 +807,7 @@ def test_required_args(self): def test_empty_keyword(self): with self.assertRaisesRegex(TypeError, - "'' is an invalid keyword argument for this function"): + "this function got an unexpected keyword argument ''"): self.getargs(1, 2, **{'': 666}) @@ -1204,7 +1204,7 @@ def test_basic(self): "function missing required argument 'a'"): parse((), {}, 'O', ['a']) with self.assertRaisesRegex(TypeError, - "'b' is an invalid keyword argument"): + "this function got an unexpected keyword argument 'b'"): parse((), {'b': 1}, '|O', ['a']) with self.assertRaisesRegex(TypeError, fr"argument for function given by name \('a'\) " @@ -1278,10 +1278,10 @@ def test_nonascii_keywords(self): fr"and position \(1\)"): parse((1,), {name: 2}, 'O|O', [name, 'b']) with self.assertRaisesRegex(TypeError, - f"'{name}' is an invalid keyword argument"): + f"this function got an unexpected keyword argument '{name}'"): parse((), 
{name: 1}, '|O', ['b']) with self.assertRaisesRegex(TypeError, - "'b' is an invalid keyword argument"): + "this function got an unexpected keyword argument 'b'"): parse((), {'b': 1}, '|O', [name]) invalid = name.encode() + (name.encode()[:-1] or b'\x80') @@ -1301,17 +1301,17 @@ def test_nonascii_keywords(self): for name2 in ('b', 'ë', 'ĉ', 'Ɐ', '𐀁'): with self.subTest(name2=name2): with self.assertRaisesRegex(TypeError, - f"'{name2}' is an invalid keyword argument"): + f"this function got an unexpected keyword argument '{name2}'"): parse((), {name2: 1}, '|O', [name]) name2 = name.encode().decode('latin1') if name2 != name: with self.assertRaisesRegex(TypeError, - f"'{name2}' is an invalid keyword argument"): + f"this function got an unexpected keyword argument '{name2}'"): parse((), {name2: 1}, '|O', [name]) name3 = name + '3' with self.assertRaisesRegex(TypeError, - f"'{name2}' is an invalid keyword argument"): + f"this function got an unexpected keyword argument '{name2}'"): parse((), {name2: 1, name3: 2}, '|OO', [name, name3]) def test_nested_tuple(self): diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index c57488e44aecc6..c7e76414ff0715 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -1917,7 +1917,7 @@ def test_attributes(self): self.assertEqual(exc.name, 'somename') self.assertEqual(exc.path, 'somepath') - msg = "'invalid' is an invalid keyword argument for ImportError" + msg = r"ImportError\(\) got an unexpected keyword argument 'invalid'" with self.assertRaisesRegex(TypeError, msg): ImportError('test', invalid='keyword') diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-31-09-10-10.gh-issue-107944.XWm1B-.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-31-09-10-10.gh-issue-107944.XWm1B-.rst new file mode 100644 index 00000000000000..8e3fb786c11055 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-31-09-10-10.gh-issue-107944.XWm1B-.rst @@ -0,0 +1 @@ +Improve error message for function 
calls with bad keyword arguments via getargs diff --git a/Python/getargs.c b/Python/getargs.c index 0c4ce282f48764..08e97ee3e627b5 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -8,6 +8,7 @@ #include "pycore_modsupport.h" // export _PyArg_NoKeywords() #include "pycore_pylifecycle.h" // _PyArg_Fini #include "pycore_tuple.h" // _PyTuple_ITEMS() +#include "pycore_pyerrors.h" // _Py_CalculateSuggestions() /* Export Stable ABIs (abi only) */ PyAPI_FUNC(int) _PyArg_Parse_SizeT(PyObject *, const char *, ...); @@ -1424,12 +1425,31 @@ error_unexpected_keyword_arg(PyObject *kwargs, PyObject *kwnames, PyObject *kwtu int match = PySequence_Contains(kwtuple, keyword); if (match <= 0) { if (!match) { - PyErr_Format(PyExc_TypeError, - "'%S' is an invalid keyword " - "argument for %.200s%s", - keyword, - (fname == NULL) ? "this function" : fname, - (fname == NULL) ? "" : "()"); + PyObject *kwlist = PySequence_List(kwtuple); + if (!kwlist) { + return; + } + PyObject *suggestion_keyword = _Py_CalculateSuggestions(kwlist, keyword); + Py_DECREF(kwlist); + + if (suggestion_keyword) { + PyErr_Format(PyExc_TypeError, + "%.200s%s got an unexpected keyword argument '%S'." + " Did you mean '%S'?", + (fname == NULL) ? "this function" : fname, + (fname == NULL) ? "" : "()", + keyword, + suggestion_keyword); + Py_DECREF(suggestion_keyword); + } + else { + PyErr_Format(PyExc_TypeError, + "%.200s%s got an unexpected keyword argument '%S'", + (fname == NULL) ? "this function" : fname, + (fname == NULL) ? 
"" : "()", + keyword); + } + } return; } @@ -1457,6 +1477,9 @@ PyArg_ValidateKeywordArguments(PyObject *kwargs) return 1; } +static PyObject * +new_kwtuple(const char * const *keywords, int total, int pos); + #define IS_END_OF_FORMAT(c) (c == '\0' || c == ';' || c == ':') static int @@ -1722,12 +1745,35 @@ vgetargskeywords(PyObject *args, PyObject *kwargs, const char *format, } } if (!match) { - PyErr_Format(PyExc_TypeError, - "'%U' is an invalid keyword " - "argument for %.200s%s", - key, - (fname == NULL) ? "this function" : fname, - (fname == NULL) ? "" : "()"); + PyObject *_pykwtuple = new_kwtuple(kwlist, len, pos); + if (!_pykwtuple) { + return cleanreturn(0, &freelist); + } + PyObject *pykwlist = PySequence_List(_pykwtuple); + Py_DECREF(_pykwtuple); + if (!pykwlist) { + return cleanreturn(0, &freelist); + } + PyObject *suggestion_keyword = _Py_CalculateSuggestions(pykwlist, key); + Py_DECREF(pykwlist); + + if (suggestion_keyword) { + PyErr_Format(PyExc_TypeError, + "%.200s%s got an unexpected keyword argument '%S'." + " Did you mean '%S'?", + (fname == NULL) ? "this function" : fname, + (fname == NULL) ? "" : "()", + key, + suggestion_keyword); + Py_DECREF(suggestion_keyword); + } + else { + PyErr_Format(PyExc_TypeError, + "%.200s%s got an unexpected keyword argument '%S'", + (fname == NULL) ? "this function" : fname, + (fname == NULL) ? 
"" : "()", + key); + } return cleanreturn(0, &freelist); } } From ed1a8daf10bc471f929c14c2d1e0474d44a63b00 Mon Sep 17 00:00:00 2001 From: Tomas R Date: Thu, 8 Feb 2024 17:47:27 +0100 Subject: [PATCH 030/126] gh-112069: Adapt set/frozenset methods to Argument Clinic (#115112) --- ...-02-07-00-18-42.gh-issue-112069.jRDRR5.rst | 1 + Objects/clinic/setobject.c.h | 414 +++++++++++++++++ Objects/setobject.c | 416 +++++++++++------- 3 files changed, 674 insertions(+), 157 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-02-07-00-18-42.gh-issue-112069.jRDRR5.rst create mode 100644 Objects/clinic/setobject.c.h diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-02-07-00-18-42.gh-issue-112069.jRDRR5.rst b/Misc/NEWS.d/next/Core and Builtins/2024-02-07-00-18-42.gh-issue-112069.jRDRR5.rst new file mode 100644 index 00000000000000..51ba6bd1ddaac3 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-02-07-00-18-42.gh-issue-112069.jRDRR5.rst @@ -0,0 +1 @@ +Adapt :class:`set` and :class:`frozenset` methods to Argument Clinic. 
diff --git a/Objects/clinic/setobject.c.h b/Objects/clinic/setobject.c.h new file mode 100644 index 00000000000000..f3c96995ede60d --- /dev/null +++ b/Objects/clinic/setobject.c.h @@ -0,0 +1,414 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#include "pycore_modsupport.h" // _PyArg_CheckPositional() + +PyDoc_STRVAR(set_pop__doc__, +"pop($self, /)\n" +"--\n" +"\n" +"Remove and return an arbitrary set element.\n" +"\n" +"Raises KeyError if the set is empty."); + +#define SET_POP_METHODDEF \ + {"pop", (PyCFunction)set_pop, METH_NOARGS, set_pop__doc__}, + +static PyObject * +set_pop_impl(PySetObject *so); + +static PyObject * +set_pop(PySetObject *so, PyObject *Py_UNUSED(ignored)) +{ + return set_pop_impl(so); +} + +PyDoc_STRVAR(set_update__doc__, +"update($self, /, *others)\n" +"--\n" +"\n" +"Update the set, adding elements from all others."); + +#define SET_UPDATE_METHODDEF \ + {"update", _PyCFunction_CAST(set_update), METH_FASTCALL, set_update__doc__}, + +static PyObject * +set_update_impl(PySetObject *so, PyObject *args); + +static PyObject * +set_update(PySetObject *so, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *__clinic_args = NULL; + + if (!_PyArg_CheckPositional("update", nargs, 0, PY_SSIZE_T_MAX)) { + goto exit; + } + __clinic_args = PyTuple_New(nargs - 0); + if (!__clinic_args) { + goto exit; + } + for (Py_ssize_t i = 0; i < nargs - 0; ++i) { + PyTuple_SET_ITEM(__clinic_args, i, Py_NewRef(args[0 + i])); + } + return_value = set_update_impl(so, __clinic_args); + +exit: + Py_XDECREF(__clinic_args); + return return_value; +} + +PyDoc_STRVAR(set_copy__doc__, +"copy($self, /)\n" +"--\n" +"\n" +"Return a shallow copy of a set."); + +#define SET_COPY_METHODDEF \ + {"copy", (PyCFunction)set_copy, METH_NOARGS, set_copy__doc__}, + +static PyObject * +set_copy_impl(PySetObject *so); + +static PyObject * +set_copy(PySetObject *so, PyObject *Py_UNUSED(ignored)) +{ + return set_copy_impl(so); +} + 
+PyDoc_STRVAR(frozenset_copy__doc__, +"copy($self, /)\n" +"--\n" +"\n" +"Return a shallow copy of a set."); + +#define FROZENSET_COPY_METHODDEF \ + {"copy", (PyCFunction)frozenset_copy, METH_NOARGS, frozenset_copy__doc__}, + +static PyObject * +frozenset_copy_impl(PySetObject *so); + +static PyObject * +frozenset_copy(PySetObject *so, PyObject *Py_UNUSED(ignored)) +{ + return frozenset_copy_impl(so); +} + +PyDoc_STRVAR(set_clear__doc__, +"clear($self, /)\n" +"--\n" +"\n" +"Remove all elements from this set."); + +#define SET_CLEAR_METHODDEF \ + {"clear", (PyCFunction)set_clear, METH_NOARGS, set_clear__doc__}, + +static PyObject * +set_clear_impl(PySetObject *so); + +static PyObject * +set_clear(PySetObject *so, PyObject *Py_UNUSED(ignored)) +{ + return set_clear_impl(so); +} + +PyDoc_STRVAR(set_union__doc__, +"union($self, /, *others)\n" +"--\n" +"\n" +"Return a new set with elements from the set and all others."); + +#define SET_UNION_METHODDEF \ + {"union", _PyCFunction_CAST(set_union), METH_FASTCALL, set_union__doc__}, + +static PyObject * +set_union_impl(PySetObject *so, PyObject *args); + +static PyObject * +set_union(PySetObject *so, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *__clinic_args = NULL; + + if (!_PyArg_CheckPositional("union", nargs, 0, PY_SSIZE_T_MAX)) { + goto exit; + } + __clinic_args = PyTuple_New(nargs - 0); + if (!__clinic_args) { + goto exit; + } + for (Py_ssize_t i = 0; i < nargs - 0; ++i) { + PyTuple_SET_ITEM(__clinic_args, i, Py_NewRef(args[0 + i])); + } + return_value = set_union_impl(so, __clinic_args); + +exit: + Py_XDECREF(__clinic_args); + return return_value; +} + +PyDoc_STRVAR(set_intersection_multi__doc__, +"intersection($self, /, *others)\n" +"--\n" +"\n" +"Return a new set with elements common to the set and all others."); + +#define SET_INTERSECTION_MULTI_METHODDEF \ + {"intersection", _PyCFunction_CAST(set_intersection_multi), METH_FASTCALL, set_intersection_multi__doc__}, + 
+static PyObject * +set_intersection_multi_impl(PySetObject *so, PyObject *args); + +static PyObject * +set_intersection_multi(PySetObject *so, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *__clinic_args = NULL; + + if (!_PyArg_CheckPositional("intersection", nargs, 0, PY_SSIZE_T_MAX)) { + goto exit; + } + __clinic_args = PyTuple_New(nargs - 0); + if (!__clinic_args) { + goto exit; + } + for (Py_ssize_t i = 0; i < nargs - 0; ++i) { + PyTuple_SET_ITEM(__clinic_args, i, Py_NewRef(args[0 + i])); + } + return_value = set_intersection_multi_impl(so, __clinic_args); + +exit: + Py_XDECREF(__clinic_args); + return return_value; +} + +PyDoc_STRVAR(set_intersection_update_multi__doc__, +"intersection_update($self, /, *others)\n" +"--\n" +"\n" +"Update the set, keeping only elements found in it and all others."); + +#define SET_INTERSECTION_UPDATE_MULTI_METHODDEF \ + {"intersection_update", _PyCFunction_CAST(set_intersection_update_multi), METH_FASTCALL, set_intersection_update_multi__doc__}, + +static PyObject * +set_intersection_update_multi_impl(PySetObject *so, PyObject *args); + +static PyObject * +set_intersection_update_multi(PySetObject *so, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *__clinic_args = NULL; + + if (!_PyArg_CheckPositional("intersection_update", nargs, 0, PY_SSIZE_T_MAX)) { + goto exit; + } + __clinic_args = PyTuple_New(nargs - 0); + if (!__clinic_args) { + goto exit; + } + for (Py_ssize_t i = 0; i < nargs - 0; ++i) { + PyTuple_SET_ITEM(__clinic_args, i, Py_NewRef(args[0 + i])); + } + return_value = set_intersection_update_multi_impl(so, __clinic_args); + +exit: + Py_XDECREF(__clinic_args); + return return_value; +} + +PyDoc_STRVAR(set_isdisjoint__doc__, +"isdisjoint($self, other, /)\n" +"--\n" +"\n" +"Return True if two sets have a null intersection."); + +#define SET_ISDISJOINT_METHODDEF \ + {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O, 
set_isdisjoint__doc__}, + +PyDoc_STRVAR(set_difference_update__doc__, +"difference_update($self, /, *others)\n" +"--\n" +"\n" +"Update the set, removing elements found in others."); + +#define SET_DIFFERENCE_UPDATE_METHODDEF \ + {"difference_update", _PyCFunction_CAST(set_difference_update), METH_FASTCALL, set_difference_update__doc__}, + +static PyObject * +set_difference_update_impl(PySetObject *so, PyObject *args); + +static PyObject * +set_difference_update(PySetObject *so, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *__clinic_args = NULL; + + if (!_PyArg_CheckPositional("difference_update", nargs, 0, PY_SSIZE_T_MAX)) { + goto exit; + } + __clinic_args = PyTuple_New(nargs - 0); + if (!__clinic_args) { + goto exit; + } + for (Py_ssize_t i = 0; i < nargs - 0; ++i) { + PyTuple_SET_ITEM(__clinic_args, i, Py_NewRef(args[0 + i])); + } + return_value = set_difference_update_impl(so, __clinic_args); + +exit: + Py_XDECREF(__clinic_args); + return return_value; +} + +PyDoc_STRVAR(set_difference_multi__doc__, +"difference($self, /, *others)\n" +"--\n" +"\n" +"Return a new set with elements in the set that are not in the others."); + +#define SET_DIFFERENCE_MULTI_METHODDEF \ + {"difference", _PyCFunction_CAST(set_difference_multi), METH_FASTCALL, set_difference_multi__doc__}, + +static PyObject * +set_difference_multi_impl(PySetObject *so, PyObject *args); + +static PyObject * +set_difference_multi(PySetObject *so, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *__clinic_args = NULL; + + if (!_PyArg_CheckPositional("difference", nargs, 0, PY_SSIZE_T_MAX)) { + goto exit; + } + __clinic_args = PyTuple_New(nargs - 0); + if (!__clinic_args) { + goto exit; + } + for (Py_ssize_t i = 0; i < nargs - 0; ++i) { + PyTuple_SET_ITEM(__clinic_args, i, Py_NewRef(args[0 + i])); + } + return_value = set_difference_multi_impl(so, __clinic_args); + +exit: + Py_XDECREF(__clinic_args); + return return_value; 
+} + +PyDoc_STRVAR(set_symmetric_difference_update__doc__, +"symmetric_difference_update($self, other, /)\n" +"--\n" +"\n" +"Update the set, keeping only elements found in either set, but not in both."); + +#define SET_SYMMETRIC_DIFFERENCE_UPDATE_METHODDEF \ + {"symmetric_difference_update", (PyCFunction)set_symmetric_difference_update, METH_O, set_symmetric_difference_update__doc__}, + +PyDoc_STRVAR(set_symmetric_difference__doc__, +"symmetric_difference($self, other, /)\n" +"--\n" +"\n" +"Return a new set with elements in either the set or other but not both."); + +#define SET_SYMMETRIC_DIFFERENCE_METHODDEF \ + {"symmetric_difference", (PyCFunction)set_symmetric_difference, METH_O, set_symmetric_difference__doc__}, + +PyDoc_STRVAR(set_issubset__doc__, +"issubset($self, other, /)\n" +"--\n" +"\n" +"Report whether another set contains this set."); + +#define SET_ISSUBSET_METHODDEF \ + {"issubset", (PyCFunction)set_issubset, METH_O, set_issubset__doc__}, + +PyDoc_STRVAR(set_issuperset__doc__, +"issuperset($self, other, /)\n" +"--\n" +"\n" +"Report whether this set contains another set."); + +#define SET_ISSUPERSET_METHODDEF \ + {"issuperset", (PyCFunction)set_issuperset, METH_O, set_issuperset__doc__}, + +PyDoc_STRVAR(set_add__doc__, +"add($self, object, /)\n" +"--\n" +"\n" +"Add an element to a set.\n" +"\n" +"This has no effect if the element is already present."); + +#define SET_ADD_METHODDEF \ + {"add", (PyCFunction)set_add, METH_O, set_add__doc__}, + +PyDoc_STRVAR(set___contains____doc__, +"__contains__($self, object, /)\n" +"--\n" +"\n" +"x.__contains__(y) <==> y in x."); + +#define SET___CONTAINS___METHODDEF \ + {"__contains__", (PyCFunction)set___contains__, METH_O|METH_COEXIST, set___contains____doc__}, + +PyDoc_STRVAR(set_remove__doc__, +"remove($self, object, /)\n" +"--\n" +"\n" +"Remove an element from a set; it must be a member.\n" +"\n" +"If the element is not a member, raise a KeyError."); + +#define SET_REMOVE_METHODDEF \ + {"remove", 
(PyCFunction)set_remove, METH_O, set_remove__doc__}, + +PyDoc_STRVAR(set_discard__doc__, +"discard($self, object, /)\n" +"--\n" +"\n" +"Remove an element from a set if it is a member.\n" +"\n" +"Unlike set.remove(), the discard() method does not raise\n" +"an exception when an element is missing from the set."); + +#define SET_DISCARD_METHODDEF \ + {"discard", (PyCFunction)set_discard, METH_O, set_discard__doc__}, + +PyDoc_STRVAR(set___reduce____doc__, +"__reduce__($self, /)\n" +"--\n" +"\n" +"Return state information for pickling."); + +#define SET___REDUCE___METHODDEF \ + {"__reduce__", (PyCFunction)set___reduce__, METH_NOARGS, set___reduce____doc__}, + +static PyObject * +set___reduce___impl(PySetObject *so); + +static PyObject * +set___reduce__(PySetObject *so, PyObject *Py_UNUSED(ignored)) +{ + return set___reduce___impl(so); +} + +PyDoc_STRVAR(set___sizeof____doc__, +"__sizeof__($self, /)\n" +"--\n" +"\n" +"S.__sizeof__() -> size of S in memory, in bytes."); + +#define SET___SIZEOF___METHODDEF \ + {"__sizeof__", (PyCFunction)set___sizeof__, METH_NOARGS, set___sizeof____doc__}, + +static PyObject * +set___sizeof___impl(PySetObject *so); + +static PyObject * +set___sizeof__(PySetObject *so, PyObject *Py_UNUSED(ignored)) +{ + return set___sizeof___impl(so); +} +/*[clinic end generated code: output=34a30591148da884 input=a9049054013a1b77]*/ diff --git a/Objects/setobject.c b/Objects/setobject.c index 3acf2a7a74890b..6a4c8c45f0836d 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -40,6 +40,19 @@ #include "pycore_pyerrors.h" // _PyErr_SetKeyError() #include "pycore_setobject.h" // _PySet_NextEntry() definition #include // offsetof() +#include "clinic/setobject.c.h" + +/*[clinic input] +class set "PySetObject *" "&PySet_Type" +class frozenset "PySetObject *" "&PyFrozenSet_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=97ad1d3e9f117079]*/ + +/*[python input] +class setobject_converter(self_converter): 
+ type = "PySetObject *" +[python start generated code]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=33a44506d4d57793]*/ /* Object used as dummy key to fill deleted entries */ static PyObject _dummy_struct; @@ -631,8 +644,18 @@ set_merge(PySetObject *so, PyObject *otherset) return 0; } +/*[clinic input] +set.pop + so: setobject + +Remove and return an arbitrary set element. + +Raises KeyError if the set is empty. +[clinic start generated code]*/ + static PyObject * -set_pop(PySetObject *so, PyObject *Py_UNUSED(ignored)) +set_pop_impl(PySetObject *so) +/*[clinic end generated code: output=4d65180f1271871b input=4a3f5552e660a260]*/ { /* Make sure the search finger is in bounds */ setentry *entry = so->table + (so->finger & so->mask); @@ -656,9 +679,6 @@ set_pop(PySetObject *so, PyObject *Py_UNUSED(ignored)) return key; } -PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.\n\ -Raises KeyError if the set is empty."); - static int set_traverse(PySetObject *so, visitproc visit, void *arg) { @@ -935,8 +955,18 @@ set_update_internal(PySetObject *so, PyObject *other) return 0; } +/*[clinic input] +set.update + so: setobject + *others as args: object + / + +Update the set, adding elements from all others. +[clinic start generated code]*/ + static PyObject * -set_update(PySetObject *so, PyObject *args) +set_update_impl(PySetObject *so, PyObject *args) +/*[clinic end generated code: output=34f6371704974c8a input=eb47c4fbaeb3286e]*/ { Py_ssize_t i; @@ -948,12 +978,6 @@ set_update(PySetObject *so, PyObject *args) Py_RETURN_NONE; } -PyDoc_STRVAR(update_doc, -"update($self, /, *others)\n\ ---\n\ -\n\ -Update the set, adding elements from all others."); - /* XXX Todo: If aligned memory allocations become available, make the set object 64 byte aligned so that most of the fields @@ -1101,14 +1125,30 @@ set_swap_bodies(PySetObject *a, PySetObject *b) } } +/*[clinic input] +set.copy + so: setobject + +Return a shallow copy of a set. 
+[clinic start generated code]*/ + static PyObject * -set_copy(PySetObject *so, PyObject *Py_UNUSED(ignored)) +set_copy_impl(PySetObject *so) +/*[clinic end generated code: output=c9223a1e1cc6b041 input=2b80b288d47b8cf1]*/ { return make_new_set_basetype(Py_TYPE(so), (PyObject *)so); } +/*[clinic input] +frozenset.copy + so: setobject + +Return a shallow copy of a set. +[clinic start generated code]*/ + static PyObject * -frozenset_copy(PySetObject *so, PyObject *Py_UNUSED(ignored)) +frozenset_copy_impl(PySetObject *so) +/*[clinic end generated code: output=b356263526af9e70 input=3dc65577d344eff7]*/ { if (PyFrozenSet_CheckExact(so)) { return Py_NewRef(so); @@ -1116,19 +1156,33 @@ frozenset_copy(PySetObject *so, PyObject *Py_UNUSED(ignored)) return set_copy(so, NULL); } -PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set."); +/*[clinic input] +set.clear + so: setobject + +Remove all elements from this set. +[clinic start generated code]*/ static PyObject * -set_clear(PySetObject *so, PyObject *Py_UNUSED(ignored)) +set_clear_impl(PySetObject *so) +/*[clinic end generated code: output=4e71d5a83904161a input=74ac19794da81a39]*/ { set_clear_internal(so); Py_RETURN_NONE; } -PyDoc_STRVAR(clear_doc, "Remove all elements from this set."); +/*[clinic input] +set.union + so: setobject + *others as args: object + / + +Return a new set with elements from the set and all others. 
+[clinic start generated code]*/ static PyObject * -set_union(PySetObject *so, PyObject *args) +set_union_impl(PySetObject *so, PyObject *args) +/*[clinic end generated code: output=2c83d05a446a1477 input=2e2024fa1e40ac84]*/ { PySetObject *result; PyObject *other; @@ -1150,12 +1204,6 @@ set_union(PySetObject *so, PyObject *args) return (PyObject *)result; } -PyDoc_STRVAR(union_doc, -"union($self, /, *others)\n\ ---\n\ -\n\ -Return a new set with elements from the set and all others."); - static PyObject * set_or(PySetObject *so, PyObject *other) { @@ -1270,8 +1318,18 @@ set_intersection(PySetObject *so, PyObject *other) return NULL; } +/*[clinic input] +set.intersection as set_intersection_multi + so: setobject + *others as args: object + / + +Return a new set with elements common to the set and all others. +[clinic start generated code]*/ + static PyObject * -set_intersection_multi(PySetObject *so, PyObject *args) +set_intersection_multi_impl(PySetObject *so, PyObject *args) +/*[clinic end generated code: output=2406ef3387adbe2f input=04108ea6d7f0532b]*/ { Py_ssize_t i; @@ -1291,12 +1349,6 @@ set_intersection_multi(PySetObject *so, PyObject *args) return result; } -PyDoc_STRVAR(intersection_doc, -"intersection($self, /, *others)\n\ ---\n\ -\n\ -Return a new set with elements common to the set and all others."); - static PyObject * set_intersection_update(PySetObject *so, PyObject *other) { @@ -1310,12 +1362,22 @@ set_intersection_update(PySetObject *so, PyObject *other) Py_RETURN_NONE; } +/*[clinic input] +set.intersection_update as set_intersection_update_multi + so: setobject + *others as args: object + / + +Update the set, keeping only elements found in it and all others. 
+[clinic start generated code]*/ + static PyObject * -set_intersection_update_multi(PySetObject *so, PyObject *args) +set_intersection_update_multi_impl(PySetObject *so, PyObject *args) +/*[clinic end generated code: output=251c1f729063609d input=ff8f119f97458d16]*/ { PyObject *tmp; - tmp = set_intersection_multi(so, args); + tmp = set_intersection_multi_impl(so, args); if (tmp == NULL) return NULL; set_swap_bodies(so, (PySetObject *)tmp); @@ -1323,12 +1385,6 @@ set_intersection_update_multi(PySetObject *so, PyObject *args) Py_RETURN_NONE; } -PyDoc_STRVAR(intersection_update_doc, -"intersection_update($self, /, *others)\n\ ---\n\ -\n\ -Update the set, keeping only elements found in it and all others."); - static PyObject * set_and(PySetObject *so, PyObject *other) { @@ -1351,8 +1407,18 @@ set_iand(PySetObject *so, PyObject *other) return Py_NewRef(so); } +/*[clinic input] +set.isdisjoint + so: setobject + other: object + / + +Return True if two sets have a null intersection. +[clinic start generated code]*/ + static PyObject * set_isdisjoint(PySetObject *so, PyObject *other) +/*[clinic end generated code: output=a92bbf9a2db6a3da input=c254ddec8a2326e3]*/ { PyObject *key, *it, *tmp; int rv; @@ -1410,9 +1476,6 @@ set_isdisjoint(PySetObject *so, PyObject *other) Py_RETURN_TRUE; } -PyDoc_STRVAR(isdisjoint_doc, -"Return True if two sets have a null intersection."); - static int set_difference_update_internal(PySetObject *so, PyObject *other) { @@ -1471,8 +1534,18 @@ set_difference_update_internal(PySetObject *so, PyObject *other) return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4); } +/*[clinic input] +set.difference_update + so: setobject + *others as args: object + / + +Update the set, removing elements found in others. 
+[clinic start generated code]*/ + static PyObject * -set_difference_update(PySetObject *so, PyObject *args) +set_difference_update_impl(PySetObject *so, PyObject *args) +/*[clinic end generated code: output=28685b2fc63e41c4 input=e7abb43c9f2c5a73]*/ { Py_ssize_t i; @@ -1484,12 +1557,6 @@ set_difference_update(PySetObject *so, PyObject *args) Py_RETURN_NONE; } -PyDoc_STRVAR(difference_update_doc, -"difference_update($self, /, *others)\n\ ---\n\ -\n\ -Update the set, removing elements found in others."); - static PyObject * set_copy_and_difference(PySetObject *so, PyObject *other) { @@ -1580,8 +1647,18 @@ set_difference(PySetObject *so, PyObject *other) return result; } +/*[clinic input] +set.difference as set_difference_multi + so: setobject + *others as args: object + / + +Return a new set with elements in the set that are not in the others. +[clinic start generated code]*/ + static PyObject * -set_difference_multi(PySetObject *so, PyObject *args) +set_difference_multi_impl(PySetObject *so, PyObject *args) +/*[clinic end generated code: output=3130c3bb3cac873d input=d8ae9bb6d518ab95]*/ { Py_ssize_t i; PyObject *result, *other; @@ -1604,11 +1681,6 @@ set_difference_multi(PySetObject *so, PyObject *args) return result; } -PyDoc_STRVAR(difference_doc, -"difference($self, /, *others)\n\ ---\n\ -\n\ -Return a new set with elements in the set that are not in the others."); static PyObject * set_sub(PySetObject *so, PyObject *other) { @@ -1654,8 +1726,18 @@ set_symmetric_difference_update_dict(PySetObject *so, PyObject *other) Py_RETURN_NONE; } +/*[clinic input] +set.symmetric_difference_update + so: setobject + other: object + / + +Update the set, keeping only elements found in either set, but not in both. 
+[clinic start generated code]*/ + static PyObject * set_symmetric_difference_update(PySetObject *so, PyObject *other) +/*[clinic end generated code: output=fbb049c0806028de input=a50acf0365e1f0a5]*/ { PySetObject *otherset; PyObject *key; @@ -1708,14 +1790,18 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other) Py_RETURN_NONE; } -PyDoc_STRVAR(symmetric_difference_update_doc, -"symmetric_difference_update($self, other, /)\n\ ---\n\ -\n\ -Update the set, keeping only elements found in either set, but not in both."); +/*[clinic input] +set.symmetric_difference + so: setobject + other: object + / + +Return a new set with elements in either the set or other but not both. +[clinic start generated code]*/ static PyObject * set_symmetric_difference(PySetObject *so, PyObject *other) +/*[clinic end generated code: output=f95364211b88775a input=f18af370ad72ebac]*/ { PyObject *rv; PySetObject *otherset; @@ -1732,12 +1818,6 @@ set_symmetric_difference(PySetObject *so, PyObject *other) return (PyObject *)otherset; } -PyDoc_STRVAR(symmetric_difference_doc, -"symmetric_difference($self, other, /)\n\ ---\n\ -\n\ -Return a new set with elements in either the set or other but not both."); - static PyObject * set_xor(PySetObject *so, PyObject *other) { @@ -1760,8 +1840,18 @@ set_ixor(PySetObject *so, PyObject *other) return Py_NewRef(so); } +/*[clinic input] +set.issubset + so: setobject + other: object + / + +Report whether another set contains this set. 
+[clinic start generated code]*/ + static PyObject * set_issubset(PySetObject *so, PyObject *other) +/*[clinic end generated code: output=78aef1f377aedef1 input=37fbc579b609db0c]*/ { setentry *entry; Py_ssize_t pos = 0; @@ -1794,14 +1884,18 @@ set_issubset(PySetObject *so, PyObject *other) Py_RETURN_TRUE; } -PyDoc_STRVAR(issubset_doc, -"issubset($self, other, /)\n\ ---\n\ -\n\ -Test whether every element in the set is in other."); +/*[clinic input] +set.issuperset + so: setobject + other: object + / + +Report whether this set contains another set. +[clinic start generated code]*/ static PyObject * set_issuperset(PySetObject *so, PyObject *other) +/*[clinic end generated code: output=7d2b71dd714a7ec7 input=fd5dab052f2e9bb3]*/ { if (PyAnySet_Check(other)) { return set_issubset((PySetObject *)other, (PyObject *)so); @@ -1830,12 +1924,6 @@ set_issuperset(PySetObject *so, PyObject *other) Py_RETURN_TRUE; } -PyDoc_STRVAR(issuperset_doc, -"issuperset($self, other, /)\n\ ---\n\ -\n\ -Test whether every element in other is in the set."); - static PyObject * set_richcompare(PySetObject *v, PyObject *w, int op) { @@ -1879,19 +1967,26 @@ set_richcompare(PySetObject *v, PyObject *w, int op) Py_RETURN_NOTIMPLEMENTED; } +/*[clinic input] +set.add + so: setobject + object as key: object + / + +Add an element to a set. + +This has no effect if the element is already present. +[clinic start generated code]*/ + static PyObject * set_add(PySetObject *so, PyObject *key) +/*[clinic end generated code: output=cd9c2d5c2069c2ba input=96f1efe029e47972]*/ { if (set_add_key(so, key)) return NULL; Py_RETURN_NONE; } -PyDoc_STRVAR(add_doc, -"Add an element to a set.\n\ -\n\ -This has no effect if the element is already present."); - static int set_contains(PySetObject *so, PyObject *key) { @@ -1912,8 +2007,19 @@ set_contains(PySetObject *so, PyObject *key) return rv; } +/*[clinic input] +@coexist +set.__contains__ + so: setobject + object as key: object + / + +x.__contains__(y) <==> y in x. 
+[clinic start generated code]*/ + static PyObject * -set_direct_contains(PySetObject *so, PyObject *key) +set___contains__(PySetObject *so, PyObject *key) +/*[clinic end generated code: output=b5948bc5c590d3ca input=cf4c72db704e4cf0]*/ { long result; @@ -1923,10 +2029,20 @@ set_direct_contains(PySetObject *so, PyObject *key) return PyBool_FromLong(result); } -PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x."); +/*[clinic input] +set.remove + so: setobject + object as key: object + / + +Remove an element from a set; it must be a member. + +If the element is not a member, raise a KeyError. +[clinic start generated code]*/ static PyObject * set_remove(PySetObject *so, PyObject *key) +/*[clinic end generated code: output=08ae496d0cd2b8c1 input=10132515dfe8ebd7]*/ { PyObject *tmpkey; int rv; @@ -1952,13 +2068,21 @@ set_remove(PySetObject *so, PyObject *key) Py_RETURN_NONE; } -PyDoc_STRVAR(remove_doc, -"Remove an element from a set; it must be a member.\n\ -\n\ -If the element is not a member, raise a KeyError."); +/*[clinic input] +set.discard + so: setobject + object as key: object + / + +Remove an element from a set if it is a member. + +Unlike set.remove(), the discard() method does not raise +an exception when an element is missing from the set. +[clinic start generated code]*/ static PyObject * set_discard(PySetObject *so, PyObject *key) +/*[clinic end generated code: output=9181b60d7bb7d480 input=82a689eba94d5ad9]*/ { PyObject *tmpkey; int rv; @@ -1979,14 +2103,16 @@ set_discard(PySetObject *so, PyObject *key) Py_RETURN_NONE; } -PyDoc_STRVAR(discard_doc, -"Remove an element from a set if it is a member.\n\ -\n\ -Unlike set.remove(), the discard() method does not raise\n\ -an exception when an element is missing from the set."); +/*[clinic input] +set.__reduce__ + so: setobject + +Return state information for pickling. 
+[clinic start generated code]*/ static PyObject * -set_reduce(PySetObject *so, PyObject *Py_UNUSED(ignored)) +set___reduce___impl(PySetObject *so) +/*[clinic end generated code: output=9af7d0e029df87ee input=531375e87a24a449]*/ { PyObject *keys=NULL, *args=NULL, *result=NULL, *state=NULL; @@ -2007,8 +2133,16 @@ set_reduce(PySetObject *so, PyObject *Py_UNUSED(ignored)) return result; } +/*[clinic input] +set.__sizeof__ + so: setobject + +S.__sizeof__() -> size of S in memory, in bytes. +[clinic start generated code]*/ + static PyObject * -set_sizeof(PySetObject *so, PyObject *Py_UNUSED(ignored)) +set___sizeof___impl(PySetObject *so) +/*[clinic end generated code: output=4bfa3df7bd38ed88 input=0f214fc2225319fc]*/ { size_t res = _PyObject_SIZE(Py_TYPE(so)); if (so->table != so->smalltable) { @@ -2017,7 +2151,6 @@ set_sizeof(PySetObject *so, PyObject *Py_UNUSED(ignored)) return PyLong_FromSize_t(res); } -PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes"); static int set_init(PySetObject *self, PyObject *args, PyObject *kwds) { @@ -2071,46 +2204,26 @@ static PySequenceMethods set_as_sequence = { /* set object ********************************************************/ static PyMethodDef set_methods[] = { - {"add", (PyCFunction)set_add, METH_O, - add_doc}, - {"clear", (PyCFunction)set_clear, METH_NOARGS, - clear_doc}, - {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST, - contains_doc}, - {"copy", (PyCFunction)set_copy, METH_NOARGS, - copy_doc}, - {"discard", (PyCFunction)set_discard, METH_O, - discard_doc}, - {"difference", (PyCFunction)set_difference_multi, METH_VARARGS, - difference_doc}, - {"difference_update", (PyCFunction)set_difference_update, METH_VARARGS, - difference_update_doc}, - {"intersection",(PyCFunction)set_intersection_multi, METH_VARARGS, - intersection_doc}, - {"intersection_update",(PyCFunction)set_intersection_update_multi, METH_VARARGS, - intersection_update_doc}, - {"isdisjoint", 
(PyCFunction)set_isdisjoint, METH_O, - isdisjoint_doc}, - {"issubset", (PyCFunction)set_issubset, METH_O, - issubset_doc}, - {"issuperset", (PyCFunction)set_issuperset, METH_O, - issuperset_doc}, - {"pop", (PyCFunction)set_pop, METH_NOARGS, - pop_doc}, - {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS, - reduce_doc}, - {"remove", (PyCFunction)set_remove, METH_O, - remove_doc}, - {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS, - sizeof_doc}, - {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O, - symmetric_difference_doc}, - {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update, METH_O, - symmetric_difference_update_doc}, - {"union", (PyCFunction)set_union, METH_VARARGS, - union_doc}, - {"update", (PyCFunction)set_update, METH_VARARGS, - update_doc}, + SET_ADD_METHODDEF + SET_CLEAR_METHODDEF + SET___CONTAINS___METHODDEF + SET_COPY_METHODDEF + SET_DISCARD_METHODDEF + SET_DIFFERENCE_MULTI_METHODDEF + SET_DIFFERENCE_UPDATE_METHODDEF + SET_INTERSECTION_MULTI_METHODDEF + SET_INTERSECTION_UPDATE_MULTI_METHODDEF + SET_ISDISJOINT_METHODDEF + SET_ISSUBSET_METHODDEF + SET_ISSUPERSET_METHODDEF + SET_POP_METHODDEF + SET___REDUCE___METHODDEF + SET_REMOVE_METHODDEF + SET___SIZEOF___METHODDEF + SET_SYMMETRIC_DIFFERENCE_METHODDEF + SET_SYMMETRIC_DIFFERENCE_UPDATE_METHODDEF + SET_UNION_METHODDEF + SET_UPDATE_METHODDEF {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")}, {NULL, NULL} /* sentinel */ }; @@ -2203,28 +2316,17 @@ PyTypeObject PySet_Type = { static PyMethodDef frozenset_methods[] = { - {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST, - contains_doc}, - {"copy", (PyCFunction)frozenset_copy, METH_NOARGS, - copy_doc}, - {"difference", (PyCFunction)set_difference_multi, METH_VARARGS, - difference_doc}, - {"intersection", (PyCFunction)set_intersection_multi, METH_VARARGS, - intersection_doc}, - {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O, - isdisjoint_doc}, - 
{"issubset", (PyCFunction)set_issubset, METH_O, - issubset_doc}, - {"issuperset", (PyCFunction)set_issuperset, METH_O, - issuperset_doc}, - {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS, - reduce_doc}, - {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS, - sizeof_doc}, - {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O, - symmetric_difference_doc}, - {"union", (PyCFunction)set_union, METH_VARARGS, - union_doc}, + SET___CONTAINS___METHODDEF + FROZENSET_COPY_METHODDEF + SET_DIFFERENCE_MULTI_METHODDEF + SET_INTERSECTION_MULTI_METHODDEF + SET_ISDISJOINT_METHODDEF + SET_ISSUBSET_METHODDEF + SET_ISSUPERSET_METHODDEF + SET___REDUCE___METHODDEF + SET___SIZEOF___METHODDEF + SET_SYMMETRIC_DIFFERENCE_METHODDEF + SET_UNION_METHODDEF {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")}, {NULL, NULL} /* sentinel */ }; From 5914a211ef5542edd1f792c2684e373a42647b04 Mon Sep 17 00:00:00 2001 From: adang1345 Date: Thu, 8 Feb 2024 16:42:45 -0500 Subject: [PATCH 031/126] gh-115167: Exclude vcruntime140_threads.dll from Windows build output (GH-115176) --- .../next/Build/2024-02-08-19-36-20.gh-issue-115167.LB9nDK.rst | 1 + PCbuild/pyproject.props | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Build/2024-02-08-19-36-20.gh-issue-115167.LB9nDK.rst diff --git a/Misc/NEWS.d/next/Build/2024-02-08-19-36-20.gh-issue-115167.LB9nDK.rst b/Misc/NEWS.d/next/Build/2024-02-08-19-36-20.gh-issue-115167.LB9nDK.rst new file mode 100644 index 00000000000000..c60c4a93fe8906 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2024-02-08-19-36-20.gh-issue-115167.LB9nDK.rst @@ -0,0 +1 @@ +Avoid vendoring ``vcruntime140_threads.dll`` when building with Visual Studio 2022 version 17.8. 
diff --git a/PCbuild/pyproject.props b/PCbuild/pyproject.props index fd5fbc9e910eee..9c85e5efa4af4a 100644 --- a/PCbuild/pyproject.props +++ b/PCbuild/pyproject.props @@ -250,7 +250,7 @@ public override bool Execute() { - + From 553c90ccc2f5b15be76a2bb6e38d23e58d739e2f Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Fri, 9 Feb 2024 09:40:28 +0300 Subject: [PATCH 032/126] gh-101100: Fix sphinx warnings in `library/enum.rst` (#114696) Co-authored-by: Ethan Furman --- Doc/library/enum.rst | 17 +++++++++++++++-- Doc/tools/.nitignore | 1 - 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/Doc/library/enum.rst b/Doc/library/enum.rst index 534939943d3326..30d80ce8d488cc 100644 --- a/Doc/library/enum.rst +++ b/Doc/library/enum.rst @@ -286,6 +286,19 @@ Data Types appropriate value will be chosen for you. See :class:`auto` for the details. + .. attribute:: Enum._name_ + + Name of the member. + + .. attribute:: Enum._value_ + + Value of the member, can be set in :meth:`~object.__new__`. + + .. attribute:: Enum._order_ + + No longer used, kept for backward compatibility. + (class attribute, removed during class creation). + .. 
attribute:: Enum._ignore_ ``_ignore_`` is only used during creation and is removed from the @@ -823,8 +836,8 @@ Supported ``_sunder_`` names - :attr:`~Enum._ignore_` -- a list of names, either as a :class:`list` or a :class:`str`, that will not be transformed into members, and will be removed from the final class -- :attr:`~Enum._order_` -- used in Python 2/3 code to ensure member order is - consistent (class attribute, removed during class creation) +- :attr:`~Enum._order_` -- no longer used, kept for backward + compatibility (class attribute, removed during class creation) - :meth:`~Enum._generate_next_value_` -- used to get an appropriate value for an enum member; may be overridden diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index f96478b45e44c0..9db02c5c3c73c9 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -31,7 +31,6 @@ Doc/library/email.compat32-message.rst Doc/library/email.errors.rst Doc/library/email.parser.rst Doc/library/email.policy.rst -Doc/library/enum.rst Doc/library/exceptions.rst Doc/library/faulthandler.rst Doc/library/fcntl.rst From c968dc7ff3041137bb702436ff944692dede1ad1 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Fri, 9 Feb 2024 00:21:49 -0800 Subject: [PATCH 033/126] GH-113632: update configure.ac for WebAssembly support tiers (#115192) Move WASI to tier 2 and drop Emscripten. --- Doc/whatsnew/3.13.rst | 6 ++++++ .../Build/2024-02-08-17-38-56.gh-issue-113632.y9KIGb.rst | 2 ++ configure | 6 ++---- configure.ac | 3 +-- 4 files changed, 11 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2024-02-08-17-38-56.gh-issue-113632.y9KIGb.rst diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 50a2a69c75ac70..b05e4badc9e58b 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1339,6 +1339,12 @@ Build Changes :ref:`limited C API `. (Contributed by Victor Stinner in :gh:`85283`.) +* ``wasm32-wasi`` is now a tier 2 platform. + (Contributed by Brett Cannon in :gh:`115192`.) 
+ +* ``wasm32-emscripten`` is no longer a supported platform. + (Contributed by Brett Cannon in :gh:`115192`.) + C API Changes ============= diff --git a/Misc/NEWS.d/next/Build/2024-02-08-17-38-56.gh-issue-113632.y9KIGb.rst b/Misc/NEWS.d/next/Build/2024-02-08-17-38-56.gh-issue-113632.y9KIGb.rst new file mode 100644 index 00000000000000..8b02b1b2cd08c9 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2024-02-08-17-38-56.gh-issue-113632.y9KIGb.rst @@ -0,0 +1,2 @@ +Promote WASI to a tier 2 platform and drop Emscripten from tier 3 in +configure.ac. diff --git a/configure b/configure index 0375565c294552..705a778cafced3 100755 --- a/configure +++ b/configure @@ -6805,6 +6805,8 @@ case $host/$ac_cv_cc_name in #( aarch64-*-linux-gnu/clang) : PY_SUPPORT_TIER=2 ;; #( powerpc64le-*-linux-gnu/gcc) : + PY_SUPPORT_TIER=2 ;; #( + wasm32-unknown-wasi/clang) : PY_SUPPORT_TIER=2 ;; #( x86_64-*-linux-gnu/clang) : PY_SUPPORT_TIER=2 ;; #( @@ -6817,10 +6819,6 @@ case $host/$ac_cv_cc_name in #( PY_SUPPORT_TIER=3 ;; #( s390x-*-linux-gnu/gcc) : PY_SUPPORT_TIER=3 ;; #( - wasm32-unknown-emscripten/clang) : - PY_SUPPORT_TIER=3 ;; #( - wasm32-unknown-wasi/clang) : - PY_SUPPORT_TIER=3 ;; #( x86_64-*-freebsd*/clang) : PY_SUPPORT_TIER=3 ;; #( *) : diff --git a/configure.ac b/configure.ac index e121e893a1d0d9..dee7ed552b370f 100644 --- a/configure.ac +++ b/configure.ac @@ -973,14 +973,13 @@ AS_CASE([$host/$ac_cv_cc_name], [aarch64-*-linux-gnu/gcc], [PY_SUPPORT_TIER=2], dnl Linux ARM64, glibc, gcc+clang [aarch64-*-linux-gnu/clang], [PY_SUPPORT_TIER=2], [powerpc64le-*-linux-gnu/gcc], [PY_SUPPORT_TIER=2], dnl Linux on PPC64 little endian, glibc, gcc + [wasm32-unknown-wasi/clang], [PY_SUPPORT_TIER=2], dnl WebAssembly System Interface, clang [x86_64-*-linux-gnu/clang], [PY_SUPPORT_TIER=2], dnl Linux on AMD64, any vendor, glibc, clang [aarch64-pc-windows-msvc/msvc], [PY_SUPPORT_TIER=3], dnl Windows ARM64, MSVC [armv7l-*-linux-gnueabihf/gcc], [PY_SUPPORT_TIER=3], dnl ARMv7 LE with hardware floats, any vendor, 
glibc, gcc [powerpc64le-*-linux-gnu/clang], [PY_SUPPORT_TIER=3], dnl Linux on PPC64 little endian, glibc, clang [s390x-*-linux-gnu/gcc], [PY_SUPPORT_TIER=3], dnl Linux on 64bit s390x (big endian), glibc, gcc - [wasm32-unknown-emscripten/clang], [PY_SUPPORT_TIER=3], dnl WebAssembly Emscripten - [wasm32-unknown-wasi/clang], [PY_SUPPORT_TIER=3], dnl WebAssembly System Interface [x86_64-*-freebsd*/clang], [PY_SUPPORT_TIER=3], dnl FreeBSD on AMD64 [PY_SUPPORT_TIER=0] ) From 846fd721d518dda88a7d427ec3d2c03c45d9fa90 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 9 Feb 2024 12:36:12 +0200 Subject: [PATCH 034/126] gh-115059: Flush the underlying write buffer in io.BufferedRandom.read1() (GH-115163) --- Lib/test/test_io.py | 52 +++++++++++++++++++ ...-02-08-13-26-14.gh-issue-115059.DqP9dr.rst | 1 + Modules/_io/bufferedio.c | 10 ++++ 3 files changed, 63 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-02-08-13-26-14.gh-issue-115059.DqP9dr.rst diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 73669ecc792776..a24579dcc878cf 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -2497,6 +2497,28 @@ def test_interleaved_read_write(self): f.flush() self.assertEqual(raw.getvalue(), b'a2c') + def test_read1_after_write(self): + with self.BytesIO(b'abcdef') as raw: + with self.tp(raw, 3) as f: + f.write(b"1") + self.assertEqual(f.read1(1), b'b') + f.flush() + self.assertEqual(raw.getvalue(), b'1bcdef') + with self.BytesIO(b'abcdef') as raw: + with self.tp(raw, 3) as f: + f.write(b"1") + self.assertEqual(f.read1(), b'bcd') + f.flush() + self.assertEqual(raw.getvalue(), b'1bcdef') + with self.BytesIO(b'abcdef') as raw: + with self.tp(raw, 3) as f: + f.write(b"1") + # XXX: read(100) returns different numbers of bytes + # in Python and C implementations. 
+ self.assertEqual(f.read1(100)[:3], b'bcd') + f.flush() + self.assertEqual(raw.getvalue(), b'1bcdef') + def test_interleaved_readline_write(self): with self.BytesIO(b'ab\ncdef\ng\n') as raw: with self.tp(raw) as f: @@ -2509,6 +2531,36 @@ def test_interleaved_readline_write(self): f.flush() self.assertEqual(raw.getvalue(), b'1b\n2def\n3\n') + def test_xxx(self): + with self.BytesIO(b'abcdefgh') as raw: + with self.tp(raw) as f: + f.write(b'123') + self.assertEqual(f.read(), b'defgh') + f.write(b'456') + f.flush() + self.assertEqual(raw.getvalue(), b'123defgh456') + with self.BytesIO(b'abcdefgh') as raw: + with self.tp(raw) as f: + f.write(b'123') + self.assertEqual(f.read(3), b'def') + f.write(b'456') + f.flush() + self.assertEqual(raw.getvalue(), b'123def456') + with self.BytesIO(b'abcdefgh') as raw: + with self.tp(raw) as f: + f.write(b'123') + self.assertEqual(f.read1(), b'defgh') + f.write(b'456') + f.flush() + self.assertEqual(raw.getvalue(), b'123defgh456') + with self.BytesIO(b'abcdefgh') as raw: + with self.tp(raw) as f: + f.write(b'123') + self.assertEqual(f.read1(3), b'def') + f.write(b'456') + f.flush() + self.assertEqual(raw.getvalue(), b'123def456') + # You can't construct a BufferedRandom over a non-seekable stream. test_unseekable = None diff --git a/Misc/NEWS.d/next/Library/2024-02-08-13-26-14.gh-issue-115059.DqP9dr.rst b/Misc/NEWS.d/next/Library/2024-02-08-13-26-14.gh-issue-115059.DqP9dr.rst new file mode 100644 index 00000000000000..331baedd3b24c5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-02-08-13-26-14.gh-issue-115059.DqP9dr.rst @@ -0,0 +1 @@ +:meth:`io.BufferedRandom.read1` now flushes the underlying write buffer. 
diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index f02207ace9f3d2..8ebe9ec7095586 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -1050,6 +1050,16 @@ _io__Buffered_read1_impl(buffered *self, Py_ssize_t n) Py_DECREF(res); return NULL; } + /* Flush the write buffer if necessary */ + if (self->writable) { + PyObject *r = buffered_flush_and_rewind_unlocked(self); + if (r == NULL) { + LEAVE_BUFFERED(self) + Py_DECREF(res); + return NULL; + } + Py_DECREF(r); + } _bufferedreader_reset_buf(self); r = _bufferedreader_raw_read(self, PyBytes_AS_STRING(res), n); LEAVE_BUFFERED(self) From 769d4448260aaec687d9306950225316f9faefce Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Fri, 9 Feb 2024 15:11:36 +0100 Subject: [PATCH 035/126] Docs: correctly link to code objects (#115214) --- Doc/c-api/code.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Doc/c-api/code.rst b/Doc/c-api/code.rst index 5082b0cb6ad3f3..11c12e685fcace 100644 --- a/Doc/c-api/code.rst +++ b/Doc/c-api/code.rst @@ -22,12 +22,13 @@ bound into a function. .. c:var:: PyTypeObject PyCode_Type This is an instance of :c:type:`PyTypeObject` representing the Python - :class:`code` type. + :ref:`code object `. .. c:function:: int PyCode_Check(PyObject *co) - Return true if *co* is a :class:`code` object. This function always succeeds. + Return true if *co* is a :ref:`code object `. + This function always succeeds. .. c:function:: int PyCode_GetNumFree(PyCodeObject *co) From 31633f4473966b3bcd470440bab7f348711be48f Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 9 Feb 2024 09:23:12 -0500 Subject: [PATCH 036/126] gh-115184: Fix refleak tracking issues in free-threaded build (#115188) Fixes a few issues related to refleak tracking in the free-threaded build: - Count blocks in abandoned segments - Call `_mi_page_free_collect` earlier during heap traversal in order to get an accurate count of blocks in use. 
- Add missing refcount tracking in `_Py_DecRefSharedDebug` and `_Py_ExplicitMergeRefcount`. - Pause threads in `get_num_global_allocated_blocks` to ensure that traversing the mimalloc heaps is safe. --- Objects/mimalloc/heap.c | 2 +- Objects/object.c | 11 +++++++---- Objects/obmalloc.c | 9 ++++++++- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/Objects/mimalloc/heap.c b/Objects/mimalloc/heap.c index 164b28f0fab240..154dad0b128480 100644 --- a/Objects/mimalloc/heap.c +++ b/Objects/mimalloc/heap.c @@ -538,7 +538,6 @@ bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t *page, mi_ mi_assert(page != NULL); if (page == NULL) return true; - _mi_page_free_collect(page,true); mi_assert_internal(page->local_free == NULL); if (page->used == 0) return true; @@ -635,6 +634,7 @@ bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t *page, mi_ typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_ex_t* area, void* arg); void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page) { + _mi_page_free_collect(page,true); const size_t bsize = mi_page_block_size(page); const size_t ubsize = mi_page_usable_block_size(page); area->reserved = page->reserved * bsize; diff --git a/Objects/object.c b/Objects/object.c index bbf7f98ae3daf9..37a4b7a417e35f 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -346,6 +346,9 @@ _Py_DecRefSharedDebug(PyObject *o, const char *filename, int lineno) if (should_queue) { // TODO: the inter-thread queue is not yet implemented. For now, // we just merge the refcount here. 
+#ifdef Py_REF_DEBUG + _Py_IncRefTotal(_PyInterpreterState_GET()); +#endif Py_ssize_t refcount = _Py_ExplicitMergeRefcount(o, -1); if (refcount == 0) { _Py_Dealloc(o); @@ -399,10 +402,6 @@ _Py_ExplicitMergeRefcount(PyObject *op, Py_ssize_t extra) Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared); do { refcnt = Py_ARITHMETIC_RIGHT_SHIFT(Py_ssize_t, shared, _Py_REF_SHARED_SHIFT); - if (_Py_REF_IS_MERGED(shared)) { - return refcnt; - } - refcnt += (Py_ssize_t)op->ob_ref_local; refcnt += extra; @@ -410,6 +409,10 @@ _Py_ExplicitMergeRefcount(PyObject *op, Py_ssize_t extra) } while (!_Py_atomic_compare_exchange_ssize(&op->ob_ref_shared, &shared, new_shared)); +#ifdef Py_REF_DEBUG + _Py_AddRefTotal(_PyInterpreterState_GET(), extra); +#endif + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, 0); _Py_atomic_store_uintptr_relaxed(&op->ob_tid, 0); return refcnt; diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index bea4ea85332bdd..6a12c3dca38b36 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -1073,7 +1073,12 @@ get_mimalloc_allocated_blocks(PyInterpreterState *interp) mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks); } } - // TODO(sgross): count blocks in abandoned segments. + + mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool; + for (uint8_t tag = 0; tag < _Py_MIMALLOC_HEAP_COUNT; tag++) { + _mi_abandoned_pool_visit_blocks(pool, tag, false, &count_blocks, + &allocated_blocks); + } #else // TODO(sgross): this only counts the current thread's blocks. 
mi_heap_t *heap = mi_heap_get_default(); @@ -1189,6 +1194,7 @@ get_num_global_allocated_blocks(_PyRuntimeState *runtime) } } else { + _PyEval_StopTheWorldAll(&_PyRuntime); HEAD_LOCK(runtime); PyInterpreterState *interp = PyInterpreterState_Head(); assert(interp != NULL); @@ -1208,6 +1214,7 @@ get_num_global_allocated_blocks(_PyRuntimeState *runtime) } } HEAD_UNLOCK(runtime); + _PyEval_StartTheWorldAll(&_PyRuntime); #ifdef Py_DEBUG assert(got_main); #endif From f8931adc597aa696a0f60439e8f9a9047d51ef1c Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Fri, 9 Feb 2024 19:59:41 +0300 Subject: [PATCH 037/126] gh-115142: Skip test_optimizer if _testinternalcapi module is not available (GH-115175) --- Lib/test/test_optimizer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_optimizer.py b/Lib/test/test_optimizer.py index b56bf3cfd9560e..c8554c40df4b2d 100644 --- a/Lib/test/test_optimizer.py +++ b/Lib/test/test_optimizer.py @@ -1,6 +1,9 @@ -import _testinternalcapi import unittest import types +from test.support import import_helper + + +_testinternalcapi = import_helper.import_module("_testinternalcapi") class TestRareEventCounters(unittest.TestCase): From 5a173efa693a053bf4a059c82c1c06c82a9fa8fb Mon Sep 17 00:00:00 2001 From: Peter Lazorchak Date: Fri, 9 Feb 2024 09:06:14 -0800 Subject: [PATCH 038/126] Add Peter L to ACKS (GH-115222) --- Misc/ACKS | 1 + 1 file changed, 1 insertion(+) diff --git a/Misc/ACKS b/Misc/ACKS index 466023f390a421..8a80e02ecba26a 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1051,6 +1051,7 @@ Mark Lawrence Chris Laws Michael Layzell Michael Lazar +Peter Lazorchak Brian Leair Mathieu Leduc-Hamel Amandine Lee From a225520af941fb125a4ede77a617501dfb8b46da Mon Sep 17 00:00:00 2001 From: Carl Meyer Date: Fri, 9 Feb 2024 12:19:09 -0700 Subject: [PATCH 039/126] gh-112903: Handle non-types in _BaseGenericAlias.__mro_entries__() (#115191) Co-authored-by: Alex Waygood --- Lib/test/test_typing.py | 69 
+++++++++++++++++++ Lib/typing.py | 22 +++++- ...-02-08-17-04-58.gh-issue-112903.SN_vUs.rst | 2 + 3 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-02-08-17-04-58.gh-issue-112903.SN_vUs.rst diff --git a/Lib/test/test_typing.py b/Lib/test/test_typing.py index b684af4f33ed71..58566c4bfc821c 100644 --- a/Lib/test/test_typing.py +++ b/Lib/test/test_typing.py @@ -4920,6 +4920,75 @@ class B(Generic[S]): ... class C(List[int], B): ... self.assertEqual(C.__mro__, (C, list, B, Generic, object)) + def test_multiple_inheritance_non_type_with___mro_entries__(self): + class GoodEntries: + def __mro_entries__(self, bases): + return (object,) + + class A(List[int], GoodEntries()): ... + + self.assertEqual(A.__mro__, (A, list, Generic, object)) + + def test_multiple_inheritance_non_type_without___mro_entries__(self): + # Error should be from the type machinery, not from typing.py + with self.assertRaisesRegex(TypeError, r"^bases must be types"): + class A(List[int], object()): ... + + def test_multiple_inheritance_non_type_bad___mro_entries__(self): + class BadEntries: + def __mro_entries__(self, bases): + return None + + # Error should be from the type machinery, not from typing.py + with self.assertRaisesRegex( + TypeError, + r"^__mro_entries__ must return a tuple", + ): + class A(List[int], BadEntries()): ... + + def test_multiple_inheritance___mro_entries___returns_non_type(self): + class BadEntries: + def __mro_entries__(self, bases): + return (object(),) + + # Error should be from the type machinery, not from typing.py + with self.assertRaisesRegex( + TypeError, + r"^bases must be types", + ): + class A(List[int], BadEntries()): ... + + def test_multiple_inheritance_with_genericalias(self): + class A(typing.Sized, list[int]): ... 
+ + self.assertEqual( + A.__mro__, + (A, collections.abc.Sized, Generic, list, object), + ) + + def test_multiple_inheritance_with_genericalias_2(self): + T = TypeVar("T") + + class BaseSeq(typing.Sequence[T]): ... + class MySeq(List[T], BaseSeq[T]): ... + + self.assertEqual( + MySeq.__mro__, + ( + MySeq, + list, + BaseSeq, + collections.abc.Sequence, + collections.abc.Reversible, + collections.abc.Collection, + collections.abc.Sized, + collections.abc.Iterable, + collections.abc.Container, + Generic, + object, + ), + ) + def test_init_subclass_super_called(self): class FinalException(Exception): pass diff --git a/Lib/typing.py b/Lib/typing.py index d278b4effc7eba..347373f00956c7 100644 --- a/Lib/typing.py +++ b/Lib/typing.py @@ -1135,9 +1135,29 @@ def __mro_entries__(self, bases): res = [] if self.__origin__ not in bases: res.append(self.__origin__) + + # Check if any base that occurs after us in `bases` is either itself a + # subclass of Generic, or something which will add a subclass of Generic + # to `__bases__` via its `__mro_entries__`. If not, add Generic + # ourselves. The goal is to ensure that Generic (or a subclass) will + # appear exactly once in the final bases tuple. If we let it appear + # multiple times, we risk "can't form a consistent MRO" errors. 
i = bases.index(self) for b in bases[i+1:]: - if isinstance(b, _BaseGenericAlias) or issubclass(b, Generic): + if isinstance(b, _BaseGenericAlias): + break + if not isinstance(b, type): + meth = getattr(b, "__mro_entries__", None) + new_bases = meth(bases) if meth else None + if ( + isinstance(new_bases, tuple) and + any( + isinstance(b2, type) and issubclass(b2, Generic) + for b2 in new_bases + ) + ): + break + elif issubclass(b, Generic): break else: res.append(Generic) diff --git a/Misc/NEWS.d/next/Library/2024-02-08-17-04-58.gh-issue-112903.SN_vUs.rst b/Misc/NEWS.d/next/Library/2024-02-08-17-04-58.gh-issue-112903.SN_vUs.rst new file mode 100644 index 00000000000000..e27f5832553c13 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-02-08-17-04-58.gh-issue-112903.SN_vUs.rst @@ -0,0 +1,2 @@ +Fix "issubclass() arg 1 must be a class" errors in certain cases of multiple +inheritance with generic aliases (regression in early 3.13 alpha releases). From a3af3cb4f424034b56404704fdf8f18e8c0a9982 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 9 Feb 2024 17:08:32 -0500 Subject: [PATCH 040/126] gh-110481: Implement inter-thread queue for biased reference counting (#114824) Biased reference counting maintains two refcount fields in each object: `ob_ref_local` and `ob_ref_shared`. The true refcount is the sum of these two fields. In some cases, when refcounting operations are split across threads, the ob_ref_shared field can be negative (although the total refcount must be at least zero). In this case, the thread that decremented the refcount requests that the owning thread give up ownership and merge the refcount fields. 
--- Include/internal/pycore_brc.h | 74 +++++++ Include/internal/pycore_ceval.h | 1 + Include/internal/pycore_interp.h | 1 + Include/internal/pycore_object_stack.h | 6 + Include/internal/pycore_tstate.h | 2 + Lib/test/test_code.py | 1 + Lib/test/test_concurrent_futures/executor.py | 17 +- .../test_process_pool.py | 1 + Makefile.pre.in | 2 + Modules/posixmodule.c | 4 + Objects/dictobject.c | 16 +- Objects/object.c | 8 +- PCbuild/_freeze_module.vcxproj | 1 + PCbuild/_freeze_module.vcxproj.filters | 3 + PCbuild/pythoncore.vcxproj | 2 + PCbuild/pythoncore.vcxproj.filters | 6 + Python/brc.c | 198 ++++++++++++++++++ Python/ceval_gil.c | 8 + Python/gc_free_threading.c | 46 +++- Python/object_stack.c | 21 ++ Python/pystate.c | 11 + 21 files changed, 418 insertions(+), 11 deletions(-) create mode 100644 Include/internal/pycore_brc.h create mode 100644 Python/brc.c diff --git a/Include/internal/pycore_brc.h b/Include/internal/pycore_brc.h new file mode 100644 index 00000000000000..3453d83b57ca97 --- /dev/null +++ b/Include/internal/pycore_brc.h @@ -0,0 +1,74 @@ +#ifndef Py_INTERNAL_BRC_H +#define Py_INTERNAL_BRC_H + +#include +#include "pycore_llist.h" // struct llist_node +#include "pycore_lock.h" // PyMutex +#include "pycore_object_stack.h" // _PyObjectStack + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#ifdef Py_GIL_DISABLED + +// Prime number to avoid correlations with memory addresses. +#define _Py_BRC_NUM_BUCKETS 257 + +// Hash table bucket +struct _brc_bucket { + // Mutex protects both the bucket and thread state queues in this bucket. + PyMutex mutex; + + // Linked list of _PyThreadStateImpl objects hashed to this bucket. + struct llist_node root; +}; + +// Per-interpreter biased reference counting state +struct _brc_state { + // Hash table of thread states by thread-id. Thread states within a bucket + // are chained using a doubly-linked list. 
+ struct _brc_bucket table[_Py_BRC_NUM_BUCKETS]; +}; + +// Per-thread biased reference counting state +struct _brc_thread_state { + // Linked-list of thread states per hash bucket + struct llist_node bucket_node; + + // Thread-id as determined by _PyThread_Id() + uintptr_t tid; + + // Objects with refcounts to be merged (protected by bucket mutex) + _PyObjectStack objects_to_merge; + + // Local stack of objects to be merged (not accessed by other threads) + _PyObjectStack local_objects_to_merge; +}; + +// Initialize/finalize the per-thread biased reference counting state +void _Py_brc_init_thread(PyThreadState *tstate); +void _Py_brc_remove_thread(PyThreadState *tstate); + +// Initialize per-interpreter state +void _Py_brc_init_state(PyInterpreterState *interp); + +void _Py_brc_after_fork(PyInterpreterState *interp); + +// Enqueues an object to be merged by its owning thread (tid). This +// steals a reference to the object. +void _Py_brc_queue_object(PyObject *ob); + +// Merge the refcounts of queued objects for the current thread.
+void _Py_brc_merge_refcounts(PyThreadState *tstate); + +#endif /* Py_GIL_DISABLED */ + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_BRC_H */ diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index a66af1389541dd..b158fc9ff5ebc1 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -206,6 +206,7 @@ void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame) #define _PY_ASYNC_EXCEPTION_BIT 3 #define _PY_GC_SCHEDULED_BIT 4 #define _PY_EVAL_PLEASE_STOP_BIT 5 +#define _PY_EVAL_EXPLICIT_MERGE_BIT 6 /* Reserve a few bits for future use */ #define _PY_EVAL_EVENTS_BITS 8 diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index f7c332ed747cfa..31d88071e19d0c 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -201,6 +201,7 @@ struct _is { #if defined(Py_GIL_DISABLED) struct _mimalloc_interp_state mimalloc; + struct _brc_state brc; // biased reference counting state #endif // Per-interpreter state for the obmalloc allocator. 
For the main diff --git a/Include/internal/pycore_object_stack.h b/Include/internal/pycore_object_stack.h index 1dc1c1591525de..d042be2a98090a 100644 --- a/Include/internal/pycore_object_stack.h +++ b/Include/internal/pycore_object_stack.h @@ -1,6 +1,8 @@ #ifndef Py_INTERNAL_OBJECT_STACK_H #define Py_INTERNAL_OBJECT_STACK_H +#include "pycore_freelist.h" // _PyFreeListState + #ifdef __cplusplus extern "C" { #endif @@ -74,6 +76,10 @@ _PyObjectStack_Pop(_PyObjectStack *stack) return obj; } +// Merge src into dst, leaving src empty +extern void +_PyObjectStack_Merge(_PyObjectStack *dst, _PyObjectStack *src); + // Remove all items from the stack extern void _PyObjectStack_Clear(_PyObjectStack *stack); diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index 472fa08154e8f9..77a1dc59163d21 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -10,6 +10,7 @@ extern "C" { #include "pycore_freelist.h" // struct _Py_freelist_state #include "pycore_mimalloc.h" // struct _mimalloc_thread_state +#include "pycore_brc.h" // struct _brc_thread_state // Every PyThreadState is actually allocated as a _PyThreadStateImpl. 
The @@ -22,6 +23,7 @@ typedef struct _PyThreadStateImpl { #ifdef Py_GIL_DISABLED struct _mimalloc_thread_state mimalloc; struct _Py_freelist_state freelist_state; + struct _brc_thread_state brc; #endif } _PyThreadStateImpl; diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index d8fb826edeb681..46bebfc7af675b 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -865,6 +865,7 @@ def __init__(self, f, test): self.test = test def run(self): del self.f + gc_collect() self.test.assertEqual(LAST_FREED, 500) SetExtra(f.__code__, FREE_INDEX, ctypes.c_voidp(500)) diff --git a/Lib/test/test_concurrent_futures/executor.py b/Lib/test/test_concurrent_futures/executor.py index 1e7d4344740943..6a79fe69ec37cf 100644 --- a/Lib/test/test_concurrent_futures/executor.py +++ b/Lib/test/test_concurrent_futures/executor.py @@ -1,8 +1,10 @@ import threading import time +import unittest import weakref from concurrent import futures from test import support +from test.support import Py_GIL_DISABLED def mul(x, y): @@ -83,10 +85,21 @@ def test_no_stale_references(self): my_object_collected = threading.Event() my_object_callback = weakref.ref( my_object, lambda obj: my_object_collected.set()) - # Deliberately discarding the future. - self.executor.submit(my_object.my_method) + fut = self.executor.submit(my_object.my_method) del my_object + if Py_GIL_DISABLED: + # Due to biased reference counting, my_object might only be + # deallocated while the thread that created it runs -- if the + # thread is paused waiting on an event, it may not merge the + # refcount of the queued object. For that reason, we wait for the + # task to finish (so that it's no longer referenced) and force a + # GC to ensure that it is collected. + fut.result() # Wait for the task to finish. + support.gc_collect() + else: + del fut # Deliberately discard the future. 
+ collected = my_object_collected.wait(timeout=support.SHORT_TIMEOUT) self.assertTrue(collected, "Stale reference not collected within timeout.") diff --git a/Lib/test/test_concurrent_futures/test_process_pool.py b/Lib/test/test_concurrent_futures/test_process_pool.py index 3e61b0c9387c6f..7fc59a05f3deac 100644 --- a/Lib/test/test_concurrent_futures/test_process_pool.py +++ b/Lib/test/test_concurrent_futures/test_process_pool.py @@ -98,6 +98,7 @@ def test_ressources_gced_in_workers(self): # explicitly destroy the object to ensure that EventfulGCObj.__del__() # is called while manager is still running. + support.gc_collect() obj = None support.gc_collect() diff --git a/Makefile.pre.in b/Makefile.pre.in index 07b2ec7adde78a..4dabe328ce0362 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -405,6 +405,7 @@ PYTHON_OBJS= \ Python/ast_opt.o \ Python/ast_unparse.o \ Python/bltinmodule.o \ + Python/brc.o \ Python/ceval.o \ Python/codecs.o \ Python/compile.o \ @@ -1081,6 +1082,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_atexit.h \ $(srcdir)/Include/internal/pycore_bitutils.h \ $(srcdir)/Include/internal/pycore_blocks_output_buffer.h \ + $(srcdir)/Include/internal/pycore_brc.h \ $(srcdir)/Include/internal/pycore_bytes_methods.h \ $(srcdir)/Include/internal/pycore_bytesobject.h \ $(srcdir)/Include/internal/pycore_call.h \ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index e26265fc874ebb..230c961a2ac3c0 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -637,6 +637,10 @@ PyOS_AfterFork_Child(void) tstate->native_thread_id = PyThread_get_thread_native_id(); #endif +#ifdef Py_GIL_DISABLED + _Py_brc_after_fork(tstate->interp); +#endif + status = _PyEval_ReInitThreads(tstate); if (_PyStatus_EXCEPTION(status)) { goto fatal_error; diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 2df95e977a180f..9b1defa5cbc609 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -5989,6 +5989,18 @@ 
_PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values) return make_dict_from_instance_attributes(interp, keys, values); } +static bool +has_unique_reference(PyObject *op) +{ +#ifdef Py_GIL_DISABLED + return (_Py_IsOwnedByCurrentThread(op) && + op->ob_ref_local == 1 && + _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared) == 0); +#else + return Py_REFCNT(op) == 1; +#endif +} + // Return true if the dict was dematerialized, false otherwise. bool _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv) @@ -6005,7 +6017,9 @@ _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv) return false; } assert(_PyType_HasFeature(Py_TYPE(obj), Py_TPFLAGS_HEAPTYPE)); - if (dict->ma_keys != CACHED_KEYS(Py_TYPE(obj)) || Py_REFCNT(dict) != 1) { + if (dict->ma_keys != CACHED_KEYS(Py_TYPE(obj)) || + !has_unique_reference((PyObject *)dict)) + { return false; } assert(dict->ma_values); diff --git a/Objects/object.c b/Objects/object.c index 37a4b7a417e35f..61e6131c6e99bb 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2,6 +2,7 @@ /* Generic object operations; and implementation of None */ #include "Python.h" +#include "pycore_brc.h" // _Py_brc_queue_object() #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_ceval.h" // _Py_EnterRecursiveCallTstate() #include "pycore_context.h" // _PyContextTokenMissing_Type @@ -344,15 +345,10 @@ _Py_DecRefSharedDebug(PyObject *o, const char *filename, int lineno) &shared, new_shared)); if (should_queue) { - // TODO: the inter-thread queue is not yet implemented. For now, - // we just merge the refcount here. 
#ifdef Py_REF_DEBUG _Py_IncRefTotal(_PyInterpreterState_GET()); #endif - Py_ssize_t refcount = _Py_ExplicitMergeRefcount(o, -1); - if (refcount == 0) { - _Py_Dealloc(o); - } + _Py_brc_queue_object(o); } else if (new_shared == _Py_REF_MERGED) { // refcount is zero AND merged diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index 35788ec4503e8f..49f529ebbc2f9b 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -191,6 +191,7 @@ + diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index 7a44179e356105..5b1bd7552b4cd9 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -46,6 +46,9 @@ Source Files + + Python + Source Files diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index e1ff97659659ee..4cc0ca4b9af8de 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -206,6 +206,7 @@ + @@ -553,6 +554,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 4c55f23006b2f0..ceaa21217267cf 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -546,6 +546,9 @@ Include\internal + + Include\internal + Include\internal @@ -1253,6 +1256,9 @@ Python + + Python + Python diff --git a/Python/brc.c b/Python/brc.c new file mode 100644 index 00000000000000..f1fd57a2964cf5 --- /dev/null +++ b/Python/brc.c @@ -0,0 +1,198 @@ +// Implementation of biased reference counting inter-thread queue. +// +// Biased reference counting maintains two refcount fields in each object: +// ob_ref_local and ob_ref_shared. The true refcount is the sum of these two +// fields. In some cases, when refcounting operations are split across threads, +// the ob_ref_shared field can be negative (although the total refcount must +// be at least zero). 
In this case, the thread that decremented the refcount +// requests that the owning thread give up ownership and merge the refcount +// fields. This file implements the mechanism for doing so. +// +// Each thread state maintains a queue of objects whose refcounts it should +// merge. The thread states are stored in a per-interpreter hash table by +// thread id. The hash table has a fixed size and uses a linked list to store +// thread states within each bucket. +// +// The queueing thread uses the eval breaker mechanism to notify the owning +// thread that it has objects to merge. Additionally, all queued objects are +// merged during GC. +#include "Python.h" +#include "pycore_object.h" // _Py_ExplicitMergeRefcount +#include "pycore_brc.h" // struct _brc_thread_state +#include "pycore_ceval.h" // _Py_set_eval_breaker_bit +#include "pycore_llist.h" // struct llist_node +#include "pycore_pystate.h" // _PyThreadStateImpl + +#ifdef Py_GIL_DISABLED + +// Get the hashtable bucket for a given thread id. +static struct _brc_bucket * +get_bucket(PyInterpreterState *interp, uintptr_t tid) +{ + return &interp->brc.table[tid % _Py_BRC_NUM_BUCKETS]; +} + +// Find the thread state in a hash table bucket by thread id. +static _PyThreadStateImpl * +find_thread_state(struct _brc_bucket *bucket, uintptr_t thread_id) +{ + struct llist_node *node; + llist_for_each(node, &bucket->root) { + // Get the containing _PyThreadStateImpl from the linked-list node. + _PyThreadStateImpl *ts = llist_data(node, _PyThreadStateImpl, + brc.bucket_node); + if (ts->brc.tid == thread_id) { + return ts; + } + } + return NULL; +} + +// Enqueue an object to be merged by the owning thread. This steals a +// reference to the object. +void +_Py_brc_queue_object(PyObject *ob) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + + uintptr_t ob_tid = _Py_atomic_load_uintptr(&ob->ob_tid); + if (ob_tid == 0) { + // The owning thread may have concurrently decided to merge the + // refcount fields.
+ Py_DECREF(ob); + return; + } + + struct _brc_bucket *bucket = get_bucket(interp, ob_tid); + PyMutex_Lock(&bucket->mutex); + _PyThreadStateImpl *tstate = find_thread_state(bucket, ob_tid); + if (tstate == NULL) { + // If we didn't find the owning thread then it must have already exited. + // It's safe (and necessary) to merge the refcount. Subtract one when + // merging because we've stolen a reference. + Py_ssize_t refcount = _Py_ExplicitMergeRefcount(ob, -1); + PyMutex_Unlock(&bucket->mutex); + if (refcount == 0) { + _Py_Dealloc(ob); + } + return; + } + + if (_PyObjectStack_Push(&tstate->brc.objects_to_merge, ob) < 0) { + PyMutex_Unlock(&bucket->mutex); + + // Fall back to stopping all threads and manually merging the refcount + // if we can't enqueue the object to be merged. + _PyEval_StopTheWorld(interp); + Py_ssize_t refcount = _Py_ExplicitMergeRefcount(ob, -1); + _PyEval_StartTheWorld(interp); + + if (refcount == 0) { + _Py_Dealloc(ob); + } + return; + } + + // Notify owning thread + _Py_set_eval_breaker_bit(interp, _PY_EVAL_EXPLICIT_MERGE_BIT, 1); + + PyMutex_Unlock(&bucket->mutex); +} + +static void +merge_queued_objects(_PyObjectStack *to_merge) +{ + PyObject *ob; + while ((ob = _PyObjectStack_Pop(to_merge)) != NULL) { + // Subtract one when merging because the queue had a reference. + Py_ssize_t refcount = _Py_ExplicitMergeRefcount(ob, -1); + if (refcount == 0) { + _Py_Dealloc(ob); + } + } +} + +// Process this thread's queue of objects to merge. +void +_Py_brc_merge_refcounts(PyThreadState *tstate) +{ + struct _brc_thread_state *brc = &((_PyThreadStateImpl *)tstate)->brc; + struct _brc_bucket *bucket = get_bucket(tstate->interp, brc->tid); + + // Append all objects into a local stack. We don't want to hold the lock + // while calling destructors. 
+ PyMutex_Lock(&bucket->mutex); + _PyObjectStack_Merge(&brc->local_objects_to_merge, &brc->objects_to_merge); + PyMutex_Unlock(&bucket->mutex); + + // Process the local stack until it's empty + merge_queued_objects(&brc->local_objects_to_merge); +} + +void +_Py_brc_init_state(PyInterpreterState *interp) +{ + struct _brc_state *brc = &interp->brc; + for (Py_ssize_t i = 0; i < _Py_BRC_NUM_BUCKETS; i++) { + llist_init(&brc->table[i].root); + } +} + +void +_Py_brc_init_thread(PyThreadState *tstate) +{ + struct _brc_thread_state *brc = &((_PyThreadStateImpl *)tstate)->brc; + brc->tid = _Py_ThreadId(); + + // Add ourself to the hashtable + struct _brc_bucket *bucket = get_bucket(tstate->interp, brc->tid); + PyMutex_Lock(&bucket->mutex); + llist_insert_tail(&bucket->root, &brc->bucket_node); + PyMutex_Unlock(&bucket->mutex); +} + +void +_Py_brc_remove_thread(PyThreadState *tstate) +{ + struct _brc_thread_state *brc = &((_PyThreadStateImpl *)tstate)->brc; + struct _brc_bucket *bucket = get_bucket(tstate->interp, brc->tid); + + // We need to fully process any objects to merge before removing ourself + // from the hashtable. It is not safe to perform any refcount operations + // after we are removed. After that point, other threads treat our objects + // as abandoned and may merge the objects' refcounts directly. + bool empty = false; + while (!empty) { + // Process the local stack until it's empty + merge_queued_objects(&brc->local_objects_to_merge); + + PyMutex_Lock(&bucket->mutex); + empty = (brc->objects_to_merge.head == NULL); + if (empty) { + llist_remove(&brc->bucket_node); + } + else { + _PyObjectStack_Merge(&brc->local_objects_to_merge, + &brc->objects_to_merge); + } + PyMutex_Unlock(&bucket->mutex); + } + + assert(brc->local_objects_to_merge.head == NULL); + assert(brc->objects_to_merge.head == NULL); +} + +void +_Py_brc_after_fork(PyInterpreterState *interp) +{ + // Unlock all bucket mutexes. 
Some of the buckets may be locked because + // locks can be handed off to a parked thread (see lock.c). We don't have + // to worry about consistency here, because no thread can be actively + // modifying a bucket, but it might be paused (not yet woken up) on a + // PyMutex_Lock while holding that lock. + for (Py_ssize_t i = 0; i < _Py_BRC_NUM_BUCKETS; i++) { + _PyMutex_at_fork_reinit(&interp->brc.table[i].mutex); + } +} + +#endif /* Py_GIL_DISABLED */ diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index ad90359318761a..deb9741291fca7 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -980,6 +980,14 @@ _Py_HandlePending(PyThreadState *tstate) } } +#ifdef Py_GIL_DISABLED + /* Objects with refcounts to merge */ + if (_Py_eval_breaker_bit_is_set(interp, _PY_EVAL_EXPLICIT_MERGE_BIT)) { + _Py_set_eval_breaker_bit(interp, _PY_EVAL_EXPLICIT_MERGE_BIT, 0); + _Py_brc_merge_refcounts(tstate); + } +#endif + /* GC scheduled to run */ if (_Py_eval_breaker_bit_is_set(interp, _PY_GC_SCHEDULED_BIT)) { _Py_set_eval_breaker_bit(interp, _PY_GC_SCHEDULED_BIT, 0); diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 8fbcdb15109b76..5d3b097dee93e8 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1,5 +1,6 @@ // Cyclic garbage collector implementation for free-threaded build. #include "Python.h" +#include "pycore_brc.h" // struct _brc_thread_state #include "pycore_ceval.h" // _Py_set_eval_breaker_bit() #include "pycore_context.h" #include "pycore_dict.h" // _PyDict_MaybeUntrack() @@ -152,8 +153,7 @@ gc_decref(PyObject *op) op->ob_tid -= 1; } -// Merge refcounts while the world is stopped.
-static void +static Py_ssize_t merge_refcount(PyObject *op, Py_ssize_t extra) { assert(_PyInterpreterState_GET()->stoptheworld.world_stopped); @@ -169,6 +169,7 @@ merge_refcount(PyObject *op, Py_ssize_t extra) op->ob_tid = 0; op->ob_ref_local = 0; op->ob_ref_shared = _Py_REF_SHARED(refcount, _Py_REF_MERGED); + return refcount; } static void @@ -282,6 +283,41 @@ gc_visit_heaps(PyInterpreterState *interp, mi_block_visit_fun *visitor, return err; } +static void +merge_queued_objects(_PyThreadStateImpl *tstate, struct collection_state *state) +{ + struct _brc_thread_state *brc = &tstate->brc; + _PyObjectStack_Merge(&brc->local_objects_to_merge, &brc->objects_to_merge); + + PyObject *op; + while ((op = _PyObjectStack_Pop(&brc->local_objects_to_merge)) != NULL) { + // Subtract one when merging because the queue had a reference. + Py_ssize_t refcount = merge_refcount(op, -1); + + if (!_PyObject_GC_IS_TRACKED(op) && refcount == 0) { + // GC objects with zero refcount are handled subsequently by the + // GC as if they were cyclic trash, but we have to handle dead + // non-GC objects here. Add one to the refcount so that we can + // decref and deallocate the object once we start the world again. + op->ob_ref_shared += (1 << _Py_REF_SHARED_SHIFT); +#ifdef Py_REF_DEBUG + _Py_IncRefTotal(_PyInterpreterState_GET()); +#endif + worklist_push(&state->objs_to_decref, op); + } + } +} + +static void +merge_all_queued_objects(PyInterpreterState *interp, struct collection_state *state) +{ + HEAD_LOCK(&_PyRuntime); + for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { + merge_queued_objects((_PyThreadStateImpl *)p, state); + } + HEAD_UNLOCK(&_PyRuntime); +} + // Subtract an incoming reference from the computed "gc_refs" refcount. 
static int visit_decref(PyObject *op, void *arg) @@ -927,6 +963,9 @@ static void gc_collect_internal(PyInterpreterState *interp, struct collection_state *state) { _PyEval_StopTheWorld(interp); + // merge refcounts for all queued objects + merge_all_queued_objects(interp, state); + // Find unreachable objects int err = deduce_unreachable_heap(interp, state); if (err < 0) { @@ -946,6 +985,9 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state) clear_weakrefs(state); _PyEval_StartTheWorld(interp); + // Deallocate any object from the refcount merge step + cleanup_worklist(&state->objs_to_decref); + // Call weakref callbacks and finalizers after unpausing other threads to // avoid potential deadlocks. call_weakref_callbacks(state); diff --git a/Python/object_stack.c b/Python/object_stack.c index 8544892eb71dcb..ced4460da00f44 100644 --- a/Python/object_stack.c +++ b/Python/object_stack.c @@ -67,6 +67,27 @@ _PyObjectStack_Clear(_PyObjectStack *queue) } } +void +_PyObjectStack_Merge(_PyObjectStack *dst, _PyObjectStack *src) +{ + if (src->head == NULL) { + return; + } + + if (dst->head != NULL) { + // First, append dst to the bottom of src + _PyObjectStackChunk *last = src->head; + while (last->prev != NULL) { + last = last->prev; + } + last->prev = dst->head; + } + + // Now that src has all the chunks, set dst to src + dst->head = src->head; + src->head = NULL; +} + void _PyObjectStackChunk_ClearFreeList(_PyFreeListState *free_lists, int is_finalization) { diff --git a/Python/pystate.c b/Python/pystate.c index e77e5bfa7e2df8..6cd034743ddf4c 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -611,6 +611,9 @@ init_interpreter(PyInterpreterState *interp, _PyGC_InitState(&interp->gc); PyConfig_InitPythonConfig(&interp->config); _PyType_InitCache(interp); +#ifdef Py_GIL_DISABLED + _Py_brc_init_state(interp); +#endif for (int i = 0; i < _PY_MONITORING_UNGROUPED_EVENTS; i++) { interp->monitors.tools[i] = 0; } @@ -1336,6 +1339,11 @@ 
init_threadstate(_PyThreadStateImpl *_tstate, tstate->datastack_limit = NULL; tstate->what_event = -1; +#ifdef Py_GIL_DISABLED + // Initialize biased reference counting inter-thread queue + _Py_brc_init_thread(tstate); +#endif + if (interp->stoptheworld.requested || _PyRuntime.stoptheworld.requested) { // Start in the suspended state if there is an ongoing stop-the-world. tstate->state = _Py_THREAD_SUSPENDED; @@ -1561,6 +1569,9 @@ PyThreadState_Clear(PyThreadState *tstate) _PyFreeListState *freelist_state = &((_PyThreadStateImpl*)tstate)->freelist_state; _Py_ClearFreeLists(freelist_state, 1); _PySlice_ClearCache(freelist_state); + + // Remove ourself from the biased reference counting table of threads. + _Py_brc_remove_thread(tstate); #endif _PyThreadState_ClearMimallocHeaps(tstate); From 564385612cdf72c2fa8e629a68225fb2cd3b3d99 Mon Sep 17 00:00:00 2001 From: dave-shawley Date: Fri, 9 Feb 2024 17:11:37 -0500 Subject: [PATCH 041/126] gh-115165: Fix `typing.Annotated` for immutable types (#115213) The return value from an annotated callable can raise any exception from __setattr__ for the `__orig_class__` property. 
--- Lib/test/test_typing.py | 21 +++++++++++++++++++ Lib/typing.py | 4 +++- ...-02-09-07-20-16.gh-issue-115165.yfJLXA.rst | 4 ++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-02-09-07-20-16.gh-issue-115165.yfJLXA.rst diff --git a/Lib/test/test_typing.py b/Lib/test/test_typing.py index 58566c4bfc821c..c3a092f3af3009 100644 --- a/Lib/test/test_typing.py +++ b/Lib/test/test_typing.py @@ -4323,6 +4323,16 @@ class C(B[int]): c.bar = 'abc' self.assertEqual(c.__dict__, {'bar': 'abc'}) + def test_setattr_exceptions(self): + class Immutable[T]: + def __setattr__(self, key, value): + raise RuntimeError("immutable") + + # gh-115165: This used to cause RuntimeError to be raised + # when we tried to set `__orig_class__` on the `Immutable` instance + # returned by the `Immutable[int]()` call + self.assertIsInstance(Immutable[int](), Immutable) + def test_subscripted_generics_as_proxies(self): T = TypeVar('T') class C(Generic[T]): @@ -8561,6 +8571,17 @@ def test_instantiate_generic(self): self.assertEqual(MyCount([4, 4, 5]), {4: 2, 5: 1}) self.assertEqual(MyCount[int]([4, 4, 5]), {4: 2, 5: 1}) + def test_instantiate_immutable(self): + class C: + def __setattr__(self, key, value): + raise Exception("should be ignored") + + A = Annotated[C, "a decoration"] + # gh-115165: This used to cause RuntimeError to be raised + # when we tried to set `__orig_class__` on the `C` instance + # returned by the `A()` call + self.assertIsInstance(A(), C) + def test_cannot_instantiate_forward(self): A = Annotated["int", (5, 6)] with self.assertRaises(TypeError): diff --git a/Lib/typing.py b/Lib/typing.py index 347373f00956c7..914ddeaf504cd0 100644 --- a/Lib/typing.py +++ b/Lib/typing.py @@ -1127,7 +1127,9 @@ def __call__(self, *args, **kwargs): result = self.__origin__(*args, **kwargs) try: result.__orig_class__ = self - except AttributeError: + # Some objects raise TypeError (or something even more exotic) + # if you try to set attributes on 
them; we guard against that here + except Exception: pass return result diff --git a/Misc/NEWS.d/next/Library/2024-02-09-07-20-16.gh-issue-115165.yfJLXA.rst b/Misc/NEWS.d/next/Library/2024-02-09-07-20-16.gh-issue-115165.yfJLXA.rst new file mode 100644 index 00000000000000..73d3d001f07f3f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-02-09-07-20-16.gh-issue-115165.yfJLXA.rst @@ -0,0 +1,4 @@ +Most exceptions are now ignored when attempting to set the ``__orig_class__`` +attribute on objects returned when calling :mod:`typing` generic aliases +(including generic aliases created using :data:`typing.Annotated`). +Previously only :exc:`AttributeError` was ignored. Patch by Dave Shawley. From d4d5bae1471788b345155e8e93a2fe4ab92d09dc Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sat, 10 Feb 2024 09:57:04 +0900 Subject: [PATCH 042/126] gh-111968: Refactor _PyXXX_Fini to integrate with _PyObject_ClearFreeLists (gh-114899) --- Include/internal/pycore_context.h | 1 - Include/internal/pycore_floatobject.h | 1 - Include/internal/pycore_freelist.h | 10 ++++++++++ Include/internal/pycore_gc.h | 8 -------- Include/internal/pycore_genobject.h | 4 ---- Include/internal/pycore_list.h | 6 ------ Include/internal/pycore_object_stack.h | 3 --- Include/internal/pycore_sliceobject.h | 2 -- Include/internal/pycore_tuple.h | 1 - Objects/floatobject.c | 10 ---------- Objects/genobject.c | 11 ----------- Objects/listobject.c | 10 ---------- Objects/object.c | 15 +++++++++++++++ Objects/sliceobject.c | 12 ++++-------- Objects/tupleobject.c | 5 ----- Python/context.c | 11 ----------- Python/gc_free_threading.c | 2 +- Python/gc_gil.c | 2 +- Python/pylifecycle.c | 12 +++++------- Python/pystate.c | 19 ++----------------- 20 files changed, 38 insertions(+), 107 deletions(-) diff --git a/Include/internal/pycore_context.h b/Include/internal/pycore_context.h index 3284efba2b6f4c..ae5c47f195eb7f 100644 --- a/Include/internal/pycore_context.h +++ b/Include/internal/pycore_context.h @@ -14,7 +14,6
@@ extern PyTypeObject _PyContextTokenMissing_Type; /* runtime lifecycle */ PyStatus _PyContext_Init(PyInterpreterState *); -void _PyContext_Fini(_PyFreeListState *); /* other API */ diff --git a/Include/internal/pycore_floatobject.h b/Include/internal/pycore_floatobject.h index 038578e1f9680a..3767df5506d43f 100644 --- a/Include/internal/pycore_floatobject.h +++ b/Include/internal/pycore_floatobject.h @@ -15,7 +15,6 @@ extern "C" { extern void _PyFloat_InitState(PyInterpreterState *); extern PyStatus _PyFloat_InitTypes(PyInterpreterState *); -extern void _PyFloat_Fini(_PyFreeListState *); extern void _PyFloat_FiniType(PyInterpreterState *); diff --git a/Include/internal/pycore_freelist.h b/Include/internal/pycore_freelist.h index 82a42300991ecc..1bc551914794f0 100644 --- a/Include/internal/pycore_freelist.h +++ b/Include/internal/pycore_freelist.h @@ -125,6 +125,16 @@ typedef struct _Py_freelist_state { struct _Py_object_stack_state object_stacks; } _PyFreeListState; +extern void _PyObject_ClearFreeLists(_PyFreeListState *state, int is_finalization); +extern void _PyTuple_ClearFreeList(_PyFreeListState *state, int is_finalization); +extern void _PyFloat_ClearFreeList(_PyFreeListState *state, int is_finalization); +extern void _PyList_ClearFreeList(_PyFreeListState *state, int is_finalization); +extern void _PySlice_ClearFreeList(_PyFreeListState *state, int is_finalization); +extern void _PyDict_ClearFreeList(_PyFreeListState *state, int is_finalization); +extern void _PyAsyncGen_ClearFreeLists(_PyFreeListState *state, int is_finalization); +extern void _PyContext_ClearFreeList(_PyFreeListState *state, int is_finalization); +extern void _PyObjectStackChunk_ClearFreeList(_PyFreeListState *state, int is_finalization); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 8d0bc2a218e48d..582a16bf5218ce 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -279,14 +279,6 @@ extern 
PyObject *_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs); // Functions to clear types free lists extern void _PyGC_ClearAllFreeLists(PyInterpreterState *interp); -extern void _Py_ClearFreeLists(_PyFreeListState *state, int is_finalization); -extern void _PyTuple_ClearFreeList(_PyFreeListState *state, int is_finalization); -extern void _PyFloat_ClearFreeList(_PyFreeListState *state, int is_finalization); -extern void _PyList_ClearFreeList(_PyFreeListState *state, int is_finalization); -extern void _PySlice_ClearCache(_PyFreeListState *state); -extern void _PyDict_ClearFreeList(_PyFreeListState *state, int is_finalization); -extern void _PyAsyncGen_ClearFreeLists(_PyFreeListState *state, int is_finalization); -extern void _PyContext_ClearFreeList(_PyFreeListState *state, int is_finalization); extern void _Py_ScheduleGC(PyInterpreterState *interp); extern void _Py_RunGC(PyThreadState *tstate); diff --git a/Include/internal/pycore_genobject.h b/Include/internal/pycore_genobject.h index 5ad63658051e86..b2aa017598409f 100644 --- a/Include/internal/pycore_genobject.h +++ b/Include/internal/pycore_genobject.h @@ -26,10 +26,6 @@ extern PyTypeObject _PyCoroWrapper_Type; extern PyTypeObject _PyAsyncGenWrappedValue_Type; extern PyTypeObject _PyAsyncGenAThrow_Type; -/* runtime lifecycle */ - -extern void _PyAsyncGen_Fini(_PyFreeListState *); - #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h index 4536f90e414493..50dc13c4da4487 100644 --- a/Include/internal/pycore_list.h +++ b/Include/internal/pycore_list.h @@ -13,12 +13,6 @@ extern "C" { extern PyObject* _PyList_Extend(PyListObject *, PyObject *); extern void _PyList_DebugMallocStats(FILE *out); - -/* runtime lifecycle */ - -extern void _PyList_Fini(_PyFreeListState *); - - #define _PyList_ITEMS(op) _Py_RVALUE(_PyList_CAST(op)->ob_item) extern int diff --git a/Include/internal/pycore_object_stack.h b/Include/internal/pycore_object_stack.h index 
d042be2a98090a..fc130b1e9920b4 100644 --- a/Include/internal/pycore_object_stack.h +++ b/Include/internal/pycore_object_stack.h @@ -34,9 +34,6 @@ _PyObjectStackChunk_New(void); extern void _PyObjectStackChunk_Free(_PyObjectStackChunk *); -extern void -_PyObjectStackChunk_ClearFreeList(_PyFreeListState *state, int is_finalization); - // Push an item onto the stack. Return -1 on allocation failure, 0 on success. static inline int _PyObjectStack_Push(_PyObjectStack *stack, PyObject *obj) diff --git a/Include/internal/pycore_sliceobject.h b/Include/internal/pycore_sliceobject.h index 0c72d3ee6225c5..89086f67683a2f 100644 --- a/Include/internal/pycore_sliceobject.h +++ b/Include/internal/pycore_sliceobject.h @@ -11,8 +11,6 @@ extern "C" { /* runtime lifecycle */ -extern void _PySlice_Fini(_PyFreeListState *); - extern PyObject * _PyBuildSlice_ConsumeRefs(PyObject *start, PyObject *stop); diff --git a/Include/internal/pycore_tuple.h b/Include/internal/pycore_tuple.h index b348339a505b0f..4605f355ccbc38 100644 --- a/Include/internal/pycore_tuple.h +++ b/Include/internal/pycore_tuple.h @@ -14,7 +14,6 @@ extern void _PyTuple_DebugMallocStats(FILE *out); /* runtime lifecycle */ extern PyStatus _PyTuple_InitGlobalObjects(PyInterpreterState *); -extern void _PyTuple_Fini(_PyFreeListState *); /* other API */ diff --git a/Objects/floatobject.c b/Objects/floatobject.c index c440e0dab0e79f..9b322c52d4daea 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -2010,16 +2010,6 @@ _PyFloat_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization) #endif } -void -_PyFloat_Fini(_PyFreeListState *state) -{ - // With Py_GIL_DISABLED: - // the freelists for the current thread state have already been cleared. 
-#ifndef Py_GIL_DISABLED - _PyFloat_ClearFreeList(state, 1); -#endif -} - void _PyFloat_FiniType(PyInterpreterState *interp) { diff --git a/Objects/genobject.c b/Objects/genobject.c index ab523e46cceaa3..59ab7abf6180bd 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -1682,17 +1682,6 @@ _PyAsyncGen_ClearFreeLists(_PyFreeListState *freelist_state, int is_finalization #endif } -void -_PyAsyncGen_Fini(_PyFreeListState *state) -{ - // With Py_GIL_DISABLED: - // the freelists for the current thread state have already been cleared. -#ifndef Py_GIL_DISABLED - _PyAsyncGen_ClearFreeLists(state, 1); -#endif -} - - static PyObject * async_gen_unwrap_value(PyAsyncGenObject *gen, PyObject *result) { diff --git a/Objects/listobject.c b/Objects/listobject.c index 307b8f1bd76cac..7fdb91eab890b5 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -135,16 +135,6 @@ _PyList_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization) #endif } -void -_PyList_Fini(_PyFreeListState *state) -{ - // With Py_GIL_DISABLED: - // the freelists for the current thread state have already been cleared. 
-#ifndef Py_GIL_DISABLED - _PyList_ClearFreeList(state, 1); -#endif -} - /* Print summary info about the state of the optimized allocator */ void _PyList_DebugMallocStats(FILE *out) diff --git a/Objects/object.c b/Objects/object.c index 61e6131c6e99bb..275aa6713c8c21 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -793,6 +793,21 @@ PyObject_Bytes(PyObject *v) return PyBytes_FromObject(v); } +void +_PyObject_ClearFreeLists(_PyFreeListState *state, int is_finalization) +{ + // In the free-threaded build, freelists are per-PyThreadState and cleared in PyThreadState_Clear() + // In the default build, freelists are per-interpreter and cleared in finalize_interp_types() + _PyFloat_ClearFreeList(state, is_finalization); + _PyTuple_ClearFreeList(state, is_finalization); + _PyList_ClearFreeList(state, is_finalization); + _PyDict_ClearFreeList(state, is_finalization); + _PyContext_ClearFreeList(state, is_finalization); + _PyAsyncGen_ClearFreeLists(state, is_finalization); + // Only be cleared if is_finalization is true. 
+ _PyObjectStackChunk_ClearFreeList(state, is_finalization); + _PySlice_ClearFreeList(state, is_finalization); +} /* def _PyObject_FunctionStr(x): diff --git a/Objects/sliceobject.c b/Objects/sliceobject.c index 8b9d6bbfd858b7..9880c123c80f95 100644 --- a/Objects/sliceobject.c +++ b/Objects/sliceobject.c @@ -103,8 +103,11 @@ PyObject _Py_EllipsisObject = _PyObject_HEAD_INIT(&PyEllipsis_Type); /* Slice object implementation */ -void _PySlice_ClearCache(_PyFreeListState *state) +void _PySlice_ClearFreeList(_PyFreeListState *state, int is_finalization) { + if (!is_finalization) { + return; + } #ifdef WITH_FREELISTS PySliceObject *obj = state->slices.slice_cache; if (obj != NULL) { @@ -114,13 +117,6 @@ void _PySlice_ClearCache(_PyFreeListState *state) #endif } -void _PySlice_Fini(_PyFreeListState *state) -{ -#ifdef WITH_FREELISTS - _PySlice_ClearCache(state); -#endif -} - /* start, stop, and step are python objects with None indicating no index is present. */ diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index b9bf6cd48f6129..7d73c3fb0f7f2c 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -964,11 +964,6 @@ _PyTuple_Resize(PyObject **pv, Py_ssize_t newsize) static void maybe_freelist_clear(_PyFreeListState *, int); -void -_PyTuple_Fini(_PyFreeListState *state) -{ - maybe_freelist_clear(state, 1); -} void _PyTuple_ClearFreeList(_PyFreeListState *state, int is_finalization) diff --git a/Python/context.c b/Python/context.c index 793dfa2b72c7e3..e44fef705c36e0 100644 --- a/Python/context.c +++ b/Python/context.c @@ -1284,17 +1284,6 @@ _PyContext_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization) } -void -_PyContext_Fini(_PyFreeListState *state) -{ - // With Py_GIL_DISABLED: - // the freelists for the current thread state have already been cleared. 
-#ifndef Py_GIL_DISABLED - _PyContext_ClearFreeList(state, 1); -#endif -} - - PyStatus _PyContext_Init(PyInterpreterState *interp) { diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 5d3b097dee93e8..93e1168002b6f7 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1721,7 +1721,7 @@ _PyGC_ClearAllFreeLists(PyInterpreterState *interp) HEAD_LOCK(&_PyRuntime); _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)interp->threads.head; while (tstate != NULL) { - _Py_ClearFreeLists(&tstate->freelist_state, 0); + _PyObject_ClearFreeLists(&tstate->freelist_state, 0); tstate = (_PyThreadStateImpl *)tstate->base.next; } HEAD_UNLOCK(&_PyRuntime); diff --git a/Python/gc_gil.c b/Python/gc_gil.c index 4e2aa8f7af746c..5f1365f509deb0 100644 --- a/Python/gc_gil.c +++ b/Python/gc_gil.c @@ -11,7 +11,7 @@ void _PyGC_ClearAllFreeLists(PyInterpreterState *interp) { - _Py_ClearFreeLists(&interp->freelist_state, 0); + _PyObject_ClearFreeLists(&interp->freelist_state, 0); } #endif diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 0cac7109340129..61c9d4f9ea9575 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1790,16 +1790,14 @@ finalize_interp_types(PyInterpreterState *interp) // a dict internally. _PyUnicode_ClearInterned(interp); - _PyDict_Fini(interp); _PyUnicode_Fini(interp); +#ifndef Py_GIL_DISABLED + // With Py_GIL_DISABLED: + // the freelists for the current thread state have already been cleared. 
_PyFreeListState *state = _PyFreeListState_GET(); - _PyTuple_Fini(state); - _PyList_Fini(state); - _PyFloat_Fini(state); - _PySlice_Fini(state); - _PyContext_Fini(state); - _PyAsyncGen_Fini(state); + _PyObject_ClearFreeLists(state, 1); +#endif #ifdef Py_DEBUG _PyStaticObjects_CheckRefcnt(interp); diff --git a/Python/pystate.c b/Python/pystate.c index 6cd034743ddf4c..937c43033b068d 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1468,20 +1468,6 @@ clear_datastack(PyThreadState *tstate) } } -void -_Py_ClearFreeLists(_PyFreeListState *state, int is_finalization) -{ - // In the free-threaded build, freelists are per-PyThreadState and cleared in PyThreadState_Clear() - // In the default build, freelists are per-interpreter and cleared in finalize_interp_types() - _PyFloat_ClearFreeList(state, is_finalization); - _PyTuple_ClearFreeList(state, is_finalization); - _PyList_ClearFreeList(state, is_finalization); - _PyDict_ClearFreeList(state, is_finalization); - _PyContext_ClearFreeList(state, is_finalization); - _PyAsyncGen_ClearFreeLists(state, is_finalization); - _PyObjectStackChunk_ClearFreeList(state, is_finalization); -} - void PyThreadState_Clear(PyThreadState *tstate) { @@ -1566,9 +1552,8 @@ PyThreadState_Clear(PyThreadState *tstate) } #ifdef Py_GIL_DISABLED // Each thread should clear own freelists in free-threading builds. - _PyFreeListState *freelist_state = &((_PyThreadStateImpl*)tstate)->freelist_state; - _Py_ClearFreeLists(freelist_state, 1); - _PySlice_ClearCache(freelist_state); + _PyFreeListState *freelist_state = _PyFreeListState_GET(); + _PyObject_ClearFreeLists(freelist_state, 1); // Remove ourself from the biased reference counting table of threads. 
_Py_brc_remove_thread(tstate); From b2d9d134dcb5633deebebf2b0118cd4f7ca598a2 Mon Sep 17 00:00:00 2001 From: Laurie O Date: Sat, 10 Feb 2024 14:58:30 +1000 Subject: [PATCH 043/126] gh-96471: Add shutdown() method to queue.Queue (#104750) Co-authored-by: Duprat --- Doc/library/queue.rst | 38 ++ Doc/whatsnew/3.13.rst | 7 + Lib/queue.py | 50 +++ Lib/test/test_queue.py | 378 ++++++++++++++++++ ...3-05-06-04-57-10.gh-issue-96471.C9wAU7.rst | 1 + 5 files changed, 474 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-05-06-04-57-10.gh-issue-96471.C9wAU7.rst diff --git a/Doc/library/queue.rst b/Doc/library/queue.rst index b2b787c5a8260c..1421fc2e552f0e 100644 --- a/Doc/library/queue.rst +++ b/Doc/library/queue.rst @@ -93,6 +93,14 @@ The :mod:`queue` module defines the following classes and exceptions: on a :class:`Queue` object which is full. +.. exception:: ShutDown + + Exception raised when :meth:`~Queue.put` or :meth:`~Queue.get` is called on + a :class:`Queue` object which has been shut down. + + .. versionadded:: 3.13 + + .. _queueobjects: Queue Objects @@ -135,6 +143,8 @@ provide the public methods described below. immediately available, else raise the :exc:`Full` exception (*timeout* is ignored in that case). + Raises :exc:`ShutDown` if the queue has been shut down. + .. method:: Queue.put_nowait(item) @@ -155,6 +165,9 @@ provide the public methods described below. an uninterruptible wait on an underlying lock. This means that no exceptions can occur, and in particular a SIGINT will not trigger a :exc:`KeyboardInterrupt`. + Raises :exc:`ShutDown` if the queue has been shut down and is empty, or if + the queue has been shut down immediately. + .. method:: Queue.get_nowait() @@ -177,6 +190,8 @@ fully processed by daemon consumer threads. Raises a :exc:`ValueError` if called more times than there were items placed in the queue. + Raises :exc:`ShutDown` if the queue has been shut down immediately. + .. 
method:: Queue.join() @@ -187,6 +202,8 @@ fully processed by daemon consumer threads. indicate that the item was retrieved and all work on it is complete. When the count of unfinished tasks drops to zero, :meth:`join` unblocks. + Raises :exc:`ShutDown` if the queue has been shut down immediately. + Example of how to wait for enqueued tasks to be completed:: @@ -214,6 +231,27 @@ Example of how to wait for enqueued tasks to be completed:: print('All work completed') +Terminating queues +^^^^^^^^^^^^^^^^^^ + +:class:`Queue` objects can be made to prevent further interaction by shutting +them down. + +.. method:: Queue.shutdown(immediate=False) + + Shut down the queue, making :meth:`~Queue.get` and :meth:`~Queue.put` raise + :exc:`ShutDown`. + + By default, :meth:`~Queue.get` on a shut down queue will only raise once the + queue is empty. Set *immediate* to true to make :meth:`~Queue.get` raise + immediately instead. + + All blocked callers of :meth:`~Queue.put` will be unblocked. If *immediate* + is true, also unblock callers of :meth:`~Queue.get` and :meth:`~Queue.join`. + + .. versionadded:: 3.13 + + SimpleQueue Objects ------------------- diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index b05e4badc9e58b..de79bd979aff80 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -403,6 +403,13 @@ pdb command line option or :envvar:`PYTHONSAFEPATH` environment variable). (Contributed by Tian Gao and Christian Walther in :gh:`111762`.) +queue +----- + +* Add :meth:`queue.Queue.shutdown` (along with :exc:`queue.ShutDown`) for queue + termination. + (Contributed by Laurie Opperman and Yves Duprat in :gh:`104750`.) + re -- * Rename :exc:`!re.error` to :exc:`re.PatternError` for improved clarity. 
diff --git a/Lib/queue.py b/Lib/queue.py index 55f50088460f9e..467ff4fcecb134 100644 --- a/Lib/queue.py +++ b/Lib/queue.py @@ -25,6 +25,10 @@ class Full(Exception): pass +class ShutDown(Exception): + '''Raised when put/get with shut-down queue.''' + + class Queue: '''Create a queue object with a given maximum size. @@ -54,6 +58,9 @@ def __init__(self, maxsize=0): self.all_tasks_done = threading.Condition(self.mutex) self.unfinished_tasks = 0 + # Queue shutdown state + self.is_shutdown = False + def task_done(self): '''Indicate that a formerly enqueued task is complete. @@ -67,6 +74,8 @@ def task_done(self): Raises a ValueError if called more times than there were items placed in the queue. + + Raises ShutDown if the queue has been shut down immediately. ''' with self.all_tasks_done: unfinished = self.unfinished_tasks - 1 @@ -84,6 +93,8 @@ def join(self): to indicate the item was retrieved and all work on it is complete. When the count of unfinished tasks drops to zero, join() unblocks. + + Raises ShutDown if the queue has been shut down immediately. ''' with self.all_tasks_done: while self.unfinished_tasks: @@ -129,8 +140,12 @@ def put(self, item, block=True, timeout=None): Otherwise ('block' is false), put an item on the queue if a free slot is immediately available, else raise the Full exception ('timeout' is ignored in that case). + + Raises ShutDown if the queue has been shut down. 
''' with self.not_full: + if self.is_shutdown: + raise ShutDown if self.maxsize > 0: if not block: if self._qsize() >= self.maxsize: @@ -138,6 +153,8 @@ def put(self, item, block=True, timeout=None): elif timeout is None: while self._qsize() >= self.maxsize: self.not_full.wait() + if self.is_shutdown: + raise ShutDown elif timeout < 0: raise ValueError("'timeout' must be a non-negative number") else: @@ -147,6 +164,8 @@ def put(self, item, block=True, timeout=None): if remaining <= 0.0: raise Full self.not_full.wait(remaining) + if self.is_shutdown: + raise ShutDown self._put(item) self.unfinished_tasks += 1 self.not_empty.notify() @@ -161,14 +180,21 @@ def get(self, block=True, timeout=None): Otherwise ('block' is false), return an item if one is immediately available, else raise the Empty exception ('timeout' is ignored in that case). + + Raises ShutDown if the queue has been shut down and is empty, + or if the queue has been shut down immediately. ''' with self.not_empty: + if self.is_shutdown and not self._qsize(): + raise ShutDown if not block: if not self._qsize(): raise Empty elif timeout is None: while not self._qsize(): self.not_empty.wait() + if self.is_shutdown and not self._qsize(): + raise ShutDown elif timeout < 0: raise ValueError("'timeout' must be a non-negative number") else: @@ -178,6 +204,8 @@ def get(self, block=True, timeout=None): if remaining <= 0.0: raise Empty self.not_empty.wait(remaining) + if self.is_shutdown and not self._qsize(): + raise ShutDown item = self._get() self.not_full.notify() return item @@ -198,6 +226,28 @@ def get_nowait(self): ''' return self.get(block=False) + def shutdown(self, immediate=False): + '''Shut-down the queue, making queue gets and puts raise. + + By default, gets will only raise once the queue is empty. Set + 'immediate' to True to make gets raise immediately instead. + + All blocked callers of put() will be unblocked, and also get() + and join() if 'immediate'. The ShutDown exception is raised. 
+ ''' + with self.mutex: + self.is_shutdown = True + if immediate: + n_items = self._qsize() + while self._qsize(): + self._get() + if self.unfinished_tasks > 0: + self.unfinished_tasks -= 1 + self.not_empty.notify_all() + # release all blocked threads in `join()` + self.all_tasks_done.notify_all() + self.not_full.notify_all() + # Override these methods to implement other queue organizations # (e.g. stack or priority queue). # These will only be called with appropriate locks held diff --git a/Lib/test/test_queue.py b/Lib/test/test_queue.py index 33113a72e6b6a9..e3d4d566cdda48 100644 --- a/Lib/test/test_queue.py +++ b/Lib/test/test_queue.py @@ -241,6 +241,384 @@ def test_shrinking_queue(self): with self.assertRaises(self.queue.Full): q.put_nowait(4) + def test_shutdown_empty(self): + q = self.type2test() + q.shutdown() + with self.assertRaises(self.queue.ShutDown): + q.put("data") + with self.assertRaises(self.queue.ShutDown): + q.get() + + def test_shutdown_nonempty(self): + q = self.type2test() + q.put("data") + q.shutdown() + q.get() + with self.assertRaises(self.queue.ShutDown): + q.get() + + def test_shutdown_immediate(self): + q = self.type2test() + q.put("data") + q.shutdown(immediate=True) + with self.assertRaises(self.queue.ShutDown): + q.get() + + def test_shutdown_allowed_transitions(self): + # allowed transitions would be from alive via shutdown to immediate + q = self.type2test() + self.assertFalse(q.is_shutdown) + + q.shutdown() + self.assertTrue(q.is_shutdown) + + q.shutdown(immediate=True) + self.assertTrue(q.is_shutdown) + + q.shutdown(immediate=False) + + def _shutdown_all_methods_in_one_thread(self, immediate): + q = self.type2test(2) + q.put("L") + q.put_nowait("O") + q.shutdown(immediate) + + with self.assertRaises(self.queue.ShutDown): + q.put("E") + with self.assertRaises(self.queue.ShutDown): + q.put_nowait("W") + if immediate: + with self.assertRaises(self.queue.ShutDown): + q.get() + with self.assertRaises(self.queue.ShutDown): + 
q.get_nowait() + with self.assertRaises(ValueError): + q.task_done() + q.join() + else: + self.assertIn(q.get(), "LO") + q.task_done() + self.assertIn(q.get(), "LO") + q.task_done() + q.join() + # on shutdown(immediate=False) + # when queue is empty, should raise ShutDown Exception + with self.assertRaises(self.queue.ShutDown): + q.get() # p.get(True) + with self.assertRaises(self.queue.ShutDown): + q.get_nowait() # p.get(False) + with self.assertRaises(self.queue.ShutDown): + q.get(True, 1.0) + + def test_shutdown_all_methods_in_one_thread(self): + return self._shutdown_all_methods_in_one_thread(False) + + def test_shutdown_immediate_all_methods_in_one_thread(self): + return self._shutdown_all_methods_in_one_thread(True) + + def _write_msg_thread(self, q, n, results, delay, + i_when_exec_shutdown, + event_start, event_end): + event_start.wait() + for i in range(1, n+1): + try: + q.put((i, "YDLO")) + results.append(True) + except self.queue.ShutDown: + results.append(False) + # triggers shutdown of queue + if i == i_when_exec_shutdown: + event_end.set() + time.sleep(delay) + # end of all puts + q.join() + + def _read_msg_thread(self, q, nb, results, delay, event_start): + event_start.wait() + block = True + while nb: + time.sleep(delay) + try: + # Get at least one message + q.get(block) + block = False + q.task_done() + results.append(True) + nb -= 1 + except self.queue.ShutDown: + results.append(False) + nb -= 1 + except self.queue.Empty: + pass + q.join() + + def _shutdown_thread(self, q, event_end, immediate): + event_end.wait() + q.shutdown(immediate) + q.join() + + def _join_thread(self, q, delay, event_start): + event_start.wait() + time.sleep(delay) + q.join() + + def _shutdown_all_methods_in_many_threads(self, immediate): + q = self.type2test() + ps = [] + ev_start = threading.Event() + ev_exec_shutdown = threading.Event() + res_puts = [] + res_gets = [] + delay = 1e-4 + read_process = 4 + nb_msgs = read_process * 16 + nb_msgs_r = nb_msgs // read_process + 
when_exec_shutdown = nb_msgs // 2 + lprocs = ( + (self._write_msg_thread, 1, (q, nb_msgs, res_puts, delay, + when_exec_shutdown, + ev_start, ev_exec_shutdown)), + (self._read_msg_thread, read_process, (q, nb_msgs_r, + res_gets, delay*2, + ev_start)), + (self._join_thread, 2, (q, delay*2, ev_start)), + (self._shutdown_thread, 1, (q, ev_exec_shutdown, immediate)), + ) + # start all threds + for func, n, args in lprocs: + for i in range(n): + ps.append(threading.Thread(target=func, args=args)) + ps[-1].start() + # set event in order to run q.shutdown() + ev_start.set() + + if not immediate: + assert(len(res_gets) == len(res_puts)) + assert(res_gets.count(True) == res_puts.count(True)) + else: + assert(len(res_gets) <= len(res_puts)) + assert(res_gets.count(True) <= res_puts.count(True)) + + for thread in ps[1:]: + thread.join() + + def test_shutdown_all_methods_in_many_threads(self): + return self._shutdown_all_methods_in_many_threads(False) + + def test_shutdown_immediate_all_methods_in_many_threads(self): + return self._shutdown_all_methods_in_many_threads(True) + + def _get(self, q, go, results, shutdown=False): + go.wait() + try: + msg = q.get() + results.append(not shutdown) + return not shutdown + except self.queue.ShutDown: + results.append(shutdown) + return shutdown + + def _get_shutdown(self, q, go, results): + return self._get(q, go, results, True) + + def _get_task_done(self, q, go, results): + go.wait() + try: + msg = q.get() + q.task_done() + results.append(True) + return msg + except self.queue.ShutDown: + results.append(False) + return False + + def _put(self, q, msg, go, results, shutdown=False): + go.wait() + try: + q.put(msg) + results.append(not shutdown) + return not shutdown + except self.queue.ShutDown: + results.append(shutdown) + return shutdown + + def _put_shutdown(self, q, msg, go, results): + return self._put(q, msg, go, results, True) + + def _join(self, q, results, shutdown=False): + try: + q.join() + results.append(not shutdown) + 
return not shutdown + except self.queue.ShutDown: + results.append(shutdown) + return shutdown + + def _join_shutdown(self, q, results): + return self._join(q, results, True) + + def _shutdown_get(self, immediate): + q = self.type2test(2) + results = [] + go = threading.Event() + q.put("Y") + q.put("D") + # queue full + + if immediate: + thrds = ( + (self._get_shutdown, (q, go, results)), + (self._get_shutdown, (q, go, results)), + ) + else: + thrds = ( + # on shutdown(immediate=False) + # one of these threads shoud raise Shutdown + (self._get, (q, go, results)), + (self._get, (q, go, results)), + (self._get, (q, go, results)), + ) + threads = [] + for func, params in thrds: + threads.append(threading.Thread(target=func, args=params)) + threads[-1].start() + q.shutdown(immediate) + go.set() + for t in threads: + t.join() + if immediate: + self.assertListEqual(results, [True, True]) + else: + self.assertListEqual(sorted(results), [False] + [True]*(len(thrds)-1)) + + def test_shutdown_get(self): + return self._shutdown_get(False) + + def test_shutdown_immediate_get(self): + return self._shutdown_get(True) + + def _shutdown_put(self, immediate): + q = self.type2test(2) + results = [] + go = threading.Event() + q.put("Y") + q.put("D") + # queue fulled + + thrds = ( + (self._put_shutdown, (q, "E", go, results)), + (self._put_shutdown, (q, "W", go, results)), + ) + threads = [] + for func, params in thrds: + threads.append(threading.Thread(target=func, args=params)) + threads[-1].start() + q.shutdown() + go.set() + for t in threads: + t.join() + + self.assertEqual(results, [True]*len(thrds)) + + def test_shutdown_put(self): + return self._shutdown_put(False) + + def test_shutdown_immediate_put(self): + return self._shutdown_put(True) + + def _shutdown_join(self, immediate): + q = self.type2test() + results = [] + q.put("Y") + go = threading.Event() + nb = q.qsize() + + thrds = ( + (self._join, (q, results)), + (self._join, (q, results)), + ) + threads = [] + for func, 
params in thrds: + threads.append(threading.Thread(target=func, args=params)) + threads[-1].start() + if not immediate: + res = [] + for i in range(nb): + threads.append(threading.Thread(target=self._get_task_done, args=(q, go, res))) + threads[-1].start() + q.shutdown(immediate) + go.set() + for t in threads: + t.join() + + self.assertEqual(results, [True]*len(thrds)) + + def test_shutdown_immediate_join(self): + return self._shutdown_join(True) + + def test_shutdown_join(self): + return self._shutdown_join(False) + + def _shutdown_put_join(self, immediate): + q = self.type2test(2) + results = [] + go = threading.Event() + q.put("Y") + nb = q.qsize() + # queue not fulled + + thrds = ( + (self._put_shutdown, (q, "E", go, results)), + (self._join, (q, results)), + ) + threads = [] + for func, params in thrds: + threads.append(threading.Thread(target=func, args=params)) + threads[-1].start() + self.assertEqual(q.unfinished_tasks, nb) + for i in range(nb): + t = threading.Thread(target=q.task_done) + t.start() + threads.append(t) + q.shutdown(immediate) + go.set() + for t in threads: + t.join() + + self.assertEqual(results, [True]*len(thrds)) + + def test_shutdown_immediate_put_join(self): + return self._shutdown_put_join(True) + + def test_shutdown_put_join(self): + return self._shutdown_put_join(False) + + def test_shutdown_get_task_done_join(self): + q = self.type2test(2) + results = [] + go = threading.Event() + q.put("Y") + q.put("D") + self.assertEqual(q.unfinished_tasks, q.qsize()) + + thrds = ( + (self._get_task_done, (q, go, results)), + (self._get_task_done, (q, go, results)), + (self._join, (q, results)), + (self._join, (q, results)), + ) + threads = [] + for func, params in thrds: + threads.append(threading.Thread(target=func, args=params)) + threads[-1].start() + go.set() + q.shutdown(False) + for t in threads: + t.join() + + self.assertEqual(results, [True]*len(thrds)) + + class QueueTest(BaseQueueTestMixin): def setUp(self): diff --git 
a/Misc/NEWS.d/next/Library/2023-05-06-04-57-10.gh-issue-96471.C9wAU7.rst b/Misc/NEWS.d/next/Library/2023-05-06-04-57-10.gh-issue-96471.C9wAU7.rst new file mode 100644 index 00000000000000..0bace8d8bd425c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-05-06-04-57-10.gh-issue-96471.C9wAU7.rst @@ -0,0 +1 @@ +Add :py:class:`queue.Queue` termination with :py:meth:`~queue.Queue.shutdown`. From e19103a346f0277c44a43dfaebad9a5aa468bf1e Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sat, 10 Feb 2024 11:34:23 +0300 Subject: [PATCH 044/126] gh-114552: Update `__dir__` method docs: it allows returning an iterable (#114662) --- Doc/reference/datamodel.rst | 6 +++--- Lib/test/test_builtin.py | 8 ++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 0a1c1d58558e94..885ee825c12296 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -1988,8 +1988,8 @@ access (use of, assignment to, or deletion of ``x.name``) for class instances. .. method:: object.__dir__(self) - Called when :func:`dir` is called on the object. A sequence must be - returned. :func:`dir` converts the returned sequence to a list and sorts it. + Called when :func:`dir` is called on the object. An iterable must be + returned. :func:`dir` converts the returned iterable to a list and sorts it. Customizing module attribute access @@ -2009,7 +2009,7 @@ not found on a module object through the normal lookup, i.e. the module ``__dict__`` before raising an :exc:`AttributeError`. If found, it is called with the attribute name and the result is returned. -The ``__dir__`` function should accept no arguments, and return a sequence of +The ``__dir__`` function should accept no arguments, and return an iterable of strings that represents the names accessible on module. If present, this function overrides the standard :func:`dir` search on a module. 
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index fcddd147bac63e..7a3ab2274a58f2 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -611,6 +611,14 @@ def __dir__(self): self.assertIsInstance(res, list) self.assertTrue(res == ["a", "b", "c"]) + # dir(obj__dir__iterable) + class Foo(object): + def __dir__(self): + return {"b", "c", "a"} + res = dir(Foo()) + self.assertIsInstance(res, list) + self.assertEqual(sorted(res), ["a", "b", "c"]) + # dir(obj__dir__not_sequence) class Foo(object): def __dir__(self): From 6e222a55b1d63de994a2ca39afd4bbf4d2fbdd34 Mon Sep 17 00:00:00 2001 From: Ronald Oussoren Date: Sat, 10 Feb 2024 11:16:45 +0100 Subject: [PATCH 045/126] GH-87804: Fix counter overflow in statvfs on macOS (#99570) On macOS the statvfs interface returns block counts as 32-bit integers, and that results in bad reporting for larger disks. Therefore reimplement statvfs in terms of statfs, which does use 64-bit integers for block counts. Tested using a sparse filesystem image of 100TB. --- ...2-11-18-10-05-35.gh-issue-87804.rhlDmD.rst | 1 + Modules/posixmodule.c | 101 ++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 Misc/NEWS.d/next/macOS/2022-11-18-10-05-35.gh-issue-87804.rhlDmD.rst diff --git a/Misc/NEWS.d/next/macOS/2022-11-18-10-05-35.gh-issue-87804.rhlDmD.rst b/Misc/NEWS.d/next/macOS/2022-11-18-10-05-35.gh-issue-87804.rhlDmD.rst new file mode 100644 index 00000000000000..e6554d5c9f1e1e --- /dev/null +++ b/Misc/NEWS.d/next/macOS/2022-11-18-10-05-35.gh-issue-87804.rhlDmD.rst @@ -0,0 +1 @@ +On macOS the result of ``os.statvfs`` and ``os.fstatvfs`` now correctly report the size of very large disks, in previous versions the reported number of blocks was wrong for disks with at least 2**32 blocks. 
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 230c961a2ac3c0..d05b4ba723ce8c 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -52,6 +52,12 @@ # define EX_OK EXIT_SUCCESS #endif +#ifdef __APPLE__ + /* Needed for the implementation of os.statvfs */ +# include +# include +#endif + /* On android API level 21, 'AT_EACCESS' is not declared although * HAVE_FACCESSAT is defined. */ #ifdef __ANDROID__ @@ -12886,6 +12892,59 @@ os_WSTOPSIG_impl(PyObject *module, int status) #endif #include +#ifdef __APPLE__ +/* On macOS struct statvfs uses 32-bit integers for block counts, + * resulting in overflow when filesystems are larger tan 4TB. Therefore + * os.statvfs is implemented in terms of statfs(2). + */ + +static PyObject* +_pystatvfs_fromstructstatfs(PyObject *module, struct statfs st) { + PyObject *StatVFSResultType = get_posix_state(module)->StatVFSResultType; + PyObject *v = PyStructSequence_New((PyTypeObject *)StatVFSResultType); + if (v == NULL) + return NULL; + + long flags = 0; + if (st.f_flags & MNT_RDONLY) { + flags |= ST_RDONLY; + } + if (st.f_flags & MNT_NOSUID) { + flags |= ST_NOSUID; + } + + _Static_assert(sizeof(st.f_blocks) == sizeof(long long), "assuming large file"); + + PyStructSequence_SET_ITEM(v, 0, PyLong_FromLong((long) st.f_iosize)); + PyStructSequence_SET_ITEM(v, 1, PyLong_FromLong((long) st.f_bsize)); + PyStructSequence_SET_ITEM(v, 2, + PyLong_FromLongLong((long long) st.f_blocks)); + PyStructSequence_SET_ITEM(v, 3, + PyLong_FromLongLong((long long) st.f_bfree)); + PyStructSequence_SET_ITEM(v, 4, + PyLong_FromLongLong((long long) st.f_bavail)); + PyStructSequence_SET_ITEM(v, 5, + PyLong_FromLongLong((long long) st.f_files)); + PyStructSequence_SET_ITEM(v, 6, + PyLong_FromLongLong((long long) st.f_ffree)); + PyStructSequence_SET_ITEM(v, 7, + PyLong_FromLongLong((long long) st.f_ffree)); + PyStructSequence_SET_ITEM(v, 8, PyLong_FromLong((long) flags)); + + PyStructSequence_SET_ITEM(v, 9, PyLong_FromLong((long) 
NAME_MAX)); + PyStructSequence_SET_ITEM(v, 10, PyLong_FromUnsignedLong(st.f_fsid.val[0])); + if (PyErr_Occurred()) { + Py_DECREF(v); + return NULL; + } + + return v; +} + +#else + + + static PyObject* _pystatvfs_fromstructstatvfs(PyObject *module, struct statvfs st) { PyObject *StatVFSResultType = get_posix_state(module)->StatVFSResultType; @@ -12937,6 +12996,8 @@ _pystatvfs_fromstructstatvfs(PyObject *module, struct statvfs st) { return v; } +#endif + /*[clinic input] os.fstatvfs @@ -12954,6 +13015,22 @@ os_fstatvfs_impl(PyObject *module, int fd) { int result; int async_err = 0; +#ifdef __APPLE__ + struct statfs st; + /* On macOS os.fstatvfs is implemented using fstatfs(2) because + * the former uses 32-bit values for block counts. + */ + do { + Py_BEGIN_ALLOW_THREADS + result = fstatfs(fd, &st); + Py_END_ALLOW_THREADS + } while (result != 0 && errno == EINTR && + !(async_err = PyErr_CheckSignals())); + if (result != 0) + return (!async_err) ? posix_error() : NULL; + + return _pystatvfs_fromstructstatfs(module, st); +#else struct statvfs st; do { @@ -12966,6 +13043,7 @@ os_fstatvfs_impl(PyObject *module, int fd) return (!async_err) ? posix_error() : NULL; return _pystatvfs_fromstructstatvfs(module, st); +#endif } #endif /* defined(HAVE_FSTATVFS) && defined(HAVE_SYS_STATVFS_H) */ @@ -12989,6 +13067,28 @@ os_statvfs_impl(PyObject *module, path_t *path) /*[clinic end generated code: output=87106dd1beb8556e input=3f5c35791c669bd9]*/ { int result; + +#ifdef __APPLE__ + /* On macOS os.statvfs is implemented using statfs(2)/fstatfs(2) because + * the former uses 32-bit values for block counts. 
+ */ + struct statfs st; + + Py_BEGIN_ALLOW_THREADS + if (path->fd != -1) { + result = fstatfs(path->fd, &st); + } + else + result = statfs(path->narrow, &st); + Py_END_ALLOW_THREADS + + if (result) { + return path_error(path); + } + + return _pystatvfs_fromstructstatfs(module, st); + +#else struct statvfs st; Py_BEGIN_ALLOW_THREADS @@ -13006,6 +13106,7 @@ os_statvfs_impl(PyObject *module, path_t *path) } return _pystatvfs_fromstructstatvfs(module, st); +#endif } #endif /* defined(HAVE_STATVFS) && defined(HAVE_SYS_STATVFS_H) */ From e2c403892400878707a20d4b7e183de505a64ca5 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 10 Feb 2024 12:21:35 +0200 Subject: [PATCH 046/126] gh-76763: Make chr() always raising ValueError for out-of-range values (GH-114882) Previously it raised OverflowError for very large or very small values. --- Lib/test/test_builtin.py | 11 +++++--- ...4-02-01-23-43-49.gh-issue-76763.o_2J6i.rst | 3 +++ Python/bltinmodule.c | 25 ++++++++++++++++--- Python/clinic/bltinmodule.c.h | 21 +--------------- 4 files changed, 32 insertions(+), 28 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-02-01-23-43-49.gh-issue-76763.o_2J6i.rst diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 7a3ab2274a58f2..9a0bf524e3943f 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -308,14 +308,13 @@ class C3(C2): pass self.assertTrue(callable(c3)) def test_chr(self): + self.assertEqual(chr(0), '\0') self.assertEqual(chr(32), ' ') self.assertEqual(chr(65), 'A') self.assertEqual(chr(97), 'a') self.assertEqual(chr(0xff), '\xff') - self.assertRaises(ValueError, chr, 1<<24) - self.assertEqual(chr(sys.maxunicode), - str('\\U0010ffff'.encode("ascii"), 'unicode-escape')) self.assertRaises(TypeError, chr) + self.assertRaises(TypeError, chr, 65.0) self.assertEqual(chr(0x0000FFFF), "\U0000FFFF") self.assertEqual(chr(0x00010000), "\U00010000") self.assertEqual(chr(0x00010001), "\U00010001") @@ -327,7 +326,11 @@ 
def test_chr(self): self.assertEqual(chr(0x0010FFFF), "\U0010FFFF") self.assertRaises(ValueError, chr, -1) self.assertRaises(ValueError, chr, 0x00110000) - self.assertRaises((OverflowError, ValueError), chr, 2**32) + self.assertRaises(ValueError, chr, 1<<24) + self.assertRaises(ValueError, chr, 2**32-1) + self.assertRaises(ValueError, chr, -2**32) + self.assertRaises(ValueError, chr, 2**1000) + self.assertRaises(ValueError, chr, -2**1000) def test_cmp(self): self.assertTrue(not hasattr(builtins, "cmp")) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-02-01-23-43-49.gh-issue-76763.o_2J6i.rst b/Misc/NEWS.d/next/Core and Builtins/2024-02-01-23-43-49.gh-issue-76763.o_2J6i.rst new file mode 100644 index 00000000000000..d35d3d87073ddd --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-02-01-23-43-49.gh-issue-76763.o_2J6i.rst @@ -0,0 +1,3 @@ +The :func:`chr` builtin function now always raises :exc:`ValueError` for +values outside the valid range. Previously it raised :exc:`OverflowError` for +very large or small values. diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 31c1bf07e8fb91..b0074962b73799 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -703,17 +703,34 @@ builtin_format_impl(PyObject *module, PyObject *value, PyObject *format_spec) /*[clinic input] chr as builtin_chr - i: int + i: object / Return a Unicode string of one character with ordinal i; 0 <= i <= 0x10ffff. [clinic start generated code]*/ static PyObject * -builtin_chr_impl(PyObject *module, int i) -/*[clinic end generated code: output=c733afcd200afcb7 input=3f604ef45a70750d]*/ +builtin_chr(PyObject *module, PyObject *i) +/*[clinic end generated code: output=d34f25b8035a9b10 input=f919867f0ba2f496]*/ { - return PyUnicode_FromOrdinal(i); + int overflow; + long v = PyLong_AsLongAndOverflow(i, &overflow); + if (v == -1 && PyErr_Occurred()) { + return NULL; + } + if (overflow) { + v = overflow < 0 ? 
INT_MIN : INT_MAX; + /* Allow PyUnicode_FromOrdinal() to raise an exception */ + } +#if SIZEOF_INT < SIZEOF_LONG + else if (v < INT_MIN) { + v = INT_MIN; + } + else if (v > INT_MAX) { + v = INT_MAX; + } +#endif + return PyUnicode_FromOrdinal(v); } diff --git a/Python/clinic/bltinmodule.c.h b/Python/clinic/bltinmodule.c.h index 8d40e659b54a57..3898f987cd61ea 100644 --- a/Python/clinic/bltinmodule.c.h +++ b/Python/clinic/bltinmodule.c.h @@ -233,25 +233,6 @@ PyDoc_STRVAR(builtin_chr__doc__, #define BUILTIN_CHR_METHODDEF \ {"chr", (PyCFunction)builtin_chr, METH_O, builtin_chr__doc__}, -static PyObject * -builtin_chr_impl(PyObject *module, int i); - -static PyObject * -builtin_chr(PyObject *module, PyObject *arg) -{ - PyObject *return_value = NULL; - int i; - - i = PyLong_AsInt(arg); - if (i == -1 && PyErr_Occurred()) { - goto exit; - } - return_value = builtin_chr_impl(module, i); - -exit: - return return_value; -} - PyDoc_STRVAR(builtin_compile__doc__, "compile($module, /, source, filename, mode, flags=0,\n" " dont_inherit=False, optimize=-1, *, _feature_version=-1)\n" @@ -1212,4 +1193,4 @@ builtin_issubclass(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: return return_value; } -/*[clinic end generated code: output=31bded5d08647a57 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=643a8d5f900e0c36 input=a9049054013a1b77]*/ From 597fad07f7bf709ac7084ac20aa3647995759b01 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 10 Feb 2024 15:17:33 +0200 Subject: [PATCH 047/126] gh-115059: Remove debugging code in test_io (GH-115240) --- Lib/test/test_io.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index a24579dcc878cf..cc387afa391909 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -2531,36 +2531,6 @@ def test_interleaved_readline_write(self): f.flush() self.assertEqual(raw.getvalue(), b'1b\n2def\n3\n') - def test_xxx(self): - with 
self.BytesIO(b'abcdefgh') as raw: - with self.tp(raw) as f: - f.write(b'123') - self.assertEqual(f.read(), b'defgh') - f.write(b'456') - f.flush() - self.assertEqual(raw.getvalue(), b'123defgh456') - with self.BytesIO(b'abcdefgh') as raw: - with self.tp(raw) as f: - f.write(b'123') - self.assertEqual(f.read(3), b'def') - f.write(b'456') - f.flush() - self.assertEqual(raw.getvalue(), b'123def456') - with self.BytesIO(b'abcdefgh') as raw: - with self.tp(raw) as f: - f.write(b'123') - self.assertEqual(f.read1(), b'defgh') - f.write(b'456') - f.flush() - self.assertEqual(raw.getvalue(), b'123defgh456') - with self.BytesIO(b'abcdefgh') as raw: - with self.tp(raw) as f: - f.write(b'123') - self.assertEqual(f.read1(3), b'def') - f.write(b'456') - f.flush() - self.assertEqual(raw.getvalue(), b'123def456') - # You can't construct a BufferedRandom over a non-seekable stream. test_unseekable = None From 5319c66550a6d6c6698dea75c0a0ee005873ce61 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Sat, 10 Feb 2024 17:37:19 +0300 Subject: [PATCH 048/126] gh-102840: Fix confused traceback when floordiv or mod operations happens between Fraction and complex objects (GH-102842) --- Lib/fractions.py | 13 ++++----- Lib/test/test_fractions.py | 27 +++++++++++++++++++ ...-02-10-15-24-20.gh-issue-102840.4mnDq1.rst | 3 +++ 3 files changed, 37 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-02-10-15-24-20.gh-issue-102840.4mnDq1.rst diff --git a/Lib/fractions.py b/Lib/fractions.py index 389ab386b6a8a4..f8c6c9c438c737 100644 --- a/Lib/fractions.py +++ b/Lib/fractions.py @@ -579,7 +579,8 @@ def __format__(self, format_spec, /): f"for object of type {type(self).__name__!r}" ) - def _operator_fallbacks(monomorphic_operator, fallback_operator): + def _operator_fallbacks(monomorphic_operator, fallback_operator, + handle_complex=True): """Generates forward and reverse operators given a purely-rational operator and a function from the operator module. 
@@ -666,7 +667,7 @@ def forward(a, b): return monomorphic_operator(a, Fraction(b)) elif isinstance(b, float): return fallback_operator(float(a), b) - elif isinstance(b, complex): + elif handle_complex and isinstance(b, complex): return fallback_operator(complex(a), b) else: return NotImplemented @@ -679,7 +680,7 @@ def reverse(b, a): return monomorphic_operator(Fraction(a), b) elif isinstance(a, numbers.Real): return fallback_operator(float(a), float(b)) - elif isinstance(a, numbers.Complex): + elif handle_complex and isinstance(a, numbers.Complex): return fallback_operator(complex(a), complex(b)) else: return NotImplemented @@ -830,7 +831,7 @@ def _floordiv(a, b): """a // b""" return (a.numerator * b.denominator) // (a.denominator * b.numerator) - __floordiv__, __rfloordiv__ = _operator_fallbacks(_floordiv, operator.floordiv) + __floordiv__, __rfloordiv__ = _operator_fallbacks(_floordiv, operator.floordiv, False) def _divmod(a, b): """(a // b, a % b)""" @@ -838,14 +839,14 @@ def _divmod(a, b): div, n_mod = divmod(a.numerator * db, da * b.numerator) return div, Fraction(n_mod, da * db) - __divmod__, __rdivmod__ = _operator_fallbacks(_divmod, divmod) + __divmod__, __rdivmod__ = _operator_fallbacks(_divmod, divmod, False) def _mod(a, b): """a % b""" da, db = a.denominator, b.denominator return Fraction((a.numerator * db) % (b.numerator * da), da * db) - __mod__, __rmod__ = _operator_fallbacks(_mod, operator.mod) + __mod__, __rmod__ = _operator_fallbacks(_mod, operator.mod, False) def __pow__(a, b): """a ** b diff --git a/Lib/test/test_fractions.py b/Lib/test/test_fractions.py index af3cb214ab0ac1..b45bd098a36684 100644 --- a/Lib/test/test_fractions.py +++ b/Lib/test/test_fractions.py @@ -1314,6 +1314,33 @@ def test_float_format_testfile(self): self.assertEqual(float(format(f, fmt2)), float(rhs)) self.assertEqual(float(format(-f, fmt2)), float('-' + rhs)) + def test_complex_handling(self): + # See issue gh-102840 for more details. 
+ + a = F(1, 2) + b = 1j + message = "unsupported operand type(s) for %s: '%s' and '%s'" + # test forward + self.assertRaisesMessage(TypeError, + message % ("%", "Fraction", "complex"), + operator.mod, a, b) + self.assertRaisesMessage(TypeError, + message % ("//", "Fraction", "complex"), + operator.floordiv, a, b) + self.assertRaisesMessage(TypeError, + message % ("divmod()", "Fraction", "complex"), + divmod, a, b) + # test reverse + self.assertRaisesMessage(TypeError, + message % ("%", "complex", "Fraction"), + operator.mod, b, a) + self.assertRaisesMessage(TypeError, + message % ("//", "complex", "Fraction"), + operator.floordiv, b, a) + self.assertRaisesMessage(TypeError, + message % ("divmod()", "complex", "Fraction"), + divmod, b, a) + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Library/2024-02-10-15-24-20.gh-issue-102840.4mnDq1.rst b/Misc/NEWS.d/next/Library/2024-02-10-15-24-20.gh-issue-102840.4mnDq1.rst new file mode 100644 index 00000000000000..52668a9424a976 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-02-10-15-24-20.gh-issue-102840.4mnDq1.rst @@ -0,0 +1,3 @@ +Fix confused traceback when floordiv, mod, or divmod operations happen +between instances of :class:`fractions.Fraction` and :class:`complex`.
+ From 9d1a353230f555fc28239c5ca1e82b758084e02a Mon Sep 17 00:00:00 2001 From: Mike Zimin <122507876+mikeziminio@users.noreply.github.com> Date: Sat, 10 Feb 2024 19:59:46 +0400 Subject: [PATCH 049/126] gh-114894: add array.array.clear() method (#114919) Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: AN Long Co-authored-by: Jelle Zijlstra --- Doc/library/array.rst | 7 ++++++ Doc/whatsnew/3.13.rst | 3 +++ Lib/test/test_array.py | 23 +++++++++++++++++++ Lib/test/test_collections.py | 2 ++ ...-02-02-15-50-13.gh-issue-114894.DF-dSd.rst | 1 + Modules/arraymodule.c | 16 +++++++++++++ Modules/clinic/arraymodule.c.h | 20 +++++++++++++++- 7 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-02-02-15-50-13.gh-issue-114894.DF-dSd.rst diff --git a/Doc/library/array.rst b/Doc/library/array.rst index a0e8bb20a098fd..043badf05ffc12 100644 --- a/Doc/library/array.rst +++ b/Doc/library/array.rst @@ -215,6 +215,13 @@ The module defines the following type: Remove the first occurrence of *x* from the array. + .. method:: clear() + + Remove all elements from the array. + + .. versionadded:: 3.13 + + .. method:: reverse() Reverse the order of the items in the array. diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index de79bd979aff80..aee37737a9990a 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -185,6 +185,9 @@ array It can be used instead of ``'u'`` type code, which is deprecated. (Contributed by Inada Naoki in :gh:`80480`.) +* Add ``clear()`` method in order to implement ``MutableSequence``. + (Contributed by Mike Zimin in :gh:`114894`.) 
+ ast --- diff --git a/Lib/test/test_array.py b/Lib/test/test_array.py index a219fa365e7f20..95383be9659eb9 100755 --- a/Lib/test/test_array.py +++ b/Lib/test/test_array.py @@ -1014,6 +1014,29 @@ def test_pop(self): array.array(self.typecode, self.example[3:]+self.example[:-1]) ) + def test_clear(self): + a = array.array(self.typecode, self.example) + with self.assertRaises(TypeError): + a.clear(42) + a.clear() + self.assertEqual(len(a), 0) + self.assertEqual(a.typecode, self.typecode) + + a = array.array(self.typecode) + a.clear() + self.assertEqual(len(a), 0) + self.assertEqual(a.typecode, self.typecode) + + a = array.array(self.typecode, self.example) + a.clear() + a.append(self.example[2]) + a.append(self.example[3]) + self.assertEqual(a, array.array(self.typecode, self.example[2:4])) + + with memoryview(a): + with self.assertRaises(BufferError): + a.clear() + def test_reverse(self): a = array.array(self.typecode, self.example) self.assertRaises(TypeError, a.reverse, 42) diff --git a/Lib/test/test_collections.py b/Lib/test/test_collections.py index 7e6f811e17cfa2..1fb492ecebd668 100644 --- a/Lib/test/test_collections.py +++ b/Lib/test/test_collections.py @@ -1,5 +1,6 @@ """Unit tests for collections.py.""" +import array import collections import copy import doctest @@ -1972,6 +1973,7 @@ def test_MutableSequence(self): for sample in [list, bytearray, deque]: self.assertIsInstance(sample(), MutableSequence) self.assertTrue(issubclass(sample, MutableSequence)) + self.assertTrue(issubclass(array.array, MutableSequence)) self.assertFalse(issubclass(str, MutableSequence)) self.validate_abstract_methods(MutableSequence, '__contains__', '__iter__', '__len__', '__getitem__', '__setitem__', '__delitem__', 'insert') diff --git a/Misc/NEWS.d/next/Library/2024-02-02-15-50-13.gh-issue-114894.DF-dSd.rst b/Misc/NEWS.d/next/Library/2024-02-02-15-50-13.gh-issue-114894.DF-dSd.rst new file mode 100644 index 00000000000000..ec620f2aae3f03 --- /dev/null +++ 
b/Misc/NEWS.d/next/Library/2024-02-02-15-50-13.gh-issue-114894.DF-dSd.rst @@ -0,0 +1 @@ +Add :meth:`array.array.clear`. diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index b97ade6126fa08..df09d9d84789f7 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -868,6 +868,21 @@ array_slice(arrayobject *a, Py_ssize_t ilow, Py_ssize_t ihigh) return (PyObject *)np; } +/*[clinic input] +array.array.clear + +Remove all items from the array. +[clinic start generated code]*/ + +static PyObject * +array_array_clear_impl(arrayobject *self) +/*[clinic end generated code: output=5efe0417062210a9 input=5dffa30e94e717a4]*/ +{ + if (array_resize(self, 0) == -1) { + return NULL; + } + Py_RETURN_NONE; +} /*[clinic input] array.array.__copy__ @@ -2342,6 +2357,7 @@ static PyMethodDef array_methods[] = { ARRAY_ARRAY_APPEND_METHODDEF ARRAY_ARRAY_BUFFER_INFO_METHODDEF ARRAY_ARRAY_BYTESWAP_METHODDEF + ARRAY_ARRAY_CLEAR_METHODDEF ARRAY_ARRAY___COPY___METHODDEF ARRAY_ARRAY_COUNT_METHODDEF ARRAY_ARRAY___DEEPCOPY___METHODDEF diff --git a/Modules/clinic/arraymodule.c.h b/Modules/clinic/arraymodule.c.h index 0b764e43e19437..60a03fe012550e 100644 --- a/Modules/clinic/arraymodule.c.h +++ b/Modules/clinic/arraymodule.c.h @@ -5,6 +5,24 @@ preserve #include "pycore_abstract.h" // _PyNumber_Index() #include "pycore_modsupport.h" // _PyArg_CheckPositional() +PyDoc_STRVAR(array_array_clear__doc__, +"clear($self, /)\n" +"--\n" +"\n" +"Remove all items from the array."); + +#define ARRAY_ARRAY_CLEAR_METHODDEF \ + {"clear", (PyCFunction)array_array_clear, METH_NOARGS, array_array_clear__doc__}, + +static PyObject * +array_array_clear_impl(arrayobject *self); + +static PyObject * +array_array_clear(arrayobject *self, PyObject *Py_UNUSED(ignored)) +{ + return array_array_clear_impl(self); +} + PyDoc_STRVAR(array_array___copy____doc__, "__copy__($self, /)\n" "--\n" @@ -667,4 +685,4 @@ PyDoc_STRVAR(array_arrayiterator___setstate____doc__, #define 
ARRAY_ARRAYITERATOR___SETSTATE___METHODDEF \ {"__setstate__", (PyCFunction)array_arrayiterator___setstate__, METH_O, array_arrayiterator___setstate____doc__}, -/*[clinic end generated code: output=3be987238a4bb431 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=52c55d9b1d026c1c input=a9049054013a1b77]*/ From 6f93b4df92b8fbf80529cb6435789f5a75664a20 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 10 Feb 2024 18:12:34 +0000 Subject: [PATCH 050/126] GH-115060: Speed up `pathlib.Path.glob()` by removing redundant regex matching (#115061) When expanding and filtering paths for a `**` wildcard segment, build an `re.Pattern` object from the subsequent pattern parts, rather than the entire pattern, and match against the `os.DirEntry` object prior to instantiating a path object. Also skip compiling a pattern when expanding a `*` wildcard segment. --- Lib/pathlib/__init__.py | 8 +- Lib/pathlib/_abc.py | 82 +++++++++++++------ Lib/test/test_pathlib/test_pathlib.py | 13 +++ ...-02-06-03-55-46.gh-issue-115060.EkWRpP.rst | 1 + 4 files changed, 76 insertions(+), 28 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-02-06-03-55-46.gh-issue-115060.EkWRpP.rst diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index 65ce836765c42b..46834b1a76a6eb 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -587,9 +587,13 @@ def iterdir(self): def _scandir(self): return os.scandir(self) - def _make_child_entry(self, entry): + def _direntry_str(self, entry): + # Transform an entry yielded from _scandir() into a path string. + return entry.name if str(self) == '.' else entry.path + + def _make_child_direntry(self, entry): # Transform an entry yielded from _scandir() into a path object. - path_str = entry.name if str(self) == '.' 
else entry.path + path_str = self._direntry_str(entry) path = self.with_segments(path_str) path._str = path_str path._drv = self.drive diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index e4b1201a3703c3..27c6b4e367a050 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -86,19 +86,29 @@ def _select_children(parent_paths, dir_only, follow_symlinks, match): continue except OSError: continue - if match(entry.name): - yield parent_path._make_child_entry(entry) + # Avoid cost of making a path object for non-matching paths by + # matching against the os.DirEntry.name string. + if match is None or match(entry.name): + yield parent_path._make_child_direntry(entry) -def _select_recursive(parent_paths, dir_only, follow_symlinks): - """Yield given paths and all their subdirectories, recursively.""" +def _select_recursive(parent_paths, dir_only, follow_symlinks, match): + """Yield given paths and all their children, recursively, filtering by + string and type. + """ if follow_symlinks is None: follow_symlinks = False for parent_path in parent_paths: + if match is not None: + # If we're filtering paths through a regex, record the length of + # the parent path. We'll pass it to match(path, pos=...) later. + parent_len = len(str(parent_path._make_child_relpath('_'))) - 1 paths = [parent_path._make_child_relpath('')] while paths: path = paths.pop() - yield path + if match is None or match(str(path), parent_len): + # Yield *directory* path that matches pattern (if any). + yield path try: # We must close the scandir() object before proceeding to # avoid exhausting file descriptors when globbing deep trees. @@ -108,14 +118,22 @@ def _select_recursive(parent_paths, dir_only, follow_symlinks): pass else: for entry in entries: + # Handle directory entry. try: if entry.is_dir(follow_symlinks=follow_symlinks): - paths.append(path._make_child_entry(entry)) + # Recurse into this directory. 
+ paths.append(path._make_child_direntry(entry)) continue except OSError: pass + + # Handle file entry. if not dir_only: - yield path._make_child_entry(entry) + # Avoid cost of making a path object for non-matching + # files by matching against the os.DirEntry object. + if match is None or match(path._direntry_str(entry), parent_len): + # Yield *file* path that matches pattern (if any). + yield path._make_child_direntry(entry) def _select_unique(paths): @@ -750,8 +768,14 @@ def _scandir(self): from contextlib import nullcontext return nullcontext(self.iterdir()) - def _make_child_entry(self, entry): + def _direntry_str(self, entry): + # Transform an entry yielded from _scandir() into a path string. + # PathBase._scandir() yields PathBase objects, so use str(). + return str(entry) + + def _make_child_direntry(self, entry): # Transform an entry yielded from _scandir() into a path object. + # PathBase._scandir() yields PathBase objects, so this is a no-op. return entry def _make_child_relpath(self, name): @@ -769,43 +793,49 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None): stack = pattern._pattern_stack specials = ('', '.', '..') - filter_paths = False deduplicate_paths = False sep = self.pathmod.sep paths = iter([self] if self.is_dir() else []) while stack: part = stack.pop() if part in specials: + # Join special component (e.g. '..') onto paths. paths = _select_special(paths, part) + elif part == '**': - # Consume adjacent '**' components. + # Consume following '**' components, which have no effect. while stack and stack[-1] == '**': stack.pop() - # Consume adjacent non-special components and enable post-walk - # regex filtering, provided we're treating symlinks consistently. + # Consume following non-special components, provided we're + # treating symlinks consistently. Each component is joined + # onto 'part', which is used to generate an re.Pattern object. 
if follow_symlinks is not None: while stack and stack[-1] not in specials: - filter_paths = True - stack.pop() + part += sep + stack.pop() - dir_only = bool(stack) - paths = _select_recursive(paths, dir_only, follow_symlinks) + # If the previous loop consumed pattern components, compile an + # re.Pattern object based on those components. + match = _compile_pattern(part, sep, case_sensitive) if part != '**' else None + + # Recursively walk directories, filtering by type and regex. + paths = _select_recursive(paths, bool(stack), follow_symlinks, match) + + # De-duplicate if we've already seen a '**' component. if deduplicate_paths: - # De-duplicate if we've already seen a '**' component. paths = _select_unique(paths) deduplicate_paths = True + elif '**' in part: raise ValueError("Invalid pattern: '**' can only be an entire path component") + else: - dir_only = bool(stack) - match = _compile_pattern(part, sep, case_sensitive) - paths = _select_children(paths, dir_only, follow_symlinks, match) - if filter_paths: - # Filter out paths that don't match pattern. - prefix_len = len(str(self._make_child_relpath('_'))) - 1 - match = _compile_pattern(pattern._pattern_str, sep, case_sensitive) - paths = (path for path in paths if match(path._pattern_str, prefix_len)) + # If the pattern component isn't '*', compile an re.Pattern + # object based on the component. + match = _compile_pattern(part, sep, case_sensitive) if part != '*' else None + + # Iterate over directories' children filtering by type and regex. 
+ paths = _select_children(paths, bool(stack), follow_symlinks, match) return paths def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None): @@ -854,7 +884,7 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): if is_dir: if not top_down: - paths.append(path._make_child_entry(entry)) + paths.append(path._make_child_direntry(entry)) dirnames.append(entry.name) else: filenames.append(entry.name) diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 2b166451243775..c0dcf314da4bfc 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -1250,6 +1250,19 @@ def test_glob_pathlike(self): self.assertEqual(expect, set(p.glob(P(pattern)))) self.assertEqual(expect, set(p.glob(FakePath(pattern)))) + @needs_symlinks + def test_glob_dot(self): + P = self.cls + with os_helper.change_cwd(P(self.base, "dirC")): + self.assertEqual( + set(P('.').glob('*')), {P("fileC"), P("novel.txt"), P("dirD")}) + self.assertEqual( + set(P('.').glob('**')), {P("fileC"), P("novel.txt"), P("dirD"), P("dirD/fileD"), P(".")}) + self.assertEqual( + set(P('.').glob('**/*')), {P("fileC"), P("novel.txt"), P("dirD"), P("dirD/fileD")}) + self.assertEqual( + set(P('.').glob('**/*/*')), {P("dirD/fileD")}) + def test_rglob_pathlike(self): P = self.cls p = P(self.base, "dirC") diff --git a/Misc/NEWS.d/next/Library/2024-02-06-03-55-46.gh-issue-115060.EkWRpP.rst b/Misc/NEWS.d/next/Library/2024-02-06-03-55-46.gh-issue-115060.EkWRpP.rst new file mode 100644 index 00000000000000..b358eeb569626f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-02-06-03-55-46.gh-issue-115060.EkWRpP.rst @@ -0,0 +1 @@ +Speed up :meth:`pathlib.Path.glob` by removing redundant regex matching. 
From 33f56b743285f8419e92cfabe673fa165165a580 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sat, 10 Feb 2024 21:34:22 +0300 Subject: [PATCH 051/126] gh-115252: Fix `test_enum` with `-OO` mode (GH-115253) --- Lib/test/test_enum.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_enum.py b/Lib/test/test_enum.py index 39c1ae0ad5a078..f7503331c1ac1d 100644 --- a/Lib/test/test_enum.py +++ b/Lib/test/test_enum.py @@ -4891,11 +4891,11 @@ class Color(enum.Enum) | | Data and other attributes defined here: | - | YELLOW = + | CYAN = | | MAGENTA = | - | CYAN = + | YELLOW = | | ---------------------------------------------------------------------- | Data descriptors inherited from enum.Enum: @@ -4905,7 +4905,18 @@ class Color(enum.Enum) | value | | ---------------------------------------------------------------------- - | Data descriptors inherited from enum.EnumType: + | Methods inherited from enum.EnumType: + | + | __contains__(value) from enum.EnumType + | + | __getitem__(name) from enum.EnumType + | + | __iter__() from enum.EnumType + | + | __len__() from enum.EnumType + | + | ---------------------------------------------------------------------- + | Readonly properties inherited from enum.EnumType: | | __members__""" From 3a5b38e3b465e00f133ff8074a2d4afb1392dfb5 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sun, 11 Feb 2024 00:48:28 +0300 Subject: [PATCH 052/126] gh-114670: Fix `_testbuffer` module initialization (#114672) --- Modules/_testbuffer.c | 127 ++++++++++++++++++++++++------------------ 1 file changed, 74 insertions(+), 53 deletions(-) diff --git a/Modules/_testbuffer.c b/Modules/_testbuffer.c index 5101834cfe1387..5084bcadb10f85 100644 --- a/Modules/_testbuffer.c +++ b/Modules/_testbuffer.c @@ -2816,70 +2816,91 @@ static struct PyModuleDef _testbuffermodule = { NULL }; - -PyMODINIT_FUNC -PyInit__testbuffer(void) +static int +_testbuffer_exec(PyObject *mod) { - PyObject *m; - - m = 
PyModule_Create(&_testbuffermodule); - if (m == NULL) - return NULL; - Py_SET_TYPE(&NDArray_Type, &PyType_Type); - Py_INCREF(&NDArray_Type); - PyModule_AddObject(m, "ndarray", (PyObject *)&NDArray_Type); + if (PyModule_AddType(mod, &NDArray_Type) < 0) { + return -1; + } Py_SET_TYPE(&StaticArray_Type, &PyType_Type); - Py_INCREF(&StaticArray_Type); - PyModule_AddObject(m, "staticarray", (PyObject *)&StaticArray_Type); + if (PyModule_AddType(mod, &StaticArray_Type) < 0) { + return -1; + } structmodule = PyImport_ImportModule("struct"); - if (structmodule == NULL) - return NULL; + if (structmodule == NULL) { + return -1; + } Struct = PyObject_GetAttrString(structmodule, "Struct"); + if (Struct == NULL) { + return -1; + } calcsize = PyObject_GetAttrString(structmodule, "calcsize"); - if (Struct == NULL || calcsize == NULL) - return NULL; + if (calcsize == NULL) { + return -1; + } simple_format = PyUnicode_FromString(simple_fmt); - if (simple_format == NULL) - return NULL; - - PyModule_AddIntMacro(m, ND_MAX_NDIM); - PyModule_AddIntMacro(m, ND_VAREXPORT); - PyModule_AddIntMacro(m, ND_WRITABLE); - PyModule_AddIntMacro(m, ND_FORTRAN); - PyModule_AddIntMacro(m, ND_SCALAR); - PyModule_AddIntMacro(m, ND_PIL); - PyModule_AddIntMacro(m, ND_GETBUF_FAIL); - PyModule_AddIntMacro(m, ND_GETBUF_UNDEFINED); - PyModule_AddIntMacro(m, ND_REDIRECT); - - PyModule_AddIntMacro(m, PyBUF_SIMPLE); - PyModule_AddIntMacro(m, PyBUF_WRITABLE); - PyModule_AddIntMacro(m, PyBUF_FORMAT); - PyModule_AddIntMacro(m, PyBUF_ND); - PyModule_AddIntMacro(m, PyBUF_STRIDES); - PyModule_AddIntMacro(m, PyBUF_INDIRECT); - PyModule_AddIntMacro(m, PyBUF_C_CONTIGUOUS); - PyModule_AddIntMacro(m, PyBUF_F_CONTIGUOUS); - PyModule_AddIntMacro(m, PyBUF_ANY_CONTIGUOUS); - PyModule_AddIntMacro(m, PyBUF_FULL); - PyModule_AddIntMacro(m, PyBUF_FULL_RO); - PyModule_AddIntMacro(m, PyBUF_RECORDS); - PyModule_AddIntMacro(m, PyBUF_RECORDS_RO); - PyModule_AddIntMacro(m, PyBUF_STRIDED); - PyModule_AddIntMacro(m, PyBUF_STRIDED_RO); - 
PyModule_AddIntMacro(m, PyBUF_CONTIG); - PyModule_AddIntMacro(m, PyBUF_CONTIG_RO); - - PyModule_AddIntMacro(m, PyBUF_READ); - PyModule_AddIntMacro(m, PyBUF_WRITE); - - return m; -} + if (simple_format == NULL) { + return -1; + } +#define ADD_INT_MACRO(mod, macro) \ + do { \ + if (PyModule_AddIntConstant(mod, #macro, macro) < 0) { \ + return -1; \ + } \ + } while (0) + + ADD_INT_MACRO(mod, ND_MAX_NDIM); + ADD_INT_MACRO(mod, ND_VAREXPORT); + ADD_INT_MACRO(mod, ND_WRITABLE); + ADD_INT_MACRO(mod, ND_FORTRAN); + ADD_INT_MACRO(mod, ND_SCALAR); + ADD_INT_MACRO(mod, ND_PIL); + ADD_INT_MACRO(mod, ND_GETBUF_FAIL); + ADD_INT_MACRO(mod, ND_GETBUF_UNDEFINED); + ADD_INT_MACRO(mod, ND_REDIRECT); + + ADD_INT_MACRO(mod, PyBUF_SIMPLE); + ADD_INT_MACRO(mod, PyBUF_WRITABLE); + ADD_INT_MACRO(mod, PyBUF_FORMAT); + ADD_INT_MACRO(mod, PyBUF_ND); + ADD_INT_MACRO(mod, PyBUF_STRIDES); + ADD_INT_MACRO(mod, PyBUF_INDIRECT); + ADD_INT_MACRO(mod, PyBUF_C_CONTIGUOUS); + ADD_INT_MACRO(mod, PyBUF_F_CONTIGUOUS); + ADD_INT_MACRO(mod, PyBUF_ANY_CONTIGUOUS); + ADD_INT_MACRO(mod, PyBUF_FULL); + ADD_INT_MACRO(mod, PyBUF_FULL_RO); + ADD_INT_MACRO(mod, PyBUF_RECORDS); + ADD_INT_MACRO(mod, PyBUF_RECORDS_RO); + ADD_INT_MACRO(mod, PyBUF_STRIDED); + ADD_INT_MACRO(mod, PyBUF_STRIDED_RO); + ADD_INT_MACRO(mod, PyBUF_CONTIG); + ADD_INT_MACRO(mod, PyBUF_CONTIG_RO); + + ADD_INT_MACRO(mod, PyBUF_READ); + ADD_INT_MACRO(mod, PyBUF_WRITE); + +#undef ADD_INT_MACRO + return 0; +} +PyMODINIT_FUNC +PyInit__testbuffer(void) +{ + PyObject *mod = PyModule_Create(&_testbuffermodule); + if (mod == NULL) { + return NULL; + } + if (_testbuffer_exec(mod) < 0) { + Py_DECREF(mod); + return NULL; + } + return mod; +} From b70a68fbd6b72a25b5ef430603e39c9e40f40d29 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sun, 11 Feb 2024 00:51:05 +0300 Subject: [PATCH 053/126] gh-115254: Fix `test_property` with `-00` mode (#115255) --- Lib/test/test_property.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_property.py 
b/Lib/test/test_property.py index c12c908d2ee32d..8ace9fd17ab96e 100644 --- a/Lib/test/test_property.py +++ b/Lib/test/test_property.py @@ -224,6 +224,7 @@ class PropertySubSlots(property): class PropertySubclassTests(unittest.TestCase): + @support.requires_docstrings def test_slots_docstring_copy_exception(self): # A special case error that we preserve despite the GH-98963 behavior # that would otherwise silently ignore this error. From 4821f08674e290a396d27aa8256fd5b8a121f3d6 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sun, 11 Feb 2024 01:59:23 +0300 Subject: [PATCH 054/126] gh-101100: Fix sphinx warnings in `c-api/gcsupport.rst` (#114786) --- Doc/c-api/gcsupport.rst | 11 ++++++++--- Doc/tools/.nitignore | 1 - Misc/NEWS.d/3.12.0b1.rst | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/Doc/c-api/gcsupport.rst b/Doc/c-api/gcsupport.rst index 6b2494ee4f0ed4..621da3eb069949 100644 --- a/Doc/c-api/gcsupport.rst +++ b/Doc/c-api/gcsupport.rst @@ -83,10 +83,15 @@ rules: .. versionadded:: 3.12 -.. c:function:: TYPE* PyObject_GC_Resize(TYPE, PyVarObject *op, Py_ssize_t newsize) +.. c:macro:: PyObject_GC_Resize(TYPE, op, newsize) - Resize an object allocated by :c:macro:`PyObject_NewVar`. Returns the - resized object or ``NULL`` on failure. *op* must not be tracked by the collector yet. + Resize an object allocated by :c:macro:`PyObject_NewVar`. + Returns the resized object of type ``TYPE*`` (refers to any C type) + or ``NULL`` on failure. + + *op* must be of type :c:expr:`PyVarObject *` + and must not be tracked by the collector yet. + *newsize* must be of type :c:type:`Py_ssize_t`. .. 
c:function:: void PyObject_GC_Track(PyObject *op) diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 9db02c5c3c73c9..1d1b16166e906c 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -5,7 +5,6 @@ Doc/c-api/descriptor.rst Doc/c-api/exceptions.rst Doc/c-api/float.rst -Doc/c-api/gcsupport.rst Doc/c-api/init.rst Doc/c-api/init_config.rst Doc/c-api/intro.rst diff --git a/Misc/NEWS.d/3.12.0b1.rst b/Misc/NEWS.d/3.12.0b1.rst index 211513d05d0040..21f2c748f40548 100644 --- a/Misc/NEWS.d/3.12.0b1.rst +++ b/Misc/NEWS.d/3.12.0b1.rst @@ -2371,7 +2371,7 @@ Add a new C-API function to eagerly assign a version tag to a PyTypeObject: .. nonce: _paFIF .. section: C API -:c:func:`PyObject_GC_Resize` should calculate preheader size if needed. +:c:macro:`PyObject_GC_Resize` should calculate preheader size if needed. Patch by Donghee Na. .. From 1a6e2138773b94fdae449b658a9983cd1fc0f08a Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Sat, 10 Feb 2024 22:14:25 -0500 Subject: [PATCH 055/126] gh-115258: Temporarily disable test on Windows (#115269) The "test_shutdown_all_methods_in_many_threads" test times out on the Windows CI. This skips the test on Windows until we figure out the root cause. --- Lib/test/test_queue.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Lib/test/test_queue.py b/Lib/test/test_queue.py index e3d4d566cdda48..d308a212999429 100644 --- a/Lib/test/test_queue.py +++ b/Lib/test/test_queue.py @@ -2,6 +2,7 @@ # to ensure the Queue locks remain stable. 
import itertools import random +import sys import threading import time import unittest @@ -402,9 +403,11 @@ def _shutdown_all_methods_in_many_threads(self, immediate): for thread in ps[1:]: thread.join() + @unittest.skipIf(sys.platform == "win32", "test times out (gh-115258)") def test_shutdown_all_methods_in_many_threads(self): return self._shutdown_all_methods_in_many_threads(False) + @unittest.skipIf(sys.platform == "win32", "test times out (gh-115258)") def test_shutdown_immediate_all_methods_in_many_threads(self): return self._shutdown_all_methods_in_many_threads(True) From 1f23837277e604f41589273aeb3a10377d416510 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sun, 11 Feb 2024 11:00:44 +0300 Subject: [PATCH 056/126] gh-115249: Fix `test_descr` with `-OO` mode (#115250) --- Lib/test/test_descr.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index beeab6cb7f254c..5404d8d3b99d5d 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -1594,7 +1594,11 @@ def f(cls, arg): cm = classmethod(f) cm_dict = {'__annotations__': {}, - '__doc__': "f docstring", + '__doc__': ( + "f docstring" + if support.HAVE_DOCSTRINGS + else None + ), '__module__': __name__, '__name__': 'f', '__qualname__': f.__qualname__} From f8e9c57067e32baab4ed2fd824b892c52ecb7225 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sun, 11 Feb 2024 11:51:25 +0300 Subject: [PATCH 057/126] gh-115274: Fix direct invocation of `testmock/testpatch.py` (#115275) --- Lib/test/test_unittest/testmock/testpatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_unittest/testmock/testpatch.py b/Lib/test/test_unittest/testmock/testpatch.py index 833d7da1f31a20..d0046d702a53f4 100644 --- a/Lib/test/test_unittest/testmock/testpatch.py +++ b/Lib/test/test_unittest/testmock/testpatch.py @@ -1912,7 +1912,7 @@ def foo(x=0): with patch.object(foo, '__module__', "testpatch2"): 
self.assertEqual(foo.__module__, "testpatch2") - self.assertEqual(foo.__module__, 'test.test_unittest.testmock.testpatch') + self.assertEqual(foo.__module__, __name__) with patch.object(foo, '__annotations__', dict([('s', 1, )])): self.assertEqual(foo.__annotations__, dict([('s', 1, )])) From 1b895914742d20ccebd1b56b1b0936b7e00eb95e Mon Sep 17 00:00:00 2001 From: Skip Montanaro Date: Sun, 11 Feb 2024 03:21:10 -0600 Subject: [PATCH 058/126] gh-101100: Fix dangling refs in bdb.rst (#114983) Co-authored-by: AN Long --- Doc/library/bdb.rst | 18 ++++++++++-------- Doc/tools/.nitignore | 1 - 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/Doc/library/bdb.rst b/Doc/library/bdb.rst index 52f0ca7c013482..7bf4308a96d0f5 100644 --- a/Doc/library/bdb.rst +++ b/Doc/library/bdb.rst @@ -148,8 +148,8 @@ The :mod:`bdb` module also defines two classes: .. method:: reset() - Set the :attr:`botframe`, :attr:`stopframe`, :attr:`returnframe` and - :attr:`quitting` attributes with values ready to start debugging. + Set the :attr:`!botframe`, :attr:`!stopframe`, :attr:`!returnframe` and + :attr:`quitting ` attributes with values ready to start debugging. .. method:: trace_dispatch(frame, event, arg) @@ -182,7 +182,7 @@ The :mod:`bdb` module also defines two classes: If the debugger should stop on the current line, invoke the :meth:`user_line` method (which should be overridden in subclasses). - Raise a :exc:`BdbQuit` exception if the :attr:`Bdb.quitting` flag is set + Raise a :exc:`BdbQuit` exception if the :attr:`quitting ` flag is set (which can be set from :meth:`user_line`). Return a reference to the :meth:`trace_dispatch` method for further tracing in that scope. @@ -190,7 +190,7 @@ The :mod:`bdb` module also defines two classes: If the debugger should stop on this function call, invoke the :meth:`user_call` method (which should be overridden in subclasses). 
- Raise a :exc:`BdbQuit` exception if the :attr:`Bdb.quitting` flag is set + Raise a :exc:`BdbQuit` exception if the :attr:`quitting ` flag is set (which can be set from :meth:`user_call`). Return a reference to the :meth:`trace_dispatch` method for further tracing in that scope. @@ -198,7 +198,7 @@ The :mod:`bdb` module also defines two classes: If the debugger should stop on this function return, invoke the :meth:`user_return` method (which should be overridden in subclasses). - Raise a :exc:`BdbQuit` exception if the :attr:`Bdb.quitting` flag is set + Raise a :exc:`BdbQuit` exception if the :attr:`quitting ` flag is set (which can be set from :meth:`user_return`). Return a reference to the :meth:`trace_dispatch` method for further tracing in that scope. @@ -206,7 +206,7 @@ The :mod:`bdb` module also defines two classes: If the debugger should stop at this exception, invokes the :meth:`user_exception` method (which should be overridden in subclasses). - Raise a :exc:`BdbQuit` exception if the :attr:`Bdb.quitting` flag is set + Raise a :exc:`BdbQuit` exception if the :attr:`quitting ` flag is set (which can be set from :meth:`user_exception`). Return a reference to the :meth:`trace_dispatch` method for further tracing in that scope. @@ -293,7 +293,9 @@ The :mod:`bdb` module also defines two classes: .. method:: set_quit() - Set the :attr:`quitting` attribute to ``True``. This raises :exc:`BdbQuit` in + .. index:: single: quitting (bdb.Bdb attribute) + + Set the :attr:`!quitting` attribute to ``True``. This raises :exc:`BdbQuit` in the next call to one of the :meth:`!dispatch_\*` methods. @@ -383,7 +385,7 @@ The :mod:`bdb` module also defines two classes: .. method:: run(cmd, globals=None, locals=None) Debug a statement executed via the :func:`exec` function. *globals* - defaults to :attr:`__main__.__dict__`, *locals* defaults to *globals*. + defaults to :attr:`!__main__.__dict__`, *locals* defaults to *globals*. .. 
method:: runeval(expr, globals=None, locals=None) diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 1d1b16166e906c..2af116c2d79c54 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -21,7 +21,6 @@ Doc/library/ast.rst Doc/library/asyncio-extending.rst Doc/library/asyncio-policy.rst Doc/library/asyncio-subprocess.rst -Doc/library/bdb.rst Doc/library/collections.rst Doc/library/dbm.rst Doc/library/decimal.rst From 4b75032c88046505cad36157aa94a41fd37638f4 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Sun, 11 Feb 2024 01:59:50 -0800 Subject: [PATCH 059/126] gh-114807: multiprocessing: don't raise ImportError if _multiprocessing is missing (#114808) `_multiprocessing` is only used under the `if _winapi:` block, this moves the import to be within the `_winapi` ImportError handling try/except for equivalent treatment. --- Lib/multiprocessing/connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/multiprocessing/connection.py b/Lib/multiprocessing/connection.py index dbbf106f680964..c6a66a1bc963c3 100644 --- a/Lib/multiprocessing/connection.py +++ b/Lib/multiprocessing/connection.py @@ -19,7 +19,6 @@ import tempfile import itertools -import _multiprocessing from . import util @@ -28,6 +27,7 @@ _ForkingPickler = reduction.ForkingPickler try: + import _multiprocessing import _winapi from _winapi import WAIT_OBJECT_0, WAIT_ABANDONED_0, WAIT_TIMEOUT, INFINITE except ImportError: From 4a08e7b3431cd32a0daf22a33421cd3035343dc4 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 11 Feb 2024 12:08:39 +0200 Subject: [PATCH 060/126] gh-115133: Fix tests for XMLPullParser with Expat 2.6.0 (GH-115164) Feeding the parser by too small chunks defers parsing to prevent CVE-2023-52425. Future versions of Expat may be more reactive. 
--- Lib/test/test_xml_etree.py | 58 ++++++++++++------- ...-02-08-14-21-28.gh-issue-115133.ycl4ko.rst | 2 + 2 files changed, 38 insertions(+), 22 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index a435ec7822ea0c..c535d631bb646f 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -13,6 +13,7 @@ import operator import os import pickle +import pyexpat import sys import textwrap import types @@ -120,6 +121,10 @@ """ +fails_with_expat_2_6_0 = (unittest.expectedFailure + if pyexpat.version_info >= (2, 6, 0) else + lambda test: test) + def checkwarnings(*filters, quiet=False): def decorator(test): def newtest(*args, **kwargs): @@ -1480,28 +1485,37 @@ def assert_event_tags(self, parser, expected, max_events=None): self.assertEqual([(action, elem.tag) for action, elem in events], expected) - def test_simple_xml(self): - for chunk_size in (None, 1, 5): - with self.subTest(chunk_size=chunk_size): - parser = ET.XMLPullParser() - self.assert_event_tags(parser, []) - self._feed(parser, "\n", chunk_size) - self.assert_event_tags(parser, []) - self._feed(parser, - "\n text\n", chunk_size) - self.assert_event_tags(parser, [('end', 'element')]) - self._feed(parser, "texttail\n", chunk_size) - self._feed(parser, "\n", chunk_size) - self.assert_event_tags(parser, [ - ('end', 'element'), - ('end', 'empty-element'), - ]) - self._feed(parser, "\n", chunk_size) - self.assert_event_tags(parser, [('end', 'root')]) - self.assertIsNone(parser.close()) + def test_simple_xml(self, chunk_size=None): + parser = ET.XMLPullParser() + self.assert_event_tags(parser, []) + self._feed(parser, "\n", chunk_size) + self.assert_event_tags(parser, []) + self._feed(parser, + "\n text\n", chunk_size) + self.assert_event_tags(parser, [('end', 'element')]) + self._feed(parser, "texttail\n", chunk_size) + self._feed(parser, "\n", chunk_size) + 
self.assert_event_tags(parser, [ + ('end', 'element'), + ('end', 'empty-element'), + ]) + self._feed(parser, "\n", chunk_size) + self.assert_event_tags(parser, [('end', 'root')]) + self.assertIsNone(parser.close()) + + @fails_with_expat_2_6_0 + def test_simple_xml_chunk_1(self): + self.test_simple_xml(chunk_size=1) + + @fails_with_expat_2_6_0 + def test_simple_xml_chunk_5(self): + self.test_simple_xml(chunk_size=5) + + def test_simple_xml_chunk_22(self): + self.test_simple_xml(chunk_size=22) def test_feed_while_iterating(self): parser = ET.XMLPullParser() diff --git a/Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst b/Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst new file mode 100644 index 00000000000000..6f1015235cc25d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst @@ -0,0 +1,2 @@ +Fix tests for :class:`~xml.etree.ElementTree.XMLPullParser` with Expat +2.6.0. From 573acb30f22a84c0f2c951efa002c9946e29b6a3 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 11 Feb 2024 12:23:30 +0200 Subject: [PATCH 061/126] gh-115172: Fix explicit index entries for the C API (GH-115173) --- Doc/c-api/buffer.rst | 2 +- Doc/c-api/code.rst | 10 +-- Doc/c-api/exceptions.rst | 140 ++++++++++++++++++------------------ Doc/c-api/file.rst | 2 +- Doc/c-api/init.rst | 25 +++---- Doc/c-api/intro.rst | 38 +++++----- Doc/c-api/long.rst | 8 +-- Doc/c-api/memory.rst | 8 +-- Doc/c-api/structures.rst | 44 ++++++------ Doc/c-api/sys.rst | 8 +-- Doc/c-api/veryhigh.rst | 6 +- Doc/extending/extending.rst | 6 +- Doc/extending/newtypes.rst | 4 +- Doc/library/re.rst | 2 +- 14 files changed, 150 insertions(+), 153 deletions(-) diff --git a/Doc/c-api/buffer.rst b/Doc/c-api/buffer.rst index e572815ffd6259..1e1cabdf242bd1 100644 --- a/Doc/c-api/buffer.rst +++ b/Doc/c-api/buffer.rst @@ -29,7 +29,7 @@ without intermediate copying.
Python provides such a facility at the C level in the form of the :ref:`buffer protocol `. This protocol has two sides: -.. index:: single: PyBufferProcs +.. index:: single: PyBufferProcs (C type) - on the producer side, a type can export a "buffer interface" which allows objects of that type to expose information about their underlying buffer. diff --git a/Doc/c-api/code.rst b/Doc/c-api/code.rst index 11c12e685fcace..382cfbff864072 100644 --- a/Doc/c-api/code.rst +++ b/Doc/c-api/code.rst @@ -49,7 +49,7 @@ bound into a function. .. versionchanged:: 3.11 Added ``qualname`` and ``exceptiontable`` parameters. - .. index:: single: PyCode_New + .. index:: single: PyCode_New (C function) .. versionchanged:: 3.12 @@ -62,7 +62,7 @@ bound into a function. Similar to :c:func:`PyUnstable_Code_New`, but with an extra "posonlyargcount" for positional-only arguments. The same caveats that apply to ``PyUnstable_Code_New`` also apply to this function. - .. index:: single: PyCode_NewWithPosOnlyArgs + .. index:: single: PyCode_NewWithPosOnlyArgs (C function) .. versionadded:: 3.8 as ``PyCode_NewWithPosOnlyArgs`` @@ -221,7 +221,7 @@ may change without deprecation warnings. *free* will be called on non-``NULL`` data stored under the new index. Use :c:func:`Py_DecRef` when storing :c:type:`PyObject`. - .. index:: single: _PyEval_RequestCodeExtraIndex + .. index:: single: _PyEval_RequestCodeExtraIndex (C function) .. versionadded:: 3.6 as ``_PyEval_RequestCodeExtraIndex`` @@ -239,7 +239,7 @@ may change without deprecation warnings. If no data was set under the index, set *extra* to ``NULL`` and return 0 without setting an exception. - .. index:: single: _PyCode_GetExtra + .. index:: single: _PyCode_GetExtra (C function) .. versionadded:: 3.6 as ``_PyCode_GetExtra`` @@ -254,7 +254,7 @@ may change without deprecation warnings. Set the extra data stored under the given index to *extra*. Return 0 on success. Set an exception and return -1 on failure. - .. 
index:: single: _PyCode_SetExtra + .. index:: single: _PyCode_SetExtra (C function) .. versionadded:: 3.6 as ``_PyCode_SetExtra`` diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst index c7e3cd9463e5d7..eaf723fb2cc4cf 100644 --- a/Doc/c-api/exceptions.rst +++ b/Doc/c-api/exceptions.rst @@ -180,7 +180,7 @@ For convenience, some of these functions will always return a .. c:function:: PyObject* PyErr_SetFromErrno(PyObject *type) - .. index:: single: strerror() + .. index:: single: strerror (C function) This is a convenience function to raise an exception when a C library function has returned an error and set the C variable :c:data:`errno`. It constructs a @@ -635,7 +635,7 @@ Signal Handling .. index:: pair: module; signal - single: SIGINT + single: SIGINT (C macro) single: KeyboardInterrupt (built-in exception) This function interacts with Python's signal handling. @@ -666,7 +666,7 @@ Signal Handling .. index:: pair: module; signal - single: SIGINT + single: SIGINT (C macro) single: KeyboardInterrupt (built-in exception) Simulate the effect of a :c:macro:`!SIGINT` signal arriving. @@ -968,59 +968,59 @@ All standard Python exceptions are available as global variables whose names are the variables: .. 
index:: - single: PyExc_BaseException - single: PyExc_Exception - single: PyExc_ArithmeticError - single: PyExc_AssertionError - single: PyExc_AttributeError - single: PyExc_BlockingIOError - single: PyExc_BrokenPipeError - single: PyExc_BufferError - single: PyExc_ChildProcessError - single: PyExc_ConnectionAbortedError - single: PyExc_ConnectionError - single: PyExc_ConnectionRefusedError - single: PyExc_ConnectionResetError - single: PyExc_EOFError - single: PyExc_FileExistsError - single: PyExc_FileNotFoundError - single: PyExc_FloatingPointError - single: PyExc_GeneratorExit - single: PyExc_ImportError - single: PyExc_IndentationError - single: PyExc_IndexError - single: PyExc_InterruptedError - single: PyExc_IsADirectoryError - single: PyExc_KeyError - single: PyExc_KeyboardInterrupt - single: PyExc_LookupError - single: PyExc_MemoryError - single: PyExc_ModuleNotFoundError - single: PyExc_NameError - single: PyExc_NotADirectoryError - single: PyExc_NotImplementedError - single: PyExc_OSError - single: PyExc_OverflowError - single: PyExc_PermissionError - single: PyExc_ProcessLookupError - single: PyExc_RecursionError - single: PyExc_ReferenceError - single: PyExc_RuntimeError - single: PyExc_StopAsyncIteration - single: PyExc_StopIteration - single: PyExc_SyntaxError - single: PyExc_SystemError - single: PyExc_SystemExit - single: PyExc_TabError - single: PyExc_TimeoutError - single: PyExc_TypeError - single: PyExc_UnboundLocalError - single: PyExc_UnicodeDecodeError - single: PyExc_UnicodeEncodeError - single: PyExc_UnicodeError - single: PyExc_UnicodeTranslateError - single: PyExc_ValueError - single: PyExc_ZeroDivisionError + single: PyExc_BaseException (C var) + single: PyExc_Exception (C var) + single: PyExc_ArithmeticError (C var) + single: PyExc_AssertionError (C var) + single: PyExc_AttributeError (C var) + single: PyExc_BlockingIOError (C var) + single: PyExc_BrokenPipeError (C var) + single: PyExc_BufferError (C var) + single: 
PyExc_ChildProcessError (C var) + single: PyExc_ConnectionAbortedError (C var) + single: PyExc_ConnectionError (C var) + single: PyExc_ConnectionRefusedError (C var) + single: PyExc_ConnectionResetError (C var) + single: PyExc_EOFError (C var) + single: PyExc_FileExistsError (C var) + single: PyExc_FileNotFoundError (C var) + single: PyExc_FloatingPointError (C var) + single: PyExc_GeneratorExit (C var) + single: PyExc_ImportError (C var) + single: PyExc_IndentationError (C var) + single: PyExc_IndexError (C var) + single: PyExc_InterruptedError (C var) + single: PyExc_IsADirectoryError (C var) + single: PyExc_KeyError (C var) + single: PyExc_KeyboardInterrupt (C var) + single: PyExc_LookupError (C var) + single: PyExc_MemoryError (C var) + single: PyExc_ModuleNotFoundError (C var) + single: PyExc_NameError (C var) + single: PyExc_NotADirectoryError (C var) + single: PyExc_NotImplementedError (C var) + single: PyExc_OSError (C var) + single: PyExc_OverflowError (C var) + single: PyExc_PermissionError (C var) + single: PyExc_ProcessLookupError (C var) + single: PyExc_RecursionError (C var) + single: PyExc_ReferenceError (C var) + single: PyExc_RuntimeError (C var) + single: PyExc_StopAsyncIteration (C var) + single: PyExc_StopIteration (C var) + single: PyExc_SyntaxError (C var) + single: PyExc_SystemError (C var) + single: PyExc_SystemExit (C var) + single: PyExc_TabError (C var) + single: PyExc_TimeoutError (C var) + single: PyExc_TypeError (C var) + single: PyExc_UnboundLocalError (C var) + single: PyExc_UnicodeDecodeError (C var) + single: PyExc_UnicodeEncodeError (C var) + single: PyExc_UnicodeError (C var) + single: PyExc_UnicodeTranslateError (C var) + single: PyExc_ValueError (C var) + single: PyExc_ZeroDivisionError (C var) +-----------------------------------------+---------------------------------+----------+ | C Name | Python Name | Notes | @@ -1151,9 +1151,9 @@ the variables: These are compatibility aliases to :c:data:`PyExc_OSError`: .. 
index:: - single: PyExc_EnvironmentError - single: PyExc_IOError - single: PyExc_WindowsError + single: PyExc_EnvironmentError (C var) + single: PyExc_IOError (C var) + single: PyExc_WindowsError (C var) +-------------------------------------+----------+ | C Name | Notes | @@ -1188,17 +1188,17 @@ names are ``PyExc_`` followed by the Python exception name. These have the type the variables: .. index:: - single: PyExc_Warning - single: PyExc_BytesWarning - single: PyExc_DeprecationWarning - single: PyExc_FutureWarning - single: PyExc_ImportWarning - single: PyExc_PendingDeprecationWarning - single: PyExc_ResourceWarning - single: PyExc_RuntimeWarning - single: PyExc_SyntaxWarning - single: PyExc_UnicodeWarning - single: PyExc_UserWarning + single: PyExc_Warning (C var) + single: PyExc_BytesWarning (C var) + single: PyExc_DeprecationWarning (C var) + single: PyExc_FutureWarning (C var) + single: PyExc_ImportWarning (C var) + single: PyExc_PendingDeprecationWarning (C var) + single: PyExc_ResourceWarning (C var) + single: PyExc_RuntimeWarning (C var) + single: PyExc_SyntaxWarning (C var) + single: PyExc_UnicodeWarning (C var) + single: PyExc_UserWarning (C var) +------------------------------------------+---------------------------------+----------+ | C Name | Python Name | Notes | diff --git a/Doc/c-api/file.rst b/Doc/c-api/file.rst index d3a78c588454e8..e9019a0d500f7e 100644 --- a/Doc/c-api/file.rst +++ b/Doc/c-api/file.rst @@ -96,7 +96,7 @@ the :mod:`io` APIs instead. .. c:function:: int PyFile_WriteObject(PyObject *obj, PyObject *p, int flags) - .. index:: single: Py_PRINT_RAW + .. index:: single: Py_PRINT_RAW (C macro) Write object *obj* to file object *p*. 
The only supported flag for *flags* is :c:macro:`Py_PRINT_RAW`; if given, the :func:`str` of the object is written diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index f8fd48e781d6da..e7199ad5e0c1b1 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -332,7 +332,7 @@ Initializing and finalizing the interpreter pair: module; __main__ pair: module; sys triple: module; search; path - single: Py_FinalizeEx() + single: Py_FinalizeEx (C function) Initialize the Python interpreter. In an application embedding Python, this should be called before using any other Python/C API functions; see @@ -661,7 +661,7 @@ operations could cause problems in a multi-threaded program: for example, when two threads simultaneously increment the reference count of the same object, the reference count could end up being incremented only once instead of twice. -.. index:: single: setswitchinterval() (in module sys) +.. index:: single: setswitchinterval (in module sys) Therefore, the rule exists that only the thread that has acquired the :term:`GIL` may operate on Python objects or call Python/C API functions. @@ -671,8 +671,7 @@ released around potentially blocking I/O operations like reading or writing a file, so that other Python threads can run in the meantime. .. index:: - single: PyThreadState - single: PyThreadState + single: PyThreadState (C type) The Python interpreter keeps some thread-specific bookkeeping information inside a data structure called :c:type:`PyThreadState`. There's also one @@ -698,8 +697,8 @@ This is so common that a pair of macros exists to simplify it:: Py_END_ALLOW_THREADS .. 
index:: - single: Py_BEGIN_ALLOW_THREADS - single: Py_END_ALLOW_THREADS + single: Py_BEGIN_ALLOW_THREADS (C macro) + single: Py_END_ALLOW_THREADS (C macro) The :c:macro:`Py_BEGIN_ALLOW_THREADS` macro opens a new block and declares a hidden local variable; the :c:macro:`Py_END_ALLOW_THREADS` macro closes the @@ -714,8 +713,8 @@ The block above expands to the following code:: PyEval_RestoreThread(_save); .. index:: - single: PyEval_RestoreThread() - single: PyEval_SaveThread() + single: PyEval_RestoreThread (C function) + single: PyEval_SaveThread (C function) Here is how these functions work: the global interpreter lock is used to protect the pointer to the current thread state. When releasing the lock and saving the thread state, @@ -1399,8 +1398,8 @@ function. You can create and destroy them using the following functions: may be stored internally on the :c:type:`PyInterpreterState`. .. index:: - single: Py_FinalizeEx() - single: Py_Initialize() + single: Py_FinalizeEx (C function) + single: Py_Initialize (C function) Extension modules are shared between (sub-)interpreters as follows: @@ -1428,7 +1427,7 @@ function. You can create and destroy them using the following functions: As with multi-phase initialization, this means that only C-level static and global variables are shared between these modules. - .. index:: single: close() (in module os) + .. index:: single: close (in module os) .. c:function:: PyThreadState* Py_NewInterpreter(void) @@ -1451,7 +1450,7 @@ function. You can create and destroy them using the following functions: .. c:function:: void Py_EndInterpreter(PyThreadState *tstate) - .. index:: single: Py_FinalizeEx() + .. index:: single: Py_FinalizeEx (C function) Destroy the (sub-)interpreter represented by the given thread state. The given thread state must be the current thread state. See the @@ -1543,8 +1542,6 @@ pointer and a void pointer argument. .. c:function:: int Py_AddPendingCall(int (*func)(void *), void *arg) - .. 
index:: single: Py_AddPendingCall() - Schedule a function to be called from the main interpreter thread. On success, ``0`` is returned and *func* is queued for being called in the main thread. On failure, ``-1`` is returned without setting any exception. diff --git a/Doc/c-api/intro.rst b/Doc/c-api/intro.rst index dcda1071a58f35..8ef463e3f88ca8 100644 --- a/Doc/c-api/intro.rst +++ b/Doc/c-api/intro.rst @@ -325,8 +325,8 @@ objects that reference each other here; for now, the solution is "don't do that.") .. index:: - single: Py_INCREF() - single: Py_DECREF() + single: Py_INCREF (C function) + single: Py_DECREF (C function) Reference counts are always manipulated explicitly. The normal way is to use the macro :c:func:`Py_INCREF` to take a new reference to an @@ -401,8 +401,8 @@ function, that function assumes that it now owns that reference, and you are not responsible for it any longer. .. index:: - single: PyList_SetItem() - single: PyTuple_SetItem() + single: PyList_SetItem (C function) + single: PyTuple_SetItem (C function) Few functions steal references; the two notable exceptions are :c:func:`PyList_SetItem` and :c:func:`PyTuple_SetItem`, which steal a reference @@ -491,8 +491,8 @@ using :c:func:`PySequence_GetItem` (which happens to take exactly the same arguments), you do own a reference to the returned object. .. index:: - single: PyList_GetItem() - single: PySequence_GetItem() + single: PyList_GetItem (C function) + single: PySequence_GetItem (C function) Here is an example of how you could write a function that computes the sum of the items in a list of integers; once using :c:func:`PyList_GetItem`, and once @@ -587,7 +587,7 @@ caller, then to the caller's caller, and so on, until they reach the top-level interpreter, where they are reported to the user accompanied by a stack traceback. -.. index:: single: PyErr_Occurred() +.. index:: single: PyErr_Occurred (C function) For C programmers, however, error checking always has to be explicit. 
All functions in the Python/C API can raise exceptions, unless an explicit claim is @@ -601,8 +601,8 @@ ambiguous return value, and require explicit testing for errors with :c:func:`PyErr_Occurred`. These exceptions are always explicitly documented. .. index:: - single: PyErr_SetString() - single: PyErr_Clear() + single: PyErr_SetString (C function) + single: PyErr_Clear (C function) Exception state is maintained in per-thread storage (this is equivalent to using global storage in an unthreaded application). A thread can be in one of @@ -624,7 +624,7 @@ an exception is being passed on between C functions until it reaches the Python bytecode interpreter's main loop, which takes care of transferring it to ``sys.exc_info()`` and friends. -.. index:: single: exc_info() (in module sys) +.. index:: single: exc_info (in module sys) Note that starting with Python 1.5, the preferred, thread-safe way to access the exception state from Python code is to call the function :func:`sys.exc_info`, @@ -709,9 +709,9 @@ Here is the corresponding C code, in all its glory:: .. index:: single: incr_item() .. index:: - single: PyErr_ExceptionMatches() - single: PyErr_Clear() - single: Py_XDECREF() + single: PyErr_ExceptionMatches (C function) + single: PyErr_Clear (C function) + single: Py_XDECREF (C function) This example represents an endorsed use of the ``goto`` statement in C! It illustrates the use of :c:func:`PyErr_ExceptionMatches` and @@ -735,7 +735,7 @@ the finalization, of the Python interpreter. Most functionality of the interpreter can only be used after the interpreter has been initialized. .. index:: - single: Py_Initialize() + single: Py_Initialize (C function) pair: module; builtins pair: module; __main__ pair: module; sys @@ -770,10 +770,10 @@ environment variable :envvar:`PYTHONHOME`, or insert additional directories in front of the standard path by setting :envvar:`PYTHONPATH`. .. 
index:: - single: Py_GetPath() - single: Py_GetPrefix() - single: Py_GetExecPrefix() - single: Py_GetProgramFullPath() + single: Py_GetPath (C function) + single: Py_GetPrefix (C function) + single: Py_GetExecPrefix (C function) + single: Py_GetProgramFullPath (C function) The embedding application can steer the search by setting :c:member:`PyConfig.program_name` *before* calling @@ -784,7 +784,7 @@ control has to provide its own implementation of :c:func:`Py_GetPath`, :c:func:`Py_GetPrefix`, :c:func:`Py_GetExecPrefix`, and :c:func:`Py_GetProgramFullPath` (all defined in :file:`Modules/getpath.c`). -.. index:: single: Py_IsInitialized() +.. index:: single: Py_IsInitialized (C function) Sometimes, it is desirable to "uninitialize" Python. For instance, the application may want to start over (make another call to diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index 045604870d3c84..f42e23db89ae39 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -117,7 +117,7 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. .. c:function:: long PyLong_AsLong(PyObject *obj) .. index:: - single: LONG_MAX + single: LONG_MAX (C macro) single: OverflowError (built-in exception) Return a C :c:expr:`long` representation of *obj*. If *obj* is not an @@ -210,7 +210,7 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. .. c:function:: Py_ssize_t PyLong_AsSsize_t(PyObject *pylong) .. index:: - single: PY_SSIZE_T_MAX + single: PY_SSIZE_T_MAX (C macro) single: OverflowError (built-in exception) Return a C :c:type:`Py_ssize_t` representation of *pylong*. *pylong* must @@ -225,7 +225,7 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. .. c:function:: unsigned long PyLong_AsUnsignedLong(PyObject *pylong) .. index:: - single: ULONG_MAX + single: ULONG_MAX (C macro) single: OverflowError (built-in exception) Return a C :c:expr:`unsigned long` representation of *pylong*. 
*pylong* @@ -241,7 +241,7 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. .. c:function:: size_t PyLong_AsSize_t(PyObject *pylong) .. index:: - single: SIZE_MAX + single: SIZE_MAX (C macro) single: OverflowError (built-in exception) Return a C :c:type:`size_t` representation of *pylong*. *pylong* must be diff --git a/Doc/c-api/memory.rst b/Doc/c-api/memory.rst index c05282ffc59521..9da09a21607f61 100644 --- a/Doc/c-api/memory.rst +++ b/Doc/c-api/memory.rst @@ -41,10 +41,10 @@ buffers is performed on demand by the Python memory manager through the Python/C API functions listed in this document. .. index:: - single: malloc() - single: calloc() - single: realloc() - single: free() + single: malloc (C function) + single: calloc (C function) + single: realloc (C function) + single: free (C function) To avoid memory corruption, extension writers should never try to operate on Python objects with the functions exported by the C library: :c:func:`malloc`, diff --git a/Doc/c-api/structures.rst b/Doc/c-api/structures.rst index 0032da9659636c..77f2b6991d770e 100644 --- a/Doc/c-api/structures.rst +++ b/Doc/c-api/structures.rst @@ -561,9 +561,9 @@ The following flags can be used with :c:member:`PyMemberDef.flags`: :c:member:`PyMemberDef.offset` to the offset from the ``PyObject`` struct. .. index:: - single: READ_RESTRICTED - single: WRITE_RESTRICTED - single: RESTRICTED + single: READ_RESTRICTED (C macro) + single: WRITE_RESTRICTED (C macro) + single: RESTRICTED (C macro) .. versionchanged:: 3.10 @@ -574,7 +574,7 @@ The following flags can be used with :c:member:`PyMemberDef.flags`: :c:macro:`Py_AUDIT_READ`; :c:macro:`!WRITE_RESTRICTED` does nothing. .. index:: - single: READONLY + single: READONLY (C macro) .. versionchanged:: 3.12 @@ -637,24 +637,24 @@ Macro name C type Python type Reading a ``NULL`` pointer raises :py:exc:`AttributeError`. .. 
index:: - single: T_BYTE - single: T_SHORT - single: T_INT - single: T_LONG - single: T_LONGLONG - single: T_UBYTE - single: T_USHORT - single: T_UINT - single: T_ULONG - single: T_ULONGULONG - single: T_PYSSIZET - single: T_FLOAT - single: T_DOUBLE - single: T_BOOL - single: T_CHAR - single: T_STRING - single: T_STRING_INPLACE - single: T_OBJECT_EX + single: T_BYTE (C macro) + single: T_SHORT (C macro) + single: T_INT (C macro) + single: T_LONG (C macro) + single: T_LONGLONG (C macro) + single: T_UBYTE (C macro) + single: T_USHORT (C macro) + single: T_UINT (C macro) + single: T_ULONG (C macro) + single: T_ULONGULONG (C macro) + single: T_PYSSIZET (C macro) + single: T_FLOAT (C macro) + single: T_DOUBLE (C macro) + single: T_BOOL (C macro) + single: T_CHAR (C macro) + single: T_STRING (C macro) + single: T_STRING_INPLACE (C macro) + single: T_OBJECT_EX (C macro) single: structmember.h .. versionadded:: 3.12 diff --git a/Doc/c-api/sys.rst b/Doc/c-api/sys.rst index e3c54b075114ff..35969b30120d2a 100644 --- a/Doc/c-api/sys.rst +++ b/Doc/c-api/sys.rst @@ -371,7 +371,7 @@ Process Control .. c:function:: void Py_FatalError(const char *message) - .. index:: single: abort() + .. index:: single: abort (C function) Print a fatal error message and kill the process. No cleanup is performed. This function should only be invoked when a condition is detected that would @@ -391,8 +391,8 @@ Process Control .. c:function:: void Py_Exit(int status) .. index:: - single: Py_FinalizeEx() - single: exit() + single: Py_FinalizeEx (C function) + single: exit (C function) Exit the current process. This calls :c:func:`Py_FinalizeEx` and then calls the standard C library function ``exit(status)``. If :c:func:`Py_FinalizeEx` @@ -405,7 +405,7 @@ Process Control .. c:function:: int Py_AtExit(void (*func) ()) .. index:: - single: Py_FinalizeEx() + single: Py_FinalizeEx (C function) single: cleanup functions Register a cleanup function to be called by :c:func:`Py_FinalizeEx`. 
The cleanup diff --git a/Doc/c-api/veryhigh.rst b/Doc/c-api/veryhigh.rst index 324518c035096b..67167444d0a685 100644 --- a/Doc/c-api/veryhigh.rst +++ b/Doc/c-api/veryhigh.rst @@ -322,7 +322,7 @@ the same library that the Python runtime is using. .. c:var:: int Py_eval_input - .. index:: single: Py_CompileString() + .. index:: single: Py_CompileString (C function) The start symbol from the Python grammar for isolated expressions; for use with :c:func:`Py_CompileString`. @@ -330,7 +330,7 @@ the same library that the Python runtime is using. .. c:var:: int Py_file_input - .. index:: single: Py_CompileString() + .. index:: single: Py_CompileString (C function) The start symbol from the Python grammar for sequences of statements as read from a file or other source; for use with :c:func:`Py_CompileString`. This is @@ -339,7 +339,7 @@ the same library that the Python runtime is using. .. c:var:: int Py_single_input - .. index:: single: Py_CompileString() + .. index:: single: Py_CompileString (C function) The start symbol from the Python grammar for a single statement; for use with :c:func:`Py_CompileString`. This is the symbol used for the interactive diff --git a/Doc/extending/extending.rst b/Doc/extending/extending.rst index 745fc10a22d161..b70e1b1fe57e67 100644 --- a/Doc/extending/extending.rst +++ b/Doc/extending/extending.rst @@ -547,7 +547,7 @@ reference count of an object and are safe in the presence of ``NULL`` pointers (but note that *temp* will not be ``NULL`` in this context). More info on them in section :ref:`refcounts`. -.. index:: single: PyObject_CallObject() +.. index:: single: PyObject_CallObject (C function) Later, when it is time to call the function, you call the C function :c:func:`PyObject_CallObject`. This function has two arguments, both pointers to @@ -638,7 +638,7 @@ the above example, we use :c:func:`Py_BuildValue` to construct the dictionary. : Extracting Parameters in Extension Functions ============================================ -.. 
index:: single: PyArg_ParseTuple() +.. index:: single: PyArg_ParseTuple (C function) The :c:func:`PyArg_ParseTuple` function is declared as follows:: @@ -730,7 +730,7 @@ Some example calls:: Keyword Parameters for Extension Functions ========================================== -.. index:: single: PyArg_ParseTupleAndKeywords() +.. index:: single: PyArg_ParseTupleAndKeywords (C function) The :c:func:`PyArg_ParseTupleAndKeywords` function is declared as follows:: diff --git a/Doc/extending/newtypes.rst b/Doc/extending/newtypes.rst index 7a92b3257c6cd3..473a418809cff1 100644 --- a/Doc/extending/newtypes.rst +++ b/Doc/extending/newtypes.rst @@ -89,8 +89,8 @@ If your type supports garbage collection, the destructor should call } .. index:: - single: PyErr_Fetch() - single: PyErr_Restore() + single: PyErr_Fetch (C function) + single: PyErr_Restore (C function) One important requirement of the deallocator function is that it leaves any pending exceptions alone. This is important since deallocators are frequently diff --git a/Doc/library/re.rst b/Doc/library/re.rst index 0a8c88b50cdeec..a5bd5c73f2fac7 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -1597,7 +1597,7 @@ To find out what card the pair consists of, one could use the Simulating scanf() ^^^^^^^^^^^^^^^^^^ -.. index:: single: scanf() +.. index:: single: scanf (C function) Python does not currently have an equivalent to :c:func:`!scanf`. Regular expressions are generally more powerful, though also more verbose, than From aeffc7f8951e04258f0fd8cadfa6cd8b704730f6 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 11 Feb 2024 12:24:13 +0200 Subject: [PATCH 062/126] gh-79382: Fix recursive glob() with trailing "**" (GH-115134) Trailing "**" no longer allows to match files and non-existing paths in recursive glob(). 
--- Lib/glob.py | 3 ++- Lib/test/test_glob.py | 11 +++++++++++ .../2024-02-07-12-37-52.gh-issue-79382.Yz_5WB.rst | 2 ++ 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-02-07-12-37-52.gh-issue-79382.Yz_5WB.rst diff --git a/Lib/glob.py b/Lib/glob.py index 4a335a10766cf4..343be78a73b20a 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -132,7 +132,8 @@ def glob1(dirname, pattern): def _glob2(dirname, pattern, dir_fd, dironly, include_hidden=False): assert _isrecursive(pattern) - yield pattern[:0] + if not dirname or _isdir(dirname, dir_fd): + yield pattern[:0] yield from _rlistdir(dirname, dir_fd, dironly, include_hidden=include_hidden) diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py index aa5fac8eca1354..8b2ea8f89f5daf 100644 --- a/Lib/test/test_glob.py +++ b/Lib/test/test_glob.py @@ -333,6 +333,17 @@ def test_recursive_glob(self): eq(glob.glob('**', recursive=True, include_hidden=True), [join(*i) for i in full+rec]) + def test_glob_non_directory(self): + eq = self.assertSequencesEqual_noorder + eq(self.rglob('EF'), self.joins(('EF',))) + eq(self.rglob('EF', ''), []) + eq(self.rglob('EF', '*'), []) + eq(self.rglob('EF', '**'), []) + eq(self.rglob('nonexistent'), []) + eq(self.rglob('nonexistent', ''), []) + eq(self.rglob('nonexistent', '*'), []) + eq(self.rglob('nonexistent', '**'), []) + def test_glob_many_open_files(self): depth = 30 base = os.path.join(self.tempdir, 'deep') diff --git a/Misc/NEWS.d/next/Library/2024-02-07-12-37-52.gh-issue-79382.Yz_5WB.rst b/Misc/NEWS.d/next/Library/2024-02-07-12-37-52.gh-issue-79382.Yz_5WB.rst new file mode 100644 index 00000000000000..5eb1888943186a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-02-07-12-37-52.gh-issue-79382.Yz_5WB.rst @@ -0,0 +1,2 @@ +Trailing ``**`` no longer allows to match files and non-existing paths in +recursive :func:`~glob.glob`. 
From bf75f1b147b8cc4c5506df7c4bb30b9950ceda1a Mon Sep 17 00:00:00 2001 From: Soumendra Ganguly <67527439+8vasu@users.noreply.github.com> Date: Sun, 11 Feb 2024 11:29:44 +0100 Subject: [PATCH 063/126] gh-85984: Add _POSIX_VDISABLE from unistd.h to termios module. (#114985) Signed-off-by: Soumendra Ganguly Co-authored-by: Gregory P. Smith --- .../Library/2024-02-04-02-28-37.gh-issue-85984.NHZVTQ.rst | 1 + Modules/termios.c | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-02-04-02-28-37.gh-issue-85984.NHZVTQ.rst diff --git a/Misc/NEWS.d/next/Library/2024-02-04-02-28-37.gh-issue-85984.NHZVTQ.rst b/Misc/NEWS.d/next/Library/2024-02-04-02-28-37.gh-issue-85984.NHZVTQ.rst new file mode 100644 index 00000000000000..bfa7e676f92306 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-02-04-02-28-37.gh-issue-85984.NHZVTQ.rst @@ -0,0 +1 @@ +Added ``_POSIX_VDISABLE`` from C's ``<unistd.h>`` to :mod:`termios`. diff --git a/Modules/termios.c b/Modules/termios.c index 69dbd88be5fcc2..4635fefb8f3f5a 100644 --- a/Modules/termios.c +++ b/Modules/termios.c @@ -27,9 +27,7 @@ #include #include -#if defined(__sun) && defined(__SVR4) -# include // ioctl() -#endif +#include <unistd.h> // _POSIX_VDISABLE /* HP-UX requires that this be included to pick up MDCD, MCTS, MDSR, * MDTR, MRI, and MRTS (apparently used internally by some things @@ -1315,6 +1313,9 @@ static struct constant { #ifdef TIOCTTYGSTRUCT {"TIOCTTYGSTRUCT", TIOCTTYGSTRUCT}, #endif +#ifdef _POSIX_VDISABLE + {"_POSIX_VDISABLE", _POSIX_VDISABLE}, +#endif /* sentinel */ {NULL, 0} From 5d2794a16bc1639e6053300c08a78d60526aadf2 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 11 Feb 2024 12:38:07 +0200 Subject: [PATCH 064/126] gh-67837, gh-112998: Fix dirs creation in concurrent extraction (GH-115082) Avoid race conditions in the creation of directories during concurrent extraction in tarfile and zipfile.
Co-authored-by: Samantha Hughes Co-authored-by: Peder Bergebakken Sundt --- Lib/tarfile.py | 2 +- Lib/test/archiver_tests.py | 22 +++++++++++++++++++ Lib/zipfile/__init__.py | 8 +++++-- ...4-02-06-15-16-28.gh-issue-67837._JKa73.rst | 2 ++ 4 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-02-06-15-16-28.gh-issue-67837._JKa73.rst diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 9775040cbe372c..f4dd0fdab4a3e4 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2411,7 +2411,7 @@ def _extract_member(self, tarinfo, targetpath, set_attrs=True, if upperdirs and not os.path.exists(upperdirs): # Create directories that are not part of the archive with # default permissions. - os.makedirs(upperdirs) + os.makedirs(upperdirs, exist_ok=True) if tarinfo.islnk() or tarinfo.issym(): self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname)) diff --git a/Lib/test/archiver_tests.py b/Lib/test/archiver_tests.py index 1a4bbb9e5706c5..24745941b08923 100644 --- a/Lib/test/archiver_tests.py +++ b/Lib/test/archiver_tests.py @@ -3,6 +3,7 @@ import os import sys +from test.support import swap_attr from test.support import os_helper class OverwriteTests: @@ -153,3 +154,24 @@ def test_overwrite_broken_dir_symlink_as_implicit_dir(self): self.extractall(ar) self.assertTrue(os.path.islink(target)) self.assertFalse(os.path.exists(target2)) + + def test_concurrent_extract_dir(self): + target = os.path.join(self.testdir, 'test') + def concurrent_mkdir(*args, **kwargs): + orig_mkdir(*args, **kwargs) + orig_mkdir(*args, **kwargs) + with swap_attr(os, 'mkdir', concurrent_mkdir) as orig_mkdir: + with self.open(self.ar_with_dir) as ar: + self.extractall(ar) + self.assertTrue(os.path.isdir(target)) + + def test_concurrent_extract_implicit_dir(self): + target = os.path.join(self.testdir, 'test') + def concurrent_mkdir(*args, **kwargs): + orig_mkdir(*args, **kwargs) + orig_mkdir(*args, **kwargs) + with swap_attr(os, 'mkdir', concurrent_mkdir) as 
orig_mkdir: + with self.open(self.ar_with_implicit_dir) as ar: + self.extractall(ar) + self.assertTrue(os.path.isdir(target)) + self.assertTrue(os.path.isfile(os.path.join(target, 'file'))) diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 8005b4b34ccf76..cc08f602fe44e0 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -1802,11 +1802,15 @@ def _extract_member(self, member, targetpath, pwd): # Create all upper directories if necessary. upperdirs = os.path.dirname(targetpath) if upperdirs and not os.path.exists(upperdirs): - os.makedirs(upperdirs) + os.makedirs(upperdirs, exist_ok=True) if member.is_dir(): if not os.path.isdir(targetpath): - os.mkdir(targetpath) + try: + os.mkdir(targetpath) + except FileExistsError: + if not os.path.isdir(targetpath): + raise return targetpath with self.open(member, pwd=pwd) as source, \ diff --git a/Misc/NEWS.d/next/Library/2024-02-06-15-16-28.gh-issue-67837._JKa73.rst b/Misc/NEWS.d/next/Library/2024-02-06-15-16-28.gh-issue-67837._JKa73.rst new file mode 100644 index 00000000000000..340b65f1883942 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-02-06-15-16-28.gh-issue-67837._JKa73.rst @@ -0,0 +1,2 @@ +Avoid race conditions in the creation of directories during concurrent +extraction in :mod:`tarfile` and :mod:`zipfile`. From d2c4baa41ff93cd5695c201d40e20a88458ecc26 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 11 Feb 2024 12:43:14 +0200 Subject: [PATCH 065/126] gh-97928: Partially restore the behavior of tkinter.Text.count() by default (GH-115031) By default, it preserves an inconsistent behavior of older Python versions: packs the count into a 1-tuple if only one or none options are specified (including 'update'), returns None instead of 0. Except that setting wantobjects to 0 no longer affects the result. Add a new parameter return_ints: specifying return_ints=True makes Text.count() always returning the single count as an integer instead of a 1-tuple or None. 
--- Doc/whatsnew/3.13.rst | 13 +++--- Lib/idlelib/sidebar.py | 2 +- Lib/test/test_tkinter/test_text.py | 44 ++++++++++++++----- Lib/tkinter/__init__.py | 23 ++++++---- ...4-02-05-16-48-06.gh-issue-97928.JZCies.rst | 5 +++ 5 files changed, 59 insertions(+), 28 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-02-05-16-48-06.gh-issue-97928.JZCies.rst diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index aee37737a9990a..1b803278ae0d5b 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -469,6 +469,12 @@ tkinter a dict instead of a tuple. (Contributed by Serhiy Storchaka in :gh:`43457`.) +* Add new optional keyword-only parameter *return_ints* in + the :meth:`!Text.count` method. + Passing ``return_ints=True`` makes it always returning the single count + as an integer instead of a 1-tuple or ``None``. + (Contributed by Serhiy Storchaka in :gh:`97928`.) + * Add support of the "vsapi" element type in the :meth:`~tkinter.ttk.Style.element_create` method of :class:`tkinter.ttk.Style`. @@ -1286,13 +1292,6 @@ that may require changes to your code. Changes in the Python API ------------------------- -* :meth:`!tkinter.Text.count` now always returns an integer if one or less - counting options are specified. - Previously it could return a single count as a 1-tuple, an integer (only if - option ``"update"`` was specified) or ``None`` if no items found. - The result is now the same if ``wantobjects`` is set to ``0``. - (Contributed by Serhiy Storchaka in :gh:`97928`.) 
- * Functions :c:func:`PyDict_GetItem`, :c:func:`PyDict_GetItemString`, :c:func:`PyMapping_HasKey`, :c:func:`PyMapping_HasKeyString`, :c:func:`PyObject_HasAttr`, :c:func:`PyObject_HasAttrString`, and diff --git a/Lib/idlelib/sidebar.py b/Lib/idlelib/sidebar.py index ff77b568a786e0..aa19a24e3edef2 100644 --- a/Lib/idlelib/sidebar.py +++ b/Lib/idlelib/sidebar.py @@ -27,7 +27,7 @@ def get_displaylines(text, index): """Display height, in lines, of a logical line in a Tk text widget.""" return text.count(f"{index} linestart", f"{index} lineend", - "displaylines") + "displaylines", return_ints=True) def get_widget_padding(widget): """Get the total padding of a Tk widget, including its border.""" diff --git a/Lib/test/test_tkinter/test_text.py b/Lib/test/test_tkinter/test_text.py index f809c4510e3a1f..b26956930d3402 100644 --- a/Lib/test/test_tkinter/test_text.py +++ b/Lib/test/test_tkinter/test_text.py @@ -52,27 +52,47 @@ def test_count(self): options = ('chars', 'indices', 'lines', 'displaychars', 'displayindices', 'displaylines', 'xpixels', 'ypixels') + self.assertEqual(len(text.count('1.0', 'end', *options, return_ints=True)), 8) self.assertEqual(len(text.count('1.0', 'end', *options)), 8) - self.assertEqual(text.count('1.0', 'end', 'chars', 'lines'), (124, 4)) + self.assertEqual(text.count('1.0', 'end', 'chars', 'lines', return_ints=True), + (124, 4)) self.assertEqual(text.count('1.3', '4.5', 'chars', 'lines'), (92, 3)) + self.assertEqual(text.count('4.5', '1.3', 'chars', 'lines', return_ints=True), + (-92, -3)) self.assertEqual(text.count('4.5', '1.3', 'chars', 'lines'), (-92, -3)) + self.assertEqual(text.count('1.3', '1.3', 'chars', 'lines', return_ints=True), + (0, 0)) self.assertEqual(text.count('1.3', '1.3', 'chars', 'lines'), (0, 0)) - self.assertEqual(text.count('1.0', 'end', 'lines'), 4) - self.assertEqual(text.count('end', '1.0', 'lines'), -4) - self.assertEqual(text.count('1.3', '1.5', 'lines'), 0) - self.assertEqual(text.count('1.3', '1.3', 'lines'), 0) - 
self.assertEqual(text.count('1.0', 'end'), 124) # 'indices' by default - self.assertEqual(text.count('1.0', 'end', 'indices'), 124) + self.assertEqual(text.count('1.0', 'end', 'lines', return_ints=True), 4) + self.assertEqual(text.count('1.0', 'end', 'lines'), (4,)) + self.assertEqual(text.count('end', '1.0', 'lines', return_ints=True), -4) + self.assertEqual(text.count('end', '1.0', 'lines'), (-4,)) + self.assertEqual(text.count('1.3', '1.5', 'lines', return_ints=True), 0) + self.assertEqual(text.count('1.3', '1.5', 'lines'), None) + self.assertEqual(text.count('1.3', '1.3', 'lines', return_ints=True), 0) + self.assertEqual(text.count('1.3', '1.3', 'lines'), None) + # Count 'indices' by default. + self.assertEqual(text.count('1.0', 'end', return_ints=True), 124) + self.assertEqual(text.count('1.0', 'end'), (124,)) + self.assertEqual(text.count('1.0', 'end', 'indices', return_ints=True), 124) + self.assertEqual(text.count('1.0', 'end', 'indices'), (124,)) self.assertRaises(tkinter.TclError, text.count, '1.0', 'end', 'spam') self.assertRaises(tkinter.TclError, text.count, '1.0', 'end', '-lines') - self.assertIsInstance(text.count('1.3', '1.5', 'ypixels'), int) + self.assertIsInstance(text.count('1.3', '1.5', 'ypixels', return_ints=True), int) + self.assertIsInstance(text.count('1.3', '1.5', 'ypixels'), tuple) + self.assertIsInstance(text.count('1.3', '1.5', 'update', 'ypixels', return_ints=True), int) self.assertIsInstance(text.count('1.3', '1.5', 'update', 'ypixels'), int) - self.assertEqual(text.count('1.3', '1.3', 'update', 'ypixels'), 0) + self.assertEqual(text.count('1.3', '1.3', 'update', 'ypixels', return_ints=True), 0) + self.assertEqual(text.count('1.3', '1.3', 'update', 'ypixels'), None) + self.assertEqual(text.count('1.3', '1.5', 'update', 'indices', return_ints=True), 2) self.assertEqual(text.count('1.3', '1.5', 'update', 'indices'), 2) - self.assertEqual(text.count('1.3', '1.3', 'update', 'indices'), 0) - self.assertEqual(text.count('1.3', '1.5', 
'update'), 2) - self.assertEqual(text.count('1.3', '1.3', 'update'), 0) + self.assertEqual(text.count('1.3', '1.3', 'update', 'indices', return_ints=True), 0) + self.assertEqual(text.count('1.3', '1.3', 'update', 'indices'), None) + self.assertEqual(text.count('1.3', '1.5', 'update', return_ints=True), 2) + self.assertEqual(text.count('1.3', '1.5', 'update'), (2,)) + self.assertEqual(text.count('1.3', '1.3', 'update', return_ints=True), 0) + self.assertEqual(text.count('1.3', '1.3', 'update'), None) if __name__ == "__main__": diff --git a/Lib/tkinter/__init__.py b/Lib/tkinter/__init__.py index 2be9da2cfb9299..175bfbd7d912d2 100644 --- a/Lib/tkinter/__init__.py +++ b/Lib/tkinter/__init__.py @@ -3745,7 +3745,7 @@ def compare(self, index1, op, index2): return self.tk.getboolean(self.tk.call( self._w, 'compare', index1, op, index2)) - def count(self, index1, index2, *options): # new in Tk 8.5 + def count(self, index1, index2, *options, return_ints=False): # new in Tk 8.5 """Counts the number of relevant things between the two indices. If INDEX1 is after INDEX2, the result will be a negative number @@ -3753,19 +3753,26 @@ def count(self, index1, index2, *options): # new in Tk 8.5 The actual items which are counted depends on the options given. The result is a tuple of integers, one for the result of each - counting option given, if more than one option is specified, - otherwise it is an integer. Valid counting options are "chars", - "displaychars", "displayindices", "displaylines", "indices", - "lines", "xpixels" and "ypixels". The default value, if no - option is specified, is "indices". There is an additional possible - option "update", which if given then all subsequent options ensure - that any possible out of date information is recalculated.""" + counting option given, if more than one option is specified or + return_ints is false (default), otherwise it is an integer. 
+ Valid counting options are "chars", "displaychars", + "displayindices", "displaylines", "indices", "lines", "xpixels" + and "ypixels". The default value, if no option is specified, is + "indices". There is an additional possible option "update", + which if given then all subsequent options ensure that any + possible out of date information is recalculated. + """ options = ['-%s' % arg for arg in options] res = self.tk.call(self._w, 'count', *options, index1, index2) if not isinstance(res, int): res = self._getints(res) if len(res) == 1: res, = res + if not return_ints: + if not res: + res = None + elif len(options) <= 1: + res = (res,) return res def debug(self, boolean=None): diff --git a/Misc/NEWS.d/next/Library/2024-02-05-16-48-06.gh-issue-97928.JZCies.rst b/Misc/NEWS.d/next/Library/2024-02-05-16-48-06.gh-issue-97928.JZCies.rst new file mode 100644 index 00000000000000..24fed926a95513 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-02-05-16-48-06.gh-issue-97928.JZCies.rst @@ -0,0 +1,5 @@ +Partially revert the behavior of :meth:`tkinter.Text.count`. By default it +preserves the behavior of older Python versions, except that setting +``wantobjects`` to 0 no longer has effect. Add a new parameter *return_ints*: +specifying ``return_ints=True`` makes ``Text.count()`` always returning the +single count as an integer instead of a 1-tuple or ``None``. From d9d6909697501a2604d5895f9f88aeec61274ab0 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 11 Feb 2024 12:45:58 +0200 Subject: [PATCH 066/126] gh-115011: Improve support of __index__() in setters of members with unsigned integer type (GH-115029) Setters for members with an unsigned integer type now support the same range of valid values for objects that has a __index__() method as for int. Previously, Py_T_UINT, Py_T_ULONG and Py_T_ULLONG did not support objects that has a __index__() method larger than LONG_MAX. Py_T_ULLONG did not support negative ints. Now it supports them and emits a RuntimeWarning. 
--- Lib/test/test_capi/test_structmembers.py | 44 ++++------ ...-02-05-12-40-26.gh-issue-115011.L1AKF5.rst | 3 + Python/structmember.c | 81 ++++++++++--------- 3 files changed, 61 insertions(+), 67 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-02-05-12-40-26.gh-issue-115011.L1AKF5.rst diff --git a/Lib/test/test_capi/test_structmembers.py b/Lib/test/test_capi/test_structmembers.py index a294c3b13a5c30..08ca1f828529cf 100644 --- a/Lib/test/test_capi/test_structmembers.py +++ b/Lib/test/test_capi/test_structmembers.py @@ -81,36 +81,22 @@ def _test_int_range(self, name, minval, maxval, *, hardlimit=None, self._test_warn(name, maxval+1, minval) self._test_warn(name, hardmaxval) - if indexlimit is None: - indexlimit = hardlimit - if not indexlimit: + if indexlimit is False: self.assertRaises(TypeError, setattr, ts, name, Index(minval)) self.assertRaises(TypeError, setattr, ts, name, Index(maxval)) else: - hardminindexval, hardmaxindexval = indexlimit self._test_write(name, Index(minval), minval) - if minval < hardminindexval: - self._test_write(name, Index(hardminindexval), hardminindexval) - if maxval < hardmaxindexval: - self._test_write(name, Index(maxval), maxval) - else: - self._test_write(name, Index(hardmaxindexval), hardmaxindexval) - self._test_overflow(name, Index(hardminindexval-1)) - if name in ('T_UINT', 'T_ULONG'): - self.assertRaises(TypeError, setattr, self.ts, name, - Index(hardmaxindexval+1)) - self.assertRaises(TypeError, setattr, self.ts, name, - Index(2**1000)) - else: - self._test_overflow(name, Index(hardmaxindexval+1)) - self._test_overflow(name, Index(2**1000)) + self._test_write(name, Index(maxval), maxval) + self._test_overflow(name, Index(hardminval-1)) + self._test_overflow(name, Index(hardmaxval+1)) + self._test_overflow(name, Index(2**1000)) self._test_overflow(name, Index(-2**1000)) - if hardminindexval < minval and name != 'T_ULONGLONG': - self._test_warn(name, Index(hardminindexval)) - self._test_warn(name, 
Index(minval-1)) - if maxval < hardmaxindexval: - self._test_warn(name, Index(maxval+1)) - self._test_warn(name, Index(hardmaxindexval)) + if hardminval < minval: + self._test_warn(name, Index(hardminval)) + self._test_warn(name, Index(minval-1), maxval) + if maxval < hardmaxval: + self._test_warn(name, Index(maxval+1), minval) + self._test_warn(name, Index(hardmaxval)) def test_bool(self): ts = self.ts @@ -138,14 +124,12 @@ def test_int(self): self._test_int_range('T_INT', INT_MIN, INT_MAX, hardlimit=(LONG_MIN, LONG_MAX)) self._test_int_range('T_UINT', 0, UINT_MAX, - hardlimit=(LONG_MIN, ULONG_MAX), - indexlimit=(LONG_MIN, LONG_MAX)) + hardlimit=(LONG_MIN, ULONG_MAX)) def test_long(self): self._test_int_range('T_LONG', LONG_MIN, LONG_MAX) self._test_int_range('T_ULONG', 0, ULONG_MAX, - hardlimit=(LONG_MIN, ULONG_MAX), - indexlimit=(LONG_MIN, LONG_MAX)) + hardlimit=(LONG_MIN, ULONG_MAX)) def test_py_ssize_t(self): self._test_int_range('T_PYSSIZET', PY_SSIZE_T_MIN, PY_SSIZE_T_MAX, indexlimit=False) @@ -153,7 +137,7 @@ def test_py_ssize_t(self): def test_longlong(self): self._test_int_range('T_LONGLONG', LLONG_MIN, LLONG_MAX) self._test_int_range('T_ULONGLONG', 0, ULLONG_MAX, - indexlimit=(LONG_MIN, LONG_MAX)) + hardlimit=(LONG_MIN, ULLONG_MAX)) def test_bad_assignments(self): ts = self.ts diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-02-05-12-40-26.gh-issue-115011.L1AKF5.rst b/Misc/NEWS.d/next/Core and Builtins/2024-02-05-12-40-26.gh-issue-115011.L1AKF5.rst new file mode 100644 index 00000000000000..cf91a4f818bd44 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-02-05-12-40-26.gh-issue-115011.L1AKF5.rst @@ -0,0 +1,3 @@ +Setters for members with an unsigned integer type now support the same range +of valid values for objects that has a :meth:`~object.__index__` method as +for :class:`int`. 
diff --git a/Python/structmember.c b/Python/structmember.c index c9f03a464078d0..ba881d18a0973d 100644 --- a/Python/structmember.c +++ b/Python/structmember.c @@ -2,6 +2,8 @@ /* Map C struct members to Python object attributes */ #include "Python.h" +#include "pycore_abstract.h" // _PyNumber_Index() +#include "pycore_long.h" // _PyLong_IsNegative() PyObject * @@ -200,27 +202,22 @@ PyMember_SetOne(char *addr, PyMemberDef *l, PyObject *v) case Py_T_UINT: { /* XXX: For compatibility, accept negative int values as well. */ - int overflow; - long long_val = PyLong_AsLongAndOverflow(v, &overflow); - if (long_val == -1 && PyErr_Occurred()) { - return -1; - } - if (overflow < 0) { - PyErr_SetString(PyExc_OverflowError, - "Python int too large to convert to C long"); + v = _PyNumber_Index(v); + if (v == NULL) { return -1; } - else if (!overflow) { - *(unsigned int *)addr = (unsigned int)(unsigned long)long_val; - if (long_val < 0) { - WARN("Writing negative value into unsigned field"); - } - else if ((unsigned long)long_val > UINT_MAX) { - WARN("Truncation of value to unsigned short"); + if (_PyLong_IsNegative((PyLongObject *)v)) { + long long_val = PyLong_AsLong(v); + Py_DECREF(v); + if (long_val == -1 && PyErr_Occurred()) { + return -1; } + *(unsigned int *)addr = (unsigned int)(unsigned long)long_val; + WARN("Writing negative value into unsigned field"); } else { unsigned long ulong_val = PyLong_AsUnsignedLong(v); + Py_DECREF(v); if (ulong_val == (unsigned long)-1 && PyErr_Occurred()) { return -1; } @@ -240,24 +237,22 @@ PyMember_SetOne(char *addr, PyMemberDef *l, PyObject *v) case Py_T_ULONG: { /* XXX: For compatibility, accept negative int values as well. 
*/ - int overflow; - long long_val = PyLong_AsLongAndOverflow(v, &overflow); - if (long_val == -1 && PyErr_Occurred()) { - return -1; - } - if (overflow < 0) { - PyErr_SetString(PyExc_OverflowError, - "Python int too large to convert to C long"); + v = _PyNumber_Index(v); + if (v == NULL) { return -1; } - else if (!overflow) { - *(unsigned long *)addr = (unsigned long)long_val; - if (long_val < 0) { - WARN("Writing negative value into unsigned field"); + if (_PyLong_IsNegative((PyLongObject *)v)) { + long long_val = PyLong_AsLong(v); + Py_DECREF(v); + if (long_val == -1 && PyErr_Occurred()) { + return -1; } + *(unsigned long *)addr = (unsigned long)long_val; + WARN("Writing negative value into unsigned field"); } else { unsigned long ulong_val = PyLong_AsUnsignedLong(v); + Py_DECREF(v); if (ulong_val == (unsigned long)-1 && PyErr_Occurred()) { return -1; } @@ -313,18 +308,30 @@ PyMember_SetOne(char *addr, PyMemberDef *l, PyObject *v) return -1; break; } - case Py_T_ULONGLONG:{ - unsigned long long value; - /* ??? PyLong_AsLongLong accepts an int, but PyLong_AsUnsignedLongLong - doesn't ??? 
*/ - if (PyLong_Check(v)) - *(unsigned long long*)addr = value = PyLong_AsUnsignedLongLong(v); - else - *(unsigned long long*)addr = value = PyLong_AsLong(v); - if ((value == (unsigned long long)-1) && PyErr_Occurred()) + case Py_T_ULONGLONG: { + v = _PyNumber_Index(v); + if (v == NULL) { return -1; - break; } + if (_PyLong_IsNegative((PyLongObject *)v)) { + long long_val = PyLong_AsLong(v); + Py_DECREF(v); + if (long_val == -1 && PyErr_Occurred()) { + return -1; + } + *(unsigned long long *)addr = (unsigned long long)(long long)long_val; + WARN("Writing negative value into unsigned field"); + } + else { + unsigned long long ulonglong_val = PyLong_AsUnsignedLongLong(v); + Py_DECREF(v); + if (ulonglong_val == (unsigned long long)-1 && PyErr_Occurred()) { + return -1; + } + *(unsigned long long*)addr = ulonglong_val; + } + break; + } default: PyErr_Format(PyExc_SystemError, "bad memberdescr type for %s", l->name); From b1043607884d774acabd255ecdcebb159f76a2fb Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 11 Feb 2024 13:06:43 +0200 Subject: [PATCH 067/126] gh-49766: Make date-datetime comparison more symmetric and flexible (GH-114760) Now the special comparison methods like `__eq__` and `__lt__` return NotImplemented if one of comparands is date and other is datetime instead of ignoring the time part and the time zone or forcefully return "not equal" or raise TypeError. It makes comparison of date and datetime subclasses more symmetric and allows to change the default behavior by overriding the special comparison methods in subclasses. It is now the same as if date and datetime was independent classes. 
--- Doc/library/datetime.rst | 32 ++++++++-- Lib/_pydatetime.py | 35 ++++------ Lib/test/datetimetester.py | 64 +++++++++++-------- ...4-01-30-22-10-50.gh-issue-49766.yulJL_.rst | 8 +++ Modules/_datetimemodule.c | 36 +++-------- 5 files changed, 91 insertions(+), 84 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-30-22-10-50.gh-issue-49766.yulJL_.rst diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index 930af6cbbe9e8d..a46eed35ee2329 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -619,11 +619,27 @@ Notes: (4) :class:`date` objects are equal if they represent the same date. + :class:`!date` objects that are not also :class:`.datetime` instances + are never equal to :class:`!datetime` objects, even if they represent + the same date. + (5) *date1* is considered less than *date2* when *date1* precedes *date2* in time. In other words, ``date1 < date2`` if and only if ``date1.toordinal() < date2.toordinal()``. + Order comparison between a :class:`!date` object that is not also a + :class:`.datetime` instance and a :class:`!datetime` object raises + :exc:`TypeError`. + +.. versionchanged:: 3.13 + Comparison between :class:`.datetime` object and an instance of + the :class:`date` subclass that is not a :class:`!datetime` subclass + no longer coverts the latter to :class:`!date`, ignoring the time part + and the time zone. + The default behavior can be changed by overriding the special comparison + methods in subclasses. + In Boolean contexts, all :class:`date` objects are considered to be true. Instance methods: @@ -1192,9 +1208,6 @@ Supported operations: and time, taking into account the time zone. Naive and aware :class:`!datetime` objects are never equal. - :class:`!datetime` objects are never equal to :class:`date` objects - that are not also :class:`!datetime` instances, even if they represent - the same date. 
If both comparands are aware and have different :attr:`~.datetime.tzinfo` attributes, the comparison acts as comparands were first converted to UTC @@ -1206,9 +1219,8 @@ Supported operations: *datetime1* is considered less than *datetime2* when *datetime1* precedes *datetime2* in time, taking into account the time zone. - Order comparison between naive and aware :class:`.datetime` objects, - as well as a :class:`!datetime` object and a :class:`!date` object - that is not also a :class:`!datetime` instance, raises :exc:`TypeError`. + Order comparison between naive and aware :class:`.datetime` objects + raises :exc:`TypeError`. If both comparands are aware and have different :attr:`~.datetime.tzinfo` attributes, the comparison acts as comparands were first converted to UTC @@ -1218,6 +1230,14 @@ Supported operations: Equality comparisons between aware and naive :class:`.datetime` instances don't raise :exc:`TypeError`. +.. versionchanged:: 3.13 + Comparison between :class:`.datetime` object and an instance of + the :class:`date` subclass that is not a :class:`!datetime` subclass + no longer coverts the latter to :class:`!date`, ignoring the time part + and the time zone. + The default behavior can be changed by overriding the special comparison + methods in subclasses. + Instance methods: .. method:: datetime.date() diff --git a/Lib/_pydatetime.py b/Lib/_pydatetime.py index 54c12d3b2f3f16..b7d569cc41740e 100644 --- a/Lib/_pydatetime.py +++ b/Lib/_pydatetime.py @@ -556,10 +556,6 @@ def _check_tzinfo_arg(tz): if tz is not None and not isinstance(tz, tzinfo): raise TypeError("tzinfo argument must be None or of a tzinfo subclass") -def _cmperror(x, y): - raise TypeError("can't compare '%s' to '%s'" % ( - type(x).__name__, type(y).__name__)) - def _divide_and_round(a, b): """divide a by b and round result to the nearest integer @@ -1113,32 +1109,33 @@ def replace(self, year=None, month=None, day=None): # Comparisons of date objects with other. 
def __eq__(self, other): - if isinstance(other, date): + if isinstance(other, date) and not isinstance(other, datetime): return self._cmp(other) == 0 return NotImplemented def __le__(self, other): - if isinstance(other, date): + if isinstance(other, date) and not isinstance(other, datetime): return self._cmp(other) <= 0 return NotImplemented def __lt__(self, other): - if isinstance(other, date): + if isinstance(other, date) and not isinstance(other, datetime): return self._cmp(other) < 0 return NotImplemented def __ge__(self, other): - if isinstance(other, date): + if isinstance(other, date) and not isinstance(other, datetime): return self._cmp(other) >= 0 return NotImplemented def __gt__(self, other): - if isinstance(other, date): + if isinstance(other, date) and not isinstance(other, datetime): return self._cmp(other) > 0 return NotImplemented def _cmp(self, other): assert isinstance(other, date) + assert not isinstance(other, datetime) y, m, d = self._year, self._month, self._day y2, m2, d2 = other._year, other._month, other._day return _cmp((y, m, d), (y2, m2, d2)) @@ -2137,42 +2134,32 @@ def dst(self): def __eq__(self, other): if isinstance(other, datetime): return self._cmp(other, allow_mixed=True) == 0 - elif not isinstance(other, date): - return NotImplemented else: - return False + return NotImplemented def __le__(self, other): if isinstance(other, datetime): return self._cmp(other) <= 0 - elif not isinstance(other, date): - return NotImplemented else: - _cmperror(self, other) + return NotImplemented def __lt__(self, other): if isinstance(other, datetime): return self._cmp(other) < 0 - elif not isinstance(other, date): - return NotImplemented else: - _cmperror(self, other) + return NotImplemented def __ge__(self, other): if isinstance(other, datetime): return self._cmp(other) >= 0 - elif not isinstance(other, date): - return NotImplemented else: - _cmperror(self, other) + return NotImplemented def __gt__(self, other): if isinstance(other, datetime): return 
self._cmp(other) > 0 - elif not isinstance(other, date): - return NotImplemented else: - _cmperror(self, other) + return NotImplemented def _cmp(self, other, allow_mixed=False): assert isinstance(other, datetime) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 53ad5e57ada017..980a8e6c1b1836 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -5435,42 +5435,50 @@ def fromutc(self, dt): class Oddballs(unittest.TestCase): - def test_bug_1028306(self): + def test_date_datetime_comparison(self): + # bpo-1028306, bpo-5516 (gh-49766) # Trying to compare a date to a datetime should act like a mixed- # type comparison, despite that datetime is a subclass of date. as_date = date.today() as_datetime = datetime.combine(as_date, time()) - self.assertTrue(as_date != as_datetime) - self.assertTrue(as_datetime != as_date) - self.assertFalse(as_date == as_datetime) - self.assertFalse(as_datetime == as_date) - self.assertRaises(TypeError, lambda: as_date < as_datetime) - self.assertRaises(TypeError, lambda: as_datetime < as_date) - self.assertRaises(TypeError, lambda: as_date <= as_datetime) - self.assertRaises(TypeError, lambda: as_datetime <= as_date) - self.assertRaises(TypeError, lambda: as_date > as_datetime) - self.assertRaises(TypeError, lambda: as_datetime > as_date) - self.assertRaises(TypeError, lambda: as_date >= as_datetime) - self.assertRaises(TypeError, lambda: as_datetime >= as_date) - - # Nevertheless, comparison should work with the base-class (date) - # projection if use of a date method is forced. 
- self.assertEqual(as_date.__eq__(as_datetime), True) - different_day = (as_date.day + 1) % 20 + 1 - as_different = as_datetime.replace(day= different_day) - self.assertEqual(as_date.__eq__(as_different), False) + date_sc = SubclassDate(as_date.year, as_date.month, as_date.day) + datetime_sc = SubclassDatetime(as_date.year, as_date.month, + as_date.day, 0, 0, 0) + for d in (as_date, date_sc): + for dt in (as_datetime, datetime_sc): + for x, y in (d, dt), (dt, d): + self.assertTrue(x != y) + self.assertFalse(x == y) + self.assertRaises(TypeError, lambda: x < y) + self.assertRaises(TypeError, lambda: x <= y) + self.assertRaises(TypeError, lambda: x > y) + self.assertRaises(TypeError, lambda: x >= y) # And date should compare with other subclasses of date. If a # subclass wants to stop this, it's up to the subclass to do so. - date_sc = SubclassDate(as_date.year, as_date.month, as_date.day) - self.assertEqual(as_date, date_sc) - self.assertEqual(date_sc, as_date) - # Ditto for datetimes. - datetime_sc = SubclassDatetime(as_datetime.year, as_datetime.month, - as_date.day, 0, 0, 0) - self.assertEqual(as_datetime, datetime_sc) - self.assertEqual(datetime_sc, as_datetime) + for x, y in ((as_date, date_sc), + (date_sc, as_date), + (as_datetime, datetime_sc), + (datetime_sc, as_datetime)): + self.assertTrue(x == y) + self.assertFalse(x != y) + self.assertFalse(x < y) + self.assertFalse(x > y) + self.assertTrue(x <= y) + self.assertTrue(x >= y) + + # Nevertheless, comparison should work if other object is an instance + # of date or datetime class with overridden comparison operators. + # So special methods should return NotImplemented, as if + # date and datetime were independent classes. 
+ for x, y in (as_date, as_datetime), (as_datetime, as_date): + self.assertEqual(x.__eq__(y), NotImplemented) + self.assertEqual(x.__ne__(y), NotImplemented) + self.assertEqual(x.__lt__(y), NotImplemented) + self.assertEqual(x.__le__(y), NotImplemented) + self.assertEqual(x.__gt__(y), NotImplemented) + self.assertEqual(x.__ge__(y), NotImplemented) def test_extra_attributes(self): with self.assertWarns(DeprecationWarning): diff --git a/Misc/NEWS.d/next/Library/2024-01-30-22-10-50.gh-issue-49766.yulJL_.rst b/Misc/NEWS.d/next/Library/2024-01-30-22-10-50.gh-issue-49766.yulJL_.rst new file mode 100644 index 00000000000000..eaaa3ba1cb6f09 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-30-22-10-50.gh-issue-49766.yulJL_.rst @@ -0,0 +1,8 @@ +Fix :class:`~datetime.date`-:class:`~datetime.datetime` comparison. Now the +special comparison methods like ``__eq__`` and ``__lt__`` return +:data:`NotImplemented` if one of the comparands is :class:`!date` and the other is +:class:`!datetime` instead of ignoring the time part and the time zone or +forcefully returning "not equal" or raising :exc:`TypeError`. It makes comparison +of :class:`!date` and :class:`!datetime` subclasses more symmetric and +allows changing the default behavior by overriding the special comparison +methods in subclasses. diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 9b8e0a719d9048..b984ea61b82f0f 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -1816,16 +1816,6 @@ diff_to_bool(int diff, int op) Py_RETURN_RICHCOMPARE(diff, 0, op); } -/* Raises a "can't compare" TypeError and returns NULL. */ -static PyObject * -cmperror(PyObject *a, PyObject *b) -{ - PyErr_Format(PyExc_TypeError, - "can't compare %s to %s", - Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name); - return NULL; -} - /* --------------------------------------------------------------------------- * Class implementations. 
*/ @@ -3448,7 +3438,15 @@ date_isocalendar(PyDateTime_Date *self, PyObject *Py_UNUSED(ignored)) static PyObject * date_richcompare(PyObject *self, PyObject *other, int op) { - if (PyDate_Check(other)) { + /* Since DateTime is a subclass of Date, if the other object is + * a DateTime, it would compute an equality testing or an ordering + * based on the date part alone, and we don't want that. + * So return NotImplemented here in that case. + * If a subclass wants to change this, it's up to the subclass to do so. + * The behavior is the same as if Date and DateTime were independent + * classes. + */ + if (PyDate_Check(other) && !PyDateTime_Check(other)) { int diff = memcmp(((PyDateTime_Date *)self)->data, ((PyDateTime_Date *)other)->data, _PyDateTime_DATE_DATASIZE); @@ -5880,21 +5878,7 @@ datetime_richcompare(PyObject *self, PyObject *other, int op) PyObject *offset1, *offset2; int diff; - if (! PyDateTime_Check(other)) { - if (PyDate_Check(other)) { - /* Prevent invocation of date_richcompare. We want to - return NotImplemented here to give the other object - a chance. But since DateTime is a subclass of - Date, if the other object is a Date, it would - compute an ordering based on the date part alone, - and we don't want that. So force unequal or - uncomparable here in that case. */ - if (op == Py_EQ) - Py_RETURN_FALSE; - if (op == Py_NE) - Py_RETURN_TRUE; - return cmperror(self, other); - } + if (!PyDateTime_Check(other)) { Py_RETURN_NOTIMPLEMENTED; } From 2939ad02be62110ffa2ac6c4d9211c85e1d1720f Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 11 Feb 2024 15:19:44 +0200 Subject: [PATCH 068/126] gh-97959: Fix rendering of routines in pydoc (GH-113941) * Class methods no longer have "method of builtins.type instance" note. * Corresponding notes are now added for class and unbound methods. * Method and function aliases now have references to the module or the class where the origin was defined if it differs from the current. 
* Bound methods are now listed in the static methods section. * Methods of builtin classes are now supported as well as methods of Python classes. --- Lib/pydoc.py | 149 ++++++++++---- Lib/test/pydocfodder.py | 48 ++++- Lib/test/test_enum.py | 10 +- Lib/test/test_pydoc.py | 191 +++++++++++++++--- ...4-01-11-15-10-53.gh-issue-97959.UOj6d4.rst | 7 + 5 files changed, 333 insertions(+), 72 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-11-15-10-53.gh-issue-97959.UOj6d4.rst diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 96aa1dfc1aacf6..17f7346e5cc619 100755 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -225,6 +225,19 @@ def classname(object, modname): name = object.__module__ + '.' + name return name +def parentname(object, modname): + """Get a name of the enclosing class (qualified it with a module name + if necessary) or module.""" + if '.' in object.__qualname__: + name = object.__qualname__.rpartition('.')[0] + if object.__module__ != modname: + return object.__module__ + '.' 
+ name + else: + return name + else: + if object.__module__ != modname: + return object.__module__ + def isdata(object): """Check if an object is of a type that probably means it's data.""" return not (inspect.ismodule(object) or inspect.isclass(object) or @@ -319,13 +332,15 @@ def visiblename(name, all=None, obj=None): return not name.startswith('_') def classify_class_attrs(object): - """Wrap inspect.classify_class_attrs, with fixup for data descriptors.""" + """Wrap inspect.classify_class_attrs, with fixup for data descriptors and bound methods.""" results = [] for (name, kind, cls, value) in inspect.classify_class_attrs(object): if inspect.isdatadescriptor(value): kind = 'data descriptor' if isinstance(value, property) and value.fset is None: kind = 'readonly property' + elif kind == 'method' and _is_bound_method(value): + kind = 'static method' results.append((name, kind, cls, value)) return results @@ -681,6 +696,25 @@ def classlink(self, object, modname): module.__name__, name, classname(object, modname)) return classname(object, modname) + def parentlink(self, object, modname): + """Make a link for the enclosing class or module.""" + link = None + name, module = object.__name__, sys.modules.get(object.__module__) + if hasattr(module, name) and getattr(module, name) is object: + if '.' 
in object.__qualname__: + name = object.__qualname__.rpartition('.')[0] + if object.__module__ != modname: + link = '%s.html#%s' % (module.__name__, name) + else: + link = '#%s' % name + else: + if object.__module__ != modname: + link = '%s.html' % module.__name__ + if link: + return '%s' % (link, parentname(object, modname)) + else: + return parentname(object, modname) + def modulelink(self, object): """Make a link for a module.""" return '%s' % (object.__name__, object.__name__) @@ -925,7 +959,7 @@ def spill(msg, attrs, predicate): push(self.docdata(value, name, mod)) else: push(self.document(value, name, mod, - funcs, classes, mdict, object)) + funcs, classes, mdict, object, homecls)) push('\n') return attrs @@ -1043,24 +1077,44 @@ def formatvalue(self, object): return self.grey('=' + self.repr(object)) def docroutine(self, object, name=None, mod=None, - funcs={}, classes={}, methods={}, cl=None): + funcs={}, classes={}, methods={}, cl=None, homecls=None): """Produce HTML documentation for a function or method object.""" realname = object.__name__ name = name or realname - anchor = (cl and cl.__name__ or '') + '-' + name + if homecls is None: + homecls = cl + anchor = ('' if cl is None else cl.__name__) + '-' + name note = '' - skipdocs = 0 + skipdocs = False + imfunc = None if _is_bound_method(object): - imclass = object.__self__.__class__ - if cl: - if imclass is not cl: - note = ' from ' + self.classlink(imclass, mod) + imself = object.__self__ + if imself is cl: + imfunc = getattr(object, '__func__', None) + elif inspect.isclass(imself): + note = ' class method of %s' % self.classlink(imself, mod) else: - if object.__self__ is not None: - note = ' method of %s instance' % self.classlink( - object.__self__.__class__, mod) - else: - note = ' unbound %s method' % self.classlink(imclass,mod) + note = ' method of %s instance' % self.classlink( + imself.__class__, mod) + elif (inspect.ismethoddescriptor(object) or + inspect.ismethodwrapper(object)): + try: + 
objclass = object.__objclass__ + except AttributeError: + pass + else: + if cl is None: + note = ' unbound %s method' % self.classlink(objclass, mod) + elif objclass is not homecls: + note = ' from ' + self.classlink(objclass, mod) + else: + imfunc = object + if inspect.isfunction(imfunc) and homecls is not None and ( + imfunc.__module__ != homecls.__module__ or + imfunc.__qualname__ != homecls.__qualname__ + '.' + realname): + pname = self.parentlink(imfunc, mod) + if pname: + note = ' from %s' % pname if (inspect.iscoroutinefunction(object) or inspect.isasyncgenfunction(object)): @@ -1071,10 +1125,13 @@ def docroutine(self, object, name=None, mod=None, if name == realname: title = '%s' % (anchor, realname) else: - if cl and inspect.getattr_static(cl, realname, []) is object: + if (cl is not None and + inspect.getattr_static(cl, realname, []) is object): reallink = '%s' % ( cl.__name__ + '-' + realname, realname) - skipdocs = 1 + skipdocs = True + if note.startswith(' from '): + note = '' else: reallink = realname title = '%s = %s' % ( @@ -1102,7 +1159,7 @@ def docroutine(self, object, name=None, mod=None, doc = doc and '
<dd><span class="code">%s</span></dd>
' % doc return '
<dl><dt>%s</dt>%s</dl>
\n' % (decl, doc) - def docdata(self, object, name=None, mod=None, cl=None): + def docdata(self, object, name=None, mod=None, cl=None, *ignored): """Produce html documentation for a data descriptor.""" results = [] push = results.append @@ -1213,7 +1270,7 @@ def formattree(self, tree, modname, parent=None, prefix=''): entry, modname, c, prefix + ' ') return result - def docmodule(self, object, name=None, mod=None): + def docmodule(self, object, name=None, mod=None, *ignored): """Produce text documentation for a given module object.""" name = object.__name__ # ignore the passed-in name synop, desc = splitdoc(getdoc(object)) @@ -1392,7 +1449,7 @@ def spill(msg, attrs, predicate): push(self.docdata(value, name, mod)) else: push(self.document(value, - name, mod, object)) + name, mod, object, homecls)) return attrs def spilldescriptors(msg, attrs, predicate): @@ -1467,23 +1524,43 @@ def formatvalue(self, object): """Format an argument default value as text.""" return '=' + self.repr(object) - def docroutine(self, object, name=None, mod=None, cl=None): + def docroutine(self, object, name=None, mod=None, cl=None, homecls=None): """Produce text documentation for a function or method object.""" realname = object.__name__ name = name or realname + if homecls is None: + homecls = cl note = '' - skipdocs = 0 + skipdocs = False + imfunc = None if _is_bound_method(object): - imclass = object.__self__.__class__ - if cl: - if imclass is not cl: - note = ' from ' + classname(imclass, mod) + imself = object.__self__ + if imself is cl: + imfunc = getattr(object, '__func__', None) + elif inspect.isclass(imself): + note = ' class method of %s' % classname(imself, mod) else: - if object.__self__ is not None: - note = ' method of %s instance' % classname( - object.__self__.__class__, mod) - else: - note = ' unbound %s method' % classname(imclass,mod) + note = ' method of %s instance' % classname( + imself.__class__, mod) + elif (inspect.ismethoddescriptor(object) or + 
inspect.ismethodwrapper(object)): + try: + objclass = object.__objclass__ + except AttributeError: + pass + else: + if cl is None: + note = ' unbound %s method' % classname(objclass, mod) + elif objclass is not homecls: + note = ' from ' + classname(objclass, mod) + else: + imfunc = object + if inspect.isfunction(imfunc) and homecls is not None and ( + imfunc.__module__ != homecls.__module__ or + imfunc.__qualname__ != homecls.__qualname__ + '.' + realname): + pname = parentname(imfunc, mod) + if pname: + note = ' from %s' % pname if (inspect.iscoroutinefunction(object) or inspect.isasyncgenfunction(object)): @@ -1494,8 +1571,11 @@ def docroutine(self, object, name=None, mod=None, cl=None): if name == realname: title = self.bold(realname) else: - if cl and inspect.getattr_static(cl, realname, []) is object: - skipdocs = 1 + if (cl is not None and + inspect.getattr_static(cl, realname, []) is object): + skipdocs = True + if note.startswith(' from '): + note = '' title = self.bold(name) + ' = ' + realname argspec = None @@ -1517,7 +1597,7 @@ def docroutine(self, object, name=None, mod=None, cl=None): doc = getdoc(object) or '' return decl + '\n' + (doc and self.indent(doc).rstrip() + '\n') - def docdata(self, object, name=None, mod=None, cl=None): + def docdata(self, object, name=None, mod=None, cl=None, *ignored): """Produce text documentation for a data descriptor.""" results = [] push = results.append @@ -1533,7 +1613,8 @@ def docdata(self, object, name=None, mod=None, cl=None): docproperty = docdata - def docother(self, object, name=None, mod=None, parent=None, maxlen=None, doc=None): + def docother(self, object, name=None, mod=None, parent=None, *ignored, + maxlen=None, doc=None): """Produce text documentation for a data object.""" repr = self.repr(object) if maxlen: diff --git a/Lib/test/pydocfodder.py b/Lib/test/pydocfodder.py index a3ef2231243954..27037e048db819 100644 --- a/Lib/test/pydocfodder.py +++ b/Lib/test/pydocfodder.py @@ -2,6 +2,12 @@ import types 
+def global_func(x, y): + """Module global function""" + +def global_func2(x, y): + """Module global function 2""" + class A: "A class." @@ -26,7 +32,7 @@ def A_classmethod(cls, x): "A class method defined in A." A_classmethod = classmethod(A_classmethod) - def A_staticmethod(): + def A_staticmethod(x, y): "A static method defined in A." A_staticmethod = staticmethod(A_staticmethod) @@ -61,6 +67,28 @@ def BD_method(self): def BCD_method(self): "Method defined in B, C and D." + @classmethod + def B_classmethod(cls, x): + "A class method defined in B." + + global_func = global_func # same name + global_func_alias = global_func + global_func2_alias = global_func2 + B_classmethod_alias = B_classmethod + A_classmethod_ref = A.A_classmethod + A_staticmethod = A.A_staticmethod # same name + A_staticmethod_alias = A.A_staticmethod + A_method_ref = A().A_method + A_method_alias = A.A_method + B_method_alias = B_method + __repr__ = object.__repr__ # same name + object_repr = object.__repr__ + get = {}.get # same name + dict_get = {}.get + +B.B_classmethod_ref = B.B_classmethod + + class C(A): "A class, derived from A." 
@@ -136,3 +164,21 @@ def __call__(self, inst): submodule = types.ModuleType(__name__ + '.submodule', """A submodule, which should appear in its parent's summary""") + +global_func_alias = global_func +A_classmethod = A.A_classmethod # same name +A_classmethod2 = A.A_classmethod +A_classmethod3 = B.A_classmethod +A_staticmethod = A.A_staticmethod # same name +A_staticmethod_alias = A.A_staticmethod +A_staticmethod_ref = A().A_staticmethod +A_staticmethod_ref2 = B().A_staticmethod +A_method = A().A_method # same name +A_method2 = A().A_method +A_method3 = B().A_method +B_method = B.B_method # same name +B_method2 = B.B_method +count = list.count # same name +list_count = list.count +get = {}.get # same name +dict_get = {}.get diff --git a/Lib/test/test_enum.py b/Lib/test/test_enum.py index f7503331c1ac1d..5d7dae8829574b 100644 --- a/Lib/test/test_enum.py +++ b/Lib/test/test_enum.py @@ -4851,22 +4851,22 @@ class Color(enum.Enum) | The value of the Enum member. | | ---------------------------------------------------------------------- - | Methods inherited from enum.EnumType: + | Static methods inherited from enum.EnumType: | - | __contains__(value) from enum.EnumType + | __contains__(value) | Return True if `value` is in `cls`. | | `value` is in `cls` if: | 1) `value` is a member of `cls`, or | 2) `value` is the value of one of the `cls`'s members. | - | __getitem__(name) from enum.EnumType + | __getitem__(name) | Return the member matching `name`. | - | __iter__() from enum.EnumType + | __iter__() | Return members in definition order. 
| - | __len__() from enum.EnumType + | __len__() | Return the number of members (no aliases) | | ---------------------------------------------------------------------- diff --git a/Lib/test/test_pydoc.py b/Lib/test/test_pydoc.py index 99b19d01783a10..f3c26624c624f5 100644 --- a/Lib/test/test_pydoc.py +++ b/Lib/test/test_pydoc.py @@ -35,6 +35,7 @@ requires_docstrings, MISSING_C_DOCSTRINGS) from test.support.os_helper import (TESTFN, rmtree, unlink) from test import pydoc_mod +from test import pydocfodder class nonascii: @@ -102,7 +103,7 @@ class C(builtins.object) | ---------------------------------------------------------------------- | Class methods defined here: | - | __class_getitem__(item) from builtins.type + | __class_getitem__(item) | | ---------------------------------------------------------------------- | Data descriptors defined here: @@ -166,7 +167,7 @@ class A(builtins.object) Methods defined here: __init__() Wow, I have no function! - + ---------------------------------------------------------------------- Data descriptors defined here: __dict__ dictionary for instance variables @@ -179,6 +180,7 @@ class B(builtins.object) dictionary for instance variables __weakref__ list of weak references to the object + ---------------------------------------------------------------------- Data and other attributes defined here: NO_MEANING = 'eggs' __annotations__ = {'NO_MEANING': } @@ -191,8 +193,10 @@ class C(builtins.object) is_it_true(self) Return self.get_answer() say_no(self) + ---------------------------------------------------------------------- Class methods defined here: - __class_getitem__(item) from builtins.type + __class_getitem__(item) + ---------------------------------------------------------------------- Data descriptors defined here: __dict__ dictionary for instance variables @@ -330,6 +334,10 @@ def get_pydoc_html(module): loc = "
Module Docs" return output.strip(), loc +def clean_text(doc): + # clean up the extra text formatting that pydoc performs + return re.sub('\b.', '', doc) + def get_pydoc_link(module): "Returns a documentation web link of a module" abspath = os.path.abspath @@ -347,10 +355,7 @@ def get_pydoc_text(module): loc = "\nMODULE DOCS\n " + loc + "\n" output = doc.docmodule(module) - - # clean up the extra text formatting that pydoc performs - patt = re.compile('\b.') - output = patt.sub('', output) + output = clean_text(output) return output.strip(), loc def get_html_title(text): @@ -367,6 +372,7 @@ def html2text(html): Tailored for pydoc tests only. """ html = html.replace("
", "\n") + html = html.replace("
", "-"*70) html = re.sub("<.*?>", "", html) html = pydoc.replace(html, " ", " ", ">", ">", "<", "<") return html @@ -798,8 +804,7 @@ def itemconfigure(self, tagOrId, cnf=None, **kw): b_size = A.a_size doc = pydoc.render_doc(B) - # clean up the extra text formatting that pydoc performs - doc = re.sub('\b.', '', doc) + doc = clean_text(doc) self.assertEqual(doc, '''\ Python Library Documentation: class B in module %s @@ -887,8 +892,7 @@ def __init__(self, ... doc = pydoc.render_doc(A) - # clean up the extra text formatting that pydoc performs - doc = re.sub('\b.', '', doc) + doc = clean_text(doc) self.assertEqual(doc, '''Python Library Documentation: class A in module %s class A(builtins.object) @@ -925,8 +929,7 @@ def func( ... doc = pydoc.render_doc(func) - # clean up the extra text formatting that pydoc performs - doc = re.sub('\b.', '', doc) + doc = clean_text(doc) self.assertEqual(doc, '''Python Library Documentation: function func in module %s func( @@ -942,8 +945,7 @@ def function_with_really_long_name_so_annotations_can_be_rather_small( ... doc = pydoc.render_doc(function_with_really_long_name_so_annotations_can_be_rather_small) - # clean up the extra text formatting that pydoc performs - doc = re.sub('\b.', '', doc) + doc = clean_text(doc) self.assertEqual(doc, '''Python Library Documentation: function function_with_really_long_name_so_annotations_can_be_rather_small in module %s function_with_really_long_name_so_annotations_can_be_rather_small( @@ -957,8 +959,7 @@ def function_with_really_long_name_so_annotations_can_be_rather_small( second_very_long_parameter_name: ... 
doc = pydoc.render_doc(does_not_have_name) - # clean up the extra text formatting that pydoc performs - doc = re.sub('\b.', '', doc) + doc = clean_text(doc) self.assertEqual(doc, '''Python Library Documentation: function in module %s lambda very_long_parameter_name_that_should_not_fit_into_a_single_line, second_very_long_parameter_name @@ -1244,7 +1245,7 @@ def test_unbound_python_method(self): @requires_docstrings def test_unbound_builtin_method(self): self.assertEqual(self._get_summary_line(_pickle.Pickler.dump), - "dump(self, obj, /)") + "dump(self, obj, /) unbound _pickle.Pickler method") # these no longer include "self" def test_bound_python_method(self): @@ -1296,7 +1297,7 @@ def test_module_level_callable_o(self): def test_unbound_builtin_method_noargs(self): self.assertEqual(self._get_summary_line(str.lower), - "lower(self, /)") + "lower(self, /) unbound builtins.str method") def test_bound_builtin_method_noargs(self): self.assertEqual(self._get_summary_line(''.lower), @@ -1304,7 +1305,7 @@ def test_bound_builtin_method_noargs(self): def test_unbound_builtin_method_o(self): self.assertEqual(self._get_summary_line(set.add), - "add(self, object, /)") + "add(self, object, /) unbound builtins.set method") def test_bound_builtin_method_o(self): self.assertEqual(self._get_summary_line(set().add), @@ -1312,7 +1313,7 @@ def test_bound_builtin_method_o(self): def test_unbound_builtin_method_coexist_o(self): self.assertEqual(self._get_summary_line(set.__contains__), - "__contains__(self, object, /)") + "__contains__(self, object, /) unbound builtins.set method") def test_bound_builtin_method_coexist_o(self): self.assertEqual(self._get_summary_line(set().__contains__), @@ -1320,19 +1321,19 @@ def test_bound_builtin_method_coexist_o(self): def test_unbound_builtin_classmethod_noargs(self): self.assertEqual(self._get_summary_line(datetime.datetime.__dict__['utcnow']), - "utcnow(type, /)") + "utcnow(type, /) unbound datetime.datetime method") def 
test_bound_builtin_classmethod_noargs(self): self.assertEqual(self._get_summary_line(datetime.datetime.utcnow), - "utcnow() method of builtins.type instance") + "utcnow() class method of datetime.datetime") def test_unbound_builtin_classmethod_o(self): self.assertEqual(self._get_summary_line(dict.__dict__['__class_getitem__']), - "__class_getitem__(type, object, /)") + "__class_getitem__(type, object, /) unbound builtins.dict method") def test_bound_builtin_classmethod_o(self): self.assertEqual(self._get_summary_line(dict.__class_getitem__), - "__class_getitem__(object, /) method of builtins.type instance") + "__class_getitem__(object, /) class method of builtins.dict") @support.cpython_only @requires_docstrings @@ -1356,11 +1357,13 @@ def test_builtin_staticmethod_unrepresentable_default(self): @requires_docstrings def test_unbound_builtin_method_unrepresentable_default(self): self.assertEqual(self._get_summary_line(dict.pop), - "pop(self, key, default=, /)") + "pop(self, key, default=, /) " + "unbound builtins.dict method") import _testcapi cls = _testcapi.DocStringUnrepresentableSignatureTest self.assertEqual(self._get_summary_line(cls.meth), - "meth(self, /, a, b=)") + "meth(self, /, a, b=) unbound " + "_testcapi.DocStringUnrepresentableSignatureTest method") @support.cpython_only @requires_docstrings @@ -1381,7 +1384,8 @@ def test_unbound_builtin_classmethod_unrepresentable_default(self): cls = _testcapi.DocStringUnrepresentableSignatureTest descr = cls.__dict__['classmeth'] self.assertEqual(self._get_summary_line(descr), - "classmeth(type, /, a, b=)") + "classmeth(type, /, a, b=) unbound " + "_testcapi.DocStringUnrepresentableSignatureTest method") @support.cpython_only @requires_docstrings @@ -1389,7 +1393,8 @@ def test_bound_builtin_classmethod_unrepresentable_default(self): import _testcapi cls = _testcapi.DocStringUnrepresentableSignatureTest self.assertEqual(self._get_summary_line(cls.classmeth), - "classmeth(a, b=) method of builtins.type instance") + 
"classmeth(a, b=) class method of " + "_testcapi.DocStringUnrepresentableSignatureTest") def test_overridden_text_signature(self): class C: @@ -1423,7 +1428,7 @@ def smeth(*args, **kwargs): "meth" + bound + " method of test.test_pydoc.C instance") C.cmeth.__func__.__text_signature__ = text_signature self.assertEqual(self._get_summary_line(C.cmeth), - "cmeth" + bound + " method of builtins.type instance") + "cmeth" + bound + " class method of test.test_pydoc.C") C.smeth.__text_signature__ = text_signature self.assertEqual(self._get_summary_line(C.smeth), "smeth" + unbound) @@ -1460,13 +1465,13 @@ def cm(cls, x): 'cm(...)\n' ' A class method\n') self.assertEqual(self._get_summary_lines(X.cm), """\ -cm(x) method of builtins.type instance +cm(x) class method of test.test_pydoc.X A class method """) self.assertIn(""" | Class methods defined here: | - | cm(x) from builtins.type + | cm(x) | A class method """, pydoc.plain(pydoc.render_doc(X))) @@ -1623,6 +1628,128 @@ def a_fn_with_https_link(): ) +class PydocFodderTest(unittest.TestCase): + + def getsection(self, text, beginline, endline): + lines = text.splitlines() + beginindex, endindex = 0, None + if beginline is not None: + beginindex = lines.index(beginline) + if endline is not None: + endindex = lines.index(endline, beginindex) + return lines[beginindex:endindex] + + def test_text_doc_routines_in_class(self, cls=pydocfodder.B): + doc = pydoc.TextDoc() + result = doc.docclass(cls) + result = clean_text(result) + where = 'defined here' if cls is pydocfodder.B else 'inherited from B' + lines = self.getsection(result, f' | Methods {where}:', ' | ' + '-'*70) + self.assertIn(' | A_method_alias = A_method(self)', lines) + self.assertIn(' | B_method_alias = B_method(self)', lines) + self.assertIn(' | A_staticmethod(x, y) from test.pydocfodder.A', lines) + self.assertIn(' | A_staticmethod_alias = A_staticmethod(x, y)', lines) + self.assertIn(' | global_func(x, y) from test.pydocfodder', lines) + self.assertIn(' | 
global_func_alias = global_func(x, y)', lines) + self.assertIn(' | global_func2_alias = global_func2(x, y) from test.pydocfodder', lines) + self.assertIn(' | __repr__(self, /) from builtins.object', lines) + self.assertIn(' | object_repr = __repr__(self, /)', lines) + + lines = self.getsection(result, f' | Static methods {where}:', ' | ' + '-'*70) + self.assertIn(' | A_classmethod_ref = A_classmethod(x) class method of test.pydocfodder.A', lines) + note = '' if cls is pydocfodder.B else ' class method of test.pydocfodder.B' + self.assertIn(' | B_classmethod_ref = B_classmethod(x)' + note, lines) + self.assertIn(' | A_method_ref = A_method() method of test.pydocfodder.A instance', lines) + self.assertIn(' | get(key, default=None, /) method of builtins.dict instance', lines) + self.assertIn(' | dict_get = get(key, default=None, /) method of builtins.dict instance', lines) + + lines = self.getsection(result, f' | Class methods {where}:', ' | ' + '-'*70) + self.assertIn(' | B_classmethod(x)', lines) + self.assertIn(' | B_classmethod_alias = B_classmethod(x)', lines) + + def test_html_doc_routines_in_class(self, cls=pydocfodder.B): + doc = pydoc.HTMLDoc() + result = doc.docclass(cls) + result = html2text(result) + where = 'defined here' if cls is pydocfodder.B else 'inherited from B' + lines = self.getsection(result, f'Methods {where}:', '-'*70) + self.assertIn('A_method_alias = A_method(self)', lines) + self.assertIn('B_method_alias = B_method(self)', lines) + self.assertIn('A_staticmethod(x, y) from test.pydocfodder.A', lines) + self.assertIn('A_staticmethod_alias = A_staticmethod(x, y)', lines) + self.assertIn('global_func(x, y) from test.pydocfodder', lines) + self.assertIn('global_func_alias = global_func(x, y)', lines) + self.assertIn('global_func2_alias = global_func2(x, y) from test.pydocfodder', lines) + self.assertIn('__repr__(self, /) from builtins.object', lines) + self.assertIn('object_repr = __repr__(self, /)', lines) + + lines = self.getsection(result, 
f'Static methods {where}:', '-'*70) + self.assertIn('A_classmethod_ref = A_classmethod(x) class method of test.pydocfodder.A', lines) + note = '' if cls is pydocfodder.B else ' class method of test.pydocfodder.B' + self.assertIn('B_classmethod_ref = B_classmethod(x)' + note, lines) + self.assertIn('A_method_ref = A_method() method of test.pydocfodder.A instance', lines) + + lines = self.getsection(result, f'Class methods {where}:', '-'*70) + self.assertIn('B_classmethod(x)', lines) + self.assertIn('B_classmethod_alias = B_classmethod(x)', lines) + + def test_text_doc_inherited_routines_in_class(self): + self.test_text_doc_routines_in_class(pydocfodder.D) + + def test_html_doc_inherited_routines_in_class(self): + self.test_html_doc_routines_in_class(pydocfodder.D) + + def test_text_doc_routines_in_module(self): + doc = pydoc.TextDoc() + result = doc.docmodule(pydocfodder) + result = clean_text(result) + lines = self.getsection(result, 'FUNCTIONS', 'FILE') + # function alias + self.assertIn(' global_func_alias = global_func(x, y)', lines) + self.assertIn(' A_staticmethod(x, y)', lines) + self.assertIn(' A_staticmethod_alias = A_staticmethod(x, y)', lines) + # bound class methods + self.assertIn(' A_classmethod(x) class method of A', lines) + self.assertIn(' A_classmethod2 = A_classmethod(x) class method of A', lines) + self.assertIn(' A_classmethod3 = A_classmethod(x) class method of B', lines) + # bound methods + self.assertIn(' A_method() method of A instance', lines) + self.assertIn(' A_method2 = A_method() method of A instance', lines) + self.assertIn(' A_method3 = A_method() method of B instance', lines) + self.assertIn(' A_staticmethod_ref = A_staticmethod(x, y)', lines) + self.assertIn(' A_staticmethod_ref2 = A_staticmethod(y) method of B instance', lines) + self.assertIn(' get(key, default=None, /) method of builtins.dict instance', lines) + self.assertIn(' dict_get = get(key, default=None, /) method of builtins.dict instance', lines) + # unbound methods + 
self.assertIn(' B_method(self)', lines) + self.assertIn(' B_method2 = B_method(self)', lines) + + def test_html_doc_routines_in_module(self): + doc = pydoc.HTMLDoc() + result = doc.docmodule(pydocfodder) + result = html2text(result) + lines = self.getsection(result, ' Functions', None) + # function alias + self.assertIn(' global_func_alias = global_func(x, y)', lines) + self.assertIn(' A_staticmethod(x, y)', lines) + self.assertIn(' A_staticmethod_alias = A_staticmethod(x, y)', lines) + # bound class methods + self.assertIn('A_classmethod(x) class method of A', lines) + self.assertIn(' A_classmethod2 = A_classmethod(x) class method of A', lines) + self.assertIn(' A_classmethod3 = A_classmethod(x) class method of B', lines) + # bound methods + self.assertIn(' A_method() method of A instance', lines) + self.assertIn(' A_method2 = A_method() method of A instance', lines) + self.assertIn(' A_method3 = A_method() method of B instance', lines) + self.assertIn(' A_staticmethod_ref = A_staticmethod(x, y)', lines) + self.assertIn(' A_staticmethod_ref2 = A_staticmethod(y) method of B instance', lines) + self.assertIn(' get(key, default=None, /) method of builtins.dict instance', lines) + self.assertIn(' dict_get = get(key, default=None, /) method of builtins.dict instance', lines) + # unbound methods + self.assertIn(' B_method(self)', lines) + self.assertIn(' B_method2 = B_method(self)', lines) + + @unittest.skipIf( is_emscripten or is_wasi, "Socket server not available on Emscripten/WASI." diff --git a/Misc/NEWS.d/next/Library/2024-01-11-15-10-53.gh-issue-97959.UOj6d4.rst b/Misc/NEWS.d/next/Library/2024-01-11-15-10-53.gh-issue-97959.UOj6d4.rst new file mode 100644 index 00000000000000..a317271947dc37 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-11-15-10-53.gh-issue-97959.UOj6d4.rst @@ -0,0 +1,7 @@ +Fix rendering class methods, bound methods, method and function aliases in +:mod:`pydoc`. Class methods no longer have "method of builtins.type +instance" note. 
Corresponding notes are now added for class and unbound +methods. Method and function aliases now have references to the module or +the class where the origin was defined if it differs from the current. Bound +methods are now listed in the static methods section. Methods of builtin +classes are now supported as well as methods of Python classes. From cc573c70b7d5e169de2a6e4297068de407dc8d4d Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sun, 11 Feb 2024 19:07:08 +0300 Subject: [PATCH 069/126] gh-115282: Fix direct invocation of `test_traceback.py` (#115283) --- Lib/test/test_traceback.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index 372fc48bf81a6a..dd9b1850adf086 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -3124,10 +3124,13 @@ def test_smoke_user_exception(self): class MyException(Exception): pass - self.do_test_smoke( - MyException('bad things happened'), - ('test.test_traceback.TestTracebackException.' - 'test_smoke_user_exception.<locals>.MyException')) + if __name__ == '__main__': + expected = ('TestTracebackException.' + 'test_smoke_user_exception.<locals>.MyException') + else: + expected = ('test.test_traceback.TestTracebackException.' + 'test_smoke_user_exception.<locals>.MyException') + self.do_test_smoke(MyException('bad things happened'), expected) def test_from_exception(self): # Check all the parameters are accepted. 
From e1552fd19de17e7a6daa3c2a6d1ca207bb8eaf8e Mon Sep 17 00:00:00 2001 From: Skip Montanaro Date: Sun, 11 Feb 2024 12:51:07 -0600 Subject: [PATCH 070/126] gh-101100: Clean up Doc/c-api/exceptions.rst and Doc/c-api/sys.rst (GH-114825) --- Doc/c-api/exceptions.rst | 14 +++++++------- Doc/c-api/sys.rst | 33 ++++++++++++++++++++++----------- Doc/tools/.nitignore | 2 -- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst index eaf723fb2cc4cf..e6309ae7614d34 100644 --- a/Doc/c-api/exceptions.rst +++ b/Doc/c-api/exceptions.rst @@ -396,7 +396,7 @@ an error value). .. c:function:: int PyErr_ResourceWarning(PyObject *source, Py_ssize_t stack_level, const char *format, ...) Function similar to :c:func:`PyErr_WarnFormat`, but *category* is - :exc:`ResourceWarning` and it passes *source* to :func:`warnings.WarningMessage`. + :exc:`ResourceWarning` and it passes *source* to :class:`!warnings.WarningMessage`. .. versionadded:: 3.6 @@ -732,7 +732,7 @@ Exception Classes This creates a class object derived from :exc:`Exception` (accessible in C as :c:data:`PyExc_Exception`). - The :attr:`__module__` attribute of the new class is set to the first part (up + The :attr:`!__module__` attribute of the new class is set to the first part (up to the last dot) of the *name* argument, and the class name is set to the last part (after the last dot). The *base* argument can be used to specify alternate base classes; it can either be only one class or a tuple of classes. The *dict* @@ -904,8 +904,8 @@ because the :ref:`call protocol <call>` takes care of recursion handling. Marks a point where a recursive C-level call is about to be performed. - If :c:macro:`USE_STACKCHECK` is defined, this function checks if the OS - stack overflowed using :c:func:`PyOS_CheckStack`. In this is the case, it + If :c:macro:`!USE_STACKCHECK` is defined, this function checks if the OS + stack overflowed using :c:func:`PyOS_CheckStack`. 
If this is the case, it sets a :exc:`MemoryError` and returns a nonzero value. The function then checks if the recursion limit is reached. If this is the @@ -1158,11 +1158,11 @@ These are compatibility aliases to :c:data:`PyExc_OSError`: +-------------------------------------+----------+ | C Name | Notes | +=====================================+==========+ -| :c:data:`PyExc_EnvironmentError` | | +| :c:data:`!PyExc_EnvironmentError` | | +-------------------------------------+----------+ -| :c:data:`PyExc_IOError` | | +| :c:data:`!PyExc_IOError` | | +-------------------------------------+----------+ -| :c:data:`PyExc_WindowsError` | [2]_ | +| :c:data:`!PyExc_WindowsError` | [2]_ | +-------------------------------------+----------+ .. versionchanged:: 3.3 diff --git a/Doc/c-api/sys.rst b/Doc/c-api/sys.rst index 35969b30120d2a..d6fca1a0b0a219 100644 --- a/Doc/c-api/sys.rst +++ b/Doc/c-api/sys.rst @@ -5,6 +5,7 @@ Operating System Utilities ========================== + .. c:function:: PyObject* PyOS_FSPath(PyObject *path) Return the file system representation for *path*. If the object is a @@ -97,27 +98,30 @@ Operating System Utilities .. c:function:: int PyOS_CheckStack() + .. index:: single: USE_STACKCHECK (C macro) + Return true when the interpreter runs out of stack space. This is a reliable - check, but is only available when :c:macro:`USE_STACKCHECK` is defined (currently + check, but is only available when :c:macro:`!USE_STACKCHECK` is defined (currently on certain versions of Windows using the Microsoft Visual C++ compiler). - :c:macro:`USE_STACKCHECK` will be defined automatically; you should never + :c:macro:`!USE_STACKCHECK` will be defined automatically; you should never change the definition in your own code. +.. c:type:: void (*PyOS_sighandler_t)(int) + + .. c:function:: PyOS_sighandler_t PyOS_getsig(int i) Return the current signal handler for signal *i*. This is a thin wrapper around either :c:func:`!sigaction` or :c:func:`!signal`. 
Do not call those functions - directly! :c:type:`PyOS_sighandler_t` is a typedef alias for :c:expr:`void - (\*)(int)`. + directly! .. c:function:: PyOS_sighandler_t PyOS_setsig(int i, PyOS_sighandler_t h) Set the signal handler for signal *i* to be *h*; return the old signal handler. This is a thin wrapper around either :c:func:`!sigaction` or :c:func:`!signal`. Do - not call those functions directly! :c:type:`PyOS_sighandler_t` is a typedef - alias for :c:expr:`void (\*)(int)`. + not call those functions directly! .. c:function:: wchar_t* Py_DecodeLocale(const char* arg, size_t *size) @@ -342,10 +346,8 @@ accessible to C code. They all work with the current interpreter thread's silently abort the operation by raising an error subclassed from :class:`Exception` (other errors will not be silenced). - The hook function is of type :c:expr:`int (*)(const char *event, PyObject - *args, void *userData)`, where *args* is guaranteed to be a - :c:type:`PyTupleObject`. The hook function is always called with the GIL - held by the Python interpreter that raised the event. + The hook function is always called with the GIL held by the Python + interpreter that raised the event. See :pep:`578` for a detailed description of auditing. Functions in the runtime and standard library that raise events are listed in the @@ -354,12 +356,21 @@ accessible to C code. They all work with the current interpreter thread's .. audit-event:: sys.addaudithook "" c.PySys_AddAuditHook - If the interpreter is initialized, this function raises a auditing event + If the interpreter is initialized, this function raises an auditing event ``sys.addaudithook`` with no arguments. If any existing hooks raise an exception derived from :class:`Exception`, the new hook will not be added and the exception is cleared. As a result, callers cannot assume that their hook has been added unless they control all existing hooks. + .. c:namespace:: NULL + .. 
c:type:: int (*Py_AuditHookFunction) (const char *event, PyObject *args, void *userData) + + The type of the hook function. + *event* is the C string event argument passed to :c:func:`PySys_Audit` or + :c:func:`PySys_AuditTuple`. + *args* is guaranteed to be a :c:type:`PyTupleObject`. + *userData* is the argument passed to PySys_AddAuditHook(). + .. versionadded:: 3.8 diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 2af116c2d79c54..33129e898e51d6 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -3,14 +3,12 @@ # Keep lines sorted lexicographically to help avoid merge conflicts. Doc/c-api/descriptor.rst -Doc/c-api/exceptions.rst Doc/c-api/float.rst Doc/c-api/init.rst Doc/c-api/init_config.rst Doc/c-api/intro.rst Doc/c-api/module.rst Doc/c-api/stable.rst -Doc/c-api/sys.rst Doc/c-api/type.rst Doc/c-api/typeobj.rst Doc/extending/extending.rst From 54bde5dcc3c04c4ddebcc9df2904ab325fa0b486 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Mon, 12 Feb 2024 10:27:12 +0300 Subject: [PATCH 071/126] gh-87804: Fix error handling and style in `_pystatvfs_fromstructstatfs` (#115236) --- Modules/posixmodule.c | 64 ++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index d05b4ba723ce8c..17032d9d490c78 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -12894,7 +12894,7 @@ os_WSTOPSIG_impl(PyObject *module, int status) #ifdef __APPLE__ /* On macOS struct statvfs uses 32-bit integers for block counts, - * resulting in overflow when filesystems are larger tan 4TB. Therefore + * resulting in overflow when filesystems are larger than 4TB. Therefore * os.statvfs is implemented in terms of statfs(2). 
*/ @@ -12902,41 +12902,43 @@ static PyObject* _pystatvfs_fromstructstatfs(PyObject *module, struct statfs st) { PyObject *StatVFSResultType = get_posix_state(module)->StatVFSResultType; PyObject *v = PyStructSequence_New((PyTypeObject *)StatVFSResultType); - if (v == NULL) + if (v == NULL) { return NULL; + } - long flags = 0; - if (st.f_flags & MNT_RDONLY) { - flags |= ST_RDONLY; - } - if (st.f_flags & MNT_NOSUID) { - flags |= ST_NOSUID; - } + long flags = 0; + if (st.f_flags & MNT_RDONLY) { + flags |= ST_RDONLY; + } + if (st.f_flags & MNT_NOSUID) { + flags |= ST_NOSUID; + } - _Static_assert(sizeof(st.f_blocks) == sizeof(long long), "assuming large file"); + _Static_assert(sizeof(st.f_blocks) == sizeof(long long), "assuming large file"); - PyStructSequence_SET_ITEM(v, 0, PyLong_FromLong((long) st.f_iosize)); - PyStructSequence_SET_ITEM(v, 1, PyLong_FromLong((long) st.f_bsize)); - PyStructSequence_SET_ITEM(v, 2, - PyLong_FromLongLong((long long) st.f_blocks)); - PyStructSequence_SET_ITEM(v, 3, - PyLong_FromLongLong((long long) st.f_bfree)); - PyStructSequence_SET_ITEM(v, 4, - PyLong_FromLongLong((long long) st.f_bavail)); - PyStructSequence_SET_ITEM(v, 5, - PyLong_FromLongLong((long long) st.f_files)); - PyStructSequence_SET_ITEM(v, 6, - PyLong_FromLongLong((long long) st.f_ffree)); - PyStructSequence_SET_ITEM(v, 7, - PyLong_FromLongLong((long long) st.f_ffree)); - PyStructSequence_SET_ITEM(v, 8, PyLong_FromLong((long) flags)); +#define SET_ITEM(v, index, item) \ + do { \ + if (item == NULL) { \ + Py_DECREF(v); \ + return NULL; \ + } \ + PyStructSequence_SET_ITEM(v, index, item); \ + } while (0) \ - PyStructSequence_SET_ITEM(v, 9, PyLong_FromLong((long) NAME_MAX)); - PyStructSequence_SET_ITEM(v, 10, PyLong_FromUnsignedLong(st.f_fsid.val[0])); - if (PyErr_Occurred()) { - Py_DECREF(v); - return NULL; - } + SET_ITEM(v, 0, PyLong_FromLong((long) st.f_iosize)); + SET_ITEM(v, 1, PyLong_FromLong((long) st.f_bsize)); + SET_ITEM(v, 2, PyLong_FromLongLong((long long) 
st.f_blocks)); + SET_ITEM(v, 3, PyLong_FromLongLong((long long) st.f_bfree)); + SET_ITEM(v, 4, PyLong_FromLongLong((long long) st.f_bavail)); + SET_ITEM(v, 5, PyLong_FromLongLong((long long) st.f_files)); + SET_ITEM(v, 6, PyLong_FromLongLong((long long) st.f_ffree)); + SET_ITEM(v, 7, PyLong_FromLongLong((long long) st.f_ffree)); + SET_ITEM(v, 8, PyLong_FromLong((long) flags)); + + SET_ITEM(v, 9, PyLong_FromLong((long) NAME_MAX)); + SET_ITEM(v, 10, PyLong_FromUnsignedLong(st.f_fsid.val[0])); + +#undef SET_ITEM return v; } From 235cacff81931a68e8c400bb3919ae6e55462fb5 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Mon, 12 Feb 2024 01:04:36 -0800 Subject: [PATCH 072/126] GH-114695: Add `sys._clear_internal_caches` (GH-115152) --- Doc/library/sys.rst | 13 +++- Include/cpython/optimizer.h | 3 +- Lib/test/libregrtest/refleak.py | 4 +- Lib/test/test_capi/test_opt.py | 16 +++++ Lib/test/test_mailbox.py | 4 ++ ...-02-07-18-04-36.gh-issue-114695.o9wP5P.rst | 3 + Objects/codeobject.c | 22 ++----- Python/bytecodes.c | 23 ++----- Python/clinic/sysmodule.c.h | 20 +++++- Python/generated_cases.c.h | 25 ++------ Python/optimizer.c | 64 ++++++++++--------- Python/sysmodule.c | 17 +++++ 12 files changed, 130 insertions(+), 84 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-02-07-18-04-36.gh-issue-114695.o9wP5P.rst diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index a97a369b77b88a..ad8857fc2807f7 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -195,6 +195,17 @@ always available. This function should be used for internal and specialized purposes only. + .. deprecated:: 3.13 + Use the more general :func:`_clear_internal_caches` function instead. + + +.. function:: _clear_internal_caches() + + Clear all internal performance-related caches. Use this function *only* to + release unnecessary references and memory blocks when hunting for leaks. + + .. versionadded:: 3.13 + .. 
function:: _current_frames() @@ -724,7 +735,7 @@ always available. regardless of their size. This function is mainly useful for tracking and debugging memory leaks. Because of the interpreter's internal caches, the result can vary from call to call; you may have to call - :func:`_clear_type_cache()` and :func:`gc.collect()` to get more + :func:`_clear_internal_caches()` and :func:`gc.collect()` to get more predictable results. If a Python build or implementation cannot reasonably compute this diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 5a9ccaea3b2209..3928eca583ba5b 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -24,9 +24,10 @@ typedef struct { uint8_t opcode; uint8_t oparg; uint8_t valid; - uint8_t linked; + int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below). _PyBloomFilter bloom; _PyExecutorLinkListNode links; + PyCodeObject *code; // Weak (NULL if no corresponding ENTER_EXECUTOR). } _PyVMData; typedef struct { diff --git a/Lib/test/libregrtest/refleak.py b/Lib/test/libregrtest/refleak.py index 7da16cf721f097..71a70af6882d16 100644 --- a/Lib/test/libregrtest/refleak.py +++ b/Lib/test/libregrtest/refleak.py @@ -201,8 +201,8 @@ def dash_R_cleanup(fs, ps, pic, zdc, abcs): # Clear caches clear_caches() - # Clear type cache at the end: previous function calls can modify types - sys._clear_type_cache() + # Clear other caches last (previous function calls can re-populate them): + sys._clear_internal_caches() def warm_caches(): diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 5c8c0596610303..e6b1b554c9af10 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1,5 +1,6 @@ import contextlib import opcode +import sys import textwrap import unittest @@ -181,6 +182,21 @@ def f(): _testinternalcapi.invalidate_executors(f.__code__) self.assertFalse(exe.is_valid()) + def test_sys__clear_internal_caches(self): + def f(): + for _ in 
range(1000): + pass + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + f() + exe = get_first_executor(f) + self.assertIsNotNone(exe) + self.assertTrue(exe.is_valid()) + sys._clear_internal_caches() + self.assertFalse(exe.is_valid()) + exe = get_first_executor(f) + self.assertIsNone(exe) + class TestUops(unittest.TestCase): def test_basic_loop(self): diff --git a/Lib/test/test_mailbox.py b/Lib/test/test_mailbox.py index c52c014185bec7..d4628f91daf7e8 100644 --- a/Lib/test/test_mailbox.py +++ b/Lib/test/test_mailbox.py @@ -10,6 +10,7 @@ import tempfile from test import support from test.support import os_helper +from test.support import refleak_helper from test.support import socket_helper import unittest import textwrap @@ -2443,6 +2444,9 @@ def test__all__(self): def tearDownModule(): support.reap_children() + # reap_children may have re-populated caches: + if refleak_helper.hunting_for_refleaks(): + sys._clear_internal_caches() if __name__ == '__main__': diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-02-07-18-04-36.gh-issue-114695.o9wP5P.rst b/Misc/NEWS.d/next/Core and Builtins/2024-02-07-18-04-36.gh-issue-114695.o9wP5P.rst new file mode 100644 index 00000000000000..a1db4de393eecb --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-02-07-18-04-36.gh-issue-114695.o9wP5P.rst @@ -0,0 +1,3 @@ +Add :func:`sys._clear_internal_caches`, which clears all internal +performance-related caches (and deprecate the less-general +:func:`sys._clear_type_cache` function). 
diff --git a/Objects/codeobject.c b/Objects/codeobject.c index dc46b773c26528..30336fa86111a7 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1489,27 +1489,19 @@ PyCode_GetFreevars(PyCodeObject *code) static void clear_executors(PyCodeObject *co) { + assert(co->co_executors); for (int i = 0; i < co->co_executors->size; i++) { - Py_CLEAR(co->co_executors->executors[i]); + if (co->co_executors->executors[i]) { + _Py_ExecutorClear(co->co_executors->executors[i]); + } } PyMem_Free(co->co_executors); co->co_executors = NULL; } void -_PyCode_Clear_Executors(PyCodeObject *code) { - int code_len = (int)Py_SIZE(code); - for (int i = 0; i < code_len; i += _PyInstruction_GetLength(code, i)) { - _Py_CODEUNIT *instr = &_PyCode_CODE(code)[i]; - uint8_t opcode = instr->op.code; - uint8_t oparg = instr->op.arg; - if (opcode == ENTER_EXECUTOR) { - _PyExecutorObject *exec = code->co_executors->executors[oparg]; - assert(exec->vm_data.opcode != ENTER_EXECUTOR); - instr->op.code = exec->vm_data.opcode; - instr->op.arg = exec->vm_data.oparg; - } - } +_PyCode_Clear_Executors(PyCodeObject *code) +{ clear_executors(code); } @@ -2360,10 +2352,10 @@ _PyCode_ConstantKey(PyObject *op) void _PyStaticCode_Fini(PyCodeObject *co) { - deopt_code(co, _PyCode_CODE(co)); if (co->co_executors != NULL) { clear_executors(co); } + deopt_code(co, _PyCode_CODE(co)); PyMem_Free(co->co_extra); if (co->_co_cached != NULL) { Py_CLEAR(co->_co_cached->_co_code); diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6fb4d719e43991..197dff4b9888ce 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2370,23 +2370,12 @@ dummy_func( CHECK_EVAL_BREAKER(); PyCodeObject *code = _PyFrame_GetCode(frame); - _PyExecutorObject *executor = code->co_executors->executors[oparg & 255]; - if (executor->vm_data.valid) { - Py_INCREF(executor); - current_executor = executor; - GOTO_TIER_TWO(); - } - else { - /* ENTER_EXECUTOR will be the first code unit of the instruction */ - assert(oparg < 256); - 
code->co_executors->executors[oparg] = NULL; - opcode = this_instr->op.code = executor->vm_data.opcode; - this_instr->op.arg = executor->vm_data.oparg; - oparg = executor->vm_data.oparg; - Py_DECREF(executor); - next_instr = this_instr; - DISPATCH_GOTO(); - } + current_executor = code->co_executors->executors[oparg & 255]; + assert(current_executor->vm_data.index == INSTR_OFFSET() - 1); + assert(current_executor->vm_data.code == code); + assert(current_executor->vm_data.valid); + Py_INCREF(current_executor); + GOTO_TIER_TWO(); } replaced op(_POP_JUMP_IF_FALSE, (cond -- )) { diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h index 93b8385a5b4097..13f4ea81eb8984 100644 --- a/Python/clinic/sysmodule.c.h +++ b/Python/clinic/sysmodule.c.h @@ -1131,6 +1131,24 @@ sys__clear_type_cache(PyObject *module, PyObject *Py_UNUSED(ignored)) return sys__clear_type_cache_impl(module); } +PyDoc_STRVAR(sys__clear_internal_caches__doc__, +"_clear_internal_caches($module, /)\n" +"--\n" +"\n" +"Clear all internal performance-related caches."); + +#define SYS__CLEAR_INTERNAL_CACHES_METHODDEF \ + {"_clear_internal_caches", (PyCFunction)sys__clear_internal_caches, METH_NOARGS, sys__clear_internal_caches__doc__}, + +static PyObject * +sys__clear_internal_caches_impl(PyObject *module); + +static PyObject * +sys__clear_internal_caches(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return sys__clear_internal_caches_impl(module); +} + PyDoc_STRVAR(sys_is_finalizing__doc__, "is_finalizing($module, /)\n" "--\n" @@ -1486,4 +1504,4 @@ sys__get_cpu_count_config(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef SYS_GETANDROIDAPILEVEL_METHODDEF #define SYS_GETANDROIDAPILEVEL_METHODDEF #endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */ -/*[clinic end generated code: output=3dc3b2cb0ce38ebb input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b8b1c53e04c3b20c input=a9049054013a1b77]*/ diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h 
index 16f1db30620d72..e5244147d499af 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2363,29 +2363,18 @@ } TARGET(ENTER_EXECUTOR) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + frame->instr_ptr = next_instr; next_instr += 1; INSTRUCTION_STATS(ENTER_EXECUTOR); TIER_ONE_ONLY CHECK_EVAL_BREAKER(); PyCodeObject *code = _PyFrame_GetCode(frame); - _PyExecutorObject *executor = code->co_executors->executors[oparg & 255]; - if (executor->vm_data.valid) { - Py_INCREF(executor); - current_executor = executor; - GOTO_TIER_TWO(); - } - else { - /* ENTER_EXECUTOR will be the first code unit of the instruction */ - assert(oparg < 256); - code->co_executors->executors[oparg] = NULL; - opcode = this_instr->op.code = executor->vm_data.opcode; - this_instr->op.arg = executor->vm_data.oparg; - oparg = executor->vm_data.oparg; - Py_DECREF(executor); - next_instr = this_instr; - DISPATCH_GOTO(); - } + current_executor = code->co_executors->executors[oparg & 255]; + assert(current_executor->vm_data.index == INSTR_OFFSET() - 1); + assert(current_executor->vm_data.code == code); + assert(current_executor->vm_data.valid); + Py_INCREF(current_executor); + GOTO_TIER_TWO(); DISPATCH(); } diff --git a/Python/optimizer.c b/Python/optimizer.c index d71ca0aef0e11a..ad9ac382d300ef 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -73,25 +73,21 @@ insert_executor(PyCodeObject *code, _Py_CODEUNIT *instr, int index, _PyExecutorO Py_INCREF(executor); if (instr->op.code == ENTER_EXECUTOR) { assert(index == instr->op.arg); - _PyExecutorObject *old = code->co_executors->executors[index]; - executor->vm_data.opcode = old->vm_data.opcode; - executor->vm_data.oparg = old->vm_data.oparg; - old->vm_data.opcode = 0; - code->co_executors->executors[index] = executor; - Py_DECREF(old); + _Py_ExecutorClear(code->co_executors->executors[index]); } else { assert(code->co_executors->size == index); assert(code->co_executors->capacity > index); - 
executor->vm_data.opcode = instr->op.code; - executor->vm_data.oparg = instr->op.arg; - code->co_executors->executors[index] = executor; - assert(index < MAX_EXECUTORS_SIZE); - instr->op.code = ENTER_EXECUTOR; - instr->op.arg = index; code->co_executors->size++; } - return; + executor->vm_data.opcode = instr->op.code; + executor->vm_data.oparg = instr->op.arg; + executor->vm_data.code = code; + executor->vm_data.index = (int)(instr - _PyCode_CODE(code)); + code->co_executors->executors[index] = executor; + assert(index < MAX_EXECUTORS_SIZE); + instr->op.code = ENTER_EXECUTOR; + instr->op.arg = index; } int @@ -1071,7 +1067,7 @@ link_executor(_PyExecutorObject *executor) } head->vm_data.links.next = executor; } - executor->vm_data.linked = true; + executor->vm_data.valid = true; /* executor_list_head must be first in list */ assert(interp->executor_list_head->vm_data.links.previous == NULL); } @@ -1079,7 +1075,7 @@ link_executor(_PyExecutorObject *executor) static void unlink_executor(_PyExecutorObject *executor) { - if (!executor->vm_data.linked) { + if (!executor->vm_data.valid) { return; } _PyExecutorLinkListNode *links = &executor->vm_data.links; @@ -1097,7 +1093,7 @@ unlink_executor(_PyExecutorObject *executor) assert(interp->executor_list_head == executor); interp->executor_list_head = next; } - executor->vm_data.linked = false; + executor->vm_data.valid = false; } /* This must be called by optimizers before using the executor */ @@ -1116,12 +1112,24 @@ void _Py_ExecutorClear(_PyExecutorObject *executor) { unlink_executor(executor); + PyCodeObject *code = executor->vm_data.code; + if (code == NULL) { + return; + } + _Py_CODEUNIT *instruction = &_PyCode_CODE(code)[executor->vm_data.index]; + assert(instruction->op.code == ENTER_EXECUTOR); + int index = instruction->op.arg; + assert(code->co_executors->executors[index] == executor); + instruction->op.code = executor->vm_data.opcode; + instruction->op.arg = executor->vm_data.oparg; + executor->vm_data.code = 
NULL; + Py_CLEAR(code->co_executors->executors[index]); } void _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj) { - assert(executor->vm_data.valid = true); + assert(executor->vm_data.valid); _Py_BloomFilter_Add(&executor->vm_data.bloom, obj); } @@ -1140,8 +1148,7 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj) assert(exec->vm_data.valid); _PyExecutorObject *next = exec->vm_data.links.next; if (bloom_filter_may_contain(&exec->vm_data.bloom, &obj_filter)) { - exec->vm_data.valid = false; - unlink_executor(exec); + _Py_ExecutorClear(exec); } exec = next; } @@ -1151,15 +1158,14 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj) void _Py_Executors_InvalidateAll(PyInterpreterState *interp) { - /* Walk the list of executors */ - for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) { - assert(exec->vm_data.valid); - _PyExecutorObject *next = exec->vm_data.links.next; - exec->vm_data.links.next = NULL; - exec->vm_data.links.previous = NULL; - exec->vm_data.valid = false; - exec->vm_data.linked = false; - exec = next; + while (interp->executor_list_head) { + _PyExecutorObject *executor = interp->executor_list_head; + if (executor->vm_data.code) { + // Clear the entire code object so its co_executors array can be freed: + _PyCode_Clear_Executors(executor->vm_data.code); + } + else { + _Py_ExecutorClear(executor); + } } - interp->executor_list_head = NULL; } diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 437d7f8dfc4958..69b6d886ccc3e9 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2127,6 +2127,22 @@ sys__clear_type_cache_impl(PyObject *module) Py_RETURN_NONE; } +/*[clinic input] +sys._clear_internal_caches + +Clear all internal performance-related caches. 
+[clinic start generated code]*/ + +static PyObject * +sys__clear_internal_caches_impl(PyObject *module) +/*[clinic end generated code: output=0ee128670a4966d6 input=253e741ca744f6e8]*/ +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + _Py_Executors_InvalidateAll(interp); + PyType_ClearCache(); + Py_RETURN_NONE; +} + /* Note that, for now, we do not have a per-interpreter equivalent for sys.is_finalizing(). */ @@ -2461,6 +2477,7 @@ static PyMethodDef sys_methods[] = { {"audit", _PyCFunction_CAST(sys_audit), METH_FASTCALL, audit_doc }, {"breakpointhook", _PyCFunction_CAST(sys_breakpointhook), METH_FASTCALL | METH_KEYWORDS, breakpointhook_doc}, + SYS__CLEAR_INTERNAL_CACHES_METHODDEF SYS__CLEAR_TYPE_CACHE_METHODDEF SYS__CURRENT_FRAMES_METHODDEF SYS__CURRENT_EXCEPTIONS_METHODDEF From 72340d15cdfdfa4796fdd7c702094c852c2b32d2 Mon Sep 17 00:00:00 2001 From: John Belmonte Date: Mon, 12 Feb 2024 20:17:51 +0900 Subject: [PATCH 073/126] gh-114563: C decimal falls back to pydecimal for unsupported format strings (GH-114879) Immediate merits: * eliminate complex workarounds for 'z' format support (NOTE: mpdecimal recently added 'z' support, so this becomes efficient in the long term.) * fix 'z' format memory leak * fix 'z' format applied to 'F' * fix missing '#' format support Suggested and prototyped by Stefan Krah. 
Fixes gh-114563, gh-91060 Co-authored-by: Stefan Krah --- Lib/test/test_decimal.py | 22 +++ ...-02-11-20-23-36.gh-issue-114563.RzxNYT.rst | 4 + Modules/_decimal/_decimal.c | 184 ++++++------------ 3 files changed, 88 insertions(+), 122 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-02-11-20-23-36.gh-issue-114563.RzxNYT.rst diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py index 1423bc61c7f690..f23ea8af0c8772 100644 --- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -1110,6 +1110,13 @@ def test_formatting(self): ('z>z6.1f', '-0.', 'zzz0.0'), ('x>z6.1f', '-0.', 'xxx0.0'), ('🖤>z6.1f', '-0.', '🖤🖤🖤0.0'), # multi-byte fill char + ('\x00>z6.1f', '-0.', '\x00\x00\x000.0'), # null fill char + + # issue 114563 ('z' format on F type in cdecimal) + ('z3,.10F', '-6.24E-323', '0.0000000000'), + + # issue 91060 ('#' format in cdecimal) + ('#', '0', '0.'), # issue 6850 ('a=-7.0', '0.12345', 'aaaa0.1'), @@ -5726,6 +5733,21 @@ def test_c_signaldict_segfault(self): with self.assertRaisesRegex(ValueError, err_msg): sd.copy() + def test_format_fallback_capitals(self): + # Fallback to _pydecimal formatting (triggered by `#` format which + # is unsupported by mpdecimal) should honor the current context. 
+ x = C.Decimal('6.09e+23') + self.assertEqual(format(x, '#'), '6.09E+23') + with C.localcontext(capitals=0): + self.assertEqual(format(x, '#'), '6.09e+23') + + def test_format_fallback_rounding(self): + y = C.Decimal('6.09') + self.assertEqual(format(y, '#.1f'), '6.1') + with C.localcontext(rounding=C.ROUND_DOWN): + self.assertEqual(format(y, '#.1f'), '6.0') + + @requires_docstrings @requires_cdecimal class SignatureTest(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2024-02-11-20-23-36.gh-issue-114563.RzxNYT.rst b/Misc/NEWS.d/next/Library/2024-02-11-20-23-36.gh-issue-114563.RzxNYT.rst new file mode 100644 index 00000000000000..013b6db8e6dbd7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-02-11-20-23-36.gh-issue-114563.RzxNYT.rst @@ -0,0 +1,4 @@ +Fix several :func:`format()` bugs when using the C implementation of :class:`~decimal.Decimal`: +* memory leak in some rare cases when using the ``z`` format option (coerce negative 0) +* incorrect output when applying the ``z`` format option to type ``F`` (fixed-point with capital ``NAN`` / ``INF``) +* incorrect output when applying the ``#`` format option (alternate form) diff --git a/Modules/_decimal/_decimal.c b/Modules/_decimal/_decimal.c index 127f5f2887d4cd..5b053c73e20bc9 100644 --- a/Modules/_decimal/_decimal.c +++ b/Modules/_decimal/_decimal.c @@ -82,6 +82,9 @@ typedef struct { /* Convert rationals for comparison */ PyObject *Rational; + /* Invariant: NULL or pointer to _pydecimal.Decimal */ + PyObject *PyDecimal; + PyObject *SignalTuple; struct DecCondMap *signal_map; @@ -3336,56 +3339,6 @@ dotsep_as_utf8(const char *s) return utf8; } -/* copy of libmpdec _mpd_round() */ -static void -_mpd_round(mpd_t *result, const mpd_t *a, mpd_ssize_t prec, - const mpd_context_t *ctx, uint32_t *status) -{ - mpd_ssize_t exp = a->exp + a->digits - prec; - - if (prec <= 0) { - mpd_seterror(result, MPD_Invalid_operation, status); - return; - } - if (mpd_isspecial(a) || mpd_iszero(a)) { - mpd_qcopy(result, a, 
status); - return; - } - - mpd_qrescale_fmt(result, a, exp, ctx, status); - if (result->digits > prec) { - mpd_qrescale_fmt(result, result, exp+1, ctx, status); - } -} - -/* Locate negative zero "z" option within a UTF-8 format spec string. - * Returns pointer to "z", else NULL. - * The portion of the spec we're working with is [[fill]align][sign][z] */ -static const char * -format_spec_z_search(char const *fmt, Py_ssize_t size) { - char const *pos = fmt; - char const *fmt_end = fmt + size; - /* skip over [[fill]align] (fill may be multi-byte character) */ - pos += 1; - while (pos < fmt_end && *pos & 0x80) { - pos += 1; - } - if (pos < fmt_end && strchr("<>=^", *pos) != NULL) { - pos += 1; - } else { - /* fill not present-- skip over [align] */ - pos = fmt; - if (pos < fmt_end && strchr("<>=^", *pos) != NULL) { - pos += 1; - } - } - /* skip over [sign] */ - if (pos < fmt_end && strchr("+- ", *pos) != NULL) { - pos += 1; - } - return pos < fmt_end && *pos == 'z' ? pos : NULL; -} - static int dict_get_item_string(PyObject *dict, const char *key, PyObject **valueobj, const char **valuestr) { @@ -3411,6 +3364,48 @@ dict_get_item_string(PyObject *dict, const char *key, PyObject **valueobj, const return 0; } +/* + * Fallback _pydecimal formatting for new format specifiers that mpdecimal does + * not yet support. 
As documented, libmpdec follows the PEP-3101 format language: + * https://www.bytereef.org/mpdecimal/doc/libmpdec/assign-convert.html#to-string + */ +static PyObject * +pydec_format(PyObject *dec, PyObject *context, PyObject *fmt, decimal_state *state) +{ + PyObject *result; + PyObject *pydec; + PyObject *u; + + if (state->PyDecimal == NULL) { + state->PyDecimal = _PyImport_GetModuleAttrString("_pydecimal", "Decimal"); + if (state->PyDecimal == NULL) { + return NULL; + } + } + + u = dec_str(dec); + if (u == NULL) { + return NULL; + } + + pydec = PyObject_CallOneArg(state->PyDecimal, u); + Py_DECREF(u); + if (pydec == NULL) { + return NULL; + } + + result = PyObject_CallMethod(pydec, "__format__", "(OO)", fmt, context); + Py_DECREF(pydec); + + if (result == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) { + /* Do not confuse users with the _pydecimal exception */ + PyErr_Clear(); + PyErr_SetString(PyExc_ValueError, "invalid format string"); + } + + return result; +} + /* Formatted representation of a PyDecObject. 
*/ static PyObject * dec_format(PyObject *dec, PyObject *args) @@ -3423,16 +3418,11 @@ dec_format(PyObject *dec, PyObject *args) PyObject *fmtarg; PyObject *context; mpd_spec_t spec; - char const *fmt; - char *fmt_copy = NULL; + char *fmt; char *decstring = NULL; uint32_t status = 0; int replace_fillchar = 0; - int no_neg_0 = 0; Py_ssize_t size; - mpd_t *mpd = MPD(dec); - mpd_uint_t dt[MPD_MINALLOC_MAX]; - mpd_t tmp = {MPD_STATIC|MPD_STATIC_DATA,0,0,0,MPD_MINALLOC_MAX,dt}; decimal_state *state = get_module_state_by_def(Py_TYPE(dec)); @@ -3442,7 +3432,7 @@ dec_format(PyObject *dec, PyObject *args) } if (PyUnicode_Check(fmtarg)) { - fmt = PyUnicode_AsUTF8AndSize(fmtarg, &size); + fmt = (char *)PyUnicode_AsUTF8AndSize(fmtarg, &size); if (fmt == NULL) { return NULL; } @@ -3454,35 +3444,15 @@ dec_format(PyObject *dec, PyObject *args) } } - /* NOTE: If https://github.com/python/cpython/pull/29438 lands, the - * format string manipulation below can be eliminated by enhancing - * the forked mpd_parse_fmt_str(). */ if (size > 0 && fmt[0] == '\0') { /* NUL fill character: must be replaced with a valid UTF-8 char before calling mpd_parse_fmt_str(). */ replace_fillchar = 1; - fmt = fmt_copy = dec_strdup(fmt, size); - if (fmt_copy == NULL) { + fmt = dec_strdup(fmt, size); + if (fmt == NULL) { return NULL; } - fmt_copy[0] = '_'; - } - /* Strip 'z' option, which isn't understood by mpd_parse_fmt_str(). - * NOTE: fmt is always null terminated by PyUnicode_AsUTF8AndSize() */ - char const *z_position = format_spec_z_search(fmt, size); - if (z_position != NULL) { - no_neg_0 = 1; - size_t z_index = z_position - fmt; - if (fmt_copy == NULL) { - fmt = fmt_copy = dec_strdup(fmt, size); - if (fmt_copy == NULL) { - return NULL; - } - } - /* Shift characters (including null terminator) left, - overwriting the 'z' option. 
*/ - memmove(fmt_copy + z_index, fmt_copy + z_index + 1, size - z_index); - size -= 1; + fmt[0] = '_'; } } else { @@ -3492,10 +3462,13 @@ dec_format(PyObject *dec, PyObject *args) } if (!mpd_parse_fmt_str(&spec, fmt, CtxCaps(context))) { - PyErr_SetString(PyExc_ValueError, - "invalid format string"); - goto finish; + if (replace_fillchar) { + PyMem_Free(fmt); + } + + return pydec_format(dec, context, fmtarg, state); } + if (replace_fillchar) { /* In order to avoid clobbering parts of UTF-8 thousands separators or decimal points when the substitution is reversed later, the actual @@ -3548,45 +3521,8 @@ dec_format(PyObject *dec, PyObject *args) } } - if (no_neg_0 && mpd_isnegative(mpd) && !mpd_isspecial(mpd)) { - /* Round into a temporary (carefully mirroring the rounding - of mpd_qformat_spec()), and check if the result is negative zero. - If so, clear the sign and format the resulting positive zero. */ - mpd_ssize_t prec; - mpd_qcopy(&tmp, mpd, &status); - if (spec.prec >= 0) { - switch (spec.type) { - case 'f': - mpd_qrescale(&tmp, &tmp, -spec.prec, CTX(context), &status); - break; - case '%': - tmp.exp += 2; - mpd_qrescale(&tmp, &tmp, -spec.prec, CTX(context), &status); - break; - case 'g': - prec = (spec.prec == 0) ? 
1 : spec.prec; - if (tmp.digits > prec) { - _mpd_round(&tmp, &tmp, prec, CTX(context), &status); - } - break; - case 'e': - if (!mpd_iszero(&tmp)) { - _mpd_round(&tmp, &tmp, spec.prec+1, CTX(context), &status); - } - break; - } - } - if (status & MPD_Errors) { - PyErr_SetString(PyExc_ValueError, "unexpected error when rounding"); - goto finish; - } - if (mpd_iszero(&tmp)) { - mpd_set_positive(&tmp); - mpd = &tmp; - } - } - decstring = mpd_qformat_spec(mpd, &spec, CTX(context), &status); + decstring = mpd_qformat_spec(MPD(dec), &spec, CTX(context), &status); if (decstring == NULL) { if (status & MPD_Malloc_error) { PyErr_NoMemory(); @@ -3609,7 +3545,7 @@ dec_format(PyObject *dec, PyObject *args) Py_XDECREF(grouping); Py_XDECREF(sep); Py_XDECREF(dot); - if (fmt_copy) PyMem_Free(fmt_copy); + if (replace_fillchar) PyMem_Free(fmt); if (decstring) mpd_free(decstring); return result; } @@ -5987,6 +5923,9 @@ _decimal_exec(PyObject *m) Py_CLEAR(collections_abc); Py_CLEAR(MutableMapping); + /* For format specifiers not yet supported by libmpdec */ + state->PyDecimal = NULL; + /* Add types to the module */ CHECK_INT(PyModule_AddType(m, state->PyDec_Type)); CHECK_INT(PyModule_AddType(m, state->PyDecContext_Type)); @@ -6192,6 +6131,7 @@ decimal_clear(PyObject *module) Py_CLEAR(state->extended_context_template); Py_CLEAR(state->Rational); Py_CLEAR(state->SignalTuple); + Py_CLEAR(state->PyDecimal); PyMem_Free(state->signal_map); PyMem_Free(state->cond_map); From 705c76d4a202f1faf41027d48d44eac0e76bb1f0 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Mon, 12 Feb 2024 14:59:58 +0300 Subject: [PATCH 074/126] gh-114785: Remove content from `Porting from Python2` how-to (#114805) Keep the page though, because people might still rely on it (the traffic shows that they do). Instead of our own manual we now give links to the 3rd-party ones. 
Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/howto/index.rst | 1 - Doc/howto/pyporting.rst | 429 ++-------------------------------------- README.rst | 9 - 3 files changed, 19 insertions(+), 420 deletions(-) diff --git a/Doc/howto/index.rst b/Doc/howto/index.rst index a835bb5f13bd1c..bb507953582639 100644 --- a/Doc/howto/index.rst +++ b/Doc/howto/index.rst @@ -13,7 +13,6 @@ Currently, the HOWTOs are: .. toctree:: :maxdepth: 1 - pyporting.rst cporting.rst curses.rst descriptor.rst diff --git a/Doc/howto/pyporting.rst b/Doc/howto/pyporting.rst index 501b16d82d4d6f..d560364107bd12 100644 --- a/Doc/howto/pyporting.rst +++ b/Doc/howto/pyporting.rst @@ -1,3 +1,5 @@ +:orphan: + .. _pyporting-howto: ************************************* @@ -6,423 +8,30 @@ How to port Python 2 Code to Python 3 :author: Brett Cannon -.. topic:: Abstract - - Python 2 reached its official end-of-life at the start of 2020. This means - that no new bug reports, fixes, or changes will be made to Python 2 - it's - no longer supported. - - This guide is intended to provide you with a path to Python 3 for your - code, that includes compatibility with Python 2 as a first step. - - If you are looking to port an extension module instead of pure Python code, - please see :ref:`cporting-howto`. - - The archived python-porting_ mailing list may contain some useful guidance. - - -The Short Explanation -===================== - -To achieve Python 2/3 compatibility in a single code base, the basic steps -are: - -#. Only worry about supporting Python 2.7 -#. Make sure you have good test coverage (coverage.py_ can help; - ``python -m pip install coverage``) -#. Learn the differences between Python 2 and 3 -#. Use Futurize_ (or Modernize_) to update your code (e.g. ``python -m pip install future``) -#. Use Pylint_ to help make sure you don't regress on your Python 3 support - (``python -m pip install pylint``) -#. 
Use caniusepython3_ to find out which of your dependencies are blocking your - use of Python 3 (``python -m pip install caniusepython3``) -#. Once your dependencies are no longer blocking you, use continuous integration - to make sure you stay compatible with Python 2 and 3 (tox_ can help test - against multiple versions of Python; ``python -m pip install tox``) -#. Consider using optional :term:`static type checking ` - to make sure your type usage - works in both Python 2 and 3 (e.g. use mypy_ to check your typing under both - Python 2 and Python 3; ``python -m pip install mypy``). - -.. note:: - - Note: Using ``python -m pip install`` guarantees that the ``pip`` you invoke - is the one installed for the Python currently in use, whether it be - a system-wide ``pip`` or one installed within a - :ref:`virtual environment `. - -Details -======= - -Even if other factors - say, dependencies over which you have no control - -still require you to support Python 2, that does not prevent you taking the -step of including Python 3 support. - -Most changes required to support Python 3 lead to cleaner code using newer -practices even in Python 2 code. - - -Different versions of Python 2 ------------------------------- - -Ideally, your code should be compatible with Python 2.7, which was the -last supported version of Python 2. - -Some of the tools mentioned in this guide will not work with Python 2.6. - -If absolutely necessary, the six_ project can help you support Python 2.5 and -3 simultaneously. Do realize, though, that nearly all the projects listed in -this guide will not be available to you. - -If you are able to skip Python 2.5 and older, the required changes to your -code will be minimal. At worst you will have to use a function instead of a -method in some instances or have to import a function instead of using a -built-in one. 
- - -Make sure you specify the proper version support in your ``setup.py`` file --------------------------------------------------------------------------- - -In your ``setup.py`` file you should have the proper `trove classifier`_ -specifying what versions of Python you support. As your project does not support -Python 3 yet you should at least have -``Programming Language :: Python :: 2 :: Only`` specified. Ideally you should -also specify each major/minor version of Python that you do support, e.g. -``Programming Language :: Python :: 2.7``. - - -Have good test coverage ------------------------ - -Once you have your code supporting the oldest version of Python 2 you want it -to, you will want to make sure your test suite has good coverage. A good rule of -thumb is that if you want to be confident enough in your test suite that any -failures that appear after having tools rewrite your code are actual bugs in the -tools and not in your code. If you want a number to aim for, try to get over 80% -coverage (and don't feel bad if you find it hard to get better than 90% -coverage). If you don't already have a tool to measure test coverage then -coverage.py_ is recommended. - - -Be aware of the differences between Python 2 and 3 --------------------------------------------------- - -Once you have your code well-tested you are ready to begin porting your code to -Python 3! But to fully understand how your code is going to change and what -you want to look out for while you code, you will want to learn what changes -Python 3 makes in terms of Python 2. - -Some resources for understanding the differences and their implications for you -code: - -* the :ref:`"What's New" ` doc for each release of Python 3 -* the `Porting to Python 3`_ book (which is free online) -* the handy `cheat sheet`_ from the Python-Future project. - - -Update your code ----------------- - -There are tools available that can port your code automatically. 
- -Futurize_ does its best to make Python 3 idioms and practices exist in Python -2, e.g. backporting the ``bytes`` type from Python 3 so that you have -semantic parity between the major versions of Python. This is the better -approach for most cases. - -Modernize_, on the other hand, is more conservative and targets a Python 2/3 -subset of Python, directly relying on six_ to help provide compatibility. - -A good approach is to run the tool over your test suite first and visually -inspect the diff to make sure the transformation is accurate. After you have -transformed your test suite and verified that all the tests still pass as -expected, then you can transform your application code knowing that any tests -which fail is a translation failure. - -Unfortunately the tools can't automate everything to make your code work under -Python 3, and you will also need to read the tools' documentation in case some -options you need are turned off by default. - -Key issues to be aware of and check for: - -Division -++++++++ - -In Python 3, ``5 / 2 == 2.5`` and not ``2`` as it was in Python 2; all -division between ``int`` values result in a ``float``. This change has -actually been planned since Python 2.2 which was released in 2002. Since then -users have been encouraged to add ``from __future__ import division`` to any -and all files which use the ``/`` and ``//`` operators or to be running the -interpreter with the ``-Q`` flag. If you have not been doing this then you -will need to go through your code and do two things: - -#. Add ``from __future__ import division`` to your files -#. Update any division operator as necessary to either use ``//`` to use floor - division or continue using ``/`` and expect a float - -The reason that ``/`` isn't simply translated to ``//`` automatically is that if -an object defines a ``__truediv__`` method but not ``__floordiv__`` then your -code would begin to fail (e.g. 
a user-defined class that uses ``/`` to -signify some operation but not ``//`` for the same thing or at all). +Python 2 reached its official end-of-life at the start of 2020. This means +that no new bug reports, fixes, or changes will be made to Python 2 - it's +no longer supported: see :pep:`373` and +`status of Python versions <https://devguide.python.org/versions>`_. +If you are looking to port an extension module instead of pure Python code, +please see :ref:`cporting-howto`. -Text versus binary data -+++++++++++++++++++++++ +The archived python-porting_ mailing list may contain some useful guidance. -In Python 2 you could use the ``str`` type for both text and binary data. -Unfortunately this confluence of two different concepts could lead to brittle -code which sometimes worked for either kind of data, sometimes not. It also -could lead to confusing APIs if people didn't explicitly state that something -that accepted ``str`` accepted either text or binary data instead of one -specific type. This complicated the situation especially for anyone supporting -multiple languages as APIs wouldn't bother explicitly supporting ``unicode`` -when they claimed text data support. +Since Python 3.13 the original porting guide was discontinued. +You can find the old guide in the +`archive <https://docs.python.org/3.12/howto/pyporting.html>`_. -Python 3 made text and binary data distinct types that cannot simply be mixed -together. For any code that deals only with text or only binary data, this -separation doesn't pose an issue. But for code that has to deal with both, it -does mean you might have to now care about when you are using text compared -to binary data, which is why this cannot be entirely automated. -Decide which APIs take text and which take binary (it is **highly** recommended -you don't design APIs that can take both due to the difficulty of keeping the -code working; as stated earlier it is difficult to do well).
In Python 2 this -means making sure the APIs that take text can work with ``unicode`` and those -that work with binary data work with the ``bytes`` type from Python 3 -(which is a subset of ``str`` in Python 2 and acts as an alias for ``bytes`` -type in Python 2). Usually the biggest issue is realizing which methods exist -on which types in Python 2 and 3 simultaneously (for text that's ``unicode`` -in Python 2 and ``str`` in Python 3, for binary that's ``str``/``bytes`` in -Python 2 and ``bytes`` in Python 3). +Third-party guides +================== -The following table lists the **unique** methods of each data type across -Python 2 and 3 (e.g., the ``decode()`` method is usable on the equivalent binary -data type in either Python 2 or 3, but it can't be used by the textual data -type consistently between Python 2 and 3 because ``str`` in Python 3 doesn't -have the method). Do note that as of Python 3.5 the ``__mod__`` method was -added to the bytes type. +There are also multiple third-party guides that might be useful: -======================== ===================== -**Text data** **Binary data** ------------------------- --------------------- -\ decode ------------------------- --------------------- -encode ------------------------- --------------------- -format ------------------------- --------------------- -isdecimal ------------------------- --------------------- -isnumeric -======================== ===================== +- `Guide by Fedora `_ +- `PyCon 2020 tutorial `_ +- `Guide by DigitalOcean `_ +- `Guide by ActiveState `_ -Making the distinction easier to handle can be accomplished by encoding and -decoding between binary data and text at the edge of your code. This means that -when you receive text in binary data, you should immediately decode it. And if -your code needs to send text as binary data then encode it as late as possible. 
-This allows your code to work with only text internally and thus eliminates -having to keep track of what type of data you are working with. -The next issue is making sure you know whether the string literals in your code -represent text or binary data. You should add a ``b`` prefix to any -literal that presents binary data. For text you should add a ``u`` prefix to -the text literal. (There is a :mod:`__future__` import to force all unspecified -literals to be Unicode, but usage has shown it isn't as effective as adding a -``b`` or ``u`` prefix to all literals explicitly) - -You also need to be careful about opening files. Possibly you have not always -bothered to add the ``b`` mode when opening a binary file (e.g., ``rb`` for -binary reading). Under Python 3, binary files and text files are clearly -distinct and mutually incompatible; see the :mod:`io` module for details. -Therefore, you **must** make a decision of whether a file will be used for -binary access (allowing binary data to be read and/or written) or textual access -(allowing text data to be read and/or written). You should also use :func:`io.open` -for opening files instead of the built-in :func:`open` function as the :mod:`io` -module is consistent from Python 2 to 3 while the built-in :func:`open` function -is not (in Python 3 it's actually :func:`io.open`). Do not bother with the -outdated practice of using :func:`codecs.open` as that's only necessary for -keeping compatibility with Python 2.5. - -The constructors of both ``str`` and ``bytes`` have different semantics for the -same arguments between Python 2 and 3. Passing an integer to ``bytes`` in Python 2 -will give you the string representation of the integer: ``bytes(3) == '3'``. -But in Python 3, an integer argument to ``bytes`` will give you a bytes object -as long as the integer specified, filled with null bytes: -``bytes(3) == b'\x00\x00\x00'``. A similar worry is necessary when passing a -bytes object to ``str``. 
In Python 2 you just get the bytes object back: -``str(b'3') == b'3'``. But in Python 3 you get the string representation of the -bytes object: ``str(b'3') == "b'3'"``. - -Finally, the indexing of binary data requires careful handling (slicing does -**not** require any special handling). In Python 2, -``b'123'[1] == b'2'`` while in Python 3 ``b'123'[1] == 50``. Because binary data -is simply a collection of binary numbers, Python 3 returns the integer value for -the byte you index on. But in Python 2 because ``bytes == str``, indexing -returns a one-item slice of bytes. The six_ project has a function -named ``six.indexbytes()`` which will return an integer like in Python 3: -``six.indexbytes(b'123', 1)``. - -To summarize: - -#. Decide which of your APIs take text and which take binary data -#. Make sure that your code that works with text also works with ``unicode`` and - code for binary data works with ``bytes`` in Python 2 (see the table above - for what methods you cannot use for each type) -#. Mark all binary literals with a ``b`` prefix, textual literals with a ``u`` - prefix -#. Decode binary data to text as soon as possible, encode text as binary data as - late as possible -#. Open files using :func:`io.open` and make sure to specify the ``b`` mode when - appropriate -#. Be careful when indexing into binary data - - -Use feature detection instead of version detection -++++++++++++++++++++++++++++++++++++++++++++++++++ - -Inevitably you will have code that has to choose what to do based on what -version of Python is running. The best way to do this is with feature detection -of whether the version of Python you're running under supports what you need. -If for some reason that doesn't work then you should make the version check be -against Python 2 and not Python 3. To help explain this, let's look at an -example. 
- -Let's pretend that you need access to a feature of :mod:`importlib` that -is available in Python's standard library since Python 3.3 and available for -Python 2 through importlib2_ on PyPI. You might be tempted to write code to -access e.g. the :mod:`importlib.abc` module by doing the following:: - - import sys - - if sys.version_info[0] == 3: - from importlib import abc - else: - from importlib2 import abc - -The problem with this code is what happens when Python 4 comes out? It would -be better to treat Python 2 as the exceptional case instead of Python 3 and -assume that future Python versions will be more compatible with Python 3 than -Python 2:: - - import sys - - if sys.version_info[0] > 2: - from importlib import abc - else: - from importlib2 import abc - -The best solution, though, is to do no version detection at all and instead rely -on feature detection. That avoids any potential issues of getting the version -detection wrong and helps keep you future-compatible:: - - try: - from importlib import abc - except ImportError: - from importlib2 import abc - - -Prevent compatibility regressions ---------------------------------- - -Once you have fully translated your code to be compatible with Python 3, you -will want to make sure your code doesn't regress and stop working under -Python 3. This is especially true if you have a dependency which is blocking you -from actually running under Python 3 at the moment. - -To help with staying compatible, any new modules you create should have -at least the following block of code at the top of it:: - - from __future__ import absolute_import - from __future__ import division - from __future__ import print_function - -You can also run Python 2 with the ``-3`` flag to be warned about various -compatibility issues your code triggers during execution. If you turn warnings -into errors with ``-Werror`` then you can make sure that you don't accidentally -miss a warning. 
- -You can also use the Pylint_ project and its ``--py3k`` flag to lint your code -to receive warnings when your code begins to deviate from Python 3 -compatibility. This also prevents you from having to run Modernize_ or Futurize_ -over your code regularly to catch compatibility regressions. This does require -you only support Python 2.7 and Python 3.4 or newer as that is Pylint's -minimum Python version support. - - -Check which dependencies block your transition ----------------------------------------------- - -**After** you have made your code compatible with Python 3 you should begin to -care about whether your dependencies have also been ported. The caniusepython3_ -project was created to help you determine which projects --- directly or indirectly -- are blocking you from supporting Python 3. There -is both a command-line tool as well as a web interface at -https://caniusepython3.com. - -The project also provides code which you can integrate into your test suite so -that you will have a failing test when you no longer have dependencies blocking -you from using Python 3. This allows you to avoid having to manually check your -dependencies and to be notified quickly when you can start running on Python 3. - - -Update your ``setup.py`` file to denote Python 3 compatibility --------------------------------------------------------------- - -Once your code works under Python 3, you should update the classifiers in -your ``setup.py`` to contain ``Programming Language :: Python :: 3`` and to not -specify sole Python 2 support. This will tell anyone using your code that you -support Python 2 **and** 3. Ideally you will also want to add classifiers for -each major/minor version of Python you now support. - - -Use continuous integration to stay compatible ---------------------------------------------- - -Once you are able to fully run under Python 3 you will want to make sure your -code always works under both Python 2 and 3. 
Probably the best tool for running -your tests under multiple Python interpreters is tox_. You can then integrate -tox with your continuous integration system so that you never accidentally break -Python 2 or 3 support. - -You may also want to use the ``-bb`` flag with the Python 3 interpreter to -trigger an exception when you are comparing bytes to strings or bytes to an int -(the latter is available starting in Python 3.5). By default type-differing -comparisons simply return ``False``, but if you made a mistake in your -separation of text/binary data handling or indexing on bytes you wouldn't easily -find the mistake. This flag will raise an exception when these kinds of -comparisons occur, making the mistake much easier to track down. - - -Consider using optional static type checking --------------------------------------------- - -Another way to help port your code is to use a :term:`static type checker` like -mypy_ or pytype_ on your code. These tools can be used to analyze your code as -if it's being run under Python 2, then you can run the tool a second time as if -your code is running under Python 3. By running a static type checker twice like -this you can discover if you're e.g. misusing binary data type in one version -of Python compared to another. If you add optional type hints to your code you -can also explicitly state whether your APIs use textual or binary data, helping -to make sure everything functions as expected in both versions of Python. - - -.. _caniusepython3: https://pypi.org/project/caniusepython3 -.. _cheat sheet: https://python-future.org/compatible_idioms.html -.. _coverage.py: https://pypi.org/project/coverage -.. _Futurize: https://python-future.org/automatic_conversion.html -.. _importlib2: https://pypi.org/project/importlib2 -.. _Modernize: https://python-modernize.readthedocs.io/ -.. _mypy: https://mypy-lang.org/ -.. _Porting to Python 3: http://python3porting.com/ -.. _Pylint: https://pypi.org/project/pylint - -.. 
_Python 3 Q & A: https://ncoghlan-devs-python-notes.readthedocs.io/en/latest/python3/questions_and_answers.html - -.. _pytype: https://github.com/google/pytype -.. _python-future: https://python-future.org/ .. _python-porting: https://mail.python.org/pipermail/python-porting/ -.. _six: https://pypi.org/project/six -.. _tox: https://pypi.org/project/tox -.. _trove classifier: https://pypi.org/classifiers - -.. _Why Python 3 exists: https://snarky.ca/why-python-3-exists diff --git a/README.rst b/README.rst index fbfae16a7dbb0b..1145fd43755840 100644 --- a/README.rst +++ b/README.rst @@ -161,15 +161,6 @@ For information about building Python's documentation, refer to `Doc/README.rst <https://github.com/python/cpython/blob/main/Doc/README.rst>`_. -Converting From Python 2.x to 3.x ---------------------------------- - -Significant backward incompatible changes were made for the release of Python -3.0, which may cause programs written for Python 2 to fail when run with Python -3. For more information about porting your code from Python 2 to Python 3, see -the `Porting HOWTO <https://docs.python.org/3/howto/pyporting.html>`_.
- - Testing ------- From 92483b21b30d451586c54dc4923665f7f7eedd7a Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Mon, 12 Feb 2024 14:40:41 +0200 Subject: [PATCH 075/126] gh-101100: Fix Sphinx warnings in `whatsnew/2.7.rst` and related (#115319) Co-authored-by: Alex Waygood --- Doc/library/asyncio-protocol.rst | 4 +- Doc/library/asyncio-subprocess.rst | 6 +- Doc/library/msvcrt.rst | 15 +++ Doc/library/multiprocessing.rst | 4 +- Doc/library/subprocess.rst | 4 +- Doc/whatsnew/2.6.rst | 8 +- Doc/whatsnew/2.7.rst | 170 +++++++++++++++-------------- Doc/whatsnew/3.1.rst | 2 +- Doc/whatsnew/3.2.rst | 6 +- 9 files changed, 119 insertions(+), 100 deletions(-) diff --git a/Doc/library/asyncio-protocol.rst b/Doc/library/asyncio-protocol.rst index 3f734f544afe21..ecd8cdc709af7d 100644 --- a/Doc/library/asyncio-protocol.rst +++ b/Doc/library/asyncio-protocol.rst @@ -417,8 +417,8 @@ Subprocess Transports Stop the subprocess. - On POSIX systems, this method sends SIGTERM to the subprocess. - On Windows, the Windows API function TerminateProcess() is called to + On POSIX systems, this method sends :py:const:`~signal.SIGTERM` to the subprocess. + On Windows, the Windows API function :c:func:`!TerminateProcess` is called to stop the subprocess. See also :meth:`subprocess.Popen.terminate`. diff --git a/Doc/library/asyncio-subprocess.rst b/Doc/library/asyncio-subprocess.rst index bf35b1cb798aee..817a6ff3052f4a 100644 --- a/Doc/library/asyncio-subprocess.rst +++ b/Doc/library/asyncio-subprocess.rst @@ -240,7 +240,7 @@ their completion. .. note:: - On Windows, :py:data:`SIGTERM` is an alias for :meth:`terminate`. + On Windows, :py:const:`~signal.SIGTERM` is an alias for :meth:`terminate`. ``CTRL_C_EVENT`` and ``CTRL_BREAK_EVENT`` can be sent to processes started with a *creationflags* parameter which includes ``CREATE_NEW_PROCESS_GROUP``. @@ -249,10 +249,10 @@ their completion. Stop the child process. 
- On POSIX systems this method sends :py:const:`signal.SIGTERM` to the + On POSIX systems this method sends :py:const:`~signal.SIGTERM` to the child process. - On Windows the Win32 API function :c:func:`TerminateProcess` is + On Windows the Win32 API function :c:func:`!TerminateProcess` is called to stop the child process. .. method:: kill() diff --git a/Doc/library/msvcrt.rst b/Doc/library/msvcrt.rst index 2a6d980ab78a60..ac3458c86fd4c4 100644 --- a/Doc/library/msvcrt.rst +++ b/Doc/library/msvcrt.rst @@ -252,3 +252,18 @@ Other Functions .. data:: CRTDBG_REPORT_MODE Returns current *mode* for the specified *type*. + + +.. data:: CRT_ASSEMBLY_VERSION + + The CRT Assembly version, from the :file:`crtassem.h` header file. + + +.. data:: VC_ASSEMBLY_PUBLICKEYTOKEN + + The VC Assembly public key token, from the :file:`crtassem.h` header file. + + +.. data:: LIBRARIES_ASSEMBLY_NAME_PREFIX + + The Libraries Assembly name prefix, from the :file:`crtassem.h` header file. diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst index b104a6483b70e6..d570d4eb0dae78 100644 --- a/Doc/library/multiprocessing.rst +++ b/Doc/library/multiprocessing.rst @@ -649,8 +649,8 @@ The :mod:`multiprocessing` package mostly replicates the API of the .. method:: terminate() - Terminate the process. On POSIX this is done using the ``SIGTERM`` signal; - on Windows :c:func:`TerminateProcess` is used. Note that exit handlers and + Terminate the process. On POSIX this is done using the :py:const:`~signal.SIGTERM` signal; + on Windows :c:func:`!TerminateProcess` is used. Note that exit handlers and finally clauses, etc., will not be executed. Note that descendant processes of the process will *not* be terminated -- diff --git a/Doc/library/subprocess.rst b/Doc/library/subprocess.rst index f63ca73b3ec067..1dcfea58a8e89f 100644 --- a/Doc/library/subprocess.rst +++ b/Doc/library/subprocess.rst @@ -857,8 +857,8 @@ Instances of the :class:`Popen` class have the following methods: .. 
method:: Popen.terminate() - Stop the child. On POSIX OSs the method sends SIGTERM to the - child. On Windows the Win32 API function :c:func:`TerminateProcess` is called + Stop the child. On POSIX OSs the method sends :py:const:`~signal.SIGTERM` to the + child. On Windows the Win32 API function :c:func:`!TerminateProcess` is called to stop the child. diff --git a/Doc/whatsnew/2.6.rst b/Doc/whatsnew/2.6.rst index 7d3769a22286e2..05c21d313aae03 100644 --- a/Doc/whatsnew/2.6.rst +++ b/Doc/whatsnew/2.6.rst @@ -2388,11 +2388,11 @@ changes, or look through the Subversion logs for all the details. using the format character ``'?'``. (Contributed by David Remahl.) -* The :class:`Popen` objects provided by the :mod:`subprocess` module - now have :meth:`terminate`, :meth:`kill`, and :meth:`send_signal` methods. - On Windows, :meth:`send_signal` only supports the :const:`SIGTERM` +* The :class:`~subprocess.Popen` objects provided by the :mod:`subprocess` module + now have :meth:`~subprocess.Popen.terminate`, :meth:`~subprocess.Popen.kill`, and :meth:`~subprocess.Popen.send_signal` methods. + On Windows, :meth:`!send_signal` only supports the :py:const:`~signal.SIGTERM` signal, and all these methods are aliases for the Win32 API function - :c:func:`TerminateProcess`. + :c:func:`!TerminateProcess`. (Contributed by Christian Heimes.) * A new variable in the :mod:`sys` module, :attr:`float_info`, is an diff --git a/Doc/whatsnew/2.7.rst b/Doc/whatsnew/2.7.rst index ada05aa22b46f6..2a42664c02852c 100644 --- a/Doc/whatsnew/2.7.rst +++ b/Doc/whatsnew/2.7.rst @@ -196,7 +196,7 @@ A partial list of 3.1 features that were backported to 2.7: Other new Python3-mode warnings include: -* :func:`operator.isCallable` and :func:`operator.sequenceIncludes`, +* :func:`!operator.isCallable` and :func:`!operator.sequenceIncludes`, which are not supported in 3.x, now trigger warnings. 
* The :option:`!-3` switch now automatically enables the :option:`!-Qwarn` switch that causes warnings @@ -455,11 +455,11 @@ a varying number of handlers. All this flexibility can require a lot of configuration. You can write Python statements to create objects and set their properties, but a complex set-up requires verbose but boring code. -:mod:`logging` also supports a :func:`~logging.fileConfig` +:mod:`logging` also supports a :func:`~logging.config.fileConfig` function that parses a file, but the file format doesn't support configuring filters, and it's messier to generate programmatically. -Python 2.7 adds a :func:`~logging.dictConfig` function that +Python 2.7 adds a :func:`~logging.config.dictConfig` function that uses a dictionary to configure logging. There are many ways to produce a dictionary from different sources: construct one with code; parse a file containing JSON; or use a YAML parsing library if one is @@ -533,7 +533,7 @@ implemented by Vinay Sajip, are: ``getLogger('app.network.listen')``. * The :class:`~logging.LoggerAdapter` class gained an - :meth:`~logging.LoggerAdapter.isEnabledFor` method that takes a + :meth:`~logging.Logger.isEnabledFor` method that takes a *level* and returns whether the underlying logger would process a message of that level of importance. @@ -554,8 +554,8 @@ called a :dfn:`view` instead of a fully materialized list. It's not possible to change the return values of :meth:`~dict.keys`, :meth:`~dict.values`, and :meth:`~dict.items` in Python 2.7 because too much code would break. Instead the 3.x versions were added -under the new names :meth:`~dict.viewkeys`, :meth:`~dict.viewvalues`, -and :meth:`~dict.viewitems`. +under the new names :meth:`!viewkeys`, :meth:`!viewvalues`, +and :meth:`!viewitems`. :: @@ -720,7 +720,7 @@ Some smaller changes made to the core Python language are: with B() as b: ... suite of statements ... 
- The :func:`contextlib.nested` function provides a very similar + The :func:`!contextlib.nested` function provides a very similar function, so it's no longer necessary and has been deprecated. (Proposed in https://codereview.appspot.com/53094; implemented by @@ -785,7 +785,7 @@ Some smaller changes made to the core Python language are: implemented by Mark Dickinson; :issue:`1811`.) * Implicit coercion for complex numbers has been removed; the interpreter - will no longer ever attempt to call a :meth:`__coerce__` method on complex + will no longer ever attempt to call a :meth:`!__coerce__` method on complex objects. (Removed by Meador Inge and Mark Dickinson; :issue:`5211`.) * The :meth:`str.format` method now supports automatic numbering of the replacement @@ -817,7 +817,7 @@ Some smaller changes made to the core Python language are: A low-level change: the :meth:`object.__format__` method now triggers a :exc:`PendingDeprecationWarning` if it's passed a format string, - because the :meth:`__format__` method for :class:`object` converts + because the :meth:`!__format__` method for :class:`object` converts the object to a string representation and formats that. Previously the method silently applied the format string to the string representation, but that could hide mistakes in Python code. If @@ -825,7 +825,7 @@ Some smaller changes made to the core Python language are: precision, presumably you're expecting the formatting to be applied in some object-specific way. (Fixed by Eric Smith; :issue:`7994`.) -* The :func:`int` and :func:`long` types gained a ``bit_length`` +* The :func:`int` and :func:`!long` types gained a ``bit_length`` method that returns the number of bits necessary to represent its argument in binary:: @@ -848,8 +848,8 @@ Some smaller changes made to the core Python language are: statements that were only working by accident. (Fixed by Meador Inge; :issue:`7902`.) 
-* It's now possible for a subclass of the built-in :class:`unicode` type - to override the :meth:`__unicode__` method. (Implemented by +* It's now possible for a subclass of the built-in :class:`!unicode` type + to override the :meth:`!__unicode__` method. (Implemented by Victor Stinner; :issue:`1583863`.) * The :class:`bytearray` type's :meth:`~bytearray.translate` method now accepts @@ -876,7 +876,7 @@ Some smaller changes made to the core Python language are: Forgeot d'Arc in :issue:`1616979`; CP858 contributed by Tim Hatch in :issue:`8016`.) -* The :class:`file` object will now set the :attr:`filename` attribute +* The :class:`!file` object will now set the :attr:`!filename` attribute on the :exc:`IOError` exception when trying to open a directory on POSIX platforms (noted by Jan Kaliszewski; :issue:`4764`), and now explicitly checks for and forbids writing to read-only file objects @@ -966,7 +966,7 @@ Several performance enhancements have been added: Apart from the performance improvements this change should be invisible to end users, with one exception: for testing and - debugging purposes there's a new structseq :data:`sys.long_info` that + debugging purposes there's a new structseq :data:`!sys.long_info` that provides information about the internal format, giving the number of bits per digit and the size in bytes of the C type used to store each digit:: @@ -1005,8 +1005,8 @@ Several performance enhancements have been added: conversion function that supports arbitrary bases. (Patch by Gawain Bolton; :issue:`6713`.) -* The :meth:`split`, :meth:`replace`, :meth:`rindex`, - :meth:`rpartition`, and :meth:`rsplit` methods of string-like types +* The :meth:`!split`, :meth:`!replace`, :meth:`!rindex`, + :meth:`!rpartition`, and :meth:`!rsplit` methods of string-like types (strings, Unicode strings, and :class:`bytearray` objects) now use a fast reverse-search algorithm instead of a character-by-character scan. This is sometimes faster by a factor of 10. 
(Added by @@ -1044,7 +1044,7 @@ changes, or look through the Subversion logs for all the details. used with :class:`memoryview` instances and other similar buffer objects. (Backported from 3.x by Florent Xicluna; :issue:`7703`.) -* Updated module: the :mod:`bsddb` module has been updated from 4.7.2devel9 +* Updated module: the :mod:`!bsddb` module has been updated from 4.7.2devel9 to version 4.8.4 of `the pybsddb package `__. The new version features better Python 3.x compatibility, various bug fixes, @@ -1129,7 +1129,7 @@ changes, or look through the Subversion logs for all the details. (Added by Raymond Hettinger; :issue:`1818`.) - Finally, the :class:`~collections.Mapping` abstract base class now + Finally, the :class:`~collections.abc.Mapping` abstract base class now returns :const:`NotImplemented` if a mapping is compared to another type that isn't a :class:`Mapping`. (Fixed by Daniel Stutzbach; :issue:`8729`.) @@ -1158,7 +1158,7 @@ changes, or look through the Subversion logs for all the details. (Contributed by Mats Kindahl; :issue:`7005`.) -* Deprecated function: :func:`contextlib.nested`, which allows +* Deprecated function: :func:`!contextlib.nested`, which allows handling more than one context manager with a single :keyword:`with` statement, has been deprecated, because the :keyword:`!with` statement now supports multiple context managers. @@ -1184,7 +1184,7 @@ changes, or look through the Subversion logs for all the details. * New method: the :class:`~decimal.Decimal` class gained a :meth:`~decimal.Decimal.from_float` class method that performs an exact - conversion of a floating-point number to a :class:`~decimal.Decimal`. + conversion of a floating-point number to a :class:`!Decimal`. 
This exact conversion strives for the closest decimal approximation to the floating-point representation's value; the resulting decimal value will therefore still include the inaccuracy, @@ -1198,9 +1198,9 @@ changes, or look through the Subversion logs for all the details. of the operands. Previously such comparisons would fall back to Python's default rules for comparing objects, which produced arbitrary results based on their type. Note that you still cannot combine - :class:`Decimal` and floating-point in other operations such as addition, + :class:`!Decimal` and floating-point in other operations such as addition, since you should be explicitly choosing how to convert between float and - :class:`~decimal.Decimal`. (Fixed by Mark Dickinson; :issue:`2531`.) + :class:`!Decimal`. (Fixed by Mark Dickinson; :issue:`2531`.) The constructor for :class:`~decimal.Decimal` now accepts floating-point numbers (added by Raymond Hettinger; :issue:`8257`) @@ -1218,7 +1218,7 @@ changes, or look through the Subversion logs for all the details. more sensible for numeric types. (Changed by Mark Dickinson; :issue:`6857`.) Comparisons involving a signaling NaN value (or ``sNAN``) now signal - :const:`InvalidOperation` instead of silently returning a true or + :const:`~decimal.InvalidOperation` instead of silently returning a true or false value depending on the comparison operator. Quiet NaN values (or ``NaN``) are now hashable. (Fixed by Mark Dickinson; :issue:`7279`.) @@ -1235,13 +1235,13 @@ changes, or look through the Subversion logs for all the details. created some new files that should be included. (Fixed by Tarek Ziadé; :issue:`8688`.) -* The :mod:`doctest` module's :const:`IGNORE_EXCEPTION_DETAIL` flag +* The :mod:`doctest` module's :const:`~doctest.IGNORE_EXCEPTION_DETAIL` flag will now ignore the name of the module containing the exception being tested. (Patch by Lennart Regebro; :issue:`7490`.) 
* The :mod:`email` module's :class:`~email.message.Message` class will now accept a Unicode-valued payload, automatically converting the - payload to the encoding specified by :attr:`output_charset`. + payload to the encoding specified by :attr:`!output_charset`. (Added by R. David Murray; :issue:`1368247`.) * The :class:`~fractions.Fraction` class now accepts a single float or @@ -1268,10 +1268,10 @@ changes, or look through the Subversion logs for all the details. :issue:`6845`.) * New class decorator: :func:`~functools.total_ordering` in the :mod:`functools` - module takes a class that defines an :meth:`__eq__` method and one of - :meth:`__lt__`, :meth:`__le__`, :meth:`__gt__`, or :meth:`__ge__`, + module takes a class that defines an :meth:`~object.__eq__` method and one of + :meth:`~object.__lt__`, :meth:`~object.__le__`, :meth:`~object.__gt__`, or :meth:`~object.__ge__`, and generates the missing comparison methods. Since the - :meth:`__cmp__` method is being deprecated in Python 3.x, + :meth:`!__cmp__` method is being deprecated in Python 3.x, this decorator makes it easier to define ordered classes. (Added by Raymond Hettinger; :issue:`5479`.) @@ -1300,7 +1300,7 @@ changes, or look through the Subversion logs for all the details. :mod:`gzip` module will now consume these trailing bytes. (Fixed by Tadek Pietraszek and Brian Curtin; :issue:`2846`.) -* New attribute: the :mod:`hashlib` module now has an :attr:`~hashlib.hashlib.algorithms` +* New attribute: the :mod:`hashlib` module now has an :attr:`!algorithms` attribute containing a tuple naming the supported algorithms. In Python 2.7, ``hashlib.algorithms`` contains ``('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512')``. @@ -1348,10 +1348,10 @@ changes, or look through the Subversion logs for all the details. * Updated module: The :mod:`io` library has been upgraded to the version shipped with Python 3.1. 
For 3.1, the I/O library was entirely rewritten in C and is 2 to 20 times faster depending on the task being performed. The - original Python version was renamed to the :mod:`_pyio` module. + original Python version was renamed to the :mod:`!_pyio` module. One minor resulting change: the :class:`io.TextIOBase` class now - has an :attr:`errors` attribute giving the error setting + has an :attr:`~io.TextIOBase.errors` attribute giving the error setting used for encoding and decoding errors (one of ``'strict'``, ``'replace'``, ``'ignore'``). @@ -1423,10 +1423,10 @@ changes, or look through the Subversion logs for all the details. passed to the callable. (Contributed by lekma; :issue:`5585`.) - The :class:`~multiprocessing.Pool` class, which controls a pool of worker processes, + The :class:`~multiprocessing.pool.Pool` class, which controls a pool of worker processes, now has an optional *maxtasksperchild* parameter. Worker processes will perform the specified number of tasks and then exit, causing the - :class:`~multiprocessing.Pool` to start a new worker. This is useful if tasks may leak + :class:`!Pool` to start a new worker. This is useful if tasks may leak memory or other resources, or if some tasks will cause the worker to become very large. (Contributed by Charles Cazabon; :issue:`6963`.) @@ -1498,7 +1498,7 @@ changes, or look through the Subversion logs for all the details. global site-packages directories, :func:`~site.getusersitepackages` returns the path of the user's site-packages directory, and - :func:`~site.getuserbase` returns the value of the :envvar:`USER_BASE` + :func:`~site.getuserbase` returns the value of the :data:`~site.USER_BASE` environment variable, giving the path to a directory that can be used to store data. (Contributed by Tarek Ziadé; :issue:`6693`.) @@ -1540,11 +1540,11 @@ changes, or look through the Subversion logs for all the details. 
* The :mod:`ssl` module's :class:`~ssl.SSLSocket` objects now support the buffer API, which fixed a test suite failure (fix by Antoine Pitrou; :issue:`7133`) and automatically set - OpenSSL's :c:macro:`SSL_MODE_AUTO_RETRY`, which will prevent an error + OpenSSL's :c:macro:`!SSL_MODE_AUTO_RETRY`, which will prevent an error code being returned from :meth:`recv` operations that trigger an SSL renegotiation (fix by Antoine Pitrou; :issue:`8222`). - The :func:`ssl.wrap_socket` constructor function now takes a + The :func:`~ssl.SSLContext.wrap_socket` constructor function now takes a *ciphers* argument that's a string listing the encryption algorithms to be allowed; the format of the string is described `in the OpenSSL documentation @@ -1568,8 +1568,8 @@ changes, or look through the Subversion logs for all the details. code (one of ``bBhHiIlLqQ``); it now always raises a :exc:`struct.error` exception. (Changed by Mark Dickinson; :issue:`1523`.) The :func:`~struct.pack` function will also - attempt to use :meth:`__index__` to convert and pack non-integers - before trying the :meth:`__int__` method or reporting an error. + attempt to use :meth:`~object.__index__` to convert and pack non-integers + before trying the :meth:`~object.__int__` method or reporting an error. (Changed by Mark Dickinson; :issue:`8300`.) * New function: the :mod:`subprocess` module's @@ -1590,7 +1590,7 @@ changes, or look through the Subversion logs for all the details. (Contributed by Gregory P. Smith.) The :mod:`subprocess` module will now retry its internal system calls - on receiving an :const:`EINTR` signal. (Reported by several people; final + on receiving an :const:`~errno.EINTR` signal. (Reported by several people; final patch by Gregory P. Smith in :issue:`1068268`.) * New function: :func:`~symtable.Symbol.is_declared_global` in the :mod:`symtable` module @@ -1602,16 +1602,16 @@ changes, or look through the Subversion logs for all the details. 
identifier instead of the previous default value of ``'python'``. (Changed by Sean Reifschneider; :issue:`8451`.) -* The ``sys.version_info`` value is now a named tuple, with attributes - named :attr:`major`, :attr:`minor`, :attr:`micro`, - :attr:`releaselevel`, and :attr:`serial`. (Contributed by Ross +* The :attr:`sys.version_info` value is now a named tuple, with attributes + named :attr:`!major`, :attr:`!minor`, :attr:`!micro`, + :attr:`!releaselevel`, and :attr:`!serial`. (Contributed by Ross Light; :issue:`4285`.) :func:`sys.getwindowsversion` also returns a named tuple, - with attributes named :attr:`major`, :attr:`minor`, :attr:`build`, - :attr:`platform`, :attr:`service_pack`, :attr:`service_pack_major`, - :attr:`service_pack_minor`, :attr:`suite_mask`, and - :attr:`product_type`. (Contributed by Brian Curtin; :issue:`7766`.) + with attributes named :attr:`!major`, :attr:`!minor`, :attr:`!build`, + :attr:`!platform`, :attr:`!service_pack`, :attr:`!service_pack_major`, + :attr:`!service_pack_minor`, :attr:`!suite_mask`, and + :attr:`!product_type`. (Contributed by Brian Curtin; :issue:`7766`.) * The :mod:`tarfile` module's default error handling has changed, to no longer suppress fatal errors. The default error level was previously 0, @@ -1691,7 +1691,7 @@ changes, or look through the Subversion logs for all the details. (Originally implemented in Python 3.x by Raymond Hettinger, and backported to 2.7 by Michael Foord.) -* The ElementTree library, :mod:`xml.etree`, no longer escapes +* The :mod:`xml.etree.ElementTree` library no longer escapes ampersands and angle brackets when outputting an XML processing instruction (which looks like ````) or comment (which looks like ````). @@ -1701,8 +1701,8 @@ changes, or look through the Subversion logs for all the details. :mod:`SimpleXMLRPCServer ` modules, have improved performance by supporting HTTP/1.1 keep-alive and by optionally using gzip encoding to compress the XML being exchanged. 
The gzip compression is - controlled by the :attr:`encode_threshold` attribute of - :class:`SimpleXMLRPCRequestHandler`, which contains a size in bytes; + controlled by the :attr:`!encode_threshold` attribute of + :class:`~xmlrpc.server.SimpleXMLRPCRequestHandler`, which contains a size in bytes; responses larger than this will be compressed. (Contributed by Kristján Valur Jónsson; :issue:`6267`.) @@ -1713,7 +1713,8 @@ changes, or look through the Subversion logs for all the details. :mod:`zipfile` now also supports archiving empty directories and extracts them correctly. (Fixed by Kuba Wieczorek; :issue:`4710`.) Reading files out of an archive is faster, and interleaving - :meth:`~zipfile.ZipFile.read` and :meth:`~zipfile.ZipFile.readline` now works correctly. + :meth:`read() ` and + :meth:`readline() ` now works correctly. (Contributed by Nir Aides; :issue:`7610`.) The :func:`~zipfile.is_zipfile` function now @@ -1807,14 +1808,14 @@ closely resemble the native platform's widgets. This widget set was originally called Tile, but was renamed to Ttk (for "themed Tk") on being added to Tcl/Tck release 8.5. -To learn more, read the :mod:`ttk` module documentation. You may also +To learn more, read the :mod:`~tkinter.ttk` module documentation. You may also wish to read the Tcl/Tk manual page describing the Ttk theme engine, available at -https://www.tcl.tk/man/tcl8.5/TkCmd/ttk_intro.htm. Some +https://www.tcl.tk/man/tcl8.5/TkCmd/ttk_intro.html. Some screenshots of the Python/Ttk code in use are at https://code.google.com/archive/p/python-ttk/wikis/Screenshots.wiki. -The :mod:`ttk` module was written by Guilherme Polo and added in +The :mod:`tkinter.ttk` module was written by Guilherme Polo and added in :issue:`2983`. 
An alternate version called ``Tile.py``, written by Martin Franklin and maintained by Kevin Walzer, was proposed for inclusion in :issue:`2618`, but the authors argued that Guilherme @@ -1830,7 +1831,7 @@ The :mod:`unittest` module was greatly enhanced; many new features were added. Most of these features were implemented by Michael Foord, unless otherwise noted. The enhanced version of the module is downloadable separately for use with Python versions 2.4 to 2.6, -packaged as the :mod:`unittest2` package, from +packaged as the :mod:`!unittest2` package, from https://pypi.org/project/unittest2. When used from the command line, the module can automatically discover @@ -1938,19 +1939,20 @@ GvR worked on merging them into Python's version of :mod:`unittest`. differences in the two strings. This comparison is now used by default when Unicode strings are compared with :meth:`~unittest.TestCase.assertEqual`. -* :meth:`~unittest.TestCase.assertRegexpMatches` and - :meth:`~unittest.TestCase.assertNotRegexpMatches` checks whether the +* :meth:`assertRegexpMatches() ` and + :meth:`assertNotRegexpMatches() ` checks whether the first argument is a string matching or not matching the regular expression provided as the second argument (:issue:`8038`). -* :meth:`~unittest.TestCase.assertRaisesRegexp` checks whether a particular exception +* :meth:`assertRaisesRegexp() ` checks + whether a particular exception is raised, and then also checks that the string representation of the exception matches the provided regular expression. * :meth:`~unittest.TestCase.assertIn` and :meth:`~unittest.TestCase.assertNotIn` tests whether *first* is or is not in *second*. -* :meth:`~unittest.TestCase.assertItemsEqual` tests whether two provided sequences +* :meth:`assertItemsEqual() ` tests whether two provided sequences contain the same elements. 
* :meth:`~unittest.TestCase.assertSetEqual` compares whether two sets are equal, and @@ -1966,7 +1968,7 @@ GvR worked on merging them into Python's version of :mod:`unittest`. * :meth:`~unittest.TestCase.assertDictEqual` compares two dictionaries and reports the differences; it's now used by default when you compare two dictionaries - using :meth:`~unittest.TestCase.assertEqual`. :meth:`~unittest.TestCase.assertDictContainsSubset` checks whether + using :meth:`~unittest.TestCase.assertEqual`. :meth:`!assertDictContainsSubset` checks whether all of the key/value pairs in *first* are found in *second*. * :meth:`~unittest.TestCase.assertAlmostEqual` and :meth:`~unittest.TestCase.assertNotAlmostEqual` test @@ -2023,8 +2025,8 @@ version 1.3. Some of the new features are: p = ET.XMLParser(encoding='utf-8') t = ET.XML("""""", parser=p) - Errors in parsing XML now raise a :exc:`ParseError` exception, whose - instances have a :attr:`position` attribute + Errors in parsing XML now raise a :exc:`~xml.etree.ElementTree.ParseError` exception, whose + instances have a :attr:`!position` attribute containing a (*line*, *column*) tuple giving the location of the problem. * ElementTree's code for converting trees to a string has been @@ -2034,7 +2036,8 @@ version 1.3. Some of the new features are: "xml" (the default), "html", or "text". HTML mode will output empty elements as ```` instead of ````, and text mode will skip over elements and only output the text chunks. If - you set the :attr:`tag` attribute of an element to ``None`` but + you set the :attr:`~xml.etree.ElementTree.Element.tag` attribute of an + element to ``None`` but leave its children in place, the element will be omitted when the tree is written out, so you don't need to do more extensive rearrangement to remove a single element. @@ -2064,14 +2067,14 @@ version 1.3. Some of the new features are: # Outputs 1... 
print ET.tostring(new) -* New :class:`Element` method: +* New :class:`~xml.etree.ElementTree.Element` method: :meth:`~xml.etree.ElementTree.Element.iter` yields the children of the element as a generator. It's also possible to write ``for child in elem:`` to loop over an element's children. The existing method - :meth:`getiterator` is now deprecated, as is :meth:`getchildren` + :meth:`!getiterator` is now deprecated, as is :meth:`!getchildren` which constructs and returns a list of children. -* New :class:`Element` method: +* New :class:`~xml.etree.ElementTree.Element` method: :meth:`~xml.etree.ElementTree.Element.itertext` yields all chunks of text that are descendants of the element. For example:: @@ -2227,7 +2230,7 @@ Changes to Python's build process and to the C API include: (Fixed by Thomas Wouters; :issue:`1590864`.) * The :c:func:`Py_Finalize` function now calls the internal - :func:`threading._shutdown` function; this prevents some exceptions from + :func:`!threading._shutdown` function; this prevents some exceptions from being raised when an interpreter shuts down. (Patch by Adam Olsen; :issue:`1722344`.) @@ -2242,7 +2245,7 @@ Changes to Python's build process and to the C API include: Heller; :issue:`3102`.) * New configure option: the :option:`!--with-system-expat` switch allows - building the :mod:`pyexpat` module to use the system Expat library. + building the :mod:`pyexpat ` module to use the system Expat library. (Contributed by Arfrever Frehtes Taifersar Arahesis; :issue:`7609`.) * New configure option: the @@ -2329,9 +2332,9 @@ Port-Specific Changes: Windows * The :mod:`msvcrt` module now contains some constants from the :file:`crtassem.h` header file: - :data:`CRT_ASSEMBLY_VERSION`, - :data:`VC_ASSEMBLY_PUBLICKEYTOKEN`, - and :data:`LIBRARIES_ASSEMBLY_NAME_PREFIX`. + :data:`~msvcrt.CRT_ASSEMBLY_VERSION`, + :data:`~msvcrt.VC_ASSEMBLY_PUBLICKEYTOKEN`, + and :data:`~msvcrt.LIBRARIES_ASSEMBLY_NAME_PREFIX`. 
(Contributed by David Cournapeau; :issue:`4365`.) * The :mod:`_winreg ` module for accessing the registry now implements @@ -2342,21 +2345,21 @@ Port-Specific Changes: Windows were also tested and documented. (Implemented by Brian Curtin: :issue:`7347`.) -* The new :c:func:`_beginthreadex` API is used to start threads, and +* The new :c:func:`!_beginthreadex` API is used to start threads, and the native thread-local storage functions are now used. (Contributed by Kristján Valur Jónsson; :issue:`3582`.) * The :func:`os.kill` function now works on Windows. The signal value - can be the constants :const:`CTRL_C_EVENT`, - :const:`CTRL_BREAK_EVENT`, or any integer. The first two constants + can be the constants :const:`~signal.CTRL_C_EVENT`, + :const:`~signal.CTRL_BREAK_EVENT`, or any integer. The first two constants will send :kbd:`Control-C` and :kbd:`Control-Break` keystroke events to - subprocesses; any other value will use the :c:func:`TerminateProcess` + subprocesses; any other value will use the :c:func:`!TerminateProcess` API. (Contributed by Miki Tebeka; :issue:`1220212`.) * The :func:`os.listdir` function now correctly fails for an empty path. (Fixed by Hirokazu Yamamoto; :issue:`5913`.) -* The :mod:`mimelib` module will now read the MIME database from +* The :mod:`mimetypes` module will now read the MIME database from the Windows registry when initializing. (Patch by Gabriel Genellina; :issue:`4969`.) @@ -2385,7 +2388,7 @@ Port-Specific Changes: Mac OS X Port-Specific Changes: FreeBSD ----------------------------------- -* FreeBSD 7.1's :const:`SO_SETFIB` constant, used with the :func:`~socket.socket` methods +* FreeBSD 7.1's :const:`!SO_SETFIB` constant, used with the :func:`~socket.socket` methods :func:`~socket.socket.getsockopt`/:func:`~socket.socket.setsockopt` to select an alternate routing table, is now available in the :mod:`socket` module. (Added by Kyle VanderBeek; :issue:`8235`.) 
@@ -2441,7 +2444,7 @@ This section lists previously described changes and other bugfixes that may require changes to your code: * The :func:`range` function processes its arguments more - consistently; it will now call :meth:`__int__` on non-float, + consistently; it will now call :meth:`~object.__int__` on non-float, non-integer arguments that are supplied to it. (Fixed by Alexander Belopolsky; :issue:`1533`.) @@ -2486,13 +2489,13 @@ In the standard library: (or ``NaN``) are now hashable. (Fixed by Mark Dickinson; :issue:`7279`.) -* The ElementTree library, :mod:`xml.etree`, no longer escapes +* The :mod:`xml.etree.ElementTree` library no longer escapes ampersands and angle brackets when outputting an XML processing instruction (which looks like ````) or comment (which looks like ````). (Patch by Neil Muller; :issue:`2746`.) -* The :meth:`~StringIO.StringIO.readline` method of :class:`~StringIO.StringIO` objects now does +* The :meth:`!readline` method of :class:`~io.StringIO` objects now does nothing when a negative length is requested, as other file-like objects do. (:issue:`7348`). @@ -2577,11 +2580,11 @@ Two new environment variables for debug mode -------------------------------------------- In debug mode, the ``[xxx refs]`` statistic is not written by default, the -:envvar:`PYTHONSHOWREFCOUNT` environment variable now must also be set. +:envvar:`!PYTHONSHOWREFCOUNT` environment variable now must also be set. (Contributed by Victor Stinner; :issue:`31733`.) When Python is compiled with ``COUNT_ALLOC`` defined, allocation counts are no -longer dumped by default anymore: the :envvar:`PYTHONSHOWALLOCCOUNT` environment +longer dumped by default anymore: the :envvar:`!PYTHONSHOWALLOCCOUNT` environment variable must now also be set. Moreover, allocation counts are now dumped into stderr, rather than stdout. (Contributed by Victor Stinner; :issue:`31692`.) 
@@ -2712,7 +2715,8 @@ PEP 476: Enabling certificate verification by default for stdlib http clients ----------------------------------------------------------------------------- :pep:`476` updated :mod:`httplib ` and modules which use it, such as -:mod:`urllib2 ` and :mod:`xmlrpclib`, to now verify that the server +:mod:`urllib2 ` and :mod:`xmlrpclib `, to now +verify that the server presents a certificate which is signed by a Certificate Authority in the platform trust store and whose hostname matches the hostname being requested by default, significantly improving security for many applications. This @@ -2753,7 +2757,7 @@ entire Python process back to the default permissive behaviour of Python 2.7.8 and earlier. For cases where the connection establishment code can't be modified, but the -overall application can be, the new :func:`ssl._https_verify_certificates` +overall application can be, the new :func:`!ssl._https_verify_certificates` function can be used to adjust the default behaviour at runtime. diff --git a/Doc/whatsnew/3.1.rst b/Doc/whatsnew/3.1.rst index e237179f4b1829..c912a928ee4597 100644 --- a/Doc/whatsnew/3.1.rst +++ b/Doc/whatsnew/3.1.rst @@ -169,7 +169,7 @@ Some smaller changes made to the core Python language are: ... if '' in line: ... outfile.write(line) - With the new syntax, the :func:`contextlib.nested` function is no longer + With the new syntax, the :func:`!contextlib.nested` function is no longer needed and is now deprecated. (Contributed by Georg Brandl and Mattias Brändström; diff --git a/Doc/whatsnew/3.2.rst b/Doc/whatsnew/3.2.rst index 9834bc03dc4b74..4f70d902243d4d 100644 --- a/Doc/whatsnew/3.2.rst +++ b/Doc/whatsnew/3.2.rst @@ -743,8 +743,8 @@ Several new and useful functions and methods have been added: Two methods have been deprecated: -* :meth:`xml.etree.ElementTree.getchildren` use ``list(elem)`` instead. -* :meth:`xml.etree.ElementTree.getiterator` use ``Element.iter`` instead. 
+* :meth:`!xml.etree.ElementTree.getchildren` use ``list(elem)`` instead. +* :meth:`!xml.etree.ElementTree.getiterator` use ``Element.iter`` instead. For details of the update, see `Introducing ElementTree `_ @@ -2682,7 +2682,7 @@ require changes to your code: (Contributed by Georg Brandl; :issue:`5675`.) -* The previously deprecated :func:`contextlib.nested` function has been removed +* The previously deprecated :func:`!contextlib.nested` function has been removed in favor of a plain :keyword:`with` statement which can accept multiple context managers. The latter technique is faster (because it is built-in), and it does a better job finalizing multiple context managers when one of them From 46190d9ea8a878a03d95b4e1bdcdc9ed576cf3fa Mon Sep 17 00:00:00 2001 From: Eugene Toder Date: Mon, 12 Feb 2024 07:44:56 -0500 Subject: [PATCH 076/126] gh-89039: Call subclass constructors in datetime.*.replace (GH-114780) When replace() method is called on a subclass of datetime, date or time, properly call derived constructor. Previously, only the base class's constructor was called. Also, make sure to pass non-zero fold values when creating subclasses in various methods. Previously, fold was silently ignored. 
--- Lib/test/datetimetester.py | 62 +++++++++++++-- ...3-12-18-20-10-50.gh-issue-89039.gqFdtU.rst | 6 ++ Modules/_datetimemodule.c | 77 +++++++++++++++---- 3 files changed, 124 insertions(+), 21 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-12-18-20-10-50.gh-issue-89039.gqFdtU.rst diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 980a8e6c1b1836..31fc383e29707a 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -1723,11 +1723,24 @@ def test_replace(self): def test_subclass_replace(self): class DateSubclass(self.theclass): - pass + def __new__(cls, *args, **kwargs): + result = self.theclass.__new__(cls, *args, **kwargs) + result.extra = 7 + return result dt = DateSubclass(2012, 1, 1) - self.assertIs(type(dt.replace(year=2013)), DateSubclass) - self.assertIs(type(copy.replace(dt, year=2013)), DateSubclass) + + test_cases = [ + ('self.replace', dt.replace(year=2013)), + ('copy.replace', copy.replace(dt, year=2013)), + ] + + for name, res in test_cases: + with self.subTest(name): + self.assertIs(type(res), DateSubclass) + self.assertEqual(res.year, 2013) + self.assertEqual(res.month, 1) + self.assertEqual(res.extra, 7) def test_subclass_date(self): @@ -3025,6 +3038,26 @@ def __new__(cls, *args, **kwargs): self.assertIsInstance(dt, DateTimeSubclass) self.assertEqual(dt.extra, 7) + def test_subclass_replace_fold(self): + class DateTimeSubclass(self.theclass): + pass + + dt = DateTimeSubclass(2012, 1, 1) + dt2 = DateTimeSubclass(2012, 1, 1, fold=1) + + test_cases = [ + ('self.replace', dt.replace(year=2013), 0), + ('self.replace', dt2.replace(year=2013), 1), + ('copy.replace', copy.replace(dt, year=2013), 0), + ('copy.replace', copy.replace(dt2, year=2013), 1), + ] + + for name, res, fold in test_cases: + with self.subTest(name, fold=fold): + self.assertIs(type(res), DateTimeSubclass) + self.assertEqual(res.year, 2013) + self.assertEqual(res.fold, fold) + def test_fromisoformat_datetime(self): # Test that 
isoformat() is reversible base_dates = [ @@ -3705,11 +3738,28 @@ def test_replace(self): def test_subclass_replace(self): class TimeSubclass(self.theclass): - pass + def __new__(cls, *args, **kwargs): + result = self.theclass.__new__(cls, *args, **kwargs) + result.extra = 7 + return result ctime = TimeSubclass(12, 30) - self.assertIs(type(ctime.replace(hour=10)), TimeSubclass) - self.assertIs(type(copy.replace(ctime, hour=10)), TimeSubclass) + ctime2 = TimeSubclass(12, 30, fold=1) + + test_cases = [ + ('self.replace', ctime.replace(hour=10), 0), + ('self.replace', ctime2.replace(hour=10), 1), + ('copy.replace', copy.replace(ctime, hour=10), 0), + ('copy.replace', copy.replace(ctime2, hour=10), 1), + ] + + for name, res, fold in test_cases: + with self.subTest(name, fold=fold): + self.assertIs(type(res), TimeSubclass) + self.assertEqual(res.hour, 10) + self.assertEqual(res.minute, 30) + self.assertEqual(res.extra, 7) + self.assertEqual(res.fold, fold) def test_subclass_time(self): diff --git a/Misc/NEWS.d/next/Library/2023-12-18-20-10-50.gh-issue-89039.gqFdtU.rst b/Misc/NEWS.d/next/Library/2023-12-18-20-10-50.gh-issue-89039.gqFdtU.rst new file mode 100644 index 00000000000000..d1998d75e9fd76 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-12-18-20-10-50.gh-issue-89039.gqFdtU.rst @@ -0,0 +1,6 @@ +When replace() method is called on a subclass of datetime, date or time, +properly call derived constructor. Previously, only the base class's +constructor was called. + +Also, make sure to pass non-zero fold values when creating subclasses in +various methods. Previously, fold was silently ignored. 
diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index b984ea61b82f0f..014ccdd3f6effe 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -1045,6 +1045,40 @@ new_datetime_ex(int year, int month, int day, int hour, int minute, new_datetime_ex2(y, m, d, hh, mm, ss, us, tzinfo, fold, \ &PyDateTime_DateTimeType) +static PyObject * +call_subclass_fold(PyObject *cls, int fold, const char *format, ...) +{ + PyObject *kwargs = NULL, *res = NULL; + va_list va; + + va_start(va, format); + PyObject *args = Py_VaBuildValue(format, va); + va_end(va); + if (args == NULL) { + return NULL; + } + if (fold) { + kwargs = PyDict_New(); + if (kwargs == NULL) { + goto Done; + } + PyObject *obj = PyLong_FromLong(fold); + if (obj == NULL) { + goto Done; + } + int err = PyDict_SetItemString(kwargs, "fold", obj); + Py_DECREF(obj); + if (err < 0) { + goto Done; + } + } + res = PyObject_Call(cls, args, kwargs); +Done: + Py_DECREF(args); + Py_XDECREF(kwargs); + return res; +} + static PyObject * new_datetime_subclass_fold_ex(int year, int month, int day, int hour, int minute, int second, int usecond, PyObject *tzinfo, @@ -1054,17 +1088,11 @@ new_datetime_subclass_fold_ex(int year, int month, int day, int hour, int minute // Use the fast path constructor dt = new_datetime(year, month, day, hour, minute, second, usecond, tzinfo, fold); - } else { + } + else { // Subclass - dt = PyObject_CallFunction(cls, "iiiiiiiO", - year, - month, - day, - hour, - minute, - second, - usecond, - tzinfo); + dt = call_subclass_fold(cls, fold, "iiiiiiiO", year, month, day, + hour, minute, second, usecond, tzinfo); } return dt; @@ -1120,6 +1148,24 @@ new_time_ex(int hour, int minute, int second, int usecond, #define new_time(hh, mm, ss, us, tzinfo, fold) \ new_time_ex2(hh, mm, ss, us, tzinfo, fold, &PyDateTime_TimeType) +static PyObject * +new_time_subclass_fold_ex(int hour, int minute, int second, int usecond, + PyObject *tzinfo, int fold, PyObject *cls) +{ + PyObject *t; + 
if ((PyTypeObject*)cls == &PyDateTime_TimeType) { + // Use the fast path constructor + t = new_time(hour, minute, second, usecond, tzinfo, fold); + } + else { + // Subclass + t = call_subclass_fold(cls, fold, "iiiiO", hour, minute, second, + usecond, tzinfo); + } + + return t; +} + /* Create a timedelta instance. Normalize the members iff normalize is * true. Passing false is a speed optimization, if you know for sure * that seconds and microseconds are already in their proper ranges. In any @@ -3480,7 +3526,7 @@ datetime_date_replace_impl(PyDateTime_Date *self, int year, int month, int day) /*[clinic end generated code: output=2a9430d1e6318aeb input=0d1f02685b3e90f6]*/ { - return new_date_ex(year, month, day, Py_TYPE(self)); + return new_date_subclass_ex(year, month, day, (PyObject *)Py_TYPE(self)); } static Py_hash_t @@ -4589,8 +4635,8 @@ datetime_time_replace_impl(PyDateTime_Time *self, int hour, int minute, int fold) /*[clinic end generated code: output=0b89a44c299e4f80 input=9b6a35b1e704b0ca]*/ { - return new_time_ex2(hour, minute, second, microsecond, tzinfo, fold, - Py_TYPE(self)); + return new_time_subclass_fold_ex(hour, minute, second, microsecond, tzinfo, + fold, (PyObject *)Py_TYPE(self)); } static PyObject * @@ -6039,8 +6085,9 @@ datetime_datetime_replace_impl(PyDateTime_DateTime *self, int year, int fold) /*[clinic end generated code: output=00bc96536833fddb input=9b38253d56d9bcad]*/ { - return new_datetime_ex2(year, month, day, hour, minute, second, - microsecond, tzinfo, fold, Py_TYPE(self)); + return new_datetime_subclass_fold_ex(year, month, day, hour, minute, + second, microsecond, tzinfo, fold, + (PyObject *)Py_TYPE(self)); } static PyObject * From dc8893af7df706138161d82ce7d1d2f9132d14f9 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Mon, 12 Feb 2024 16:16:16 +0200 Subject: [PATCH 077/126] Add missing sections to blurbs (#114553) --- Misc/NEWS.d/3.5.3.rst | 1 + Misc/NEWS.d/3.6.0.rst | 1 + 
Misc/NEWS.d/3.6.2.rst | 1 + 3 files changed, 3 insertions(+) diff --git a/Misc/NEWS.d/3.5.3.rst b/Misc/NEWS.d/3.5.3.rst index c3fcb67a4563f9..25db389ba5734f 100644 --- a/Misc/NEWS.d/3.5.3.rst +++ b/Misc/NEWS.d/3.5.3.rst @@ -3,5 +3,6 @@ .. no changes: True .. nonce: zYPqUK .. release date: 2017-01-17 +.. section: Library There were no code changes between 3.5.3rc1 and 3.5.3 final. diff --git a/Misc/NEWS.d/3.6.0.rst b/Misc/NEWS.d/3.6.0.rst index f9805cab28615e..d5c41f38838d93 100644 --- a/Misc/NEWS.d/3.6.0.rst +++ b/Misc/NEWS.d/3.6.0.rst @@ -3,5 +3,6 @@ .. no changes: True .. nonce: F9ENBV .. release date: 2016-12-23 +.. section: Library No changes since release candidate 2 diff --git a/Misc/NEWS.d/3.6.2.rst b/Misc/NEWS.d/3.6.2.rst index dba43d146df954..ee50670bd9f442 100644 --- a/Misc/NEWS.d/3.6.2.rst +++ b/Misc/NEWS.d/3.6.2.rst @@ -3,5 +3,6 @@ .. no changes: True .. nonce: F9ENBV .. release date: 2017-07-17 +.. section: Library No changes since release candidate 2 From 95ebd45613d6bf0a8b76778454f1d413d54209db Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Mon, 12 Feb 2024 17:23:54 +0300 Subject: [PATCH 078/126] Remove outdated comment about py3.6 in `test_typing` (#115318) --- Lib/test/test_typing.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/test/test_typing.py b/Lib/test/test_typing.py index c3a092f3af3009..176623171c9888 100644 --- a/Lib/test/test_typing.py +++ b/Lib/test/test_typing.py @@ -6156,8 +6156,6 @@ def test_overload_registry_repeated(self): self.assertEqual(list(get_overloads(impl)), overloads) -# Definitions needed for features introduced in Python 3.6 - from test.typinganndata import ( ann_module, ann_module2, ann_module3, ann_module5, ann_module6, ) From 93ac78ac3ee124942bca7492149c3ff0003b6e30 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Mon, 12 Feb 2024 19:05:30 +0300 Subject: [PATCH 079/126] gh-115058: Add ``reset_rare_event_counters`` function in `_testinternalcapi` (GH-115128) --- Lib/test/test_optimizer.py | 3 +++ 
Modules/_testinternalcapi.c | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/Lib/test/test_optimizer.py b/Lib/test/test_optimizer.py index c8554c40df4b2d..dfea8be3c6956f 100644 --- a/Lib/test/test_optimizer.py +++ b/Lib/test/test_optimizer.py @@ -7,6 +7,9 @@ class TestRareEventCounters(unittest.TestCase): + def setUp(self): + _testinternalcapi.reset_rare_event_counters() + def test_set_class(self): class A: pass diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 0bb739b5398b11..3834f00009cea4 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1650,6 +1650,20 @@ get_rare_event_counters(PyObject *self, PyObject *type) ); } +static PyObject * +reset_rare_event_counters(PyObject *self, PyObject *Py_UNUSED(type)) +{ + PyInterpreterState *interp = PyInterpreterState_Get(); + + interp->rare_events.set_class = 0; + interp->rare_events.set_bases = 0; + interp->rare_events.set_eval_frame_func = 0; + interp->rare_events.builtin_dict = 0; + interp->rare_events.func_modification = 0; + + return Py_None; +} + #ifdef Py_GIL_DISABLED static PyObject * @@ -1727,6 +1741,7 @@ static PyMethodDef module_functions[] = { _TESTINTERNALCAPI_TEST_LONG_NUMBITS_METHODDEF {"get_type_module_name", get_type_module_name, METH_O}, {"get_rare_event_counters", get_rare_event_counters, METH_NOARGS}, + {"reset_rare_event_counters", reset_rare_event_counters, METH_NOARGS}, #ifdef Py_GIL_DISABLED {"py_thread_id", get_py_thread_id, METH_NOARGS}, #endif From 814466101790d4381ca4800c3d3b0cc0aad50c62 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 12 Feb 2024 16:07:38 +0000 Subject: [PATCH 080/126] GH-113710: Fix updating of dict version tag and add watched dict stats (GH-115221) --- Include/cpython/pystats.h | 3 +++ Include/internal/pycore_dict.h | 5 +++-- Python/optimizer_analysis.c | 31 ++++++++++++------------------- Python/pylifecycle.c | 2 +- Python/specialize.c | 2 ++ Tools/scripts/summarize_stats.py | 2 +- 6 files 
changed, 22 insertions(+), 23 deletions(-) diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index bf0cfe4cb695b4..0f50439b73848e 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -133,6 +133,9 @@ typedef struct _rare_event_stats { uint64_t builtin_dict; /* Modifying a function, e.g. func.__defaults__ = ..., etc. */ uint64_t func_modification; + /* Modifying a dict that is being watched */ + uint64_t watched_dict_modification; + uint64_t watched_globals_modification; } RareEventStats; typedef struct _stats { diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 233da058f464d1..0ebe701bc16f81 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -209,6 +209,7 @@ static inline PyDictUnicodeEntry* DK_UNICODE_ENTRIES(PyDictKeysObject *dk) { #define DICT_VERSION_INCREMENT (1 << (DICT_MAX_WATCHERS + DICT_WATCHED_MUTATION_BITS)) #define DICT_WATCHER_MASK ((1 << DICT_MAX_WATCHERS) - 1) +#define DICT_WATCHER_AND_MODIFICATION_MASK ((1 << (DICT_MAX_WATCHERS + DICT_WATCHED_MUTATION_BITS)) - 1) #ifdef Py_GIL_DISABLED #define DICT_NEXT_VERSION(INTERP) \ @@ -236,10 +237,10 @@ _PyDict_NotifyEvent(PyInterpreterState *interp, assert(Py_REFCNT((PyObject*)mp) > 0); int watcher_bits = mp->ma_version_tag & DICT_WATCHER_MASK; if (watcher_bits) { + RARE_EVENT_STAT_INC(watched_dict_modification); _PyDict_SendEvent(watcher_bits, event, mp, key, value); - return DICT_NEXT_VERSION(interp) | watcher_bits; } - return DICT_NEXT_VERSION(interp); + return DICT_NEXT_VERSION(interp) | (mp->ma_version_tag & DICT_WATCHER_AND_MODIFICATION_MASK); } extern PyObject *_PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 2cfbf4b349d0f5..b14e6950b4a06b 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -28,25 +28,23 @@ increment_mutations(PyObject* dict) { d->ma_version_tag += (1 << 
DICT_MAX_WATCHERS); } +/* The first two dict watcher IDs are reserved for CPython, + * so we don't need to check that they haven't been used */ +#define BUILTINS_WATCHER_ID 0 +#define GLOBALS_WATCHER_ID 1 + static int globals_watcher_callback(PyDict_WatchEvent event, PyObject* dict, PyObject* key, PyObject* new_value) { - if (event == PyDict_EVENT_CLONED) { - return 0; - } - uint64_t watched_mutations = get_mutations(dict); - if (watched_mutations < _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS) { - _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), dict); - increment_mutations(dict); - } - else { - PyDict_Unwatch(1, dict); - } + RARE_EVENT_STAT_INC(watched_globals_modification); + assert(get_mutations(dict) < _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS); + _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), dict); + increment_mutations(dict); + PyDict_Unwatch(GLOBALS_WATCHER_ID, dict); return 0; } - static void global_to_const(_PyUOpInstruction *inst, PyObject *obj) { @@ -82,11 +80,6 @@ incorrect_keys(_PyUOpInstruction *inst, PyObject *obj) return 0; } -/* The first two dict watcher IDs are reserved for CPython, - * so we don't need to check that they haven't been used */ -#define BUILTINS_WATCHER_ID 0 -#define GLOBALS_WATCHER_ID 1 - /* Returns 1 if successfully optimized * 0 if the trace is not suitable for optimization (yet) * -1 if there was an error. 
*/ @@ -117,8 +110,8 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, uint32_t builtins_watched = 0; uint32_t globals_checked = 0; uint32_t globals_watched = 0; - if (interp->dict_state.watchers[1] == NULL) { - interp->dict_state.watchers[1] = globals_watcher_callback; + if (interp->dict_state.watchers[GLOBALS_WATCHER_ID] == NULL) { + interp->dict_state.watchers[GLOBALS_WATCHER_ID] = globals_watcher_callback; } for (int pc = 0; pc < buffer_size; pc++) { _PyUOpInstruction *inst = &buffer[pc]; diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 61c9d4f9ea9575..230018068d751c 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -611,7 +611,7 @@ static int builtins_dict_watcher(PyDict_WatchEvent event, PyObject *dict, PyObject *key, PyObject *new_value) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (event != PyDict_EVENT_CLONED && interp->rare_events.builtin_dict < _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS) { + if (interp->rare_events.builtin_dict < _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS) { _Py_Executors_InvalidateAll(interp); } RARE_EVENT_INTERP_INC(interp, builtin_dict); diff --git a/Python/specialize.c b/Python/specialize.c index e38e3556a6d642..ea2638570f22d0 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -275,6 +275,8 @@ print_rare_event_stats(FILE *out, RareEventStats *stats) fprintf(out, "Rare event (set_eval_frame_func): %" PRIu64 "\n", stats->set_eval_frame_func); fprintf(out, "Rare event (builtin_dict): %" PRIu64 "\n", stats->builtin_dict); fprintf(out, "Rare event (func_modification): %" PRIu64 "\n", stats->func_modification); + fprintf(out, "Rare event (watched_dict_modification): %" PRIu64 "\n", stats->watched_dict_modification); + fprintf(out, "Rare event (watched_globals_modification): %" PRIu64 "\n", stats->watched_globals_modification); } static void diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index 9b7e7b999ea7c7..7891b9cf923d33 100644 --- 
a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -415,7 +415,7 @@ def get_histogram(self, prefix: str) -> list[tuple[int, int]]: def get_rare_events(self) -> list[tuple[str, int]]: prefix = "Rare event " return [ - (key[len(prefix) + 1:-1], val) + (key[len(prefix) + 1:-1].replace("_", " "), val) for key, val in self._data.items() if key.startswith(prefix) ] From 91822018eeba12a6c9eabbc748363b2fd4291b30 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 12 Feb 2024 18:24:45 +0200 Subject: [PATCH 081/126] gh-115233: Fix an example in the Logging Cookbook (GH-115325) Also add more tests for LoggerAdapter. Also support stacklevel in LoggerAdapter._log(). --- Doc/howto/logging-cookbook.rst | 10 +- Lib/logging/__init__.py | 11 +-- Lib/test/test_logging.py | 91 +++++++++++++++++-- ...-02-12-12-26-17.gh-issue-115233.aug6r9.rst | 1 + 4 files changed, 90 insertions(+), 23 deletions(-) create mode 100644 Misc/NEWS.d/next/Documentation/2024-02-12-12-26-17.gh-issue-115233.aug6r9.rst diff --git a/Doc/howto/logging-cookbook.rst b/Doc/howto/logging-cookbook.rst index 80147e31fcbae1..f7d885ec88483d 100644 --- a/Doc/howto/logging-cookbook.rst +++ b/Doc/howto/logging-cookbook.rst @@ -1744,13 +1744,11 @@ to the above, as in the following example:: return self.fmt.format(*self.args) class StyleAdapter(logging.LoggerAdapter): - def __init__(self, logger, extra=None): - super().__init__(logger, extra or {}) - - def log(self, level, msg, /, *args, **kwargs): + def log(self, level, msg, /, *args, stacklevel=1, **kwargs): if self.isEnabledFor(level): msg, kwargs = self.process(msg, kwargs) - self.logger._log(level, Message(msg, args), (), **kwargs) + self.logger.log(level, Message(msg, args), **kwargs, + stacklevel=stacklevel+1) logger = StyleAdapter(logging.getLogger(__name__)) @@ -1762,7 +1760,7 @@ to the above, as in the following example:: main() The above script should log the message ``Hello, world!`` when run with -Python 3.2 or later. 
+Python 3.8 or later. .. currentmodule:: logging diff --git a/Lib/logging/__init__.py b/Lib/logging/__init__.py index 684b58d5548f91..fcec9e76b98661 100644 --- a/Lib/logging/__init__.py +++ b/Lib/logging/__init__.py @@ -1949,18 +1949,11 @@ def hasHandlers(self): """ return self.logger.hasHandlers() - def _log(self, level, msg, args, exc_info=None, extra=None, stack_info=False): + def _log(self, level, msg, args, **kwargs): """ Low-level log implementation, proxied to allow nested logger adapters. """ - return self.logger._log( - level, - msg, - args, - exc_info=exc_info, - extra=extra, - stack_info=stack_info, - ) + return self.logger._log(level, msg, args, **kwargs) @property def manager(self): diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py index 888523227c2ac4..cf09bad4c9187b 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -5478,6 +5478,7 @@ def test_critical(self): self.assertEqual(record.levelno, logging.CRITICAL) self.assertEqual(record.msg, msg) self.assertEqual(record.args, (self.recording,)) + self.assertEqual(record.funcName, 'test_critical') def test_is_enabled_for(self): old_disable = self.adapter.logger.manager.disable @@ -5496,15 +5497,9 @@ def test_has_handlers(self): self.assertFalse(self.adapter.hasHandlers()) def test_nested(self): - class Adapter(logging.LoggerAdapter): - prefix = 'Adapter' - - def process(self, msg, kwargs): - return f"{self.prefix} {msg}", kwargs - msg = 'Adapters can be nested, yo.' 
- adapter = Adapter(logger=self.logger, extra=None) - adapter_adapter = Adapter(logger=adapter, extra=None) + adapter = PrefixAdapter(logger=self.logger, extra=None) + adapter_adapter = PrefixAdapter(logger=adapter, extra=None) adapter_adapter.prefix = 'AdapterAdapter' self.assertEqual(repr(adapter), repr(adapter_adapter)) adapter_adapter.log(logging.CRITICAL, msg, self.recording) @@ -5513,6 +5508,7 @@ def process(self, msg, kwargs): self.assertEqual(record.levelno, logging.CRITICAL) self.assertEqual(record.msg, f"Adapter AdapterAdapter {msg}") self.assertEqual(record.args, (self.recording,)) + self.assertEqual(record.funcName, 'test_nested') orig_manager = adapter_adapter.manager self.assertIs(adapter.manager, orig_manager) self.assertIs(self.logger.manager, orig_manager) @@ -5528,6 +5524,61 @@ def process(self, msg, kwargs): self.assertIs(adapter.manager, orig_manager) self.assertIs(self.logger.manager, orig_manager) + def test_styled_adapter(self): + # Test an example from the Cookbook. 
+ records = self.recording.records + adapter = StyleAdapter(self.logger) + adapter.warning('Hello, {}!', 'world') + self.assertEqual(str(records[-1].msg), 'Hello, world!') + self.assertEqual(records[-1].funcName, 'test_styled_adapter') + adapter.log(logging.WARNING, 'Goodbye {}.', 'world') + self.assertEqual(str(records[-1].msg), 'Goodbye world.') + self.assertEqual(records[-1].funcName, 'test_styled_adapter') + + def test_nested_styled_adapter(self): + records = self.recording.records + adapter = PrefixAdapter(self.logger) + adapter.prefix = '{}' + adapter2 = StyleAdapter(adapter) + adapter2.warning('Hello, {}!', 'world') + self.assertEqual(str(records[-1].msg), '{} Hello, world!') + self.assertEqual(records[-1].funcName, 'test_nested_styled_adapter') + adapter2.log(logging.WARNING, 'Goodbye {}.', 'world') + self.assertEqual(str(records[-1].msg), '{} Goodbye world.') + self.assertEqual(records[-1].funcName, 'test_nested_styled_adapter') + + def test_find_caller_with_stacklevel(self): + the_level = 1 + trigger = self.adapter.warning + + def innermost(): + trigger('test', stacklevel=the_level) + + def inner(): + innermost() + + def outer(): + inner() + + records = self.recording.records + outer() + self.assertEqual(records[-1].funcName, 'innermost') + lineno = records[-1].lineno + the_level += 1 + outer() + self.assertEqual(records[-1].funcName, 'inner') + self.assertGreater(records[-1].lineno, lineno) + lineno = records[-1].lineno + the_level += 1 + outer() + self.assertEqual(records[-1].funcName, 'outer') + self.assertGreater(records[-1].lineno, lineno) + lineno = records[-1].lineno + the_level += 1 + outer() + self.assertEqual(records[-1].funcName, 'test_find_caller_with_stacklevel') + self.assertGreater(records[-1].lineno, lineno) + def test_extra_in_records(self): self.adapter = logging.LoggerAdapter(logger=self.logger, extra={'foo': '1'}) @@ -5569,6 +5620,30 @@ def test_extra_merged_log_call_has_precedence(self): self.assertEqual(record.foo, '2') +class 
PrefixAdapter(logging.LoggerAdapter): + prefix = 'Adapter' + + def process(self, msg, kwargs): + return f"{self.prefix} {msg}", kwargs + + +class Message: + def __init__(self, fmt, args): + self.fmt = fmt + self.args = args + + def __str__(self): + return self.fmt.format(*self.args) + + +class StyleAdapter(logging.LoggerAdapter): + def log(self, level, msg, /, *args, stacklevel=1, **kwargs): + if self.isEnabledFor(level): + msg, kwargs = self.process(msg, kwargs) + self.logger.log(level, Message(msg, args), **kwargs, + stacklevel=stacklevel+1) + + class LoggerTest(BaseTest, AssertErrorMessage): def setUp(self): diff --git a/Misc/NEWS.d/next/Documentation/2024-02-12-12-26-17.gh-issue-115233.aug6r9.rst b/Misc/NEWS.d/next/Documentation/2024-02-12-12-26-17.gh-issue-115233.aug6r9.rst new file mode 100644 index 00000000000000..f37f94d12d4cf1 --- /dev/null +++ b/Misc/NEWS.d/next/Documentation/2024-02-12-12-26-17.gh-issue-115233.aug6r9.rst @@ -0,0 +1 @@ +Fix an example for :class:`~logging.LoggerAdapter` in the Logging Cookbook. From 91bf01d4b15a40be4510fd9ee5e6dc8e9c019fce Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Mon, 12 Feb 2024 19:27:27 +0300 Subject: [PATCH 082/126] gh-87804: Fix the refleak in error handling of `_pystatvfs_fromstructstatfs` (#115335) It was the macro expansion! Sorry! 
--- Modules/posixmodule.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 17032d9d490c78..ef6d65623bf038 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -12916,14 +12916,15 @@ _pystatvfs_fromstructstatfs(PyObject *module, struct statfs st) { _Static_assert(sizeof(st.f_blocks) == sizeof(long long), "assuming large file"); -#define SET_ITEM(v, index, item) \ - do { \ - if (item == NULL) { \ - Py_DECREF(v); \ - return NULL; \ - } \ - PyStructSequence_SET_ITEM(v, index, item); \ - } while (0) \ +#define SET_ITEM(SEQ, INDEX, EXPR) \ + do { \ + PyObject *obj = (EXPR); \ + if (obj == NULL) { \ + Py_DECREF((SEQ)); \ + return NULL; \ + } \ + PyStructSequence_SET_ITEM((SEQ), (INDEX), obj); \ + } while (0) SET_ITEM(v, 0, PyLong_FromLong((long) st.f_iosize)); SET_ITEM(v, 1, PyLong_FromLong((long) st.f_bsize)); From c39272e143b346bd6a3c04ca4fbf299163888277 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Mon, 12 Feb 2024 17:05:38 +0000 Subject: [PATCH 083/126] gh-115049: Fix py.exe failing when user has no LocalAppData. (GH-115185) Also ensure we always display a debug message or error for RC_INTERNAL_ERROR --- ...2024-02-08-21-37-22.gh-issue-115049.X1ObpJ.rst | 1 + PC/launcher2.c | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2024-02-08-21-37-22.gh-issue-115049.X1ObpJ.rst diff --git a/Misc/NEWS.d/next/Windows/2024-02-08-21-37-22.gh-issue-115049.X1ObpJ.rst b/Misc/NEWS.d/next/Windows/2024-02-08-21-37-22.gh-issue-115049.X1ObpJ.rst new file mode 100644 index 00000000000000..a679391857dcb3 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2024-02-08-21-37-22.gh-issue-115049.X1ObpJ.rst @@ -0,0 +1 @@ +Fixes ``py.exe`` launcher failing when run as users without user profiles. 
diff --git a/PC/launcher2.c b/PC/launcher2.c index e426eccd700044..90b0fdebd3bdfb 100644 --- a/PC/launcher2.c +++ b/PC/launcher2.c @@ -1594,6 +1594,7 @@ _registryReadLegacyEnvironment(const SearchInfo *search, HKEY root, EnvironmentI int count = swprintf_s(realTag, tagLength + 4, L"%s-32", env->tag); if (count == -1) { + debug(L"# Failed to generate 32bit tag\n"); free(realTag); return RC_INTERNAL_ERROR; } @@ -1749,10 +1750,18 @@ appxSearch(const SearchInfo *search, EnvironmentInfo **result, const wchar_t *pa exeName = search->windowed ? L"pythonw.exe" : L"python.exe"; } - if (FAILED(SHGetFolderPathW(NULL, CSIDL_LOCAL_APPDATA, NULL, 0, buffer)) || - !join(buffer, MAXLEN, L"Microsoft\\WindowsApps") || + // Failure to get LocalAppData may just mean we're running as a user who + // doesn't have a profile directory. + // In this case, return "not found", but don't fail. + // Chances are they can't launch Store installs anyway. + if (FAILED(SHGetFolderPathW(NULL, CSIDL_LOCAL_APPDATA, NULL, 0, buffer))) { + return RC_NO_PYTHON; + } + + if (!join(buffer, MAXLEN, L"Microsoft\\WindowsApps") || !join(buffer, MAXLEN, packageFamilyName) || !join(buffer, MAXLEN, exeName)) { + debug(L"# Failed to construct App Execution Alias path\n"); return RC_INTERNAL_ERROR; } @@ -1982,6 +1991,7 @@ collectEnvironments(const SearchInfo *search, EnvironmentInfo **result) EnvironmentInfo *env = NULL; if (!result) { + debug(L"# collectEnvironments() was passed a NULL result\n"); return RC_INTERNAL_ERROR; } *result = NULL; @@ -2276,6 +2286,7 @@ int selectEnvironment(const SearchInfo *search, EnvironmentInfo *root, EnvironmentInfo **best) { if (!best) { + debug(L"# selectEnvironment() was passed a NULL best\n"); return RC_INTERNAL_ERROR; } if (!root) { From 879f4546bfbc9c47ef228e7c3d2f126f3d8d64bf Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Mon, 12 Feb 2024 18:13:10 +0100 Subject: [PATCH 084/126] gh-110850: Add PyTime_t C API (GH-115215) * gh-110850: Add PyTime_t C API Add PyTime_t API: * 
PyTime_t type. * PyTime_MIN and PyTime_MAX constants. * PyTime_AsSecondsDouble(), PyTime_Monotonic(), PyTime_PerfCounter() and PyTime_GetSystemClock() functions. Co-authored-by: Victor Stinner --- Doc/c-api/time.rst | 83 ++++++++++++++ Doc/c-api/utilities.rst | 1 + Doc/conf.py | 13 ++- Doc/whatsnew/3.13.rst | 10 ++ Include/Python.h | 1 + Include/cpython/pytime.h | 23 ++++ Include/internal/pycore_time.h | 99 +++++++++-------- Lib/test/test_capi/test_time.py | 71 ++++++++++++ Lib/test/test_time.py | 17 +-- ...-11-16-02-07-48.gh-issue-110850.DQGNfF.rst | 9 ++ Modules/Setup.stdlib.in | 2 +- Modules/_randommodule.c | 2 +- Modules/_testcapi/parts.h | 1 + Modules/_testcapi/time.c | 105 ++++++++++++++++++ Modules/_testcapimodule.c | 3 + Modules/_testinternalcapi/pytime.c | 16 --- PCbuild/_testcapi.vcxproj | 1 + PCbuild/_testcapi.vcxproj.filters | 3 + Python/pytime.c | 102 +++++++++++------ 19 files changed, 448 insertions(+), 114 deletions(-) create mode 100644 Doc/c-api/time.rst create mode 100644 Include/cpython/pytime.h create mode 100644 Lib/test/test_capi/test_time.py create mode 100644 Misc/NEWS.d/next/C API/2023-11-16-02-07-48.gh-issue-110850.DQGNfF.rst create mode 100644 Modules/_testcapi/time.c diff --git a/Doc/c-api/time.rst b/Doc/c-api/time.rst new file mode 100644 index 00000000000000..7791cdb1781055 --- /dev/null +++ b/Doc/c-api/time.rst @@ -0,0 +1,83 @@ +.. highlight:: c + +PyTime C API +============ + +.. versionadded:: 3.13 + +The clock C API provides access to system clocks. +It is similar to the Python :mod:`time` module. + +For C API related to the :mod:`datetime` module, see :ref:`datetimeobjects`. + + +Types +----- + +.. c:type:: PyTime_t + + A timestamp or duration in nanoseconds, represented as a signed 64-bit + integer. + + The reference point for timestamps depends on the clock used. For example, + :c:func:`PyTime_Time` returns timestamps relative to the UNIX epoch. + + The supported range is around [-292.3 years; +292.3 years]. 
+ Using the Unix epoch (January 1st, 1970) as reference, the supported date + range is around [1677-09-21; 2262-04-11]. + The exact limits are exposed as constants: + +.. c:var:: PyTime_t PyTime_MIN + + Minimum value of :c:type:`PyTime_t`. + +.. c:var:: PyTime_t PyTime_MAX + + Maximum value of :c:type:`PyTime_t`. + + +Clock Functions +--------------- + +The following functions take a pointer to a :c:expr:`PyTime_t` that they +set to the value of a particular clock. +Details of each clock are given in the documentation of the corresponding +Python function. + +The functions return ``0`` on success, or ``-1`` (with an exception set) +on failure. + +On integer overflow, they set the :c:data:`PyExc_OverflowError` exception and +set ``*result`` to the value clamped to the ``[PyTime_MIN; PyTime_MAX]`` +range. +(On current systems, integer overflows are likely caused by misconfigured +system time.) + +As any other C API (unless otherwise specified), the functions must be called +with the :term:`GIL` held. + +.. c:function:: int PyTime_Monotonic(PyTime_t *result) + + Read the monotonic clock. + See :func:`time.monotonic` for important details on this clock. + +.. c:function:: int PyTime_PerfCounter(PyTime_t *result) + + Read the performance counter. + See :func:`time.perf_counter` for important details on this clock. + +.. c:function:: int PyTime_Time(PyTime_t *result) + + Read the “wall clock” time. + See :func:`time.time` for important details on this clock. + + +Conversion functions +-------------------- + +.. c:function:: double PyTime_AsSecondsDouble(PyTime_t t) + + Convert a timestamp to a number of seconds as a C :c:expr:`double`. + + The function cannot fail, but note that :c:expr:`double` has limited + accuracy for large values. 
diff --git a/Doc/c-api/utilities.rst b/Doc/c-api/utilities.rst index 48ae54acebe887..9d0abf440f791d 100644 --- a/Doc/c-api/utilities.rst +++ b/Doc/c-api/utilities.rst @@ -20,4 +20,5 @@ and parsing function arguments and constructing Python values from C values. hash.rst reflection.rst codec.rst + time.rst perfmaps.rst diff --git a/Doc/conf.py b/Doc/conf.py index c2d57696aeeaa3..aa7f85bc1b3efa 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -135,11 +135,14 @@ ('c:type', 'wchar_t'), ('c:type', '__int64'), ('c:type', 'unsigned __int64'), + ('c:type', 'double'), # Standard C structures ('c:struct', 'in6_addr'), ('c:struct', 'in_addr'), ('c:struct', 'stat'), ('c:struct', 'statvfs'), + ('c:struct', 'timeval'), + ('c:struct', 'timespec'), # Standard C macros ('c:macro', 'LLONG_MAX'), ('c:macro', 'LLONG_MIN'), @@ -269,12 +272,12 @@ ('py:meth', 'index'), # list.index, tuple.index, etc. ] -# gh-106948: Copy standard C types declared in the "c:type" domain to the -# "c:identifier" domain, since "c:function" markup looks for types in the -# "c:identifier" domain. Use list() to not iterate on items which are being -# added +# gh-106948: Copy standard C types declared in the "c:type" domain and C +# structures declared in the "c:struct" domain to the "c:identifier" domain, +# since "c:function" markup looks for types in the "c:identifier" domain. Use +# list() to not iterate on items which are being added for role, name in list(nitpick_ignore): - if role == 'c:type': + if role in ('c:type', 'c:struct'): nitpick_ignore.append(('c:identifier', name)) del role, name diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 1b803278ae0d5b..191657061f7403 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1516,6 +1516,16 @@ New Features * Add :c:func:`Py_HashPointer` function to hash a pointer. (Contributed by Victor Stinner in :gh:`111545`.) +* Add PyTime C API: + + * :c:type:`PyTime_t` type. + * :c:var:`PyTime_MIN` and :c:var:`PyTime_MAX` constants. 
+ * :c:func:`PyTime_AsSecondsDouble`, + :c:func:`PyTime_Monotonic`, :c:func:`PyTime_PerfCounter`, and + :c:func:`PyTime_Time` functions. + + (Contributed by Victor Stinner and Petr Viktorin in :gh:`110850`.) + Porting to Python 3.13 ---------------------- diff --git a/Include/Python.h b/Include/Python.h index 196751c3201e62..01fc45137a17bb 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -97,6 +97,7 @@ #include "weakrefobject.h" #include "structseq.h" #include "cpython/picklebufobject.h" +#include "cpython/pytime.h" #include "codecs.h" #include "pyerrors.h" #include "pythread.h" diff --git a/Include/cpython/pytime.h b/Include/cpython/pytime.h new file mode 100644 index 00000000000000..d8244700d614ce --- /dev/null +++ b/Include/cpython/pytime.h @@ -0,0 +1,23 @@ +// PyTime_t C API: see Doc/c-api/time.rst for the documentation. + +#ifndef Py_LIMITED_API +#ifndef Py_PYTIME_H +#define Py_PYTIME_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef int64_t PyTime_t; +#define PyTime_MIN INT64_MIN +#define PyTime_MAX INT64_MAX + +PyAPI_FUNC(double) PyTime_AsSecondsDouble(PyTime_t t); +PyAPI_FUNC(int) PyTime_Monotonic(PyTime_t *result); +PyAPI_FUNC(int) PyTime_PerfCounter(PyTime_t *result); +PyAPI_FUNC(int) PyTime_Time(PyTime_t *result); + +#ifdef __cplusplus +} +#endif +#endif /* Py_PYTIME_H */ +#endif /* Py_LIMITED_API */ diff --git a/Include/internal/pycore_time.h b/Include/internal/pycore_time.h index dabbd7b41556cd..1aad6ccea69ae3 100644 --- a/Include/internal/pycore_time.h +++ b/Include/internal/pycore_time.h @@ -1,34 +1,39 @@ -// The _PyTime_t API is written to use timestamp and timeout values stored in -// various formats and to read clocks. +// Internal PyTime_t C API: see Doc/c-api/time.rst for the documentation. // -// The _PyTime_t type is an integer to support directly common arithmetic -// operations like t1 + t2. +// The PyTime_t type is an integer to support directly common arithmetic +// operations such as t1 + t2. 
// -// The _PyTime_t API supports a resolution of 1 nanosecond. The _PyTime_t type -// is signed to support negative timestamps. The supported range is around -// [-292.3 years; +292.3 years]. Using the Unix epoch (January 1st, 1970), the -// supported date range is around [1677-09-21; 2262-04-11]. +// Time formats: // -// Formats: +// * Seconds. +// * Seconds as a floating point number (C double). +// * Milliseconds (10^-3 seconds). +// * Microseconds (10^-6 seconds). +// * 100 nanoseconds (10^-7 seconds), used on Windows. +// * Nanoseconds (10^-9 seconds). +// * timeval structure, 1 microsecond (10^-6 seconds). +// * timespec structure, 1 nanosecond (10^-9 seconds). // -// * seconds -// * seconds as a floating pointer number (C double) -// * milliseconds (10^-3 seconds) -// * microseconds (10^-6 seconds) -// * 100 nanoseconds (10^-7 seconds) -// * nanoseconds (10^-9 seconds) -// * timeval structure, 1 microsecond resolution (10^-6 seconds) -// * timespec structure, 1 nanosecond resolution (10^-9 seconds) +// Note that PyTime_t is now specified as int64_t, in nanoseconds. +// (If we need to change this, we'll need new public API with new names.) +// Previously, PyTime_t was configurable (in theory); some comments and code +// might still allude to that. // // Integer overflows are detected and raise OverflowError. Conversion to a -// resolution worse than 1 nanosecond is rounded correctly with the requested -// rounding mode. There are 4 rounding modes: floor (towards -inf), ceiling -// (towards +inf), half even and up (away from zero). +// resolution larger than 1 nanosecond is rounded correctly with the requested +// rounding mode. Available rounding modes: // -// Some functions clamp the result in the range [_PyTime_MIN; _PyTime_MAX], so -// the caller doesn't have to handle errors and doesn't need to hold the GIL. -// For example, _PyTime_Add(t1, t2) computes t1+t2 and clamp the result on -// overflow. +// * Round towards minus infinity (-inf). 
For example, used to read a clock. +// * Round towards infinity (+inf). For example, used for timeout to wait "at +// least" N seconds. +// * Round to nearest with ties going to nearest even integer. For example, used +// to round from a Python float. +// * Round away from zero. For example, used for timeout. +// +// Some functions clamp the result in the range [PyTime_MIN; PyTime_MAX]. The +// caller doesn't have to handle errors and so doesn't need to hold the GIL to +// handle exceptions. For example, _PyTime_Add(t1, t2) computes t1+t2 and +// clamps the result on overflow. // // Clocks: // @@ -36,10 +41,11 @@ // * Monotonic clock // * Performance counter // -// Operations like (t * k / q) with integers are implemented in a way to reduce -// the risk of integer overflow. Such operation is used to convert a clock -// value expressed in ticks with a frequency to _PyTime_t, like -// QueryPerformanceCounter() with QueryPerformanceFrequency(). +// Internally, operations like (t * k / q) with integers are implemented in a +// way to reduce the risk of integer overflow. Such operation is used to convert a +// clock value expressed in ticks with a frequency to PyTime_t, like +// QueryPerformanceCounter() with QueryPerformanceFrequency() on Windows. + #ifndef Py_INTERNAL_TIME_H #define Py_INTERNAL_TIME_H @@ -56,14 +62,7 @@ extern "C" { struct timeval; #endif -// _PyTime_t: Python timestamp with subsecond precision. It can be used to -// store a duration, and so indirectly a date (related to another date, like -// UNIX epoch). 
-typedef int64_t _PyTime_t; -// _PyTime_MIN nanoseconds is around -292.3 years -#define _PyTime_MIN INT64_MIN -// _PyTime_MAX nanoseconds is around +292.3 years -#define _PyTime_MAX INT64_MAX +typedef PyTime_t _PyTime_t; #define _SIZEOF_PYTIME_T 8 typedef enum { @@ -147,7 +146,7 @@ PyAPI_FUNC(_PyTime_t) _PyTime_FromSecondsDouble(double seconds, _PyTime_round_t PyAPI_FUNC(_PyTime_t) _PyTime_FromNanoseconds(_PyTime_t ns); // Create a timestamp from a number of microseconds. -// Clamp to [_PyTime_MIN; _PyTime_MAX] on overflow. +// Clamp to [PyTime_MIN; PyTime_MAX] on overflow. extern _PyTime_t _PyTime_FromMicrosecondsClamp(_PyTime_t us); // Create a timestamp from nanoseconds (Python int). @@ -169,10 +168,6 @@ PyAPI_FUNC(int) _PyTime_FromMillisecondsObject(_PyTime_t *t, PyObject *obj, _PyTime_round_t round); -// Convert a timestamp to a number of seconds as a C double. -// Export for '_socket' shared extension. -PyAPI_FUNC(double) _PyTime_AsSecondsDouble(_PyTime_t t); - // Convert timestamp to a number of milliseconds (10^-3 seconds). // Export for '_ssl' shared extension. PyAPI_FUNC(_PyTime_t) _PyTime_AsMilliseconds(_PyTime_t t, @@ -183,9 +178,6 @@ PyAPI_FUNC(_PyTime_t) _PyTime_AsMilliseconds(_PyTime_t t, PyAPI_FUNC(_PyTime_t) _PyTime_AsMicroseconds(_PyTime_t t, _PyTime_round_t round); -// Convert timestamp to a number of nanoseconds (10^-9 seconds). -extern _PyTime_t _PyTime_AsNanoseconds(_PyTime_t t); - #ifdef MS_WINDOWS // Convert timestamp to a number of 100 nanoseconds (10^-7 seconds). extern _PyTime_t _PyTime_As100Nanoseconds(_PyTime_t t, @@ -250,7 +242,7 @@ PyAPI_FUNC(void) _PyTime_AsTimespec_clamp(_PyTime_t t, struct timespec *ts); #endif -// Compute t1 + t2. Clamp to [_PyTime_MIN; _PyTime_MAX] on overflow. +// Compute t1 + t2. Clamp to [PyTime_MIN; PyTime_MAX] on overflow. 
extern _PyTime_t _PyTime_Add(_PyTime_t t1, _PyTime_t t2); // Structure used by time.get_clock_info() @@ -267,7 +259,8 @@ typedef struct { // On integer overflow, silently ignore the overflow and clamp the clock to // [_PyTime_MIN; _PyTime_MAX]. // -// Use _PyTime_GetSystemClockWithInfo() to check for failure. +// Use _PyTime_GetSystemClockWithInfo or the public PyTime_Time() to check +// for failure. // Export for '_random' shared extension. PyAPI_FUNC(_PyTime_t) _PyTime_GetSystemClock(void); @@ -287,7 +280,8 @@ extern int _PyTime_GetSystemClockWithInfo( // On integer overflow, silently ignore the overflow and clamp the clock to // [_PyTime_MIN; _PyTime_MAX]. // -// Use _PyTime_GetMonotonicClockWithInfo() to check for failure. +// Use _PyTime_GetMonotonicClockWithInfo or the public PyTime_Monotonic() +// to check for failure. // Export for '_random' shared extension. PyAPI_FUNC(_PyTime_t) _PyTime_GetMonotonicClock(void); @@ -322,10 +316,12 @@ PyAPI_FUNC(int) _PyTime_gmtime(time_t t, struct tm *tm); // On integer overflow, silently ignore the overflow and clamp the clock to // [_PyTime_MIN; _PyTime_MAX]. // -// Use _PyTime_GetPerfCounterWithInfo() to check for failure. +// Use _PyTime_GetPerfCounterWithInfo() or the public PyTime_PerfCounter +// to check for failure. // Export for '_lsprof' shared extension. PyAPI_FUNC(_PyTime_t) _PyTime_GetPerfCounter(void); + // Get the performance counter: clock with the highest available resolution to // measure a short duration. // @@ -336,6 +332,13 @@ extern int _PyTime_GetPerfCounterWithInfo( _PyTime_t *t, _Py_clock_info_t *info); +// Alias for backward compatibility +#define _PyTime_MIN PyTime_MIN +#define _PyTime_MAX PyTime_MAX +#define _PyTime_AsSecondsDouble PyTime_AsSecondsDouble + + +// --- _PyDeadline ----------------------------------------------------------- // Create a deadline. // Pseudo code: _PyTime_GetMonotonicClock() + timeout. 
diff --git a/Lib/test/test_capi/test_time.py b/Lib/test/test_capi/test_time.py new file mode 100644 index 00000000000000..10b7fbf2c372a3 --- /dev/null +++ b/Lib/test/test_capi/test_time.py @@ -0,0 +1,71 @@ +import time +import unittest +from test.support import import_helper +_testcapi = import_helper.import_module('_testcapi') + + +PyTime_MIN = _testcapi.PyTime_MIN +PyTime_MAX = _testcapi.PyTime_MAX +SEC_TO_NS = 10 ** 9 +DAY_TO_SEC = (24 * 60 * 60) +# Worst clock resolution: maximum delta between two clock reads. +CLOCK_RES = 0.050 + + +class CAPITest(unittest.TestCase): + def test_min_max(self): + # PyTime_t is just int64_t + self.assertEqual(PyTime_MIN, -2**63) + self.assertEqual(PyTime_MAX, 2**63 - 1) + + def check_clock(self, c_func, py_func): + t1 = c_func() + t2 = py_func() + self.assertAlmostEqual(t1, t2, delta=CLOCK_RES) + + def test_assecondsdouble(self): + # Test PyTime_AsSecondsDouble() + def ns_to_sec(ns): + if abs(ns) % SEC_TO_NS == 0: + return float(ns // SEC_TO_NS) + else: + return float(ns) / SEC_TO_NS + + seconds = ( + 0, + 1, + DAY_TO_SEC, + 365 * DAY_TO_SEC, + ) + values = { + PyTime_MIN, + PyTime_MIN + 1, + PyTime_MAX - 1, + PyTime_MAX, + } + for second in seconds: + ns = second * SEC_TO_NS + values.add(ns) + # test nanosecond before/after to test rounding + values.add(ns - 1) + values.add(ns + 1) + for ns in list(values): + if (-ns) > PyTime_MAX: + continue + values.add(-ns) + for ns in sorted(values): + with self.subTest(ns=ns): + self.assertEqual(_testcapi.PyTime_AsSecondsDouble(ns), + ns_to_sec(ns)) + + def test_monotonic(self): + # Test PyTime_Monotonic() + self.check_clock(_testcapi.PyTime_Monotonic, time.monotonic) + + def test_perf_counter(self): + # Test PyTime_PerfCounter() + self.check_clock(_testcapi.PyTime_PerfCounter, time.perf_counter) + + def test_time(self): + # Test PyTime_time() + self.check_clock(_testcapi.PyTime_Time, time.time) diff --git a/Lib/test/test_time.py b/Lib/test/test_time.py index 3b5640abdb6b89..a0aeea515afbd6 
100644 --- a/Lib/test/test_time.py +++ b/Lib/test/test_time.py @@ -43,8 +43,8 @@ class _PyTime(enum.IntEnum): ROUND_UP = 3 # _PyTime_t is int64_t -_PyTime_MIN = -2 ** 63 -_PyTime_MAX = 2 ** 63 - 1 +PyTime_MIN = -2 ** 63 +PyTime_MAX = 2 ** 63 - 1 # Rounding modes supported by PyTime ROUNDING_MODES = ( @@ -934,7 +934,7 @@ def test_FromSecondsObject(self): _PyTime_FromSecondsObject(float('nan'), time_rnd) def test_AsSecondsDouble(self): - from _testinternalcapi import _PyTime_AsSecondsDouble + from _testcapi import PyTime_AsSecondsDouble def float_converter(ns): if abs(ns) % SEC_TO_NS == 0: @@ -942,15 +942,10 @@ def float_converter(ns): else: return float(ns) / SEC_TO_NS - self.check_int_rounding(lambda ns, rnd: _PyTime_AsSecondsDouble(ns), + self.check_int_rounding(lambda ns, rnd: PyTime_AsSecondsDouble(ns), float_converter, NS_TO_SEC) - # test nan - for time_rnd, _ in ROUNDING_MODES: - with self.assertRaises(TypeError): - _PyTime_AsSecondsDouble(float('nan')) - def create_decimal_converter(self, denominator): denom = decimal.Decimal(denominator) @@ -1009,7 +1004,7 @@ def test_AsTimeval_clamp(self): tv_sec_max = self.time_t_max tv_sec_min = self.time_t_min - for t in (_PyTime_MIN, _PyTime_MAX): + for t in (PyTime_MIN, PyTime_MAX): ts = _PyTime_AsTimeval_clamp(t, _PyTime.ROUND_CEILING) with decimal.localcontext() as context: context.rounding = decimal.ROUND_CEILING @@ -1028,7 +1023,7 @@ def test_AsTimeval_clamp(self): def test_AsTimespec_clamp(self): from _testinternalcapi import _PyTime_AsTimespec_clamp - for t in (_PyTime_MIN, _PyTime_MAX): + for t in (PyTime_MIN, PyTime_MAX): ts = _PyTime_AsTimespec_clamp(t) tv_sec, tv_nsec = divmod(t, NS_TO_SEC) if self.time_t_max < tv_sec: diff --git a/Misc/NEWS.d/next/C API/2023-11-16-02-07-48.gh-issue-110850.DQGNfF.rst b/Misc/NEWS.d/next/C API/2023-11-16-02-07-48.gh-issue-110850.DQGNfF.rst new file mode 100644 index 00000000000000..998d4426dd53f9 --- /dev/null +++ b/Misc/NEWS.d/next/C 
API/2023-11-16-02-07-48.gh-issue-110850.DQGNfF.rst @@ -0,0 +1,9 @@ +Add PyTime C API: + +* :c:type:`PyTime_t` type. +* :c:var:`PyTime_MIN` and :c:var:`PyTime_MAX` constants. +* :c:func:`PyTime_AsSecondsDouble`, + :c:func:`PyTime_Monotonic`, :c:func:`PyTime_PerfCounter`, and + :c:func:`PyTime_Time` functions. + +Patch by Victor Stinner. diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in index 8a65a9cffb1b9d..e98775a4808765 100644 --- a/Modules/Setup.stdlib.in +++ b/Modules/Setup.stdlib.in @@ -162,7 +162,7 @@ @MODULE__XXTESTFUZZ_TRUE@_xxtestfuzz _xxtestfuzz/_xxtestfuzz.c _xxtestfuzz/fuzzer.c @MODULE__TESTBUFFER_TRUE@_testbuffer _testbuffer.c @MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/test_lock.c _testinternalcapi/pytime.c _testinternalcapi/set.c _testinternalcapi/test_critical_sections.c -@MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/vectorcall_limited.c _testcapi/heaptype.c _testcapi/abstract.c _testcapi/bytearray.c _testcapi/bytes.c _testcapi/unicode.c _testcapi/dict.c _testcapi/set.c _testcapi/list.c _testcapi/tuple.c _testcapi/getargs.c _testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c _testcapi/long.c _testcapi/float.c _testcapi/complex.c _testcapi/numbers.c _testcapi/structmember.c _testcapi/exceptions.c _testcapi/code.c _testcapi/buffer.c _testcapi/pyatomic.c _testcapi/pyos.c _testcapi/file.c _testcapi/codec.c _testcapi/immortal.c _testcapi/heaptype_relative.c _testcapi/gc.c _testcapi/sys.c _testcapi/hash.c +@MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/vectorcall_limited.c _testcapi/heaptype.c _testcapi/abstract.c _testcapi/bytearray.c _testcapi/bytes.c _testcapi/unicode.c _testcapi/dict.c _testcapi/set.c _testcapi/list.c _testcapi/tuple.c _testcapi/getargs.c _testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c _testcapi/long.c _testcapi/float.c _testcapi/complex.c 
_testcapi/numbers.c _testcapi/structmember.c _testcapi/exceptions.c _testcapi/code.c _testcapi/buffer.c _testcapi/pyatomic.c _testcapi/pyos.c _testcapi/file.c _testcapi/codec.c _testcapi/immortal.c _testcapi/heaptype_relative.c _testcapi/gc.c _testcapi/sys.c _testcapi/hash.c _testcapi/time.c @MODULE__TESTCLINIC_TRUE@_testclinic _testclinic.c @MODULE__TESTCLINIC_LIMITED_TRUE@_testclinic_limited _testclinic_limited.c diff --git a/Modules/_randommodule.c b/Modules/_randommodule.c index 4403e1d132c057..5481ed9b348ed7 100644 --- a/Modules/_randommodule.c +++ b/Modules/_randommodule.c @@ -262,7 +262,7 @@ random_seed_urandom(RandomObject *self) static void random_seed_time_pid(RandomObject *self) { - _PyTime_t now; + PyTime_t now; uint32_t key[5]; now = _PyTime_GetSystemClock(); diff --git a/Modules/_testcapi/parts.h b/Modules/_testcapi/parts.h index 29817edd69b134..e8cfb2423500d4 100644 --- a/Modules/_testcapi/parts.h +++ b/Modules/_testcapi/parts.h @@ -59,6 +59,7 @@ int _PyTestCapi_Init_Immortal(PyObject *module); int _PyTestCapi_Init_GC(PyObject *module); int _PyTestCapi_Init_Sys(PyObject *module); int _PyTestCapi_Init_Hash(PyObject *module); +int _PyTestCapi_Init_Time(PyObject *module); int _PyTestCapi_Init_VectorcallLimited(PyObject *module); int _PyTestCapi_Init_HeaptypeRelative(PyObject *module); diff --git a/Modules/_testcapi/time.c b/Modules/_testcapi/time.c new file mode 100644 index 00000000000000..4fbf7dd14ebb66 --- /dev/null +++ b/Modules/_testcapi/time.c @@ -0,0 +1,105 @@ +#include "parts.h" + + +static int +pytime_from_nanoseconds(PyTime_t *tp, PyObject *obj) +{ + if (!PyLong_Check(obj)) { + PyErr_Format(PyExc_TypeError, "expect int, got %s", + Py_TYPE(obj)->tp_name); + return -1; + } + + long long nsec = PyLong_AsLongLong(obj); + if (nsec == -1 && PyErr_Occurred()) { + return -1; + } + + Py_BUILD_ASSERT(sizeof(long long) == sizeof(PyTime_t)); + *tp = (PyTime_t)nsec; + return 0; +} + + +static PyObject * +test_pytime_assecondsdouble(PyObject 
*Py_UNUSED(self), PyObject *args) +{ + PyObject *obj; + if (!PyArg_ParseTuple(args, "O", &obj)) { + return NULL; + } + PyTime_t ts; + if (pytime_from_nanoseconds(&ts, obj) < 0) { + return NULL; + } + double d = PyTime_AsSecondsDouble(ts); + return PyFloat_FromDouble(d); +} + + +static PyObject* +pytime_as_float(PyTime_t t) +{ + return PyFloat_FromDouble(PyTime_AsSecondsDouble(t)); +} + + + +static PyObject* +test_pytime_monotonic(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args)) +{ + PyTime_t t; + if (PyTime_Monotonic(&t) < 0) { + return NULL; + } + return pytime_as_float(t); +} + + +static PyObject* +test_pytime_perf_counter(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args)) +{ + PyTime_t t; + if (PyTime_PerfCounter(&t) < 0) { + return NULL; + } + return pytime_as_float(t); +} + + +static PyObject* +test_pytime_time(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args)) +{ + PyTime_t t; + if (PyTime_Time(&t) < 0) { + printf("ERR! %d\n", (int)t); + return NULL; + } + printf("... %d\n", (int)t); + return pytime_as_float(t); +} + + +static PyMethodDef test_methods[] = { + {"PyTime_AsSecondsDouble", test_pytime_assecondsdouble, METH_VARARGS}, + {"PyTime_Monotonic", test_pytime_monotonic, METH_NOARGS}, + {"PyTime_PerfCounter", test_pytime_perf_counter, METH_NOARGS}, + {"PyTime_Time", test_pytime_time, METH_NOARGS}, + {NULL}, +}; + +int +_PyTestCapi_Init_Time(PyObject *m) +{ + if (PyModule_AddFunctions(m, test_methods) < 0) { + return -1; + } + Py_BUILD_ASSERT(sizeof(long long) == sizeof(PyTime_t)); + if (PyModule_AddObject(m, "PyTime_MIN", PyLong_FromLongLong(PyTime_MIN)) < 0) { + return -1; + } + if (PyModule_AddObject(m, "PyTime_MAX", PyLong_FromLongLong(PyTime_MAX)) < 0) { + return -1; + } + return 0; +} diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index e67de3eeb6e17e..b03f871b089c8a 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -4107,6 +4107,9 @@ PyInit__testcapi(void) if (_PyTestCapi_Init_Hash(m) < 0) { return 
NULL; } + if (_PyTestCapi_Init_Time(m) < 0) { + return NULL; + } PyState_AddModule(m, &_testcapimodule); return m; diff --git a/Modules/_testinternalcapi/pytime.c b/Modules/_testinternalcapi/pytime.c index 2b5f9eb0ef2851..f0f758ea032df8 100644 --- a/Modules/_testinternalcapi/pytime.c +++ b/Modules/_testinternalcapi/pytime.c @@ -52,21 +52,6 @@ test_pytime_fromsecondsobject(PyObject *self, PyObject *args) return _PyTime_AsNanosecondsObject(ts); } -static PyObject * -test_pytime_assecondsdouble(PyObject *self, PyObject *args) -{ - PyObject *obj; - if (!PyArg_ParseTuple(args, "O", &obj)) { - return NULL; - } - _PyTime_t ts; - if (_PyTime_FromNanosecondsObject(&ts, obj) < 0) { - return NULL; - } - double d = _PyTime_AsSecondsDouble(ts); - return PyFloat_FromDouble(d); -} - static PyObject * test_PyTime_AsTimeval(PyObject *self, PyObject *args) { @@ -254,7 +239,6 @@ test_pytime_object_to_timespec(PyObject *self, PyObject *args) static PyMethodDef TestMethods[] = { {"_PyTime_AsMicroseconds", test_PyTime_AsMicroseconds, METH_VARARGS}, {"_PyTime_AsMilliseconds", test_PyTime_AsMilliseconds, METH_VARARGS}, - {"_PyTime_AsSecondsDouble", test_pytime_assecondsdouble, METH_VARARGS}, #ifdef HAVE_CLOCK_GETTIME {"_PyTime_AsTimespec", test_PyTime_AsTimespec, METH_VARARGS}, {"_PyTime_AsTimespec_clamp", test_PyTime_AsTimespec_clamp, METH_VARARGS}, diff --git a/PCbuild/_testcapi.vcxproj b/PCbuild/_testcapi.vcxproj index 6911aacab29b97..66df0a61b5b5a6 100644 --- a/PCbuild/_testcapi.vcxproj +++ b/PCbuild/_testcapi.vcxproj @@ -125,6 +125,7 @@ + diff --git a/PCbuild/_testcapi.vcxproj.filters b/PCbuild/_testcapi.vcxproj.filters index 6059959bb9a040..651eb1d6ba0b7f 100644 --- a/PCbuild/_testcapi.vcxproj.filters +++ b/PCbuild/_testcapi.vcxproj.filters @@ -105,6 +105,9 @@ Source Files + + Source Files + Source Files diff --git a/Python/pytime.c b/Python/pytime.c index 77cb95f8feb179..fb0ed85c541e68 100644 --- a/Python/pytime.c +++ b/Python/pytime.c @@ -50,7 +50,7 @@ # error "time_t is not a 
two's complement integer type" #endif -#if _PyTime_MIN + _PyTime_MAX != -1 +#if PyTime_MIN + PyTime_MAX != -1 # error "_PyTime_t is not a two's complement integer type" #endif @@ -124,16 +124,16 @@ pytime_as_nanoseconds(_PyTime_t t) } -// Compute t1 + t2. Clamp to [_PyTime_MIN; _PyTime_MAX] on overflow. +// Compute t1 + t2. Clamp to [PyTime_MIN; PyTime_MAX] on overflow. static inline int pytime_add(_PyTime_t *t1, _PyTime_t t2) { - if (t2 > 0 && *t1 > _PyTime_MAX - t2) { - *t1 = _PyTime_MAX; + if (t2 > 0 && *t1 > PyTime_MAX - t2) { + *t1 = PyTime_MAX; return -1; } - else if (t2 < 0 && *t1 < _PyTime_MIN - t2) { - *t1 = _PyTime_MIN; + else if (t2 < 0 && *t1 < PyTime_MIN - t2) { + *t1 = PyTime_MIN; return -1; } else { @@ -156,7 +156,7 @@ pytime_mul_check_overflow(_PyTime_t a, _PyTime_t b) { if (b != 0) { assert(b > 0); - return ((a < _PyTime_MIN / b) || (_PyTime_MAX / b < a)); + return ((a < PyTime_MIN / b) || (PyTime_MAX / b < a)); } else { return 0; @@ -164,13 +164,13 @@ pytime_mul_check_overflow(_PyTime_t a, _PyTime_t b) } -// Compute t * k. Clamp to [_PyTime_MIN; _PyTime_MAX] on overflow. +// Compute t * k. Clamp to [PyTime_MIN; PyTime_MAX] on overflow. static inline int pytime_mul(_PyTime_t *t, _PyTime_t k) { assert(k >= 0); if (pytime_mul_check_overflow(*t, k)) { - *t = (*t >= 0) ? _PyTime_MAX : _PyTime_MIN; + *t = (*t >= 0) ? PyTime_MAX : PyTime_MIN; return -1; } else { @@ -180,7 +180,7 @@ pytime_mul(_PyTime_t *t, _PyTime_t k) } -// Compute t * k. Clamp to [_PyTime_MIN; _PyTime_MAX] on overflow. +// Compute t * k. Clamp to [PyTime_MIN; PyTime_MAX] on overflow. static inline _PyTime_t _PyTime_Mul(_PyTime_t t, _PyTime_t k) { @@ -459,12 +459,12 @@ _PyTime_FromSeconds(int seconds) /* ensure that integer overflow cannot happen, int type should have 32 bits, whereas _PyTime_t type has at least 64 bits (SEC_TO_NS takes 30 bits). 
*/ - static_assert(INT_MAX <= _PyTime_MAX / SEC_TO_NS, "_PyTime_t overflow"); - static_assert(INT_MIN >= _PyTime_MIN / SEC_TO_NS, "_PyTime_t underflow"); + static_assert(INT_MAX <= PyTime_MAX / SEC_TO_NS, "_PyTime_t overflow"); + static_assert(INT_MIN >= PyTime_MIN / SEC_TO_NS, "_PyTime_t underflow"); _PyTime_t t = (_PyTime_t)seconds; - assert((t >= 0 && t <= _PyTime_MAX / SEC_TO_NS) - || (t < 0 && t >= _PyTime_MIN / SEC_TO_NS)); + assert((t >= 0 && t <= PyTime_MAX / SEC_TO_NS) + || (t < 0 && t >= PyTime_MIN / SEC_TO_NS)); t *= SEC_TO_NS; return pytime_from_nanoseconds(t); } @@ -587,7 +587,7 @@ pytime_from_double(_PyTime_t *tp, double value, _PyTime_round_t round, d = pytime_round(d, round); /* See comments in pytime_double_to_denominator */ - if (!((double)_PyTime_MIN <= d && d < -(double)_PyTime_MIN)) { + if (!((double)PyTime_MIN <= d && d < -(double)PyTime_MIN)) { pytime_time_t_overflow(); return -1; } @@ -649,12 +649,12 @@ _PyTime_FromMillisecondsObject(_PyTime_t *tp, PyObject *obj, _PyTime_round_t rou double -_PyTime_AsSecondsDouble(_PyTime_t t) +PyTime_AsSecondsDouble(PyTime_t t) { /* volatile avoids optimization changing how numbers are rounded */ volatile double d; - _PyTime_t ns = pytime_as_nanoseconds(t); + PyTime_t ns = pytime_as_nanoseconds(t); if (ns % SEC_TO_NS == 0) { /* Divide using integers to avoid rounding issues on the integer part. 1e-9 cannot be stored exactly in IEEE 64-bit. */ @@ -695,7 +695,7 @@ pytime_divide_round_up(const _PyTime_t t, const _PyTime_t k) assert(k > 1); if (t >= 0) { // Don't use (t + k - 1) / k to avoid integer overflow - // if t is equal to _PyTime_MAX + // if t is equal to PyTime_MAX _PyTime_t q = t / k; if (t % k) { q += 1; @@ -704,7 +704,7 @@ pytime_divide_round_up(const _PyTime_t t, const _PyTime_t k) } else { // Don't use (t - (k - 1)) / k to avoid integer overflow - // if t is equals to _PyTime_MIN. + // if t is equals to PyTime_MIN. 
_PyTime_t q = t / k; if (t % k) { q -= 1; @@ -759,7 +759,7 @@ pytime_divide(const _PyTime_t t, const _PyTime_t k, // Compute (t / k, t % k) in (pq, pr). // Make sure that 0 <= pr < k. // Return 0 on success. -// Return -1 on underflow and store (_PyTime_MIN, 0) in (pq, pr). +// Return -1 on underflow and store (PyTime_MIN, 0) in (pq, pr). static int pytime_divmod(const _PyTime_t t, const _PyTime_t k, _PyTime_t *pq, _PyTime_t *pr) @@ -768,8 +768,8 @@ pytime_divmod(const _PyTime_t t, const _PyTime_t k, _PyTime_t q = t / k; _PyTime_t r = t % k; if (r < 0) { - if (q == _PyTime_MIN) { - *pq = _PyTime_MIN; + if (q == PyTime_MIN) { + *pq = PyTime_MIN; *pr = 0; return -1; } @@ -784,13 +784,6 @@ pytime_divmod(const _PyTime_t t, const _PyTime_t k, } -_PyTime_t -_PyTime_AsNanoseconds(_PyTime_t t) -{ - return pytime_as_nanoseconds(t); -} - - #ifdef MS_WINDOWS _PyTime_t _PyTime_As100Nanoseconds(_PyTime_t t, _PyTime_round_t round) @@ -926,6 +919,7 @@ _PyTime_AsTimespec(_PyTime_t t, struct timespec *ts) #endif +// N.B. If raise_exc=0, this may be called without the GIL. static int py_get_system_clock(_PyTime_t *tp, _Py_clock_info_t *info, int raise_exc) { @@ -1050,6 +1044,18 @@ _PyTime_GetSystemClock(void) } +int +PyTime_Time(PyTime_t *result) +{ + if (py_get_system_clock(result, NULL, 1) < 0) { + // If clock_gettime(CLOCK_REALTIME) or gettimeofday() fails: + // report the failure (-1) and set *result to 0. + *result = 0; + return -1; + } + return 0; +} + int _PyTime_GetSystemClockWithInfo(_PyTime_t *t, _Py_clock_info_t *info) { @@ -1092,6 +1098,7 @@ py_mach_timebase_info(_PyTimeFraction *base, int raise) #endif +// N.B. If raise_exc=0, this may be called without the GIL. 
static int py_get_monotonic_clock(_PyTime_t *tp, _Py_clock_info_t *info, int raise_exc) { @@ -1102,13 +1109,13 @@ py_get_monotonic_clock(_PyTime_t *tp, _Py_clock_info_t *info, int raise_exc) static_assert(sizeof(ticks) <= sizeof(_PyTime_t), "ULONGLONG is larger than _PyTime_t"); _PyTime_t t; - if (ticks <= (ULONGLONG)_PyTime_MAX) { + if (ticks <= (ULONGLONG)PyTime_MAX) { t = (_PyTime_t)ticks; } else { // GetTickCount64() maximum is larger than _PyTime_t maximum: // ULONGLONG is unsigned, whereas _PyTime_t is signed. - t = _PyTime_MAX; + t = PyTime_MAX; } int res = pytime_mul(&t, MS_TO_NS); @@ -1151,7 +1158,7 @@ py_get_monotonic_clock(_PyTime_t *tp, _Py_clock_info_t *info, int raise_exc) uint64_t uticks = mach_absolute_time(); // unsigned => signed - assert(uticks <= (uint64_t)_PyTime_MAX); + assert(uticks <= (uint64_t)PyTime_MAX); _PyTime_t ticks = (_PyTime_t)uticks; _PyTime_t ns = _PyTimeFraction_Mul(ticks, &base); @@ -1229,6 +1236,17 @@ _PyTime_GetMonotonicClock(void) } +int +PyTime_Monotonic(PyTime_t *result) +{ + if (py_get_monotonic_clock(result, NULL, 1) < 0) { + *result = 0; + return -1; + } + return 0; +} + + int _PyTime_GetMonotonicClockWithInfo(_PyTime_t *tp, _Py_clock_info_t *info) { @@ -1268,6 +1286,7 @@ py_win_perf_counter_frequency(_PyTimeFraction *base, int raise) } +// N.B. If raise_exc=0, this may be called without the GIL. static int py_get_win_perf_counter(_PyTime_t *tp, _Py_clock_info_t *info, int raise_exc) { @@ -1335,6 +1354,25 @@ _PyTime_GetPerfCounter(void) } +int +PyTime_PerfCounter(PyTime_t *result) +{ + int res; +#ifdef MS_WINDOWS + res = py_get_win_perf_counter(result, NULL, 1); +#else + res = py_get_monotonic_clock(result, NULL, 1); +#endif + if (res < 0) { + // If py_win_perf_counter_frequency() or py_get_monotonic_clock() + // fails: silently ignore the failure and return 0. 
+ *result = 0; + return -1; + } + return 0; +} + + int _PyTime_localtime(time_t t, struct tm *tm) { From de7d67b19b9f31d7712de7211ffac5bf6018157f Mon Sep 17 00:00:00 2001 From: mpage Date: Mon, 12 Feb 2024 09:44:00 -0800 Subject: [PATCH 085/126] gh-114271: Make `PyInterpreterState.threads.count` thread-safe in free-threaded builds (gh-115093) Use atomics to mutate PyInterpreterState.threads.count. --- Include/internal/pycore_interp.h | 2 +- Modules/_threadmodule.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 31d88071e19d0c..485b1914a44885 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -112,7 +112,7 @@ struct _is { /* The thread currently executing in the __main__ module, if any. */ PyThreadState *main; /* Used in Modules/_threadmodule.c. */ - long count; + Py_ssize_t count; /* Support for runtime thread stack size tuning. A value of 0 means using the platform's default stack size or the size specified by the THREAD_STACK_SIZE macro. 
*/ diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index df02b023012fbd..d7840eaf45e8d6 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -1244,7 +1244,7 @@ thread_run(void *boot_raw) _PyThreadState_Bind(tstate); PyEval_AcquireThread(tstate); - tstate->interp->threads.count++; + _Py_atomic_add_ssize(&tstate->interp->threads.count, 1); PyObject *res = PyObject_Call(boot->func, boot->args, boot->kwargs); if (res == NULL) { @@ -1262,7 +1262,7 @@ thread_run(void *boot_raw) thread_bootstate_free(boot, 1); - tstate->interp->threads.count--; + _Py_atomic_add_ssize(&tstate->interp->threads.count, -1); PyThreadState_Clear(tstate); _PyThreadState_DeleteCurrent(tstate); @@ -1539,7 +1539,7 @@ static PyObject * thread__count(PyObject *self, PyObject *Py_UNUSED(ignored)) { PyInterpreterState *interp = _PyInterpreterState_GET(); - return PyLong_FromLong(interp->threads.count); + return PyLong_FromSsize_t(_Py_atomic_load_ssize(&interp->threads.count)); } PyDoc_STRVAR(_count_doc, From 4297d7301b97aba2e0df9f9cc5fa4010e53a8950 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Mon, 12 Feb 2024 21:31:07 +0300 Subject: [PATCH 086/126] gh-115285: Fix `test_dataclasses` with `-OO` mode (#115286) --- Lib/test/test_dataclasses/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Lib/test/test_dataclasses/__init__.py b/Lib/test/test_dataclasses/__init__.py index 272d427875ae40..ede74b0dd15ccf 100644 --- a/Lib/test/test_dataclasses/__init__.py +++ b/Lib/test/test_dataclasses/__init__.py @@ -22,6 +22,8 @@ import typing # Needed for the string "typing.ClassVar[int]" to work as an annotation. import dataclasses # Needed for the string "dataclasses.InitVar[int]" to work as an annotation. +from test import support + # Just any custom exception we can catch. class CustomError(Exception): pass @@ -2216,6 +2218,7 @@ def assertDocStrEqual(self, a, b): # whitespace stripped. 
self.assertEqual(a.replace(' ', ''), b.replace(' ', '')) + @support.requires_docstrings def test_existing_docstring_not_overridden(self): @dataclass class C: From a82fbc13d0e352b9af7d7ffbef4bc04cf635f07f Mon Sep 17 00:00:00 2001 From: Ezio Melotti Date: Mon, 12 Feb 2024 20:23:45 +0100 Subject: [PATCH 087/126] Remove stray backtick in NEWS entry (#115356) --- .../next/Library/2024-02-09-07-20-16.gh-issue-115165.yfJLXA.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-02-09-07-20-16.gh-issue-115165.yfJLXA.rst b/Misc/NEWS.d/next/Library/2024-02-09-07-20-16.gh-issue-115165.yfJLXA.rst index 73d3d001f07f3f..3e6eef183ad524 100644 --- a/Misc/NEWS.d/next/Library/2024-02-09-07-20-16.gh-issue-115165.yfJLXA.rst +++ b/Misc/NEWS.d/next/Library/2024-02-09-07-20-16.gh-issue-115165.yfJLXA.rst @@ -1,4 +1,4 @@ Most exceptions are now ignored when attempting to set the ``__orig_class__`` attribute on objects returned when calling :mod:`typing` generic aliases (including generic aliases created using :data:`typing.Annotated`). -Previously only :exc:`AttributeError`` was ignored. Patch by Dave Shawley. +Previously only :exc:`AttributeError` was ignored. Patch by Dave Shawley. 
From 7861dfd26a41e40c2b4361eb0bb1356b9b4a064b Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Mon, 12 Feb 2024 20:13:13 +0000 Subject: [PATCH 088/126] gh-111140: Adds PyLong_AsNativeBytes and PyLong_FromNative[Unsigned]Bytes functions (GH-114886) --- Doc/c-api/long.rst | 66 ++++++ Doc/whatsnew/3.13.rst | 7 +- Include/cpython/longobject.h | 36 ++- Lib/test/test_capi/test_long.py | 145 ++++++++++++ ...-02-05-17-11-15.gh-issue-111140.WMEjid.rst | 2 + Modules/_io/textio.c | 2 +- Modules/_pickle.c | 3 +- Modules/_randommodule.c | 3 +- Modules/_sqlite/util.c | 2 +- Modules/_struct.c | 20 +- Modules/_testcapi/long.c | 48 +++- Modules/_tkinter.c | 3 +- Modules/cjkcodecs/multibytecodec.c | 6 +- Objects/longobject.c | 216 +++++++++++++++++- 14 files changed, 533 insertions(+), 26 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index f42e23db89ae39..c39823e5e6787f 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -113,6 +113,28 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. retrieved from the resulting value using :c:func:`PyLong_AsVoidPtr`. +.. c:function:: PyObject* PyLong_FromNativeBytes(const void* buffer, size_t n_bytes, int endianness) + + Create a Python integer from the value contained in the first *n_bytes* of + *buffer*, interpreted as a two's-complement signed number. + + *endianness* may be passed ``-1`` for the native endian that CPython was + compiled with, or else ``0`` for big endian and ``1`` for little. + + .. versionadded:: 3.13 + + +.. c:function:: PyObject* PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n_bytes, int endianness) + + Create a Python integer from the value contained in the first *n_bytes* of + *buffer*, interpreted as an unsigned number. + + *endianness* may be passed ``-1`` for the native endian that CPython was + compiled with, or else ``0`` for big endian and ``1`` for little. 
+ + .. versionadded:: 3.13 + + .. XXX alias PyLong_AS_LONG (for now) .. c:function:: long PyLong_AsLong(PyObject *obj) @@ -332,6 +354,50 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Returns ``NULL`` on error. Use :c:func:`PyErr_Occurred` to disambiguate. +.. c:function:: Py_ssize_t PyLong_AsNativeBytes(PyObject *pylong, void* buffer, Py_ssize_t n_bytes, int endianness) + + Copy the Python integer value to a native *buffer* of size *n_bytes*:: + + int value; + Py_ssize_t bytes = PyLong_CopyBits(v, &value, sizeof(value), -1); + if (bytes < 0) { + // Error occurred + return NULL; + } + else if (bytes > sizeof(value)) { + // Overflow occurred, but 'value' contains as much as could fit + } + + *endianness* may be passed ``-1`` for the native endian that CPython was + compiled with, or ``0`` for big endian and ``1`` for little. + + Return ``-1`` with an exception raised if *pylong* cannot be interpreted as + an integer. Otherwise, return the size of the buffer required to store the + value. If this is equal to or less than *n_bytes*, the entire value was + copied. + + Unless an exception is raised, all *n_bytes* of the buffer will be written + with as much of the value as can fit. This allows the caller to ignore all + non-negative results if the intent is to match the typical behavior of a + C-style downcast. + + Values are always copied as twos-complement, and sufficient size will be + requested for a sign bit. For example, this may cause an value that fits into + 8 bytes when treated as unsigned to request 9 bytes, even though all eight + bytes were copied into the buffer. What has been omitted is the zero sign + bit, which is redundant when the intention is to treat the value as unsigned. + + Passing *n_bytes* of zero will always return the requested buffer size. + + .. note:: + + When the value does not fit in the provided buffer, the requested size + returned from the function may be larger than necessary. 
Passing 0 to this + function is not an accurate way to determine the bit length of a value. + + .. versionadded:: 3.13 + + .. c:function:: int PyUnstable_Long_IsCompact(const PyLongObject* op) Return 1 if *op* is compact, 0 otherwise. diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 191657061f7403..b96720df0a2f2d 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -587,6 +587,7 @@ Tier 2 IR by Mark Shannon and Guido van Rossum. Tier 2 optimizer by Ken Jin.) + Deprecated ========== @@ -1526,6 +1527,11 @@ New Features (Contributed by Victor Stinner and Petr Viktorin in :gh:`110850`.) +* Add :c:func:`PyLong_AsNativeBytes`, :c:func:`PyLong_FromNativeBytes` and + :c:func:`PyLong_FromUnsignedNativeBytes` functions to simplify converting + between native integer types and Python :class:`int` objects. + (Contributed by Steve Dower in :gh:`111140`.) + Porting to Python 3.13 ---------------------- @@ -1585,7 +1591,6 @@ Porting to Python 3.13 platforms, the ``HAVE_STDDEF_H`` macro is only defined on Windows. (Contributed by Victor Stinner in :gh:`108765`.) - Deprecated ---------- diff --git a/Include/cpython/longobject.h b/Include/cpython/longobject.h index fd1be29ed397d1..07251db6bcc203 100644 --- a/Include/cpython/longobject.h +++ b/Include/cpython/longobject.h @@ -4,6 +4,40 @@ PyAPI_FUNC(PyObject*) PyLong_FromUnicodeObject(PyObject *u, int base); +/* PyLong_AsNativeBytes: Copy the integer value to a native variable. + buffer points to the first byte of the variable. + n_bytes is the number of bytes available in the buffer. Pass 0 to request + the required size for the value. + endianness is -1 for native endian, 0 for big endian or 1 for little. + Big endian mode will write the most significant byte into the address + directly referenced by buffer; little endian will write the least significant + byte into that address. + + If an exception is raised, returns a negative value. 
+ Otherwise, returns the number of bytes that are required to store the value. + To check that the full value is represented, ensure that the return value is + equal or less than n_bytes. + All n_bytes are guaranteed to be written (unless an exception occurs), and + so ignoring a positive return value is the equivalent of a downcast in C. + In cases where the full value could not be represented, the returned value + may be larger than necessary - this function is not an accurate way to + calculate the bit length of an integer object. + */ +PyAPI_FUNC(Py_ssize_t) PyLong_AsNativeBytes(PyObject* v, void* buffer, + Py_ssize_t n_bytes, int endianness); + +/* PyLong_FromNativeBytes: Create an int value from a native integer + n_bytes is the number of bytes to read from the buffer. Passing 0 will + always produce the zero int. + PyLong_FromUnsignedNativeBytes always produces a non-negative int. + endianness is -1 for native endian, 0 for big endian or 1 for little. + + Returns the int object, or NULL with an exception set. 
*/ +PyAPI_FUNC(PyObject*) PyLong_FromNativeBytes(const void* buffer, size_t n_bytes, + int endianness); +PyAPI_FUNC(PyObject*) PyLong_FromUnsignedNativeBytes(const void* buffer, + size_t n_bytes, int endianness); + PyAPI_FUNC(int) PyUnstable_Long_IsCompact(const PyLongObject* op); PyAPI_FUNC(Py_ssize_t) PyUnstable_Long_CompactValue(const PyLongObject* op); @@ -50,7 +84,7 @@ PyAPI_FUNC(PyObject *) _PyLong_FromByteArray( */ PyAPI_FUNC(int) _PyLong_AsByteArray(PyLongObject* v, unsigned char* bytes, size_t n, - int little_endian, int is_signed); + int little_endian, int is_signed, int with_exceptions); /* For use by the gcd function in mathmodule.c */ PyAPI_FUNC(PyObject *) _PyLong_GCD(PyObject *, PyObject *); diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 8e3ef25d1ff86f..fc82cbfa66ea7a 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -1,5 +1,6 @@ import unittest import sys +import test.support as support from test.support import import_helper @@ -423,6 +424,150 @@ def test_long_asvoidptr(self): self.assertRaises(OverflowError, asvoidptr, -2**1000) # CRASHES asvoidptr(NULL) + def test_long_asnativebytes(self): + import math + from _testcapi import ( + pylong_asnativebytes as asnativebytes, + SIZE_MAX, + ) + + # Abbreviate sizeof(Py_ssize_t) to SZ because we use it a lot + SZ = int(math.ceil(math.log(SIZE_MAX + 1) / math.log(2)) / 8) + MAX_SSIZE = 2 ** (SZ * 8 - 1) - 1 + MAX_USIZE = 2 ** (SZ * 8) - 1 + if support.verbose: + print(f"SIZEOF_SIZE={SZ}\n{MAX_SSIZE=:016X}\n{MAX_USIZE=:016X}") + + # These tests check that the requested buffer size is correct + for v, expect in [ + (0, SZ), + (512, SZ), + (-512, SZ), + (MAX_SSIZE, SZ), + (MAX_USIZE, SZ + 1), + (-MAX_SSIZE, SZ), + (-MAX_USIZE, SZ + 1), + (2**255-1, 32), + (-(2**255-1), 32), + (2**256-1, 33), + (-(2**256-1), 33), + ]: + with self.subTest(f"sizeof-{v:X}"): + buffer = bytearray(1) + self.assertEqual(expect, asnativebytes(v, buffer, 0, -1), 
+ "PyLong_AsNativeBytes(v, NULL, 0, -1)") + # Also check via the __index__ path + self.assertEqual(expect, asnativebytes(Index(v), buffer, 0, -1), + "PyLong_AsNativeBytes(Index(v), NULL, 0, -1)") + + # We request as many bytes as `expect_be` contains, and always check + # the result (both big and little endian). We check the return value + # independently, since the buffer should always be filled correctly even + # if we need more bytes + for v, expect_be, expect_n in [ + (0, b'\x00', 1), + (0, b'\x00' * 2, 2), + (0, b'\x00' * 8, min(8, SZ)), + (1, b'\x01', 1), + (1, b'\x00' * 10 + b'\x01', min(11, SZ)), + (42, b'\x2a', 1), + (42, b'\x00' * 10 + b'\x2a', min(11, SZ)), + (-1, b'\xff', 1), + (-1, b'\xff' * 10, min(11, SZ)), + (-42, b'\xd6', 1), + (-42, b'\xff' * 10 + b'\xd6', min(11, SZ)), + # Extracts 255 into a single byte, but requests sizeof(Py_ssize_t) + (255, b'\xff', SZ), + (255, b'\x00\xff', 2), + (256, b'\x01\x00', 2), + # Extracts successfully (unsigned), but requests 9 bytes + (2**63, b'\x80' + b'\x00' * 7, 9), + # "Extracts", but requests 9 bytes + (-2**63, b'\x80' + b'\x00' * 7, 9), + (2**63, b'\x00\x80' + b'\x00' * 7, 9), + (-2**63, b'\xff\x80' + b'\x00' * 7, 9), + + (2**255-1, b'\x7f' + b'\xff' * 31, 32), + (-(2**255-1), b'\x80' + b'\x00' * 30 + b'\x01', 32), + # Request extra bytes, but result says we only needed 32 + (-(2**255-1), b'\xff\x80' + b'\x00' * 30 + b'\x01', 32), + (-(2**255-1), b'\xff\xff\x80' + b'\x00' * 30 + b'\x01', 32), + + # Extracting 256 bits of integer will request 33 bytes, but still + # copy as many bits as possible into the buffer. 
So we *can* copy + # into a 32-byte buffer, though negative number may be unrecoverable + (2**256-1, b'\xff' * 32, 33), + (2**256-1, b'\x00' + b'\xff' * 32, 33), + (-(2**256-1), b'\x00' * 31 + b'\x01', 33), + (-(2**256-1), b'\xff' + b'\x00' * 31 + b'\x01', 33), + (-(2**256-1), b'\xff\xff' + b'\x00' * 31 + b'\x01', 33), + + # The classic "Windows HRESULT as negative number" case + # HRESULT hr; + # PyLong_CopyBits(<-2147467259>, &hr, sizeof(HRESULT)) + # assert(hr == E_FAIL) + (-2147467259, b'\x80\x00\x40\x05', 4), + ]: + with self.subTest(f"{v:X}-{len(expect_be)}bytes"): + n = len(expect_be) + buffer = bytearray(n) + expect_le = expect_be[::-1] + + self.assertEqual(expect_n, asnativebytes(v, buffer, n, 0), + f"PyLong_AsNativeBytes(v, buffer, {n}, )") + self.assertEqual(expect_be, buffer[:n], "") + self.assertEqual(expect_n, asnativebytes(v, buffer, n, 1), + f"PyLong_AsNativeBytes(v, buffer, {n}, )") + self.assertEqual(expect_le, buffer[:n], "") + + # Check a few error conditions. These are validated in code, but are + # unspecified in docs, so if we make changes to the implementation, it's + # fine to just update these tests rather than preserve the behaviour. 
+ with self.assertRaises(SystemError): + asnativebytes(1, buffer, 0, 2) + with self.assertRaises(TypeError): + asnativebytes('not a number', buffer, 0, -1) + + def test_long_fromnativebytes(self): + import math + from _testcapi import ( + pylong_fromnativebytes as fromnativebytes, + SIZE_MAX, + ) + + # Abbreviate sizeof(Py_ssize_t) to SZ because we use it a lot + SZ = int(math.ceil(math.log(SIZE_MAX + 1) / math.log(2)) / 8) + MAX_SSIZE = 2 ** (SZ * 8 - 1) - 1 + MAX_USIZE = 2 ** (SZ * 8) - 1 + + for v_be, expect_s, expect_u in [ + (b'\x00', 0, 0), + (b'\x01', 1, 1), + (b'\xff', -1, 255), + (b'\x00\xff', 255, 255), + (b'\xff\xff', -1, 65535), + ]: + with self.subTest(f"{expect_s}-{expect_u:X}-{len(v_be)}bytes"): + n = len(v_be) + v_le = v_be[::-1] + + self.assertEqual(expect_s, fromnativebytes(v_be, n, 0, 1), + f"PyLong_FromNativeBytes(buffer, {n}, )") + self.assertEqual(expect_s, fromnativebytes(v_le, n, 1, 1), + f"PyLong_FromNativeBytes(buffer, {n}, )") + self.assertEqual(expect_u, fromnativebytes(v_be, n, 0, 0), + f"PyLong_FromUnsignedNativeBytes(buffer, {n}, )") + self.assertEqual(expect_u, fromnativebytes(v_le, n, 1, 0), + f"PyLong_FromUnsignedNativeBytes(buffer, {n}, )") + + # Check native endian when the result would be the same either + # way and we can test it. 
+ if v_be == v_le: + self.assertEqual(expect_s, fromnativebytes(v_be, n, -1, 1), + f"PyLong_FromNativeBytes(buffer, {n}, )") + self.assertEqual(expect_u, fromnativebytes(v_be, n, -1, 0), + f"PyLong_FromUnsignedNativeBytes(buffer, {n}, )") + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/C API/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst b/Misc/NEWS.d/next/C API/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst new file mode 100644 index 00000000000000..a8aa191b5eb3ba --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-02-05-17-11-15.gh-issue-111140.WMEjid.rst @@ -0,0 +1,2 @@ +Adds :c:func:`PyLong_AsNativeBytes`, :c:func:`PyLong_FromNativeBytes` and +:c:func:`PyLong_FromUnsignedNativeBytes` functions. diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index d794af8de2b8f0..a3239ec0f52960 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -2393,7 +2393,7 @@ textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj) return -1; if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer), - PY_LITTLE_ENDIAN, 0) < 0) { + PY_LITTLE_ENDIAN, 0, 1) < 0) { Py_DECREF(cookieLong); return -1; } diff --git a/Modules/_pickle.c b/Modules/_pickle.c index f210c0ca205991..0d83261168185d 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -2162,7 +2162,8 @@ save_long(PicklerObject *self, PyObject *obj) pdata = (unsigned char *)PyBytes_AS_STRING(repr); i = _PyLong_AsByteArray((PyLongObject *)obj, pdata, nbytes, - 1 /* little endian */ , 1 /* signed */ ); + 1 /* little endian */ , 1 /* signed */ , + 1 /* with exceptions */); if (i < 0) goto error; /* If the int is negative, this may be a byte more than diff --git a/Modules/_randommodule.c b/Modules/_randommodule.c index 5481ed9b348ed7..4463157d62248d 100644 --- a/Modules/_randommodule.c +++ b/Modules/_randommodule.c @@ -342,7 +342,8 @@ random_seed(RandomObject *self, PyObject *arg) res = _PyLong_AsByteArray((PyLongObject *)n, (unsigned char *)key, keyused * 4, PY_LITTLE_ENDIAN, - 0); 
/* unsigned */ + 0, /* unsigned */ + 1); /* with exceptions */ if (res == -1) { goto Done; } diff --git a/Modules/_sqlite/util.c b/Modules/_sqlite/util.c index 833a666301d8ff..9e8613ef67916e 100644 --- a/Modules/_sqlite/util.c +++ b/Modules/_sqlite/util.c @@ -162,7 +162,7 @@ _pysqlite_long_as_int64(PyObject * py_val) sqlite_int64 int64val; if (_PyLong_AsByteArray((PyLongObject *)py_val, (unsigned char *)&int64val, sizeof(int64val), - IS_LITTLE_ENDIAN, 1 /* signed */) >= 0) { + IS_LITTLE_ENDIAN, 1 /* signed */, 0) >= 0) { return int64val; } } diff --git a/Modules/_struct.c b/Modules/_struct.c index bd16fa89f18945..fa2cd37e003e0a 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -1000,9 +1000,10 @@ bp_longlong(_structmodulestate *state, char *p, PyObject *v, const formatdef *f) (unsigned char *)p, 8, 0, /* little_endian */ - 1 /* signed */); + 1, /* signed */ + 0 /* !with_exceptions */); Py_DECREF(v); - if (res == -1 && PyErr_Occurred()) { + if (res < 0) { PyErr_Format(state->StructError, "'%c' format requires %lld <= number <= %lld", f->format, @@ -1024,9 +1025,10 @@ bp_ulonglong(_structmodulestate *state, char *p, PyObject *v, const formatdef *f (unsigned char *)p, 8, 0, /* little_endian */ - 0 /* signed */); + 0, /* signed */ + 0 /* !with_exceptions */); Py_DECREF(v); - if (res == -1 && PyErr_Occurred()) { + if (res < 0) { PyErr_Format(state->StructError, "'%c' format requires 0 <= number <= %llu", f->format, @@ -1260,9 +1262,10 @@ lp_longlong(_structmodulestate *state, char *p, PyObject *v, const formatdef *f) (unsigned char *)p, 8, 1, /* little_endian */ - 1 /* signed */); + 1, /* signed */ + 0 /* !with_exceptions */); Py_DECREF(v); - if (res == -1 && PyErr_Occurred()) { + if (res < 0) { PyErr_Format(state->StructError, "'%c' format requires %lld <= number <= %lld", f->format, @@ -1284,9 +1287,10 @@ lp_ulonglong(_structmodulestate *state, char *p, PyObject *v, const formatdef *f (unsigned char *)p, 8, 1, /* little_endian */ - 0 /* signed */); + 0, /* 
signed */ + 0 /* !with_exceptions */); Py_DECREF(v); - if (res == -1 && PyErr_Occurred()) { + if (res < 0) { PyErr_Format(state->StructError, "'%c' format requires 0 <= number <= %llu", f->format, diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index 32ad8d32ab8523..dc21cf9f475228 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -776,6 +776,51 @@ pylong_asvoidptr(PyObject *module, PyObject *arg) return Py_NewRef((PyObject *)value); } +static PyObject * +pylong_asnativebytes(PyObject *module, PyObject *args) +{ + PyObject *v; + Py_buffer buffer; + Py_ssize_t n, endianness; + if (!PyArg_ParseTuple(args, "Ow*nn", &v, &buffer, &n, &endianness)) { + return NULL; + } + if (buffer.readonly) { + PyErr_SetString(PyExc_TypeError, "buffer must be writable"); + PyBuffer_Release(&buffer); + return NULL; + } + if (buffer.len < n) { + PyErr_SetString(PyExc_ValueError, "buffer must be at least 'n' bytes"); + PyBuffer_Release(&buffer); + return NULL; + } + Py_ssize_t res = PyLong_AsNativeBytes(v, buffer.buf, n, (int)endianness); + PyBuffer_Release(&buffer); + return res >= 0 ? PyLong_FromSsize_t(res) : NULL; +} + +static PyObject * +pylong_fromnativebytes(PyObject *module, PyObject *args) +{ + Py_buffer buffer; + Py_ssize_t n, endianness, signed_; + if (!PyArg_ParseTuple(args, "y*nnn", &buffer, &n, &endianness, &signed_)) { + return NULL; + } + if (buffer.len < n) { + PyErr_SetString(PyExc_ValueError, "buffer must be at least 'n' bytes"); + PyBuffer_Release(&buffer); + return NULL; + } + PyObject *res = signed_ + ? 
PyLong_FromNativeBytes(buffer.buf, n, (int)endianness) + : PyLong_FromUnsignedNativeBytes(buffer.buf, n, (int)endianness); + PyBuffer_Release(&buffer); + return res; +} + + static PyMethodDef test_methods[] = { _TESTCAPI_TEST_LONG_AND_OVERFLOW_METHODDEF _TESTCAPI_TEST_LONG_API_METHODDEF @@ -804,6 +849,8 @@ static PyMethodDef test_methods[] = { {"pylong_as_size_t", pylong_as_size_t, METH_O}, {"pylong_asdouble", pylong_asdouble, METH_O}, {"pylong_asvoidptr", pylong_asvoidptr, METH_O}, + {"pylong_asnativebytes", pylong_asnativebytes, METH_VARARGS}, + {"pylong_fromnativebytes", pylong_fromnativebytes, METH_VARARGS}, {NULL}, }; @@ -813,6 +860,5 @@ _PyTestCapi_Init_Long(PyObject *mod) if (PyModule_AddFunctions(mod, test_methods) < 0) { return -1; } - return 0; } diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c index f6181168a85ae1..e3789867dc085f 100644 --- a/Modules/_tkinter.c +++ b/Modules/_tkinter.c @@ -926,7 +926,8 @@ AsObj(PyObject *value) (unsigned char *)(void *)&wideValue, sizeof(wideValue), PY_LITTLE_ENDIAN, - /* signed */ 1) == 0) { + /* signed */ 1, + /* with_exceptions */ 1) == 0) { return Tcl_NewWideIntObj(wideValue); } PyErr_Clear(); diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c index 5d3c16a98423ba..2125da437963d2 100644 --- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -973,7 +973,8 @@ _multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEn if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes), 1 /* little-endian */ , - 0 /* unsigned */ ) < 0) { + 0 /* unsigned */ , + 1 /* with_exceptions */) < 0) { goto errorexit; } @@ -1255,7 +1256,8 @@ _multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDe if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes), 1 /* little-endian */ , - 0 /* unsigned */ ) < 0) { + 0 /* unsigned */ , + 1 /* with_exceptions */) < 0) { return NULL; } diff --git a/Objects/longobject.c 
b/Objects/longobject.c index e655ba19e8f1c1..932111f58425f2 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -928,7 +928,8 @@ _PyLong_FromByteArray(const unsigned char* bytes, size_t n, int _PyLong_AsByteArray(PyLongObject* v, unsigned char* bytes, size_t n, - int little_endian, int is_signed) + int little_endian, int is_signed, + int with_exceptions) { Py_ssize_t i; /* index into v->long_value.ob_digit */ Py_ssize_t ndigits; /* number of digits */ @@ -945,8 +946,10 @@ _PyLong_AsByteArray(PyLongObject* v, ndigits = _PyLong_DigitCount(v); if (_PyLong_IsNegative(v)) { if (!is_signed) { - PyErr_SetString(PyExc_OverflowError, - "can't convert negative int to unsigned"); + if (with_exceptions) { + PyErr_SetString(PyExc_OverflowError, + "can't convert negative int to unsigned"); + } return -1; } do_twos_comp = 1; @@ -967,7 +970,12 @@ _PyLong_AsByteArray(PyLongObject* v, /* Copy over all the Python digits. It's crucial that every Python digit except for the MSD contribute exactly PyLong_SHIFT bits to the total, so first assert that the int is - normalized. */ + normalized. + NOTE: PyLong_AsNativeBytes() assumes that this function will fill in 'n' + bytes even if it eventually fails to convert the whole number. Make sure + you account for that if you are changing this algorithm to return without + doing that. + */ assert(ndigits == 0 || v->long_value.ob_digit[ndigits - 1] != 0); j = 0; accum = 0; @@ -1052,11 +1060,203 @@ _PyLong_AsByteArray(PyLongObject* v, return 0; Overflow: - PyErr_SetString(PyExc_OverflowError, "int too big to convert"); + if (with_exceptions) { + PyErr_SetString(PyExc_OverflowError, "int too big to convert"); + } return -1; } +// Refactored out for readability, not reuse +static inline int +_fits_in_n_bits(Py_ssize_t v, Py_ssize_t n) +{ + if (n >= (Py_ssize_t)sizeof(Py_ssize_t) * 8) { + return 1; + } + // If all bits above n are the same, we fit. + // (Use n-1 if we require the sign bit to be consistent.) 
+ Py_ssize_t v_extended = v >> ((int)n - 1); + return v_extended == 0 || v_extended == -1; +} + +static inline int +_resolve_endianness(int *endianness) +{ + if (*endianness < 0) { + *endianness = PY_LITTLE_ENDIAN; + } + if (*endianness != 0 && *endianness != 1) { + PyErr_SetString(PyExc_SystemError, "invalid 'endianness' value"); + return -1; + } + return 0; +} + +Py_ssize_t +PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness) +{ + PyLongObject *v; + union { + Py_ssize_t v; + unsigned char b[sizeof(Py_ssize_t)]; + } cv; + int do_decref = 0; + Py_ssize_t res = 0; + + if (vv == NULL || n < 0) { + PyErr_BadInternalCall(); + return -1; + } + + int little_endian = endianness; + if (_resolve_endianness(&little_endian) < 0) { + return -1; + } + + if (PyLong_Check(vv)) { + v = (PyLongObject *)vv; + } + else { + v = (PyLongObject *)_PyNumber_Index(vv); + if (v == NULL) { + return -1; + } + do_decref = 1; + } + + if (_PyLong_IsCompact(v)) { + res = 0; + cv.v = _PyLong_CompactValue(v); + /* Most paths result in res = sizeof(compact value). Only the case + * where 0 < n < sizeof(compact value) do we need to check and adjust + * our return value. */ + res = sizeof(cv.b); + if (n <= 0) { + // nothing to do! + } + else if (n <= sizeof(cv.b)) { +#if PY_LITTLE_ENDIAN + if (little_endian) { + memcpy(buffer, cv.b, n); + } + else { + for (Py_ssize_t i = 0; i < n; ++i) { + ((unsigned char*)buffer)[n - i - 1] = cv.b[i]; + } + } +#else + if (little_endian) { + for (Py_ssize_t i = 0; i < n; ++i) { + ((unsigned char*)buffer)[i] = cv.b[sizeof(cv.b) - i - 1]; + } + } + else { + memcpy(buffer, &cv.b[sizeof(cv.b) - n], n); + } +#endif + + /* If we fit, return the requested number of bytes */ + if (_fits_in_n_bits(cv.v, n * 8)) { + res = n; + } + } + else { + unsigned char fill = cv.v < 0 ? 
0xFF : 0x00; +#if PY_LITTLE_ENDIAN + if (little_endian) { + memcpy(buffer, cv.b, sizeof(cv.b)); + memset((char *)buffer + sizeof(cv.b), fill, n - sizeof(cv.b)); + } + else { + unsigned char *b = (unsigned char *)buffer; + for (Py_ssize_t i = 0; i < n - (int)sizeof(cv.b); ++i) { + *b++ = fill; + } + for (Py_ssize_t i = sizeof(cv.b); i > 0; --i) { + *b++ = cv.b[i - 1]; + } + } +#else + if (little_endian) { + unsigned char *b = (unsigned char *)buffer; + for (Py_ssize_t i = sizeof(cv.b); i > 0; --i) { + *b++ = cv.b[i - 1]; + } + for (Py_ssize_t i = 0; i < n - sizeof(cv.b); ++i) { + *b++ = fill; + } + } + else { + memset(buffer, fill, n - sizeof(cv.b)); + memcpy((char *)buffer + n - sizeof(cv.b), cv.b, sizeof(cv.b)); + } +#endif + } + } + else { + if (n > 0) { + _PyLong_AsByteArray(v, buffer, (size_t)n, little_endian, 1, 0); + } + + // More efficient calculation for number of bytes required? + size_t nb = _PyLong_NumBits((PyObject *)v); + /* Normally this would be((nb - 1) / 8) + 1 to avoid rounding up + * multiples of 8 to the next byte, but we add an implied bit for + * the sign and it cancels out. 
*/ + size_t n_needed = (nb / 8) + 1; + res = (Py_ssize_t)n_needed; + if ((size_t)res != n_needed) { + PyErr_SetString(PyExc_OverflowError, + "value too large to convert"); + res = -1; + } + } + + if (do_decref) { + Py_DECREF(v); + } + + return res; +} + + +PyObject * +PyLong_FromNativeBytes(const void* buffer, size_t n, int endianness) +{ + if (!buffer) { + PyErr_BadInternalCall(); + return NULL; + } + + int little_endian = endianness; + if (_resolve_endianness(&little_endian) < 0) { + return NULL; + } + + return _PyLong_FromByteArray((const unsigned char *)buffer, n, + little_endian, 1); +} + + +PyObject * +PyLong_FromUnsignedNativeBytes(const void* buffer, size_t n, int endianness) +{ + if (!buffer) { + PyErr_BadInternalCall(); + return NULL; + } + + int little_endian = endianness; + if (_resolve_endianness(&little_endian) < 0) { + return NULL; + } + + return _PyLong_FromByteArray((const unsigned char *)buffer, n, + little_endian, 0); +} + + /* Create a new int object from a C pointer */ PyObject * @@ -1231,7 +1431,7 @@ PyLong_AsLongLong(PyObject *vv) } else { res = _PyLong_AsByteArray((PyLongObject *)v, (unsigned char *)&bytes, - SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 1); + SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 1, 1); } if (do_decref) { Py_DECREF(v); @@ -1270,7 +1470,7 @@ PyLong_AsUnsignedLongLong(PyObject *vv) } else { res = _PyLong_AsByteArray((PyLongObject *)vv, (unsigned char *)&bytes, - SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 0); + SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 0, 1); } /* Plan 9 can't handle long long in ? 
: expressions */ @@ -6068,7 +6268,7 @@ int_to_bytes_impl(PyObject *self, Py_ssize_t length, PyObject *byteorder, if (_PyLong_AsByteArray((PyLongObject *)self, (unsigned char *)PyBytes_AS_STRING(bytes), - length, little_endian, is_signed) < 0) { + length, little_endian, is_signed, 1) < 0) { Py_DECREF(bytes); return NULL; } From bee2a11946a8d6df6b6c384abccf3dfb4e75d3fc Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Mon, 12 Feb 2024 23:52:25 +0300 Subject: [PATCH 089/126] gh-115258: Temporarily skip some `queue` tests on all platforms (#115361) --- Lib/test/test_queue.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_queue.py b/Lib/test/test_queue.py index d308a212999429..92d670ca6f8f5b 100644 --- a/Lib/test/test_queue.py +++ b/Lib/test/test_queue.py @@ -403,11 +403,11 @@ def _shutdown_all_methods_in_many_threads(self, immediate): for thread in ps[1:]: thread.join() - @unittest.skipIf(sys.platform == "win32", "test times out (gh-115258)") + @unittest.skip("test times out (gh-115258)") def test_shutdown_all_methods_in_many_threads(self): return self._shutdown_all_methods_in_many_threads(False) - @unittest.skipIf(sys.platform == "win32", "test times out (gh-115258)") + @unittest.skip("test times out (gh-115258)") def test_shutdown_immediate_all_methods_in_many_threads(self): return self._shutdown_all_methods_in_many_threads(True) From 341d7874f063dcb141672b09f62c19ffedd0a557 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Tue, 13 Feb 2024 00:17:33 +0200 Subject: [PATCH 090/126] gh-115317: Rewrite changelog filter to use vanilla JavaScript (#115324) Co-authored-by: Tomas R --- .editorconfig | 4 +- Doc/tools/static/changelog_search.js | 102 ++++++++++++++------------- 2 files changed, 56 insertions(+), 50 deletions(-) diff --git a/.editorconfig b/.editorconfig index 0169eed951cd3f..a6187d64f3ce46 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,6 +1,6 @@ root = true 
-[*.{py,c,cpp,h,rst,md,yml}] +[*.{py,c,cpp,h,js,rst,md,yml}] trim_trailing_whitespace = true insert_final_newline = true indent_style = space @@ -11,5 +11,5 @@ indent_size = 4 [*.rst] indent_size = 3 -[*.yml] +[*.{js,yml}] indent_size = 2 diff --git a/Doc/tools/static/changelog_search.js b/Doc/tools/static/changelog_search.js index c881a9bd4c84a7..0a77c0d71ae937 100644 --- a/Doc/tools/static/changelog_search.js +++ b/Doc/tools/static/changelog_search.js @@ -1,53 +1,59 @@ -$(document).ready(function() { - // add the search form and bind the events - $('h1').after([ - '

Filter entries by content:', - '', - '

' - ].join('\n')); +document.addEventListener("DOMContentLoaded", function () { + // add the search form and bind the events + document + .querySelector("h1") + .insertAdjacentHTML( + "afterend", + [ + "

Filter entries by content:", + '', + '

', + ].join("\n"), + ); - function dofilter() { - try { - var query = new RegExp($('#searchbox').val(), 'i'); + function doFilter() { + let query; + try { + query = new RegExp(document.querySelector("#searchbox").value, "i"); + } catch (e) { + return; // not a valid regex (yet) + } + // find headers for the versions (What's new in Python X.Y.Z?) + const h2s = document.querySelectorAll("#changelog h2"); + for (const h2 of h2s) { + let sections_found = 0; + // find headers for the sections (Core, Library, etc.) + const h3s = h2.parentNode.querySelectorAll("h3"); + for (const h3 of h3s) { + let entries_found = 0; + // find all the entries + const lis = h3.parentNode.querySelectorAll("li"); + for (let li of lis) { + // check if the query matches the entry + if (query.test(li.textContent)) { + li.style.display = "block"; + entries_found++; + } else { + li.style.display = "none"; + } } - catch (e) { - return; // not a valid regex (yet) + // if there are entries, show the section, otherwise hide it + if (entries_found > 0) { + h3.parentNode.style.display = "block"; + sections_found++; + } else { + h3.parentNode.style.display = "none"; } - // find headers for the versions (What's new in Python X.Y.Z?) - $('#changelog h2').each(function(index1, h2) { - var h2_parent = $(h2).parent(); - var sections_found = 0; - // find headers for the sections (Core, Library, etc.) 
- h2_parent.find('h3').each(function(index2, h3) { - var h3_parent = $(h3).parent(); - var entries_found = 0; - // find all the entries - h3_parent.find('li').each(function(index3, li) { - var li = $(li); - // check if the query matches the entry - if (query.test(li.text())) { - li.show(); - entries_found++; - } - else { - li.hide(); - } - }); - // if there are entries, show the section, otherwise hide it - if (entries_found > 0) { - h3_parent.show(); - sections_found++; - } - else { - h3_parent.hide(); - } - }); - if (sections_found > 0) - h2_parent.show(); - else - h2_parent.hide(); - }); + } + if (sections_found > 0) { + h2.parentNode.style.display = "block"; + } else { + h2.parentNode.style.display = "none"; + } } - $('#searchbox').keyup(dofilter); - $('#searchbox-submit').click(dofilter); + } + document.querySelector("#searchbox").addEventListener("keyup", doFilter); + document + .querySelector("#searchbox-submit") + .addEventListener("click", doFilter); }); From 10756b10ff8e47ece33f7fbf62c9a06f8a866fed Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Mon, 12 Feb 2024 22:28:36 +0000 Subject: [PATCH 091/126] gh-111140: Minor doc fixes for PyLong_AsNativeBytes (GH-115375) --- Doc/c-api/long.rst | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index c39823e5e6787f..f24282e76a33d1 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -359,13 +359,16 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Copy the Python integer value to a native *buffer* of size *n_bytes*:: int value; - Py_ssize_t bytes = PyLong_CopyBits(v, &value, sizeof(value), -1); + Py_ssize_t bytes = PyLong_AsNativeBytes(v, &value, sizeof(value), -1); if (bytes < 0) { // Error occurred return NULL; } - else if (bytes > sizeof(value)) { - // Overflow occurred, but 'value' contains as much as could fit + else if (bytes <= (Py_ssize_t)sizeof(value)) { + // Success! 
+ } + else { + // Overflow occurred, but 'value' contains truncated value } *endianness* may be passed ``-1`` for the native endian that CPython was @@ -379,15 +382,16 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Unless an exception is raised, all *n_bytes* of the buffer will be written with as much of the value as can fit. This allows the caller to ignore all non-negative results if the intent is to match the typical behavior of a - C-style downcast. + C-style downcast. No exception is set for this case. - Values are always copied as twos-complement, and sufficient size will be - requested for a sign bit. For example, this may cause an value that fits into - 8 bytes when treated as unsigned to request 9 bytes, even though all eight - bytes were copied into the buffer. What has been omitted is the zero sign - bit, which is redundant when the intention is to treat the value as unsigned. + Values are always copied as two's-complement, and sufficient buffer will be + requested to include a sign bit. For example, this may cause an value that + fits into 8 bytes when treated as unsigned to request 9 bytes, even though + all eight bytes were copied into the buffer. What has been omitted is the + zero sign bit, which is redundant when the intention is to treat the value as + unsigned. - Passing *n_bytes* of zero will always return the requested buffer size. + Passing zero to *n_bytes* will return the requested buffer size. .. note:: From 2f0778675ad0eaf346924ef6a2f60529b92ffcfa Mon Sep 17 00:00:00 2001 From: Russell Keith-Magee Date: Tue, 13 Feb 2024 07:10:24 +0800 Subject: [PATCH 092/126] gh-114099: Refactor configure and Makefile to accomodate non-macOS frameworks (#115120) Part of the PEP 730 work to add iOS support. This change lays the groundwork for introducing iOS/tvOS/watchOS frameworks; it includes the structural refactoring needed so that iOS branches can be added into in a subsequent PR. 
Summary of changes: * Updates config.sub to the 2024-01-01 release. This is the "as released" version of config.sub. * Adds a RESSRCDIR variable to allow sharing of macOS and iOS Makefile steps. * Adds an INSTALLTARGETS variable so platforms can customise which targets are actually installed. This will be used to exclude certain targets (e.g., binaries, manfiles) from iOS framework installs. * Adds a PYTHONFRAMEWORKINSTALLNAMEPREFIX variable; this is used as the install name for the library. This is needed to allow for iOS frameworks to specify an @rpath-based install name. * Evaluates MACHDEP earlier in the configure process so that ac_sys_system is available. * Modifies _PYTHON_HOST_PLATFORM evaluation for cross-platform builds so that the CPU architecture is differentiated from the host identifier. This will be used to generate a _PYTHON_HOST_PLATFORM definition that includes ABI information, not just CPU architecture. * Differentiates between SOABI_PLATFORM and PLATFORM_TRIPLET. SOABI_PLATFORM is used in binary module names, and includes the ABI, but not the OS or CPU architecture (e.g., math.cpython-313-iphonesimulator.dylib). PLATFORM_TRIPLET is used as the sys._multiarch value, and on iOS will contain the ABI and architecture (e.g., iphoneos-arm64). This differentiation hasn't historically been needed because while macOS is a multiarch platform, it uses a bare darwin as PLATFORM_TRIPLET. * Removes the use of the deprecated -Wl,-single_module flag when compiling macOS frameworks. * Some whitespace normalisation where there was a mix of spaces and tabs in a single block.
--- Makefile.pre.in | 11 +- ...-02-07-08-23-48.gh-issue-114099.XcEXEZ.rst | 2 + config.sub | 251 ++++++++---- configure | 383 +++++++++-------- configure.ac | 385 ++++++++++-------- 5 files changed, 600 insertions(+), 432 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2024-02-07-08-23-48.gh-issue-114099.XcEXEZ.rst diff --git a/Makefile.pre.in b/Makefile.pre.in index 4dabe328ce0362..e0527633ccd03b 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -184,6 +184,8 @@ PYTHONFRAMEWORK= @PYTHONFRAMEWORK@ PYTHONFRAMEWORKDIR= @PYTHONFRAMEWORKDIR@ PYTHONFRAMEWORKPREFIX= @PYTHONFRAMEWORKPREFIX@ PYTHONFRAMEWORKINSTALLDIR= @PYTHONFRAMEWORKINSTALLDIR@ +PYTHONFRAMEWORKINSTALLNAMEPREFIX= @PYTHONFRAMEWORKINSTALLNAMEPREFIX@ +RESSRCDIR= @RESSRCDIR@ # Deployment target selected during configure, to be checked # by distutils. The export statement is needed to ensure that the # deployment target is active during build. @@ -866,7 +868,7 @@ libpython3.so: libpython$(LDVERSION).so $(BLDSHARED) $(NO_AS_NEEDED) -o $@ -Wl,-h$@ $^ libpython$(LDVERSION).dylib: $(LIBRARY_OBJS) - $(CC) -dynamiclib -Wl,-single_module $(PY_CORE_LDFLAGS) -undefined dynamic_lookup -Wl,-install_name,$(prefix)/lib/libpython$(LDVERSION).dylib -Wl,-compatibility_version,$(VERSION) -Wl,-current_version,$(VERSION) -o $@ $(LIBRARY_OBJS) $(DTRACE_OBJS) $(SHLIBS) $(LIBC) $(LIBM); \ + $(CC) -dynamiclib $(PY_CORE_LDFLAGS) -undefined dynamic_lookup -Wl,-install_name,$(PYTHONFRAMEWORKINSTALLNAMEPREFIX)/lib/libpython$(LDVERSION).dylib -Wl,-compatibility_version,$(VERSION) -Wl,-current_version,$(VERSION) -o $@ $(LIBRARY_OBJS) $(DTRACE_OBJS) $(SHLIBS) $(LIBC) $(LIBM); \ libpython$(VERSION).sl: $(LIBRARY_OBJS) @@ -891,14 +893,13 @@ $(BUILDPYTHON)-gdb.py: $(SRC_GDB_HOOKS) # This rule is here for OPENSTEP/Rhapsody/MacOSX. It builds a temporary # minimal framework (not including the Lib directory and such) in the current # directory. 
-RESSRCDIR=Mac/Resources/framework $(PYTHONFRAMEWORKDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK): \ $(LIBRARY) \ $(RESSRCDIR)/Info.plist $(INSTALL) -d -m $(DIRMODE) $(PYTHONFRAMEWORKDIR)/Versions/$(VERSION) $(CC) -o $(LDLIBRARY) $(PY_CORE_LDFLAGS) -dynamiclib \ - -all_load $(LIBRARY) -Wl,-single_module \ - -install_name $(DESTDIR)$(PYTHONFRAMEWORKINSTALLDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK) \ + -all_load $(LIBRARY) \ + -install_name $(DESTDIR)$(PYTHONFRAMEWORKINSTALLNAMEPREFIX)/$(PYTHONFRAMEWORK) \ -compatibility_version $(VERSION) \ -current_version $(VERSION) \ -framework CoreFoundation $(LIBS); @@ -2000,7 +2001,7 @@ multissltest: all # which can lead to two parallel `./python setup.py build` processes that # step on each others toes. .PHONY: install -install: @FRAMEWORKINSTALLFIRST@ commoninstall bininstall maninstall @FRAMEWORKINSTALLLAST@ +install: @FRAMEWORKINSTALLFIRST@ @INSTALLTARGETS@ @FRAMEWORKINSTALLLAST@ if test "x$(ENSUREPIP)" != "xno" ; then \ case $(ENSUREPIP) in \ upgrade) ensurepip="--upgrade" ;; \ diff --git a/Misc/NEWS.d/next/Build/2024-02-07-08-23-48.gh-issue-114099.XcEXEZ.rst b/Misc/NEWS.d/next/Build/2024-02-07-08-23-48.gh-issue-114099.XcEXEZ.rst new file mode 100644 index 00000000000000..5e4acfba8a6949 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2024-02-07-08-23-48.gh-issue-114099.XcEXEZ.rst @@ -0,0 +1,2 @@ +configure and Makefile were refactored to accomodate framework builds on +Apple platforms other than macOS. diff --git a/config.sub b/config.sub index d74fb6deac942a..2c6a07ab3c34ea 100755 --- a/config.sub +++ b/config.sub @@ -1,14 +1,14 @@ #! /bin/sh # Configuration validation subroutine script. -# Copyright 1992-2021 Free Software Foundation, Inc. +# Copyright 1992-2024 Free Software Foundation, Inc. 
# shellcheck disable=SC2006,SC2268 # see below for rationale -timestamp='2021-08-14' +timestamp='2024-01-01' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or +# the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but @@ -76,13 +76,13 @@ Report bugs and patches to ." version="\ GNU config.sub ($timestamp) -Copyright 1992-2021 Free Software Foundation, Inc. +Copyright 1992-2024 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." help=" -Try \`$me --help' for more information." +Try '$me --help' for more information." # Parse command line while test $# -gt 0 ; do @@ -130,7 +130,7 @@ IFS=$saved_IFS # Separate into logical components for further validation case $1 in *-*-*-*-*) - echo Invalid configuration \`"$1"\': more than four components >&2 + echo "Invalid configuration '$1': more than four components" >&2 exit 1 ;; *-*-*-*) @@ -145,7 +145,8 @@ case $1 in nto-qnx* | linux-* | uclinux-uclibc* \ | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \ | netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \ - | storm-chaos* | os2-emx* | rtmk-nova*) + | storm-chaos* | os2-emx* | rtmk-nova* | managarm-* \ + | windows-* ) basic_machine=$field1 basic_os=$maybe_os ;; @@ -943,7 +944,7 @@ $basic_machine EOF IFS=$saved_IFS ;; - # We use `pc' rather than `unknown' + # We use 'pc' rather than 'unknown' # because (1) that's what they normally are, and # (2) the word "unknown" tends to confuse beginning users. 
i*86 | x86_64) @@ -1020,6 +1021,11 @@ case $cpu-$vendor in ;; # Here we normalize CPU types with a missing or matching vendor + armh-unknown | armh-alt) + cpu=armv7l + vendor=alt + basic_os=${basic_os:-linux-gnueabihf} + ;; dpx20-unknown | dpx20-bull) cpu=rs6000 vendor=bull @@ -1070,7 +1076,7 @@ case $cpu-$vendor in pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) cpu=i586 ;; - pentiumpro-* | p6-* | 6x86-* | athlon-* | athalon_*-*) + pentiumpro-* | p6-* | 6x86-* | athlon-* | athlon_*-*) cpu=i686 ;; pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) @@ -1121,7 +1127,7 @@ case $cpu-$vendor in xscale-* | xscalee[bl]-*) cpu=`echo "$cpu" | sed 's/^xscale/arm/'` ;; - arm64-*) + arm64-* | aarch64le-*) cpu=aarch64 ;; @@ -1175,7 +1181,7 @@ case $cpu-$vendor in case $cpu in 1750a | 580 \ | a29k \ - | aarch64 | aarch64_be \ + | aarch64 | aarch64_be | aarch64c | arm64ec \ | abacus \ | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \ | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] \ @@ -1194,50 +1200,29 @@ case $cpu-$vendor in | d10v | d30v | dlx | dsp16xx \ | e2k | elxsi | epiphany \ | f30[01] | f700 | fido | fr30 | frv | ft32 | fx80 \ + | javascript \ | h8300 | h8500 \ | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ | hexagon \ | i370 | i*86 | i860 | i960 | ia16 | ia64 \ | ip2k | iq2000 \ | k1om \ + | kvx \ | le32 | le64 \ | lm32 \ - | loongarch32 | loongarch64 | loongarchx32 \ + | loongarch32 | loongarch64 \ | m32c | m32r | m32rle \ | m5200 | m68000 | m680[012346]0 | m68360 | m683?2 | m68k \ | m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x \ | m88110 | m88k | maxq | mb | mcore | mep | metag \ | microblaze | microblazeel \ - | mips | mipsbe | mipseb | mipsel | mipsle \ - | mips16 \ - | mips64 | mips64eb | mips64el \ - | mips64octeon | mips64octeonel \ - | mips64orion | mips64orionel \ - | mips64r5900 | mips64r5900el \ - | mips64vr | mips64vrel \ - | mips64vr4100 | mips64vr4100el \ - | mips64vr4300 | mips64vr4300el \ - | mips64vr5000 | mips64vr5000el \ 
- | mips64vr5900 | mips64vr5900el \ - | mipsisa32 | mipsisa32el \ - | mipsisa32r2 | mipsisa32r2el \ - | mipsisa32r3 | mipsisa32r3el \ - | mipsisa32r5 | mipsisa32r5el \ - | mipsisa32r6 | mipsisa32r6el \ - | mipsisa64 | mipsisa64el \ - | mipsisa64r2 | mipsisa64r2el \ - | mipsisa64r3 | mipsisa64r3el \ - | mipsisa64r5 | mipsisa64r5el \ - | mipsisa64r6 | mipsisa64r6el \ - | mipsisa64sb1 | mipsisa64sb1el \ - | mipsisa64sr71k | mipsisa64sr71kel \ - | mipsr5900 | mipsr5900el \ - | mipstx39 | mipstx39el \ + | mips* \ | mmix \ | mn10200 | mn10300 \ | moxie \ | mt \ | msp430 \ + | nanomips* \ | nds32 | nds32le | nds32be \ | nfp \ | nios | nios2 | nios2eb | nios2el \ @@ -1269,6 +1254,7 @@ case $cpu-$vendor in | ubicom32 \ | v70 | v850 | v850e | v850e1 | v850es | v850e2 | v850e2v3 \ | vax \ + | vc4 \ | visium \ | w65 \ | wasm32 | wasm64 \ @@ -1280,7 +1266,7 @@ case $cpu-$vendor in ;; *) - echo Invalid configuration \`"$1"\': machine \`"$cpu-$vendor"\' not recognized 1>&2 + echo "Invalid configuration '$1': machine '$cpu-$vendor' not recognized" 1>&2 exit 1 ;; esac @@ -1301,11 +1287,12 @@ esac # Decode manufacturer-specific aliases for certain operating systems. -if test x$basic_os != x +if test x"$basic_os" != x then -# First recognize some ad-hoc caes, or perhaps split kernel-os, or else just +# First recognize some ad-hoc cases, or perhaps split kernel-os, or else just # set os. +obj= case $basic_os in gnu/linux*) kernel=linux @@ -1336,6 +1323,10 @@ EOF kernel=linux os=`echo "$basic_os" | sed -e 's|linux|gnu|'` ;; + managarm*) + kernel=managarm + os=`echo "$basic_os" | sed -e 's|managarm|mlibc|'` + ;; *) kernel= os=$basic_os @@ -1501,10 +1492,16 @@ case $os in os=eabi ;; *) - os=elf + os= + obj=elf ;; esac ;; + aout* | coff* | elf* | pe*) + # These are machine code file formats, not OSes + obj=$os + os= + ;; *) # No normalization, but not necessarily accepted, that comes below. ;; @@ -1523,12 +1520,15 @@ else # system, and we'll never get to this point. 
kernel= +obj= case $cpu-$vendor in score-*) - os=elf + os= + obj=elf ;; spu-*) - os=elf + os= + obj=elf ;; *-acorn) os=riscix1.2 @@ -1538,28 +1538,35 @@ case $cpu-$vendor in os=gnu ;; arm*-semi) - os=aout + os= + obj=aout ;; c4x-* | tic4x-*) - os=coff + os= + obj=coff ;; c8051-*) - os=elf + os= + obj=elf ;; clipper-intergraph) os=clix ;; hexagon-*) - os=elf + os= + obj=elf ;; tic54x-*) - os=coff + os= + obj=coff ;; tic55x-*) - os=coff + os= + obj=coff ;; tic6x-*) - os=coff + os= + obj=coff ;; # This must come before the *-dec entry. pdp10-*) @@ -1581,19 +1588,24 @@ case $cpu-$vendor in os=sunos3 ;; m68*-cisco) - os=aout + os= + obj=aout ;; mep-*) - os=elf + os= + obj=elf ;; mips*-cisco) - os=elf + os= + obj=elf ;; - mips*-*) - os=elf + mips*-*|nanomips*-*) + os= + obj=elf ;; or32-*) - os=coff + os= + obj=coff ;; *-tti) # must be before sparc entry or we get the wrong os. os=sysv3 @@ -1602,7 +1614,8 @@ case $cpu-$vendor in os=sunos4.1.1 ;; pru-*) - os=elf + os= + obj=elf ;; *-be) os=beos @@ -1683,10 +1696,12 @@ case $cpu-$vendor in os=uxpv ;; *-rom68k) - os=coff + os= + obj=coff ;; *-*bug) - os=coff + os= + obj=coff ;; *-apple) os=macos @@ -1704,10 +1719,11 @@ esac fi -# Now, validate our (potentially fixed-up) OS. +# Now, validate our (potentially fixed-up) individual pieces (OS, OBJ). + case $os in # Sometimes we do "kernel-libc", so those need to count as OSes. - musl* | newlib* | relibc* | uclibc*) + llvm* | musl* | newlib* | relibc* | uclibc*) ;; # Likewise for "kernel-abi" eabi* | gnueabi*) @@ -1715,6 +1731,9 @@ case $os in # VxWorks passes extra cpu info in the 4th filed. simlinux | simwindows | spe) ;; + # See `case $cpu-$os` validation below + ghcjs) + ;; # Now accept the basic system types. # The portable systems comes first. # Each alternative MUST end in a * to match a version number. 
@@ -1723,7 +1742,7 @@ case $os in | hpux* | unos* | osf* | luna* | dgux* | auroraux* | solaris* \ | sym* | plan9* | psp* | sim* | xray* | os68k* | v88r* \ | hiux* | abug | nacl* | netware* | windows* \ - | os9* | macos* | osx* | ios* \ + | os9* | macos* | osx* | ios* | tvos* | watchos* \ | mpw* | magic* | mmixware* | mon960* | lnews* \ | amigaos* | amigados* | msdos* | newsos* | unicos* | aof* \ | aos* | aros* | cloudabi* | sortix* | twizzler* \ @@ -1732,11 +1751,11 @@ case $os in | mirbsd* | netbsd* | dicos* | openedition* | ose* \ | bitrig* | openbsd* | secbsd* | solidbsd* | libertybsd* | os108* \ | ekkobsd* | freebsd* | riscix* | lynxos* | os400* \ - | bosx* | nextstep* | cxux* | aout* | elf* | oabi* \ - | ptx* | coff* | ecoff* | winnt* | domain* | vsta* \ + | bosx* | nextstep* | cxux* | oabi* \ + | ptx* | ecoff* | winnt* | domain* | vsta* \ | udi* | lites* | ieee* | go32* | aux* | hcos* \ | chorusrdb* | cegcc* | glidix* | serenity* \ - | cygwin* | msys* | pe* | moss* | proelf* | rtems* \ + | cygwin* | msys* | moss* | proelf* | rtems* \ | midipix* | mingw32* | mingw64* | mint* \ | uxpv* | beos* | mpeix* | udk* | moxiebox* \ | interix* | uwin* | mks* | rhapsody* | darwin* \ @@ -1748,49 +1767,117 @@ case $os in | skyos* | haiku* | rdos* | toppers* | drops* | es* \ | onefs* | tirtos* | phoenix* | fuchsia* | redox* | bme* \ | midnightbsd* | amdhsa* | unleashed* | emscripten* | wasi* \ - | nsk* | powerunix* | genode* | zvmoe* | qnx* | emx* | zephyr*) + | nsk* | powerunix* | genode* | zvmoe* | qnx* | emx* | zephyr* \ + | fiwix* | mlibc* | cos* | mbr* | ironclad* ) ;; # This one is extra strict with allowed versions sco3.2v2 | sco3.2v[4-9]* | sco5v6*) # Don't forget version if it is 3.2v4 or newer. ;; + # This refers to builds using the UEFI calling convention + # (which depends on the architecture) and PE file format. + # Note that this is both a different calling convention and + # different file format than that of GNU-EFI + # (x86_64-w64-mingw32). 
+ uefi) + ;; none) ;; + kernel* | msvc* ) + # Restricted further below + ;; + '') + if test x"$obj" = x + then + echo "Invalid configuration '$1': Blank OS only allowed with explicit machine code file format" 1>&2 + fi + ;; *) - echo Invalid configuration \`"$1"\': OS \`"$os"\' not recognized 1>&2 + echo "Invalid configuration '$1': OS '$os' not recognized" 1>&2 + exit 1 + ;; +esac + +case $obj in + aout* | coff* | elf* | pe*) + ;; + '') + # empty is fine + ;; + *) + echo "Invalid configuration '$1': Machine code format '$obj' not recognized" 1>&2 + exit 1 + ;; +esac + +# Here we handle the constraint that a (synthetic) cpu and os are +# valid only in combination with each other and nowhere else. +case $cpu-$os in + # The "javascript-unknown-ghcjs" triple is used by GHC; we + # accept it here in order to tolerate that, but reject any + # variations. + javascript-ghcjs) + ;; + javascript-* | *-ghcjs) + echo "Invalid configuration '$1': cpu '$cpu' is not valid with os '$os$obj'" 1>&2 exit 1 ;; esac # As a final step for OS-related things, validate the OS-kernel combination # (given a valid OS), if there is a kernel. -case $kernel-$os in - linux-gnu* | linux-dietlibc* | linux-android* | linux-newlib* \ - | linux-musl* | linux-relibc* | linux-uclibc* ) +case $kernel-$os-$obj in + linux-gnu*- | linux-android*- | linux-dietlibc*- | linux-llvm*- \ + | linux-mlibc*- | linux-musl*- | linux-newlib*- \ + | linux-relibc*- | linux-uclibc*- ) + ;; + uclinux-uclibc*- ) + ;; + managarm-mlibc*- | managarm-kernel*- ) ;; - uclinux-uclibc* ) + windows*-msvc*-) ;; - -dietlibc* | -newlib* | -musl* | -relibc* | -uclibc* ) + -dietlibc*- | -llvm*- | -mlibc*- | -musl*- | -newlib*- | -relibc*- \ + | -uclibc*- ) # These are just libc implementations, not actual OSes, and thus # require a kernel. - echo "Invalid configuration \`$1': libc \`$os' needs explicit kernel." 1>&2 + echo "Invalid configuration '$1': libc '$os' needs explicit kernel." 
1>&2 exit 1 ;; - kfreebsd*-gnu* | kopensolaris*-gnu*) + -kernel*- ) + echo "Invalid configuration '$1': '$os' needs explicit kernel." 1>&2 + exit 1 ;; - vxworks-simlinux | vxworks-simwindows | vxworks-spe) + *-kernel*- ) + echo "Invalid configuration '$1': '$kernel' does not support '$os'." 1>&2 + exit 1 ;; - nto-qnx*) + *-msvc*- ) + echo "Invalid configuration '$1': '$os' needs 'windows'." 1>&2 + exit 1 ;; - os2-emx) + kfreebsd*-gnu*- | kopensolaris*-gnu*-) + ;; + vxworks-simlinux- | vxworks-simwindows- | vxworks-spe-) + ;; + nto-qnx*-) + ;; + os2-emx-) ;; - *-eabi* | *-gnueabi*) + *-eabi*- | *-gnueabi*-) ;; - -*) + none--*) + # None (no kernel, i.e. freestanding / bare metal), + # can be paired with an machine code file format + ;; + -*-) # Blank kernel with real OS is always fine. ;; - *-*) - echo "Invalid configuration \`$1': Kernel \`$kernel' not known to work with OS \`$os'." 1>&2 + --*) + # Blank kernel and OS with real machine code file format is always fine. + ;; + *-*-*) + echo "Invalid configuration '$1': Kernel '$kernel' not known to work with OS '$os'." 
1>&2 exit 1 ;; esac @@ -1873,7 +1960,7 @@ case $vendor in ;; esac -echo "$cpu-$vendor-${kernel:+$kernel-}$os" +echo "$cpu-$vendor${kernel:+-$kernel}${os:+-$os}${obj:+-$obj}" exit # Local variables: diff --git a/configure b/configure index 705a778cafced3..ba2d49df7c65fe 100755 --- a/configure +++ b/configure @@ -972,7 +972,7 @@ HAS_XCRUN EXPORT_MACOSX_DEPLOYMENT_TARGET CONFIGURE_MACOSX_DEPLOYMENT_TARGET _PYTHON_HOST_PLATFORM -MACHDEP +INSTALLTARGETS FRAMEWORKINSTALLAPPSPREFIX FRAMEWORKUNIXTOOLSPREFIX FRAMEWORKPYTHONW @@ -980,6 +980,8 @@ FRAMEWORKALTINSTALLLAST FRAMEWORKALTINSTALLFIRST FRAMEWORKINSTALLLAST FRAMEWORKINSTALLFIRST +RESSRCDIR +PYTHONFRAMEWORKINSTALLNAMEPREFIX PYTHONFRAMEWORKINSTALLDIR PYTHONFRAMEWORKPREFIX PYTHONFRAMEWORKDIR @@ -989,6 +991,7 @@ LIPO_INTEL64_FLAGS LIPO_32BIT_FLAGS ARCH_RUN_32BIT UNIVERSALSDK +MACHDEP PKG_CONFIG_LIBDIR PKG_CONFIG_PATH PKG_CONFIG @@ -4004,6 +4007,77 @@ if test "$with_pkg_config" = yes -a -z "$PKG_CONFIG"; then as_fn_error $? "pkg-config is required" "$LINENO" 5] fi +# Set name for machine-dependent library files + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking MACHDEP" >&5 +printf %s "checking MACHDEP... " >&6; } +if test -z "$MACHDEP" +then + # avoid using uname for cross builds + if test "$cross_compiling" = yes; then + # ac_sys_system and ac_sys_release are used for setting + # a lot of different things including 'define_xopen_source' + # in the case statement below. + case "$host" in + *-*-linux-android*) + ac_sys_system=Linux-android + ;; + *-*-linux*) + ac_sys_system=Linux + ;; + *-*-cygwin*) + ac_sys_system=Cygwin + ;; + *-*-vxworks*) + ac_sys_system=VxWorks + ;; + *-*-emscripten) + ac_sys_system=Emscripten + ;; + *-*-wasi) + ac_sys_system=WASI + ;; + *) + # for now, limit cross builds to known configurations + MACHDEP="unknown" + as_fn_error $? 
"cross build not supported for $host" "$LINENO" 5 + esac + ac_sys_release= + else + ac_sys_system=`uname -s` + if test "$ac_sys_system" = "AIX" \ + -o "$ac_sys_system" = "UnixWare" -o "$ac_sys_system" = "OpenUNIX"; then + ac_sys_release=`uname -v` + else + ac_sys_release=`uname -r` + fi + fi + ac_md_system=`echo $ac_sys_system | + tr -d '/ ' | tr '[A-Z]' '[a-z]'` + ac_md_release=`echo $ac_sys_release | + tr -d '/ ' | sed 's/^[A-Z]\.//' | sed 's/\..*//'` + MACHDEP="$ac_md_system$ac_md_release" + + case $MACHDEP in + aix*) MACHDEP="aix";; + linux*) MACHDEP="linux";; + cygwin*) MACHDEP="cygwin";; + darwin*) MACHDEP="darwin";; + '') MACHDEP="unknown";; + esac + + if test "$ac_sys_system" = "SunOS"; then + # For Solaris, there isn't an OS version specific macro defined + # in most compilers, so we define one here. + SUNOS_VERSION=`echo $ac_sys_release | sed -e 's!\.\(0-9\)$!.0\1!g' | tr -d '.'` + +printf "%s\n" "#define Py_SUNOS_VERSION $SUNOS_VERSION" >>confdefs.h + + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: \"$MACHDEP\"" >&5 +printf "%s\n" "\"$MACHDEP\"" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for --enable-universalsdk" >&5 printf %s "checking for --enable-universalsdk... " >&6; } # Check whether --enable-universalsdk was given. 
@@ -4127,11 +4201,15 @@ then : PYTHONFRAMEWORKDIR=no-framework PYTHONFRAMEWORKPREFIX= PYTHONFRAMEWORKINSTALLDIR= + PYTHONFRAMEWORKINSTALLNAMEPREFIX= + RESSRCDIR= FRAMEWORKINSTALLFIRST= FRAMEWORKINSTALLLAST= FRAMEWORKALTINSTALLFIRST= FRAMEWORKALTINSTALLLAST= FRAMEWORKPYTHONW= + INSTALLTARGETS="commoninstall bininstall maninstall" + if test "x${prefix}" = "xNONE"; then FRAMEWORKUNIXTOOLSPREFIX="${ac_default_prefix}" else @@ -4144,65 +4222,76 @@ then : PYTHONFRAMEWORKINSTALLDIR=$PYTHONFRAMEWORKPREFIX/$PYTHONFRAMEWORKDIR FRAMEWORKINSTALLFIRST="frameworkinstallstructure" FRAMEWORKALTINSTALLFIRST="frameworkinstallstructure " - FRAMEWORKINSTALLLAST="frameworkinstallmaclib frameworkinstallapps frameworkinstallunixtools" - FRAMEWORKALTINSTALLLAST="frameworkinstallmaclib frameworkinstallapps frameworkaltinstallunixtools" - FRAMEWORKPYTHONW="frameworkpythonw" - FRAMEWORKINSTALLAPPSPREFIX="/Applications" - if test "x${prefix}" = "xNONE" ; then - FRAMEWORKUNIXTOOLSPREFIX="${ac_default_prefix}" + case $ac_sys_system in #( + Darwin) : + FRAMEWORKINSTALLLAST="frameworkinstallmaclib frameworkinstallapps frameworkinstallunixtools" + FRAMEWORKALTINSTALLLAST="frameworkinstallmaclib frameworkinstallapps frameworkaltinstallunixtools" + FRAMEWORKPYTHONW="frameworkpythonw" + FRAMEWORKINSTALLAPPSPREFIX="/Applications" + INSTALLTARGETS="commoninstall bininstall maninstall" - else - FRAMEWORKUNIXTOOLSPREFIX="${prefix}" - fi + if test "x${prefix}" = "xNONE" ; then + FRAMEWORKUNIXTOOLSPREFIX="${ac_default_prefix}" - case "${enableval}" in - /System*) - FRAMEWORKINSTALLAPPSPREFIX="/Applications" - if test "${prefix}" = "NONE" ; then - # See below - FRAMEWORKUNIXTOOLSPREFIX="/usr" - fi - ;; + else + FRAMEWORKUNIXTOOLSPREFIX="${prefix}" + fi - /Library*) - FRAMEWORKINSTALLAPPSPREFIX="/Applications" - ;; + case "${enableval}" in + /System*) + FRAMEWORKINSTALLAPPSPREFIX="/Applications" + if test "${prefix}" = "NONE" ; then + # See below + FRAMEWORKUNIXTOOLSPREFIX="/usr" + fi + ;; + + /Library*) + 
FRAMEWORKINSTALLAPPSPREFIX="/Applications" + ;; + + */Library/Frameworks) + MDIR="`dirname "${enableval}"`" + MDIR="`dirname "${MDIR}"`" + FRAMEWORKINSTALLAPPSPREFIX="${MDIR}/Applications" + + if test "${prefix}" = "NONE"; then + # User hasn't specified the + # --prefix option, but wants to install + # the framework in a non-default location, + # ensure that the compatibility links get + # installed relative to that prefix as well + # instead of in /usr/local. + FRAMEWORKUNIXTOOLSPREFIX="${MDIR}" + fi + ;; - */Library/Frameworks) - MDIR="`dirname "${enableval}"`" - MDIR="`dirname "${MDIR}"`" - FRAMEWORKINSTALLAPPSPREFIX="${MDIR}/Applications" - - if test "${prefix}" = "NONE"; then - # User hasn't specified the - # --prefix option, but wants to install - # the framework in a non-default location, - # ensure that the compatibility links get - # installed relative to that prefix as well - # instead of in /usr/local. - FRAMEWORKUNIXTOOLSPREFIX="${MDIR}" - fi - ;; - - *) - FRAMEWORKINSTALLAPPSPREFIX="/Applications" - ;; - esac + *) + FRAMEWORKINSTALLAPPSPREFIX="/Applications" + ;; + esac - prefix=$PYTHONFRAMEWORKINSTALLDIR/Versions/$VERSION + prefix=$PYTHONFRAMEWORKINSTALLDIR/Versions/$VERSION + PYTHONFRAMEWORKINSTALLNAMEPREFIX=${prefix} + RESSRCDIR=Mac/Resources/framework - # Add files for Mac specific code to the list of output - # files: - ac_config_files="$ac_config_files Mac/Makefile" + # Add files for Mac specific code to the list of output + # files: + ac_config_files="$ac_config_files Mac/Makefile" - ac_config_files="$ac_config_files Mac/PythonLauncher/Makefile" + ac_config_files="$ac_config_files Mac/PythonLauncher/Makefile" - ac_config_files="$ac_config_files Mac/Resources/framework/Info.plist" + ac_config_files="$ac_config_files Mac/Resources/framework/Info.plist" - ac_config_files="$ac_config_files Mac/Resources/app/Info.plist" + ac_config_files="$ac_config_files Mac/Resources/app/Info.plist" - esac + ;; + *) + as_fn_error $? 
"Unknown platform for framework build" "$LINENO" 5 + ;; + esac + esac else $as_nop @@ -4210,11 +4299,14 @@ else $as_nop PYTHONFRAMEWORKDIR=no-framework PYTHONFRAMEWORKPREFIX= PYTHONFRAMEWORKINSTALLDIR= + PYTHONFRAMEWORKINSTALLNAMEPREFIX= + RESSRCDIR= FRAMEWORKINSTALLFIRST= FRAMEWORKINSTALLLAST= FRAMEWORKALTINSTALLFIRST= FRAMEWORKALTINSTALLLAST= FRAMEWORKPYTHONW= + INSTALLTARGETS="commoninstall bininstall maninstall" if test "x${prefix}" = "xNONE" ; then FRAMEWORKUNIXTOOLSPREFIX="${ac_default_prefix}" else @@ -4239,79 +4331,11 @@ fi -printf "%s\n" "#define _PYTHONFRAMEWORK \"${PYTHONFRAMEWORK}\"" >>confdefs.h -# Set name for machine-dependent library files -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking MACHDEP" >&5 -printf %s "checking MACHDEP... " >&6; } -if test -z "$MACHDEP" -then - # avoid using uname for cross builds - if test "$cross_compiling" = yes; then - # ac_sys_system and ac_sys_release are used for setting - # a lot of different things including 'define_xopen_source' - # in the case statement below. - case "$host" in - *-*-linux-android*) - ac_sys_system=Linux-android - ;; - *-*-linux*) - ac_sys_system=Linux - ;; - *-*-cygwin*) - ac_sys_system=Cygwin - ;; - *-*-vxworks*) - ac_sys_system=VxWorks - ;; - *-*-emscripten) - ac_sys_system=Emscripten - ;; - *-*-wasi) - ac_sys_system=WASI - ;; - *) - # for now, limit cross builds to known configurations - MACHDEP="unknown" - as_fn_error $? 
"cross build not supported for $host" "$LINENO" 5 - esac - ac_sys_release= - else - ac_sys_system=`uname -s` - if test "$ac_sys_system" = "AIX" \ - -o "$ac_sys_system" = "UnixWare" -o "$ac_sys_system" = "OpenUNIX"; then - ac_sys_release=`uname -v` - else - ac_sys_release=`uname -r` - fi - fi - ac_md_system=`echo $ac_sys_system | - tr -d '/ ' | tr '[A-Z]' '[a-z]'` - ac_md_release=`echo $ac_sys_release | - tr -d '/ ' | sed 's/^[A-Z]\.//' | sed 's/\..*//'` - MACHDEP="$ac_md_system$ac_md_release" - - case $MACHDEP in - aix*) MACHDEP="aix";; - linux*) MACHDEP="linux";; - cygwin*) MACHDEP="cygwin";; - darwin*) MACHDEP="darwin";; - '') MACHDEP="unknown";; - esac - - if test "$ac_sys_system" = "SunOS"; then - # For Solaris, there isn't an OS version specific macro defined - # in most compilers, so we define one here. - SUNOS_VERSION=`echo $ac_sys_release | sed -e 's!\.\(0-9\)$!.0\1!g' | tr -d '.'` +printf "%s\n" "#define _PYTHONFRAMEWORK \"${PYTHONFRAMEWORK}\"" >>confdefs.h -printf "%s\n" "#define Py_SUNOS_VERSION $SUNOS_VERSION" >>confdefs.h - - fi -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: \"$MACHDEP\"" >&5 -printf "%s\n" "\"$MACHDEP\"" >&6; } if test "$cross_compiling" = yes; then @@ -4319,27 +4343,27 @@ if test "$cross_compiling" = yes; then *-*-linux*) case "$host_cpu" in arm*) - _host_cpu=arm + _host_ident=arm ;; *) - _host_cpu=$host_cpu + _host_ident=$host_cpu esac ;; *-*-cygwin*) - _host_cpu= + _host_ident= ;; *-*-vxworks*) - _host_cpu=$host_cpu + _host_ident=$host_cpu ;; wasm32-*-* | wasm64-*-*) - _host_cpu=$host_cpu + _host_ident=$host_cpu ;; *) # for now, limit cross builds to known configurations MACHDEP="unknown" as_fn_error $? 
"cross build not supported for $host" "$LINENO" 5 esac - _PYTHON_HOST_PLATFORM="$MACHDEP${_host_cpu:+-$_host_cpu}" + _PYTHON_HOST_PLATFORM="$MACHDEP${_host_ident:+-$_host_ident}" fi # Some systems cannot stand _XOPEN_SOURCE being defined at all; they @@ -6769,8 +6793,6 @@ case $ac_sys_system in #( ;; esac -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MULTIARCH" >&5 -printf "%s\n" "$MULTIARCH" >&6; } if test x$PLATFORM_TRIPLET != x && test x$MULTIARCH != x; then if test x$PLATFORM_TRIPLET != x$MULTIARCH; then @@ -6780,6 +6802,14 @@ elif test x$PLATFORM_TRIPLET != x && test x$MULTIARCH = x; then MULTIARCH=$PLATFORM_TRIPLET fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MULTIARCH" >&5 +printf "%s\n" "$MULTIARCH" >&6; } + +case $ac_sys_system in #( + *) : + SOABI_PLATFORM=$PLATFORM_TRIPLET + ;; +esac if test x$MULTIARCH != x; then MULTIARCH_CPPFLAGS="-DMULTIARCH=\\\"$MULTIARCH\\\"" @@ -7271,7 +7301,7 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking LDLIBRARY" >&5 printf %s "checking LDLIBRARY... " >&6; } -# MacOSX framework builds need more magic. LDLIBRARY is the dynamic +# Apple framework builds need more magic. LDLIBRARY is the dynamic # library that we build, but we do not want to link against it (we # will find it with a -framework option). For this reason there is an # extra variable BLDLIBRARY against which Python and the extension @@ -7279,9 +7309,14 @@ printf %s "checking LDLIBRARY... " >&6; } # LDLIBRARY, but empty for MacOSX framework builds. if test "$enable_framework" then - LDLIBRARY='$(PYTHONFRAMEWORKDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)' - RUNSHARED=DYLD_FRAMEWORK_PATH=`pwd`${DYLD_FRAMEWORK_PATH:+:${DYLD_FRAMEWORK_PATH}} + case $ac_sys_system in + Darwin) + LDLIBRARY='$(PYTHONFRAMEWORKDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)';; + *) + as_fn_error $? 
"Unknown platform for framework build" "$LINENO" 5;; + esac BLDLIBRARY='' + RUNSHARED=DYLD_FRAMEWORK_PATH=`pwd`${DYLD_FRAMEWORK_PATH:+:${DYLD_FRAMEWORK_PATH}} else BLDLIBRARY='$(LDLIBRARY)' fi @@ -7294,64 +7329,64 @@ printf "%s\n" "#define Py_ENABLE_SHARED 1" >>confdefs.h case $ac_sys_system in CYGWIN*) - LDLIBRARY='libpython$(LDVERSION).dll.a' - DLLLIBRARY='libpython$(LDVERSION).dll' - ;; + LDLIBRARY='libpython$(LDVERSION).dll.a' + DLLLIBRARY='libpython$(LDVERSION).dll' + ;; SunOS*) - LDLIBRARY='libpython$(LDVERSION).so' - BLDLIBRARY='-Wl,-R,$(LIBDIR) -L. -lpython$(LDVERSION)' - RUNSHARED=LD_LIBRARY_PATH=`pwd`${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} - INSTSONAME="$LDLIBRARY".$SOVERSION - if test "$with_pydebug" != yes - then - PY3LIBRARY=libpython3.so - fi - ;; + LDLIBRARY='libpython$(LDVERSION).so' + BLDLIBRARY='-Wl,-R,$(LIBDIR) -L. -lpython$(LDVERSION)' + RUNSHARED=LD_LIBRARY_PATH=`pwd`${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} + INSTSONAME="$LDLIBRARY".$SOVERSION + if test "$with_pydebug" != yes + then + PY3LIBRARY=libpython3.so + fi + ;; Linux*|GNU*|NetBSD*|FreeBSD*|DragonFly*|OpenBSD*|VxWorks*) - LDLIBRARY='libpython$(LDVERSION).so' - BLDLIBRARY='-L. -lpython$(LDVERSION)' - RUNSHARED=LD_LIBRARY_PATH=`pwd`${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} - INSTSONAME="$LDLIBRARY".$SOVERSION - if test "$with_pydebug" != yes - then - PY3LIBRARY=libpython3.so - fi - ;; + LDLIBRARY='libpython$(LDVERSION).so' + BLDLIBRARY='-L. -lpython$(LDVERSION)' + RUNSHARED=LD_LIBRARY_PATH=`pwd`${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} + INSTSONAME="$LDLIBRARY".$SOVERSION + if test "$with_pydebug" != yes + then + PY3LIBRARY=libpython3.so + fi + ;; hp*|HP*) - case `uname -m` in - ia64) - LDLIBRARY='libpython$(LDVERSION).so' - ;; - *) - LDLIBRARY='libpython$(LDVERSION).sl' - ;; - esac - BLDLIBRARY='-Wl,+b,$(LIBDIR) -L. 
-lpython$(LDVERSION)' - RUNSHARED=SHLIB_PATH=`pwd`${SHLIB_PATH:+:${SHLIB_PATH}} - ;; + case `uname -m` in + ia64) + LDLIBRARY='libpython$(LDVERSION).so' + ;; + *) + LDLIBRARY='libpython$(LDVERSION).sl' + ;; + esac + BLDLIBRARY='-Wl,+b,$(LIBDIR) -L. -lpython$(LDVERSION)' + RUNSHARED=SHLIB_PATH=`pwd`${SHLIB_PATH:+:${SHLIB_PATH}} + ;; Darwin*) - LDLIBRARY='libpython$(LDVERSION).dylib' - BLDLIBRARY='-L. -lpython$(LDVERSION)' - RUNSHARED=DYLD_LIBRARY_PATH=`pwd`${DYLD_LIBRARY_PATH:+:${DYLD_LIBRARY_PATH}} - ;; + LDLIBRARY='libpython$(LDVERSION).dylib' + BLDLIBRARY='-L. -lpython$(LDVERSION)' + RUNSHARED=DYLD_LIBRARY_PATH=`pwd`${DYLD_LIBRARY_PATH:+:${DYLD_LIBRARY_PATH}} + ;; AIX*) - LDLIBRARY='libpython$(LDVERSION).so' - RUNSHARED=LIBPATH=`pwd`${LIBPATH:+:${LIBPATH}} - ;; + LDLIBRARY='libpython$(LDVERSION).so' + RUNSHARED=LIBPATH=`pwd`${LIBPATH:+:${LIBPATH}} + ;; esac else # shared is disabled PY_ENABLE_SHARED=0 case $ac_sys_system in CYGWIN*) - BLDLIBRARY='$(LIBRARY)' - LDLIBRARY='libpython$(LDVERSION).dll.a' - ;; + BLDLIBRARY='$(LIBRARY)' + LDLIBRARY='libpython$(LDVERSION).dll.a' + ;; esac fi if test "$cross_compiling" = yes; then - RUNSHARED= + RUNSHARED= fi @@ -23898,7 +23933,7 @@ printf %s "checking ABIFLAGS... " >&6; } printf "%s\n" "$ABIFLAGS" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking SOABI" >&5 printf %s "checking SOABI... 
" >&6; } -SOABI='cpython-'`echo $VERSION | tr -d .`${ABIFLAGS}${PLATFORM_TRIPLET:+-$PLATFORM_TRIPLET} +SOABI='cpython-'`echo $VERSION | tr -d .`${ABIFLAGS}${SOABI_PLATFORM:+-$SOABI_PLATFORM} { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $SOABI" >&5 printf "%s\n" "$SOABI" >&6; } @@ -23907,7 +23942,7 @@ printf "%s\n" "$SOABI" >&6; } if test "$Py_DEBUG" = 'true'; then # Similar to SOABI but remove "d" flag from ABIFLAGS - ALT_SOABI='cpython-'`echo $VERSION | tr -d .``echo $ABIFLAGS | tr -d d`${PLATFORM_TRIPLET:+-$PLATFORM_TRIPLET} + ALT_SOABI='cpython-'`echo $VERSION | tr -d .``echo $ABIFLAGS | tr -d d`${SOABI_PLATFORM:+-$SOABI_PLATFORM} printf "%s\n" "#define ALT_SOABI \"${ALT_SOABI}\"" >>confdefs.h diff --git a/configure.ac b/configure.ac index dee7ed552b370f..b39af7422c4c7c 100644 --- a/configure.ac +++ b/configure.ac @@ -307,6 +307,74 @@ if test "$with_pkg_config" = yes -a -z "$PKG_CONFIG"; then AC_MSG_ERROR([pkg-config is required])] fi +# Set name for machine-dependent library files +AC_ARG_VAR([MACHDEP], [name for machine-dependent library files]) +AC_MSG_CHECKING([MACHDEP]) +if test -z "$MACHDEP" +then + # avoid using uname for cross builds + if test "$cross_compiling" = yes; then + # ac_sys_system and ac_sys_release are used for setting + # a lot of different things including 'define_xopen_source' + # in the case statement below. 
+ case "$host" in + *-*-linux-android*) + ac_sys_system=Linux-android + ;; + *-*-linux*) + ac_sys_system=Linux + ;; + *-*-cygwin*) + ac_sys_system=Cygwin + ;; + *-*-vxworks*) + ac_sys_system=VxWorks + ;; + *-*-emscripten) + ac_sys_system=Emscripten + ;; + *-*-wasi) + ac_sys_system=WASI + ;; + *) + # for now, limit cross builds to known configurations + MACHDEP="unknown" + AC_MSG_ERROR([cross build not supported for $host]) + esac + ac_sys_release= + else + ac_sys_system=`uname -s` + if test "$ac_sys_system" = "AIX" \ + -o "$ac_sys_system" = "UnixWare" -o "$ac_sys_system" = "OpenUNIX"; then + ac_sys_release=`uname -v` + else + ac_sys_release=`uname -r` + fi + fi + ac_md_system=`echo $ac_sys_system | + tr -d '[/ ]' | tr '[[A-Z]]' '[[a-z]]'` + ac_md_release=`echo $ac_sys_release | + tr -d '[/ ]' | sed 's/^[[A-Z]]\.//' | sed 's/\..*//'` + MACHDEP="$ac_md_system$ac_md_release" + + case $MACHDEP in + aix*) MACHDEP="aix";; + linux*) MACHDEP="linux";; + cygwin*) MACHDEP="cygwin";; + darwin*) MACHDEP="darwin";; + '') MACHDEP="unknown";; + esac + + if test "$ac_sys_system" = "SunOS"; then + # For Solaris, there isn't an OS version specific macro defined + # in most compilers, so we define one here. 
+ SUNOS_VERSION=`echo $ac_sys_release | sed -e 's!\.\([0-9]\)$!.0\1!g' | tr -d '.'` + AC_DEFINE_UNQUOTED([Py_SUNOS_VERSION], [$SUNOS_VERSION], + [The version of SunOS/Solaris as reported by `uname -r' without the dot.]) + fi +fi +AC_MSG_RESULT(["$MACHDEP"]) + AC_MSG_CHECKING([for --enable-universalsdk]) AC_ARG_ENABLE([universalsdk], AS_HELP_STRING([--enable-universalsdk@<:@=SDKDIR@:>@], @@ -424,11 +492,15 @@ AC_ARG_ENABLE([framework], PYTHONFRAMEWORKDIR=no-framework PYTHONFRAMEWORKPREFIX= PYTHONFRAMEWORKINSTALLDIR= + PYTHONFRAMEWORKINSTALLNAMEPREFIX= + RESSRCDIR= FRAMEWORKINSTALLFIRST= FRAMEWORKINSTALLLAST= FRAMEWORKALTINSTALLFIRST= FRAMEWORKALTINSTALLLAST= FRAMEWORKPYTHONW= + INSTALLTARGETS="commoninstall bininstall maninstall" + if test "x${prefix}" = "xNONE"; then FRAMEWORKUNIXTOOLSPREFIX="${ac_default_prefix}" else @@ -441,71 +513,85 @@ AC_ARG_ENABLE([framework], PYTHONFRAMEWORKINSTALLDIR=$PYTHONFRAMEWORKPREFIX/$PYTHONFRAMEWORKDIR FRAMEWORKINSTALLFIRST="frameworkinstallstructure" FRAMEWORKALTINSTALLFIRST="frameworkinstallstructure " - FRAMEWORKINSTALLLAST="frameworkinstallmaclib frameworkinstallapps frameworkinstallunixtools" - FRAMEWORKALTINSTALLLAST="frameworkinstallmaclib frameworkinstallapps frameworkaltinstallunixtools" - FRAMEWORKPYTHONW="frameworkpythonw" - FRAMEWORKINSTALLAPPSPREFIX="/Applications" - if test "x${prefix}" = "xNONE" ; then - FRAMEWORKUNIXTOOLSPREFIX="${ac_default_prefix}" + case $ac_sys_system in #( + Darwin) : + FRAMEWORKINSTALLLAST="frameworkinstallmaclib frameworkinstallapps frameworkinstallunixtools" + FRAMEWORKALTINSTALLLAST="frameworkinstallmaclib frameworkinstallapps frameworkaltinstallunixtools" + FRAMEWORKPYTHONW="frameworkpythonw" + FRAMEWORKINSTALLAPPSPREFIX="/Applications" + INSTALLTARGETS="commoninstall bininstall maninstall" - else - FRAMEWORKUNIXTOOLSPREFIX="${prefix}" - fi + if test "x${prefix}" = "xNONE" ; then + FRAMEWORKUNIXTOOLSPREFIX="${ac_default_prefix}" - case "${enableval}" in - /System*) - 
FRAMEWORKINSTALLAPPSPREFIX="/Applications" - if test "${prefix}" = "NONE" ; then - # See below - FRAMEWORKUNIXTOOLSPREFIX="/usr" - fi - ;; + else + FRAMEWORKUNIXTOOLSPREFIX="${prefix}" + fi - /Library*) - FRAMEWORKINSTALLAPPSPREFIX="/Applications" - ;; + case "${enableval}" in + /System*) + FRAMEWORKINSTALLAPPSPREFIX="/Applications" + if test "${prefix}" = "NONE" ; then + # See below + FRAMEWORKUNIXTOOLSPREFIX="/usr" + fi + ;; + + /Library*) + FRAMEWORKINSTALLAPPSPREFIX="/Applications" + ;; + + */Library/Frameworks) + MDIR="`dirname "${enableval}"`" + MDIR="`dirname "${MDIR}"`" + FRAMEWORKINSTALLAPPSPREFIX="${MDIR}/Applications" + + if test "${prefix}" = "NONE"; then + # User hasn't specified the + # --prefix option, but wants to install + # the framework in a non-default location, + # ensure that the compatibility links get + # installed relative to that prefix as well + # instead of in /usr/local. + FRAMEWORKUNIXTOOLSPREFIX="${MDIR}" + fi + ;; - */Library/Frameworks) - MDIR="`dirname "${enableval}"`" - MDIR="`dirname "${MDIR}"`" - FRAMEWORKINSTALLAPPSPREFIX="${MDIR}/Applications" - - if test "${prefix}" = "NONE"; then - # User hasn't specified the - # --prefix option, but wants to install - # the framework in a non-default location, - # ensure that the compatibility links get - # installed relative to that prefix as well - # instead of in /usr/local. 
- FRAMEWORKUNIXTOOLSPREFIX="${MDIR}" - fi - ;; + *) + FRAMEWORKINSTALLAPPSPREFIX="/Applications" + ;; + esac - *) - FRAMEWORKINSTALLAPPSPREFIX="/Applications" - ;; + prefix=$PYTHONFRAMEWORKINSTALLDIR/Versions/$VERSION + PYTHONFRAMEWORKINSTALLNAMEPREFIX=${prefix} + RESSRCDIR=Mac/Resources/framework + + # Add files for Mac specific code to the list of output + # files: + AC_CONFIG_FILES([Mac/Makefile]) + AC_CONFIG_FILES([Mac/PythonLauncher/Makefile]) + AC_CONFIG_FILES([Mac/Resources/framework/Info.plist]) + AC_CONFIG_FILES([Mac/Resources/app/Info.plist]) + ;; + *) + AC_MSG_ERROR([Unknown platform for framework build]) + ;; + esac esac - - prefix=$PYTHONFRAMEWORKINSTALLDIR/Versions/$VERSION - - # Add files for Mac specific code to the list of output - # files: - AC_CONFIG_FILES([Mac/Makefile]) - AC_CONFIG_FILES([Mac/PythonLauncher/Makefile]) - AC_CONFIG_FILES([Mac/Resources/framework/Info.plist]) - AC_CONFIG_FILES([Mac/Resources/app/Info.plist]) - esac ],[ PYTHONFRAMEWORK= PYTHONFRAMEWORKDIR=no-framework PYTHONFRAMEWORKPREFIX= PYTHONFRAMEWORKINSTALLDIR= + PYTHONFRAMEWORKINSTALLNAMEPREFIX= + RESSRCDIR= FRAMEWORKINSTALLFIRST= FRAMEWORKINSTALLLAST= FRAMEWORKALTINSTALLFIRST= FRAMEWORKALTINSTALLLAST= FRAMEWORKPYTHONW= + INSTALLTARGETS="commoninstall bininstall maninstall" if test "x${prefix}" = "xNONE" ; then FRAMEWORKUNIXTOOLSPREFIX="${ac_default_prefix}" else @@ -519,6 +605,8 @@ AC_SUBST([PYTHONFRAMEWORKIDENTIFIER]) AC_SUBST([PYTHONFRAMEWORKDIR]) AC_SUBST([PYTHONFRAMEWORKPREFIX]) AC_SUBST([PYTHONFRAMEWORKINSTALLDIR]) +AC_SUBST([PYTHONFRAMEWORKINSTALLNAMEPREFIX]) +AC_SUBST([RESSRCDIR]) AC_SUBST([FRAMEWORKINSTALLFIRST]) AC_SUBST([FRAMEWORKINSTALLLAST]) AC_SUBST([FRAMEWORKALTINSTALLFIRST]) @@ -526,105 +614,38 @@ AC_SUBST([FRAMEWORKALTINSTALLLAST]) AC_SUBST([FRAMEWORKPYTHONW]) AC_SUBST([FRAMEWORKUNIXTOOLSPREFIX]) AC_SUBST([FRAMEWORKINSTALLAPPSPREFIX]) +AC_SUBST([INSTALLTARGETS]) AC_DEFINE_UNQUOTED([_PYTHONFRAMEWORK], ["${PYTHONFRAMEWORK}"], [framework name]) -# Set name for 
machine-dependent library files -AC_ARG_VAR([MACHDEP], [name for machine-dependent library files]) -AC_MSG_CHECKING([MACHDEP]) -if test -z "$MACHDEP" -then - # avoid using uname for cross builds - if test "$cross_compiling" = yes; then - # ac_sys_system and ac_sys_release are used for setting - # a lot of different things including 'define_xopen_source' - # in the case statement below. - case "$host" in - *-*-linux-android*) - ac_sys_system=Linux-android - ;; - *-*-linux*) - ac_sys_system=Linux - ;; - *-*-cygwin*) - ac_sys_system=Cygwin - ;; - *-*-vxworks*) - ac_sys_system=VxWorks - ;; - *-*-emscripten) - ac_sys_system=Emscripten - ;; - *-*-wasi) - ac_sys_system=WASI - ;; - *) - # for now, limit cross builds to known configurations - MACHDEP="unknown" - AC_MSG_ERROR([cross build not supported for $host]) - esac - ac_sys_release= - else - ac_sys_system=`uname -s` - if test "$ac_sys_system" = "AIX" \ - -o "$ac_sys_system" = "UnixWare" -o "$ac_sys_system" = "OpenUNIX"; then - ac_sys_release=`uname -v` - else - ac_sys_release=`uname -r` - fi - fi - ac_md_system=`echo $ac_sys_system | - tr -d '[/ ]' | tr '[[A-Z]]' '[[a-z]]'` - ac_md_release=`echo $ac_sys_release | - tr -d '[/ ]' | sed 's/^[[A-Z]]\.//' | sed 's/\..*//'` - MACHDEP="$ac_md_system$ac_md_release" - - case $MACHDEP in - aix*) MACHDEP="aix";; - linux*) MACHDEP="linux";; - cygwin*) MACHDEP="cygwin";; - darwin*) MACHDEP="darwin";; - '') MACHDEP="unknown";; - esac - - if test "$ac_sys_system" = "SunOS"; then - # For Solaris, there isn't an OS version specific macro defined - # in most compilers, so we define one here. 
- SUNOS_VERSION=`echo $ac_sys_release | sed -e 's!\.\([0-9]\)$!.0\1!g' | tr -d '.'` - AC_DEFINE_UNQUOTED([Py_SUNOS_VERSION], [$SUNOS_VERSION], - [The version of SunOS/Solaris as reported by `uname -r' without the dot.]) - fi -fi -AC_MSG_RESULT(["$MACHDEP"]) - AC_SUBST([_PYTHON_HOST_PLATFORM]) if test "$cross_compiling" = yes; then case "$host" in *-*-linux*) case "$host_cpu" in arm*) - _host_cpu=arm + _host_ident=arm ;; *) - _host_cpu=$host_cpu + _host_ident=$host_cpu esac ;; *-*-cygwin*) - _host_cpu= + _host_ident= ;; *-*-vxworks*) - _host_cpu=$host_cpu + _host_ident=$host_cpu ;; wasm32-*-* | wasm64-*-*) - _host_cpu=$host_cpu + _host_ident=$host_cpu ;; *) # for now, limit cross builds to known configurations MACHDEP="unknown" AC_MSG_ERROR([cross build not supported for $host]) esac - _PYTHON_HOST_PLATFORM="$MACHDEP${_host_cpu:+-$_host_cpu}" + _PYTHON_HOST_PLATFORM="$MACHDEP${_host_ident:+-$_host_ident}" fi # Some systems cannot stand _XOPEN_SOURCE being defined at all; they @@ -935,6 +956,14 @@ else fi rm -f conftest.out +dnl On some platforms, using a true "triplet" for MULTIARCH would be redundant. +dnl For example, `arm64-apple-darwin` is redundant, because there isn't a +dnl non-Apple Darwin. Including the CPU architecture can also be potentially +dnl redundant - on macOS, for example, it's possible to do a single compile +dnl pass that includes multiple architectures, so it would be misleading for +dnl MULTIARCH (and thus the sysconfigdata module name) to include a single CPU +dnl architecture. PLATFORM_TRIPLET will be a pair or single value for these +dnl platforms. 
AC_MSG_CHECKING([for multiarch]) AS_CASE([$ac_sys_system], [Darwin*], [MULTIARCH=""], @@ -942,7 +971,6 @@ AS_CASE([$ac_sys_system], [MULTIARCH=$($CC --print-multiarch 2>/dev/null)] ) AC_SUBST([MULTIARCH]) -AC_MSG_RESULT([$MULTIARCH]) if test x$PLATFORM_TRIPLET != x && test x$MULTIARCH != x; then if test x$PLATFORM_TRIPLET != x$MULTIARCH; then @@ -952,6 +980,16 @@ elif test x$PLATFORM_TRIPLET != x && test x$MULTIARCH = x; then MULTIARCH=$PLATFORM_TRIPLET fi AC_SUBST([PLATFORM_TRIPLET]) +AC_MSG_RESULT([$MULTIARCH]) + +dnl Even if we *do* include the CPU architecture in the MULTIARCH value, some +dnl platforms don't need the CPU architecture in the SOABI tag. These platforms +dnl will have multiple sysconfig modules (one for each CPU architecture), but +dnl use a single "fat" binary at runtime. SOABI_PLATFORM is the component of +dnl the PLATFORM_TRIPLET that will be used in binary module extensions. +AS_CASE([$ac_sys_system], + [SOABI_PLATFORM=$PLATFORM_TRIPLET] +) if test x$MULTIARCH != x; then MULTIARCH_CPPFLAGS="-DMULTIARCH=\\\"$MULTIARCH\\\"" @@ -1294,7 +1332,7 @@ fi AC_MSG_CHECKING([LDLIBRARY]) -# MacOSX framework builds need more magic. LDLIBRARY is the dynamic +# Apple framework builds need more magic. LDLIBRARY is the dynamic # library that we build, but we do not want to link against it (we # will find it with a -framework option). For this reason there is an # extra variable BLDLIBRARY against which Python and the extension @@ -1302,9 +1340,14 @@ AC_MSG_CHECKING([LDLIBRARY]) # LDLIBRARY, but empty for MacOSX framework builds. 
if test "$enable_framework" then - LDLIBRARY='$(PYTHONFRAMEWORKDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)' - RUNSHARED=DYLD_FRAMEWORK_PATH=`pwd`${DYLD_FRAMEWORK_PATH:+:${DYLD_FRAMEWORK_PATH}} + case $ac_sys_system in + Darwin) + LDLIBRARY='$(PYTHONFRAMEWORKDIR)/Versions/$(VERSION)/$(PYTHONFRAMEWORK)';; + *) + AC_MSG_ERROR([Unknown platform for framework build]);; + esac BLDLIBRARY='' + RUNSHARED=DYLD_FRAMEWORK_PATH=`pwd`${DYLD_FRAMEWORK_PATH:+:${DYLD_FRAMEWORK_PATH}} else BLDLIBRARY='$(LDLIBRARY)' fi @@ -1316,64 +1359,64 @@ if test $enable_shared = "yes"; then [Defined if Python is built as a shared library.]) case $ac_sys_system in CYGWIN*) - LDLIBRARY='libpython$(LDVERSION).dll.a' - DLLLIBRARY='libpython$(LDVERSION).dll' - ;; + LDLIBRARY='libpython$(LDVERSION).dll.a' + DLLLIBRARY='libpython$(LDVERSION).dll' + ;; SunOS*) - LDLIBRARY='libpython$(LDVERSION).so' - BLDLIBRARY='-Wl,-R,$(LIBDIR) -L. -lpython$(LDVERSION)' - RUNSHARED=LD_LIBRARY_PATH=`pwd`${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} - INSTSONAME="$LDLIBRARY".$SOVERSION - if test "$with_pydebug" != yes - then - PY3LIBRARY=libpython3.so - fi - ;; + LDLIBRARY='libpython$(LDVERSION).so' + BLDLIBRARY='-Wl,-R,$(LIBDIR) -L. -lpython$(LDVERSION)' + RUNSHARED=LD_LIBRARY_PATH=`pwd`${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} + INSTSONAME="$LDLIBRARY".$SOVERSION + if test "$with_pydebug" != yes + then + PY3LIBRARY=libpython3.so + fi + ;; Linux*|GNU*|NetBSD*|FreeBSD*|DragonFly*|OpenBSD*|VxWorks*) - LDLIBRARY='libpython$(LDVERSION).so' - BLDLIBRARY='-L. -lpython$(LDVERSION)' - RUNSHARED=LD_LIBRARY_PATH=`pwd`${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} - INSTSONAME="$LDLIBRARY".$SOVERSION - if test "$with_pydebug" != yes - then - PY3LIBRARY=libpython3.so - fi - ;; + LDLIBRARY='libpython$(LDVERSION).so' + BLDLIBRARY='-L. 
-lpython$(LDVERSION)' + RUNSHARED=LD_LIBRARY_PATH=`pwd`${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} + INSTSONAME="$LDLIBRARY".$SOVERSION + if test "$with_pydebug" != yes + then + PY3LIBRARY=libpython3.so + fi + ;; hp*|HP*) - case `uname -m` in - ia64) - LDLIBRARY='libpython$(LDVERSION).so' - ;; - *) - LDLIBRARY='libpython$(LDVERSION).sl' - ;; - esac - BLDLIBRARY='-Wl,+b,$(LIBDIR) -L. -lpython$(LDVERSION)' - RUNSHARED=SHLIB_PATH=`pwd`${SHLIB_PATH:+:${SHLIB_PATH}} - ;; + case `uname -m` in + ia64) + LDLIBRARY='libpython$(LDVERSION).so' + ;; + *) + LDLIBRARY='libpython$(LDVERSION).sl' + ;; + esac + BLDLIBRARY='-Wl,+b,$(LIBDIR) -L. -lpython$(LDVERSION)' + RUNSHARED=SHLIB_PATH=`pwd`${SHLIB_PATH:+:${SHLIB_PATH}} + ;; Darwin*) - LDLIBRARY='libpython$(LDVERSION).dylib' - BLDLIBRARY='-L. -lpython$(LDVERSION)' - RUNSHARED=DYLD_LIBRARY_PATH=`pwd`${DYLD_LIBRARY_PATH:+:${DYLD_LIBRARY_PATH}} - ;; + LDLIBRARY='libpython$(LDVERSION).dylib' + BLDLIBRARY='-L. -lpython$(LDVERSION)' + RUNSHARED=DYLD_LIBRARY_PATH=`pwd`${DYLD_LIBRARY_PATH:+:${DYLD_LIBRARY_PATH}} + ;; AIX*) - LDLIBRARY='libpython$(LDVERSION).so' - RUNSHARED=LIBPATH=`pwd`${LIBPATH:+:${LIBPATH}} - ;; + LDLIBRARY='libpython$(LDVERSION).so' + RUNSHARED=LIBPATH=`pwd`${LIBPATH:+:${LIBPATH}} + ;; esac else # shared is disabled PY_ENABLE_SHARED=0 case $ac_sys_system in CYGWIN*) - BLDLIBRARY='$(LIBRARY)' - LDLIBRARY='libpython$(LDVERSION).dll.a' - ;; + BLDLIBRARY='$(LIBRARY)' + LDLIBRARY='libpython$(LDVERSION).dll.a' + ;; esac fi if test "$cross_compiling" = yes; then - RUNSHARED= + RUNSHARED= fi AC_ARG_VAR([HOSTRUNNER], [Program to run CPython for the host platform]) @@ -5824,7 +5867,7 @@ AC_SUBST([SOABI]) AC_MSG_CHECKING([ABIFLAGS]) AC_MSG_RESULT([$ABIFLAGS]) AC_MSG_CHECKING([SOABI]) -SOABI='cpython-'`echo $VERSION | tr -d .`${ABIFLAGS}${PLATFORM_TRIPLET:+-$PLATFORM_TRIPLET} +SOABI='cpython-'`echo $VERSION | tr -d .`${ABIFLAGS}${SOABI_PLATFORM:+-$SOABI_PLATFORM} AC_MSG_RESULT([$SOABI]) # Release build, debug build (Py_DEBUG), and 
trace refs build (Py_TRACE_REFS) @@ -5832,7 +5875,7 @@ AC_MSG_RESULT([$SOABI]) if test "$Py_DEBUG" = 'true'; then # Similar to SOABI but remove "d" flag from ABIFLAGS AC_SUBST([ALT_SOABI]) - ALT_SOABI='cpython-'`echo $VERSION | tr -d .``echo $ABIFLAGS | tr -d d`${PLATFORM_TRIPLET:+-$PLATFORM_TRIPLET} + ALT_SOABI='cpython-'`echo $VERSION | tr -d .``echo $ABIFLAGS | tr -d d`${SOABI_PLATFORM:+-$SOABI_PLATFORM} AC_DEFINE_UNQUOTED([ALT_SOABI], ["${ALT_SOABI}"], [Alternative SOABI used in debug build to load C extensions built in release mode]) fi From ea25f32d5f7d9ae4358338a3fb49bba9b68051a5 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Tue, 13 Feb 2024 00:28:35 +0000 Subject: [PATCH 093/126] gh-89240: Enable multiprocessing on Windows to use large process pools (GH-107873) We add _winapi.BatchedWaitForMultipleObjects to wait for larger numbers of handles. This is an internal module, hence undocumented, and should be used with caution. Check the docstring for info before using BatchedWaitForMultipleObjects. 
--- .../pycore_global_objects_fini_generated.h | 10 + Include/internal/pycore_global_strings.h | 10 + .../internal/pycore_runtime_init_generated.h | 10 + .../internal/pycore_unicodeobject_generated.h | 30 ++ Lib/multiprocessing/connection.py | 14 +- Lib/test/_test_multiprocessing.py | 18 + Lib/test/test_winapi.py | 94 ++++ ...3-08-11-18-21-38.gh-issue-89240.dtSOLG.rst | 1 + Modules/_winapi.c | 506 ++++++++++++++++++ Modules/clinic/_winapi.c.h | 498 ++++++++++++++++- Objects/exceptions.c | 7 +- PC/errmap.h | 3 + 12 files changed, 1195 insertions(+), 6 deletions(-) create mode 100644 Lib/test/test_winapi.py create mode 100644 Misc/NEWS.d/next/Windows/2023-08-11-18-21-38.gh-issue-89240.dtSOLG.rst diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 932738c3049882..11755210d65432 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -883,6 +883,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(defaultaction)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(delete)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(depth)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(desired_access)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(detect_types)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(deterministic)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(device)); @@ -973,6 +974,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(groups)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(h)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(handle)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(handle_seq)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(hash_name)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(header)); _PyStaticObject_CheckRefcnt((PyObject 
*)&_Py_ID(headers)); @@ -990,9 +992,12 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(indexgroup)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(inf)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(infer_variance)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(inherit_handle)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(inheritable)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(initial)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(initial_bytes)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(initial_owner)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(initial_state)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(initial_value)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(initval)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(inner_size)); @@ -1048,6 +1053,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(locals)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(logoption)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(loop)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(manual_reset)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(mapping)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(match)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(max_length)); @@ -1064,6 +1070,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(metadata)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(method)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(microsecond)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(milliseconds)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(minute)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(mod)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(mode)); @@ -1073,6 +1080,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(month)); 
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(mro)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(msg)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(mutex)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(mycmp)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(n)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(n_arg)); @@ -1176,6 +1184,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sched_priority)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(scheduler)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(second)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(security_attributes)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(seek)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(seekable)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(selectors)); @@ -1263,6 +1272,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(values)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(version)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(volume)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(wait_all)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(warnings)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(warnoptions)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(wbits)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index da62b4f0a951ff..576ac703ca1508 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -372,6 +372,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(defaultaction) STRUCT_FOR_ID(delete) STRUCT_FOR_ID(depth) + STRUCT_FOR_ID(desired_access) STRUCT_FOR_ID(detect_types) STRUCT_FOR_ID(deterministic) STRUCT_FOR_ID(device) @@ -462,6 +463,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(groups) STRUCT_FOR_ID(h) STRUCT_FOR_ID(handle) + STRUCT_FOR_ID(handle_seq) STRUCT_FOR_ID(hash_name) STRUCT_FOR_ID(header) 
STRUCT_FOR_ID(headers) @@ -479,9 +481,12 @@ struct _Py_global_strings { STRUCT_FOR_ID(indexgroup) STRUCT_FOR_ID(inf) STRUCT_FOR_ID(infer_variance) + STRUCT_FOR_ID(inherit_handle) STRUCT_FOR_ID(inheritable) STRUCT_FOR_ID(initial) STRUCT_FOR_ID(initial_bytes) + STRUCT_FOR_ID(initial_owner) + STRUCT_FOR_ID(initial_state) STRUCT_FOR_ID(initial_value) STRUCT_FOR_ID(initval) STRUCT_FOR_ID(inner_size) @@ -537,6 +542,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(locals) STRUCT_FOR_ID(logoption) STRUCT_FOR_ID(loop) + STRUCT_FOR_ID(manual_reset) STRUCT_FOR_ID(mapping) STRUCT_FOR_ID(match) STRUCT_FOR_ID(max_length) @@ -553,6 +559,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(metadata) STRUCT_FOR_ID(method) STRUCT_FOR_ID(microsecond) + STRUCT_FOR_ID(milliseconds) STRUCT_FOR_ID(minute) STRUCT_FOR_ID(mod) STRUCT_FOR_ID(mode) @@ -562,6 +569,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(month) STRUCT_FOR_ID(mro) STRUCT_FOR_ID(msg) + STRUCT_FOR_ID(mutex) STRUCT_FOR_ID(mycmp) STRUCT_FOR_ID(n) STRUCT_FOR_ID(n_arg) @@ -665,6 +673,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(sched_priority) STRUCT_FOR_ID(scheduler) STRUCT_FOR_ID(second) + STRUCT_FOR_ID(security_attributes) STRUCT_FOR_ID(seek) STRUCT_FOR_ID(seekable) STRUCT_FOR_ID(selectors) @@ -752,6 +761,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(values) STRUCT_FOR_ID(version) STRUCT_FOR_ID(volume) + STRUCT_FOR_ID(wait_all) STRUCT_FOR_ID(warnings) STRUCT_FOR_ID(warnoptions) STRUCT_FOR_ID(wbits) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 68fbbcb4378e17..e682c97e7c0248 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -881,6 +881,7 @@ extern "C" { INIT_ID(defaultaction), \ INIT_ID(delete), \ INIT_ID(depth), \ + INIT_ID(desired_access), \ INIT_ID(detect_types), \ INIT_ID(deterministic), \ INIT_ID(device), \ @@ -971,6 +972,7 @@ extern "C" { INIT_ID(groups), \ INIT_ID(h), \ 
INIT_ID(handle), \ + INIT_ID(handle_seq), \ INIT_ID(hash_name), \ INIT_ID(header), \ INIT_ID(headers), \ @@ -988,9 +990,12 @@ extern "C" { INIT_ID(indexgroup), \ INIT_ID(inf), \ INIT_ID(infer_variance), \ + INIT_ID(inherit_handle), \ INIT_ID(inheritable), \ INIT_ID(initial), \ INIT_ID(initial_bytes), \ + INIT_ID(initial_owner), \ + INIT_ID(initial_state), \ INIT_ID(initial_value), \ INIT_ID(initval), \ INIT_ID(inner_size), \ @@ -1046,6 +1051,7 @@ extern "C" { INIT_ID(locals), \ INIT_ID(logoption), \ INIT_ID(loop), \ + INIT_ID(manual_reset), \ INIT_ID(mapping), \ INIT_ID(match), \ INIT_ID(max_length), \ @@ -1062,6 +1068,7 @@ extern "C" { INIT_ID(metadata), \ INIT_ID(method), \ INIT_ID(microsecond), \ + INIT_ID(milliseconds), \ INIT_ID(minute), \ INIT_ID(mod), \ INIT_ID(mode), \ @@ -1071,6 +1078,7 @@ extern "C" { INIT_ID(month), \ INIT_ID(mro), \ INIT_ID(msg), \ + INIT_ID(mutex), \ INIT_ID(mycmp), \ INIT_ID(n), \ INIT_ID(n_arg), \ @@ -1174,6 +1182,7 @@ extern "C" { INIT_ID(sched_priority), \ INIT_ID(scheduler), \ INIT_ID(second), \ + INIT_ID(security_attributes), \ INIT_ID(seek), \ INIT_ID(seekable), \ INIT_ID(selectors), \ @@ -1261,6 +1270,7 @@ extern "C" { INIT_ID(values), \ INIT_ID(version), \ INIT_ID(volume), \ + INIT_ID(wait_all), \ INIT_ID(warnings), \ INIT_ID(warnoptions), \ INIT_ID(wbits), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index c8458b4e36ccc9..739af0e73c23ff 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -957,6 +957,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(depth); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(desired_access); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(detect_types); assert(_PyUnicode_CheckConsistency(string, 1)); 
_PyUnicode_InternInPlace(interp, &string); @@ -1227,6 +1230,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(handle); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(handle_seq); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(hash_name); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); @@ -1278,6 +1284,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(infer_variance); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(inherit_handle); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(inheritable); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); @@ -1287,6 +1296,12 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(initial_bytes); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(initial_owner); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(initial_state); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(initial_value); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); @@ -1452,6 +1467,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(loop); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(manual_reset); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(mapping); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); @@ -1500,6 +1518,9 
@@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(microsecond); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(milliseconds); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(minute); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); @@ -1527,6 +1548,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(msg); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(mutex); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(mycmp); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); @@ -1836,6 +1860,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(second); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(security_attributes); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(seek); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); @@ -2097,6 +2124,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(volume); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(wait_all); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(warnings); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); diff --git a/Lib/multiprocessing/connection.py b/Lib/multiprocessing/connection.py index c6a66a1bc963c3..58d697fdecacc0 100644 --- a/Lib/multiprocessing/connection.py +++ b/Lib/multiprocessing/connection.py @@ -1011,8 +1011,20 @@ def 
_exhaustive_wait(handles, timeout): # returning the first signalled might create starvation issues.) L = list(handles) ready = [] + # Windows limits WaitForMultipleObjects at 64 handles, and we use a + # few for synchronisation, so we switch to batched waits at 60. + if len(L) > 60: + try: + res = _winapi.BatchedWaitForMultipleObjects(L, False, timeout) + except TimeoutError: + return [] + ready.extend(L[i] for i in res) + if res: + L = [h for i, h in enumerate(L) if i > res[0] and i not in res] + timeout = 0 while L: - res = _winapi.WaitForMultipleObjects(L, False, timeout) + short_L = L[:60] if len(L) > 60 else L + res = _winapi.WaitForMultipleObjects(short_L, False, timeout) if res == WAIT_TIMEOUT: break elif WAIT_OBJECT_0 <= res < WAIT_OBJECT_0 + len(L): diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index c0d3ca50f17d69..94ce85cac754ae 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -6113,6 +6113,24 @@ def test_spawn_sys_executable_none_allows_import(self): self.assertEqual(rc, 0) self.assertFalse(err, msg=err.decode('utf-8')) + def test_large_pool(self): + # + # gh-89240: Check that large pools are always okay + # + testfn = os_helper.TESTFN + self.addCleanup(os_helper.unlink, testfn) + with open(testfn, 'w', encoding='utf-8') as f: + f.write(textwrap.dedent('''\ + import multiprocessing + def f(x): return x*x + if __name__ == '__main__': + with multiprocessing.Pool(200) as p: + print(sum(p.map(f, range(1000)))) + ''')) + rc, out, err = script_helper.assert_python_ok(testfn) + self.assertEqual("332833500", out.decode('utf-8').strip()) + self.assertFalse(err, msg=err.decode('utf-8')) + # # Mixins diff --git a/Lib/test/test_winapi.py b/Lib/test/test_winapi.py new file mode 100644 index 00000000000000..014aeea7239e2b --- /dev/null +++ b/Lib/test/test_winapi.py @@ -0,0 +1,94 @@ +# Test the Windows-only _winapi module + +import random +import threading +import time +import unittest +from
test.support import import_helper + +_winapi = import_helper.import_module('_winapi', required_on=['win']) + +MAXIMUM_WAIT_OBJECTS = 64 +MAXIMUM_BATCHED_WAIT_OBJECTS = (MAXIMUM_WAIT_OBJECTS - 1) ** 2 + +class WinAPIBatchedWaitForMultipleObjectsTests(unittest.TestCase): + def _events_waitall_test(self, n): + evts = [_winapi.CreateEventW(0, False, False, None) for _ in range(n)] + + with self.assertRaises(TimeoutError): + _winapi.BatchedWaitForMultipleObjects(evts, True, 100) + + # Ensure no errors raised when all are triggered + for e in evts: + _winapi.SetEvent(e) + try: + _winapi.BatchedWaitForMultipleObjects(evts, True, 100) + except TimeoutError: + self.fail("expected wait to complete immediately") + + # Choose 8 events to set, distributed throughout the list, to make sure + # we don't always have them in the first chunk + chosen = [i * (len(evts) // 8) for i in range(8)] + + # Replace events with invalid handles to make sure we fail + for i in chosen: + old_evt = evts[i] + evts[i] = -1 + with self.assertRaises(OSError): + _winapi.BatchedWaitForMultipleObjects(evts, True, 100) + evts[i] = old_evt + + + def _events_waitany_test(self, n): + evts = [_winapi.CreateEventW(0, False, False, None) for _ in range(n)] + + with self.assertRaises(TimeoutError): + _winapi.BatchedWaitForMultipleObjects(evts, False, 100) + + # Choose 8 events to set, distributed throughout the list, to make sure + # we don't always have them in the first chunk + chosen = [i * (len(evts) // 8) for i in range(8)] + + # Trigger one by one. They are auto-reset events, so will only trigger once + for i in chosen: + with self.subTest(f"trigger event {i} of {len(evts)}"): + _winapi.SetEvent(evts[i]) + triggered = _winapi.BatchedWaitForMultipleObjects(evts, False, 10000) + self.assertSetEqual(set(triggered), {i}) + + # Trigger all at once. 
This may require multiple calls + for i in chosen: + _winapi.SetEvent(evts[i]) + triggered = set() + while len(triggered) < len(chosen): + triggered.update(_winapi.BatchedWaitForMultipleObjects(evts, False, 10000)) + self.assertSetEqual(triggered, set(chosen)) + + # Replace events with invalid handles to make sure we fail + for i in chosen: + with self.subTest(f"corrupt event {i} of {len(evts)}"): + old_evt = evts[i] + evts[i] = -1 + with self.assertRaises(OSError): + _winapi.BatchedWaitForMultipleObjects(evts, False, 100) + evts[i] = old_evt + + + def test_few_events_waitall(self): + self._events_waitall_test(16) + + def test_many_events_waitall(self): + self._events_waitall_test(256) + + def test_max_events_waitall(self): + self._events_waitall_test(MAXIMUM_BATCHED_WAIT_OBJECTS) + + + def test_few_events_waitany(self): + self._events_waitany_test(16) + + def test_many_events_waitany(self): + self._events_waitany_test(256) + + def test_max_events_waitany(self): + self._events_waitany_test(MAXIMUM_BATCHED_WAIT_OBJECTS) diff --git a/Misc/NEWS.d/next/Windows/2023-08-11-18-21-38.gh-issue-89240.dtSOLG.rst b/Misc/NEWS.d/next/Windows/2023-08-11-18-21-38.gh-issue-89240.dtSOLG.rst new file mode 100644 index 00000000000000..8ffe328b16598a --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2023-08-11-18-21-38.gh-issue-89240.dtSOLG.rst @@ -0,0 +1 @@ +Allows :mod:`multiprocessing` to create pools of greater than 62 processes. 
diff --git a/Modules/_winapi.c b/Modules/_winapi.c index 5e5eb123c4ccff..83a4ccd4802ae0 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -438,6 +438,39 @@ _winapi_ConnectNamedPipe_impl(PyObject *module, HANDLE handle, Py_RETURN_NONE; } +/*[clinic input] +_winapi.CreateEventW -> HANDLE + + security_attributes: LPSECURITY_ATTRIBUTES + manual_reset: BOOL + initial_state: BOOL + name: LPCWSTR(accept={str, NoneType}) +[clinic start generated code]*/ + +static HANDLE +_winapi_CreateEventW_impl(PyObject *module, + LPSECURITY_ATTRIBUTES security_attributes, + BOOL manual_reset, BOOL initial_state, + LPCWSTR name) +/*[clinic end generated code: output=2d4c7d5852ecb298 input=4187cee28ac763f8]*/ +{ + HANDLE handle; + + if (PySys_Audit("_winapi.CreateEventW", "bbu", manual_reset, initial_state, name) < 0) { + return INVALID_HANDLE_VALUE; + } + + Py_BEGIN_ALLOW_THREADS + handle = CreateEventW(security_attributes, manual_reset, initial_state, name); + Py_END_ALLOW_THREADS + + if (handle == NULL) { /* CreateEventW reports failure as NULL; the HANDLE return converter expects INVALID_HANDLE_VALUE plus a set exception */ + PyErr_SetFromWindowsErr(0); + handle = INVALID_HANDLE_VALUE; + } + return handle; +} + /*[clinic input] _winapi.CreateFile -> HANDLE @@ -674,6 +707,37 @@ _winapi_CreateJunction_impl(PyObject *module, LPCWSTR src_path, Py_RETURN_NONE; } +/*[clinic input] +_winapi.CreateMutexW -> HANDLE + + security_attributes: LPSECURITY_ATTRIBUTES + initial_owner: BOOL + name: LPCWSTR(accept={str, NoneType}) +[clinic start generated code]*/ + +static HANDLE +_winapi_CreateMutexW_impl(PyObject *module, + LPSECURITY_ATTRIBUTES security_attributes, + BOOL initial_owner, LPCWSTR name) +/*[clinic end generated code: output=31b9ee8fc37e49a5 input=7d54b921e723254a]*/ +{ + HANDLE handle; + + if (PySys_Audit("_winapi.CreateMutexW", "bu", initial_owner, name) < 0) { + return INVALID_HANDLE_VALUE; + } + + Py_BEGIN_ALLOW_THREADS + handle = CreateMutexW(security_attributes, initial_owner, name); + Py_END_ALLOW_THREADS + + if (handle == NULL) { /* CreateMutexW reports failure as NULL; the HANDLE return converter expects INVALID_HANDLE_VALUE plus a set exception */ + PyErr_SetFromWindowsErr(0); + handle = INVALID_HANDLE_VALUE; + } + return handle; +} +
/*[clinic input] _winapi.CreateNamedPipe -> HANDLE @@ -1590,6 +1654,67 @@ _winapi_UnmapViewOfFile_impl(PyObject *module, LPCVOID address) Py_RETURN_NONE; } +/*[clinic input] +_winapi.OpenEventW -> HANDLE + + desired_access: DWORD + inherit_handle: BOOL + name: LPCWSTR +[clinic start generated code]*/ + +static HANDLE +_winapi_OpenEventW_impl(PyObject *module, DWORD desired_access, + BOOL inherit_handle, LPCWSTR name) +/*[clinic end generated code: output=c4a45e95545a4bd2 input=dec26598748d35aa]*/ +{ + HANDLE handle; + + if (PySys_Audit("_winapi.OpenEventW", "Iu", desired_access, name) < 0) { + return INVALID_HANDLE_VALUE; + } + + Py_BEGIN_ALLOW_THREADS + handle = OpenEventW(desired_access, inherit_handle, name); + Py_END_ALLOW_THREADS + + if (handle == NULL) { /* OpenEventW reports failure as NULL; the HANDLE return converter expects INVALID_HANDLE_VALUE plus a set exception */ + PyErr_SetFromWindowsErr(0); + handle = INVALID_HANDLE_VALUE; + } + return handle; +} + + +/*[clinic input] +_winapi.OpenMutexW -> HANDLE + + desired_access: DWORD + inherit_handle: BOOL + name: LPCWSTR +[clinic start generated code]*/ + +static HANDLE +_winapi_OpenMutexW_impl(PyObject *module, DWORD desired_access, + BOOL inherit_handle, LPCWSTR name) +/*[clinic end generated code: output=dda39d7844397bf0 input=f3a7b466c5307712]*/ +{ + HANDLE handle; + + if (PySys_Audit("_winapi.OpenMutexW", "Iu", desired_access, name) < 0) { + return INVALID_HANDLE_VALUE; + } + + Py_BEGIN_ALLOW_THREADS + handle = OpenMutexW(desired_access, inherit_handle, name); + Py_END_ALLOW_THREADS + + if (handle == NULL) { /* OpenMutexW reports failure as NULL; the HANDLE return converter expects INVALID_HANDLE_VALUE plus a set exception */ + PyErr_SetFromWindowsErr(0); + handle = INVALID_HANDLE_VALUE; + } + return handle; +} + /*[clinic input] _winapi.OpenFileMapping -> HANDLE @@ -1820,6 +1945,75 @@ _winapi_ReadFile_impl(PyObject *module, HANDLE handle, DWORD size, return Py_BuildValue("NI", buf, err); } +/*[clinic input] +_winapi.ReleaseMutex + + mutex: HANDLE +[clinic start generated code]*/ + +static PyObject * +_winapi_ReleaseMutex_impl(PyObject *module, HANDLE mutex) +/*[clinic end generated code: output=5b9001a72dd8af37 input=49e9d20de3559d84]*/ +{ + int err = 0; + +
Py_BEGIN_ALLOW_THREADS + if (!ReleaseMutex(mutex)) { + err = GetLastError(); + } + Py_END_ALLOW_THREADS + if (err) { + return PyErr_SetFromWindowsErr(err); + } + Py_RETURN_NONE; +} + +/*[clinic input] +_winapi.ResetEvent + + event: HANDLE +[clinic start generated code]*/ + +static PyObject * +_winapi_ResetEvent_impl(PyObject *module, HANDLE event) +/*[clinic end generated code: output=81c8501d57c0530d input=e2d42d990322e87a]*/ +{ + int err = 0; + + Py_BEGIN_ALLOW_THREADS + if (!ResetEvent(event)) { + err = GetLastError(); + } + Py_END_ALLOW_THREADS + if (err) { + return PyErr_SetFromWindowsErr(err); + } + Py_RETURN_NONE; +} + +/*[clinic input] +_winapi.SetEvent + + event: HANDLE +[clinic start generated code]*/ + +static PyObject * +_winapi_SetEvent_impl(PyObject *module, HANDLE event) +/*[clinic end generated code: output=c18ba09eb9aa774d input=e660e830a37c09f8]*/ +{ + int err = 0; + + Py_BEGIN_ALLOW_THREADS + if (!SetEvent(event)) { + err = GetLastError(); + } + Py_END_ALLOW_THREADS + if (err) { + return PyErr_SetFromWindowsErr(err); + } + Py_RETURN_NONE; +} + /*[clinic input] _winapi.SetNamedPipeHandleState @@ -1942,6 +2136,310 @@ _winapi_WaitNamedPipe_impl(PyObject *module, LPCTSTR name, DWORD timeout) Py_RETURN_NONE; } + +typedef struct { + HANDLE handles[MAXIMUM_WAIT_OBJECTS]; + HANDLE cancel_event; + DWORD handle_base; + DWORD handle_count; + HANDLE thread; + volatile DWORD result; +} BatchedWaitData; + +static DWORD WINAPI +_batched_WaitForMultipleObjects_thread(LPVOID param) +{ + BatchedWaitData *data = (BatchedWaitData *)param; + data->result = WaitForMultipleObjects( + data->handle_count, + data->handles, + FALSE, + INFINITE + ); + if (data->result == WAIT_FAILED) { + DWORD err = GetLastError(); + SetEvent(data->cancel_event); + return err; + } else if (data->result >= WAIT_ABANDONED_0 && data->result < WAIT_ABANDONED_0 + MAXIMUM_WAIT_OBJECTS) { + data->result = WAIT_FAILED; + SetEvent(data->cancel_event); + return ERROR_ABANDONED_WAIT_0; + } + return 0; 
+} + +/*[clinic input] +_winapi.BatchedWaitForMultipleObjects + + handle_seq: object + wait_all: BOOL + milliseconds: DWORD(c_default='INFINITE') = _winapi.INFINITE + +Supports a larger number of handles than WaitForMultipleObjects + +Note that the handles may be waited on other threads, which could cause +issues for objects like mutexes that become associated with the thread +that was waiting for them. Objects may also be left signalled, even if +the wait fails. + +It is recommended to use WaitForMultipleObjects whenever possible, and +only switch to BatchedWaitForMultipleObjects for scenarios where you +control all the handles involved, such as your own thread pool or +files, and all wait objects are left unmodified by a wait (for example, +manual reset events, threads, and files/pipes). + +Overlapped handles returned from this module use manual reset events. +[clinic start generated code]*/ + +static PyObject * +_winapi_BatchedWaitForMultipleObjects_impl(PyObject *module, + PyObject *handle_seq, + BOOL wait_all, DWORD milliseconds) +/*[clinic end generated code: output=d21c1a4ad0a252fd input=7e196f29005dc77b]*/ +{ + Py_ssize_t thread_count = 0, handle_count = 0, i, j; + Py_ssize_t nhandles; + BatchedWaitData *thread_data[MAXIMUM_WAIT_OBJECTS]; + HANDLE handles[MAXIMUM_WAIT_OBJECTS]; + HANDLE sigint_event = NULL; + HANDLE cancel_event = NULL; + DWORD result; + + const Py_ssize_t _MAXIMUM_TOTAL_OBJECTS = (MAXIMUM_WAIT_OBJECTS - 1) * (MAXIMUM_WAIT_OBJECTS - 1); + + if (!PySequence_Check(handle_seq)) { + PyErr_Format(PyExc_TypeError, + "sequence type expected, got '%s'", + Py_TYPE(handle_seq)->tp_name); + return NULL; + } + nhandles = PySequence_Length(handle_seq); + if (nhandles == -1) { + return NULL; + } + if (nhandles == 0) { + return wait_all ? Py_NewRef(Py_None) : PyList_New(0); + } + + /* If this is the main thread then make the wait interruptible + by Ctrl-C. When waiting for *all* handles, it is only checked + in between batches. 
*/ + if (_PyOS_IsMainThread()) { + sigint_event = _PyOS_SigintEvent(); + assert(sigint_event != NULL); + } + + if (nhandles < 0 || nhandles > _MAXIMUM_TOTAL_OBJECTS) { + PyErr_Format(PyExc_ValueError, + "need at most %zd handles, got a sequence of length %zd", + _MAXIMUM_TOTAL_OBJECTS, nhandles); + return NULL; + } + + if (!wait_all) { + cancel_event = CreateEventW(NULL, TRUE, FALSE, NULL); + if (!cancel_event) { + PyErr_SetExcFromWindowsErr(PyExc_OSError, 0); + return NULL; + } + } + + i = 0; + while (i < nhandles) { + BatchedWaitData *data = (BatchedWaitData*)PyMem_Malloc(sizeof(BatchedWaitData)); + if (!data) { + goto error; + } + thread_data[thread_count++] = data; + data->thread = NULL; + data->cancel_event = cancel_event; + data->handle_base = Py_SAFE_DOWNCAST(i, Py_ssize_t, DWORD); + data->handle_count = Py_SAFE_DOWNCAST(nhandles - i, Py_ssize_t, DWORD); + if (data->handle_count > MAXIMUM_WAIT_OBJECTS - 1) { + data->handle_count = MAXIMUM_WAIT_OBJECTS - 1; + } + for (j = 0; j < data->handle_count; ++i, ++j) { + PyObject *v = PySequence_GetItem(handle_seq, i); + if (!v || !PyArg_Parse(v, F_HANDLE, &data->handles[j])) { + Py_XDECREF(v); + goto error; + } + Py_DECREF(v); + } + if (!wait_all) { + data->handles[data->handle_count++] = cancel_event; + } + } + + DWORD err = 0; + + /* We need to use different strategies when waiting for ALL handles + as opposed to ANY handle. This is because there is no way to + (safely) interrupt a thread that is waiting for all handles in a + group. So for ALL handles, we loop over each set and wait. For + ANY handle, we use threads and wait on them. 
*/ + if (wait_all) { + Py_BEGIN_ALLOW_THREADS + long long deadline = 0; + if (milliseconds != INFINITE) { + deadline = (long long)GetTickCount64() + milliseconds; + } + + for (i = 0; !err && i < thread_count; ++i) { + DWORD timeout = milliseconds; + if (deadline) { + long long time_to_deadline = deadline - GetTickCount64(); + if (time_to_deadline <= 0) { + err = WAIT_TIMEOUT; + break; + } else if (time_to_deadline < UINT_MAX) { + timeout = (DWORD)time_to_deadline; + } + } + result = WaitForMultipleObjects(thread_data[i]->handle_count, + thread_data[i]->handles, TRUE, timeout); + // ABANDONED is not possible here because we own all the handles + if (result == WAIT_FAILED) { + err = GetLastError(); + } else if (result == WAIT_TIMEOUT) { + err = WAIT_TIMEOUT; + } + + if (!err && sigint_event) { + result = WaitForSingleObject(sigint_event, 0); + if (result == WAIT_OBJECT_0) { + err = ERROR_CONTROL_C_EXIT; + } else if (result == WAIT_FAILED) { + err = GetLastError(); + } + } + } + + CloseHandle(cancel_event); + + Py_END_ALLOW_THREADS + } else { + Py_BEGIN_ALLOW_THREADS + + for (i = 0; i < thread_count; ++i) { + BatchedWaitData *data = thread_data[i]; + data->thread = CreateThread( + NULL, + 1, // smallest possible initial stack + _batched_WaitForMultipleObjects_thread, + (LPVOID)data, + CREATE_SUSPENDED, + NULL + ); + if (!data->thread) { + err = GetLastError(); + break; + } + handles[handle_count++] = data->thread; + } + Py_END_ALLOW_THREADS + + if (err) { + PyErr_SetExcFromWindowsErr(PyExc_OSError, err); + goto error; + } + if (handle_count > MAXIMUM_WAIT_OBJECTS - 1) { + // basically an assert, but stronger + PyErr_SetString(PyExc_SystemError, "allocated too many wait objects"); + goto error; + } + + Py_BEGIN_ALLOW_THREADS + + // Once we start resuming threads, can no longer "goto error" + for (i = 0; i < thread_count; ++i) { + ResumeThread(thread_data[i]->thread); + } + if (sigint_event) { + handles[handle_count++] = sigint_event; + } + result = 
WaitForMultipleObjects((DWORD)handle_count, handles, wait_all, milliseconds); + // ABANDONED is not possible here because we own all the handles + if (result == WAIT_FAILED) { + err = GetLastError(); + } else if (result == WAIT_TIMEOUT) { + err = WAIT_TIMEOUT; + } else if (sigint_event && result == WAIT_OBJECT_0 + handle_count - 1) { /* sigint_event was appended last, so its index is handle_count - 1 */ + err = ERROR_CONTROL_C_EXIT; + } + + SetEvent(cancel_event); + + // Wait for all threads to finish before we start freeing their memory + if (sigint_event) { + handle_count -= 1; + } + WaitForMultipleObjects((DWORD)handle_count, handles, TRUE, INFINITE); + + for (i = 0; i < thread_count; ++i) { + if (!err && thread_data[i]->result == WAIT_FAILED) { + if (!GetExitCodeThread(thread_data[i]->thread, &err)) { + err = GetLastError(); + } + } + CloseHandle(thread_data[i]->thread); + } + + CloseHandle(cancel_event); + + Py_END_ALLOW_THREADS + + } + + PyObject *triggered_indices; + if (sigint_event != NULL && err == ERROR_CONTROL_C_EXIT) { + errno = EINTR; + PyErr_SetFromErrno(PyExc_OSError); + triggered_indices = NULL; + } else if (err) { + PyErr_SetExcFromWindowsErr(PyExc_OSError, err); + triggered_indices = NULL; + } else if (wait_all) { + triggered_indices = Py_NewRef(Py_None); + } else { + triggered_indices = PyList_New(0); + if (triggered_indices) { + for (i = 0; i < thread_count; ++i) { + Py_ssize_t triggered = (Py_ssize_t)thread_data[i]->result - WAIT_OBJECT_0; + if (triggered >= 0 && triggered < thread_data[i]->handle_count - 1) { + PyObject *v = PyLong_FromSsize_t(thread_data[i]->handle_base + triggered); + if (!v || PyList_Append(triggered_indices, v) < 0) { + Py_XDECREF(v); + Py_CLEAR(triggered_indices); + break; + } + Py_DECREF(v); + } + } + } + } + + for (i = 0; i < thread_count; ++i) { + PyMem_Free((void *)thread_data[i]); + } + + return triggered_indices; + +error: + // We should only enter here before any threads start running.
+ // Once we start resuming threads, different cleanup is required + CloseHandle(cancel_event); + while (--thread_count >= 0) { + HANDLE t = thread_data[thread_count]->thread; + if (t) { + TerminateThread(t, WAIT_ABANDONED_0); + CloseHandle(t); + } + PyMem_Free((void *)thread_data[thread_count]); + } + return NULL; +} + /*[clinic input] _winapi.WaitForMultipleObjects @@ -2335,8 +2833,10 @@ _winapi_CopyFile2_impl(PyObject *module, LPCWSTR existing_file_name, static PyMethodDef winapi_functions[] = { _WINAPI_CLOSEHANDLE_METHODDEF _WINAPI_CONNECTNAMEDPIPE_METHODDEF + _WINAPI_CREATEEVENTW_METHODDEF _WINAPI_CREATEFILE_METHODDEF _WINAPI_CREATEFILEMAPPING_METHODDEF + _WINAPI_CREATEMUTEXW_METHODDEF _WINAPI_CREATENAMEDPIPE_METHODDEF _WINAPI_CREATEPIPE_METHODDEF _WINAPI_CREATEPROCESS_METHODDEF @@ -2350,17 +2850,23 @@ static PyMethodDef winapi_functions[] = { _WINAPI_GETSTDHANDLE_METHODDEF _WINAPI_GETVERSION_METHODDEF _WINAPI_MAPVIEWOFFILE_METHODDEF + _WINAPI_OPENEVENTW_METHODDEF _WINAPI_OPENFILEMAPPING_METHODDEF + _WINAPI_OPENMUTEXW_METHODDEF _WINAPI_OPENPROCESS_METHODDEF _WINAPI_PEEKNAMEDPIPE_METHODDEF _WINAPI_LCMAPSTRINGEX_METHODDEF _WINAPI_READFILE_METHODDEF + _WINAPI_RELEASEMUTEX_METHODDEF + _WINAPI_RESETEVENT_METHODDEF + _WINAPI_SETEVENT_METHODDEF _WINAPI_SETNAMEDPIPEHANDLESTATE_METHODDEF _WINAPI_TERMINATEPROCESS_METHODDEF _WINAPI_UNMAPVIEWOFFILE_METHODDEF _WINAPI_VIRTUALQUERYSIZE_METHODDEF _WINAPI_WAITNAMEDPIPE_METHODDEF _WINAPI_WAITFORMULTIPLEOBJECTS_METHODDEF + _WINAPI_BATCHEDWAITFORMULTIPLEOBJECTS_METHODDEF _WINAPI_WAITFORSINGLEOBJECT_METHODDEF _WINAPI_WRITEFILE_METHODDEF _WINAPI_GETACP_METHODDEF diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h index d1052f38919dde..468457e624c691 100644 --- a/Modules/clinic/_winapi.c.h +++ b/Modules/clinic/_winapi.c.h @@ -151,6 +151,76 @@ _winapi_ConnectNamedPipe(PyObject *module, PyObject *const *args, Py_ssize_t nar return return_value; } +PyDoc_STRVAR(_winapi_CreateEventW__doc__, +"CreateEventW($module, /, 
security_attributes, manual_reset,\n" +" initial_state, name)\n" +"--\n" +"\n"); + +#define _WINAPI_CREATEEVENTW_METHODDEF \ + {"CreateEventW", _PyCFunction_CAST(_winapi_CreateEventW), METH_FASTCALL|METH_KEYWORDS, _winapi_CreateEventW__doc__}, + +static HANDLE +_winapi_CreateEventW_impl(PyObject *module, + LPSECURITY_ATTRIBUTES security_attributes, + BOOL manual_reset, BOOL initial_state, + LPCWSTR name); + +static PyObject * +_winapi_CreateEventW(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 4 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(security_attributes), &_Py_ID(manual_reset), &_Py_ID(initial_state), &_Py_ID(name), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"security_attributes", "manual_reset", "initial_state", "name", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .format = "" F_POINTER "iiO&:CreateEventW", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + LPSECURITY_ATTRIBUTES security_attributes; + BOOL manual_reset; + BOOL initial_state; + LPCWSTR name = NULL; + HANDLE _return_value; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + &security_attributes, &manual_reset, &initial_state, _PyUnicode_WideCharString_Opt_Converter, &name)) { + goto exit; + } + _return_value = _winapi_CreateEventW_impl(module, security_attributes, manual_reset, initial_state, name); + if ((_return_value == INVALID_HANDLE_VALUE) && PyErr_Occurred()) { + goto exit; + } + if (_return_value == NULL) { + Py_RETURN_NONE; + } + return_value = HANDLE_TO_PYNUM(_return_value); + 
+exit: + /* Cleanup for name */ + PyMem_Free((void *)name); + + return return_value; +} + PyDoc_STRVAR(_winapi_CreateFile__doc__, "CreateFile($module, file_name, desired_access, share_mode,\n" " security_attributes, creation_disposition,\n" @@ -297,6 +367,73 @@ _winapi_CreateJunction(PyObject *module, PyObject *const *args, Py_ssize_t nargs return return_value; } +PyDoc_STRVAR(_winapi_CreateMutexW__doc__, +"CreateMutexW($module, /, security_attributes, initial_owner, name)\n" +"--\n" +"\n"); + +#define _WINAPI_CREATEMUTEXW_METHODDEF \ + {"CreateMutexW", _PyCFunction_CAST(_winapi_CreateMutexW), METH_FASTCALL|METH_KEYWORDS, _winapi_CreateMutexW__doc__}, + +static HANDLE +_winapi_CreateMutexW_impl(PyObject *module, + LPSECURITY_ATTRIBUTES security_attributes, + BOOL initial_owner, LPCWSTR name); + +static PyObject * +_winapi_CreateMutexW(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 3 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(security_attributes), &_Py_ID(initial_owner), &_Py_ID(name), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"security_attributes", "initial_owner", "name", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .format = "" F_POINTER "iO&:CreateMutexW", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + LPSECURITY_ATTRIBUTES security_attributes; + BOOL initial_owner; + LPCWSTR name = NULL; + HANDLE _return_value; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + &security_attributes, &initial_owner, _PyUnicode_WideCharString_Opt_Converter, &name)) { 
+ goto exit; + } + _return_value = _winapi_CreateMutexW_impl(module, security_attributes, initial_owner, name); + if ((_return_value == INVALID_HANDLE_VALUE) && PyErr_Occurred()) { + goto exit; + } + if (_return_value == NULL) { + Py_RETURN_NONE; + } + return_value = HANDLE_TO_PYNUM(_return_value); + +exit: + /* Cleanup for name */ + PyMem_Free((void *)name); + + return return_value; +} + PyDoc_STRVAR(_winapi_CreateNamedPipe__doc__, "CreateNamedPipe($module, name, open_mode, pipe_mode, max_instances,\n" " out_buffer_size, in_buffer_size, default_timeout,\n" @@ -771,6 +908,138 @@ _winapi_UnmapViewOfFile(PyObject *module, PyObject *arg) return return_value; } +PyDoc_STRVAR(_winapi_OpenEventW__doc__, +"OpenEventW($module, /, desired_access, inherit_handle, name)\n" +"--\n" +"\n"); + +#define _WINAPI_OPENEVENTW_METHODDEF \ + {"OpenEventW", _PyCFunction_CAST(_winapi_OpenEventW), METH_FASTCALL|METH_KEYWORDS, _winapi_OpenEventW__doc__}, + +static HANDLE +_winapi_OpenEventW_impl(PyObject *module, DWORD desired_access, + BOOL inherit_handle, LPCWSTR name); + +static PyObject * +_winapi_OpenEventW(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 3 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(desired_access), &_Py_ID(inherit_handle), &_Py_ID(name), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"desired_access", "inherit_handle", "name", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .format = "kiO&:OpenEventW", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + DWORD desired_access; + BOOL 
inherit_handle; + LPCWSTR name = NULL; + HANDLE _return_value; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + &desired_access, &inherit_handle, _PyUnicode_WideCharString_Converter, &name)) { + goto exit; + } + _return_value = _winapi_OpenEventW_impl(module, desired_access, inherit_handle, name); + if ((_return_value == INVALID_HANDLE_VALUE) && PyErr_Occurred()) { + goto exit; + } + if (_return_value == NULL) { + Py_RETURN_NONE; + } + return_value = HANDLE_TO_PYNUM(_return_value); + +exit: + /* Cleanup for name */ + PyMem_Free((void *)name); + + return return_value; +} + +PyDoc_STRVAR(_winapi_OpenMutexW__doc__, +"OpenMutexW($module, /, desired_access, inherit_handle, name)\n" +"--\n" +"\n"); + +#define _WINAPI_OPENMUTEXW_METHODDEF \ + {"OpenMutexW", _PyCFunction_CAST(_winapi_OpenMutexW), METH_FASTCALL|METH_KEYWORDS, _winapi_OpenMutexW__doc__}, + +static HANDLE +_winapi_OpenMutexW_impl(PyObject *module, DWORD desired_access, + BOOL inherit_handle, LPCWSTR name); + +static PyObject * +_winapi_OpenMutexW(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 3 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(desired_access), &_Py_ID(inherit_handle), &_Py_ID(name), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"desired_access", "inherit_handle", "name", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .format = "kiO&:OpenMutexW", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + DWORD desired_access; + BOOL inherit_handle; + LPCWSTR name = NULL; + HANDLE _return_value; + + if 
(!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + &desired_access, &inherit_handle, _PyUnicode_WideCharString_Converter, &name)) { + goto exit; + } + _return_value = _winapi_OpenMutexW_impl(module, desired_access, inherit_handle, name); + if ((_return_value == INVALID_HANDLE_VALUE) && PyErr_Occurred()) { + goto exit; + } + if (_return_value == NULL) { + Py_RETURN_NONE; + } + return_value = HANDLE_TO_PYNUM(_return_value); + +exit: + /* Cleanup for name */ + PyMem_Free((void *)name); + + return return_value; +} + PyDoc_STRVAR(_winapi_OpenFileMapping__doc__, "OpenFileMapping($module, desired_access, inherit_handle, name, /)\n" "--\n" @@ -991,6 +1260,162 @@ _winapi_ReadFile(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb return return_value; } +PyDoc_STRVAR(_winapi_ReleaseMutex__doc__, +"ReleaseMutex($module, /, mutex)\n" +"--\n" +"\n"); + +#define _WINAPI_RELEASEMUTEX_METHODDEF \ + {"ReleaseMutex", _PyCFunction_CAST(_winapi_ReleaseMutex), METH_FASTCALL|METH_KEYWORDS, _winapi_ReleaseMutex__doc__}, + +static PyObject * +_winapi_ReleaseMutex_impl(PyObject *module, HANDLE mutex); + +static PyObject * +_winapi_ReleaseMutex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(mutex), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"mutex", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .format = "" F_HANDLE ":ReleaseMutex", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + HANDLE mutex; + + if 
(!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + &mutex)) { + goto exit; + } + return_value = _winapi_ReleaseMutex_impl(module, mutex); + +exit: + return return_value; +} + +PyDoc_STRVAR(_winapi_ResetEvent__doc__, +"ResetEvent($module, /, event)\n" +"--\n" +"\n"); + +#define _WINAPI_RESETEVENT_METHODDEF \ + {"ResetEvent", _PyCFunction_CAST(_winapi_ResetEvent), METH_FASTCALL|METH_KEYWORDS, _winapi_ResetEvent__doc__}, + +static PyObject * +_winapi_ResetEvent_impl(PyObject *module, HANDLE event); + +static PyObject * +_winapi_ResetEvent(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(event), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"event", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .format = "" F_HANDLE ":ResetEvent", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + HANDLE event; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + &event)) { + goto exit; + } + return_value = _winapi_ResetEvent_impl(module, event); + +exit: + return return_value; +} + +PyDoc_STRVAR(_winapi_SetEvent__doc__, +"SetEvent($module, /, event)\n" +"--\n" +"\n"); + +#define _WINAPI_SETEVENT_METHODDEF \ + {"SetEvent", _PyCFunction_CAST(_winapi_SetEvent), METH_FASTCALL|METH_KEYWORDS, _winapi_SetEvent__doc__}, + +static PyObject * +_winapi_SetEvent_impl(PyObject *module, HANDLE event); + +static PyObject * +_winapi_SetEvent(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject 
*return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(event), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"event", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .format = "" F_HANDLE ":SetEvent", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + HANDLE event; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + &event)) { + goto exit; + } + return_value = _winapi_SetEvent_impl(module, event); + +exit: + return return_value; +} + PyDoc_STRVAR(_winapi_SetNamedPipeHandleState__doc__, "SetNamedPipeHandleState($module, named_pipe, mode,\n" " max_collection_count, collect_data_timeout, /)\n" @@ -1114,6 +1539,77 @@ _winapi_WaitNamedPipe(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(_winapi_BatchedWaitForMultipleObjects__doc__, +"BatchedWaitForMultipleObjects($module, /, handle_seq, wait_all,\n" +" milliseconds=_winapi.INFINITE)\n" +"--\n" +"\n" +"Supports a larger number of handles than WaitForMultipleObjects\n" +"\n" +"Note that the handles may be waited on other threads, which could cause\n" +"issues for objects like mutexes that become associated with the thread\n" +"that was waiting for them. 
Objects may also be left signalled, even if\n" +"the wait fails.\n" +"\n" +"It is recommended to use WaitForMultipleObjects whenever possible, and\n" +"only switch to BatchedWaitForMultipleObjects for scenarios where you\n" +"control all the handles involved, such as your own thread pool or\n" +"files, and all wait objects are left unmodified by a wait (for example,\n" +"manual reset events, threads, and files/pipes).\n" +"\n" +"Overlapped handles returned from this module use manual reset events."); + +#define _WINAPI_BATCHEDWAITFORMULTIPLEOBJECTS_METHODDEF \ + {"BatchedWaitForMultipleObjects", _PyCFunction_CAST(_winapi_BatchedWaitForMultipleObjects), METH_FASTCALL|METH_KEYWORDS, _winapi_BatchedWaitForMultipleObjects__doc__}, + +static PyObject * +_winapi_BatchedWaitForMultipleObjects_impl(PyObject *module, + PyObject *handle_seq, + BOOL wait_all, DWORD milliseconds); + +static PyObject * +_winapi_BatchedWaitForMultipleObjects(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 3 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(handle_seq), &_Py_ID(wait_all), &_Py_ID(milliseconds), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"handle_seq", "wait_all", "milliseconds", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .format = "Oi|k:BatchedWaitForMultipleObjects", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *handle_seq; + BOOL wait_all; + DWORD milliseconds = INFINITE; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + &handle_seq, &wait_all, &milliseconds)) { 
+ goto exit; + } + return_value = _winapi_BatchedWaitForMultipleObjects_impl(module, handle_seq, wait_all, milliseconds); + +exit: + return return_value; +} + PyDoc_STRVAR(_winapi_WaitForMultipleObjects__doc__, "WaitForMultipleObjects($module, handle_seq, wait_flag,\n" " milliseconds=_winapi.INFINITE, /)\n" @@ -1482,4 +1978,4 @@ _winapi_CopyFile2(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyO return return_value; } -/*[clinic end generated code: output=2350d4f2275d3a6f input=a9049054013a1b77]*/ +/*[clinic end generated code: output=1f5bbcfa8d1847c5 input=a9049054013a1b77]*/ diff --git a/Objects/exceptions.c b/Objects/exceptions.c index cff55d05163b6b..3df3a9b3b1a253 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -3539,7 +3539,6 @@ SimpleExtendsException(PyExc_Warning, ResourceWarning, #undef EOPNOTSUPP #undef EPROTONOSUPPORT #undef EPROTOTYPE -#undef ETIMEDOUT #undef EWOULDBLOCK #if defined(WSAEALREADY) && !defined(EALREADY) @@ -3560,9 +3559,6 @@ SimpleExtendsException(PyExc_Warning, ResourceWarning, #if defined(WSAESHUTDOWN) && !defined(ESHUTDOWN) #define ESHUTDOWN WSAESHUTDOWN #endif -#if defined(WSAETIMEDOUT) && !defined(ETIMEDOUT) -#define ETIMEDOUT WSAETIMEDOUT -#endif #if defined(WSAEWOULDBLOCK) && !defined(EWOULDBLOCK) #define EWOULDBLOCK WSAEWOULDBLOCK #endif @@ -3747,6 +3743,9 @@ _PyExc_InitState(PyInterpreterState *interp) #endif ADD_ERRNO(ProcessLookupError, ESRCH); ADD_ERRNO(TimeoutError, ETIMEDOUT); +#ifdef WSAETIMEDOUT + ADD_ERRNO(TimeoutError, WSAETIMEDOUT); +#endif return _PyStatus_OK(); diff --git a/PC/errmap.h b/PC/errmap.h index a7489ab75c6561..a064ecb80b1ed9 100644 --- a/PC/errmap.h +++ b/PC/errmap.h @@ -129,6 +129,9 @@ winerror_to_errno(int winerror) case ERROR_NO_UNICODE_TRANSLATION: // 1113 return EILSEQ; + case WAIT_TIMEOUT: // 258 + return ETIMEDOUT; + case ERROR_INVALID_FUNCTION: // 1 case ERROR_INVALID_ACCESS: // 12 case ERROR_INVALID_DATA: // 13 From 0a6e1a4119864bec0247b04a5c99fdd9799cd8eb Mon Sep 17 
00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Tue, 13 Feb 2024 08:31:49 +0200 Subject: [PATCH 094/126] Update "Using Python on a Mac" (#115024) --- Doc/conf.py | 4 ++ Doc/using/mac.rst | 105 ++++++++++++++++++++++++++-------------------- 2 files changed, 63 insertions(+), 46 deletions(-) diff --git a/Doc/conf.py b/Doc/conf.py index aa7f85bc1b3efa..677d139046e5d0 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -64,6 +64,10 @@ import patchlevel version, release = patchlevel.get_version_info() +rst_epilog = f""" +.. |python_version_literal| replace:: ``Python {version}`` +""" + # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: today = '' diff --git a/Doc/using/mac.rst b/Doc/using/mac.rst index eb1413af2cbc3d..e99993238895f9 100644 --- a/Doc/using/mac.rst +++ b/Doc/using/mac.rst @@ -10,41 +10,46 @@ Using Python on a Mac Python on a Mac running macOS is in principle very similar to Python on any other Unix platform, but there are a number of additional features such as -the IDE and the Package Manager that are worth pointing out. +the integrated development environment (IDE) and the Package Manager that are +worth pointing out. + .. _getting-osx: +.. _getting-and-installing-macpython: -Getting and Installing MacPython -================================ +Getting and Installing Python +============================= macOS used to come with Python 2.7 pre-installed between versions 10.8 and `12.3 `_. -You are invited to install the most recent version of Python 3 from the Python -website (https://www.python.org). A current "universal binary" build of Python, -which runs natively on the Mac's new Intel and legacy PPC CPU's, is available -there. +You are invited to install the most recent version of Python 3 from the `Python +website `__. 
+A current "universal2 binary" build of Python, which runs natively on the Mac's +new Apple Silicon and legacy Intel processors, is available there. What you get after installing is a number of things: -* A :file:`Python 3.12` folder in your :file:`Applications` folder. In here +* A |python_version_literal| folder in your :file:`Applications` folder. In here you find IDLE, the development environment that is a standard part of official - Python distributions; and PythonLauncher, which handles double-clicking Python + Python distributions; and :program:`Python Launcher`, which handles double-clicking Python scripts from the Finder. * A framework :file:`/Library/Frameworks/Python.framework`, which includes the Python executable and libraries. The installer adds this location to your shell - path. To uninstall MacPython, you can simply remove these three things. A - symlink to the Python executable is placed in /usr/local/bin/. - -The Apple-provided build of Python is installed in -:file:`/System/Library/Frameworks/Python.framework` and :file:`/usr/bin/python`, -respectively. You should never modify or delete these, as they are -Apple-controlled and are used by Apple- or third-party software. Remember that -if you choose to install a newer Python version from python.org, you will have -two different but functional Python installations on your computer, so it will -be important that your paths and usages are consistent with what you want to do. - -IDLE includes a help menu that allows you to access Python documentation. If you + path. To uninstall Python, you can remove these three things. A + symlink to the Python executable is placed in :file:`/usr/local/bin/`. + +.. note:: + + On macOS 10.8-12.3, the Apple-provided build of Python is installed in + :file:`/System/Library/Frameworks/Python.framework` and :file:`/usr/bin/python`, + respectively. You should never modify or delete these, as they are + Apple-controlled and are used by Apple- or third-party software. 
Remember that + if you choose to install a newer Python version from python.org, you will have + two different but functional Python installations on your computer, so it will + be important that your paths and usages are consistent with what you want to do. + +IDLE includes a Help menu that allows you to access Python documentation. If you are completely new to Python you should start reading the tutorial introduction in that document. @@ -56,29 +61,29 @@ How to run a Python script -------------------------- Your best way to get started with Python on macOS is through the IDLE -integrated development environment, see section :ref:`ide` and use the Help menu +integrated development environment; see section :ref:`ide` and use the Help menu when the IDE is running. If you want to run Python scripts from the Terminal window command line or from the Finder you first need an editor to create your script. macOS comes with a -number of standard Unix command line editors, :program:`vim` and -:program:`emacs` among them. If you want a more Mac-like editor, -:program:`BBEdit` or :program:`TextWrangler` from Bare Bones Software (see -http://www.barebones.com/products/bbedit/index.html) are good choices, as is -:program:`TextMate` (see https://macromates.com/). Other editors include -:program:`Gvim` (https://macvim.org/macvim/) and :program:`Aquamacs` -(http://aquamacs.org/). +number of standard Unix command line editors, :program:`vim` and +:program:`nano` among them. If you want a more Mac-like editor, +:program:`BBEdit` from Bare Bones Software (see +https://www.barebones.com/products/bbedit/index.html) is a good choice, as is +:program:`TextMate` (see https://macromates.com). Other editors include +:program:`MacVim` (https://macvim.org) and :program:`Aquamacs` +(https://aquamacs.org). To run your script from the Terminal window you must make sure that :file:`/usr/local/bin` is in your shell search path.
To run your script from the Finder you have two options: -* Drag it to :program:`PythonLauncher` +* Drag it to :program:`Python Launcher`. -* Select :program:`PythonLauncher` as the default application to open your - script (or any .py script) through the finder Info window and double-click it. - :program:`PythonLauncher` has various preferences to control how your script is +* Select :program:`Python Launcher` as the default application to open your + script (or any ``.py`` script) through the Finder Info window and double-click it. + :program:`Python Launcher` has various preferences to control how your script is launched. Option-dragging allows you to change these for one invocation, or use its Preferences menu to change things globally. @@ -103,10 +108,11 @@ Python on macOS honors all standard Unix environment variables such as :envvar:`PYTHONPATH`, but setting these variables for programs started from the Finder is non-standard as the Finder does not read your :file:`.profile` or :file:`.cshrc` at startup. You need to create a file -:file:`~/.MacOSX/environment.plist`. See Apple's Technical Document QA1067 for -details. +:file:`~/.MacOSX/environment.plist`. See Apple's +`Technical Q&A QA1067 `__ +for details. -For more information on installation Python packages in MacPython, see section +For more information on installing Python packages, see section :ref:`mac-package-manager`. @@ -115,9 +121,9 @@ For more information on installation Python packages in MacPython, see section The IDE ======= -MacPython ships with the standard IDLE development environment. A good +Python ships with the standard IDLE development environment. A good introduction to using IDLE can be found at -http://www.hashcollision.org/hkn/python/idle_intro/index.html. +https://www.hashcollision.org/hkn/python/idle_intro/index.html. .. _mac-package-manager: @@ -130,8 +136,10 @@ This section has moved to the `Python Packaging User Guide`_. ..
_Python Packaging User Guide: https://packaging.python.org/en/latest/tutorials/installing-packages/ -GUI Programming on the Mac -========================== +.. _gui-programming-on-the-mac: + +GUI Programming +=============== There are several options for building GUI applications on the Mac with Python. @@ -151,20 +159,25 @@ macOS. Packages and documentation are available from https://www.wxpython.org. macOS. More information can be found at https://riverbankcomputing.com/software/pyqt/intro. +*PySide* is another cross-platform Qt-based toolkit. More information at +https://www.qt.io/qt-for-python. + -Distributing Python Applications on the Mac -=========================================== +.. _distributing-python-applications-on-the-mac: + +Distributing Python Applications +================================ The standard tool for deploying standalone Python applications on the Mac is -:program:`py2app`. More information on installing and using py2app can be found -at https://pypi.org/project/py2app/. +:program:`py2app`. More information on installing and using :program:`py2app` +can be found at https://pypi.org/project/py2app/. Other Resources =============== -The MacPython mailing list is an excellent support resource for Python users and -developers on the Mac: +The Pythonmac-SIG mailing list is an excellent support resource for Python users +and developers on the Mac: https://www.python.org/community/sigs/current/pythonmac-sig/ From d823c235495e69fb4c1286b4ed751731bb31bda9 Mon Sep 17 00:00:00 2001 From: Mariusz Felisiak Date: Tue, 13 Feb 2024 09:47:40 +0100 Subject: [PATCH 095/126] gh-115032: Update DictConfigurator.configure_formatter() comment about `fmt` retry. 
(GH-115303) --- Lib/logging/config.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Lib/logging/config.py b/Lib/logging/config.py index de06090942d965..ea37dd7544564a 100644 --- a/Lib/logging/config.py +++ b/Lib/logging/config.py @@ -667,10 +667,9 @@ def configure_formatter(self, config): except TypeError as te: if "'format'" not in str(te): raise - #Name of parameter changed from fmt to format. - #Retry with old name. - #This is so that code can be used with older Python versions - #(e.g. by Django) + # logging.Formatter and its subclasses expect the `fmt` + # parameter instead of `format`. Retry passing configuration + # with `fmt`. config['fmt'] = config.pop('format') config['()'] = factory result = self.configure_custom(config) From ca3604a3e33d833ef698b44a4b82c5bc8c771fcb Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 13 Feb 2024 12:21:20 +0200 Subject: [PATCH 096/126] gh-115252: Fix test_enum with -OO mode again (GH-115334) --- Lib/test/test_enum.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_enum.py b/Lib/test/test_enum.py index 5d7dae8829574b..61060f3dc29fd4 100644 --- a/Lib/test/test_enum.py +++ b/Lib/test/test_enum.py @@ -4905,15 +4905,15 @@ class Color(enum.Enum) | value | | ---------------------------------------------------------------------- - | Methods inherited from enum.EnumType: + | Static methods inherited from enum.EnumType: | - | __contains__(value) from enum.EnumType + | __contains__(value) | - | __getitem__(name) from enum.EnumType + | __getitem__(name) | - | __iter__() from enum.EnumType + | __iter__() | - | __len__() from enum.EnumType + | __len__() | | ---------------------------------------------------------------------- | Readonly properties inherited from enum.EnumType: From ccc76c3e88647e416184bb1f5210b4e8946ae358 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Tue, 13 Feb 2024 13:40:40 +0300 Subject: [PATCH 097/126] gh-108303: Move all `pydoc` 
related test files to new `test.test_pydoc` package (#114506) --- Lib/pydoc.py | 2 +- Lib/test/libregrtest/findtests.py | 1 + Lib/test/test_pydoc/__init__.py | 6 +++ Lib/test/{ => test_pydoc}/pydoc_mod.py | 0 Lib/test/{ => test_pydoc}/pydocfodder.py | 0 Lib/test/{ => test_pydoc}/test_pydoc.py | 51 ++++++++++++------------ Makefile.pre.in | 1 + 7 files changed, 34 insertions(+), 27 deletions(-) create mode 100644 Lib/test/test_pydoc/__init__.py rename Lib/test/{ => test_pydoc}/pydoc_mod.py (100%) rename Lib/test/{ => test_pydoc}/pydocfodder.py (100%) rename Lib/test/{ => test_pydoc}/test_pydoc.py (98%) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 17f7346e5cc619..6d145abda9d4ab 100755 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -552,7 +552,7 @@ def getdocloc(self, object, basedir=sysconfig.get_path('stdlib')): '_thread', 'zipimport') or (file.startswith(basedir) and not file.startswith(os.path.join(basedir, 'site-packages')))) and - object.__name__ not in ('xml.etree', 'test.pydoc_mod')): + object.__name__ not in ('xml.etree', 'test.test_pydoc.pydoc_mod')): if docloc.startswith(("http://", "https://")): docloc = "{}/{}.html".format(docloc.rstrip("/"), object.__name__.lower()) else: diff --git a/Lib/test/libregrtest/findtests.py b/Lib/test/libregrtest/findtests.py index ee890b5b1db4cd..4ac95e23a56b8f 100644 --- a/Lib/test/libregrtest/findtests.py +++ b/Lib/test/libregrtest/findtests.py @@ -23,6 +23,7 @@ "test_future_stmt", "test_gdb", "test_inspect", + "test_pydoc", "test_multiprocessing_fork", "test_multiprocessing_forkserver", "test_multiprocessing_spawn", diff --git a/Lib/test/test_pydoc/__init__.py b/Lib/test/test_pydoc/__init__.py new file mode 100644 index 00000000000000..f2a39a3fe29c7f --- /dev/null +++ b/Lib/test/test_pydoc/__init__.py @@ -0,0 +1,6 @@ +import os +from test import support + + +def load_tests(*args): + return support.load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/test/pydoc_mod.py b/Lib/test/test_pydoc/pydoc_mod.py 
similarity index 100% rename from Lib/test/pydoc_mod.py rename to Lib/test/test_pydoc/pydoc_mod.py diff --git a/Lib/test/pydocfodder.py b/Lib/test/test_pydoc/pydocfodder.py similarity index 100% rename from Lib/test/pydocfodder.py rename to Lib/test/test_pydoc/pydocfodder.py diff --git a/Lib/test/test_pydoc.py b/Lib/test/test_pydoc/test_pydoc.py similarity index 98% rename from Lib/test/test_pydoc.py rename to Lib/test/test_pydoc/test_pydoc.py index f3c26624c624f5..0dd24e6d347364 100644 --- a/Lib/test/test_pydoc.py +++ b/Lib/test/test_pydoc/test_pydoc.py @@ -34,8 +34,8 @@ captured_stderr, is_emscripten, is_wasi, requires_docstrings, MISSING_C_DOCSTRINGS) from test.support.os_helper import (TESTFN, rmtree, unlink) -from test import pydoc_mod -from test import pydocfodder +from test.test_pydoc import pydoc_mod +from test.test_pydoc import pydocfodder class nonascii: @@ -52,7 +52,7 @@ class nonascii: expected_text_pattern = """ NAME - test.pydoc_mod - This is a test module for test_pydoc + test.test_pydoc.pydoc_mod - This is a test module for test_pydoc %s CLASSES builtins.object @@ -125,7 +125,7 @@ class C(builtins.object) DATA __xyz__ = 'X, Y and Z' - c_alias = test.pydoc_mod.C[int] + c_alias = test.test_pydoc.pydoc_mod.C[int] list_alias1 = typing.List[int] list_alias2 = list[int] type_union1 = typing.Union[int, str] @@ -148,7 +148,7 @@ class C(builtins.object) for s in expected_data_docstrings) html2text_of_expected = """ -test.pydoc_mod (version 1.2.3.4) +test.test_pydoc.pydoc_mod (version 1.2.3.4) This is a test module for test_pydoc Modules @@ -213,7 +213,7 @@ class C(builtins.object) Data __xyz__ = 'X, Y and Z' - c_alias = test.pydoc_mod.C[int] + c_alias = test.test_pydoc.pydoc_mod.C[int] list_alias1 = typing.List[int] list_alias2 = list[int] type_union1 = typing.Union[int, str] @@ -342,7 +342,7 @@ def get_pydoc_link(module): "Returns a documentation web link of a module" abspath = os.path.abspath dirname = os.path.dirname - basedir = 
dirname(dirname(abspath(__file__))) + basedir = dirname(dirname(dirname(abspath(__file__)))) doc = pydoc.TextDoc() loc = doc.getdocloc(module, basedir=basedir) return loc @@ -489,7 +489,7 @@ def test_not_here(self): @requires_docstrings def test_not_ascii(self): - result = run_pydoc('test.test_pydoc.nonascii', PYTHONIOENCODING='ascii') + result = run_pydoc('test.test_pydoc.test_pydoc.nonascii', PYTHONIOENCODING='ascii') encoded = nonascii.__doc__.encode('ascii', 'backslashreplace') self.assertIn(encoded, result) @@ -669,9 +669,9 @@ def test_help_output_redirect(self): buf = StringIO() helper = pydoc.Helper(output=buf) unused, doc_loc = get_pydoc_text(pydoc_mod) - module = "test.pydoc_mod" + module = "test.test_pydoc.pydoc_mod" help_header = """ - Help on module test.pydoc_mod in test: + Help on module test.test_pydoc.pydoc_mod in test.test_pydoc: """.lstrip() help_header = textwrap.dedent(help_header) @@ -1142,7 +1142,6 @@ class TestDescriptions(unittest.TestCase): def test_module(self): # Check that pydocfodder module can be described - from test import pydocfodder doc = pydoc.render_doc(pydocfodder) self.assertIn("pydocfodder", doc) @@ -1425,10 +1424,10 @@ def smeth(*args, **kwargs): self.assertEqual(self._get_summary_line(C.meth), "meth" + unbound) self.assertEqual(self._get_summary_line(C().meth), - "meth" + bound + " method of test.test_pydoc.C instance") + "meth" + bound + " method of test.test_pydoc.test_pydoc.C instance") C.cmeth.__func__.__text_signature__ = text_signature self.assertEqual(self._get_summary_line(C.cmeth), - "cmeth" + bound + " class method of test.test_pydoc.C") + "cmeth" + bound + " class method of test.test_pydoc.test_pydoc.C") C.smeth.__text_signature__ = text_signature self.assertEqual(self._get_summary_line(C.smeth), "smeth" + unbound) @@ -1465,7 +1464,7 @@ def cm(cls, x): 'cm(...)\n' ' A class method\n') self.assertEqual(self._get_summary_lines(X.cm), """\ -cm(x) class method of test.test_pydoc.X +cm(x) class method of 
test.test_pydoc.test_pydoc.X A class method """) self.assertIn(""" @@ -1647,19 +1646,19 @@ def test_text_doc_routines_in_class(self, cls=pydocfodder.B): lines = self.getsection(result, f' | Methods {where}:', ' | ' + '-'*70) self.assertIn(' | A_method_alias = A_method(self)', lines) self.assertIn(' | B_method_alias = B_method(self)', lines) - self.assertIn(' | A_staticmethod(x, y) from test.pydocfodder.A', lines) + self.assertIn(' | A_staticmethod(x, y) from test.test_pydoc.pydocfodder.A', lines) self.assertIn(' | A_staticmethod_alias = A_staticmethod(x, y)', lines) - self.assertIn(' | global_func(x, y) from test.pydocfodder', lines) + self.assertIn(' | global_func(x, y) from test.test_pydoc.pydocfodder', lines) self.assertIn(' | global_func_alias = global_func(x, y)', lines) - self.assertIn(' | global_func2_alias = global_func2(x, y) from test.pydocfodder', lines) + self.assertIn(' | global_func2_alias = global_func2(x, y) from test.test_pydoc.pydocfodder', lines) self.assertIn(' | __repr__(self, /) from builtins.object', lines) self.assertIn(' | object_repr = __repr__(self, /)', lines) lines = self.getsection(result, f' | Static methods {where}:', ' | ' + '-'*70) - self.assertIn(' | A_classmethod_ref = A_classmethod(x) class method of test.pydocfodder.A', lines) - note = '' if cls is pydocfodder.B else ' class method of test.pydocfodder.B' + self.assertIn(' | A_classmethod_ref = A_classmethod(x) class method of test.test_pydoc.pydocfodder.A', lines) + note = '' if cls is pydocfodder.B else ' class method of test.test_pydoc.pydocfodder.B' self.assertIn(' | B_classmethod_ref = B_classmethod(x)' + note, lines) - self.assertIn(' | A_method_ref = A_method() method of test.pydocfodder.A instance', lines) + self.assertIn(' | A_method_ref = A_method() method of test.test_pydoc.pydocfodder.A instance', lines) self.assertIn(' | get(key, default=None, /) method of builtins.dict instance', lines) self.assertIn(' | dict_get = get(key, default=None, /) method of builtins.dict 
instance', lines) @@ -1675,19 +1674,19 @@ def test_html_doc_routines_in_class(self, cls=pydocfodder.B): lines = self.getsection(result, f'Methods {where}:', '-'*70) self.assertIn('A_method_alias = A_method(self)', lines) self.assertIn('B_method_alias = B_method(self)', lines) - self.assertIn('A_staticmethod(x, y) from test.pydocfodder.A', lines) + self.assertIn('A_staticmethod(x, y) from test.test_pydoc.pydocfodder.A', lines) self.assertIn('A_staticmethod_alias = A_staticmethod(x, y)', lines) - self.assertIn('global_func(x, y) from test.pydocfodder', lines) + self.assertIn('global_func(x, y) from test.test_pydoc.pydocfodder', lines) self.assertIn('global_func_alias = global_func(x, y)', lines) - self.assertIn('global_func2_alias = global_func2(x, y) from test.pydocfodder', lines) + self.assertIn('global_func2_alias = global_func2(x, y) from test.test_pydoc.pydocfodder', lines) self.assertIn('__repr__(self, /) from builtins.object', lines) self.assertIn('object_repr = __repr__(self, /)', lines) lines = self.getsection(result, f'Static methods {where}:', '-'*70) - self.assertIn('A_classmethod_ref = A_classmethod(x) class method of test.pydocfodder.A', lines) - note = '' if cls is pydocfodder.B else ' class method of test.pydocfodder.B' + self.assertIn('A_classmethod_ref = A_classmethod(x) class method of test.test_pydoc.pydocfodder.A', lines) + note = '' if cls is pydocfodder.B else ' class method of test.test_pydoc.pydocfodder.B' self.assertIn('B_classmethod_ref = B_classmethod(x)' + note, lines) - self.assertIn('A_method_ref = A_method() method of test.pydocfodder.A instance', lines) + self.assertIn('A_method_ref = A_method() method of test.test_pydoc.pydocfodder.A instance', lines) lines = self.getsection(result, f'Class methods {where}:', '-'*70) self.assertIn('B_classmethod(x)', lines) diff --git a/Makefile.pre.in b/Makefile.pre.in index e0527633ccd03b..cf182980c120ee 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2307,6 +2307,7 @@ TESTSUBDIRS= 
idlelib/idle_test \ test/test_module \ test/test_pathlib \ test/test_peg_generator \ + test/test_pydoc \ test/test_sqlite3 \ test/test_tkinter \ test/test_tomllib \ From 7cce8576226249461baa91c4a89770a1823b44a4 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Tue, 13 Feb 2024 21:24:48 +0800 Subject: [PATCH 098/126] gh-114058: Foundations of the Tier2 redundancy eliminator (GH-115085) --------- Co-authored-by: Mark Shannon <9448417+markshannon@users.noreply.github.com> Co-authored-by: Jules <57632293+JuliaPoo@users.noreply.github.com> Co-authored-by: Guido van Rossum --- .gitattributes | 1 + Include/cpython/pystats.h | 3 + Include/internal/pycore_opcode_metadata.h | 10 +- Include/internal/pycore_optimizer.h | 7 + Include/internal/pycore_uop_metadata.h | 10 +- Lib/test/test_capi/test_opt.py | 209 ++ Lib/test/test_generated_cases.py | 153 ++ Makefile.pre.in | 8 +- ...-01-16-14-41-54.gh-issue-114058.Cb2b8h.rst | 1 + Python/bytecodes.c | 46 +- Python/executor_cases.c.h | 2 +- Python/generated_cases.c.h | 2 +- Python/optimizer.c | 13 +- Python/optimizer_analysis.c | 555 +++++- Python/specialize.c | 5 + .../tier2_redundancy_eliminator_bytecodes.c | 272 +++ Python/tier2_redundancy_eliminator_cases.c.h | 1676 +++++++++++++++++ Tools/c-analyzer/cpython/_parser.py | 2 + Tools/c-analyzer/cpython/ignored.tsv | 2 +- Tools/cases_generator/README.md | 3 + Tools/cases_generator/analyzer.py | 6 +- .../cases_generator/interpreter_definition.md | 26 +- Tools/cases_generator/parsing.py | 26 +- Tools/cases_generator/stack.py | 4 +- .../tier2_abstract_generator.py | 235 +++ 25 files changed, 3137 insertions(+), 140 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-16-14-41-54.gh-issue-114058.Cb2b8h.rst create mode 100644 Python/tier2_redundancy_eliminator_bytecodes.c create mode 100644 Python/tier2_redundancy_eliminator_cases.c.h create mode 100644 Tools/cases_generator/tier2_abstract_generator.py diff --git a/.gitattributes b/.gitattributes index 
2a48df079e1aeb..07d877027b09f6 100644 --- a/.gitattributes +++ b/.gitattributes @@ -94,6 +94,7 @@ Programs/test_frozenmain.h generated Python/Python-ast.c generated Python/executor_cases.c.h generated Python/generated_cases.c.h generated +Python/tier2_redundancy_eliminator_bytecodes.c.h generated Python/opcode_targets.h generated Python/stdlib_module_names.h generated Tools/peg_generator/pegen/grammar_parser.py generated diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index 0f50439b73848e..db9aaedec950e4 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -120,6 +120,9 @@ typedef struct _optimization_stats { uint64_t trace_length_hist[_Py_UOP_HIST_SIZE]; uint64_t trace_run_length_hist[_Py_UOP_HIST_SIZE]; uint64_t optimized_trace_length_hist[_Py_UOP_HIST_SIZE]; + uint64_t optimizer_attempts; + uint64_t optimizer_successes; + uint64_t optimizer_failure_reason_no_memory; } OptimizationStats; typedef struct _rare_event_stats { diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 75d7f44025328e..6b60a6fbffdc5e 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1094,7 +1094,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = { [MATCH_KEYS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [MATCH_MAPPING] = { true, INSTR_FMT_IX, 0 }, [MATCH_SEQUENCE] = { true, INSTR_FMT_IX, 0 }, - [NOP] = { true, INSTR_FMT_IX, 0 }, + [NOP] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [POP_EXCEPT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, [POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, @@ -1156,10 +1156,10 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = { [LOAD_SUPER_METHOD] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ZERO_SUPER_ATTR] = { true, -1, 
HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ZERO_SUPER_METHOD] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [POP_BLOCK] = { true, -1, 0 }, - [SETUP_CLEANUP] = { true, -1, HAS_ARG_FLAG }, - [SETUP_FINALLY] = { true, -1, HAS_ARG_FLAG }, - [SETUP_WITH] = { true, -1, HAS_ARG_FLAG }, + [POP_BLOCK] = { true, -1, HAS_PURE_FLAG }, + [SETUP_CLEANUP] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, + [SETUP_FINALLY] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, + [SETUP_WITH] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, [STORE_FAST_MAYBE_NULL] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, }; #endif diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index e21412fc815540..eee71c700d4904 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -8,6 +8,13 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#include "pycore_uop_ids.h" + +// This is the length of the trace we project initially. 
+#define UOP_MAX_TRACE_LENGTH 512 + +#define TRACE_STACK_SIZE 5 + int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame, _PyUOpInstruction *trace, int trace_len, int curr_stackentries, _PyBloomFilter *dependencies); diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 2b5b37e6b8d6a4..30dc5a881574e7 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -16,7 +16,7 @@ extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1]; #ifdef NEED_OPCODE_METADATA const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { - [_NOP] = 0, + [_NOP] = HAS_PURE_FLAG, [_RESUME_CHECK] = HAS_DEOPT_FLAG, [_LOAD_FAST_CHECK] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG, [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, @@ -202,10 +202,10 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG, [_EXIT_TRACE] = HAS_DEOPT_FLAG, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, - [_LOAD_CONST_INLINE] = 0, - [_LOAD_CONST_INLINE_BORROW] = 0, - [_LOAD_CONST_INLINE_WITH_NULL] = 0, - [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = 0, + [_LOAD_CONST_INLINE] = HAS_PURE_FLAG, + [_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, + [_LOAD_CONST_INLINE_WITH_NULL] = HAS_PURE_FLAG, + [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = HAS_PURE_FLAG, [_CHECK_GLOBALS] = HAS_DEOPT_FLAG, [_CHECK_BUILTINS] = HAS_DEOPT_FLAG, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index e6b1b554c9af10..b64aed10d2d653 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -3,6 +3,7 @@ import sys import textwrap import unittest +import gc import _testinternalcapi @@ -556,6 +557,214 @@ def testfunc(n): # too much already. 
self.assertEqual(count, 1) +class TestUopsOptimization(unittest.TestCase): + + def test_int_type_propagation(self): + def testfunc(loops): + num = 0 + while num < loops: + x = num + num + a = x + 1 + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(32) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 63) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] + self.assertGreaterEqual(len(binop_count), 3) + self.assertLessEqual(len(guard_both_int_count), 1) + + def test_int_type_propagation_through_frame(self): + def double(x): + return x + x + def testfunc(loops): + num = 0 + while num < loops: + x = num + num + a = double(x) + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(32) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 124) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] + self.assertGreaterEqual(len(binop_count), 3) + self.assertLessEqual(len(guard_both_int_count), 1) + + def test_int_type_propagation_from_frame(self): + def double(x): + return x + x + def testfunc(loops): + num = 0 + while num < loops: + a = double(num) + x = a + a + num += 1 + return x + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(32) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + self.assertEqual(res, 124) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] + self.assertGreaterEqual(len(binop_count), 3) + 
self.assertLessEqual(len(guard_both_int_count), 1) + + def test_int_impure_region(self): + def testfunc(loops): + num = 0 + while num < loops: + x = num + num + y = 1 + x // 2 + a = x + y + num += 1 + return a + + opt = _testinternalcapi.get_uop_optimizer() + res = None + with temporary_optimizer(opt): + res = testfunc(64) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + self.assertGreaterEqual(len(binop_count), 3) + + def test_call_py_exact_args(self): + def testfunc(n): + def dummy(x): + return x+1 + for i in range(n): + dummy(i) + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(20) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_PUSH_FRAME", uops) + self.assertIn("_BINARY_OP_ADD_INT", uops) + self.assertNotIn("_CHECK_PEP_523", uops) + + def test_int_type_propagate_through_range(self): + def testfunc(n): + + for i in range(n): + x = i + i + return x + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + res = testfunc(20) + + ex = get_first_executor(testfunc) + self.assertEqual(res, 19 * 2) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_GUARD_BOTH_INT", uops) + + def test_int_value_numbering(self): + def testfunc(n): + + y = 1 + for i in range(n): + x = y + z = x + a = z + b = a + res = x + z + a + b + return res + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + res = testfunc(20) + + ex = get_first_executor(testfunc) + self.assertEqual(res, 4) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_GUARD_BOTH_INT", uops) + guard_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] + self.assertEqual(len(guard_count), 1) + + def test_comprehension(self): + def testfunc(n): + for _ in range(n): + 
return [i for i in range(n)] + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(20) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + + def test_call_py_exact_args_disappearing(self): + def dummy(x): + return x+1 + + def testfunc(n): + for i in range(n): + dummy(i) + + opt = _testinternalcapi.get_uop_optimizer() + # Trigger specialization + testfunc(8) + with temporary_optimizer(opt): + del dummy + gc.collect() + + def dummy(x): + return x + 2 + testfunc(10) + + ex = get_first_executor(testfunc) + # Honestly as long as it doesn't crash it's fine. + # Whether we get an executor or not is non-deterministic, + # because it's decided by when the function is freed. + # This test is a little implementation specific. + + def test_promote_globals_to_constants(self): + def testfunc(n): + for i in range(n): + x = range(i) + return x + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(20) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertNotIn("_LOAD_GLOBAL_BUILTIN", uops) + self.assertIn("_LOAD_CONST_INLINE_BORROW_WITH_NULL", uops) + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index ca1228ee7008a9..a7ad6c7320b4ee 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -33,6 +33,7 @@ def skip_if_different_mount_drives(): import parser from stack import Stack import tier1_generator + import tier2_abstract_generator def handle_stderr(): @@ -793,5 +794,157 @@ def test_annotated_op(self): self.run_cases_test(input, output) +class TestGeneratedAbstractCases(unittest.TestCase): + def setUp(self) -> None: + super().setUp() + self.maxDiff = None + + self.temp_dir = tempfile.gettempdir() + self.temp_input_filename = 
os.path.join(self.temp_dir, "input.txt") + self.temp_input2_filename = os.path.join(self.temp_dir, "input2.txt") + self.temp_output_filename = os.path.join(self.temp_dir, "output.txt") + + def tearDown(self) -> None: + for filename in [ + self.temp_input_filename, + self.temp_input2_filename, + self.temp_output_filename, + ]: + try: + os.remove(filename) + except: + pass + super().tearDown() + + def run_cases_test(self, input: str, input2: str, expected: str): + with open(self.temp_input_filename, "w+") as temp_input: + temp_input.write(parser.BEGIN_MARKER) + temp_input.write(input) + temp_input.write(parser.END_MARKER) + temp_input.flush() + + with open(self.temp_input2_filename, "w+") as temp_input: + temp_input.write(parser.BEGIN_MARKER) + temp_input.write(input2) + temp_input.write(parser.END_MARKER) + temp_input.flush() + + with handle_stderr(): + tier2_abstract_generator.generate_tier2_abstract_from_files( + [self.temp_input_filename, self.temp_input2_filename], + self.temp_output_filename + ) + + with open(self.temp_output_filename) as temp_output: + lines = temp_output.readlines() + while lines and lines[0].startswith(("// ", "#", " #", "\n")): + lines.pop(0) + while lines and lines[-1].startswith(("#", "\n")): + lines.pop(-1) + actual = "".join(lines) + self.assertEqual(actual.strip(), expected.strip()) + + def test_overridden_abstract(self): + input = """ + pure op(OP, (--)) { + spam(); + } + """ + input2 = """ + pure op(OP, (--)) { + eggs(); + } + """ + output = """ + case OP: { + eggs(); + break; + } + """ + self.run_cases_test(input, input2, output) + + def test_overridden_abstract_args(self): + input = """ + pure op(OP, (arg1 -- out)) { + spam(); + } + op(OP2, (arg1 -- out)) { + eggs(); + } + """ + input2 = """ + op(OP, (arg1 -- out)) { + eggs(); + } + """ + output = """ + case OP: { + _Py_UOpsSymType *arg1; + _Py_UOpsSymType *out; + arg1 = stack_pointer[-1]; + eggs(); + stack_pointer[-1] = out; + break; + } + + case OP2: { + _Py_UOpsSymType *out; + 
out = sym_new_unknown(ctx); + if (out == NULL) goto out_of_space; + stack_pointer[-1] = out; + break; + } + """ + self.run_cases_test(input, input2, output) + + def test_no_overridden_case(self): + input = """ + pure op(OP, (arg1 -- out)) { + spam(); + } + + pure op(OP2, (arg1 -- out)) { + } + + """ + input2 = """ + pure op(OP2, (arg1 -- out)) { + } + """ + output = """ + case OP: { + _Py_UOpsSymType *out; + out = sym_new_unknown(ctx); + if (out == NULL) goto out_of_space; + stack_pointer[-1] = out; + break; + } + + case OP2: { + _Py_UOpsSymType *arg1; + _Py_UOpsSymType *out; + arg1 = stack_pointer[-1]; + stack_pointer[-1] = out; + break; + } + """ + self.run_cases_test(input, input2, output) + + def test_missing_override_failure(self): + input = """ + pure op(OP, (arg1 -- out)) { + spam(); + } + """ + input2 = """ + pure op(OTHER, (arg1 -- out)) { + } + """ + output = """ + """ + with self.assertRaisesRegex(AssertionError, "All abstract uops"): + self.run_cases_test(input, input2, output) + + if __name__ == "__main__": unittest.main() diff --git a/Makefile.pre.in b/Makefile.pre.in index cf182980c120ee..d3b18acad61ce5 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1863,6 +1863,10 @@ regen-cases: -o $(srcdir)/Python/generated_cases.c.h.new $(srcdir)/Python/bytecodes.c $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/tier2_generator.py \ -o $(srcdir)/Python/executor_cases.c.h.new $(srcdir)/Python/bytecodes.c + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/tier2_abstract_generator.py \ + -o $(srcdir)/Python/tier2_redundancy_eliminator_cases.c.h.new \ + $(srcdir)/Python/tier2_redundancy_eliminator_bytecodes.c \ + $(srcdir)/Python/bytecodes.c $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/opcode_metadata_generator.py \ -o $(srcdir)/Include/internal/pycore_opcode_metadata.h.new $(srcdir)/Python/bytecodes.c $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/uop_metadata_generator.py -o \ @@ -1874,6 +1878,7 @@ regen-cases: $(UPDATE_FILE) 
$(srcdir)/Include/internal/pycore_opcode_metadata.h $(srcdir)/Include/internal/pycore_opcode_metadata.h.new $(UPDATE_FILE) $(srcdir)/Include/internal/pycore_uop_metadata.h $(srcdir)/Include/internal/pycore_uop_metadata.h.new $(UPDATE_FILE) $(srcdir)/Python/executor_cases.c.h $(srcdir)/Python/executor_cases.c.h.new + $(UPDATE_FILE) $(srcdir)/Python/tier2_redundancy_eliminator_cases.c.h $(srcdir)/Python/tier2_redundancy_eliminator_cases.c.h.new $(UPDATE_FILE) $(srcdir)/Lib/_opcode_metadata.py $(srcdir)/Lib/_opcode_metadata.py.new Python/compile.o: $(srcdir)/Include/internal/pycore_opcode_metadata.h @@ -1895,7 +1900,8 @@ Python/optimizer.o: \ Python/optimizer_analysis.o: \ $(srcdir)/Include/internal/pycore_opcode_metadata.h \ - $(srcdir)/Include/internal/pycore_optimizer.h + $(srcdir)/Include/internal/pycore_optimizer.h \ + $(srcdir)/Python/tier2_redundancy_eliminator_cases.c.h Python/frozen.o: $(FROZEN_FILES_OUT) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-16-14-41-54.gh-issue-114058.Cb2b8h.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-16-14-41-54.gh-issue-114058.Cb2b8h.rst new file mode 100644 index 00000000000000..beb82dbcd3cccd --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-16-14-41-54.gh-issue-114058.Cb2b8h.rst @@ -0,0 +1 @@ +Implement the foundations of the Tier 2 redundancy eliminator. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 197dff4b9888ce..f7c7e3669b7e6f 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -133,7 +133,7 @@ dummy_func( switch (opcode) { // BEGIN BYTECODES // - inst(NOP, (--)) { + pure inst(NOP, (--)) { } family(RESUME, 0) = { @@ -411,12 +411,12 @@ dummy_func( // BINARY_OP_INPLACE_ADD_UNICODE, // See comments at that opcode. 
}; - op(_GUARD_BOTH_INT, (left, right -- left: &PYLONG_TYPE, right: &PYLONG_TYPE)) { + op(_GUARD_BOTH_INT, (left, right -- left, right)) { DEOPT_IF(!PyLong_CheckExact(left)); DEOPT_IF(!PyLong_CheckExact(right)); } - pure op(_BINARY_OP_MULTIPLY_INT, (left, right -- res: &PYLONG_TYPE)) { + pure op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) { STAT_INC(BINARY_OP, hit); res = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); @@ -424,7 +424,7 @@ dummy_func( ERROR_IF(res == NULL, error); } - pure op(_BINARY_OP_ADD_INT, (left, right -- res: &PYLONG_TYPE)) { + pure op(_BINARY_OP_ADD_INT, (left, right -- res)) { STAT_INC(BINARY_OP, hit); res = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); @@ -432,7 +432,7 @@ dummy_func( ERROR_IF(res == NULL, error); } - pure op(_BINARY_OP_SUBTRACT_INT, (left, right -- res: &PYLONG_TYPE)) { + pure op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) { STAT_INC(BINARY_OP, hit); res = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); @@ -447,12 +447,12 @@ dummy_func( macro(BINARY_OP_SUBTRACT_INT) = _GUARD_BOTH_INT + unused/1 + _BINARY_OP_SUBTRACT_INT; - op(_GUARD_BOTH_FLOAT, (left, right -- left: &PYFLOAT_TYPE, right: &PYFLOAT_TYPE)) { + op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) { DEOPT_IF(!PyFloat_CheckExact(left)); DEOPT_IF(!PyFloat_CheckExact(right)); } - pure op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res: &PYFLOAT_TYPE)) { + pure op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res)) { STAT_INC(BINARY_OP, hit); double dres = ((PyFloatObject *)left)->ob_fval * @@ -460,7 +460,7 @@ dummy_func( DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); } - pure op(_BINARY_OP_ADD_FLOAT, (left, right -- res: &PYFLOAT_TYPE)) { + pure op(_BINARY_OP_ADD_FLOAT, (left, right -- res)) { STAT_INC(BINARY_OP, hit); double dres = 
((PyFloatObject *)left)->ob_fval + @@ -468,7 +468,7 @@ dummy_func( DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); } - pure op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res: &PYFLOAT_TYPE)) { + pure op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res)) { STAT_INC(BINARY_OP, hit); double dres = ((PyFloatObject *)left)->ob_fval - @@ -483,12 +483,12 @@ dummy_func( macro(BINARY_OP_SUBTRACT_FLOAT) = _GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_SUBTRACT_FLOAT; - op(_GUARD_BOTH_UNICODE, (left, right -- left: &PYUNICODE_TYPE, right: &PYUNICODE_TYPE)) { + op(_GUARD_BOTH_UNICODE, (left, right -- left, right)) { DEOPT_IF(!PyUnicode_CheckExact(left)); DEOPT_IF(!PyUnicode_CheckExact(right)); } - pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res: &PYUNICODE_TYPE)) { + pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) { STAT_INC(BINARY_OP, hit); res = PyUnicode_Concat(left, right); _Py_DECREF_SPECIALIZED(left, _PyUnicode_ExactDealloc); @@ -1877,7 +1877,7 @@ dummy_func( something was returned by a descriptor protocol). Set the second element of the stack to NULL, to signal CALL that it's not a method call. - NULL | meth | arg1 | ... | argN + meth | NULL | arg1 | ... 
| argN */ DECREF_INPUTS(); ERROR_IF(attr == NULL, error); @@ -1901,7 +1901,7 @@ dummy_func( LOAD_ATTR, }; - op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner: &(GUARD_TYPE_VERSION_TYPE + type_version))) { + op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) { PyTypeObject *tp = Py_TYPE(owner); assert(type_version != 0); DEOPT_IF(tp->tp_version_tag != type_version); @@ -2082,7 +2082,7 @@ dummy_func( DISPATCH_INLINED(new_frame); } - op(_GUARD_DORV_VALUES, (owner -- owner: &GUARD_DORV_VALUES_TYPE)) { + op(_GUARD_DORV_VALUES, (owner -- owner)) { assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); DEOPT_IF(!_PyDictOrValues_IsValues(dorv)); @@ -2711,7 +2711,7 @@ dummy_func( DEOPT_IF(r->len <= 0); } - op(_ITER_NEXT_RANGE, (iter -- iter, next: &PYLONG_TYPE)) { + op(_ITER_NEXT_RANGE, (iter -- iter, next)) { _PyRangeIterObject *r = (_PyRangeIterObject *)iter; assert(Py_TYPE(r) == &PyRangeIter_Type); assert(r->len > 0); @@ -2869,13 +2869,13 @@ dummy_func( exc_info->exc_value = Py_NewRef(new_exc); } - op(_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, (owner -- owner: &GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE)) { + op(_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, (owner -- owner)) { assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner); DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && !_PyObject_MakeInstanceAttributesFromDict(owner, dorv)); } - op(_GUARD_KEYS_VERSION, (keys_version/2, owner -- owner: &(GUARD_KEYS_VERSION_TYPE + keys_version))) { + op(_GUARD_KEYS_VERSION, (keys_version/2, owner -- owner)) { PyTypeObject *owner_cls = Py_TYPE(owner); PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls; DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version != keys_version); @@ -3090,7 +3090,7 @@ dummy_func( macro(CALL) = _SPECIALIZE_CALL + unused/2 + _CALL; - op(_CHECK_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- 
callable: &PYMETHOD_TYPE, null: &NULL_TYPE, unused[oparg])) { + op(_CHECK_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- callable, null, unused[oparg])) { DEOPT_IF(null != NULL); DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type); } @@ -3108,7 +3108,7 @@ dummy_func( DEOPT_IF(tstate->interp->eval_frame); } - op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable: &(PYFUNCTION_TYPE_VERSION_TYPE + func_version), self_or_null, unused[oparg])) { + op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) { DEOPT_IF(!PyFunction_Check(callable)); PyFunctionObject *func = (PyFunctionObject *)callable; DEOPT_IF(func->func_version != func_version); @@ -4059,23 +4059,23 @@ dummy_func( DEOPT_IF(!current_executor->vm_data.valid); } - op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { + pure op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { TIER_TWO_ONLY value = Py_NewRef(ptr); } - op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { + pure op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { TIER_TWO_ONLY value = ptr; } - op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) { + pure op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) { TIER_TWO_ONLY value = Py_NewRef(ptr); null = NULL; } - op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) { + pure op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) { TIER_TWO_ONLY value = ptr; null = NULL; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 2d914b82dbf88f..7d48d6a05a17b0 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1598,7 +1598,7 @@ something was returned by a descriptor protocol). Set the second element of the stack to NULL, to signal CALL that it's not a method call. - NULL | meth | arg1 | ... | argN + meth | NULL | arg1 | ... 
| argN */ Py_DECREF(owner); if (attr == NULL) goto pop_1_error_tier_two; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index e5244147d499af..afb6650e5920fb 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3420,7 +3420,7 @@ something was returned by a descriptor protocol). Set the second element of the stack to NULL, to signal CALL that it's not a method call. - NULL | meth | arg1 | ... | argN + meth | NULL | arg1 | ... | argN */ Py_DECREF(owner); if (attr == NULL) goto pop_1_error; diff --git a/Python/optimizer.c b/Python/optimizer.c index ad9ac382d300ef..f31f83113d3f25 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -17,8 +17,6 @@ #include "pycore_uop_metadata.h" // Uop tables #undef NEED_OPCODE_METADATA -#define UOP_MAX_TRACE_LENGTH 512 - #define MAX_EXECUTORS_SIZE 256 @@ -308,8 +306,6 @@ BRANCH_TO_GUARD[4][2] = { [POP_JUMP_IF_NOT_NONE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_NOT_NONE_POP, }; -#define TRACE_STACK_SIZE 5 - #define CONFIDENCE_RANGE 1000 #define CONFIDENCE_CUTOFF 333 @@ -323,10 +319,11 @@ BRANCH_TO_GUARD[4][2] = { #define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \ DPRINTF(2, \ - " ADD_TO_TRACE(%s, %d, %" PRIu64 ")\n", \ + " ADD_TO_TRACE(%s, %d, %" PRIu64 ", %d)\n", \ _PyUOpName(OPCODE), \ (OPARG), \ - (uint64_t)(OPERAND)); \ + (uint64_t)(OPERAND), \ + TARGET); \ assert(trace_length < max_length); \ trace[trace_length].opcode = (OPCODE); \ trace[trace_length].oparg = (OPARG); \ @@ -825,11 +822,13 @@ uop_optimize( char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE"); if (uop_optimize == NULL || *uop_optimize > '0') { err = _Py_uop_analyze_and_optimize(frame, buffer, - UOP_MAX_TRACE_LENGTH, curr_stackentries, &dependencies); + UOP_MAX_TRACE_LENGTH, + curr_stackentries, &dependencies); if (err <= 0) { return err; } } + assert(err == 1); _PyExecutorObject *executor = make_executor_from_uops(buffer, &dependencies); if (executor == NULL) { return -1; diff --git a/Python/optimizer_analysis.c 
b/Python/optimizer_analysis.c index b14e6950b4a06b..e02ca4d6acf6c1 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1,3 +1,14 @@ +/* + * This file contains the support code for CPython's uops redundancy eliminator. + * It also performs some simple optimizations. + * It performs a traditional data-flow analysis[1] over the trace of uops. + * Using the information gained, it chooses to emit, or skip certain instructions + * if possible. + * + * [1] For information on data-flow analysis, please see + * https://clang.llvm.org/docs/DataFlowAnalysisIntro.html + * + * */ #include "Python.h" #include "opcode.h" #include "pycore_dict.h" @@ -9,10 +20,355 @@ #include "pycore_dict.h" #include "pycore_long.h" #include "cpython/optimizer.h" +#include "pycore_optimizer.h" +#include "pycore_object.h" +#include "pycore_dict.h" +#include "pycore_function.h" +#include "pycore_uop_metadata.h" +#include "pycore_uop_ids.h" +#include "pycore_range.h" + +#include #include #include #include -#include "pycore_optimizer.h" + +// Holds locals, stack, locals, stack ... co_consts (in that order) +#define MAX_ABSTRACT_INTERP_SIZE 4096 + +#define OVERALLOCATE_FACTOR 5 + +#define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * OVERALLOCATE_FACTOR) + +// Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH()) +#define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2) + +#ifdef Py_DEBUG + static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG"; + static inline int get_lltrace(void) { + char *uop_debug = Py_GETENV(DEBUG_ENV); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } + return lltrace; + } + #define DPRINTF(level, ...) \ + if (get_lltrace() >= (level)) { printf(__VA_ARGS__); } +#else + #define DPRINTF(level, ...) +#endif + + +// Flags for below. 
+#define KNOWN 1 << 0 +#define TRUE_CONST 1 << 1 +#define IS_NULL 1 << 2 +#define NOT_NULL 1 << 3 + +typedef struct { + int flags; + PyTypeObject *typ; + // constant propagated value (might be NULL) + PyObject *const_val; +} _Py_UOpsSymType; + + +typedef struct _Py_UOpsAbstractFrame { + // Max stacklen + int stack_len; + int locals_len; + + _Py_UOpsSymType **stack_pointer; + _Py_UOpsSymType **stack; + _Py_UOpsSymType **locals; +} _Py_UOpsAbstractFrame; + + +typedef struct ty_arena { + int ty_curr_number; + int ty_max_number; + _Py_UOpsSymType arena[TY_ARENA_SIZE]; +} ty_arena; + +// Tier 2 types meta interpreter +typedef struct _Py_UOpsAbstractInterpContext { + PyObject_HEAD + // The current "executing" frame. + _Py_UOpsAbstractFrame *frame; + _Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH]; + int curr_frame_depth; + + // Arena for the symbolic types. + ty_arena t_arena; + + _Py_UOpsSymType **n_consumed; + _Py_UOpsSymType **limit; + _Py_UOpsSymType *locals_and_stack[MAX_ABSTRACT_INTERP_SIZE]; +} _Py_UOpsAbstractInterpContext; + +static inline _Py_UOpsSymType* sym_new_unknown(_Py_UOpsAbstractInterpContext *ctx); + +// 0 on success, -1 on error. 
+static _Py_UOpsAbstractFrame * +ctx_frame_new( + _Py_UOpsAbstractInterpContext *ctx, + PyCodeObject *co, + _Py_UOpsSymType **localsplus_start, + int n_locals_already_filled, + int curr_stackentries +) +{ + assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH); + _Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth]; + + frame->stack_len = co->co_stacksize; + frame->locals_len = co->co_nlocalsplus; + + frame->locals = localsplus_start; + frame->stack = frame->locals + co->co_nlocalsplus; + frame->stack_pointer = frame->stack + curr_stackentries; + ctx->n_consumed = localsplus_start + (co->co_nlocalsplus + co->co_stacksize); + if (ctx->n_consumed >= ctx->limit) { + return NULL; + } + + + // Initialize with the initial state of all local variables + for (int i = n_locals_already_filled; i < co->co_nlocalsplus; i++) { + _Py_UOpsSymType *local = sym_new_unknown(ctx); + if (local == NULL) { + return NULL; + } + frame->locals[i] = local; + } + + + // Initialize the stack as well + for (int i = 0; i < curr_stackentries; i++) { + _Py_UOpsSymType *stackvar = sym_new_unknown(ctx); + if (stackvar == NULL) { + return NULL; + } + frame->stack[i] = stackvar; + } + + return frame; +} + +static void +abstractcontext_fini(_Py_UOpsAbstractInterpContext *ctx) +{ + if (ctx == NULL) { + return; + } + ctx->curr_frame_depth = 0; + int tys = ctx->t_arena.ty_curr_number; + for (int i = 0; i < tys; i++) { + Py_CLEAR(ctx->t_arena.arena[i].const_val); + } +} + +static int +abstractcontext_init( + _Py_UOpsAbstractInterpContext *ctx, + PyCodeObject *co, + int curr_stacklen, + int ir_entries +) +{ + ctx->limit = ctx->locals_and_stack + MAX_ABSTRACT_INTERP_SIZE; + ctx->n_consumed = ctx->locals_and_stack; +#ifdef Py_DEBUG // Aids debugging a little. There should never be NULL in the abstract interpreter. + for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) { + ctx->locals_and_stack[i] = NULL; + } +#endif + + // Setup the arena for sym expressions. 
+ ctx->t_arena.ty_curr_number = 0; + ctx->t_arena.ty_max_number = TY_ARENA_SIZE; + + // Frame setup + ctx->curr_frame_depth = 0; + _Py_UOpsAbstractFrame *frame = ctx_frame_new(ctx, co, ctx->n_consumed, 0, curr_stacklen); + if (frame == NULL) { + return -1; + } + ctx->curr_frame_depth++; + ctx->frame = frame; + return 0; +} + + +static int +ctx_frame_pop( + _Py_UOpsAbstractInterpContext *ctx +) +{ + _Py_UOpsAbstractFrame *frame = ctx->frame; + + ctx->n_consumed = frame->locals; + ctx->curr_frame_depth--; + assert(ctx->curr_frame_depth >= 1); + ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1]; + + return 0; +} + + +// Takes a borrowed reference to const_val, turns that into a strong reference. +static _Py_UOpsSymType* +sym_new(_Py_UOpsAbstractInterpContext *ctx, + PyObject *const_val) +{ + _Py_UOpsSymType *self = &ctx->t_arena.arena[ctx->t_arena.ty_curr_number]; + if (ctx->t_arena.ty_curr_number >= ctx->t_arena.ty_max_number) { + OPT_STAT_INC(optimizer_failure_reason_no_memory); + DPRINTF(1, "out of space for symbolic expression type\n"); + return NULL; + } + ctx->t_arena.ty_curr_number++; + self->const_val = NULL; + self->typ = NULL; + self->flags = 0; + + if (const_val != NULL) { + self->const_val = Py_NewRef(const_val); + } + + return self; +} + +static inline void +sym_set_flag(_Py_UOpsSymType *sym, int flag) +{ + sym->flags |= flag; +} + +static inline void +sym_clear_flag(_Py_UOpsSymType *sym, int flag) +{ + sym->flags &= (~flag); +} + +static inline bool +sym_has_flag(_Py_UOpsSymType *sym, int flag) +{ + return (sym->flags & flag) != 0; +} + +static inline bool +sym_is_known(_Py_UOpsSymType *sym) +{ + return sym_has_flag(sym, KNOWN); +} + +static inline bool +sym_is_not_null(_Py_UOpsSymType *sym) +{ + return (sym->flags & (IS_NULL | NOT_NULL)) == NOT_NULL; +} + +static inline bool +sym_is_null(_Py_UOpsSymType *sym) +{ + return (sym->flags & (IS_NULL | NOT_NULL)) == IS_NULL; +} + +static inline void +sym_set_type(_Py_UOpsSymType *sym, PyTypeObject *tp) +{ + 
assert(PyType_Check(tp)); + sym->typ = tp; + sym_set_flag(sym, KNOWN); + sym_set_flag(sym, NOT_NULL); +} + +static inline void +sym_set_null(_Py_UOpsSymType *sym) +{ + sym_set_flag(sym, IS_NULL); + sym_set_flag(sym, KNOWN); +} + + +static inline _Py_UOpsSymType* +sym_new_unknown(_Py_UOpsAbstractInterpContext *ctx) +{ + return sym_new(ctx,NULL); +} + +static inline _Py_UOpsSymType* +sym_new_known_notnull(_Py_UOpsAbstractInterpContext *ctx) +{ + _Py_UOpsSymType *res = sym_new_unknown(ctx); + if (res == NULL) { + return NULL; + } + sym_set_flag(res, NOT_NULL); + return res; +} + +static inline _Py_UOpsSymType* +sym_new_known_type(_Py_UOpsAbstractInterpContext *ctx, + PyTypeObject *typ) +{ + _Py_UOpsSymType *res = sym_new(ctx,NULL); + if (res == NULL) { + return NULL; + } + sym_set_type(res, typ); + return res; +} + +// Takes a borrowed reference to const_val. +static inline _Py_UOpsSymType* +sym_new_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val) +{ + assert(const_val != NULL); + _Py_UOpsSymType *temp = sym_new( + ctx, + const_val + ); + if (temp == NULL) { + return NULL; + } + sym_set_type(temp, Py_TYPE(const_val)); + sym_set_flag(temp, TRUE_CONST); + sym_set_flag(temp, KNOWN); + sym_set_flag(temp, NOT_NULL); + return temp; +} + +static _Py_UOpsSymType* +sym_new_null(_Py_UOpsAbstractInterpContext *ctx) +{ + _Py_UOpsSymType *null_sym = sym_new_unknown(ctx); + if (null_sym == NULL) { + return NULL; + } + sym_set_null(null_sym); + return null_sym; +} + + +static inline bool +sym_matches_type(_Py_UOpsSymType *sym, PyTypeObject *typ) +{ + assert(typ == NULL || PyType_Check(typ)); + if (!sym_has_flag(sym, KNOWN)) { + return false; + } + return sym->typ == typ; +} + + +static inline bool +op_is_end(uint32_t opcode) +{ + return opcode == _EXIT_TRACE || opcode == _JUMP_TO_TOP; +} static int get_mutations(PyObject* dict) { @@ -199,14 +555,138 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, builtins = func->func_builtins; break; } - case 
_JUMP_TO_TOP: - case _EXIT_TRACE: - return 1; + default: + if (op_is_end(opcode)) { + return 1; + } + break; + } + } + return 0; +} + + + +#define STACK_LEVEL() ((int)(stack_pointer - ctx->frame->stack)) + +#define GETLOCAL(idx) ((ctx->frame->locals[idx])) + +#define REPLACE_OP(INST, OP, ARG, OPERAND) \ + INST->opcode = OP; \ + INST->oparg = ARG; \ + INST->operand = OPERAND; + +#define _LOAD_ATTR_NOT_NULL \ + do { \ + attr = sym_new_known_notnull(ctx); \ + if (attr == NULL) { \ + goto error; \ + } \ + null = sym_new_null(ctx); \ + if (null == NULL) { \ + goto error; \ + } \ + } while (0); + + +/* 1 for success, 0 for not ready, cannot error at the moment. */ +static int +uop_redundancy_eliminator( + PyCodeObject *co, + _PyUOpInstruction *trace, + int trace_len, + int curr_stacklen +) +{ + + _Py_UOpsAbstractInterpContext context; + _Py_UOpsAbstractInterpContext *ctx = &context; + + if (abstractcontext_init( + ctx, + co, curr_stacklen, + trace_len) < 0) { + goto out_of_space; + } + + for (_PyUOpInstruction *this_instr = trace; + this_instr < trace + trace_len && !op_is_end(this_instr->opcode); + this_instr++) { + + int oparg = this_instr->oparg; + uint32_t opcode = this_instr->opcode; + + _Py_UOpsSymType **stack_pointer = ctx->frame->stack_pointer; + + DPRINTF(3, "Abstract interpreting %s:%d ", + _PyOpcode_uop_name[opcode], + oparg); + switch (opcode) { +#include "tier2_redundancy_eliminator_cases.c.h" + + default: + DPRINTF(1, "Unknown opcode in abstract interpreter\n"); + Py_UNREACHABLE(); } + assert(ctx->frame != NULL); + DPRINTF(3, " stack_level %d\n", STACK_LEVEL()); + ctx->frame->stack_pointer = stack_pointer; + assert(STACK_LEVEL() >= 0); } + + abstractcontext_fini(ctx); + return 1; + +out_of_space: + DPRINTF(1, "Out of space in abstract interpreter\n"); + abstractcontext_fini(ctx); + return 0; + +error: + DPRINTF(1, "Encountered error in abstract interpreter\n"); + abstractcontext_fini(ctx); return 0; } + +static void +remove_unneeded_uops(_PyUOpInstruction 
*buffer, int buffer_size) +{ + int last_set_ip = -1; + bool maybe_invalid = false; + for (int pc = 0; pc < buffer_size; pc++) { + int opcode = buffer[pc].opcode; + if (opcode == _SET_IP) { + buffer[pc].opcode = NOP; + last_set_ip = pc; + } + else if (opcode == _CHECK_VALIDITY) { + if (maybe_invalid) { + maybe_invalid = false; + } + else { + buffer[pc].opcode = NOP; + } + } + else if (op_is_end(opcode)) { + break; + } + else { + if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) { + maybe_invalid = true; + if (last_set_ip >= 0) { + buffer[last_set_ip].opcode = _SET_IP; + } + } + if ((_PyUop_Flags[opcode] & HAS_ERROR_FLAG) || opcode == _PUSH_FRAME) { + if (last_set_ip >= 0) { + buffer[last_set_ip].opcode = _SET_IP; + } + } + } + } +} + static void peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size) { @@ -250,44 +730,9 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s } } -static void -remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) -{ - int last_set_ip = -1; - bool maybe_invalid = false; - for (int pc = 0; pc < buffer_size; pc++) { - int opcode = buffer[pc].opcode; - if (opcode == _SET_IP) { - buffer[pc].opcode = NOP; - last_set_ip = pc; - } - else if (opcode == _CHECK_VALIDITY) { - if (maybe_invalid) { - maybe_invalid = false; - } - else { - buffer[pc].opcode = NOP; - } - } - else if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) { - break; - } - else { - if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) { - maybe_invalid = true; - if (last_set_ip >= 0) { - buffer[last_set_ip].opcode = _SET_IP; - } - } - if ((_PyUop_Flags[opcode] & HAS_ERROR_FLAG) || opcode == _PUSH_FRAME) { - if (last_set_ip >= 0) { - buffer[last_set_ip].opcode = _SET_IP; - } - } - } - } -} - +// 0 - failure, no error raised, just fall back to Tier 1 +// -1 - failure, and raise error +// 1 - optimizer success int _Py_uop_analyze_and_optimize( _PyInterpreterFrame *frame, @@ -297,11 +742,33 @@ _Py_uop_analyze_and_optimize( 
_PyBloomFilter *dependencies ) { + OPT_STAT_INC(optimizer_attempts); + int err = remove_globals(frame, buffer, buffer_size, dependencies); - if (err <= 0) { - return err; + if (err == 0) { + goto not_ready; + } + if (err < 0) { + goto error; } + peephole_opt(frame, buffer, buffer_size); + + err = uop_redundancy_eliminator( + (PyCodeObject *)frame->f_executable, buffer, + buffer_size, curr_stacklen); + + if (err == 0) { + goto not_ready; + } + assert(err == 1); + remove_unneeded_uops(buffer, buffer_size); + + OPT_STAT_INC(optimizer_successes); return 1; +not_ready: + return 0; +error: + return -1; } diff --git a/Python/specialize.c b/Python/specialize.c index ea2638570f22d0..2256d79b387c56 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -240,6 +240,11 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) print_histogram(out, "Trace run length", stats->trace_run_length_hist); print_histogram(out, "Optimized trace length", stats->optimized_trace_length_hist); + fprintf(out, "Optimization optimizer attempts: %" PRIu64 "\n", stats->optimizer_attempts); + fprintf(out, "Optimization optimizer successes: %" PRIu64 "\n", stats->optimizer_successes); + fprintf(out, "Optimization optimizer failure no memory: %" PRIu64 "\n", + stats->optimizer_failure_reason_no_memory); + const char* const* names; for (int i = 0; i < 512; i++) { if (i < 256) { diff --git a/Python/tier2_redundancy_eliminator_bytecodes.c b/Python/tier2_redundancy_eliminator_bytecodes.c new file mode 100644 index 00000000000000..3272b187f20d0e --- /dev/null +++ b/Python/tier2_redundancy_eliminator_bytecodes.c @@ -0,0 +1,272 @@ +#include "Python.h" +#include "pycore_uops.h" +#include "pycore_uop_ids.h" + +#define op(name, ...) 
/* NAME is ignored */ + +typedef struct _Py_UOpsSymType _Py_UOpsSymType; +typedef struct _Py_UOpsAbstractInterpContext _Py_UOpsAbstractInterpContext; +typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; + +static int +dummy_func(void) { + + PyCodeObject *code; + int oparg; + _Py_UOpsSymType *flag; + _Py_UOpsSymType *left; + _Py_UOpsSymType *right; + _Py_UOpsSymType *value; + _Py_UOpsSymType *res; + _Py_UOpsSymType *iter; + _Py_UOpsSymType *top; + _Py_UOpsSymType *bottom; + _Py_UOpsAbstractFrame *frame; + _Py_UOpsAbstractInterpContext *ctx; + _PyUOpInstruction *this_instr; + _PyBloomFilter *dependencies; + int modified; + +// BEGIN BYTECODES // + + op(_LOAD_FAST_CHECK, (-- value)) { + value = GETLOCAL(oparg); + // We guarantee this will error - just bail and don't optimize it. + if (sym_is_null(value)) { + goto out_of_space; + } + } + + op(_LOAD_FAST, (-- value)) { + value = GETLOCAL(oparg); + } + + op(_LOAD_FAST_AND_CLEAR, (-- value)) { + value = GETLOCAL(oparg); + _Py_UOpsSymType *temp = sym_new_null(ctx); + if (temp == NULL) { + goto out_of_space; + } + GETLOCAL(oparg) = temp; + } + + op(_STORE_FAST, (value --)) { + GETLOCAL(oparg) = value; + } + + op(_PUSH_NULL, (-- res)) { + res = sym_new_null(ctx); + if (res == NULL) { + goto out_of_space; + }; + } + + op(_GUARD_BOTH_INT, (left, right -- left, right)) { + if (sym_matches_type(left, &PyLong_Type) && + sym_matches_type(right, &PyLong_Type)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_type(left, &PyLong_Type); + sym_set_type(right, &PyLong_Type); + } + + op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) { + if (sym_matches_type(left, &PyFloat_Type) && + sym_matches_type(right, &PyFloat_Type)) { + REPLACE_OP(this_instr, _NOP, 0 ,0); + } + sym_set_type(left, &PyFloat_Type); + sym_set_type(right, &PyFloat_Type); + } + + + op(_BINARY_OP_ADD_INT, (left, right -- res)) { + // TODO constant propagation + (void)left; + (void)right; + res = sym_new_known_type(ctx, &PyLong_Type); + if (res == NULL) { + 
goto out_of_space; + } + } + + op(_LOAD_CONST, (-- value)) { + // There should be no LOAD_CONST. It should be all + // replaced by peephole_opt. + Py_UNREACHABLE(); + } + + op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { + value = sym_new_const(ctx, ptr); + if (value == NULL) { + goto out_of_space; + } + } + + op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { + value = sym_new_const(ctx, ptr); + if (value == NULL) { + goto out_of_space; + } + } + + op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) { + value = sym_new_const(ctx, ptr); + if (value == NULL) { + goto out_of_space; + } + null = sym_new_null(ctx); + if (null == NULL) { + goto out_of_space; + } + } + + op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) { + value = sym_new_const(ctx, ptr); + if (value == NULL) { + goto out_of_space; + } + null = sym_new_null(ctx); + if (null == NULL) { + goto out_of_space; + } + } + + + op(_COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { + assert(oparg > 0); + top = bottom; + } + + op(_SWAP, (bottom, unused[oparg-2], top -- + top, unused[oparg-2], bottom)) { + } + + op(_LOAD_ATTR_INSTANCE_VALUE, (index/1, owner -- attr, null if (oparg & 1))) { + _LOAD_ATTR_NOT_NULL + (void)index; + (void)owner; + } + + op(_LOAD_ATTR_MODULE, (index/1, owner -- attr, null if (oparg & 1))) { + _LOAD_ATTR_NOT_NULL + (void)index; + (void)owner; + } + + op(_LOAD_ATTR_WITH_HINT, (hint/1, owner -- attr, null if (oparg & 1))) { + _LOAD_ATTR_NOT_NULL + (void)hint; + (void)owner; + } + + op(_LOAD_ATTR_SLOT, (index/1, owner -- attr, null if (oparg & 1))) { + _LOAD_ATTR_NOT_NULL + (void)index; + (void)owner; + } + + op(_LOAD_ATTR_CLASS, (descr/4, owner -- attr, null if (oparg & 1))) { + _LOAD_ATTR_NOT_NULL + (void)descr; + (void)owner; + } + + op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) { + sym_set_type(callable, &PyFunction_Type); + (void)self_or_null; + (void)func_version; + } + + 
op(_CHECK_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- callable, null, unused[oparg])) { + sym_set_null(null); + sym_set_type(callable, &PyMethod_Type); + } + + op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) { + int argcount = oparg; + + (void)callable; + + PyFunctionObject *func = (PyFunctionObject *)(this_instr + 2)->operand; + if (func == NULL) { + goto error; + } + PyCodeObject *co = (PyCodeObject *)func->func_code; + + assert(self_or_null != NULL); + assert(args != NULL); + if (sym_is_not_null(self_or_null)) { + // Bound method fiddling, same as _INIT_CALL_PY_EXACT_ARGS in VM + args--; + argcount++; + } + + _Py_UOpsSymType **localsplus_start = ctx->n_consumed; + int n_locals_already_filled = 0; + // Can determine statically, so we interleave the new locals + // and make the current stack the new locals. + // This also sets up for true call inlining. + if (sym_is_known(self_or_null)) { + localsplus_start = args; + n_locals_already_filled = argcount; + } + new_frame = ctx_frame_new(ctx, co, localsplus_start, n_locals_already_filled, 0); + if (new_frame == NULL){ + goto out_of_space; + } + } + + op(_POP_FRAME, (retval -- res)) { + SYNC_SP(); + ctx->frame->stack_pointer = stack_pointer; + ctx_frame_pop(ctx); + stack_pointer = ctx->frame->stack_pointer; + res = retval; + } + + op(_PUSH_FRAME, (new_frame: _Py_UOpsAbstractFrame * -- unused if (0))) { + SYNC_SP(); + ctx->frame->stack_pointer = stack_pointer; + ctx->frame = new_frame; + ctx->curr_frame_depth++; + stack_pointer = new_frame->stack_pointer; + } + + op(_UNPACK_SEQUENCE, (seq -- values[oparg])) { + /* This has to be done manually */ + (void)seq; + for (int i = 0; i < oparg; i++) { + values[i] = sym_new_unknown(ctx); + if (values[i] == NULL) { + goto out_of_space; + } + } + } + + op(_UNPACK_EX, (seq -- values[oparg & 0xFF], unused, unused[oparg >> 8])) { + /* This has to be done manually */ + (void)seq; + int totalargs = (oparg & 
0xFF) + (oparg >> 8) + 1; + for (int i = 0; i < totalargs; i++) { + values[i] = sym_new_unknown(ctx); + if (values[i] == NULL) { + goto out_of_space; + } + } + } + + op(_ITER_NEXT_RANGE, (iter -- iter, next)) { + next = sym_new_known_type(ctx, &PyLong_Type); + if (next == NULL) { + goto out_of_space; + } + (void)iter; + } + + + + +// END BYTECODES // + +} \ No newline at end of file diff --git a/Python/tier2_redundancy_eliminator_cases.c.h b/Python/tier2_redundancy_eliminator_cases.c.h new file mode 100644 index 00000000000000..77a7f5b2360c3b --- /dev/null +++ b/Python/tier2_redundancy_eliminator_cases.c.h @@ -0,0 +1,1676 @@ +// This file is generated by Tools/cases_generator/tier2_abstract_generator.py +// from: +// Python/tier2_redundancy_eliminator_bytecodes.c +// Do not edit! + + case _NOP: { + break; + } + + case _RESUME_CHECK: { + break; + } + + /* _INSTRUMENTED_RESUME is not a viable micro-op for tier 2 */ + + case _LOAD_FAST_CHECK: { + _Py_UOpsSymType *value; + value = GETLOCAL(oparg); + // We guarantee this will error - just bail and don't optimize it. + if (sym_is_null(value)) { + goto out_of_space; + } + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _LOAD_FAST: { + _Py_UOpsSymType *value; + value = GETLOCAL(oparg); + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _LOAD_FAST_AND_CLEAR: { + _Py_UOpsSymType *value; + value = GETLOCAL(oparg); + _Py_UOpsSymType *temp = sym_new_null(ctx); + if (temp == NULL) { + goto out_of_space; + } + GETLOCAL(oparg) = temp; + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _LOAD_CONST: { + _Py_UOpsSymType *value; + // There should be no LOAD_CONST. It should be all + // replaced by peephole_opt. 
+ Py_UNREACHABLE(); + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _STORE_FAST: { + _Py_UOpsSymType *value; + value = stack_pointer[-1]; + GETLOCAL(oparg) = value; + stack_pointer += -1; + break; + } + + case _POP_TOP: { + stack_pointer += -1; + break; + } + + case _PUSH_NULL: { + _Py_UOpsSymType *res; + res = sym_new_null(ctx); + if (res == NULL) { + goto out_of_space; + }; + stack_pointer[0] = res; + stack_pointer += 1; + break; + } + + case _END_SEND: { + _Py_UOpsSymType *value; + value = sym_new_unknown(ctx); + if (value == NULL) goto out_of_space; + stack_pointer[-2] = value; + stack_pointer += -1; + break; + } + + case _UNARY_NEGATIVE: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _UNARY_NOT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _TO_BOOL: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _TO_BOOL_BOOL: { + break; + } + + case _TO_BOOL_INT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _TO_BOOL_LIST: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _TO_BOOL_NONE: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _TO_BOOL_STR: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _TO_BOOL_ALWAYS_TRUE: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _UNARY_INVERT: { + _Py_UOpsSymType 
*res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _GUARD_BOTH_INT: { + _Py_UOpsSymType *right; + _Py_UOpsSymType *left; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if (sym_matches_type(left, &PyLong_Type) && + sym_matches_type(right, &PyLong_Type)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_type(left, &PyLong_Type); + sym_set_type(right, &PyLong_Type); + break; + } + + case _BINARY_OP_MULTIPLY_INT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _BINARY_OP_ADD_INT: { + _Py_UOpsSymType *right; + _Py_UOpsSymType *left; + _Py_UOpsSymType *res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + // TODO constant propagation + (void)left; + (void)right; + res = sym_new_known_type(ctx, &PyLong_Type); + if (res == NULL) { + goto out_of_space; + } + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _BINARY_OP_SUBTRACT_INT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _GUARD_BOTH_FLOAT: { + _Py_UOpsSymType *right; + _Py_UOpsSymType *left; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if (sym_matches_type(left, &PyFloat_Type) && + sym_matches_type(right, &PyFloat_Type)) { + REPLACE_OP(this_instr, _NOP, 0 ,0); + } + sym_set_type(left, &PyFloat_Type); + sym_set_type(right, &PyFloat_Type); + break; + } + + case _BINARY_OP_MULTIPLY_FLOAT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _BINARY_OP_ADD_FLOAT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case 
_BINARY_OP_SUBTRACT_FLOAT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _GUARD_BOTH_UNICODE: { + break; + } + + case _BINARY_OP_ADD_UNICODE: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _BINARY_SUBSCR: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _BINARY_SLICE: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-3] = res; + stack_pointer += -2; + break; + } + + case _STORE_SLICE: { + stack_pointer += -4; + break; + } + + case _BINARY_SUBSCR_LIST_INT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _BINARY_SUBSCR_STR_INT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _BINARY_SUBSCR_TUPLE_INT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _BINARY_SUBSCR_DICT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + /* _BINARY_SUBSCR_GETITEM is not a viable micro-op for tier 2 */ + + case _LIST_APPEND: { + stack_pointer += -1; + break; + } + + case _SET_ADD: { + stack_pointer += -1; + break; + } + + case _STORE_SUBSCR: { + stack_pointer += -3; + break; + } + + case _STORE_SUBSCR_LIST_INT: { + stack_pointer += -3; + break; + } + + case _STORE_SUBSCR_DICT: { + stack_pointer += -3; + break; + } + + case 
_DELETE_SUBSCR: { + stack_pointer += -2; + break; + } + + case _CALL_INTRINSIC_1: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _CALL_INTRINSIC_2: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _POP_FRAME: { + _Py_UOpsSymType *retval; + _Py_UOpsSymType *res; + retval = stack_pointer[-1]; + stack_pointer += -1; + ctx->frame->stack_pointer = stack_pointer; + ctx_frame_pop(ctx); + stack_pointer = ctx->frame->stack_pointer; + res = retval; + stack_pointer[0] = res; + stack_pointer += 1; + break; + } + + /* _INSTRUMENTED_RETURN_VALUE is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_RETURN_CONST is not a viable micro-op for tier 2 */ + + case _GET_AITER: { + _Py_UOpsSymType *iter; + iter = sym_new_unknown(ctx); + if (iter == NULL) goto out_of_space; + stack_pointer[-1] = iter; + break; + } + + case _GET_ANEXT: { + _Py_UOpsSymType *awaitable; + awaitable = sym_new_unknown(ctx); + if (awaitable == NULL) goto out_of_space; + stack_pointer[0] = awaitable; + stack_pointer += 1; + break; + } + + case _GET_AWAITABLE: { + _Py_UOpsSymType *iter; + iter = sym_new_unknown(ctx); + if (iter == NULL) goto out_of_space; + stack_pointer[-1] = iter; + break; + } + + /* _SEND is not a viable micro-op for tier 2 */ + + /* _SEND_GEN is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_YIELD_VALUE is not a viable micro-op for tier 2 */ + + case _POP_EXCEPT: { + stack_pointer += -1; + break; + } + + case _LOAD_ASSERTION_ERROR: { + _Py_UOpsSymType *value; + value = sym_new_unknown(ctx); + if (value == NULL) goto out_of_space; + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _LOAD_BUILD_CLASS: { + _Py_UOpsSymType *bc; + bc = sym_new_unknown(ctx); + if (bc == NULL) goto out_of_space; + stack_pointer[0] = bc; + stack_pointer += 1; + break; + } 
+ + case _STORE_NAME: { + stack_pointer += -1; + break; + } + + case _DELETE_NAME: { + break; + } + + case _UNPACK_SEQUENCE: { + _Py_UOpsSymType *seq; + _Py_UOpsSymType **values; + seq = stack_pointer[-1]; + values = &stack_pointer[-1]; + /* This has to be done manually */ + (void)seq; + for (int i = 0; i < oparg; i++) { + values[i] = sym_new_unknown(ctx); + if (values[i] == NULL) { + goto out_of_space; + } + } + stack_pointer += -1 + oparg; + break; + } + + case _UNPACK_SEQUENCE_TWO_TUPLE: { + _Py_UOpsSymType **values; + values = &stack_pointer[-1]; + for (int _i = oparg; --_i >= 0;) { + values[_i] = sym_new_unknown(ctx); + if (values[_i] == NULL) goto out_of_space; + } + stack_pointer += -1 + oparg; + break; + } + + case _UNPACK_SEQUENCE_TUPLE: { + _Py_UOpsSymType **values; + values = &stack_pointer[-1]; + for (int _i = oparg; --_i >= 0;) { + values[_i] = sym_new_unknown(ctx); + if (values[_i] == NULL) goto out_of_space; + } + stack_pointer += -1 + oparg; + break; + } + + case _UNPACK_SEQUENCE_LIST: { + _Py_UOpsSymType **values; + values = &stack_pointer[-1]; + for (int _i = oparg; --_i >= 0;) { + values[_i] = sym_new_unknown(ctx); + if (values[_i] == NULL) goto out_of_space; + } + stack_pointer += -1 + oparg; + break; + } + + case _UNPACK_EX: { + _Py_UOpsSymType *seq; + _Py_UOpsSymType **values; + seq = stack_pointer[-1]; + values = &stack_pointer[-1]; + /* This has to be done manually */ + (void)seq; + int totalargs = (oparg & 0xFF) + (oparg >> 8) + 1; + for (int i = 0; i < totalargs; i++) { + values[i] = sym_new_unknown(ctx); + if (values[i] == NULL) { + goto out_of_space; + } + } + stack_pointer += (oparg >> 8) + (oparg & 0xFF); + break; + } + + case _STORE_ATTR: { + stack_pointer += -2; + break; + } + + case _DELETE_ATTR: { + stack_pointer += -1; + break; + } + + case _STORE_GLOBAL: { + stack_pointer += -1; + break; + } + + case _DELETE_GLOBAL: { + break; + } + + case _LOAD_LOCALS: { + _Py_UOpsSymType *locals; + locals = sym_new_unknown(ctx); + if (locals == 
NULL) goto out_of_space; + stack_pointer[0] = locals; + stack_pointer += 1; + break; + } + + case _LOAD_FROM_DICT_OR_GLOBALS: { + _Py_UOpsSymType *v; + v = sym_new_unknown(ctx); + if (v == NULL) goto out_of_space; + stack_pointer[-1] = v; + break; + } + + case _LOAD_NAME: { + _Py_UOpsSymType *v; + v = sym_new_unknown(ctx); + if (v == NULL) goto out_of_space; + stack_pointer[0] = v; + stack_pointer += 1; + break; + } + + case _LOAD_GLOBAL: { + _Py_UOpsSymType *res; + _Py_UOpsSymType *null = NULL; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + null = sym_new_null(ctx); + if (null == NULL) goto out_of_space; + stack_pointer[0] = res; + if (oparg & 1) stack_pointer[1] = null; + stack_pointer += 1 + (oparg & 1); + break; + } + + case _GUARD_GLOBALS_VERSION: { + break; + } + + case _GUARD_BUILTINS_VERSION: { + break; + } + + case _LOAD_GLOBAL_MODULE: { + _Py_UOpsSymType *res; + _Py_UOpsSymType *null = NULL; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + null = sym_new_null(ctx); + if (null == NULL) goto out_of_space; + stack_pointer[0] = res; + if (oparg & 1) stack_pointer[1] = null; + stack_pointer += 1 + (oparg & 1); + break; + } + + case _LOAD_GLOBAL_BUILTINS: { + _Py_UOpsSymType *res; + _Py_UOpsSymType *null = NULL; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + null = sym_new_null(ctx); + if (null == NULL) goto out_of_space; + stack_pointer[0] = res; + if (oparg & 1) stack_pointer[1] = null; + stack_pointer += 1 + (oparg & 1); + break; + } + + case _DELETE_FAST: { + break; + } + + case _MAKE_CELL: { + break; + } + + case _DELETE_DEREF: { + break; + } + + case _LOAD_FROM_DICT_OR_DEREF: { + _Py_UOpsSymType *value; + value = sym_new_unknown(ctx); + if (value == NULL) goto out_of_space; + stack_pointer[-1] = value; + break; + } + + case _LOAD_DEREF: { + _Py_UOpsSymType *value; + value = sym_new_unknown(ctx); + if (value == NULL) goto out_of_space; + stack_pointer[0] = value; + stack_pointer += 1; + 
break; + } + + case _STORE_DEREF: { + stack_pointer += -1; + break; + } + + case _COPY_FREE_VARS: { + break; + } + + case _BUILD_STRING: { + _Py_UOpsSymType *str; + str = sym_new_unknown(ctx); + if (str == NULL) goto out_of_space; + stack_pointer[-oparg] = str; + stack_pointer += 1 - oparg; + break; + } + + case _BUILD_TUPLE: { + _Py_UOpsSymType *tup; + tup = sym_new_unknown(ctx); + if (tup == NULL) goto out_of_space; + stack_pointer[-oparg] = tup; + stack_pointer += 1 - oparg; + break; + } + + case _BUILD_LIST: { + _Py_UOpsSymType *list; + list = sym_new_unknown(ctx); + if (list == NULL) goto out_of_space; + stack_pointer[-oparg] = list; + stack_pointer += 1 - oparg; + break; + } + + case _LIST_EXTEND: { + stack_pointer += -1; + break; + } + + case _SET_UPDATE: { + stack_pointer += -1; + break; + } + + case _BUILD_SET: { + _Py_UOpsSymType *set; + set = sym_new_unknown(ctx); + if (set == NULL) goto out_of_space; + stack_pointer[-oparg] = set; + stack_pointer += 1 - oparg; + break; + } + + case _BUILD_MAP: { + _Py_UOpsSymType *map; + map = sym_new_unknown(ctx); + if (map == NULL) goto out_of_space; + stack_pointer[-oparg*2] = map; + stack_pointer += 1 - oparg*2; + break; + } + + case _SETUP_ANNOTATIONS: { + break; + } + + case _BUILD_CONST_KEY_MAP: { + _Py_UOpsSymType *map; + map = sym_new_unknown(ctx); + if (map == NULL) goto out_of_space; + stack_pointer[-1 - oparg] = map; + stack_pointer += -oparg; + break; + } + + case _DICT_UPDATE: { + stack_pointer += -1; + break; + } + + case _DICT_MERGE: { + stack_pointer += -1; + break; + } + + case _MAP_ADD: { + stack_pointer += -2; + break; + } + + /* _INSTRUMENTED_LOAD_SUPER_ATTR is not a viable micro-op for tier 2 */ + + case _LOAD_SUPER_ATTR_ATTR: { + _Py_UOpsSymType *attr; + attr = sym_new_unknown(ctx); + if (attr == NULL) goto out_of_space; + stack_pointer[-3] = attr; + stack_pointer += -2 + ((0) ? 
1 : 0); + break; + } + + case _LOAD_SUPER_ATTR_METHOD: { + _Py_UOpsSymType *attr; + _Py_UOpsSymType *self_or_null; + attr = sym_new_unknown(ctx); + if (attr == NULL) goto out_of_space; + self_or_null = sym_new_unknown(ctx); + if (self_or_null == NULL) goto out_of_space; + stack_pointer[-3] = attr; + stack_pointer[-2] = self_or_null; + stack_pointer += -1; + break; + } + + case _LOAD_ATTR: { + _Py_UOpsSymType *attr; + _Py_UOpsSymType *self_or_null = NULL; + attr = sym_new_unknown(ctx); + if (attr == NULL) goto out_of_space; + self_or_null = sym_new_unknown(ctx); + if (self_or_null == NULL) goto out_of_space; + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = self_or_null; + stack_pointer += (oparg & 1); + break; + } + + case _GUARD_TYPE_VERSION: { + break; + } + + case _CHECK_MANAGED_OBJECT_HAS_VALUES: { + break; + } + + case _LOAD_ATTR_INSTANCE_VALUE: { + _Py_UOpsSymType *owner; + _Py_UOpsSymType *attr; + _Py_UOpsSymType *null = NULL; + owner = stack_pointer[-1]; + uint16_t index = (uint16_t)this_instr->operand; + _LOAD_ATTR_NOT_NULL + (void)index; + (void)owner; + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); + break; + } + + case _CHECK_ATTR_MODULE: { + break; + } + + case _LOAD_ATTR_MODULE: { + _Py_UOpsSymType *owner; + _Py_UOpsSymType *attr; + _Py_UOpsSymType *null = NULL; + owner = stack_pointer[-1]; + uint16_t index = (uint16_t)this_instr->operand; + _LOAD_ATTR_NOT_NULL + (void)index; + (void)owner; + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); + break; + } + + case _CHECK_ATTR_WITH_HINT: { + break; + } + + case _LOAD_ATTR_WITH_HINT: { + _Py_UOpsSymType *owner; + _Py_UOpsSymType *attr; + _Py_UOpsSymType *null = NULL; + owner = stack_pointer[-1]; + uint16_t hint = (uint16_t)this_instr->operand; + _LOAD_ATTR_NOT_NULL + (void)hint; + (void)owner; + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg 
& 1); + break; + } + + case _LOAD_ATTR_SLOT: { + _Py_UOpsSymType *owner; + _Py_UOpsSymType *attr; + _Py_UOpsSymType *null = NULL; + owner = stack_pointer[-1]; + uint16_t index = (uint16_t)this_instr->operand; + _LOAD_ATTR_NOT_NULL + (void)index; + (void)owner; + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); + break; + } + + case _CHECK_ATTR_CLASS: { + break; + } + + case _LOAD_ATTR_CLASS: { + _Py_UOpsSymType *owner; + _Py_UOpsSymType *attr; + _Py_UOpsSymType *null = NULL; + owner = stack_pointer[-1]; + PyObject *descr = (PyObject *)this_instr->operand; + _LOAD_ATTR_NOT_NULL + (void)descr; + (void)owner; + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); + break; + } + + /* _LOAD_ATTR_PROPERTY is not a viable micro-op for tier 2 */ + + /* _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN is not a viable micro-op for tier 2 */ + + case _GUARD_DORV_VALUES: { + break; + } + + case _STORE_ATTR_INSTANCE_VALUE: { + stack_pointer += -2; + break; + } + + /* _STORE_ATTR_WITH_HINT is not a viable micro-op for tier 2 */ + + case _STORE_ATTR_SLOT: { + stack_pointer += -2; + break; + } + + case _COMPARE_OP: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _COMPARE_OP_FLOAT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _COMPARE_OP_INT: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _COMPARE_OP_STR: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _IS_OP: { + _Py_UOpsSymType *b; + b = sym_new_unknown(ctx); + if (b == NULL) goto 
out_of_space; + stack_pointer[-2] = b; + stack_pointer += -1; + break; + } + + case _CONTAINS_OP: { + _Py_UOpsSymType *b; + b = sym_new_unknown(ctx); + if (b == NULL) goto out_of_space; + stack_pointer[-2] = b; + stack_pointer += -1; + break; + } + + case _CHECK_EG_MATCH: { + _Py_UOpsSymType *rest; + _Py_UOpsSymType *match; + rest = sym_new_unknown(ctx); + if (rest == NULL) goto out_of_space; + match = sym_new_unknown(ctx); + if (match == NULL) goto out_of_space; + stack_pointer[-2] = rest; + stack_pointer[-1] = match; + break; + } + + case _CHECK_EXC_MATCH: { + _Py_UOpsSymType *b; + b = sym_new_unknown(ctx); + if (b == NULL) goto out_of_space; + stack_pointer[-1] = b; + break; + } + + /* _JUMP_BACKWARD is not a viable micro-op for tier 2 */ + + /* _POP_JUMP_IF_FALSE is not a viable micro-op for tier 2 */ + + /* _POP_JUMP_IF_TRUE is not a viable micro-op for tier 2 */ + + case _IS_NONE: { + _Py_UOpsSymType *b; + b = sym_new_unknown(ctx); + if (b == NULL) goto out_of_space; + stack_pointer[-1] = b; + break; + } + + case _GET_LEN: { + _Py_UOpsSymType *len_o; + len_o = sym_new_unknown(ctx); + if (len_o == NULL) goto out_of_space; + stack_pointer[0] = len_o; + stack_pointer += 1; + break; + } + + case _MATCH_CLASS: { + _Py_UOpsSymType *attrs; + attrs = sym_new_unknown(ctx); + if (attrs == NULL) goto out_of_space; + stack_pointer[-3] = attrs; + stack_pointer += -2; + break; + } + + case _MATCH_MAPPING: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[0] = res; + stack_pointer += 1; + break; + } + + case _MATCH_SEQUENCE: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[0] = res; + stack_pointer += 1; + break; + } + + case _MATCH_KEYS: { + _Py_UOpsSymType *values_or_none; + values_or_none = sym_new_unknown(ctx); + if (values_or_none == NULL) goto out_of_space; + stack_pointer[0] = values_or_none; + stack_pointer += 1; + break; + } + + case _GET_ITER: { 
+ _Py_UOpsSymType *iter; + iter = sym_new_unknown(ctx); + if (iter == NULL) goto out_of_space; + stack_pointer[-1] = iter; + break; + } + + case _GET_YIELD_FROM_ITER: { + _Py_UOpsSymType *iter; + iter = sym_new_unknown(ctx); + if (iter == NULL) goto out_of_space; + stack_pointer[-1] = iter; + break; + } + + /* _FOR_ITER is not a viable micro-op for tier 2 */ + + case _FOR_ITER_TIER_TWO: { + _Py_UOpsSymType *next; + next = sym_new_unknown(ctx); + if (next == NULL) goto out_of_space; + stack_pointer[0] = next; + stack_pointer += 1; + break; + } + + /* _INSTRUMENTED_FOR_ITER is not a viable micro-op for tier 2 */ + + case _ITER_CHECK_LIST: { + break; + } + + /* _ITER_JUMP_LIST is not a viable micro-op for tier 2 */ + + case _GUARD_NOT_EXHAUSTED_LIST: { + break; + } + + case _ITER_NEXT_LIST: { + _Py_UOpsSymType *next; + next = sym_new_unknown(ctx); + if (next == NULL) goto out_of_space; + stack_pointer[0] = next; + stack_pointer += 1; + break; + } + + case _ITER_CHECK_TUPLE: { + break; + } + + /* _ITER_JUMP_TUPLE is not a viable micro-op for tier 2 */ + + case _GUARD_NOT_EXHAUSTED_TUPLE: { + break; + } + + case _ITER_NEXT_TUPLE: { + _Py_UOpsSymType *next; + next = sym_new_unknown(ctx); + if (next == NULL) goto out_of_space; + stack_pointer[0] = next; + stack_pointer += 1; + break; + } + + case _ITER_CHECK_RANGE: { + break; + } + + /* _ITER_JUMP_RANGE is not a viable micro-op for tier 2 */ + + case _GUARD_NOT_EXHAUSTED_RANGE: { + break; + } + + case _ITER_NEXT_RANGE: { + _Py_UOpsSymType *iter; + _Py_UOpsSymType *next; + iter = stack_pointer[-1]; + next = sym_new_known_type(ctx, &PyLong_Type); + if (next == NULL) { + goto out_of_space; + } + (void)iter; + stack_pointer[0] = next; + stack_pointer += 1; + break; + } + + /* _FOR_ITER_GEN is not a viable micro-op for tier 2 */ + + case _BEFORE_ASYNC_WITH: { + _Py_UOpsSymType *exit; + _Py_UOpsSymType *res; + exit = sym_new_unknown(ctx); + if (exit == NULL) goto out_of_space; + res = sym_new_unknown(ctx); + if (res == NULL) 
goto out_of_space; + stack_pointer[-1] = exit; + stack_pointer[0] = res; + stack_pointer += 1; + break; + } + + case _BEFORE_WITH: { + _Py_UOpsSymType *exit; + _Py_UOpsSymType *res; + exit = sym_new_unknown(ctx); + if (exit == NULL) goto out_of_space; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = exit; + stack_pointer[0] = res; + stack_pointer += 1; + break; + } + + case _WITH_EXCEPT_START: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[0] = res; + stack_pointer += 1; + break; + } + + case _PUSH_EXC_INFO: { + _Py_UOpsSymType *prev_exc; + _Py_UOpsSymType *new_exc; + prev_exc = sym_new_unknown(ctx); + if (prev_exc == NULL) goto out_of_space; + new_exc = sym_new_unknown(ctx); + if (new_exc == NULL) goto out_of_space; + stack_pointer[-1] = prev_exc; + stack_pointer[0] = new_exc; + stack_pointer += 1; + break; + } + + case _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT: { + break; + } + + case _GUARD_KEYS_VERSION: { + break; + } + + case _LOAD_ATTR_METHOD_WITH_VALUES: { + _Py_UOpsSymType *attr; + _Py_UOpsSymType *self = NULL; + attr = sym_new_unknown(ctx); + if (attr == NULL) goto out_of_space; + self = sym_new_unknown(ctx); + if (self == NULL) goto out_of_space; + stack_pointer[-1] = attr; + if (1) stack_pointer[0] = self; + stack_pointer += ((1) ? 1 : 0); + break; + } + + case _LOAD_ATTR_METHOD_NO_DICT: { + _Py_UOpsSymType *attr; + _Py_UOpsSymType *self = NULL; + attr = sym_new_unknown(ctx); + if (attr == NULL) goto out_of_space; + self = sym_new_unknown(ctx); + if (self == NULL) goto out_of_space; + stack_pointer[-1] = attr; + if (1) stack_pointer[0] = self; + stack_pointer += ((1) ? 1 : 0); + break; + } + + case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { + _Py_UOpsSymType *attr; + attr = sym_new_unknown(ctx); + if (attr == NULL) goto out_of_space; + stack_pointer[-1] = attr; + stack_pointer += ((0) ? 
1 : 0); + break; + } + + case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { + _Py_UOpsSymType *attr; + attr = sym_new_unknown(ctx); + if (attr == NULL) goto out_of_space; + stack_pointer[-1] = attr; + stack_pointer += ((0) ? 1 : 0); + break; + } + + case _CHECK_ATTR_METHOD_LAZY_DICT: { + break; + } + + case _LOAD_ATTR_METHOD_LAZY_DICT: { + _Py_UOpsSymType *attr; + _Py_UOpsSymType *self = NULL; + attr = sym_new_unknown(ctx); + if (attr == NULL) goto out_of_space; + self = sym_new_unknown(ctx); + if (self == NULL) goto out_of_space; + stack_pointer[-1] = attr; + if (1) stack_pointer[0] = self; + stack_pointer += ((1) ? 1 : 0); + break; + } + + /* _INSTRUMENTED_CALL is not a viable micro-op for tier 2 */ + + /* _CALL is not a viable micro-op for tier 2 */ + + case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: { + _Py_UOpsSymType *null; + _Py_UOpsSymType *callable; + null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + sym_set_null(null); + sym_set_type(callable, &PyMethod_Type); + break; + } + + case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: { + _Py_UOpsSymType *func; + _Py_UOpsSymType *self; + func = sym_new_unknown(ctx); + if (func == NULL) goto out_of_space; + self = sym_new_unknown(ctx); + if (self == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = func; + stack_pointer[-1 - oparg] = self; + break; + } + + case _CHECK_PEP_523: { + break; + } + + case _CHECK_FUNCTION_EXACT_ARGS: { + _Py_UOpsSymType *self_or_null; + _Py_UOpsSymType *callable; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)this_instr->operand; + sym_set_type(callable, &PyFunction_Type); + (void)self_or_null; + (void)func_version; + break; + } + + case _CHECK_STACK_SPACE: { + break; + } + + case _INIT_CALL_PY_EXACT_ARGS: { + _Py_UOpsSymType **args; + _Py_UOpsSymType *self_or_null; + _Py_UOpsSymType *callable; + _Py_UOpsAbstractFrame *new_frame; + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + 
callable = stack_pointer[-2 - oparg]; + int argcount = oparg; + (void)callable; + PyFunctionObject *func = (PyFunctionObject *)(this_instr + 2)->operand; + if (func == NULL) { + goto error; + } + PyCodeObject *co = (PyCodeObject *)func->func_code; + assert(self_or_null != NULL); + assert(args != NULL); + if (sym_is_not_null(self_or_null)) { + // Bound method fiddling, same as _INIT_CALL_PY_EXACT_ARGS in VM + args--; + argcount++; + } + _Py_UOpsSymType **localsplus_start = ctx->n_consumed; + int n_locals_already_filled = 0; + // Can determine statically, so we interleave the new locals + // and make the current stack the new locals. + // This also sets up for true call inlining. + if (sym_is_known(self_or_null)) { + localsplus_start = args; + n_locals_already_filled = argcount; + } + new_frame = ctx_frame_new(ctx, co, localsplus_start, n_locals_already_filled, 0); + if (new_frame == NULL){ + goto out_of_space; + } + stack_pointer[-2 - oparg] = (_Py_UOpsSymType *)new_frame; + stack_pointer += -1 - oparg; + break; + } + + case _PUSH_FRAME: { + _Py_UOpsAbstractFrame *new_frame; + new_frame = (_Py_UOpsAbstractFrame *)stack_pointer[-1]; + stack_pointer += -1; + ctx->frame->stack_pointer = stack_pointer; + ctx->frame = new_frame; + ctx->curr_frame_depth++; + stack_pointer = new_frame->stack_pointer; + stack_pointer += ((0) ? 
1 : 0); + break; + } + + /* _CALL_PY_WITH_DEFAULTS is not a viable micro-op for tier 2 */ + + case _CALL_TYPE_1: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_STR_1: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_TUPLE_1: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + /* _CALL_ALLOC_AND_ENTER_INIT is not a viable micro-op for tier 2 */ + + case _EXIT_INIT_CHECK: { + stack_pointer += -1; + break; + } + + case _CALL_BUILTIN_CLASS: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_BUILTIN_O: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_BUILTIN_FAST: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_BUILTIN_FAST_WITH_KEYWORDS: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_LEN: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_ISINSTANCE: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + 
stack_pointer += -1 - oparg; + break; + } + + case _CALL_METHOD_DESCRIPTOR_O: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_METHOD_DESCRIPTOR_NOARGS: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + case _CALL_METHOD_DESCRIPTOR_FAST: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + break; + } + + /* _INSTRUMENTED_CALL_KW is not a viable micro-op for tier 2 */ + + /* _CALL_KW is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_CALL_FUNCTION_EX is not a viable micro-op for tier 2 */ + + /* _CALL_FUNCTION_EX is not a viable micro-op for tier 2 */ + + case _MAKE_FUNCTION: { + _Py_UOpsSymType *func; + func = sym_new_unknown(ctx); + if (func == NULL) goto out_of_space; + stack_pointer[-1] = func; + break; + } + + case _SET_FUNCTION_ATTRIBUTE: { + _Py_UOpsSymType *func; + func = sym_new_unknown(ctx); + if (func == NULL) goto out_of_space; + stack_pointer[-2] = func; + stack_pointer += -1; + break; + } + + case _BUILD_SLICE: { + _Py_UOpsSymType *slice; + slice = sym_new_unknown(ctx); + if (slice == NULL) goto out_of_space; + stack_pointer[-2 - ((oparg == 3) ? 1 : 0)] = slice; + stack_pointer += -1 - ((oparg == 3) ? 
1 : 0); + break; + } + + case _CONVERT_VALUE: { + _Py_UOpsSymType *result; + result = sym_new_unknown(ctx); + if (result == NULL) goto out_of_space; + stack_pointer[-1] = result; + break; + } + + case _FORMAT_SIMPLE: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-1] = res; + break; + } + + case _FORMAT_WITH_SPEC: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _COPY: { + _Py_UOpsSymType *bottom; + _Py_UOpsSymType *top; + bottom = stack_pointer[-1 - (oparg-1)]; + assert(oparg > 0); + top = bottom; + stack_pointer[0] = top; + stack_pointer += 1; + break; + } + + case _BINARY_OP: { + _Py_UOpsSymType *res; + res = sym_new_unknown(ctx); + if (res == NULL) goto out_of_space; + stack_pointer[-2] = res; + stack_pointer += -1; + break; + } + + case _SWAP: { + _Py_UOpsSymType *top; + _Py_UOpsSymType *bottom; + top = stack_pointer[-1]; + bottom = stack_pointer[-2 - (oparg-2)]; + stack_pointer[-2 - (oparg-2)] = top; + stack_pointer[-1] = bottom; + break; + } + + /* _INSTRUMENTED_INSTRUCTION is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_JUMP_FORWARD is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_JUMP_BACKWARD is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_POP_JUMP_IF_TRUE is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_POP_JUMP_IF_FALSE is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_POP_JUMP_IF_NONE is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_POP_JUMP_IF_NOT_NONE is not a viable micro-op for tier 2 */ + + case _GUARD_IS_TRUE_POP: { + stack_pointer += -1; + break; + } + + case _GUARD_IS_FALSE_POP: { + stack_pointer += -1; + break; + } + + case _GUARD_IS_NONE_POP: { + stack_pointer += -1; + break; + } + + case _GUARD_IS_NOT_NONE_POP: { + stack_pointer += -1; + break; + } + + case _JUMP_TO_TOP: { + break; + } + + case 
_SET_IP: { + break; + } + + case _SAVE_RETURN_OFFSET: { + break; + } + + case _EXIT_TRACE: { + break; + } + + case _CHECK_VALIDITY: { + break; + } + + case _LOAD_CONST_INLINE: { + _Py_UOpsSymType *value; + PyObject *ptr = (PyObject *)this_instr->operand; + value = sym_new_const(ctx, ptr); + if (value == NULL) { + goto out_of_space; + } + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _LOAD_CONST_INLINE_BORROW: { + _Py_UOpsSymType *value; + PyObject *ptr = (PyObject *)this_instr->operand; + value = sym_new_const(ctx, ptr); + if (value == NULL) { + goto out_of_space; + } + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _LOAD_CONST_INLINE_WITH_NULL: { + _Py_UOpsSymType *value; + _Py_UOpsSymType *null; + PyObject *ptr = (PyObject *)this_instr->operand; + value = sym_new_const(ctx, ptr); + if (value == NULL) { + goto out_of_space; + } + null = sym_new_null(ctx); + if (null == NULL) { + goto out_of_space; + } + stack_pointer[0] = value; + stack_pointer[1] = null; + stack_pointer += 2; + break; + } + + case _LOAD_CONST_INLINE_BORROW_WITH_NULL: { + _Py_UOpsSymType *value; + _Py_UOpsSymType *null; + PyObject *ptr = (PyObject *)this_instr->operand; + value = sym_new_const(ctx, ptr); + if (value == NULL) { + goto out_of_space; + } + null = sym_new_null(ctx); + if (null == NULL) { + goto out_of_space; + } + stack_pointer[0] = value; + stack_pointer[1] = null; + stack_pointer += 2; + break; + } + + case _CHECK_GLOBALS: { + break; + } + + case _CHECK_BUILTINS: { + break; + } + + case _INTERNAL_INCREMENT_OPT_COUNTER: { + stack_pointer += -1; + break; + } + diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index 444063d2148934..be89a26058e8e8 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -83,9 +83,11 @@ def clean_lines(text): Python/frozen_modules/*.h Python/generated_cases.c.h Python/executor_cases.c.h +Python/tier2_redundancy_eliminator_cases.c.h # 
not actually source Python/bytecodes.c +Python/tier2_redundancy_eliminator_bytecodes.c # mimalloc Objects/mimalloc/*.c diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index c75aff8c1723c1..14bcd85b9eae59 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -734,6 +734,6 @@ Modules/expat/xmlrole.c - error - ## other Modules/_io/_iomodule.c - _PyIO_Module - Modules/_sqlite/module.c - _sqlite3module - -Python/optimizer_analysis.c - _Py_PartitionRootNode_Type - +Python/optimizer_analysis.c - _Py_UOpsAbstractFrame_Type - Python/optimizer_analysis.c - _Py_UOpsAbstractInterpContext_Type - Modules/clinic/md5module.c.h _md5_md5 _keywords - diff --git a/Tools/cases_generator/README.md b/Tools/cases_generator/README.md index 7fec8a882336cd..d35a868b42ea9e 100644 --- a/Tools/cases_generator/README.md +++ b/Tools/cases_generator/README.md @@ -13,6 +13,9 @@ What's currently here: - `parser.py` helper for interactions with `parsing.py` - `tierN_generator.py`: a couple of driver scripts to read `Python/bytecodes.c` and write `Python/generated_cases.c.h` (and several other files) +- `tier2_abstract_generator.py`: reads `Python/bytecodes.c` and + `Python/tier2_redundancy_eliminator_bytecodes.c` and writes + `Python/tier2_redundancy_eliminator_cases.c.h` - `stack.py`: code to handle generalized stack effects - `cwriter.py`: code which understands tokens and how to format C code; main class: `CWriter` diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index b80fa66e2a159a..3497b7fcdf35d3 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -24,7 +24,6 @@ class Properties: pure: bool passthrough: bool - guard: bool def dump(self, indent: str) -> None: print(indent, end="") @@ -51,7 +50,6 @@ def from_list(properties: list["Properties"]) -> "Properties": has_free=any(p.has_free for p in properties), pure=all(p.pure for p in properties), 
passthrough=all(p.passthrough for p in properties), - guard=all(p.guard for p in properties), ) @@ -73,7 +71,6 @@ def from_list(properties: list["Properties"]) -> "Properties": has_free=False, pure=False, passthrough=False, - guard=False, ) @@ -273,7 +270,7 @@ def override_error( def convert_stack_item(item: parser.StackEffect) -> StackItem: return StackItem( - item.name, item.type, item.cond, (item.size or "1"), type_prop=item.type_prop + item.name, item.type, item.cond, (item.size or "1") ) @@ -473,7 +470,6 @@ def compute_properties(op: parser.InstDef) -> Properties: has_free=has_free, pure="pure" in op.annotations, passthrough=passthrough, - guard=passthrough and deopts, ) diff --git a/Tools/cases_generator/interpreter_definition.md b/Tools/cases_generator/interpreter_definition.md index e87aff43762b11..9b5733562f77b4 100644 --- a/Tools/cases_generator/interpreter_definition.md +++ b/Tools/cases_generator/interpreter_definition.md @@ -109,10 +109,7 @@ and a piece of C code describing its semantics:: NAME [":" type] [ "if" "(" C-expression ")" ] type: - NAME ["*"] | type_prop - - type_prop: - "&" "(" NAME ["+" NAME] ")" + NAME ["*"] stream: NAME "/" size @@ -142,26 +139,7 @@ The following definitions may occur: The optional `type` in an `object` is the C type. It defaults to `PyObject *`. The objects before the "--" are the objects on top of the stack at the start of the instruction. Those after the "--" are the objects on top of the stack at the -end of the instruction. When prefixed by a `&`, the `type` production rule follows the -`type_prop` production rule. This indicates the type of the value is of that specific type -after the operation. In this case, the type may also contain 64-bit refinement information -that is fetched from a previously defined operand in the instruction header, such as -a type version tag. This follows the format `type + refinement`. The list of possible types -and their refinements are below. 
They obey the following predicates: - - -* `PYLONG_TYPE`: `Py_TYPE(val) == &PyLong_Type` -* `PYFLOAT_TYPE`: `Py_TYPE(val) == &PyFloat_Type` -* `PYUNICODE_TYPE`: `Py_TYPE(val) == &PYUNICODE_TYPE` -* `NULL_TYPE`: `val == NULL` -* `GUARD_TYPE_VERSION_TYPE`: `type->tp_version_tag == auxillary` -* `GUARD_DORV_VALUES_TYPE`: `_PyDictOrValues_IsValues(obj)` -* `GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE`: - `_PyDictOrValues_IsValues(obj) || _PyObject_MakeInstanceAttributesFromDict(obj, dorv)` -* `GUARD_KEYS_VERSION_TYPE`: `owner_heap_type->ht_cached_keys->dk_version == auxillary` -* `PYMETHOD_TYPE`: `Py_TYPE(val) == &PyMethod_Type` -* `PYFUNCTION_TYPE_VERSION_TYPE`: - `PyFunction_Check(callable) && func->func_version == auxillary && code->co_argcount == oparg + (self_or_null != NULL)` +end of the instruction. An `inst` without `stack_effect` is a transitional form to allow the original C code diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py index 307919cb37ce1e..a8961f28babea1 100644 --- a/Tools/cases_generator/parsing.py +++ b/Tools/cases_generator/parsing.py @@ -75,11 +75,6 @@ class StackEffect(Node): size: str = "" # Optional `[size]` # Note: size cannot be combined with type or cond - # Optional `(type, refinement)` - type_prop: None | tuple[str, None | str] = field( - default_factory=lambda: None, init=True, compare=False, hash=False - ) - def __repr__(self) -> str: items = [self.name, self.type, self.cond, self.size] while items and items[-1] == "": @@ -260,25 +255,14 @@ def cache_effect(self) -> CacheEffect | None: @contextual def stack_effect(self) -> StackEffect | None: - # IDENTIFIER [':' [IDENTIFIER [TIMES]] ['&' '(' IDENTIFIER ['+' IDENTIFIER] ')']] ['if' '(' expression ')'] + # IDENTIFIER [':' IDENTIFIER [TIMES]] ['if' '(' expression ')'] # | IDENTIFIER '[' expression ']' if tkn := self.expect(lx.IDENTIFIER): type_text = "" - type_prop = None if self.expect(lx.COLON): - if i := self.expect(lx.IDENTIFIER): - type_text = 
i.text.strip() - if self.expect(lx.TIMES): - type_text += " *" - if self.expect(lx.AND): - consumed_bracket = self.expect(lx.LPAREN) is not None - type_prop_text = self.require(lx.IDENTIFIER).text.strip() - refinement = None - if self.expect(lx.PLUS): - refinement = self.require(lx.IDENTIFIER).text.strip() - type_prop = (type_prop_text, refinement) - if consumed_bracket: - self.require(lx.RPAREN) + type_text = self.require(lx.IDENTIFIER).text.strip() + if self.expect(lx.TIMES): + type_text += " *" cond_text = "" if self.expect(lx.IF): self.require(lx.LPAREN) @@ -295,7 +279,7 @@ def stack_effect(self) -> StackEffect | None: self.require(lx.RBRACKET) type_text = "PyObject **" size_text = size.text.strip() - return StackEffect(tkn.text, type_text, cond_text, size_text, type_prop) + return StackEffect(tkn.text, type_text, cond_text, size_text) return None @contextual diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py index f62ece43c1be7f..97a301142d59c7 100644 --- a/Tools/cases_generator/stack.py +++ b/Tools/cases_generator/stack.py @@ -168,11 +168,11 @@ def push(self, var: StackItem) -> str: self.top_offset.push(var) return "" - def flush(self, out: CWriter) -> None: + def flush(self, out: CWriter, cast_type: str = "PyObject *") -> None: out.start_line() for var in self.variables: if not var.peek: - cast = "(PyObject *)" if var.type else "" + cast = f"({cast_type})" if var.type else "" if var.name not in UNUSED and not var.is_array(): if var.condition: out.emit(f"if ({var.condition}) ") diff --git a/Tools/cases_generator/tier2_abstract_generator.py b/Tools/cases_generator/tier2_abstract_generator.py new file mode 100644 index 00000000000000..cc29b1660d26ed --- /dev/null +++ b/Tools/cases_generator/tier2_abstract_generator.py @@ -0,0 +1,235 @@ +"""Generate the cases for the tier 2 redundancy eliminator/abstract interpreter. +Reads the instruction definitions from bytecodes.c. 
"""Generate the cases for the tier 2 redundancy eliminator/abstract interpreter.

Reads the instruction definitions from bytecodes.c and
tier2_redundancy_eliminator_bytecodes.c.
Writes the cases to tier2_redundancy_eliminator_cases.c.h, which is #included
in Python/optimizer_analysis.c.
"""

import argparse
import os.path
import sys

from analyzer import (
    Analysis,
    Instruction,
    Uop,
    Part,
    analyze_files,
    Skip,
    StackItem,
    analysis_error,
)
from generators_common import (
    DEFAULT_INPUT,
    ROOT,
    write_header,
    emit_tokens,
    emit_to,
    replace_sync_sp,
)
from cwriter import CWriter
from typing import TextIO, Iterator
from lexer import Token
from stack import StackOffset, Stack, SizeMismatch, UNUSED

DEFAULT_OUTPUT = ROOT / "Python/tier2_redundancy_eliminator_cases.c.h"
DEFAULT_ABSTRACT_INPUT = ROOT / "Python/tier2_redundancy_eliminator_bytecodes.c"


def validate_uop(override: Uop, uop: Uop) -> None:
    """Check that *override* is a valid replacement for *uop* (not implemented yet)."""
    # To do
    pass


def type_name(var: StackItem) -> str:
    """Return the C type text placed in front of *var*'s name in a declaration.

    Arrays use ``_Py_UOpsSymType **``; otherwise an explicit ``var.type`` wins,
    and ``_Py_UOpsSymType *`` is the fallback.
    """
    if var.is_array():
        return "_Py_UOpsSymType **"
    if var.type:
        return var.type
    return "_Py_UOpsSymType *"


def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None:
    """Emit one C declaration per distinct stack variable of *uop*.

    Inputs are declared (in reverse order) unless *skip_inputs* is true;
    outputs are declared unless they are peeks.  Variables named ``unused``
    are never declared, and each name is declared at most once.  Conditional
    variables are initialised to ``NULL``.
    """
    declared = {"unused"}

    def declare(var: StackItem) -> None:
        if var.name in declared:
            return
        declared.add(var.name)
        if var.condition:
            out.emit(f"{type_name(var)}{var.name} = NULL;\n")
        else:
            out.emit(f"{type_name(var)}{var.name};\n")

    if not skip_inputs:
        for var in reversed(uop.stack.inputs):
            declare(var)
    for var in uop.stack.outputs:
        if not var.peek:
            declare(var)


def decref_inputs(
    out: CWriter,
    tkn: Token,
    tkn_iter: Iterator[Token],
    uop: Uop,
    stack: Stack,
    inst: Instruction | None,
) -> None:
    """Replacement handler for ``DECREF_INPUTS``: emit nothing in its place.

    The signature matches the other entries of ``replacement_funcs``; only
    *out*, *tkn* and *tkn_iter* are used here.
    """
    # Discard the three tokens that follow the macro name
    # (presumably "(", ")" and ";" -- TODO confirm against emit_tokens).
    next(tkn_iter)
    next(tkn_iter)
    next(tkn_iter)
    out.emit_at("", tkn)


def emit_default(out: CWriter, uop: Uop) -> None:
    """Emit the default body for a uop that has no abstract override.

    Every non-peek, non-``unused`` output is set to a fresh symbol: each
    element of an array output and every plain output gets
    ``sym_new_unknown``, except an output literally named ``null`` which gets
    ``sym_new_null``.  Each allocation is followed by an ``out_of_space`` check.
    """
    for var in uop.stack.outputs:
        if var.name != "unused" and not var.peek:
            if var.is_array():
                out.emit(f"for (int _i = {var.size}; --_i >= 0;) {{\n")
                out.emit(f"{var.name}[_i] = sym_new_unknown(ctx);\n")
                out.emit(f"if ({var.name}[_i] == NULL) goto out_of_space;\n")
                out.emit("}\n")
            elif var.name == "null":
                out.emit(f"{var.name} = sym_new_null(ctx);\n")
                out.emit(f"if ({var.name} == NULL) goto out_of_space;\n")
            else:
                out.emit(f"{var.name} = sym_new_unknown(ctx);\n")
                out.emit(f"if ({var.name} == NULL) goto out_of_space;\n")


def write_uop(
    override: Uop | None,
    uop: Uop,
    out: CWriter,
    stack: Stack,
    debug: bool,
    skip_inputs: bool,
) -> None:
    """Emit the body of one abstract-interpreter case.

    *override* is the hand-written abstract version of *uop*, or ``None`` to
    use the generated default.  Stack effects are taken from the override when
    there is one, otherwise from *uop*.  A ``SizeMismatch`` raised while
    tracking the stack is re-raised as an analysis error located at the first
    token of *uop*'s body.
    """
    try:
        prototype = override if override else uop
        is_override = override is not None
        out.start_line()
        # Pops are always tracked on the stack model, but the generated code
        # for them is only written when the inputs are wanted.
        for var in reversed(prototype.stack.inputs):
            res = stack.pop(var)
            if not skip_inputs:
                out.emit(res)
        if not prototype.properties.stores_sp:
            for var in prototype.stack.outputs:
                res = stack.push(var)
                if not var.peek or is_override:
                    out.emit(res)
        if debug:
            args = [
                var.name
                for var in prototype.stack.inputs
                if not var.peek or is_override
            ]
            out.emit(f'DEBUG_PRINTF({", ".join(args)});\n')
        if override:
            # Cache entries are read from the instruction operand; a size-4
            # entry is declared as an object pointer, others as an unsigned
            # integer of 16 bits per cache unit.
            for cache in uop.caches:
                if cache.name != "unused":
                    if cache.size == 4:
                        type_ = cast = "PyObject *"
                    else:
                        type_ = f"uint{cache.size*16}_t "
                        cast = f"uint{cache.size*16}_t"
                    out.emit(f"{type_}{cache.name} = ({cast})this_instr->operand;\n")
        if override:
            replacement_funcs = {
                "DECREF_INPUTS": decref_inputs,
                "SYNC_SP": replace_sync_sp,
            }
            emit_tokens(out, override, stack, None, replacement_funcs)
        else:
            emit_default(out, uop)

        if prototype.properties.stores_sp:
            for var in prototype.stack.outputs:
                if not var.peek or is_override:
                    out.emit(stack.push(var))
        out.start_line()
        stack.flush(out, cast_type="_Py_UOpsSymType *")
    except SizeMismatch as ex:
        raise analysis_error(ex.args[0], uop.body[0])


SKIPS = ("_EXTENDED_ARG",)


def generate_abstract_interpreter(
    filenames: list[str],
    abstract: Analysis,
    base: Analysis,
    outfile: TextIO,
    debug: bool,
) -> None:
    """Write one ``case`` block per viable base uop to *outfile*.

    *abstract* supplies the overriding uop definitions and *base* the full
    set of uops; every abstract uop must also exist in *base*.  *filenames*
    is only passed on to the generated-file header.
    """
    write_header(__file__, filenames, outfile)
    out = CWriter(outfile, 2, False)
    out.emit("\n")
    base_uop_names = {uop.name for uop in base.uops.values()}
    for abstract_uop_name in abstract.uops:
        assert abstract_uop_name in base_uop_names, \
            f"All abstract uops should override base uops, but {abstract_uop_name} is not."

    for uop in base.uops.values():
        override: Uop | None = None
        if uop.name in abstract.uops:
            override = abstract.uops[uop.name]
            validate_uop(override, uop)
        if uop.properties.tier_one_only:
            continue
        if uop.is_super():
            continue
        if not uop.is_viable():
            out.emit(f"/* {uop.name} is not a viable micro-op for tier 2 */\n\n")
            continue
        out.emit(f"case {uop.name}: {{\n")
        # Default cases never read their inputs, so they are neither declared
        # nor popped into variables.
        if override:
            declare_variables(override, out, skip_inputs=False)
        else:
            declare_variables(uop, out, skip_inputs=True)
        stack = Stack()
        write_uop(override, uop, out, stack, debug, skip_inputs=(override is None))
        out.start_line()
        out.emit("break;\n")
        out.emit("}")
        out.emit("\n\n")


def generate_tier2_abstract_from_files(
    filenames: list[str], outfilename: str, debug: bool=False
) -> None:
    """Generate *outfilename* from ``[base_file, abstract_file]`` in *filenames*."""
    assert len(filenames) == 2, "Need a base file and an abstract cases file."
    base = analyze_files([filenames[0]])
    abstract = analyze_files([filenames[1]])
    with open(outfilename, "w") as outfile:
        generate_abstract_interpreter(filenames, abstract, base, outfile, debug)


arg_parser = argparse.ArgumentParser(
    description="Generate the code for the tier 2 redundancy eliminator/abstract interpreter.",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)

arg_parser.add_argument(
    "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
)


arg_parser.add_argument("input", nargs=1, help="Abstract interpreter definition file")

arg_parser.add_argument(
    "base", nargs=argparse.REMAINDER, help="The base instruction definition file(s)"
)

arg_parser.add_argument("-d", "--debug", help="Insert debug calls", action="store_true")

if __name__ == "__main__":
    args = arg_parser.parse_args()
    if len(args.base) == 0:
        # No base definition file was given: fall back to the default one.
        # The default must go into ``args.base``; adding it to ``args.input``
        # would leave the base analysis empty and mix the base file into the
        # abstract definitions.  ``input`` is a required positional, so it
        # never needs a default here.
        args.base.append(DEFAULT_INPUT)
    abstract = analyze_files(args.input)
    base = analyze_files(args.base)
    with open(args.output, "w") as outfile:
        generate_abstract_interpreter(args.input, abstract, base, outfile, args.debug)

# ---------------------------------------------------------------------------
# Patch-series text that shared the last original physical line with this
# module (start of the next patch header), preserved verbatim:
# From f9f6156c5affc039d4ee6b6f4999daf0d5896428 Mon Sep 17 00:00:00 2001
# From: Mark Shannon
# Date: Tue, 13 Feb 2024 14:16:37 +0000
# Subject: [PATCH 099/126] GH-113710: Backedge counter improvements.
(GH-115166) --- Include/cpython/optimizer.h | 10 +++++-- Include/internal/pycore_interp.h | 6 ++-- Python/bytecodes.c | 29 ++++++++++--------- Python/generated_cases.c.h | 29 ++++++++++--------- Python/optimizer.c | 48 ++++++++++++++++++++++---------- Python/pylifecycle.c | 4 +-- Python/pystate.c | 10 ++----- 7 files changed, 81 insertions(+), 55 deletions(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 3928eca583ba5b..f710ca76b2ba24 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -71,6 +71,8 @@ typedef struct { PyAPI_FUNC(int) PyUnstable_Replace_Executor(PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutorObject *executor); +_PyOptimizerObject *_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject* optimizer); + PyAPI_FUNC(void) PyUnstable_SetOptimizer(_PyOptimizerObject* optimizer); PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void); @@ -80,8 +82,6 @@ PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int o int _PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer); -extern _PyOptimizerObject _PyOptimizer_Default; - void _Py_ExecutorInit(_PyExecutorObject *, _PyBloomFilter *); void _Py_ExecutorClear(_PyExecutorObject *); void _Py_BloomFilter_Init(_PyBloomFilter *); @@ -96,7 +96,11 @@ PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void); #define OPTIMIZER_BITS_IN_COUNTER 4 /* Minimum of 16 additional executions before retry */ -#define MINIMUM_TIER2_BACKOFF 4 +#define MIN_TIER2_BACKOFF 4 +#define MAX_TIER2_BACKOFF (15 - OPTIMIZER_BITS_IN_COUNTER) +#define OPTIMIZER_BITS_MASK ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1) +/* A value <= UINT16_MAX but large enough that when shifted is > UINT16_MAX */ +#define OPTIMIZER_UNREACHABLE_THRESHOLD UINT16_MAX #define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3 #define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6 diff --git a/Include/internal/pycore_interp.h 
b/Include/internal/pycore_interp.h index 485b1914a44885..c244d8966f238b 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -239,8 +239,10 @@ struct _is { struct callable_cache callable_cache; _PyOptimizerObject *optimizer; _PyExecutorObject *executor_list_head; - uint16_t optimizer_resume_threshold; - uint16_t optimizer_backedge_threshold; + /* These values are shifted and offset to speed up check in JUMP_BACKWARD */ + uint32_t optimizer_resume_threshold; + uint32_t optimizer_backedge_threshold; + uint32_t next_func_version; _rare_events rare_events; PyDict_WatchCallback builtins_dict_watcher; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f7c7e3669b7e6f..2ad5878f52e90b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2318,13 +2318,16 @@ dummy_func( assert(oparg <= INSTR_OFFSET()); JUMPBY(-oparg); #if ENABLE_SPECIALIZATION - this_instr[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER); + uint16_t counter = this_instr[1].cache; + this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER); /* We are using unsigned values, but we really want signed values, so - * do the 2s complement comparison manually */ - uint16_t ucounter = this_instr[1].cache + (1 << 15); - uint16_t threshold = tstate->interp->optimizer_backedge_threshold + (1 << 15); + * do the 2s complement adjustment manually */ + uint32_t offset_counter = counter ^ (1 << 15); + uint32_t threshold = tstate->interp->optimizer_backedge_threshold; + assert((threshold & OPTIMIZER_BITS_MASK) == 0); + // Use '>=' not '>' so that the optimizer/backoff bits do not effect the result. 
// Double-check that the opcode isn't instrumented or something: - if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) { + if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) { OPT_STAT_INC(attempts); _Py_CODEUNIT *start = this_instr; /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ @@ -2338,18 +2341,18 @@ dummy_func( // Rewind and enter the executor: assert(start->op.code == ENTER_EXECUTOR); next_instr = start; - this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); + this_instr[1].cache &= OPTIMIZER_BITS_MASK; } else { - int backoff = this_instr[1].cache & ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); - if (backoff < MINIMUM_TIER2_BACKOFF) { - backoff = MINIMUM_TIER2_BACKOFF; + int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK; + backoff++; + if (backoff < MIN_TIER2_BACKOFF) { + backoff = MIN_TIER2_BACKOFF; } - else if (backoff < 15 - OPTIMIZER_BITS_IN_COUNTER) { - backoff++; + else if (backoff > MAX_TIER2_BACKOFF) { + backoff = MAX_TIER2_BACKOFF; } - assert(backoff <= 15 - OPTIMIZER_BITS_IN_COUNTER); - this_instr[1].cache = ((1 << 16) - ((1 << OPTIMIZER_BITS_IN_COUNTER) << backoff)) | backoff; + this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff; } } #endif /* ENABLE_SPECIALIZATION */ diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index afb6650e5920fb..a49223e4db5318 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3263,13 +3263,16 @@ assert(oparg <= INSTR_OFFSET()); JUMPBY(-oparg); #if ENABLE_SPECIALIZATION - this_instr[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER); + uint16_t counter = this_instr[1].cache; + this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER); /* We are using unsigned values, but we really want signed values, so - * do the 2s complement comparison manually */ - uint16_t ucounter = this_instr[1].cache + (1 << 15); - uint16_t threshold = tstate->interp->optimizer_backedge_threshold 
+ (1 << 15); + * do the 2s complement adjustment manually */ + uint32_t offset_counter = counter ^ (1 << 15); + uint32_t threshold = tstate->interp->optimizer_backedge_threshold; + assert((threshold & OPTIMIZER_BITS_MASK) == 0); + // Use '>=' not '>' so that the optimizer/backoff bits do not effect the result. // Double-check that the opcode isn't instrumented or something: - if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) { + if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) { OPT_STAT_INC(attempts); _Py_CODEUNIT *start = this_instr; /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ @@ -3283,18 +3286,18 @@ // Rewind and enter the executor: assert(start->op.code == ENTER_EXECUTOR); next_instr = start; - this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); + this_instr[1].cache &= OPTIMIZER_BITS_MASK; } else { - int backoff = this_instr[1].cache & ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); - if (backoff < MINIMUM_TIER2_BACKOFF) { - backoff = MINIMUM_TIER2_BACKOFF; + int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK; + backoff++; + if (backoff < MIN_TIER2_BACKOFF) { + backoff = MIN_TIER2_BACKOFF; } - else if (backoff < 15 - OPTIMIZER_BITS_IN_COUNTER) { - backoff++; + else if (backoff > MAX_TIER2_BACKOFF) { + backoff = MAX_TIER2_BACKOFF; } - assert(backoff <= 15 - OPTIMIZER_BITS_IN_COUNTER); - this_instr[1].cache = ((1 << 16) - ((1 << OPTIMIZER_BITS_IN_COUNTER) << backoff)) | backoff; + this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff; } } #endif /* ENABLE_SPECIALIZATION */ diff --git a/Python/optimizer.c b/Python/optimizer.c index f31f83113d3f25..13df8c170a537c 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -109,6 +109,9 @@ never_optimize( _PyExecutorObject **exec, int Py_UNUSED(stack_entries)) { + /* Although it should be benign for this to be called, + * it shouldn't happen, so fail in debug builds. 
*/ + assert(0 && "never optimize should never be called"); return 0; } @@ -120,13 +123,19 @@ PyTypeObject _PyDefaultOptimizer_Type = { .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, }; -_PyOptimizerObject _PyOptimizer_Default = { +static _PyOptimizerObject _PyOptimizer_Default = { PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type) .optimize = never_optimize, - .resume_threshold = INT16_MAX, - .backedge_threshold = INT16_MAX, + .resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD, + .backedge_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD, }; +static uint32_t +shift_and_offset_threshold(uint16_t threshold) +{ + return (threshold << OPTIMIZER_BITS_IN_COUNTER) + (1 << 15); +} + _PyOptimizerObject * PyUnstable_GetOptimizer(void) { @@ -134,24 +143,33 @@ PyUnstable_GetOptimizer(void) if (interp->optimizer == &_PyOptimizer_Default) { return NULL; } - assert(interp->optimizer_backedge_threshold == interp->optimizer->backedge_threshold); - assert(interp->optimizer_resume_threshold == interp->optimizer->resume_threshold); + assert(interp->optimizer_backedge_threshold == + shift_and_offset_threshold(interp->optimizer->backedge_threshold)); + assert(interp->optimizer_resume_threshold == + shift_and_offset_threshold(interp->optimizer->resume_threshold)); Py_INCREF(interp->optimizer); return interp->optimizer; } -void -PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer) +_PyOptimizerObject * +_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer) { - PyInterpreterState *interp = _PyInterpreterState_GET(); if (optimizer == NULL) { optimizer = &_PyOptimizer_Default; } _PyOptimizerObject *old = interp->optimizer; Py_INCREF(optimizer); interp->optimizer = optimizer; - interp->optimizer_backedge_threshold = optimizer->backedge_threshold; - interp->optimizer_resume_threshold = optimizer->resume_threshold; + interp->optimizer_backedge_threshold = shift_and_offset_threshold(optimizer->backedge_threshold); + interp->optimizer_resume_threshold = 
shift_and_offset_threshold(optimizer->resume_threshold); + return old; +} + +void +PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyOptimizerObject *old = _Py_SetOptimizer(interp, optimizer); Py_DECREF(old); } @@ -860,10 +878,10 @@ PyUnstable_Optimizer_NewUOpOptimizer(void) return NULL; } opt->optimize = uop_optimize; - opt->resume_threshold = INT16_MAX; - // Need at least 3 iterations to settle specializations. - // A few lower bits of the counter are reserved for other flags. - opt->backedge_threshold = 16 << OPTIMIZER_BITS_IN_COUNTER; + opt->resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD; + // Need a few iterations to settle specializations, + // and to ammortize the cost of optimization. + opt->backedge_threshold = 16; return (PyObject *)opt; } @@ -950,7 +968,7 @@ PyUnstable_Optimizer_NewCounter(void) return NULL; } opt->base.optimize = counter_optimize; - opt->base.resume_threshold = INT16_MAX; + opt->base.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD; opt->base.backedge_threshold = 0; opt->count = 0; return (PyObject *)opt; diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 230018068d751c..7e4c07bb657d19 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1627,8 +1627,8 @@ finalize_modules(PyThreadState *tstate) // Invalidate all executors and turn off tier 2 optimizer _Py_Executors_InvalidateAll(interp); - Py_XDECREF(interp->optimizer); - interp->optimizer = &_PyOptimizer_Default; + _PyOptimizerObject *old = _Py_SetOptimizer(interp, NULL); + Py_XDECREF(old); // Stop watching __builtin__ modifications PyDict_Unwatch(0, interp->builtins); diff --git a/Python/pystate.c b/Python/pystate.c index 937c43033b068d..996f465825215f 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -625,9 +625,7 @@ init_interpreter(PyInterpreterState *interp, } interp->sys_profile_initialized = false; interp->sys_trace_initialized = false; - interp->optimizer = 
&_PyOptimizer_Default; - interp->optimizer_backedge_threshold = _PyOptimizer_Default.backedge_threshold; - interp->optimizer_resume_threshold = _PyOptimizer_Default.backedge_threshold; + (void)_Py_SetOptimizer(interp, NULL); interp->next_func_version = 1; interp->executor_list_head = NULL; if (interp != &runtime->_main_interpreter) { @@ -780,10 +778,8 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) tstate->_status.cleared = 0; } - Py_CLEAR(interp->optimizer); - interp->optimizer = &_PyOptimizer_Default; - interp->optimizer_backedge_threshold = _PyOptimizer_Default.backedge_threshold; - interp->optimizer_resume_threshold = _PyOptimizer_Default.backedge_threshold; + _PyOptimizerObject *old = _Py_SetOptimizer(interp, NULL); + Py_DECREF(old); /* It is possible that any of the objects below have a finalizer that runs Python code or otherwise relies on a thread state From 5719aa23ab7f1c7a5f03309ca4044078a98e7b59 Mon Sep 17 00:00:00 2001 From: qqwqqw689 <114795525+qqwqqw689@users.noreply.github.com> Date: Tue, 13 Feb 2024 22:23:10 +0800 Subject: [PATCH 100/126] gh-113437: Update documentation about PyUnicode_AsWideChar() function (GH-113455) --- Doc/c-api/unicode.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 5541eaa521803b..666ffe89605c56 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -854,7 +854,12 @@ wchar_t Support Copy the Unicode object contents into the :c:type:`wchar_t` buffer *wstr*. At most *size* :c:type:`wchar_t` characters are copied (excluding a possibly trailing null termination character). Return the number of :c:type:`wchar_t` characters - copied or ``-1`` in case of an error. Note that the resulting :c:expr:`wchar_t*` + copied or ``-1`` in case of an error. + + When *wstr* is ``NULL``, instead return the *size* that would be required + to store all of *unicode* including a terminating null. 
+ + Note that the resulting :c:expr:`wchar_t*` string may or may not be null-terminated. It is the responsibility of the caller to make sure that the :c:expr:`wchar_t*` string is null-terminated in case this is required by the application. Also, note that the :c:expr:`wchar_t*` string From de07941729b8899b187b8ef9690f9a74b2d6286b Mon Sep 17 00:00:00 2001 From: Christophe Papazian <114495376+christophe-papazian@users.noreply.github.com> Date: Tue, 13 Feb 2024 17:10:00 +0100 Subject: [PATCH 101/126] gh-115405: add versionadded tag for co_qualname in code objects documentation (#115411) --- Doc/reference/datamodel.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 885ee825c12296..88bc025c7c3fb4 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -1134,6 +1134,8 @@ Special read-only attributes * - .. attribute:: codeobject.co_qualname - The fully qualified function name + .. versionadded:: 3.11 + * - .. 
attribute:: codeobject.co_argcount - The total number of positional :term:`parameters ` (including positional-only parameters and parameters with default values) From 681778c56a9204d65b8857e7ceba57f2c638671d Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 13 Feb 2024 16:28:19 +0000 Subject: [PATCH 102/126] GH-113710: Improve `_SET_IP` and `_CHECK_VALIDITY` (GH-115248) --- Include/internal/pycore_uop_ids.h | 3 +- Include/internal/pycore_uop_metadata.h | 4 +- Python/bytecodes.c | 10 ++- Python/executor_cases.c.h | 13 +++- Python/optimizer.c | 5 +- Python/optimizer_analysis.c | 75 +++++++++++++------- Python/tier2_redundancy_eliminator_cases.c.h | 4 ++ 7 files changed, 79 insertions(+), 35 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index b2476e1c6e5c4b..9bb537d355055d 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -237,7 +237,8 @@ extern "C" { #define _CHECK_GLOBALS 384 #define _CHECK_BUILTINS 385 #define _INTERNAL_INCREMENT_OPT_COUNTER 386 -#define MAX_UOP_ID 386 +#define _CHECK_VALIDITY_AND_SET_IP 387 +#define MAX_UOP_ID 387 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 30dc5a881574e7..163a0320aa2298 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -198,7 +198,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GUARD_IS_NONE_POP] = HAS_DEOPT_FLAG, [_GUARD_IS_NOT_NONE_POP] = HAS_DEOPT_FLAG, [_JUMP_TO_TOP] = HAS_EVAL_BREAK_FLAG, - [_SET_IP] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, + [_SET_IP] = 0, [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG, [_EXIT_TRACE] = HAS_DEOPT_FLAG, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, @@ -209,6 +209,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_GLOBALS] = HAS_DEOPT_FLAG, [_CHECK_BUILTINS] = HAS_DEOPT_FLAG, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, + [_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG, }; const char *const 
_PyOpcode_uop_name[MAX_UOP_ID+1] = { @@ -264,6 +265,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_CHECK_PEP_523] = "_CHECK_PEP_523", [_CHECK_STACK_SPACE] = "_CHECK_STACK_SPACE", [_CHECK_VALIDITY] = "_CHECK_VALIDITY", + [_CHECK_VALIDITY_AND_SET_IP] = "_CHECK_VALIDITY_AND_SET_IP", [_COMPARE_OP] = "_COMPARE_OP", [_COMPARE_OP_FLOAT] = "_COMPARE_OP_FLOAT", [_COMPARE_OP_INT] = "_COMPARE_OP_INT", diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 2ad5878f52e90b..96b97ca4be6d93 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4037,10 +4037,9 @@ dummy_func( CHECK_EVAL_BREAKER(); } - op(_SET_IP, (--)) { + op(_SET_IP, (instr_ptr/4 --)) { TIER_TWO_ONLY - // TODO: Put the code pointer in `operand` to avoid indirection via `frame` - frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame)) + oparg; + frame->instr_ptr = (_Py_CODEUNIT *)instr_ptr; } op(_SAVE_RETURN_OFFSET, (--)) { @@ -4100,6 +4099,11 @@ dummy_func( exe->count++; } + op(_CHECK_VALIDITY_AND_SET_IP, (instr_ptr/4 --)) { + TIER_TWO_ONLY + DEOPT_IF(!current_executor->vm_data.valid); + frame->instr_ptr = (_Py_CODEUNIT *)instr_ptr; + } // END BYTECODES // diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 7d48d6a05a17b0..58d238320276f4 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3360,10 +3360,9 @@ } case _SET_IP: { - oparg = CURRENT_OPARG(); + PyObject *instr_ptr = (PyObject *)CURRENT_OPERAND(); TIER_TWO_ONLY - // TODO: Put the code pointer in `operand` to avoid indirection via `frame` - frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame)) + oparg; + frame->instr_ptr = (_Py_CODEUNIT *)instr_ptr; break; } @@ -3459,4 +3458,12 @@ break; } + case _CHECK_VALIDITY_AND_SET_IP: { + PyObject *instr_ptr = (PyObject *)CURRENT_OPERAND(); + TIER_TWO_ONLY + if (!current_executor->vm_data.valid) goto deoptimize; + frame->instr_ptr = (_Py_CODEUNIT *)instr_ptr; + break; + } + #undef TIER_TWO diff --git a/Python/optimizer.c b/Python/optimizer.c 
index 13df8c170a537c..efa19680c9b1f3 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -432,9 +432,8 @@ translate_bytecode_to_trace( top: // Jump here after _PUSH_FRAME or likely branches for (;;) { target = INSTR_IP(instr, code); - RESERVE_RAW(3, "epilogue"); // Always need space for _SET_IP, _CHECK_VALIDITY and _EXIT_TRACE - ADD_TO_TRACE(_SET_IP, target, 0, target); - ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, target); + RESERVE_RAW(2, "epilogue"); // Always need space for _SET_IP, _CHECK_VALIDITY and _EXIT_TRACE + ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, 0, (uintptr_t)instr, target); uint32_t opcode = instr->op.code; uint32_t oparg = instr->op.arg; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index e02ca4d6acf6c1..49974520de924d 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -652,35 +652,62 @@ uop_redundancy_eliminator( static void remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) { + /* Remove _SET_IP and _CHECK_VALIDITY where possible. + * _SET_IP is needed if the following instruction escapes or + * could error. _CHECK_VALIDITY is needed if the previous + * instruction could have escaped. 
*/ int last_set_ip = -1; - bool maybe_invalid = false; + bool may_have_escaped = false; for (int pc = 0; pc < buffer_size; pc++) { int opcode = buffer[pc].opcode; - if (opcode == _SET_IP) { - buffer[pc].opcode = NOP; - last_set_ip = pc; - } - else if (opcode == _CHECK_VALIDITY) { - if (maybe_invalid) { - maybe_invalid = false; - } - else { + switch (opcode) { + case _SET_IP: buffer[pc].opcode = NOP; - } - } - else if (op_is_end(opcode)) { - break; - } - else { - if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) { - maybe_invalid = true; - if (last_set_ip >= 0) { - buffer[last_set_ip].opcode = _SET_IP; + last_set_ip = pc; + break; + case _CHECK_VALIDITY: + if (may_have_escaped) { + may_have_escaped = false; } - } - if ((_PyUop_Flags[opcode] & HAS_ERROR_FLAG) || opcode == _PUSH_FRAME) { - if (last_set_ip >= 0) { - buffer[last_set_ip].opcode = _SET_IP; + else { + buffer[pc].opcode = NOP; + } + break; + case _CHECK_VALIDITY_AND_SET_IP: + if (may_have_escaped) { + may_have_escaped = false; + buffer[pc].opcode = _CHECK_VALIDITY; + } + else { + buffer[pc].opcode = NOP; + } + last_set_ip = pc; + break; + case _JUMP_TO_TOP: + case _EXIT_TRACE: + return; + default: + { + bool needs_ip = false; + if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) { + needs_ip = true; + may_have_escaped = true; + } + if (_PyUop_Flags[opcode] & HAS_ERROR_FLAG) { + needs_ip = true; + } + if (opcode == _PUSH_FRAME) { + needs_ip = true; + } + if (needs_ip && last_set_ip >= 0) { + if (buffer[last_set_ip].opcode == _CHECK_VALIDITY) { + buffer[last_set_ip].opcode = _CHECK_VALIDITY_AND_SET_IP; + } + else { + assert(buffer[last_set_ip].opcode == _NOP); + buffer[last_set_ip].opcode = _SET_IP; + } + last_set_ip = -1; } } } diff --git a/Python/tier2_redundancy_eliminator_cases.c.h b/Python/tier2_redundancy_eliminator_cases.c.h index 77a7f5b2360c3b..c2b7bbaf1c4481 100644 --- a/Python/tier2_redundancy_eliminator_cases.c.h +++ b/Python/tier2_redundancy_eliminator_cases.c.h @@ -1674,3 +1674,7 @@ break; } + case 
_CHECK_VALIDITY_AND_SET_IP: { + break; + } + From 02b63239f1e91f8a03c0b455c5201e6d07f642ab Mon Sep 17 00:00:00 2001 From: Ezio Melotti Date: Tue, 13 Feb 2024 18:07:16 +0100 Subject: [PATCH 103/126] Remove more stray backticks from NEWS files (#115374) * Remove stray backtick from NEWS file * Remove more stray backticks from 3.12.0a1.rst * Remove another stray backtick in 3.13.0a1.rst --- Misc/NEWS.d/3.12.0a1.rst | 4 ++-- Misc/NEWS.d/3.12.0b1.rst | 4 ++-- Misc/NEWS.d/3.13.0a1.rst | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Misc/NEWS.d/3.12.0a1.rst b/Misc/NEWS.d/3.12.0a1.rst index f192bf086ed259..4739e0fb89a4a8 100644 --- a/Misc/NEWS.d/3.12.0a1.rst +++ b/Misc/NEWS.d/3.12.0a1.rst @@ -2722,7 +2722,7 @@ on future on an error - e.g. TimeoutError or KeyboardInterrupt. Fix a :mod:`sqlite3` regression where ``*args`` and ``**kwds`` were incorrectly relayed from :py:func:`~sqlite3.connect` to the :class:`~sqlite3.Connection` factory. The regression was introduced in -3.11a1 with PR 24421 (:gh:`85128`). Patch by Erlend E. Aasland.` +3.11a1 with PR 24421 (:gh:`85128`). Patch by Erlend E. Aasland. .. @@ -2988,7 +2988,7 @@ Kumar Aditya. .. section: Library Fix crash in :class:`struct.Struct` when it was not completely initialized -by initializing it in :meth:`~object.__new__``. Patch by Kumar Aditya. +by initializing it in :meth:`~object.__new__`. Patch by Kumar Aditya. .. diff --git a/Misc/NEWS.d/3.12.0b1.rst b/Misc/NEWS.d/3.12.0b1.rst index 21f2c748f40548..d9804be764c9a9 100644 --- a/Misc/NEWS.d/3.12.0b1.rst +++ b/Misc/NEWS.d/3.12.0b1.rst @@ -563,10 +563,10 @@ Complex function calls are now faster and consume no C stack space. .. nonce: fvgsCl .. section: Core and Builtins -``len()`` for 0-dimensional :class:`memoryview`` objects (such as +``len()`` for 0-dimensional :class:`memoryview` objects (such as ``memoryview(ctypes.c_uint8(42))``) now raises a :exc:`TypeError`. 
Previously this returned ``1``, which was not consistent with ``mem_0d[0]`` -raising an :exc:`IndexError``. +raising an :exc:`IndexError`. .. diff --git a/Misc/NEWS.d/3.13.0a1.rst b/Misc/NEWS.d/3.13.0a1.rst index d385b6a4504f97..16715bee5a8e49 100644 --- a/Misc/NEWS.d/3.13.0a1.rst +++ b/Misc/NEWS.d/3.13.0a1.rst @@ -4380,7 +4380,7 @@ Patch by Victor Stinner. .. nonce: I6MQhb .. section: Library -:pep:`594`: Remove the :mod:`!cgi`` and :mod:`!cgitb` modules, deprecated in +:pep:`594`: Remove the :mod:`!cgi` and :mod:`!cgitb` modules, deprecated in Python 3.11. Patch by Victor Stinner. .. From 225cd55fe676d128518af31f53b63a591fc4a569 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Tue, 13 Feb 2024 20:45:37 +0300 Subject: [PATCH 104/126] gh-115417: Remove accidentally left debugging print (#115418) gh-115417: Remove debugging print --- Modules/_testcapi/time.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Modules/_testcapi/time.c b/Modules/_testcapi/time.c index 4fbf7dd14ebb66..57eb9135d30029 100644 --- a/Modules/_testcapi/time.c +++ b/Modules/_testcapi/time.c @@ -75,7 +75,6 @@ test_pytime_time(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args)) printf("ERR! %d\n", (int)t); return NULL; } - printf("... 
%d\n", (int)t); return pytime_as_float(t); } From 4deb70590e1829081fb6d110e9dc6d060a49d95d Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 13 Feb 2024 14:35:06 -0500 Subject: [PATCH 105/126] gh-115383: Use runner version to compute config.cache key (#115409) --- .github/workflows/build.yml | 16 ++++++++++++---- .github/workflows/reusable-macos.yml | 4 +++- .github/workflows/reusable-ubuntu.yml | 4 +++- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0a2f6da50ed8a0..70db2a6250e8da 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -131,11 +131,13 @@ jobs: - uses: actions/setup-python@v5 with: python-version: '3.x' + - name: Runner image version + run: echo "IMAGE_VERSION=${ImageVersion}" >> $GITHUB_ENV - name: Restore config.cache uses: actions/cache@v4 with: path: config.cache - key: ${{ github.job }}-${{ runner.os }}-${{ needs.check_source.outputs.config_hash }}-${{ env.pythonLocation }} + key: ${{ github.job }}-${{ runner.os }}-${{ env.IMAGE_VERSION }}-${{ needs.check_source.outputs.config_hash }}-${{ env.pythonLocation }} - name: Install Dependencies run: sudo ./.github/workflows/posix-deps-apt.sh - name: Add ccache to PATH @@ -258,11 +260,13 @@ jobs: LD_LIBRARY_PATH: ${{ github.workspace }}/multissl/openssl/${{ matrix.openssl_ver }}/lib steps: - uses: actions/checkout@v4 + - name: Runner image version + run: echo "IMAGE_VERSION=${ImageVersion}" >> $GITHUB_ENV - name: Restore config.cache uses: actions/cache@v4 with: path: config.cache - key: ${{ github.job }}-${{ runner.os }}-${{ needs.check_source.outputs.config_hash }} + key: ${{ github.job }}-${{ runner.os }}-${{ env.IMAGE_VERSION }}-${{ needs.check_source.outputs.config_hash }} - name: Register gcc problem matcher run: echo "::add-matcher::.github/problem-matchers/gcc.json" - name: Install Dependencies @@ -341,11 +345,13 @@ jobs: run: mkdir -p $CPYTHON_RO_SRCDIR $CPYTHON_BUILDDIR - name: Bind mount 
sources read-only run: sudo mount --bind -o ro $GITHUB_WORKSPACE $CPYTHON_RO_SRCDIR + - name: Runner image version + run: echo "IMAGE_VERSION=${ImageVersion}" >> $GITHUB_ENV - name: Restore config.cache uses: actions/cache@v4 with: path: ${{ env.CPYTHON_BUILDDIR }}/config.cache - key: ${{ github.job }}-${{ runner.os }}-${{ needs.check_source.outputs.config_hash }} + key: ${{ github.job }}-${{ runner.os }}-${{ env.IMAGE_VERSION }}-${{ needs.check_source.outputs.config_hash }} - name: Configure CPython out-of-tree working-directory: ${{ env.CPYTHON_BUILDDIR }} run: | @@ -420,11 +426,13 @@ jobs: ASAN_OPTIONS: detect_leaks=0:allocator_may_return_null=1:handle_segv=0 steps: - uses: actions/checkout@v4 + - name: Runner image version + run: echo "IMAGE_VERSION=${ImageVersion}" >> $GITHUB_ENV - name: Restore config.cache uses: actions/cache@v4 with: path: config.cache - key: ${{ github.job }}-${{ runner.os }}-${{ needs.check_source.outputs.config_hash }} + key: ${{ github.job }}-${{ runner.os }}-${{ env.IMAGE_VERSION }}-${{ needs.check_source.outputs.config_hash }} - name: Register gcc problem matcher run: echo "::add-matcher::.github/problem-matchers/gcc.json" - name: Install Dependencies diff --git a/.github/workflows/reusable-macos.yml b/.github/workflows/reusable-macos.yml index cad619b78ce5f2..ba62d9568c6b80 100644 --- a/.github/workflows/reusable-macos.yml +++ b/.github/workflows/reusable-macos.yml @@ -28,11 +28,13 @@ jobs: runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 + - name: Runner image version + run: echo "IMAGE_VERSION=${ImageVersion}" >> $GITHUB_ENV - name: Restore config.cache uses: actions/cache@v4 with: path: config.cache - key: ${{ github.job }}-${{ matrix.os }}-${{ inputs.config_hash }} + key: ${{ github.job }}-${{ matrix.os }}-${{ env.IMAGE_VERSION }}-${{ inputs.config_hash }} - name: Install Homebrew dependencies run: brew install pkg-config openssl@3.0 xz gdbm tcl-tk - name: Configure CPython diff --git 
a/.github/workflows/reusable-ubuntu.yml b/.github/workflows/reusable-ubuntu.yml index 0cbad57f0c6572..ee64fe62a0bd0a 100644 --- a/.github/workflows/reusable-ubuntu.yml +++ b/.github/workflows/reusable-ubuntu.yml @@ -52,11 +52,13 @@ jobs: run: mkdir -p $CPYTHON_RO_SRCDIR $CPYTHON_BUILDDIR - name: Bind mount sources read-only run: sudo mount --bind -o ro $GITHUB_WORKSPACE $CPYTHON_RO_SRCDIR + - name: Runner image version + run: echo "IMAGE_VERSION=${ImageVersion}" >> $GITHUB_ENV - name: Restore config.cache uses: actions/cache@v4 with: path: ${{ env.CPYTHON_BUILDDIR }}/config.cache - key: ${{ github.job }}-${{ runner.os }}-${{ inputs.config_hash }} + key: ${{ github.job }}-${{ runner.os }}-${{ env.IMAGE_VERSION }}-${{ inputs.config_hash }} - name: Configure CPython out-of-tree working-directory: ${{ env.CPYTHON_BUILDDIR }} run: ${{ inputs.options }} From 206f73dc5f1b4c3c81119808aa7fd9038661cf90 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Tue, 13 Feb 2024 23:49:13 +0300 Subject: [PATCH 106/126] gh-115391: Fix compiler warning in `Objects/longobject.c` (GH-115368) --- Objects/longobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index 932111f58425f2..fe782983334323 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1135,7 +1135,7 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int endianness) if (n <= 0) { // nothing to do! } - else if (n <= sizeof(cv.b)) { + else if (n <= (Py_ssize_t)sizeof(cv.b)) { #if PY_LITTLE_ENDIAN if (little_endian) { memcpy(buffer, cv.b, n); From 514b1c91b8651e8ab9129a34b7482033d2fd4e5b Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 13 Feb 2024 14:56:49 -0700 Subject: [PATCH 107/126] gh-76785: Improved Subinterpreters Compatibility with 3.12 (gh-115424) For the most part, these changes make it substantially easier to backport subinterpreter-related code to 3.12, especially the related modules (e.g. _xxsubinterpreters).
The main motivation is to support releasing a PyPI package with the 3.13 capabilities compiled for 3.12. A lot of the changes here involve either hiding details behind macros/functions or splitting up some files. --- Include/internal/pycore_code.h | 9 + Include/internal/pycore_crossinterp.h | 26 + Include/internal/pycore_tstate.h | 7 + Makefile.pre.in | 8 + Modules/_interpreters_common.h | 13 + Modules/_xxinterpchannelsmodule.c | 34 +- Modules/_xxinterpqueuesmodule.c | 22 +- Modules/_xxsubinterpretersmodule.c | 55 +- Python/crossinterp.c | 720 ++------------------ Python/crossinterp_data_lookup.h | 594 ++++++++++++++++ Python/crossinterp_exceptions.h | 80 +++ Tools/c-analyzer/cpython/globals-to-fix.tsv | 8 +- 12 files changed, 857 insertions(+), 719 deletions(-) create mode 100644 Modules/_interpreters_common.h create mode 100644 Python/crossinterp_data_lookup.h create mode 100644 Python/crossinterp_exceptions.h diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index fdd5918228455d..85536162132072 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -8,6 +8,15 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif + +// We hide some of the newer PyCodeObject fields behind macros. +// This helps with backporting certain changes to 3.12. 
+#define _PyCode_HAS_EXECUTORS(CODE) \ + (CODE->co_executors != NULL) +#define _PyCode_HAS_INSTRUMENTATION(CODE) \ + (CODE->_co_instrumentation_version > 0) + + #define CODE_MAX_WATCHERS 8 /* PEP 659 diff --git a/Include/internal/pycore_crossinterp.h b/Include/internal/pycore_crossinterp.h index d6e297a7e8e6db..63abef864ff87f 100644 --- a/Include/internal/pycore_crossinterp.h +++ b/Include/internal/pycore_crossinterp.h @@ -87,6 +87,11 @@ struct _xid { PyAPI_FUNC(_PyCrossInterpreterData *) _PyCrossInterpreterData_New(void); PyAPI_FUNC(void) _PyCrossInterpreterData_Free(_PyCrossInterpreterData *data); +#define _PyCrossInterpreterData_DATA(DATA) ((DATA)->data) +#define _PyCrossInterpreterData_OBJ(DATA) ((DATA)->obj) +#define _PyCrossInterpreterData_INTERPID(DATA) ((DATA)->interpid) +// Users should not need getters for "new_object" or "free". + /* defining cross-interpreter data */ @@ -101,6 +106,25 @@ PyAPI_FUNC(int) _PyCrossInterpreterData_InitWithSize( PyAPI_FUNC(void) _PyCrossInterpreterData_Clear( PyInterpreterState *, _PyCrossInterpreterData *); +// Normally the Init* functions are sufficient. The only time +// additional initialization might be needed is to set the "free" func, +// though that should be infrequent. +#define _PyCrossInterpreterData_SET_FREE(DATA, FUNC) \ + do { \ + (DATA)->free = (FUNC); \ + } while (0) +// Additionally, some shareable types are essentially light wrappers +// around other shareable types. The crossinterpdatafunc of the wrapper +// can often be implemented by calling the wrapped object's +// crossinterpdatafunc and then changing the "new_object" function. +// We have _PyCrossInterpreterData_SET_NEW_OBJECT() here for that, +// but might be better to have a function like +// _PyCrossInterpreterData_AdaptToWrapper() instead. 
+#define _PyCrossInterpreterData_SET_NEW_OBJECT(DATA, FUNC) \ + do { \ + (DATA)->new_object = (FUNC); \ + } while (0) + /* using cross-interpreter data */ @@ -170,6 +194,8 @@ extern void _PyXI_Fini(PyInterpreterState *interp); extern PyStatus _PyXI_InitTypes(PyInterpreterState *interp); extern void _PyXI_FiniTypes(PyInterpreterState *interp); +#define _PyInterpreterState_GetXIState(interp) (&(interp)->xi) + /***************************/ /* short-term data sharing */ diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index 77a1dc59163d21..3e8fcf5b6ec1fa 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -13,6 +13,13 @@ extern "C" { #include "pycore_brc.h" // struct _brc_thread_state +static inline void +_PyThreadState_SetWhence(PyThreadState *tstate, int whence) +{ + tstate->_whence = whence; +} + + // Every PyThreadState is actually allocated as a _PyThreadStateImpl. The // PyThreadState fields are exposed as part of the C API, although most fields // are intended to be private. The _PyThreadStateImpl fields not exposed. 
diff --git a/Makefile.pre.in b/Makefile.pre.in index d3b18acad61ce5..4b9d9c171b9efb 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1671,6 +1671,14 @@ Modules/pwdmodule.o: $(srcdir)/Modules/pwdmodule.c $(srcdir)/Modules/posixmodule Modules/signalmodule.o: $(srcdir)/Modules/signalmodule.c $(srcdir)/Modules/posixmodule.h +Modules/_xxsubinterpretersmodule.o: $(srcdir)/Modules/_xxsubinterpretersmodule.c $(srcdir)/Modules/_interpreters_common.h + +Modules/_xxinterpqueuesmodule.o: $(srcdir)/Modules/_xxinterpqueuesmodule.c $(srcdir)/Modules/_interpreters_common.h + +Modules/_xxinterpchannelsmodule.o: $(srcdir)/Modules/_xxinterpchannelsmodule.c $(srcdir)/Modules/_interpreters_common.h + +Python/crossinterp.o: $(srcdir)/Python/crossinterp.c $(srcdir)/Python/crossinterp_data_lookup.h $(srcdir)/Python/crossinterp_exceptions.h + Python/dynload_shlib.o: $(srcdir)/Python/dynload_shlib.c Makefile $(CC) -c $(PY_CORE_CFLAGS) \ -DSOABI='"$(SOABI)"' \ diff --git a/Modules/_interpreters_common.h b/Modules/_interpreters_common.h new file mode 100644 index 00000000000000..5661a26d8790d1 --- /dev/null +++ b/Modules/_interpreters_common.h @@ -0,0 +1,13 @@ + +#define _RESOLVE_MODINIT_FUNC_NAME(NAME) \ + PyInit_ ## NAME +#define RESOLVE_MODINIT_FUNC_NAME(NAME) \ + _RESOLVE_MODINIT_FUNC_NAME(NAME) + + +static int +ensure_xid_class(PyTypeObject *cls, crossinterpdatafunc getdata) +{ + //assert(cls->tp_flags & Py_TPFLAGS_HEAPTYPE); + return _PyCrossInterpreterData_RegisterClass(cls, getdata); +} diff --git a/Modules/_xxinterpchannelsmodule.c b/Modules/_xxinterpchannelsmodule.c index 4e9b8a82a3f630..a2974aced12ca0 100644 --- a/Modules/_xxinterpchannelsmodule.c +++ b/Modules/_xxinterpchannelsmodule.c @@ -17,6 +17,8 @@ #include // sched_yield() #endif +#include "_interpreters_common.h" + /* This module has the following process-global state: @@ -80,7 +82,9 @@ channel's queue, which are safely managed via the _PyCrossInterpreterData_*() API.. 
The module does not create any objects that are shared globally. */ -#define MODULE_NAME "_xxinterpchannels" +#define MODULE_NAME _xxinterpchannels +#define MODULE_NAME_STR Py_STRINGIFY(MODULE_NAME) +#define MODINIT_FUNC_NAME RESOLVE_MODINIT_FUNC_NAME(MODULE_NAME) #define GLOBAL_MALLOC(TYPE) \ @@ -101,7 +105,7 @@ static int register_xid_class(PyTypeObject *cls, crossinterpdatafunc shared, struct xid_class_registry *classes) { - int res = _PyCrossInterpreterData_RegisterClass(cls, shared); + int res = ensure_xid_class(cls, shared); if (res == 0) { assert(classes->count < MAX_XID_CLASSES); // The class has refs elsewhere, so we need to incref here. @@ -167,7 +171,7 @@ _get_current_interp(void) static PyObject * _get_current_module(void) { - PyObject *name = PyUnicode_FromString(MODULE_NAME); + PyObject *name = PyUnicode_FromString(MODULE_NAME_STR); if (name == NULL) { return NULL; } @@ -217,7 +221,7 @@ add_new_exception(PyObject *mod, const char *name, PyObject *base) } #define ADD_NEW_EXCEPTION(MOD, NAME, BASE) \ - add_new_exception(MOD, MODULE_NAME "." Py_STRINGIFY(NAME), BASE) + add_new_exception(MOD, MODULE_NAME_STR "." Py_STRINGIFY(NAME), BASE) static PyTypeObject * add_new_type(PyObject *mod, PyType_Spec *spec, crossinterpdatafunc shared, @@ -299,7 +303,7 @@ _get_current_module_state(void) if (mod == NULL) { // XXX import it? 
PyErr_SetString(PyExc_RuntimeError, - MODULE_NAME " module not imported yet"); + MODULE_NAME_STR " module not imported yet"); return NULL; } module_state *state = get_module_state(mod); @@ -784,7 +788,7 @@ _channelqueue_clear_interpreter(_channelqueue *queue, int64_t interpid) while (next != NULL) { _channelitem *item = next; next = item->next; - if (item->data->interpid == interpid) { + if (_PyCrossInterpreterData_INTERPID(item->data) == interpid) { if (prev == NULL) { queue->first = item->next; } @@ -2126,7 +2130,7 @@ static PyStructSequence_Field channel_info_fields[] = { }; static PyStructSequence_Desc channel_info_desc = { - .name = MODULE_NAME ".ChannelInfo", + .name = MODULE_NAME_STR ".ChannelInfo", .doc = channel_info_doc, .fields = channel_info_fields, .n_in_sequence = 8, @@ -2474,10 +2478,11 @@ struct _channelid_xid { static PyObject * _channelid_from_xid(_PyCrossInterpreterData *data) { - struct _channelid_xid *xid = (struct _channelid_xid *)data->data; + struct _channelid_xid *xid = \ + (struct _channelid_xid *)_PyCrossInterpreterData_DATA(data); // It might not be imported yet, so we can't use _get_current_module(). 
- PyObject *mod = PyImport_ImportModule(MODULE_NAME); + PyObject *mod = PyImport_ImportModule(MODULE_NAME_STR); if (mod == NULL) { return NULL; } @@ -2530,7 +2535,8 @@ _channelid_shared(PyThreadState *tstate, PyObject *obj, { return -1; } - struct _channelid_xid *xid = (struct _channelid_xid *)data->data; + struct _channelid_xid *xid = \ + (struct _channelid_xid *)_PyCrossInterpreterData_DATA(data); xid->cid = ((channelid *)obj)->cid; xid->end = ((channelid *)obj)->end; xid->resolve = ((channelid *)obj)->resolve; @@ -2601,7 +2607,7 @@ static PyType_Slot channelid_typeslots[] = { }; static PyType_Spec channelid_typespec = { - .name = MODULE_NAME ".ChannelID", + .name = MODULE_NAME_STR ".ChannelID", .basicsize = sizeof(channelid), .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE), @@ -2680,7 +2686,7 @@ _channelend_shared(PyThreadState *tstate, PyObject *obj, if (res < 0) { return -1; } - data->new_object = _channelend_from_xid; + _PyCrossInterpreterData_SET_NEW_OBJECT(data, _channelend_from_xid); return 0; } @@ -3379,7 +3385,7 @@ module_free(void *mod) static struct PyModuleDef moduledef = { .m_base = PyModuleDef_HEAD_INIT, - .m_name = MODULE_NAME, + .m_name = MODULE_NAME_STR, .m_doc = module_doc, .m_size = sizeof(module_state), .m_methods = module_functions, @@ -3390,7 +3396,7 @@ static struct PyModuleDef moduledef = { }; PyMODINIT_FUNC -PyInit__xxinterpchannels(void) +MODINIT_FUNC_NAME(void) { return PyModuleDef_Init(&moduledef); } diff --git a/Modules/_xxinterpqueuesmodule.c b/Modules/_xxinterpqueuesmodule.c index 537ba9188055dd..7d8c67f49fefb8 100644 --- a/Modules/_xxinterpqueuesmodule.c +++ b/Modules/_xxinterpqueuesmodule.c @@ -8,8 +8,12 @@ #include "Python.h" #include "pycore_crossinterp.h" // struct _xid +#include "_interpreters_common.h" -#define MODULE_NAME "_xxinterpqueues" + +#define MODULE_NAME _xxinterpqueues +#define MODULE_NAME_STR Py_STRINGIFY(MODULE_NAME) +#define MODINIT_FUNC_NAME 
RESOLVE_MODINIT_FUNC_NAME(MODULE_NAME) #define GLOBAL_MALLOC(TYPE) \ @@ -64,7 +68,7 @@ _get_current_interp(void) static PyObject * _get_current_module(void) { - PyObject *name = PyUnicode_FromString(MODULE_NAME); + PyObject *name = PyUnicode_FromString(MODULE_NAME_STR); if (name == NULL) { return NULL; } @@ -602,7 +606,7 @@ _queue_clear_interpreter(_queue *queue, int64_t interpid) while (next != NULL) { _queueitem *item = next; next = item->next; - if (item->data->interpid == interpid) { + if (_PyCrossInterpreterData_INTERPID(item->data) == interpid) { if (prev == NULL) { queue->items.first = item->next; } @@ -1062,7 +1066,7 @@ set_external_queue_type(PyObject *module, PyTypeObject *queue_type) } state->queue_type = (PyTypeObject *)Py_NewRef(queue_type); - if (_PyCrossInterpreterData_RegisterClass(queue_type, _queueobj_shared) < 0) { + if (ensure_xid_class(queue_type, _queueobj_shared) < 0) { return -1; } @@ -1130,7 +1134,7 @@ _queueid_xid_free(void *data) static PyObject * _queueobj_from_xid(_PyCrossInterpreterData *data) { - int64_t qid = *(int64_t *)data->data; + int64_t qid = *(int64_t *)_PyCrossInterpreterData_DATA(data); PyObject *qidobj = PyLong_FromLongLong(qid); if (qidobj == NULL) { return NULL; @@ -1140,7 +1144,7 @@ _queueobj_from_xid(_PyCrossInterpreterData *data) if (mod == NULL) { // XXX import it? 
PyErr_SetString(PyExc_RuntimeError, - MODULE_NAME " module not imported yet"); + MODULE_NAME_STR " module not imported yet"); return NULL; } @@ -1181,7 +1185,7 @@ _queueobj_shared(PyThreadState *tstate, PyObject *queueobj, _PyCrossInterpreterData_Init(data, tstate->interp, raw, NULL, _queueobj_from_xid); Py_DECREF(qidobj); - data->free = _queueid_xid_free; + _PyCrossInterpreterData_SET_FREE(data, _queueid_xid_free); return 0; } @@ -1670,7 +1674,7 @@ module_free(void *mod) static struct PyModuleDef moduledef = { .m_base = PyModuleDef_HEAD_INIT, - .m_name = MODULE_NAME, + .m_name = MODULE_NAME_STR, .m_doc = module_doc, .m_size = sizeof(module_state), .m_methods = module_functions, @@ -1681,7 +1685,7 @@ static struct PyModuleDef moduledef = { }; PyMODINIT_FUNC -PyInit__xxinterpqueues(void) +MODINIT_FUNC_NAME(void) { return PyModuleDef_Init(&moduledef); } diff --git a/Modules/_xxsubinterpretersmodule.c b/Modules/_xxsubinterpretersmodule.c index 4e9e13457a9eb3..b4004d165078f7 100644 --- a/Modules/_xxsubinterpretersmodule.c +++ b/Modules/_xxsubinterpretersmodule.c @@ -19,8 +19,12 @@ #include "interpreteridobject.h" #include "marshal.h" // PyMarshal_ReadObjectFromString() +#include "_interpreters_common.h" -#define MODULE_NAME "_xxsubinterpreters" + +#define MODULE_NAME _xxsubinterpreters +#define MODULE_NAME_STR Py_STRINGIFY(MODULE_NAME) +#define MODINIT_FUNC_NAME RESOLVE_MODINIT_FUNC_NAME(MODULE_NAME) static PyInterpreterState * @@ -125,7 +129,7 @@ get_interpid_obj(PyInterpreterState *interp) static PyObject * _get_current_module(void) { - PyObject *name = PyUnicode_FromString(MODULE_NAME); + PyObject *name = PyUnicode_FromString(MODULE_NAME_STR); if (name == NULL) { return NULL; } @@ -152,16 +156,16 @@ typedef struct { static PyObject * xibufferview_from_xid(PyTypeObject *cls, _PyCrossInterpreterData *data) { - assert(data->data != NULL); - assert(data->obj == NULL); - assert(data->interpid >= 0); + assert(_PyCrossInterpreterData_DATA(data) != NULL); + 
assert(_PyCrossInterpreterData_OBJ(data) == NULL); + assert(_PyCrossInterpreterData_INTERPID(data) >= 0); XIBufferViewObject *self = PyObject_Malloc(sizeof(XIBufferViewObject)); if (self == NULL) { return NULL; } PyObject_Init((PyObject *)self, cls); - self->view = (Py_buffer *)data->data; - self->interpid = data->interpid; + self->view = (Py_buffer *)_PyCrossInterpreterData_DATA(data); + self->interpid = _PyCrossInterpreterData_INTERPID(data); return (PyObject *)self; } @@ -209,7 +213,7 @@ static PyType_Slot XIBufferViewType_slots[] = { }; static PyType_Spec XIBufferViewType_spec = { - .name = MODULE_NAME ".CrossInterpreterBufferView", + .name = MODULE_NAME_STR ".CrossInterpreterBufferView", .basicsize = sizeof(XIBufferViewObject), .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE), @@ -267,8 +271,7 @@ register_memoryview_xid(PyObject *mod, PyTypeObject **p_state) *p_state = cls; // Register XID for the builtin memoryview type. - if (_PyCrossInterpreterData_RegisterClass( - &PyMemoryView_Type, _memoryview_shared) < 0) { + if (ensure_xid_class(&PyMemoryView_Type, _memoryview_shared) < 0) { return -1; } // We don't ever bother un-registering memoryview. @@ -303,7 +306,7 @@ _get_current_module_state(void) if (mod == NULL) { // XXX import it? PyErr_SetString(PyExc_RuntimeError, - MODULE_NAME " module not imported yet"); + MODULE_NAME_STR " module not imported yet"); return NULL; } module_state *state = get_module_state(mod); @@ -372,9 +375,7 @@ check_code_object(PyCodeObject *code) } // We trust that no code objects under co_consts have unbound cell vars. - if (code->co_executors != NULL - || code->_co_instrumentation_version > 0) - { + if (_PyCode_HAS_EXECUTORS(code) || _PyCode_HAS_INSTRUMENTATION(code)) { return "only basic functions are supported"; } if (code->_co_monitoring != NULL) { @@ -602,7 +603,7 @@ interp_destroy(PyObject *self, PyObject *args, PyObject *kwds) // Destroy the interpreter. 
PyThreadState *tstate = PyThreadState_New(interp); - tstate->_whence = _PyThreadState_WHENCE_INTERP; + _PyThreadState_SetWhence(tstate, _PyThreadState_WHENCE_INTERP); // XXX Possible GILState issues? PyThreadState *save_tstate = PyThreadState_Swap(tstate); Py_EndInterpreter(tstate); @@ -691,7 +692,7 @@ static PyObject * interp_set___main___attrs(PyObject *self, PyObject *args) { PyObject *id, *updates; - if (!PyArg_ParseTuple(args, "OO:" MODULE_NAME ".set___main___attrs", + if (!PyArg_ParseTuple(args, "OO:" MODULE_NAME_STR ".set___main___attrs", &id, &updates)) { return NULL; @@ -856,18 +857,18 @@ interp_exec(PyObject *self, PyObject *args, PyObject *kwds) PyObject *id, *code; PyObject *shared = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OO|O:" MODULE_NAME ".exec", kwlist, + "OO|O:" MODULE_NAME_STR ".exec", kwlist, &id, &code, &shared)) { return NULL; } const char *expected = "a string, a function, or a code object"; if (PyUnicode_Check(code)) { - code = (PyObject *)convert_script_arg(code, MODULE_NAME ".exec", + code = (PyObject *)convert_script_arg(code, MODULE_NAME_STR ".exec", "argument 2", expected); } else { - code = (PyObject *)convert_code_arg(code, MODULE_NAME ".exec", + code = (PyObject *)convert_code_arg(code, MODULE_NAME_STR ".exec", "argument 2", expected); } if (code == NULL) { @@ -908,12 +909,12 @@ interp_run_string(PyObject *self, PyObject *args, PyObject *kwds) PyObject *id, *script; PyObject *shared = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OU|O:" MODULE_NAME ".run_string", kwlist, + "OU|O:" MODULE_NAME_STR ".run_string", kwlist, &id, &script, &shared)) { return NULL; } - script = (PyObject *)convert_script_arg(script, MODULE_NAME ".exec", + script = (PyObject *)convert_script_arg(script, MODULE_NAME_STR ".exec", "argument 2", "a string"); if (script == NULL) { return NULL; @@ -934,7 +935,7 @@ PyDoc_STRVAR(run_string_doc, \n\ Execute the provided string in the identified interpreter.\n\ \n\ -(See " MODULE_NAME ".exec()."); 
+(See " MODULE_NAME_STR ".exec()."); static PyObject * interp_run_func(PyObject *self, PyObject *args, PyObject *kwds) @@ -943,12 +944,12 @@ interp_run_func(PyObject *self, PyObject *args, PyObject *kwds) PyObject *id, *func; PyObject *shared = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OO|O:" MODULE_NAME ".run_func", kwlist, + "OO|O:" MODULE_NAME_STR ".run_func", kwlist, &id, &func, &shared)) { return NULL; } - PyCodeObject *code = convert_code_arg(func, MODULE_NAME ".exec", + PyCodeObject *code = convert_code_arg(func, MODULE_NAME_STR ".exec", "argument 2", "a function or a code object"); if (code == NULL) { @@ -972,7 +973,7 @@ Execute the body of the provided function in the identified interpreter.\n\ Code objects are also supported. In both cases, closures and args\n\ are not supported. Methods and other callables are not supported either.\n\ \n\ -(See " MODULE_NAME ".exec()."); +(See " MODULE_NAME_STR ".exec()."); static PyObject * @@ -1166,7 +1167,7 @@ module_free(void *mod) static struct PyModuleDef moduledef = { .m_base = PyModuleDef_HEAD_INIT, - .m_name = MODULE_NAME, + .m_name = MODULE_NAME_STR, .m_doc = module_doc, .m_size = sizeof(module_state), .m_methods = module_functions, @@ -1177,7 +1178,7 @@ static struct PyModuleDef moduledef = { }; PyMODINIT_FUNC -PyInit__xxsubinterpreters(void) +MODINIT_FUNC_NAME(void) { return PyModuleDef_Init(&moduledef); } diff --git a/Python/crossinterp.c b/Python/crossinterp.c index c6ed7daeb1074a..143b261f9a5396 100644 --- a/Python/crossinterp.c +++ b/Python/crossinterp.c @@ -7,7 +7,6 @@ #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_namespace.h" //_PyNamespace_New() #include "pycore_pyerrors.h" // _PyErr_Clear() -#include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_typeobject.h" // _PyType_GetModuleName() #include "pycore_weakref.h" // _PyWeakref_GET_REF() @@ -16,47 +15,12 @@ /* exceptions */ /**************/ -/* InterpreterError extends Exception */ - -static 
PyTypeObject _PyExc_InterpreterError = { - PyVarObject_HEAD_INIT(NULL, 0) - .tp_name = "InterpreterError", - .tp_doc = PyDoc_STR("An interpreter was not found."), - //.tp_base = (PyTypeObject *)PyExc_BaseException, -}; -PyObject *PyExc_InterpreterError = (PyObject *)&_PyExc_InterpreterError; - -/* InterpreterNotFoundError extends InterpreterError */ - -static PyTypeObject _PyExc_InterpreterNotFoundError = { - PyVarObject_HEAD_INIT(NULL, 0) - .tp_name = "InterpreterNotFoundError", - .tp_doc = PyDoc_STR("An interpreter was not found."), - .tp_base = &_PyExc_InterpreterError, -}; -PyObject *PyExc_InterpreterNotFoundError = (PyObject *)&_PyExc_InterpreterNotFoundError; - -/* lifecycle */ - -static int -init_exceptions(PyInterpreterState *interp) -{ - _PyExc_InterpreterError.tp_base = (PyTypeObject *)PyExc_BaseException; - if (_PyStaticType_InitBuiltin(interp, &_PyExc_InterpreterError) < 0) { - return -1; - } - if (_PyStaticType_InitBuiltin(interp, &_PyExc_InterpreterNotFoundError) < 0) { - return -1; - } - return 0; -} - -static void -fini_exceptions(PyInterpreterState *interp) -{ - _PyStaticType_Dealloc(interp, &_PyExc_InterpreterNotFoundError); - _PyStaticType_Dealloc(interp, &_PyExc_InterpreterError); -} +static int init_exceptions(PyInterpreterState *); +static void fini_exceptions(PyInterpreterState *); +static int _init_not_shareable_error_type(PyInterpreterState *); +static void _fini_not_shareable_error_type(PyInterpreterState *); +static PyObject * _get_not_shareable_error_type(PyInterpreterState *); +#include "crossinterp_exceptions.h" /***************************/ @@ -67,7 +31,7 @@ int _Py_CallInInterpreter(PyInterpreterState *interp, _Py_simple_func func, void *arg) { - if (interp == _PyThreadState_GetCurrent()->interp) { + if (interp == PyInterpreterState_Get()) { return func(arg); } // XXX Emit a warning if this fails? 
@@ -79,7 +43,7 @@ int _Py_CallInInterpreterAndRawFree(PyInterpreterState *interp, _Py_simple_func func, void *arg) { - if (interp == _PyThreadState_GetCurrent()->interp) { + if (interp == PyInterpreterState_Get()) { int res = func(arg); PyMem_RawFree(arg); return res; @@ -94,6 +58,20 @@ _Py_CallInInterpreterAndRawFree(PyInterpreterState *interp, /* cross-interpreter data */ /**************************/ +/* registry of {type -> crossinterpdatafunc} */ + +/* For now we use a global registry of shareable classes. An + alternative would be to add a tp_* slot for a class's + crossinterpdatafunc. It would be simpler and more efficient. */ + +static void xid_lookup_init(PyInterpreterState *); +static void xid_lookup_fini(PyInterpreterState *); +static crossinterpdatafunc lookup_getdata(PyInterpreterState *, PyObject *); +#include "crossinterp_data_lookup.h" + + +/* lifecycle */ + _PyCrossInterpreterData * _PyCrossInterpreterData_New(void) { @@ -114,38 +92,6 @@ _PyCrossInterpreterData_Free(_PyCrossInterpreterData *xid) } -/* exceptions */ - -static PyStatus -_init_not_shareable_error_type(PyInterpreterState *interp) -{ - const char *name = "_interpreters.NotShareableError"; - PyObject *base = PyExc_ValueError; - PyObject *ns = NULL; - PyObject *exctype = PyErr_NewException(name, base, ns); - if (exctype == NULL) { - PyErr_Clear(); - return _PyStatus_ERR("could not initialize NotShareableError"); - } - - interp->xi.PyExc_NotShareableError = exctype; - return _PyStatus_OK(); -} - -static void -_fini_not_shareable_error_type(PyInterpreterState *interp) -{ - Py_CLEAR(interp->xi.PyExc_NotShareableError); -} - -static PyObject * -_get_not_shareable_error_type(PyInterpreterState *interp) -{ - assert(interp->xi.PyExc_NotShareableError != NULL); - return interp->xi.PyExc_NotShareableError; -} - - /* defining cross-interpreter data */ static inline void @@ -156,7 +102,7 @@ _xidata_init(_PyCrossInterpreterData *data) assert(data->data == NULL); assert(data->obj == NULL); *data = 
(_PyCrossInterpreterData){0}; - data->interpid = -1; + _PyCrossInterpreterData_INTERPID(data) = -1; } static inline void @@ -193,7 +139,9 @@ _PyCrossInterpreterData_Init(_PyCrossInterpreterData *data, // Ideally every object would know its owning interpreter. // Until then, we have to rely on the caller to identify it // (but we don't need it in all cases). - data->interpid = (interp != NULL) ? interp->id : -1; + _PyCrossInterpreterData_INTERPID(data) = (interp != NULL) + ? PyInterpreterState_GetID(interp) + : -1; data->new_object = new_object; } @@ -223,8 +171,8 @@ _PyCrossInterpreterData_Clear(PyInterpreterState *interp, assert(data != NULL); // This must be called in the owning interpreter. assert(interp == NULL - || data->interpid == -1 - || data->interpid == interp->id); + || _PyCrossInterpreterData_INTERPID(data) == -1 + || _PyCrossInterpreterData_INTERPID(data) == PyInterpreterState_GetID(interp)); _xidata_clear(data); } @@ -238,13 +186,13 @@ _check_xidata(PyThreadState *tstate, _PyCrossInterpreterData *data) // data->obj may be NULL, so we don't check it. - if (data->interpid < 0) { - _PyErr_SetString(tstate, PyExc_SystemError, "missing interp"); + if (_PyCrossInterpreterData_INTERPID(data) < 0) { + PyErr_SetString(PyExc_SystemError, "missing interp"); return -1; } if (data->new_object == NULL) { - _PyErr_SetString(tstate, PyExc_SystemError, "missing new_object func"); + PyErr_SetString(PyExc_SystemError, "missing new_object func"); return -1; } @@ -253,25 +201,6 @@ _check_xidata(PyThreadState *tstate, _PyCrossInterpreterData *data) return 0; } -static crossinterpdatafunc _lookup_getdata_from_registry( - PyInterpreterState *, PyObject *); - -static crossinterpdatafunc -_lookup_getdata(PyInterpreterState *interp, PyObject *obj) -{ - /* Cross-interpreter objects are looked up by exact match on the class. - We can reassess this policy when we move from a global registry to a - tp_* slot. 
*/ - return _lookup_getdata_from_registry(interp, obj); -} - -crossinterpdatafunc -_PyCrossInterpreterData_Lookup(PyObject *obj) -{ - PyInterpreterState *interp = _PyInterpreterState_GET(); - return _lookup_getdata(interp, obj); -} - static inline void _set_xid_lookup_failure(PyInterpreterState *interp, PyObject *obj, const char *msg) @@ -295,8 +224,8 @@ _set_xid_lookup_failure(PyInterpreterState *interp, int _PyObject_CheckCrossInterpreterData(PyObject *obj) { - PyInterpreterState *interp = _PyInterpreterState_GET(); - crossinterpdatafunc getdata = _lookup_getdata(interp, obj); + PyInterpreterState *interp = PyInterpreterState_Get(); + crossinterpdatafunc getdata = lookup_getdata(interp, obj); if (getdata == NULL) { if (!PyErr_Occurred()) { _set_xid_lookup_failure(interp, obj, NULL); @@ -309,20 +238,16 @@ _PyObject_CheckCrossInterpreterData(PyObject *obj) int _PyObject_GetCrossInterpreterData(PyObject *obj, _PyCrossInterpreterData *data) { - PyThreadState *tstate = _PyThreadState_GetCurrent(); -#ifdef Py_DEBUG - // The caller must hold the GIL - _Py_EnsureTstateNotNULL(tstate); -#endif + PyThreadState *tstate = PyThreadState_Get(); PyInterpreterState *interp = tstate->interp; // Reset data before re-populating. *data = (_PyCrossInterpreterData){0}; - data->interpid = -1; + _PyCrossInterpreterData_INTERPID(data) = -1; // Call the "getdata" func for the object. Py_INCREF(obj); - crossinterpdatafunc getdata = _lookup_getdata(interp, obj); + crossinterpdatafunc getdata = lookup_getdata(interp, obj); if (getdata == NULL) { Py_DECREF(obj); if (!PyErr_Occurred()) { @@ -337,7 +262,7 @@ _PyObject_GetCrossInterpreterData(PyObject *obj, _PyCrossInterpreterData *data) } // Fill in the blanks and validate the result. 
- data->interpid = interp->id; + _PyCrossInterpreterData_INTERPID(data) = PyInterpreterState_GetID(interp); if (_check_xidata(tstate, data) != 0) { (void)_PyCrossInterpreterData_Release(data); return -1; @@ -374,7 +299,8 @@ _xidata_release(_PyCrossInterpreterData *data, int rawfree) } // Switch to the original interpreter. - PyInterpreterState *interp = _PyInterpreterState_LookUpID(data->interpid); + PyInterpreterState *interp = _PyInterpreterState_LookUpID( + _PyCrossInterpreterData_INTERPID(data)); if (interp == NULL) { // The interpreter was already destroyed. // This function shouldn't have been called. @@ -408,538 +334,6 @@ _PyCrossInterpreterData_ReleaseAndRawFree(_PyCrossInterpreterData *data) } -/* registry of {type -> crossinterpdatafunc} */ - -/* For now we use a global registry of shareable classes. An - alternative would be to add a tp_* slot for a class's - crossinterpdatafunc. It would be simpler and more efficient. */ - -static inline struct _xidregistry * -_get_global_xidregistry(_PyRuntimeState *runtime) -{ - return &runtime->xi.registry; -} - -static inline struct _xidregistry * -_get_xidregistry(PyInterpreterState *interp) -{ - return &interp->xi.registry; -} - -static inline struct _xidregistry * -_get_xidregistry_for_type(PyInterpreterState *interp, PyTypeObject *cls) -{ - struct _xidregistry *registry = _get_global_xidregistry(interp->runtime); - if (cls->tp_flags & Py_TPFLAGS_HEAPTYPE) { - registry = _get_xidregistry(interp); - } - return registry; -} - -static int -_xidregistry_add_type(struct _xidregistry *xidregistry, - PyTypeObject *cls, crossinterpdatafunc getdata) -{ - struct _xidregitem *newhead = PyMem_RawMalloc(sizeof(struct _xidregitem)); - if (newhead == NULL) { - return -1; - } - *newhead = (struct _xidregitem){ - // We do not keep a reference, to avoid keeping the class alive. 
- .cls = cls, - .refcount = 1, - .getdata = getdata, - }; - if (cls->tp_flags & Py_TPFLAGS_HEAPTYPE) { - // XXX Assign a callback to clear the entry from the registry? - newhead->weakref = PyWeakref_NewRef((PyObject *)cls, NULL); - if (newhead->weakref == NULL) { - PyMem_RawFree(newhead); - return -1; - } - } - newhead->next = xidregistry->head; - if (newhead->next != NULL) { - newhead->next->prev = newhead; - } - xidregistry->head = newhead; - return 0; -} - -static struct _xidregitem * -_xidregistry_remove_entry(struct _xidregistry *xidregistry, - struct _xidregitem *entry) -{ - struct _xidregitem *next = entry->next; - if (entry->prev != NULL) { - assert(entry->prev->next == entry); - entry->prev->next = next; - } - else { - assert(xidregistry->head == entry); - xidregistry->head = next; - } - if (next != NULL) { - next->prev = entry->prev; - } - Py_XDECREF(entry->weakref); - PyMem_RawFree(entry); - return next; -} - -static void -_xidregistry_clear(struct _xidregistry *xidregistry) -{ - struct _xidregitem *cur = xidregistry->head; - xidregistry->head = NULL; - while (cur != NULL) { - struct _xidregitem *next = cur->next; - Py_XDECREF(cur->weakref); - PyMem_RawFree(cur); - cur = next; - } -} - -static void -_xidregistry_lock(struct _xidregistry *registry) -{ - if (registry->global) { - PyMutex_Lock(®istry->mutex); - } - // else: Within an interpreter we rely on the GIL instead of a separate lock. -} - -static void -_xidregistry_unlock(struct _xidregistry *registry) -{ - if (registry->global) { - PyMutex_Unlock(®istry->mutex); - } -} - -static struct _xidregitem * -_xidregistry_find_type(struct _xidregistry *xidregistry, PyTypeObject *cls) -{ - struct _xidregitem *cur = xidregistry->head; - while (cur != NULL) { - if (cur->weakref != NULL) { - // cur is/was a heap type. - PyObject *registered = _PyWeakref_GET_REF(cur->weakref); - if (registered == NULL) { - // The weakly ref'ed object was freed. 
- cur = _xidregistry_remove_entry(xidregistry, cur); - continue; - } - assert(PyType_Check(registered)); - assert(cur->cls == (PyTypeObject *)registered); - assert(cur->cls->tp_flags & Py_TPFLAGS_HEAPTYPE); - Py_DECREF(registered); - } - if (cur->cls == cls) { - return cur; - } - cur = cur->next; - } - return NULL; -} - -int -_PyCrossInterpreterData_RegisterClass(PyTypeObject *cls, - crossinterpdatafunc getdata) -{ - if (!PyType_Check(cls)) { - PyErr_Format(PyExc_ValueError, "only classes may be registered"); - return -1; - } - if (getdata == NULL) { - PyErr_Format(PyExc_ValueError, "missing 'getdata' func"); - return -1; - } - - int res = 0; - PyInterpreterState *interp = _PyInterpreterState_GET(); - struct _xidregistry *xidregistry = _get_xidregistry_for_type(interp, cls); - _xidregistry_lock(xidregistry); - - struct _xidregitem *matched = _xidregistry_find_type(xidregistry, cls); - if (matched != NULL) { - assert(matched->getdata == getdata); - matched->refcount += 1; - goto finally; - } - - res = _xidregistry_add_type(xidregistry, cls, getdata); - -finally: - _xidregistry_unlock(xidregistry); - return res; -} - -int -_PyCrossInterpreterData_UnregisterClass(PyTypeObject *cls) -{ - int res = 0; - PyInterpreterState *interp = _PyInterpreterState_GET(); - struct _xidregistry *xidregistry = _get_xidregistry_for_type(interp, cls); - _xidregistry_lock(xidregistry); - - struct _xidregitem *matched = _xidregistry_find_type(xidregistry, cls); - if (matched != NULL) { - assert(matched->refcount > 0); - matched->refcount -= 1; - if (matched->refcount == 0) { - (void)_xidregistry_remove_entry(xidregistry, matched); - } - res = 1; - } - - _xidregistry_unlock(xidregistry); - return res; -} - -static crossinterpdatafunc -_lookup_getdata_from_registry(PyInterpreterState *interp, PyObject *obj) -{ - PyTypeObject *cls = Py_TYPE(obj); - - struct _xidregistry *xidregistry = _get_xidregistry_for_type(interp, cls); - _xidregistry_lock(xidregistry); - - struct _xidregitem *matched = 
_xidregistry_find_type(xidregistry, cls); - crossinterpdatafunc func = matched != NULL ? matched->getdata : NULL; - - _xidregistry_unlock(xidregistry); - return func; -} - -/* cross-interpreter data for builtin types */ - -// bytes - -struct _shared_bytes_data { - char *bytes; - Py_ssize_t len; -}; - -static PyObject * -_new_bytes_object(_PyCrossInterpreterData *data) -{ - struct _shared_bytes_data *shared = (struct _shared_bytes_data *)(data->data); - return PyBytes_FromStringAndSize(shared->bytes, shared->len); -} - -static int -_bytes_shared(PyThreadState *tstate, PyObject *obj, - _PyCrossInterpreterData *data) -{ - if (_PyCrossInterpreterData_InitWithSize( - data, tstate->interp, sizeof(struct _shared_bytes_data), obj, - _new_bytes_object - ) < 0) - { - return -1; - } - struct _shared_bytes_data *shared = (struct _shared_bytes_data *)data->data; - if (PyBytes_AsStringAndSize(obj, &shared->bytes, &shared->len) < 0) { - _PyCrossInterpreterData_Clear(tstate->interp, data); - return -1; - } - return 0; -} - -// str - -struct _shared_str_data { - int kind; - const void *buffer; - Py_ssize_t len; -}; - -static PyObject * -_new_str_object(_PyCrossInterpreterData *data) -{ - struct _shared_str_data *shared = (struct _shared_str_data *)(data->data); - return PyUnicode_FromKindAndData(shared->kind, shared->buffer, shared->len); -} - -static int -_str_shared(PyThreadState *tstate, PyObject *obj, - _PyCrossInterpreterData *data) -{ - if (_PyCrossInterpreterData_InitWithSize( - data, tstate->interp, sizeof(struct _shared_str_data), obj, - _new_str_object - ) < 0) - { - return -1; - } - struct _shared_str_data *shared = (struct _shared_str_data *)data->data; - shared->kind = PyUnicode_KIND(obj); - shared->buffer = PyUnicode_DATA(obj); - shared->len = PyUnicode_GET_LENGTH(obj); - return 0; -} - -// int - -static PyObject * -_new_long_object(_PyCrossInterpreterData *data) -{ - return PyLong_FromSsize_t((Py_ssize_t)(data->data)); -} - -static int -_long_shared(PyThreadState 
*tstate, PyObject *obj, - _PyCrossInterpreterData *data) -{ - /* Note that this means the size of shareable ints is bounded by - * sys.maxsize. Hence on 32-bit architectures that is half the - * size of maximum shareable ints on 64-bit. - */ - Py_ssize_t value = PyLong_AsSsize_t(obj); - if (value == -1 && PyErr_Occurred()) { - if (PyErr_ExceptionMatches(PyExc_OverflowError)) { - PyErr_SetString(PyExc_OverflowError, "try sending as bytes"); - } - return -1; - } - _PyCrossInterpreterData_Init(data, tstate->interp, (void *)value, NULL, - _new_long_object); - // data->obj and data->free remain NULL - return 0; -} - -// float - -static PyObject * -_new_float_object(_PyCrossInterpreterData *data) -{ - double * value_ptr = data->data; - return PyFloat_FromDouble(*value_ptr); -} - -static int -_float_shared(PyThreadState *tstate, PyObject *obj, - _PyCrossInterpreterData *data) -{ - if (_PyCrossInterpreterData_InitWithSize( - data, tstate->interp, sizeof(double), NULL, - _new_float_object - ) < 0) - { - return -1; - } - double *shared = (double *)data->data; - *shared = PyFloat_AsDouble(obj); - return 0; -} - -// None - -static PyObject * -_new_none_object(_PyCrossInterpreterData *data) -{ - // XXX Singleton refcounts are problematic across interpreters... - return Py_NewRef(Py_None); -} - -static int -_none_shared(PyThreadState *tstate, PyObject *obj, - _PyCrossInterpreterData *data) -{ - _PyCrossInterpreterData_Init(data, tstate->interp, NULL, NULL, - _new_none_object); - // data->data, data->obj and data->free remain NULL - return 0; -} - -// bool - -static PyObject * -_new_bool_object(_PyCrossInterpreterData *data) -{ - if (data->data){ - Py_RETURN_TRUE; - } - Py_RETURN_FALSE; -} - -static int -_bool_shared(PyThreadState *tstate, PyObject *obj, - _PyCrossInterpreterData *data) -{ - _PyCrossInterpreterData_Init(data, tstate->interp, - (void *) (Py_IsTrue(obj) ? 
(uintptr_t) 1 : (uintptr_t) 0), NULL, - _new_bool_object); - // data->obj and data->free remain NULL - return 0; -} - -// tuple - -struct _shared_tuple_data { - Py_ssize_t len; - _PyCrossInterpreterData **data; -}; - -static PyObject * -_new_tuple_object(_PyCrossInterpreterData *data) -{ - struct _shared_tuple_data *shared = (struct _shared_tuple_data *)(data->data); - PyObject *tuple = PyTuple_New(shared->len); - if (tuple == NULL) { - return NULL; - } - - for (Py_ssize_t i = 0; i < shared->len; i++) { - PyObject *item = _PyCrossInterpreterData_NewObject(shared->data[i]); - if (item == NULL){ - Py_DECREF(tuple); - return NULL; - } - PyTuple_SET_ITEM(tuple, i, item); - } - return tuple; -} - -static void -_tuple_shared_free(void* data) -{ - struct _shared_tuple_data *shared = (struct _shared_tuple_data *)(data); -#ifndef NDEBUG - int64_t interpid = PyInterpreterState_GetID(_PyInterpreterState_GET()); -#endif - for (Py_ssize_t i = 0; i < shared->len; i++) { - if (shared->data[i] != NULL) { - assert(shared->data[i]->interpid == interpid); - _PyCrossInterpreterData_Release(shared->data[i]); - PyMem_RawFree(shared->data[i]); - shared->data[i] = NULL; - } - } - PyMem_Free(shared->data); - PyMem_RawFree(shared); -} - -static int -_tuple_shared(PyThreadState *tstate, PyObject *obj, - _PyCrossInterpreterData *data) -{ - Py_ssize_t len = PyTuple_GET_SIZE(obj); - if (len < 0) { - return -1; - } - struct _shared_tuple_data *shared = PyMem_RawMalloc(sizeof(struct _shared_tuple_data)); - if (shared == NULL){ - PyErr_NoMemory(); - return -1; - } - - shared->len = len; - shared->data = (_PyCrossInterpreterData **) PyMem_Calloc(shared->len, sizeof(_PyCrossInterpreterData *)); - if (shared->data == NULL) { - PyErr_NoMemory(); - return -1; - } - - for (Py_ssize_t i = 0; i < shared->len; i++) { - _PyCrossInterpreterData *data = _PyCrossInterpreterData_New(); - if (data == NULL) { - goto error; // PyErr_NoMemory already set - } - PyObject *item = PyTuple_GET_ITEM(obj, i); - - int res 
= -1; - if (!_Py_EnterRecursiveCallTstate(tstate, " while sharing a tuple")) { - res = _PyObject_GetCrossInterpreterData(item, data); - _Py_LeaveRecursiveCallTstate(tstate); - } - if (res < 0) { - PyMem_RawFree(data); - goto error; - } - shared->data[i] = data; - } - _PyCrossInterpreterData_Init( - data, tstate->interp, shared, obj, _new_tuple_object); - data->free = _tuple_shared_free; - return 0; - -error: - _tuple_shared_free(shared); - return -1; -} - -// registration - -static void -_register_builtins_for_crossinterpreter_data(struct _xidregistry *xidregistry) -{ - // None - if (_xidregistry_add_type(xidregistry, (PyTypeObject *)PyObject_Type(Py_None), _none_shared) != 0) { - Py_FatalError("could not register None for cross-interpreter sharing"); - } - - // int - if (_xidregistry_add_type(xidregistry, &PyLong_Type, _long_shared) != 0) { - Py_FatalError("could not register int for cross-interpreter sharing"); - } - - // bytes - if (_xidregistry_add_type(xidregistry, &PyBytes_Type, _bytes_shared) != 0) { - Py_FatalError("could not register bytes for cross-interpreter sharing"); - } - - // str - if (_xidregistry_add_type(xidregistry, &PyUnicode_Type, _str_shared) != 0) { - Py_FatalError("could not register str for cross-interpreter sharing"); - } - - // bool - if (_xidregistry_add_type(xidregistry, &PyBool_Type, _bool_shared) != 0) { - Py_FatalError("could not register bool for cross-interpreter sharing"); - } - - // float - if (_xidregistry_add_type(xidregistry, &PyFloat_Type, _float_shared) != 0) { - Py_FatalError("could not register float for cross-interpreter sharing"); - } - - // tuple - if (_xidregistry_add_type(xidregistry, &PyTuple_Type, _tuple_shared) != 0) { - Py_FatalError("could not register tuple for cross-interpreter sharing"); - } -} - -/* registry lifecycle */ - -static void -_xidregistry_init(struct _xidregistry *registry) -{ - if (registry->initialized) { - return; - } - registry->initialized = 1; - - if (registry->global) { - // Registering the 
builtins is cheap so we don't bother doing it lazily. - assert(registry->head == NULL); - _register_builtins_for_crossinterpreter_data(registry); - } -} - -static void -_xidregistry_fini(struct _xidregistry *registry) -{ - if (!registry->initialized) { - return; - } - registry->initialized = 0; - - _xidregistry_clear(registry); -} - - /*************************/ /* convenience utilities */ /*************************/ @@ -1023,6 +417,10 @@ _convert_exc_to_TracebackException(PyObject *exc, PyObject **p_tbexc) return -1; } +// We accommodate backports here. +#ifndef _Py_EMPTY_STR +# define _Py_EMPTY_STR &_Py_STR(empty) +#endif static const char * _format_TracebackException(PyObject *tbexc) @@ -1031,7 +429,8 @@ _format_TracebackException(PyObject *tbexc) if (lines == NULL) { return NULL; } - PyObject *formatted_obj = PyUnicode_Join(&_Py_STR(empty), lines); + assert(_Py_EMPTY_STR != NULL); + PyObject *formatted_obj = PyUnicode_Join(_Py_EMPTY_STR, lines); Py_DECREF(lines); if (formatted_obj == NULL) { return NULL; @@ -1600,7 +999,7 @@ _sharednsitem_has_value(_PyXI_namespace_item *item, int64_t *p_interpid) return 0; } if (p_interpid != NULL) { - *p_interpid = item->data->interpid; + *p_interpid = _PyCrossInterpreterData_INTERPID(item->data); } return 1; } @@ -1874,7 +1273,7 @@ _PyXI_FreeNamespace(_PyXI_namespace *ns) return; } - if (interpid == PyInterpreterState_GetID(_PyInterpreterState_GET())) { + if (interpid == PyInterpreterState_GetID(PyInterpreterState_Get())) { _sharedns_free(ns); } else { @@ -2014,7 +1413,7 @@ _enter_session(_PyXI_session *session, PyInterpreterState *interp) PyThreadState *prev = tstate; if (interp != tstate->interp) { tstate = PyThreadState_New(interp); - tstate->_whence = _PyThreadState_WHENCE_EXEC; + _PyThreadState_SetWhence(tstate, _PyThreadState_WHENCE_EXEC); // XXX Possible GILState issues? 
session->prev_tstate = PyThreadState_Swap(tstate); assert(session->prev_tstate == prev); @@ -2073,7 +1472,7 @@ _propagate_not_shareable_error(_PyXI_session *session) if (session == NULL) { return; } - PyInterpreterState *interp = _PyInterpreterState_GET(); + PyInterpreterState *interp = PyInterpreterState_Get(); if (PyErr_ExceptionMatches(_get_not_shareable_error_type(interp))) { // We want to propagate the exception directly. session->_error_override = _PyXI_ERR_NOT_SHAREABLE; @@ -2245,18 +1644,12 @@ _PyXI_Exit(_PyXI_session *session) PyStatus _PyXI_Init(PyInterpreterState *interp) { - PyStatus status; - - // Initialize the XID registry. - if (_Py_IsMainInterpreter(interp)) { - _xidregistry_init(_get_global_xidregistry(interp->runtime)); - } - _xidregistry_init(_get_xidregistry(interp)); + // Initialize the XID lookup state (e.g. registry). + xid_lookup_init(interp); // Initialize exceptions (heap types). - status = _init_not_shareable_error_type(interp); - if (_PyStatus_EXCEPTION(status)) { - return status; + if (_init_not_shareable_error_type(interp) < 0) { + return _PyStatus_ERR("failed to initialize NotShareableError"); } return _PyStatus_OK(); @@ -2271,11 +1664,8 @@ _PyXI_Fini(PyInterpreterState *interp) // Finalize exceptions (heap types). _fini_not_shareable_error_type(interp); - // Finalize the XID registry. - _xidregistry_fini(_get_xidregistry(interp)); - if (_Py_IsMainInterpreter(interp)) { - _xidregistry_fini(_get_global_xidregistry(interp->runtime)); - } + // Finalize the XID lookup state (e.g. registry). 
+ xid_lookup_fini(interp); } PyStatus diff --git a/Python/crossinterp_data_lookup.h b/Python/crossinterp_data_lookup.h new file mode 100644 index 00000000000000..863919ad42fb97 --- /dev/null +++ b/Python/crossinterp_data_lookup.h @@ -0,0 +1,594 @@ + +static crossinterpdatafunc _lookup_getdata_from_registry( + PyInterpreterState *, PyObject *); + +static crossinterpdatafunc +lookup_getdata(PyInterpreterState *interp, PyObject *obj) +{ + /* Cross-interpreter objects are looked up by exact match on the class. + We can reassess this policy when we move from a global registry to a + tp_* slot. */ + return _lookup_getdata_from_registry(interp, obj); +} + +crossinterpdatafunc +_PyCrossInterpreterData_Lookup(PyObject *obj) +{ + PyInterpreterState *interp = PyInterpreterState_Get(); + return lookup_getdata(interp, obj); +} + + +/***********************************************/ +/* a registry of {type -> crossinterpdatafunc} */ +/***********************************************/ + +/* For now we use a global registry of shareable classes. An + alternative would be to add a tp_* slot for a class's + crossinterpdatafunc. It would be simpler and more efficient. */ + + +/* registry lifecycle */ + +static void _register_builtins_for_crossinterpreter_data(struct _xidregistry *); + +static void +_xidregistry_init(struct _xidregistry *registry) +{ + if (registry->initialized) { + return; + } + registry->initialized = 1; + + if (registry->global) { + // Registering the builtins is cheap so we don't bother doing it lazily. 
+ assert(registry->head == NULL); + _register_builtins_for_crossinterpreter_data(registry); + } +} + +static void _xidregistry_clear(struct _xidregistry *); + +static void +_xidregistry_fini(struct _xidregistry *registry) +{ + if (!registry->initialized) { + return; + } + registry->initialized = 0; + + _xidregistry_clear(registry); +} + +static inline struct _xidregistry * _get_global_xidregistry(_PyRuntimeState *); +static inline struct _xidregistry * _get_xidregistry(PyInterpreterState *); + +static void +xid_lookup_init(PyInterpreterState *interp) +{ + if (_Py_IsMainInterpreter(interp)) { + _xidregistry_init(_get_global_xidregistry(interp->runtime)); + } + _xidregistry_init(_get_xidregistry(interp)); +} + +static void +xid_lookup_fini(PyInterpreterState *interp) +{ + _xidregistry_fini(_get_xidregistry(interp)); + if (_Py_IsMainInterpreter(interp)) { + _xidregistry_fini(_get_global_xidregistry(interp->runtime)); + } +} + + +/* registry thread safety */ + +static void +_xidregistry_lock(struct _xidregistry *registry) +{ + if (registry->global) { + PyMutex_Lock(®istry->mutex); + } + // else: Within an interpreter we rely on the GIL instead of a separate lock. 
+} + +static void +_xidregistry_unlock(struct _xidregistry *registry) +{ + if (registry->global) { + PyMutex_Unlock(®istry->mutex); + } +} + + +/* accessing the registry */ + +static inline struct _xidregistry * +_get_global_xidregistry(_PyRuntimeState *runtime) +{ + return &runtime->xi.registry; +} + +static inline struct _xidregistry * +_get_xidregistry(PyInterpreterState *interp) +{ + return &interp->xi.registry; +} + +static inline struct _xidregistry * +_get_xidregistry_for_type(PyInterpreterState *interp, PyTypeObject *cls) +{ + struct _xidregistry *registry = _get_global_xidregistry(interp->runtime); + if (cls->tp_flags & Py_TPFLAGS_HEAPTYPE) { + registry = _get_xidregistry(interp); + } + return registry; +} + +static struct _xidregitem * _xidregistry_remove_entry( + struct _xidregistry *, struct _xidregitem *); + +static struct _xidregitem * +_xidregistry_find_type(struct _xidregistry *xidregistry, PyTypeObject *cls) +{ + struct _xidregitem *cur = xidregistry->head; + while (cur != NULL) { + if (cur->weakref != NULL) { + // cur is/was a heap type. + PyObject *registered = _PyWeakref_GET_REF(cur->weakref); + if (registered == NULL) { + // The weakly ref'ed object was freed. + cur = _xidregistry_remove_entry(xidregistry, cur); + continue; + } + assert(PyType_Check(registered)); + assert(cur->cls == (PyTypeObject *)registered); + assert(cur->cls->tp_flags & Py_TPFLAGS_HEAPTYPE); + Py_DECREF(registered); + } + if (cur->cls == cls) { + return cur; + } + cur = cur->next; + } + return NULL; +} + +static crossinterpdatafunc +_lookup_getdata_from_registry(PyInterpreterState *interp, PyObject *obj) +{ + PyTypeObject *cls = Py_TYPE(obj); + + struct _xidregistry *xidregistry = _get_xidregistry_for_type(interp, cls); + _xidregistry_lock(xidregistry); + + struct _xidregitem *matched = _xidregistry_find_type(xidregistry, cls); + crossinterpdatafunc func = matched != NULL ? 
matched->getdata : NULL; + + _xidregistry_unlock(xidregistry); + return func; +} + + +/* updating the registry */ + +static int +_xidregistry_add_type(struct _xidregistry *xidregistry, + PyTypeObject *cls, crossinterpdatafunc getdata) +{ + struct _xidregitem *newhead = PyMem_RawMalloc(sizeof(struct _xidregitem)); + if (newhead == NULL) { + return -1; + } + *newhead = (struct _xidregitem){ + // We do not keep a reference, to avoid keeping the class alive. + .cls = cls, + .refcount = 1, + .getdata = getdata, + }; + if (cls->tp_flags & Py_TPFLAGS_HEAPTYPE) { + // XXX Assign a callback to clear the entry from the registry? + newhead->weakref = PyWeakref_NewRef((PyObject *)cls, NULL); + if (newhead->weakref == NULL) { + PyMem_RawFree(newhead); + return -1; + } + } + newhead->next = xidregistry->head; + if (newhead->next != NULL) { + newhead->next->prev = newhead; + } + xidregistry->head = newhead; + return 0; +} + +static struct _xidregitem * +_xidregistry_remove_entry(struct _xidregistry *xidregistry, + struct _xidregitem *entry) +{ + struct _xidregitem *next = entry->next; + if (entry->prev != NULL) { + assert(entry->prev->next == entry); + entry->prev->next = next; + } + else { + assert(xidregistry->head == entry); + xidregistry->head = next; + } + if (next != NULL) { + next->prev = entry->prev; + } + Py_XDECREF(entry->weakref); + PyMem_RawFree(entry); + return next; +} + +static void +_xidregistry_clear(struct _xidregistry *xidregistry) +{ + struct _xidregitem *cur = xidregistry->head; + xidregistry->head = NULL; + while (cur != NULL) { + struct _xidregitem *next = cur->next; + Py_XDECREF(cur->weakref); + PyMem_RawFree(cur); + cur = next; + } +} + +int +_PyCrossInterpreterData_RegisterClass(PyTypeObject *cls, + crossinterpdatafunc getdata) +{ + if (!PyType_Check(cls)) { + PyErr_Format(PyExc_ValueError, "only classes may be registered"); + return -1; + } + if (getdata == NULL) { + PyErr_Format(PyExc_ValueError, "missing 'getdata' func"); + return -1; + } + + int res 
= 0; + PyInterpreterState *interp = _PyInterpreterState_GET(); + struct _xidregistry *xidregistry = _get_xidregistry_for_type(interp, cls); + _xidregistry_lock(xidregistry); + + struct _xidregitem *matched = _xidregistry_find_type(xidregistry, cls); + if (matched != NULL) { + assert(matched->getdata == getdata); + matched->refcount += 1; + goto finally; + } + + res = _xidregistry_add_type(xidregistry, cls, getdata); + +finally: + _xidregistry_unlock(xidregistry); + return res; +} + +int +_PyCrossInterpreterData_UnregisterClass(PyTypeObject *cls) +{ + int res = 0; + PyInterpreterState *interp = _PyInterpreterState_GET(); + struct _xidregistry *xidregistry = _get_xidregistry_for_type(interp, cls); + _xidregistry_lock(xidregistry); + + struct _xidregitem *matched = _xidregistry_find_type(xidregistry, cls); + if (matched != NULL) { + assert(matched->refcount > 0); + matched->refcount -= 1; + if (matched->refcount == 0) { + (void)_xidregistry_remove_entry(xidregistry, matched); + } + res = 1; + } + + _xidregistry_unlock(xidregistry); + return res; +} + + +/********************************************/ +/* cross-interpreter data for builtin types */ +/********************************************/ + +// bytes + +struct _shared_bytes_data { + char *bytes; + Py_ssize_t len; +}; + +static PyObject * +_new_bytes_object(_PyCrossInterpreterData *data) +{ + struct _shared_bytes_data *shared = (struct _shared_bytes_data *)(data->data); + return PyBytes_FromStringAndSize(shared->bytes, shared->len); +} + +static int +_bytes_shared(PyThreadState *tstate, PyObject *obj, + _PyCrossInterpreterData *data) +{ + if (_PyCrossInterpreterData_InitWithSize( + data, tstate->interp, sizeof(struct _shared_bytes_data), obj, + _new_bytes_object + ) < 0) + { + return -1; + } + struct _shared_bytes_data *shared = (struct _shared_bytes_data *)data->data; + if (PyBytes_AsStringAndSize(obj, &shared->bytes, &shared->len) < 0) { + _PyCrossInterpreterData_Clear(tstate->interp, data); + return -1; + } + 
return 0; +} + +// str + +struct _shared_str_data { + int kind; + const void *buffer; + Py_ssize_t len; +}; + +static PyObject * +_new_str_object(_PyCrossInterpreterData *data) +{ + struct _shared_str_data *shared = (struct _shared_str_data *)(data->data); + return PyUnicode_FromKindAndData(shared->kind, shared->buffer, shared->len); +} + +static int +_str_shared(PyThreadState *tstate, PyObject *obj, + _PyCrossInterpreterData *data) +{ + if (_PyCrossInterpreterData_InitWithSize( + data, tstate->interp, sizeof(struct _shared_str_data), obj, + _new_str_object + ) < 0) + { + return -1; + } + struct _shared_str_data *shared = (struct _shared_str_data *)data->data; + shared->kind = PyUnicode_KIND(obj); + shared->buffer = PyUnicode_DATA(obj); + shared->len = PyUnicode_GET_LENGTH(obj); + return 0; +} + +// int + +static PyObject * +_new_long_object(_PyCrossInterpreterData *data) +{ + return PyLong_FromSsize_t((Py_ssize_t)(data->data)); +} + +static int +_long_shared(PyThreadState *tstate, PyObject *obj, + _PyCrossInterpreterData *data) +{ + /* Note that this means the size of shareable ints is bounded by + * sys.maxsize. Hence on 32-bit architectures that is half the + * size of maximum shareable ints on 64-bit. 
+ */ + Py_ssize_t value = PyLong_AsSsize_t(obj); + if (value == -1 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_SetString(PyExc_OverflowError, "try sending as bytes"); + } + return -1; + } + _PyCrossInterpreterData_Init(data, tstate->interp, (void *)value, NULL, + _new_long_object); + // data->obj and data->free remain NULL + return 0; +} + +// float + +static PyObject * +_new_float_object(_PyCrossInterpreterData *data) +{ + double * value_ptr = data->data; + return PyFloat_FromDouble(*value_ptr); +} + +static int +_float_shared(PyThreadState *tstate, PyObject *obj, + _PyCrossInterpreterData *data) +{ + if (_PyCrossInterpreterData_InitWithSize( + data, tstate->interp, sizeof(double), NULL, + _new_float_object + ) < 0) + { + return -1; + } + double *shared = (double *)data->data; + *shared = PyFloat_AsDouble(obj); + return 0; +} + +// None + +static PyObject * +_new_none_object(_PyCrossInterpreterData *data) +{ + // XXX Singleton refcounts are problematic across interpreters... + return Py_NewRef(Py_None); +} + +static int +_none_shared(PyThreadState *tstate, PyObject *obj, + _PyCrossInterpreterData *data) +{ + _PyCrossInterpreterData_Init(data, tstate->interp, NULL, NULL, + _new_none_object); + // data->data, data->obj and data->free remain NULL + return 0; +} + +// bool + +static PyObject * +_new_bool_object(_PyCrossInterpreterData *data) +{ + if (data->data){ + Py_RETURN_TRUE; + } + Py_RETURN_FALSE; +} + +static int +_bool_shared(PyThreadState *tstate, PyObject *obj, + _PyCrossInterpreterData *data) +{ + _PyCrossInterpreterData_Init(data, tstate->interp, + (void *) (Py_IsTrue(obj) ? 
(uintptr_t) 1 : (uintptr_t) 0), NULL, + _new_bool_object); + // data->obj and data->free remain NULL + return 0; +} + +// tuple + +struct _shared_tuple_data { + Py_ssize_t len; + _PyCrossInterpreterData **data; +}; + +static PyObject * +_new_tuple_object(_PyCrossInterpreterData *data) +{ + struct _shared_tuple_data *shared = (struct _shared_tuple_data *)(data->data); + PyObject *tuple = PyTuple_New(shared->len); + if (tuple == NULL) { + return NULL; + } + + for (Py_ssize_t i = 0; i < shared->len; i++) { + PyObject *item = _PyCrossInterpreterData_NewObject(shared->data[i]); + if (item == NULL){ + Py_DECREF(tuple); + return NULL; + } + PyTuple_SET_ITEM(tuple, i, item); + } + return tuple; +} + +static void +_tuple_shared_free(void* data) +{ + struct _shared_tuple_data *shared = (struct _shared_tuple_data *)(data); +#ifndef NDEBUG + int64_t interpid = PyInterpreterState_GetID(_PyInterpreterState_GET()); +#endif + for (Py_ssize_t i = 0; i < shared->len; i++) { + if (shared->data[i] != NULL) { + assert(_PyCrossInterpreterData_INTERPID(shared->data[i]) == interpid); + _PyCrossInterpreterData_Release(shared->data[i]); + PyMem_RawFree(shared->data[i]); + shared->data[i] = NULL; + } + } + PyMem_Free(shared->data); + PyMem_RawFree(shared); +} + +static int +_tuple_shared(PyThreadState *tstate, PyObject *obj, + _PyCrossInterpreterData *data) +{ + Py_ssize_t len = PyTuple_GET_SIZE(obj); + if (len < 0) { + return -1; + } + struct _shared_tuple_data *shared = PyMem_RawMalloc(sizeof(struct _shared_tuple_data)); + if (shared == NULL){ + PyErr_NoMemory(); + return -1; + } + + shared->len = len; + shared->data = (_PyCrossInterpreterData **) PyMem_Calloc(shared->len, sizeof(_PyCrossInterpreterData *)); + if (shared->data == NULL) { + PyErr_NoMemory(); + return -1; + } + + for (Py_ssize_t i = 0; i < shared->len; i++) { + _PyCrossInterpreterData *data = _PyCrossInterpreterData_New(); + if (data == NULL) { + goto error; // PyErr_NoMemory already set + } + PyObject *item = 
PyTuple_GET_ITEM(obj, i); + + int res = -1; + if (!_Py_EnterRecursiveCallTstate(tstate, " while sharing a tuple")) { + res = _PyObject_GetCrossInterpreterData(item, data); + _Py_LeaveRecursiveCallTstate(tstate); + } + if (res < 0) { + PyMem_RawFree(data); + goto error; + } + shared->data[i] = data; + } + _PyCrossInterpreterData_Init( + data, tstate->interp, shared, obj, _new_tuple_object); + data->free = _tuple_shared_free; + return 0; + +error: + _tuple_shared_free(shared); + return -1; +} + +// registration + +static void +_register_builtins_for_crossinterpreter_data(struct _xidregistry *xidregistry) +{ + // None + if (_xidregistry_add_type(xidregistry, (PyTypeObject *)PyObject_Type(Py_None), _none_shared) != 0) { + Py_FatalError("could not register None for cross-interpreter sharing"); + } + + // int + if (_xidregistry_add_type(xidregistry, &PyLong_Type, _long_shared) != 0) { + Py_FatalError("could not register int for cross-interpreter sharing"); + } + + // bytes + if (_xidregistry_add_type(xidregistry, &PyBytes_Type, _bytes_shared) != 0) { + Py_FatalError("could not register bytes for cross-interpreter sharing"); + } + + // str + if (_xidregistry_add_type(xidregistry, &PyUnicode_Type, _str_shared) != 0) { + Py_FatalError("could not register str for cross-interpreter sharing"); + } + + // bool + if (_xidregistry_add_type(xidregistry, &PyBool_Type, _bool_shared) != 0) { + Py_FatalError("could not register bool for cross-interpreter sharing"); + } + + // float + if (_xidregistry_add_type(xidregistry, &PyFloat_Type, _float_shared) != 0) { + Py_FatalError("could not register float for cross-interpreter sharing"); + } + + // tuple + if (_xidregistry_add_type(xidregistry, &PyTuple_Type, _tuple_shared) != 0) { + Py_FatalError("could not register tuple for cross-interpreter sharing"); + } +} diff --git a/Python/crossinterp_exceptions.h b/Python/crossinterp_exceptions.h new file mode 100644 index 00000000000000..e418cf91d4a7af --- /dev/null +++ 
b/Python/crossinterp_exceptions.h @@ -0,0 +1,80 @@ + +/* InterpreterError extends Exception */ + +static PyTypeObject _PyExc_InterpreterError = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "interpreters.InterpreterError", + .tp_doc = PyDoc_STR("A cross-interpreter operation failed"), + //.tp_base = (PyTypeObject *)PyExc_BaseException, +}; +PyObject *PyExc_InterpreterError = (PyObject *)&_PyExc_InterpreterError; + +/* InterpreterNotFoundError extends InterpreterError */ + +static PyTypeObject _PyExc_InterpreterNotFoundError = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "interpreters.InterpreterNotFoundError", + .tp_doc = PyDoc_STR("An interpreter was not found"), + .tp_base = &_PyExc_InterpreterError, +}; +PyObject *PyExc_InterpreterNotFoundError = (PyObject *)&_PyExc_InterpreterNotFoundError; + +/* NotShareableError extends ValueError */ + +static int +_init_not_shareable_error_type(PyInterpreterState *interp) +{ + const char *name = "interpreters.NotShareableError"; + PyObject *base = PyExc_ValueError; + PyObject *ns = NULL; + PyObject *exctype = PyErr_NewException(name, base, ns); + if (exctype == NULL) { + return -1; + } + + _PyInterpreterState_GetXIState(interp)->PyExc_NotShareableError = exctype; + return 0; +} + +static void +_fini_not_shareable_error_type(PyInterpreterState *interp) +{ + Py_CLEAR(_PyInterpreterState_GetXIState(interp)->PyExc_NotShareableError); +} + +static PyObject * +_get_not_shareable_error_type(PyInterpreterState *interp) +{ + assert(_PyInterpreterState_GetXIState(interp)->PyExc_NotShareableError != NULL); + return _PyInterpreterState_GetXIState(interp)->PyExc_NotShareableError; +} + + +/* lifecycle */ + +static int +init_exceptions(PyInterpreterState *interp) +{ + // builtin static types + _PyExc_InterpreterError.tp_base = (PyTypeObject *)PyExc_BaseException; + if (_PyStaticType_InitBuiltin(interp, &_PyExc_InterpreterError) < 0) { + return -1; + } + if (_PyStaticType_InitBuiltin(interp, &_PyExc_InterpreterNotFoundError) < 0) { + 
return -1; + } + + // heap types + // We would call _init_not_shareable_error_type() here too, + // but that leads to ref leaks + + return 0; +} + +static void +fini_exceptions(PyInterpreterState *interp) +{ + // Likewise with _fini_not_shareable_error_type(). + _PyStaticType_Dealloc(interp, &_PyExc_InterpreterNotFoundError); + _PyStaticType_Dealloc(interp, &_PyExc_InterpreterError); +} diff --git a/Tools/c-analyzer/cpython/globals-to-fix.tsv b/Tools/c-analyzer/cpython/globals-to-fix.tsv index 1d9576d083d8dc..5c5016f7137164 100644 --- a/Tools/c-analyzer/cpython/globals-to-fix.tsv +++ b/Tools/c-analyzer/cpython/globals-to-fix.tsv @@ -292,10 +292,10 @@ Objects/exceptions.c - PyExc_UnicodeWarning - Objects/exceptions.c - PyExc_BytesWarning - Objects/exceptions.c - PyExc_ResourceWarning - Objects/exceptions.c - PyExc_EncodingWarning - -Python/crossinterp.c - _PyExc_InterpreterError - -Python/crossinterp.c - _PyExc_InterpreterNotFoundError - -Python/crossinterp.c - PyExc_InterpreterError - -Python/crossinterp.c - PyExc_InterpreterNotFoundError - +Python/crossinterp_exceptions.h - _PyExc_InterpreterError - +Python/crossinterp_exceptions.h - _PyExc_InterpreterNotFoundError - +Python/crossinterp_exceptions.h - PyExc_InterpreterError - +Python/crossinterp_exceptions.h - PyExc_InterpreterNotFoundError - ##----------------------- ## singletons From 518af37eb569f52a3daf2cf9f4787deed10754ca Mon Sep 17 00:00:00 2001 From: "T. Wouters" Date: Wed, 14 Feb 2024 00:58:49 +0100 Subject: [PATCH 108/126] gh-115421: Update the list of installed test subdirectories. (#115422) Update the list of installed test subdirectories with all newly added subdirectories of Lib/test, so that the tests in those directories are properly installed. 
--- Makefile.pre.in | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Makefile.pre.in b/Makefile.pre.in index 4b9d9c171b9efb..96886adf309d81 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2234,13 +2234,13 @@ LIBSUBDIRS= asyncio \ __phello__ TESTSUBDIRS= idlelib/idle_test \ test \ - test/audiodata \ test/archivetestdata \ + test/audiodata \ test/certdata \ test/certdata/capath \ test/cjkencodings \ - test/crashers \ test/configdata \ + test/crashers \ test/data \ test/decimaltestdata \ test/dtracedata \ @@ -2254,8 +2254,10 @@ TESTSUBDIRS= idlelib/idle_test \ test/subprocessdata \ test/support \ test/support/_hypothesis_stubs \ + test/support/interpreters \ test/test_asyncio \ test/test_capi \ + test/test_concurrent_futures \ test/test_cppext \ test/test_ctypes \ test/test_dataclasses \ @@ -2264,7 +2266,6 @@ TESTSUBDIRS= idlelib/idle_test \ test/test_email/data \ test/test_future_stmt \ test/test_gdb \ - test/test_inspect \ test/test_import \ test/test_import/data \ test/test_import/data/circular_imports \ @@ -2317,8 +2318,13 @@ TESTSUBDIRS= idlelib/idle_test \ test/test_importlib/resources/zipdata01 \ test/test_importlib/resources/zipdata02 \ test/test_importlib/source \ + test/test_inspect \ + test/test_interpreters \ test/test_json \ test/test_module \ + test/test_multiprocessing_fork \ + test/test_multiprocessing_forkserver \ + test/test_multiprocessing_spawn \ test/test_pathlib \ test/test_peg_generator \ test/test_pydoc \ From f15795c9a0f206b9abfb48007b267d12cd14f4a8 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 14 Feb 2024 09:32:51 +0900 Subject: [PATCH 109/126] gh-111968: Rename freelist related struct names to Eric's suggestion (gh-115329) --- Include/internal/pycore_dict.h | 6 --- Include/internal/pycore_freelist.h | 52 +++++++++++++------------- Include/internal/pycore_interp.h | 4 -- Include/internal/pycore_object_state.h | 4 ++ Include/internal/pycore_pystate.h | 6 +-- Include/internal/pycore_tstate.h | 2 +- 
Objects/dictobject.c | 51 ++++++++++--------------- Objects/floatobject.c | 38 +++++++++---------- Objects/genobject.c | 42 ++++++++++----------- Objects/listobject.c | 34 ++++++++--------- Objects/object.c | 18 ++++----- Objects/sliceobject.c | 20 +++++----- Objects/tupleobject.c | 42 ++++++++++----------- Python/context.c | 32 ++++++++-------- Python/gc_free_threading.c | 2 +- Python/gc_gil.c | 2 +- Python/object_stack.c | 34 ++++++++--------- Python/pylifecycle.c | 4 +- Python/pystate.c | 4 +- 19 files changed, 190 insertions(+), 207 deletions(-) diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 0ebe701bc16f81..e5ef9a8607a83b 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -67,12 +67,6 @@ typedef struct { extern PyObject* _PyDictView_New(PyObject *, PyTypeObject *); extern PyObject* _PyDictView_Intersect(PyObject* self, PyObject *other); - -/* runtime lifecycle */ - -extern void _PyDict_Fini(PyInterpreterState *state); - - /* other API */ typedef struct { diff --git a/Include/internal/pycore_freelist.h b/Include/internal/pycore_freelist.h index 1bc551914794f0..b365ca337eabc8 100644 --- a/Include/internal/pycore_freelist.h +++ b/Include/internal/pycore_freelist.h @@ -33,14 +33,14 @@ extern "C" { # define _PyObjectStackChunk_MAXFREELIST 0 #endif -struct _Py_list_state { +struct _Py_list_freelist { #ifdef WITH_FREELISTS PyListObject *free_list[PyList_MAXFREELIST]; int numfree; #endif }; -struct _Py_tuple_state { +struct _Py_tuple_freelist { #if WITH_FREELISTS /* There is one freelist for each size from 1 to PyTuple_MAXSAVESIZE. The empty tuple is handled separately. 
@@ -57,7 +57,7 @@ struct _Py_tuple_state { #endif }; -struct _Py_float_state { +struct _Py_float_freelist { #ifdef WITH_FREELISTS /* Special free list free_list is a singly-linked list of available PyFloatObjects, @@ -77,7 +77,7 @@ struct _Py_dict_freelist { #endif }; -struct _Py_slice_state { +struct _Py_slice_freelist { #ifdef WITH_FREELISTS /* Using a cache is very effective since typically only a single slice is created and then deleted again. */ @@ -85,7 +85,7 @@ struct _Py_slice_state { #endif }; -struct _Py_context_state { +struct _Py_context_freelist { #ifdef WITH_FREELISTS // List of free PyContext objects PyContext *freelist; @@ -93,7 +93,7 @@ struct _Py_context_state { #endif }; -struct _Py_async_gen_state { +struct _Py_async_gen_freelist { #ifdef WITH_FREELISTS /* Freelists boost performance 6-10%; they also reduce memory fragmentation, as _PyAsyncGenWrappedValue and PyAsyncGenASend @@ -109,31 +109,31 @@ struct _Py_async_gen_state { struct _PyObjectStackChunk; -struct _Py_object_stack_state { +struct _Py_object_stack_freelist { struct _PyObjectStackChunk *free_list; Py_ssize_t numfree; }; -typedef struct _Py_freelist_state { - struct _Py_float_state floats; - struct _Py_tuple_state tuples; - struct _Py_list_state lists; +struct _Py_object_freelists { + struct _Py_float_freelist floats; + struct _Py_tuple_freelist tuples; + struct _Py_list_freelist lists; struct _Py_dict_freelist dicts; - struct _Py_slice_state slices; - struct _Py_context_state contexts; - struct _Py_async_gen_state async_gens; - struct _Py_object_stack_state object_stacks; -} _PyFreeListState; - -extern void _PyObject_ClearFreeLists(_PyFreeListState *state, int is_finalization); -extern void _PyTuple_ClearFreeList(_PyFreeListState *state, int is_finalization); -extern void _PyFloat_ClearFreeList(_PyFreeListState *state, int is_finalization); -extern void _PyList_ClearFreeList(_PyFreeListState *state, int is_finalization); -extern void _PySlice_ClearFreeList(_PyFreeListState *state, int 
is_finalization); -extern void _PyDict_ClearFreeList(_PyFreeListState *state, int is_finalization); -extern void _PyAsyncGen_ClearFreeLists(_PyFreeListState *state, int is_finalization); -extern void _PyContext_ClearFreeList(_PyFreeListState *state, int is_finalization); -extern void _PyObjectStackChunk_ClearFreeList(_PyFreeListState *state, int is_finalization); + struct _Py_slice_freelist slices; + struct _Py_context_freelist contexts; + struct _Py_async_gen_freelist async_gens; + struct _Py_object_stack_freelist object_stacks; +}; + +extern void _PyObject_ClearFreeLists(struct _Py_object_freelists *freelists, int is_finalization); +extern void _PyTuple_ClearFreeList(struct _Py_object_freelists *freelists, int is_finalization); +extern void _PyFloat_ClearFreeList(struct _Py_object_freelists *freelists, int is_finalization); +extern void _PyList_ClearFreeList(struct _Py_object_freelists *freelists, int is_finalization); +extern void _PySlice_ClearFreeList(struct _Py_object_freelists *freelists, int is_finalization); +extern void _PyDict_ClearFreeList(struct _Py_object_freelists *freelists, int is_finalization); +extern void _PyAsyncGen_ClearFreeLists(struct _Py_object_freelists *freelists, int is_finalization); +extern void _PyContext_ClearFreeList(struct _Py_object_freelists *freelists, int is_finalization); +extern void _PyObjectStackChunk_ClearFreeList(struct _Py_object_freelists *freelists, int is_finalization); #ifdef __cplusplus } diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index c244d8966f238b..c07447183d6209 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -20,7 +20,6 @@ extern "C" { #include "pycore_dtoa.h" // struct _dtoa_state #include "pycore_exceptions.h" // struct _Py_exc_state #include "pycore_floatobject.h" // struct _Py_float_state -#include "pycore_freelist.h" // struct _Py_freelist_state #include "pycore_function.h" // FUNC_MAX_WATCHERS #include "pycore_gc.h" // struct 
_gc_runtime_state #include "pycore_genobject.h" // struct _Py_async_gen_state @@ -222,9 +221,6 @@ struct _is { // One bit is set for each non-NULL entry in code_watchers uint8_t active_code_watchers; -#if !defined(Py_GIL_DISABLED) - struct _Py_freelist_state freelist_state; -#endif struct _py_object_state object_state; struct _Py_unicode_state unicode; struct _Py_long_state long_state; diff --git a/Include/internal/pycore_object_state.h b/Include/internal/pycore_object_state.h index 9eac27b1a9a4e3..cd7c9335b3e611 100644 --- a/Include/internal/pycore_object_state.h +++ b/Include/internal/pycore_object_state.h @@ -8,6 +8,7 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#include "pycore_freelist.h" // _PyObject_freelists #include "pycore_hashtable.h" // _Py_hashtable_t struct _py_object_runtime_state { @@ -18,6 +19,9 @@ struct _py_object_runtime_state { }; struct _py_object_state { +#if !defined(Py_GIL_DISABLED) + struct _Py_object_freelists freelists; +#endif #ifdef Py_REF_DEBUG Py_ssize_t reftotal; #endif diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 289ef28f0dd9a9..6f9e6a332a7830 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -268,7 +268,7 @@ PyAPI_FUNC(const PyConfig*) _Py_GetConfig(void); // See also PyInterpreterState_Get() and _PyInterpreterState_GET(). 
extern PyInterpreterState* _PyGILState_GetInterpreterStateUnsafe(void); -static inline _PyFreeListState* _PyFreeListState_GET(void) +static inline struct _Py_object_freelists* _Py_object_freelists_GET(void) { PyThreadState *tstate = _PyThreadState_GET(); #ifdef Py_DEBUG @@ -276,9 +276,9 @@ static inline _PyFreeListState* _PyFreeListState_GET(void) #endif #ifdef Py_GIL_DISABLED - return &((_PyThreadStateImpl*)tstate)->freelist_state; + return &((_PyThreadStateImpl*)tstate)->freelists; #else - return &tstate->interp->freelist_state; + return &tstate->interp->object_state.freelists; #endif } diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index 3e8fcf5b6ec1fa..97aa85a659fa7b 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -29,7 +29,7 @@ typedef struct _PyThreadStateImpl { #ifdef Py_GIL_DISABLED struct _mimalloc_thread_state mimalloc; - struct _Py_freelist_state freelist_state; + struct _Py_object_freelists freelists; struct _brc_thread_state brc; #endif diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 9b1defa5cbc609..11667b07ecfb4b 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -271,19 +271,19 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu #ifdef WITH_FREELISTS static struct _Py_dict_freelist * -get_dict_state(void) +get_dict_freelist(void) { - _PyFreeListState *state = _PyFreeListState_GET(); - return &state->dicts; + struct _Py_object_freelists *freelists = _Py_object_freelists_GET(); + return &freelists->dicts; } #endif void -_PyDict_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization) +_PyDict_ClearFreeList(struct _Py_object_freelists *freelists, int is_finalization) { #ifdef WITH_FREELISTS - struct _Py_dict_freelist *state = &freelist_state->dicts; + struct _Py_dict_freelist *state = &freelists->dicts; while (state->numfree > 0) { PyDictObject *op = state->free_list[--state->numfree]; 
assert(PyDict_CheckExact(op)); @@ -299,17 +299,6 @@ _PyDict_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization) #endif } -void -_PyDict_Fini(PyInterpreterState *Py_UNUSED(interp)) -{ - // With Py_GIL_DISABLED: - // the freelists for the current thread state have already been cleared. -#ifndef Py_GIL_DISABLED - _PyFreeListState *state = _PyFreeListState_GET(); - _PyDict_ClearFreeList(state, 1); -#endif -} - static inline Py_hash_t unicode_get_hash(PyObject *o) { @@ -322,9 +311,9 @@ void _PyDict_DebugMallocStats(FILE *out) { #ifdef WITH_FREELISTS - struct _Py_dict_freelist *state = get_dict_state(); + struct _Py_dict_freelist *dict_freelist = get_dict_freelist(); _PyDebugAllocatorStats(out, "free PyDictObject", - state->numfree, sizeof(PyDictObject)); + dict_freelist->numfree, sizeof(PyDictObject)); #endif } @@ -674,9 +663,9 @@ new_keys_object(PyInterpreterState *interp, uint8_t log2_size, bool unicode) } #ifdef WITH_FREELISTS - struct _Py_dict_freelist *state = get_dict_state(); - if (log2_size == PyDict_LOG_MINSIZE && unicode && state->keys_numfree > 0) { - dk = state->keys_free_list[--state->keys_numfree]; + struct _Py_dict_freelist *dict_freelist = get_dict_freelist(); + if (log2_size == PyDict_LOG_MINSIZE && unicode && dict_freelist->keys_numfree > 0) { + dk = dict_freelist->keys_free_list[--dict_freelist->keys_numfree]; OBJECT_STAT_INC(from_freelist); } else @@ -709,12 +698,12 @@ static void free_keys_object(PyDictKeysObject *keys) { #ifdef WITH_FREELISTS - struct _Py_dict_freelist *state = get_dict_state(); + struct _Py_dict_freelist *dict_freelist = get_dict_freelist(); if (DK_LOG_SIZE(keys) == PyDict_LOG_MINSIZE - && state->keys_numfree < PyDict_MAXFREELIST - && state->keys_numfree >= 0 + && dict_freelist->keys_numfree < PyDict_MAXFREELIST + && dict_freelist->keys_numfree >= 0 && DK_IS_UNICODE(keys)) { - state->keys_free_list[state->keys_numfree++] = keys; + dict_freelist->keys_free_list[dict_freelist->keys_numfree++] = keys; 
OBJECT_STAT_INC(to_freelist); return; } @@ -754,9 +743,9 @@ new_dict(PyInterpreterState *interp, PyDictObject *mp; assert(keys != NULL); #ifdef WITH_FREELISTS - struct _Py_dict_freelist *state = get_dict_state(); - if (state->numfree > 0) { - mp = state->free_list[--state->numfree]; + struct _Py_dict_freelist *dict_freelist = get_dict_freelist(); + if (dict_freelist->numfree > 0) { + mp = dict_freelist->free_list[--dict_freelist->numfree]; assert (mp != NULL); assert (Py_IS_TYPE(mp, &PyDict_Type)); OBJECT_STAT_INC(from_freelist); @@ -2604,10 +2593,10 @@ dict_dealloc(PyObject *self) dictkeys_decref(interp, keys); } #ifdef WITH_FREELISTS - struct _Py_dict_freelist *state = get_dict_state(); - if (state->numfree < PyDict_MAXFREELIST && state->numfree >=0 && + struct _Py_dict_freelist *dict_freelist = get_dict_freelist(); + if (dict_freelist->numfree < PyDict_MAXFREELIST && dict_freelist->numfree >=0 && Py_IS_TYPE(mp, &PyDict_Type)) { - state->free_list[state->numfree++] = mp; + dict_freelist->free_list[dict_freelist->numfree++] = mp; OBJECT_STAT_INC(to_freelist); } else diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 9b322c52d4daea..7dac8292c7232b 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -8,7 +8,7 @@ #include "pycore_dtoa.h" // _Py_dg_dtoa() #include "pycore_floatobject.h" // _PyFloat_FormatAdvancedWriter() #include "pycore_initconfig.h" // _PyStatus_OK() -#include "pycore_interp.h" // _PyInterpreterState.float_state +#include "pycore_interp.h" // _Py_float_freelist #include "pycore_long.h" // _PyLong_GetOne() #include "pycore_modsupport.h" // _PyArg_NoKwnames() #include "pycore_object.h" // _PyObject_Init(), _PyDebugAllocatorStats() @@ -27,12 +27,12 @@ class float "PyObject *" "&PyFloat_Type" #include "clinic/floatobject.c.h" #ifdef WITH_FREELISTS -static struct _Py_float_state * -get_float_state(void) +static struct _Py_float_freelist * +get_float_freelist(void) { - _PyFreeListState *state = _PyFreeListState_GET(); - 
assert(state != NULL); - return &state->floats; + struct _Py_object_freelists *freelists = _Py_object_freelists_GET(); + assert(freelists != NULL); + return &freelists->floats; } #endif @@ -129,11 +129,11 @@ PyFloat_FromDouble(double fval) { PyFloatObject *op; #ifdef WITH_FREELISTS - struct _Py_float_state *state = get_float_state(); - op = state->free_list; + struct _Py_float_freelist *float_freelist = get_float_freelist(); + op = float_freelist->free_list; if (op != NULL) { - state->free_list = (PyFloatObject *) Py_TYPE(op); - state->numfree--; + float_freelist->free_list = (PyFloatObject *) Py_TYPE(op); + float_freelist->numfree--; OBJECT_STAT_INC(from_freelist); } else @@ -245,14 +245,14 @@ _PyFloat_ExactDealloc(PyObject *obj) assert(PyFloat_CheckExact(obj)); PyFloatObject *op = (PyFloatObject *)obj; #ifdef WITH_FREELISTS - struct _Py_float_state *state = get_float_state(); - if (state->numfree >= PyFloat_MAXFREELIST || state->numfree < 0) { + struct _Py_float_freelist *float_freelist = get_float_freelist(); + if (float_freelist->numfree >= PyFloat_MAXFREELIST || float_freelist->numfree < 0) { PyObject_Free(op); return; } - state->numfree++; - Py_SET_TYPE(op, (PyTypeObject *)state->free_list); - state->free_list = op; + float_freelist->numfree++; + Py_SET_TYPE(op, (PyTypeObject *)float_freelist->free_list); + float_freelist->free_list = op; OBJECT_STAT_INC(to_freelist); #else PyObject_Free(op); @@ -1990,10 +1990,10 @@ _PyFloat_InitTypes(PyInterpreterState *interp) } void -_PyFloat_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization) +_PyFloat_ClearFreeList(struct _Py_object_freelists *freelists, int is_finalization) { #ifdef WITH_FREELISTS - struct _Py_float_state *state = &freelist_state->floats; + struct _Py_float_freelist *state = &freelists->floats; PyFloatObject *f = state->free_list; while (f != NULL) { PyFloatObject *next = (PyFloatObject*) Py_TYPE(f); @@ -2021,10 +2021,10 @@ void _PyFloat_DebugMallocStats(FILE *out) { #ifdef 
WITH_FREELISTS - struct _Py_float_state *state = get_float_state(); + struct _Py_float_freelist *float_freelist = get_float_freelist(); _PyDebugAllocatorStats(out, "free PyFloatObject", - state->numfree, sizeof(PyFloatObject)); + float_freelist->numfree, sizeof(PyFloatObject)); #endif } diff --git a/Objects/genobject.c b/Objects/genobject.c index 59ab7abf6180bd..a1b6db1b5889d3 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -7,7 +7,7 @@ #include "pycore_ceval.h" // _PyEval_EvalFrame() #include "pycore_frame.h" // _PyInterpreterFrame #include "pycore_gc.h" // _PyGC_CLEAR_FINALIZED() -#include "pycore_genobject.h" // struct _Py_async_gen_state +#include "pycore_genobject.h" // struct _Py_async_gen_freelist #include "pycore_modsupport.h" // _PyArg_CheckPositional() #include "pycore_object.h" // _PyObject_GC_UNTRACK() #include "pycore_opcode_utils.h" // RESUME_AFTER_YIELD_FROM @@ -1629,11 +1629,11 @@ PyTypeObject PyAsyncGen_Type = { #ifdef WITH_FREELISTS -static struct _Py_async_gen_state * -get_async_gen_state(void) +static struct _Py_async_gen_freelist * +get_async_gen_freelist(void) { - _PyFreeListState *state = _PyFreeListState_GET(); - return &state->async_gens; + struct _Py_object_freelists *freelists = _Py_object_freelists_GET(); + return &freelists->async_gens; } #endif @@ -1656,10 +1656,10 @@ PyAsyncGen_New(PyFrameObject *f, PyObject *name, PyObject *qualname) void -_PyAsyncGen_ClearFreeLists(_PyFreeListState *freelist_state, int is_finalization) +_PyAsyncGen_ClearFreeLists(struct _Py_object_freelists *freelist_state, int is_finalization) { #ifdef WITH_FREELISTS - struct _Py_async_gen_state *state = &freelist_state->async_gens; + struct _Py_async_gen_freelist *state = &freelist_state->async_gens; while (state->value_numfree > 0) { _PyAsyncGenWrappedValue *o; @@ -1726,11 +1726,11 @@ async_gen_asend_dealloc(PyAsyncGenASend *o) Py_CLEAR(o->ags_gen); Py_CLEAR(o->ags_sendval); #ifdef WITH_FREELISTS - struct _Py_async_gen_state *state = 
get_async_gen_state(); - if (state->asend_numfree >= 0 && state->asend_numfree < _PyAsyncGen_MAXFREELIST) { + struct _Py_async_gen_freelist *async_gen_freelist = get_async_gen_freelist(); + if (async_gen_freelist->asend_numfree >= 0 && async_gen_freelist->asend_numfree < _PyAsyncGen_MAXFREELIST) { assert(PyAsyncGenASend_CheckExact(o)); _PyGC_CLEAR_FINALIZED((PyObject *)o); - state->asend_freelist[state->asend_numfree++] = o; + async_gen_freelist->asend_freelist[async_gen_freelist->asend_numfree++] = o; } else #endif @@ -1896,10 +1896,10 @@ async_gen_asend_new(PyAsyncGenObject *gen, PyObject *sendval) { PyAsyncGenASend *o; #ifdef WITH_FREELISTS - struct _Py_async_gen_state *state = get_async_gen_state(); - if (state->asend_numfree > 0) { - state->asend_numfree--; - o = state->asend_freelist[state->asend_numfree]; + struct _Py_async_gen_freelist *async_gen_freelist = get_async_gen_freelist(); + if (async_gen_freelist->asend_numfree > 0) { + async_gen_freelist->asend_numfree--; + o = async_gen_freelist->asend_freelist[async_gen_freelist->asend_numfree]; _Py_NewReference((PyObject *)o); } else @@ -1931,10 +1931,10 @@ async_gen_wrapped_val_dealloc(_PyAsyncGenWrappedValue *o) _PyObject_GC_UNTRACK((PyObject *)o); Py_CLEAR(o->agw_val); #ifdef WITH_FREELISTS - struct _Py_async_gen_state *state = get_async_gen_state(); - if (state->value_numfree >= 0 && state->value_numfree < _PyAsyncGen_MAXFREELIST) { + struct _Py_async_gen_freelist *async_gen_freelist = get_async_gen_freelist(); + if (async_gen_freelist->value_numfree >= 0 && async_gen_freelist->value_numfree < _PyAsyncGen_MAXFREELIST) { assert(_PyAsyncGenWrappedValue_CheckExact(o)); - state->value_freelist[state->value_numfree++] = o; + async_gen_freelist->value_freelist[async_gen_freelist->value_numfree++] = o; OBJECT_STAT_INC(to_freelist); } else @@ -2004,10 +2004,10 @@ _PyAsyncGenValueWrapperNew(PyThreadState *tstate, PyObject *val) assert(val); #ifdef WITH_FREELISTS - struct _Py_async_gen_state *state = 
get_async_gen_state(); - if (state->value_numfree > 0) { - state->value_numfree--; - o = state->value_freelist[state->value_numfree]; + struct _Py_async_gen_freelist *async_gen_freelist = get_async_gen_freelist(); + if (async_gen_freelist->value_numfree > 0) { + async_gen_freelist->value_numfree--; + o = async_gen_freelist->value_freelist[async_gen_freelist->value_numfree]; OBJECT_STAT_INC(from_freelist); assert(_PyAsyncGenWrappedValue_CheckExact(o)); _Py_NewReference((PyObject*)o); diff --git a/Objects/listobject.c b/Objects/listobject.c index 7fdb91eab890b5..93409a82f8a489 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -4,7 +4,7 @@ #include "pycore_abstract.h" // _PyIndex_Check() #include "pycore_ceval.h" // _PyEval_GetBuiltin() #include "pycore_interp.h" // PyInterpreterState.list -#include "pycore_list.h" // struct _Py_list_state, _PyListIterObject +#include "pycore_list.h" // struct _Py_list_freelist, _PyListIterObject #include "pycore_long.h" // _PyLong_DigitCount #include "pycore_modsupport.h" // _PyArg_NoKwnames() #include "pycore_object.h" // _PyObject_GC_TRACK(), _PyDebugAllocatorStats() @@ -21,12 +21,12 @@ class list "PyListObject *" "&PyList_Type" _Py_DECLARE_STR(list_err, "list index out of range"); #ifdef WITH_FREELISTS -static struct _Py_list_state * -get_list_state(void) +static struct _Py_list_freelist * +get_list_freelist(void) { - _PyFreeListState *state = _PyFreeListState_GET(); - assert(state != NULL); - return &state->lists; + struct _Py_object_freelists *freelists = _Py_object_freelists_GET(); + assert(freelists != NULL); + return &freelists->lists; } #endif @@ -120,10 +120,10 @@ list_preallocate_exact(PyListObject *self, Py_ssize_t size) } void -_PyList_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization) +_PyList_ClearFreeList(struct _Py_object_freelists *freelists, int is_finalization) { #ifdef WITH_FREELISTS - struct _Py_list_state *state = &freelist_state->lists; + struct _Py_list_freelist *state = 
&freelists->lists; while (state->numfree > 0) { PyListObject *op = state->free_list[--state->numfree]; assert(PyList_CheckExact(op)); @@ -140,10 +140,10 @@ void _PyList_DebugMallocStats(FILE *out) { #ifdef WITH_FREELISTS - struct _Py_list_state *state = get_list_state(); + struct _Py_list_freelist *list_freelist = get_list_freelist(); _PyDebugAllocatorStats(out, "free PyListObject", - state->numfree, sizeof(PyListObject)); + list_freelist->numfree, sizeof(PyListObject)); #endif } @@ -158,10 +158,10 @@ PyList_New(Py_ssize_t size) } #ifdef WITH_FREELISTS - struct _Py_list_state *state = get_list_state(); - if (PyList_MAXFREELIST && state->numfree > 0) { - state->numfree--; - op = state->free_list[state->numfree]; + struct _Py_list_freelist *list_freelist = get_list_freelist(); + if (PyList_MAXFREELIST && list_freelist->numfree > 0) { + list_freelist->numfree--; + op = list_freelist->free_list[list_freelist->numfree]; OBJECT_STAT_INC(from_freelist); _Py_NewReference((PyObject *)op); } @@ -391,9 +391,9 @@ list_dealloc(PyObject *self) PyMem_Free(op->ob_item); } #ifdef WITH_FREELISTS - struct _Py_list_state *state = get_list_state(); - if (state->numfree < PyList_MAXFREELIST && state->numfree >= 0 && PyList_CheckExact(op)) { - state->free_list[state->numfree++] = op; + struct _Py_list_freelist *list_freelist = get_list_freelist(); + if (list_freelist->numfree < PyList_MAXFREELIST && list_freelist->numfree >= 0 && PyList_CheckExact(op)) { + list_freelist->free_list[list_freelist->numfree++] = op; OBJECT_STAT_INC(to_freelist); } else diff --git a/Objects/object.c b/Objects/object.c index 275aa6713c8c21..23eab8288a41e8 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -794,19 +794,19 @@ PyObject_Bytes(PyObject *v) } void -_PyObject_ClearFreeLists(_PyFreeListState *state, int is_finalization) +_PyObject_ClearFreeLists(struct _Py_object_freelists *freelists, int is_finalization) { // In the free-threaded build, freelists are per-PyThreadState and cleared in 
PyThreadState_Clear() // In the default build, freelists are per-interpreter and cleared in finalize_interp_types() - _PyFloat_ClearFreeList(state, is_finalization); - _PyTuple_ClearFreeList(state, is_finalization); - _PyList_ClearFreeList(state, is_finalization); - _PyDict_ClearFreeList(state, is_finalization); - _PyContext_ClearFreeList(state, is_finalization); - _PyAsyncGen_ClearFreeLists(state, is_finalization); + _PyFloat_ClearFreeList(freelists, is_finalization); + _PyTuple_ClearFreeList(freelists, is_finalization); + _PyList_ClearFreeList(freelists, is_finalization); + _PyDict_ClearFreeList(freelists, is_finalization); + _PyContext_ClearFreeList(freelists, is_finalization); + _PyAsyncGen_ClearFreeLists(freelists, is_finalization); // Only be cleared if is_finalization is true. - _PyObjectStackChunk_ClearFreeList(state, is_finalization); - _PySlice_ClearFreeList(state, is_finalization); + _PyObjectStackChunk_ClearFreeList(freelists, is_finalization); + _PySlice_ClearFreeList(freelists, is_finalization); } /* diff --git a/Objects/sliceobject.c b/Objects/sliceobject.c index 9880c123c80f95..7333aea91e5648 100644 --- a/Objects/sliceobject.c +++ b/Objects/sliceobject.c @@ -103,15 +103,15 @@ PyObject _Py_EllipsisObject = _PyObject_HEAD_INIT(&PyEllipsis_Type); /* Slice object implementation */ -void _PySlice_ClearFreeList(_PyFreeListState *state, int is_finalization) +void _PySlice_ClearFreeList(struct _Py_object_freelists *freelists, int is_finalization) { if (!is_finalization) { return; } #ifdef WITH_FREELISTS - PySliceObject *obj = state->slices.slice_cache; + PySliceObject *obj = freelists->slices.slice_cache; if (obj != NULL) { - state->slices.slice_cache = NULL; + freelists->slices.slice_cache = NULL; PyObject_GC_Del(obj); } #endif @@ -127,10 +127,10 @@ _PyBuildSlice_Consume2(PyObject *start, PyObject *stop, PyObject *step) assert(start != NULL && stop != NULL && step != NULL); PySliceObject *obj; #ifdef WITH_FREELISTS - _PyFreeListState *state = 
_PyFreeListState_GET(); - if (state->slices.slice_cache != NULL) { - obj = state->slices.slice_cache; - state->slices.slice_cache = NULL; + struct _Py_object_freelists *freelists = _Py_object_freelists_GET(); + if (freelists->slices.slice_cache != NULL) { + obj = freelists->slices.slice_cache; + freelists->slices.slice_cache = NULL; _Py_NewReference((PyObject *)obj); } else @@ -365,9 +365,9 @@ slice_dealloc(PySliceObject *r) Py_DECREF(r->start); Py_DECREF(r->stop); #ifdef WITH_FREELISTS - _PyFreeListState *state = _PyFreeListState_GET(); - if (state->slices.slice_cache == NULL) { - state->slices.slice_cache = r; + struct _Py_object_freelists *freelists = _Py_object_freelists_GET(); + if (freelists->slices.slice_cache == NULL) { + freelists->slices.slice_cache = r; } else #endif diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 7d73c3fb0f7f2c..1cdf79d95ae352 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -962,13 +962,13 @@ _PyTuple_Resize(PyObject **pv, Py_ssize_t newsize) } -static void maybe_freelist_clear(_PyFreeListState *, int); +static void maybe_freelist_clear(struct _Py_object_freelists *, int); void -_PyTuple_ClearFreeList(_PyFreeListState *state, int is_finalization) +_PyTuple_ClearFreeList(struct _Py_object_freelists *freelists, int is_finalization) { - maybe_freelist_clear(state, is_finalization); + maybe_freelist_clear(freelists, is_finalization); } /*********************** Tuple Iterator **************************/ @@ -1120,26 +1120,26 @@ tuple_iter(PyObject *seq) * freelists * *************/ -#define STATE (state->tuples) -#define FREELIST_FINALIZED (STATE.numfree[0] < 0) +#define TUPLE_FREELIST (freelists->tuples) +#define FREELIST_FINALIZED (TUPLE_FREELIST.numfree[0] < 0) static inline PyTupleObject * maybe_freelist_pop(Py_ssize_t size) { #ifdef WITH_FREELISTS - _PyFreeListState *state = _PyFreeListState_GET(); + struct _Py_object_freelists *freelists = _Py_object_freelists_GET(); if (size == 0) { return NULL; } 
assert(size > 0); if (size < PyTuple_MAXSAVESIZE) { Py_ssize_t index = size - 1; - PyTupleObject *op = STATE.free_list[index]; + PyTupleObject *op = TUPLE_FREELIST.free_list[index]; if (op != NULL) { /* op is the head of a linked list, with the first item pointing to the next node. Here we pop off the old head. */ - STATE.free_list[index] = (PyTupleObject *) op->ob_item[0]; - STATE.numfree[index]--; + TUPLE_FREELIST.free_list[index] = (PyTupleObject *) op->ob_item[0]; + TUPLE_FREELIST.numfree[index]--; /* Inlined _PyObject_InitVar() without _PyType_HasFeature() test */ #ifdef Py_TRACE_REFS /* maybe_freelist_push() ensures these were already set. */ @@ -1161,21 +1161,21 @@ static inline int maybe_freelist_push(PyTupleObject *op) { #ifdef WITH_FREELISTS - _PyFreeListState *state = _PyFreeListState_GET(); + struct _Py_object_freelists *freelists = _Py_object_freelists_GET(); if (Py_SIZE(op) == 0) { return 0; } Py_ssize_t index = Py_SIZE(op) - 1; if (index < PyTuple_NFREELISTS - && STATE.numfree[index] < PyTuple_MAXFREELIST - && STATE.numfree[index] >= 0 + && TUPLE_FREELIST.numfree[index] < PyTuple_MAXFREELIST + && TUPLE_FREELIST.numfree[index] >= 0 && Py_IS_TYPE(op, &PyTuple_Type)) { /* op is the head of a linked list, with the first item pointing to the next node. Here we set op as the new head. */ - op->ob_item[0] = (PyObject *) STATE.free_list[index]; - STATE.free_list[index] = op; - STATE.numfree[index]++; + op->ob_item[0] = (PyObject *) TUPLE_FREELIST.free_list[index]; + TUPLE_FREELIST.free_list[index] = op; + TUPLE_FREELIST.numfree[index]++; OBJECT_STAT_INC(to_freelist); return 1; } @@ -1184,13 +1184,13 @@ maybe_freelist_push(PyTupleObject *op) } static void -maybe_freelist_clear(_PyFreeListState *state, int fini) +maybe_freelist_clear(struct _Py_object_freelists *freelists, int fini) { #ifdef WITH_FREELISTS for (Py_ssize_t i = 0; i < PyTuple_NFREELISTS; i++) { - PyTupleObject *p = STATE.free_list[i]; - STATE.free_list[i] = NULL; - STATE.numfree[i] = fini ? 
-1 : 0; + PyTupleObject *p = TUPLE_FREELIST.free_list[i]; + TUPLE_FREELIST.free_list[i] = NULL; + TUPLE_FREELIST.numfree[i] = fini ? -1 : 0; while (p) { PyTupleObject *q = p; p = (PyTupleObject *)(p->ob_item[0]); @@ -1205,13 +1205,13 @@ void _PyTuple_DebugMallocStats(FILE *out) { #ifdef WITH_FREELISTS - _PyFreeListState *state = _PyFreeListState_GET(); + struct _Py_object_freelists *freelists = _Py_object_freelists_GET(); for (int i = 0; i < PyTuple_NFREELISTS; i++) { int len = i + 1; char buf[128]; PyOS_snprintf(buf, sizeof(buf), "free %d-sized PyTupleObject", len); - _PyDebugAllocatorStats(out, buf, STATE.numfree[i], + _PyDebugAllocatorStats(out, buf, TUPLE_FREELIST.numfree[i], _PyObject_VAR_SIZE(&PyTuple_Type, len)); } #endif diff --git a/Python/context.c b/Python/context.c index e44fef705c36e0..01a21b47da5452 100644 --- a/Python/context.c +++ b/Python/context.c @@ -65,11 +65,11 @@ contextvar_del(PyContextVar *var); #ifdef WITH_FREELISTS -static struct _Py_context_state * -get_context_state(void) +static struct _Py_context_freelist * +get_context_freelist(void) { - _PyFreeListState *state = _PyFreeListState_GET(); - return &state->contexts; + struct _Py_object_freelists *freelists = _Py_object_freelists_GET(); + return &freelists->contexts; } #endif @@ -341,11 +341,11 @@ _context_alloc(void) { PyContext *ctx; #ifdef WITH_FREELISTS - struct _Py_context_state *state = get_context_state(); - if (state->numfree > 0) { - state->numfree--; - ctx = state->freelist; - state->freelist = (PyContext *)ctx->ctx_weakreflist; + struct _Py_context_freelist *context_freelist = get_context_freelist(); + if (context_freelist->numfree > 0) { + context_freelist->numfree--; + ctx = context_freelist->freelist; + context_freelist->freelist = (PyContext *)ctx->ctx_weakreflist; OBJECT_STAT_INC(from_freelist); ctx->ctx_weakreflist = NULL; _Py_NewReference((PyObject *)ctx); @@ -468,11 +468,11 @@ context_tp_dealloc(PyContext *self) (void)context_tp_clear(self); #ifdef WITH_FREELISTS - 
struct _Py_context_state *state = get_context_state(); - if (state->numfree >= 0 && state->numfree < PyContext_MAXFREELIST) { - state->numfree++; - self->ctx_weakreflist = (PyObject *)state->freelist; - state->freelist = self; + struct _Py_context_freelist *context_freelist = get_context_freelist(); + if (context_freelist->numfree >= 0 && context_freelist->numfree < PyContext_MAXFREELIST) { + context_freelist->numfree++; + self->ctx_weakreflist = (PyObject *)context_freelist->freelist; + context_freelist->freelist = self; OBJECT_STAT_INC(to_freelist); } else @@ -1267,10 +1267,10 @@ get_token_missing(void) void -_PyContext_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization) +_PyContext_ClearFreeList(struct _Py_object_freelists *freelists, int is_finalization) { #ifdef WITH_FREELISTS - struct _Py_context_state *state = &freelist_state->contexts; + struct _Py_context_freelist *state = &freelists->contexts; for (; state->numfree > 0; state->numfree--) { PyContext *ctx = state->freelist; state->freelist = (PyContext *)ctx->ctx_weakreflist; diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 93e1168002b6f7..3dc1dc19182eb4 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1721,7 +1721,7 @@ _PyGC_ClearAllFreeLists(PyInterpreterState *interp) HEAD_LOCK(&_PyRuntime); _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)interp->threads.head; while (tstate != NULL) { - _PyObject_ClearFreeLists(&tstate->freelist_state, 0); + _PyObject_ClearFreeLists(&tstate->freelists, 0); tstate = (_PyThreadStateImpl *)tstate->base.next; } HEAD_UNLOCK(&_PyRuntime); diff --git a/Python/gc_gil.c b/Python/gc_gil.c index 5f1365f509deb0..48646c7af86b7f 100644 --- a/Python/gc_gil.c +++ b/Python/gc_gil.c @@ -11,7 +11,7 @@ void _PyGC_ClearAllFreeLists(PyInterpreterState *interp) { - _PyObject_ClearFreeLists(&interp->freelist_state, 0); + _PyObject_ClearFreeLists(&interp->object_state.freelists, 0); } #endif diff --git 
a/Python/object_stack.c b/Python/object_stack.c index ced4460da00f44..ff2901cdacceb8 100644 --- a/Python/object_stack.c +++ b/Python/object_stack.c @@ -8,22 +8,22 @@ extern _PyObjectStackChunk *_PyObjectStackChunk_New(void); extern void _PyObjectStackChunk_Free(_PyObjectStackChunk *); -static struct _Py_object_stack_state * -get_state(void) +static struct _Py_object_stack_freelist * +get_object_stack_freelist(void) { - _PyFreeListState *state = _PyFreeListState_GET(); - return &state->object_stacks; + struct _Py_object_freelists *freelists = _Py_object_freelists_GET(); + return &freelists->object_stacks; } _PyObjectStackChunk * _PyObjectStackChunk_New(void) { _PyObjectStackChunk *buf; - struct _Py_object_stack_state *state = get_state(); - if (state->numfree > 0) { - buf = state->free_list; - state->free_list = buf->prev; - state->numfree--; + struct _Py_object_stack_freelist *obj_stack_freelist = get_object_stack_freelist(); + if (obj_stack_freelist->numfree > 0) { + buf = obj_stack_freelist->free_list; + obj_stack_freelist->free_list = buf->prev; + obj_stack_freelist->numfree--; } else { // NOTE: we use PyMem_RawMalloc() here because this is used by the GC @@ -43,13 +43,13 @@ void _PyObjectStackChunk_Free(_PyObjectStackChunk *buf) { assert(buf->n == 0); - struct _Py_object_stack_state *state = get_state(); - if (state->numfree >= 0 && - state->numfree < _PyObjectStackChunk_MAXFREELIST) + struct _Py_object_stack_freelist *obj_stack_freelist = get_object_stack_freelist(); + if (obj_stack_freelist->numfree >= 0 && + obj_stack_freelist->numfree < _PyObjectStackChunk_MAXFREELIST) { - buf->prev = state->free_list; - state->free_list = buf; - state->numfree++; + buf->prev = obj_stack_freelist->free_list; + obj_stack_freelist->free_list = buf; + obj_stack_freelist->numfree++; } else { PyMem_RawFree(buf); @@ -89,7 +89,7 @@ _PyObjectStack_Merge(_PyObjectStack *dst, _PyObjectStack *src) } void -_PyObjectStackChunk_ClearFreeList(_PyFreeListState *free_lists, int 
is_finalization) +_PyObjectStackChunk_ClearFreeList(struct _Py_object_freelists *freelists, int is_finalization) { if (!is_finalization) { // Ignore requests to clear the free list during GC. We use object @@ -97,7 +97,7 @@ _PyObjectStackChunk_ClearFreeList(_PyFreeListState *free_lists, int is_finalizat return; } - struct _Py_object_stack_state *state = &free_lists->object_stacks; + struct _Py_object_stack_freelist *state = &freelists->object_stacks; while (state->numfree > 0) { _PyObjectStackChunk *buf = state->free_list; state->free_list = buf->prev; diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 7e4c07bb657d19..5e5db98481150e 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1795,8 +1795,8 @@ finalize_interp_types(PyInterpreterState *interp) #ifndef Py_GIL_DISABLED // With Py_GIL_DISABLED: // the freelists for the current thread state have already been cleared. - _PyFreeListState *state = _PyFreeListState_GET(); - _PyObject_ClearFreeLists(state, 1); + struct _Py_object_freelists *freelists = _Py_object_freelists_GET(); + _PyObject_ClearFreeLists(freelists, 1); #endif #ifdef Py_DEBUG diff --git a/Python/pystate.c b/Python/pystate.c index 996f465825215f..82c955882185e8 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1548,8 +1548,8 @@ PyThreadState_Clear(PyThreadState *tstate) } #ifdef Py_GIL_DISABLED // Each thread should clear own freelists in free-threading builds. - _PyFreeListState *freelist_state = _PyFreeListState_GET(); - _PyObject_ClearFreeLists(freelist_state, 1); + struct _Py_object_freelists *freelists = _Py_object_freelists_GET(); + _PyObject_ClearFreeLists(freelists, 1); // Remove ourself from the biased reference counting table of threads. 
_Py_brc_remove_thread(tstate); From 46245b0d831b9f5b15b4a0483c785ea71bffef12 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Wed, 14 Feb 2024 08:55:00 +0200 Subject: [PATCH 110/126] Docs: Use substitutions instead of manual version updates (#115416) --- Doc/conf.py | 2 ++ Doc/tutorial/interpreter.rst | 4 ++-- Doc/tutorial/stdlib.rst | 2 +- Doc/tutorial/stdlib2.rst | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Doc/conf.py b/Doc/conf.py index 677d139046e5d0..0e84d866a22f5b 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -66,6 +66,8 @@ rst_epilog = f""" .. |python_version_literal| replace:: ``Python {version}`` +.. |python_x_dot_y_literal| replace:: ``python{version}`` +.. |usr_local_bin_python_x_dot_y_literal| replace:: ``/usr/local/bin/python{version}`` """ # There are two options for replacing |today|: either, you set today to some diff --git a/Doc/tutorial/interpreter.rst b/Doc/tutorial/interpreter.rst index 42ebf2b3d294a8..299b6c2777adc0 100644 --- a/Doc/tutorial/interpreter.rst +++ b/Doc/tutorial/interpreter.rst @@ -10,7 +10,7 @@ Using the Python Interpreter Invoking the Interpreter ======================== -The Python interpreter is usually installed as :file:`/usr/local/bin/python3.13` +The Python interpreter is usually installed as |usr_local_bin_python_x_dot_y_literal| on those machines where it is available; putting :file:`/usr/local/bin` in your Unix shell's search path makes it possible to start it by typing the command: @@ -24,7 +24,7 @@ Python guru or system administrator. (E.g., :file:`/usr/local/python` is a popular alternative location.) On Windows machines where you have installed Python from the :ref:`Microsoft Store -`, the :file:`python3.13` command will be available. If you have +`, the |python_x_dot_y_literal| command will be available. If you have the :ref:`py.exe launcher ` installed, you can use the :file:`py` command. 
See :ref:`setting-envvars` for other ways to launch Python. diff --git a/Doc/tutorial/stdlib.rst b/Doc/tutorial/stdlib.rst index 63f4b5e1ce0207..9def2a5714950b 100644 --- a/Doc/tutorial/stdlib.rst +++ b/Doc/tutorial/stdlib.rst @@ -15,7 +15,7 @@ operating system:: >>> import os >>> os.getcwd() # Return the current working directory - 'C:\\Python312' + 'C:\\Python313' >>> os.chdir('/server/accesslogs') # Change current working directory >>> os.system('mkdir today') # Run the command mkdir in the system shell 0 diff --git a/Doc/tutorial/stdlib2.rst b/Doc/tutorial/stdlib2.rst index 33f311db3a24d2..09b6f3d91bcfed 100644 --- a/Doc/tutorial/stdlib2.rst +++ b/Doc/tutorial/stdlib2.rst @@ -279,7 +279,7 @@ applications include caching objects that are expensive to create:: Traceback (most recent call last): File "<stdin>", line 1, in <module> d['primary'] # entry was automatically removed - File "C:/python312/lib/weakref.py", line 46, in __getitem__ + File "C:/python313/lib/weakref.py", line 46, in __getitem__ o = self.data[key]() KeyError: 'primary' From 3fd2ad8241a61e75b2cd33c697af276863efbb51 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Wed, 14 Feb 2024 10:41:17 +0000 Subject: [PATCH 111/126] ftplib docs: `timeout` doesn't have to be a whole number (#115443) --- Doc/library/ftplib.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/library/ftplib.rst b/Doc/library/ftplib.rst index 2f98a272c297ae..9abf7974d1936d 100644 --- a/Doc/library/ftplib.rst +++ b/Doc/library/ftplib.rst @@ -104,7 +104,7 @@ FTP objects :param timeout: A timeout in seconds for blocking operations like :meth:`connect` (default: the global default timeout setting). - :type timeout: int | None + :type timeout: float | None :param source_address: |param_doc_source_address| @@ -178,7 +178,7 @@ FTP objects :param timeout: A timeout in seconds for the connection attempt (default: the global default timeout setting). 
- :type timeout: int | None + :type timeout: float | None :param source_address: |param_doc_source_address| @@ -483,7 +483,7 @@ FTP_TLS objects :param timeout: A timeout in seconds for blocking operations like :meth:`~FTP.connect` (default: the global default timeout setting). - :type timeout: int | None + :type timeout: float | None :param source_address: |param_doc_source_address| From 57e4c81ae1cd605efa173885574aedc3fded4b8b Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Wed, 14 Feb 2024 19:12:52 +0800 Subject: [PATCH 112/126] gh-114058: Fix flaky globals to constant test (#115423) Co-authored-by: Victor Stinner --- Lib/test/test_capi/test_opt.py | 43 +++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index b64aed10d2d653..1a8ed3441fa855 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -7,6 +7,8 @@ import _testinternalcapi +from test.support import script_helper + @contextlib.contextmanager def temporary_optimizer(opt): @@ -659,7 +661,7 @@ def dummy(x): opt = _testinternalcapi.get_uop_optimizer() with temporary_optimizer(opt): - testfunc(20) + testfunc(32) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) @@ -677,10 +679,10 @@ def testfunc(n): opt = _testinternalcapi.get_uop_optimizer() with temporary_optimizer(opt): - res = testfunc(20) + res = testfunc(32) ex = get_first_executor(testfunc) - self.assertEqual(res, 19 * 2) + self.assertEqual(res, 62) self.assertIsNotNone(ex) uops = {opname for opname, _, _ in ex} self.assertNotIn("_GUARD_BOTH_INT", uops) @@ -699,7 +701,7 @@ def testfunc(n): opt = _testinternalcapi.get_uop_optimizer() with temporary_optimizer(opt): - res = testfunc(20) + res = testfunc(32) ex = get_first_executor(testfunc) self.assertEqual(res, 4) @@ -716,7 +718,7 @@ def testfunc(n): opt = _testinternalcapi.get_uop_optimizer() with temporary_optimizer(opt): - testfunc(20) + testfunc(32) ex = 
get_first_executor(testfunc) self.assertIsNotNone(ex) @@ -740,7 +742,7 @@ def testfunc(n): def dummy(x): return x + 2 - testfunc(10) + testfunc(32) ex = get_first_executor(testfunc) # Honestly as long as it doesn't crash it's fine. @@ -749,20 +751,39 @@ def dummy(x): # This test is a little implementation specific. def test_promote_globals_to_constants(self): + + result = script_helper.run_python_until_end('-c', textwrap.dedent(""" + import _testinternalcapi + import opcode + + def get_first_executor(func): + code = func.__code__ + co_code = code.co_code + JUMP_BACKWARD = opcode.opmap["JUMP_BACKWARD"] + for i in range(0, len(co_code), 2): + if co_code[i] == JUMP_BACKWARD: + try: + return _testinternalcapi.get_executor(code, i) + except ValueError: + pass + return None + def testfunc(n): for i in range(n): x = range(i) return x opt = _testinternalcapi.get_uop_optimizer() - with temporary_optimizer(opt): - testfunc(20) + _testinternalcapi.set_optimizer(opt) + testfunc(64) ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) + assert ex is not None uops = {opname for opname, _, _ in ex} - self.assertNotIn("_LOAD_GLOBAL_BUILTIN", uops) - self.assertIn("_LOAD_CONST_INLINE_BORROW_WITH_NULL", uops) + assert "_LOAD_GLOBAL_BUILTINS" not in uops + assert "_LOAD_CONST_INLINE_BORROW_WITH_NULL" in uops + """)) + self.assertEqual(result[0].rc, 0, result) From dd5e4d90789b3a065290e264122629f31cb0b547 Mon Sep 17 00:00:00 2001 From: "Erlend E. 
Aasland" Date: Wed, 14 Feb 2024 12:14:56 +0100 Subject: [PATCH 113/126] gh-100414: Add SQLite backend to dbm (#114481) Co-authored-by: Raymond Hettinger Co-authored-by: Serhiy Storchaka Co-authored-by: Mariusz Felisiak --- Doc/library/dbm.rst | 53 ++- Doc/whatsnew/3.13.rst | 10 + Lib/dbm/__init__.py | 8 +- Lib/dbm/sqlite3.py | 141 ++++++++ Lib/test/test_dbm.py | 28 ++ Lib/test/test_dbm_sqlite3.py | 308 ++++++++++++++++++ ...-01-23-13-03-22.gh-issue-100414.5kTdU5.rst | 2 + 7 files changed, 544 insertions(+), 6 deletions(-) create mode 100644 Lib/dbm/sqlite3.py create mode 100644 Lib/test/test_dbm_sqlite3.py create mode 100644 Misc/NEWS.d/next/Library/2024-01-23-13-03-22.gh-issue-100414.5kTdU5.rst diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index 9bb5e5f8950956..0f9c825fec9385 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -8,8 +8,13 @@ -------------- -:mod:`dbm` is a generic interface to variants of the DBM database --- -:mod:`dbm.gnu` or :mod:`dbm.ndbm`. If none of these modules is installed, the +:mod:`dbm` is a generic interface to variants of the DBM database: + +* :mod:`dbm.sqlite3` +* :mod:`dbm.gnu` +* :mod:`dbm.ndbm` + +If none of these modules are installed, the slow-but-simple implementation in module :mod:`dbm.dumb` will be used. There is a `third party interface `_ to the Oracle Berkeley DB. @@ -25,8 +30,8 @@ the Oracle Berkeley DB. .. function:: whichdb(filename) This function attempts to guess which of the several simple database modules - available --- :mod:`dbm.gnu`, :mod:`dbm.ndbm` or :mod:`dbm.dumb` --- should - be used to open a given file. + available --- :mod:`dbm.sqlite3`, :mod:`dbm.gnu`, :mod:`dbm.ndbm`, + or :mod:`dbm.dumb` --- should be used to open a given file. Return one of the following values: @@ -144,6 +149,46 @@ then prints out the contents of the database:: The individual submodules are described in the following sections. 
+:mod:`dbm.sqlite3` --- SQLite backend for dbm +--------------------------------------------- + +.. module:: dbm.sqlite3 + :platform: All + :synopsis: SQLite backend for dbm + +.. versionadded:: 3.13 + +**Source code:** :source:`Lib/dbm/sqlite3.py` + +-------------- + +This module uses the standard library :mod:`sqlite3` module to provide an +SQLite backend for the :mod:`dbm` module. +The files created by :mod:`dbm.sqlite3` can thus be opened by :mod:`sqlite3`, +or any other SQLite browser, including the SQLite CLI. + +.. function:: open(filename, /, flag="r", mode=0o666) + + Open an SQLite database. + The returned object behaves like a :term:`mapping`, + implements a :meth:`!close` method, + and supports a "closing" context manager via the :keyword:`with` keyword. + + :param filename: + The path to the database to be opened. + :type filename: :term:`path-like object` + + :param str flag: + + * ``'r'`` (default): |flag_r| + * ``'w'``: |flag_w| + * ``'c'``: |flag_c| + * ``'n'``: |flag_n| + + :param mode: + The Unix file access mode of the file (default: octal ``0o666``), + used only when the database has to be created. + :mod:`dbm.gnu` --- GNU database manager --------------------------------------- diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index b96720df0a2f2d..a265bf1734c1d3 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -231,6 +231,16 @@ dis the ``show_offsets`` parameter. (Contributed by Irit Katriel in :gh:`112137`.) +dbm +--- + +* Add :meth:`dbm.gnu.gdbm.clear` and :meth:`dbm.ndbm.ndbm.clear` methods that remove all items + from the database. + (Contributed by Donghee Na in :gh:`107122`.) + +* Add new :mod:`dbm.sqlite3` backend. + (Contributed by Raymond Hettinger and Erlend E. Aasland in :gh:`100414`.) 
+ doctest ------- diff --git a/Lib/dbm/__init__.py b/Lib/dbm/__init__.py index 8055d3769f9dd0..97c0bb1c9ca946 100644 --- a/Lib/dbm/__init__.py +++ b/Lib/dbm/__init__.py @@ -5,7 +5,7 @@ import dbm d = dbm.open(file, 'w', 0o666) -The returned object is a dbm.gnu, dbm.ndbm or dbm.dumb object, dependent on the +The returned object is a dbm.sqlite3, dbm.gnu, dbm.ndbm or dbm.dumb database object, dependent on the type of database being opened (determined by the whichdb function) in the case of an existing dbm. If the dbm does not exist and the create or new flag ('c' or 'n') was specified, the dbm type will be determined by the availability of @@ -38,7 +38,7 @@ class error(Exception): pass -_names = ['dbm.gnu', 'dbm.ndbm', 'dbm.dumb'] +_names = ['dbm.gnu', 'dbm.ndbm', 'dbm.sqlite3', 'dbm.dumb'] _defaultmod = None _modules = {} @@ -164,6 +164,10 @@ def whichdb(filename): if len(s) != 4: return "" + # Check for SQLite3 header string. + if s16 == b"SQLite format 3\0": + return "dbm.sqlite3" + # Convert to 4-byte int in native byte order -- return "" if impossible try: (magic,) = struct.unpack("=l", s) diff --git a/Lib/dbm/sqlite3.py b/Lib/dbm/sqlite3.py new file mode 100644 index 00000000000000..74c9d9b7e2f1d8 --- /dev/null +++ b/Lib/dbm/sqlite3.py @@ -0,0 +1,141 @@ +import os +import sqlite3 +import sys +from pathlib import Path +from contextlib import suppress, closing +from collections.abc import MutableMapping + +BUILD_TABLE = """ + CREATE TABLE IF NOT EXISTS Dict ( + key BLOB UNIQUE NOT NULL, + value BLOB NOT NULL + ) +""" +GET_SIZE = "SELECT COUNT (key) FROM Dict" +LOOKUP_KEY = "SELECT value FROM Dict WHERE key = CAST(? AS BLOB)" +STORE_KV = "REPLACE INTO Dict (key, value) VALUES (CAST(? AS BLOB), CAST(? AS BLOB))" +DELETE_KEY = "DELETE FROM Dict WHERE key = CAST(? 
AS BLOB)" +ITER_KEYS = "SELECT key FROM Dict" + + +class error(OSError): + pass + + +_ERR_CLOSED = "DBM object has already been closed" +_ERR_REINIT = "DBM object does not support reinitialization" + + +def _normalize_uri(path): + path = Path(path) + uri = path.absolute().as_uri() + while "//" in uri: + uri = uri.replace("//", "/") + return uri + + +class _Database(MutableMapping): + + def __init__(self, path, /, *, flag, mode): + if hasattr(self, "_cx"): + raise error(_ERR_REINIT) + + path = os.fsdecode(path) + match flag: + case "r": + flag = "ro" + case "w": + flag = "rw" + case "c": + flag = "rwc" + Path(path).touch(mode=mode, exist_ok=True) + case "n": + flag = "rwc" + Path(path).unlink(missing_ok=True) + Path(path).touch(mode=mode) + case _: + raise ValueError("Flag must be one of 'r', 'w', 'c', or 'n', " + f"not {flag!r}") + + # We use the URI format when opening the database. + uri = _normalize_uri(path) + uri = f"{uri}?mode={flag}" + + try: + self._cx = sqlite3.connect(uri, autocommit=True, uri=True) + except sqlite3.Error as exc: + raise error(str(exc)) + + # This is an optimization only; it's ok if it fails. 
+ with suppress(sqlite3.OperationalError): + self._cx.execute("PRAGMA journal_mode = wal") + + if flag == "rwc": + self._execute(BUILD_TABLE) + + def _execute(self, *args, **kwargs): + if not self._cx: + raise error(_ERR_CLOSED) + try: + return closing(self._cx.execute(*args, **kwargs)) + except sqlite3.Error as exc: + raise error(str(exc)) + + def __len__(self): + with self._execute(GET_SIZE) as cu: + row = cu.fetchone() + return row[0] + + def __getitem__(self, key): + with self._execute(LOOKUP_KEY, (key,)) as cu: + row = cu.fetchone() + if not row: + raise KeyError(key) + return row[0] + + def __setitem__(self, key, value): + self._execute(STORE_KV, (key, value)) + + def __delitem__(self, key): + with self._execute(DELETE_KEY, (key,)) as cu: + if not cu.rowcount: + raise KeyError(key) + + def __iter__(self): + try: + with self._execute(ITER_KEYS) as cu: + for row in cu: + yield row[0] + except sqlite3.Error as exc: + raise error(str(exc)) + + def close(self): + if self._cx: + self._cx.close() + self._cx = None + + def keys(self): + return list(super().keys()) + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + +def open(filename, /, flag="r", mode=0o666): + """Open a dbm.sqlite3 database and return the dbm object. + + The 'filename' parameter is the name of the database file. + + The optional 'flag' parameter can be one of ...: + 'r' (default): open an existing database for read only access + 'w': open an existing database for read/write access + 'c': create a database if it does not exist; open for read/write access + 'n': always create a new, empty database; open for read/write access + + The optional 'mode' parameter is the Unix file access mode of the database; + only used when creating a new database. Default: 0o666. 
+ """ + return _Database(filename, flag=flag, mode=mode) diff --git a/Lib/test/test_dbm.py b/Lib/test/test_dbm.py index e3924d8ec8b5c1..4be7c5649da68a 100644 --- a/Lib/test/test_dbm.py +++ b/Lib/test/test_dbm.py @@ -6,6 +6,13 @@ from test.support import import_helper from test.support import os_helper + +try: + from dbm import sqlite3 as dbm_sqlite3 +except ImportError: + dbm_sqlite3 = None + + try: from dbm import ndbm except ImportError: @@ -213,6 +220,27 @@ def test_whichdb_ndbm(self): for path in fnames: self.assertIsNone(self.dbm.whichdb(path)) + @unittest.skipUnless(dbm_sqlite3, reason='Test requires dbm.sqlite3') + def test_whichdb_sqlite3(self): + # Databases created by dbm.sqlite3 are detected correctly. + with dbm_sqlite3.open(_fname, "c") as db: + db["key"] = "value" + self.assertEqual(self.dbm.whichdb(_fname), "dbm.sqlite3") + + @unittest.skipUnless(dbm_sqlite3, reason='Test requires dbm.sqlite3') + def test_whichdb_sqlite3_existing_db(self): + # Existing sqlite3 databases are detected correctly. + sqlite3 = import_helper.import_module("sqlite3") + try: + # Create an empty database. 
+ with sqlite3.connect(_fname) as cx: + cx.execute("CREATE TABLE dummy(database)") + cx.commit() + finally: + cx.close() + self.assertEqual(self.dbm.whichdb(_fname), "dbm.sqlite3") + + def setUp(self): self.addCleanup(cleaunup_test_dir) setup_test_dir() diff --git a/Lib/test/test_dbm_sqlite3.py b/Lib/test/test_dbm_sqlite3.py new file mode 100644 index 00000000000000..7bc2a030352835 --- /dev/null +++ b/Lib/test/test_dbm_sqlite3.py @@ -0,0 +1,308 @@ +import sqlite3 +import sys +import test.support +import unittest +from contextlib import closing +from functools import partial +from pathlib import Path +from test.support import cpython_only, import_helper, os_helper + + +dbm_sqlite3 = import_helper.import_module("dbm.sqlite3") +from dbm.sqlite3 import _normalize_uri + + +class _SQLiteDbmTests(unittest.TestCase): + + def setUp(self): + self.filename = os_helper.TESTFN + db = dbm_sqlite3.open(self.filename, "c") + db.close() + + def tearDown(self): + for suffix in "", "-wal", "-shm": + os_helper.unlink(self.filename + suffix) + + +class URI(unittest.TestCase): + + def test_uri_substitutions(self): + dataset = ( + ("/absolute/////b/c", "/absolute/b/c"), + ("PRE#MID##END", "PRE%23MID%23%23END"), + ("%#?%%#", "%25%23%3F%25%25%23"), + ) + for path, normalized in dataset: + with self.subTest(path=path, normalized=normalized): + self.assertTrue(_normalize_uri(path).endswith(normalized)) + + @unittest.skipUnless(sys.platform == "win32", "requires Windows") + def test_uri_windows(self): + dataset = ( + # Relative subdir. + (r"2018\January.xlsx", + "2018/January.xlsx"), + # Absolute with drive letter. + (r"C:\Projects\apilibrary\apilibrary.sln", + "/C:/Projects/apilibrary/apilibrary.sln"), + # Relative with drive letter. 
+ (r"C:Projects\apilibrary\apilibrary.sln", + "/C:Projects/apilibrary/apilibrary.sln"), + ) + for path, normalized in dataset: + with self.subTest(path=path, normalized=normalized): + if not Path(path).is_absolute(): + self.skipTest(f"skipping relative path: {path!r}") + self.assertTrue(_normalize_uri(path).endswith(normalized)) + + +class ReadOnly(_SQLiteDbmTests): + + def setUp(self): + super().setUp() + with dbm_sqlite3.open(self.filename, "w") as db: + db[b"key1"] = "value1" + db[b"key2"] = "value2" + self.db = dbm_sqlite3.open(self.filename, "r") + + def tearDown(self): + self.db.close() + super().tearDown() + + def test_readonly_read(self): + self.assertEqual(self.db[b"key1"], b"value1") + self.assertEqual(self.db[b"key2"], b"value2") + + def test_readonly_write(self): + with self.assertRaises(dbm_sqlite3.error): + self.db[b"new"] = "value" + + def test_readonly_delete(self): + with self.assertRaises(dbm_sqlite3.error): + del self.db[b"key1"] + + def test_readonly_keys(self): + self.assertEqual(self.db.keys(), [b"key1", b"key2"]) + + def test_readonly_iter(self): + self.assertEqual([k for k in self.db], [b"key1", b"key2"]) + + +class ReadWrite(_SQLiteDbmTests): + + def setUp(self): + super().setUp() + self.db = dbm_sqlite3.open(self.filename, "w") + + def tearDown(self): + self.db.close() + super().tearDown() + + def db_content(self): + with closing(sqlite3.connect(self.filename)) as cx: + keys = [r[0] for r in cx.execute("SELECT key FROM Dict")] + vals = [r[0] for r in cx.execute("SELECT value FROM Dict")] + return keys, vals + + def test_readwrite_unique_key(self): + self.db["key"] = "value" + self.db["key"] = "other" + keys, vals = self.db_content() + self.assertEqual(keys, [b"key"]) + self.assertEqual(vals, [b"other"]) + + def test_readwrite_delete(self): + self.db["key"] = "value" + self.db["new"] = "other" + + del self.db[b"new"] + keys, vals = self.db_content() + self.assertEqual(keys, [b"key"]) + self.assertEqual(vals, [b"value"]) + + del 
self.db[b"key"] + keys, vals = self.db_content() + self.assertEqual(keys, []) + self.assertEqual(vals, []) + + def test_readwrite_null_key(self): + with self.assertRaises(dbm_sqlite3.error): + self.db[None] = "value" + + def test_readwrite_null_value(self): + with self.assertRaises(dbm_sqlite3.error): + self.db[b"key"] = None + + +class Misuse(_SQLiteDbmTests): + + def setUp(self): + super().setUp() + self.db = dbm_sqlite3.open(self.filename, "w") + + def tearDown(self): + self.db.close() + super().tearDown() + + def test_misuse_double_create(self): + self.db["key"] = "value" + with dbm_sqlite3.open(self.filename, "c") as db: + self.assertEqual(db[b"key"], b"value") + + def test_misuse_double_close(self): + self.db.close() + + def test_misuse_invalid_flag(self): + regex = "must be.*'r'.*'w'.*'c'.*'n', not 'invalid'" + with self.assertRaisesRegex(ValueError, regex): + dbm_sqlite3.open(self.filename, flag="invalid") + + def test_misuse_double_delete(self): + self.db["key"] = "value" + del self.db[b"key"] + with self.assertRaises(KeyError): + del self.db[b"key"] + + def test_misuse_invalid_key(self): + with self.assertRaises(KeyError): + self.db[b"key"] + + def test_misuse_iter_close1(self): + self.db["1"] = 1 + it = iter(self.db) + self.db.close() + with self.assertRaises(dbm_sqlite3.error): + next(it) + + def test_misuse_iter_close2(self): + self.db["1"] = 1 + self.db["2"] = 2 + it = iter(self.db) + next(it) + self.db.close() + with self.assertRaises(dbm_sqlite3.error): + next(it) + + def test_misuse_use_after_close(self): + self.db.close() + with self.assertRaises(dbm_sqlite3.error): + self.db[b"read"] + with self.assertRaises(dbm_sqlite3.error): + self.db[b"write"] = "value" + with self.assertRaises(dbm_sqlite3.error): + del self.db[b"del"] + with self.assertRaises(dbm_sqlite3.error): + len(self.db) + with self.assertRaises(dbm_sqlite3.error): + self.db.keys() + + def test_misuse_reinit(self): + with self.assertRaises(dbm_sqlite3.error): + 
self.db.__init__("new.db", flag="n", mode=0o666) + + def test_misuse_empty_filename(self): + for flag in "r", "w", "c", "n": + with self.assertRaises(dbm_sqlite3.error): + db = dbm_sqlite3.open("", flag="c") + + +class DataTypes(_SQLiteDbmTests): + + dataset = ( + # (raw, coerced) + (42, b"42"), + (3.14, b"3.14"), + ("string", b"string"), + (b"bytes", b"bytes"), + ) + + def setUp(self): + super().setUp() + self.db = dbm_sqlite3.open(self.filename, "w") + + def tearDown(self): + self.db.close() + super().tearDown() + + def test_datatypes_values(self): + for raw, coerced in self.dataset: + with self.subTest(raw=raw, coerced=coerced): + self.db["key"] = raw + self.assertEqual(self.db[b"key"], coerced) + + def test_datatypes_keys(self): + for raw, coerced in self.dataset: + with self.subTest(raw=raw, coerced=coerced): + self.db[raw] = "value" + self.assertEqual(self.db[coerced], b"value") + # Raw keys are silently coerced to bytes. + self.assertEqual(self.db[raw], b"value") + del self.db[raw] + + def test_datatypes_replace_coerced(self): + self.db["10"] = "value" + self.db[b"10"] = "value" + self.db[10] = "value" + self.assertEqual(self.db.keys(), [b"10"]) + + +class CorruptDatabase(_SQLiteDbmTests): + """Verify that database exceptions are raised as dbm.sqlite3.error.""" + + def setUp(self): + super().setUp() + with closing(sqlite3.connect(self.filename)) as cx: + with cx: + cx.execute("DROP TABLE IF EXISTS Dict") + cx.execute("CREATE TABLE Dict (invalid_schema)") + + def check(self, flag, fn, should_succeed=False): + with closing(dbm_sqlite3.open(self.filename, flag)) as db: + with self.assertRaises(dbm_sqlite3.error): + fn(db) + + @staticmethod + def read(db): + return db["key"] + + @staticmethod + def write(db): + db["key"] = "value" + + @staticmethod + def iter(db): + next(iter(db)) + + @staticmethod + def keys(db): + db.keys() + + @staticmethod + def del_(db): + del db["key"] + + @staticmethod + def len_(db): + len(db) + + def test_corrupt_readwrite(self): + for 
flag in "r", "w", "c": + with self.subTest(flag=flag): + check = partial(self.check, flag=flag) + check(fn=self.read) + check(fn=self.write) + check(fn=self.iter) + check(fn=self.keys) + check(fn=self.del_) + check(fn=self.len_) + + def test_corrupt_force_new(self): + with closing(dbm_sqlite3.open(self.filename, "n")) as db: + db["foo"] = "write" + _ = db[b"foo"] + next(iter(db)) + del db[b"foo"] + + +if __name__ == "__main__": + unittest.main() diff --git a/Misc/NEWS.d/next/Library/2024-01-23-13-03-22.gh-issue-100414.5kTdU5.rst b/Misc/NEWS.d/next/Library/2024-01-23-13-03-22.gh-issue-100414.5kTdU5.rst new file mode 100644 index 00000000000000..ffcb926a8d546c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-23-13-03-22.gh-issue-100414.5kTdU5.rst @@ -0,0 +1,2 @@ +Add :mod:`dbm.sqlite3` as a backend to :mod:`dbm`. +Patch by Raymond Hettinger and Erlend E. Aasland. From 029ec91d43b377535ff7eb94993e0d2add4af720 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Wed, 14 Feb 2024 14:16:09 +0100 Subject: [PATCH 114/126] gh-100414: Skip test_dbm_sqlite3 if sqlite3 is unavailable (#115449) Co-authored-by: Alex Waygood --- Lib/test/test_dbm_sqlite3.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_dbm_sqlite3.py b/Lib/test/test_dbm_sqlite3.py index 7bc2a030352835..7a49fd2f924f8d 100644 --- a/Lib/test/test_dbm_sqlite3.py +++ b/Lib/test/test_dbm_sqlite3.py @@ -1,4 +1,3 @@ -import sqlite3 import sys import test.support import unittest @@ -7,8 +6,12 @@ from pathlib import Path from test.support import cpython_only, import_helper, os_helper - dbm_sqlite3 = import_helper.import_module("dbm.sqlite3") +# N.B. The test will fail on some platforms without sqlite3 +# if the sqlite3 import is above the import of dbm.sqlite3. +# This is deliberate: if the import helper managed to import dbm.sqlite3, +# we must inevitably be able to import sqlite3. Else, we have a problem. 
+import sqlite3 from dbm.sqlite3 import _normalize_uri From ec8909a23931338f81803ea3f18dc2073f74a152 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Wed, 14 Feb 2024 16:31:28 +0300 Subject: [PATCH 115/126] gh-115450: Fix direct invocation of `test_desctut` (#115451) --- Lib/test/test_descrtut.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_descrtut.py b/Lib/test/test_descrtut.py index 13e3ea41bdb76c..f097c4e7300baa 100644 --- a/Lib/test/test_descrtut.py +++ b/Lib/test/test_descrtut.py @@ -39,16 +39,16 @@ def merge(self, other): Here's the new type at work: >>> print(defaultdict) # show our type - + >>> print(type(defaultdict)) # its metatype >>> a = defaultdict(default=0.0) # create an instance >>> print(a) # show the instance {} >>> print(type(a)) # show its type - + >>> print(a.__class__) # show its class - + >>> print(type(a) is a.__class__) # its type is its class True >>> a[1] = 3.25 # modify the instance @@ -99,7 +99,7 @@ def merge(self, other): >>> print(sortdict(a.__dict__)) {'default': -1000, 'x1': 100, 'x2': 200} >>> -""" +""" % {'modname': __name__} class defaultdict2(dict): __slots__ = ['default'] @@ -264,19 +264,19 @@ def merge(self, other): ... print("classmethod", cls, y) >>> C.foo(1) - classmethod 1 + classmethod 1 >>> c = C() >>> c.foo(1) - classmethod 1 + classmethod 1 >>> class D(C): ... pass >>> D.foo(1) - classmethod 1 + classmethod 1 >>> d = D() >>> d.foo(1) - classmethod 1 + classmethod 1 This prints "classmethod __main__.D 1" both times; in other words, the class passed as the first argument of foo() is the class involved in the @@ -292,18 +292,18 @@ class passed as the first argument of foo() is the class involved in the >>> E.foo(1) E.foo() called - classmethod 1 + classmethod 1 >>> e = E() >>> e.foo(1) E.foo() called - classmethod 1 + classmethod 1 In this example, the call to C.foo() from E.foo() will see class C as its first argument, not class E. 
This is to be expected, since the call specifies the class C. But it stresses the difference between these class methods and methods defined in metaclasses (where an upcall to a metamethod would pass the target class as an explicit first argument). -""" +""" % {'modname': __name__} test_5 = """ From 6d9141ed766f4003f39362937dc397e9f734c7e5 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Wed, 14 Feb 2024 14:47:19 +0100 Subject: [PATCH 116/126] gh-100414: Make dbm.sqlite3 the preferred dbm backend (#115447) --- Doc/whatsnew/3.13.rst | 2 +- Lib/dbm/__init__.py | 2 +- .../next/Library/2024-01-23-13-03-22.gh-issue-100414.5kTdU5.rst | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index a265bf1734c1d3..b14fb4e5392a2c 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -238,7 +238,7 @@ dbm from the database. (Contributed by Donghee Na in :gh:`107122`.) -* Add new :mod:`dbm.sqlite3` backend. +* Add new :mod:`dbm.sqlite3` backend, and make it the default :mod:`!dbm` backend. (Contributed by Raymond Hettinger and Erlend E. Aasland in :gh:`100414`.) doctest diff --git a/Lib/dbm/__init__.py b/Lib/dbm/__init__.py index 97c0bb1c9ca946..4fdbc54e74cfb6 100644 --- a/Lib/dbm/__init__.py +++ b/Lib/dbm/__init__.py @@ -38,7 +38,7 @@ class error(Exception): pass -_names = ['dbm.gnu', 'dbm.ndbm', 'dbm.sqlite3', 'dbm.dumb'] +_names = ['dbm.sqlite3', 'dbm.gnu', 'dbm.ndbm', 'dbm.dumb'] _defaultmod = None _modules = {} diff --git a/Misc/NEWS.d/next/Library/2024-01-23-13-03-22.gh-issue-100414.5kTdU5.rst b/Misc/NEWS.d/next/Library/2024-01-23-13-03-22.gh-issue-100414.5kTdU5.rst index ffcb926a8d546c..0f3b3bdd7c6d26 100644 --- a/Misc/NEWS.d/next/Library/2024-01-23-13-03-22.gh-issue-100414.5kTdU5.rst +++ b/Misc/NEWS.d/next/Library/2024-01-23-13-03-22.gh-issue-100414.5kTdU5.rst @@ -1,2 +1,2 @@ -Add :mod:`dbm.sqlite3` as a backend to :mod:`dbm`. 
+Add :mod:`dbm.sqlite3` as a backend to :mod:`dbm`, and make it the new default :mod:`!dbm` backend. Patch by Raymond Hettinger and Erlend E. Aasland. From 6755c4e0c8803a246e632835030c0b8837b3b676 Mon Sep 17 00:00:00 2001 From: Stanislav Lyu Date: Wed, 14 Feb 2024 16:52:42 +0300 Subject: [PATCH 117/126] gh-115403: Remove extra colon after "Examples" in datetime documentation (#115452) --- Doc/library/datetime.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index a46eed35ee2329..4602132f37f733 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -1811,7 +1811,7 @@ Other constructor: be truncated). 4. Fractional hours and minutes are not supported. - Examples:: + Examples: .. doctest:: From bb791c7728e0508ad5df28a90b27e202d66a9cfa Mon Sep 17 00:00:00 2001 From: Brian Schubert Date: Wed, 14 Feb 2024 10:01:27 -0500 Subject: [PATCH 118/126] gh-115392: Fix doctest reporting incorrect line numbers for decorated functions (#115440) --- Lib/doctest.py | 2 +- Lib/test/test_doctest/decorator_mod.py | 10 ++++++++++ Lib/test/test_doctest/doctest_lineno.py | 9 +++++++++ Lib/test/test_doctest/test_doctest.py | 1 + .../2024-02-13-18-27-03.gh-issue-115392.gle5tp.rst | 2 ++ 5 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 Lib/test/test_doctest/decorator_mod.py create mode 100644 Misc/NEWS.d/next/Library/2024-02-13-18-27-03.gh-issue-115392.gle5tp.rst diff --git a/Lib/doctest.py b/Lib/doctest.py index 114aac62a34e95..1969777b667787 100644 --- a/Lib/doctest.py +++ b/Lib/doctest.py @@ -1140,7 +1140,7 @@ def _find_lineno(self, obj, source_lines): obj = obj.fget if inspect.isfunction(obj) and getattr(obj, '__doc__', None): # We don't use `docstring` var here, because `obj` can be changed. 
- obj = obj.__code__ + obj = inspect.unwrap(obj).__code__ if inspect.istraceback(obj): obj = obj.tb_frame if inspect.isframe(obj): obj = obj.f_code if inspect.iscode(obj): diff --git a/Lib/test/test_doctest/decorator_mod.py b/Lib/test/test_doctest/decorator_mod.py new file mode 100644 index 00000000000000..9f106888411202 --- /dev/null +++ b/Lib/test/test_doctest/decorator_mod.py @@ -0,0 +1,10 @@ +# This module is used in `doctest_lineno.py`. +import functools + + +def decorator(f): + @functools.wraps(f) + def inner(): + return f() + + return inner diff --git a/Lib/test/test_doctest/doctest_lineno.py b/Lib/test/test_doctest/doctest_lineno.py index 677c569cf710eb..0dbcd9a11eaba2 100644 --- a/Lib/test/test_doctest/doctest_lineno.py +++ b/Lib/test/test_doctest/doctest_lineno.py @@ -67,3 +67,12 @@ def property_with_doctest(self): # https://github.com/python/cpython/issues/99433 str_wrapper = object().__str__ + + +# https://github.com/python/cpython/issues/115392 +from test.test_doctest.decorator_mod import decorator + +@decorator +@decorator +def func_with_docstring_wrapped(): + """Some unrelated info.""" diff --git a/Lib/test/test_doctest/test_doctest.py b/Lib/test/test_doctest/test_doctest.py index 7015255db1f7f0..43be200b983227 100644 --- a/Lib/test/test_doctest/test_doctest.py +++ b/Lib/test/test_doctest/test_doctest.py @@ -685,6 +685,7 @@ def basics(): r""" None test.test_doctest.doctest_lineno.MethodWrapper.method_without_docstring 61 test.test_doctest.doctest_lineno.MethodWrapper.property_with_doctest 4 test.test_doctest.doctest_lineno.func_with_docstring + 77 test.test_doctest.doctest_lineno.func_with_docstring_wrapped 12 test.test_doctest.doctest_lineno.func_with_doctest None test.test_doctest.doctest_lineno.func_without_docstring diff --git a/Misc/NEWS.d/next/Library/2024-02-13-18-27-03.gh-issue-115392.gle5tp.rst b/Misc/NEWS.d/next/Library/2024-02-13-18-27-03.gh-issue-115392.gle5tp.rst new file mode 100644 index 00000000000000..1c3368968e4cf0 --- /dev/null +++ 
b/Misc/NEWS.d/next/Library/2024-02-13-18-27-03.gh-issue-115392.gle5tp.rst @@ -0,0 +1,2 @@ +Fix a bug in :mod:`doctest` where incorrect line numbers would be +reported for decorated functions. From 81e140d10b77f0a41a5581412e3f3471cc77981f Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Wed, 14 Feb 2024 16:36:13 +0100 Subject: [PATCH 119/126] Docs: reword sentences about dbm submodule traits (#114609) Don't repeatedly say that keys and values are coerced into bytes. --- Doc/library/dbm.rst | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index 0f9c825fec9385..b4f83d454ac651 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -205,10 +205,6 @@ The :mod:`dbm.gnu` module provides an interface to the :abbr:`GDBM (GNU dbm)` library, similar to the :mod:`dbm.ndbm` module, but with additional functionality like crash tolerance. -:class:`!gdbm` objects behave similar to :term:`mappings `, -except that keys and values are always converted to :class:`bytes` before storing, -and the :meth:`!items` and :meth:`!values` methods are not supported. - .. note:: |incompat_note| .. exception:: error @@ -256,8 +252,9 @@ and the :meth:`!items` and :meth:`!values` methods are not supported. A string of characters the *flag* parameter of :meth:`~dbm.gnu.open` supports. - In addition to the dictionary-like methods, :class:`gdbm` objects have the - following methods and attributes: + :class:`!gdbm` objects behave similar to :term:`mappings `, + but :meth:`!items` and :meth:`!values` methods are not supported. + The following methods are also provided: .. method:: gdbm.firstkey() @@ -314,10 +311,6 @@ and the :meth:`!items` and :meth:`!values` methods are not supported. The :mod:`dbm.ndbm` module provides an interface to the :abbr:`NDBM (New Database Manager)` library. 
-:class:`!ndbm` objects behave similar to :term:`mappings `, -except that keys and values are always stored as :class:`bytes`, -and the :meth:`!items` and :meth:`!values` methods are not supported. - This module can be used with the "classic" NDBM interface or the :abbr:`GDBM (GNU dbm)` compatibility interface. @@ -359,8 +352,9 @@ This module can be used with the "classic" NDBM interface or the :param int mode: |mode_param_doc| - In addition to the dictionary-like methods, :class:`!ndbm` objects - provide the following method: + :class:`!ndbm` objects behave similar to :term:`mappings `, + but :meth:`!items` and :meth:`!values` methods are not supported. + The following methods are also provided: .. versionchanged:: 3.11 Accepts :term:`path-like object` for filename. @@ -399,8 +393,6 @@ The :mod:`dbm.dumb` module provides a persistent :class:`dict`-like interface which is written entirely in Python. Unlike other :mod:`dbm` backends, such as :mod:`dbm.gnu`, no external library is required. -As with other :mod:`dbm` backends, -the keys and values are always stored as :class:`bytes`. 
The :mod:`!dbm.dumb` module defines the following: From 671360161f0b7a5ff4c1d062e570962e851b4bde Mon Sep 17 00:00:00 2001 From: kcatss Date: Thu, 15 Feb 2024 01:08:26 +0900 Subject: [PATCH 120/126] gh-115243: Fix crash in deque.index() when the deque is concurrently modified (GH-115247) --- Lib/test/test_deque.py | 6 +++++- .../Security/2024-02-12-00-33-01.gh-issue-115243.e1oGX8.rst | 1 + Modules/_collectionsmodule.c | 3 ++- 3 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2024-02-12-00-33-01.gh-issue-115243.e1oGX8.rst diff --git a/Lib/test/test_deque.py b/Lib/test/test_deque.py index ae1dfacd7262e4..4679f297fd7f4a 100644 --- a/Lib/test/test_deque.py +++ b/Lib/test/test_deque.py @@ -166,7 +166,7 @@ def test_contains(self): with self.assertRaises(RuntimeError): n in d - def test_contains_count_stop_crashes(self): + def test_contains_count_index_stop_crashes(self): class A: def __eq__(self, other): d.clear() @@ -178,6 +178,10 @@ def __eq__(self, other): with self.assertRaises(RuntimeError): _ = d.count(3) + d = deque([A()]) + with self.assertRaises(RuntimeError): + d.index(0) + def test_extend(self): d = deque('a') self.assertRaises(TypeError, d.extend, 1) diff --git a/Misc/NEWS.d/next/Security/2024-02-12-00-33-01.gh-issue-115243.e1oGX8.rst b/Misc/NEWS.d/next/Security/2024-02-12-00-33-01.gh-issue-115243.e1oGX8.rst new file mode 100644 index 00000000000000..ae0e910c7d159c --- /dev/null +++ b/Misc/NEWS.d/next/Security/2024-02-12-00-33-01.gh-issue-115243.e1oGX8.rst @@ -0,0 +1 @@ +Fix possible crashes in :meth:`collections.deque.index` when the deque is concurrently modified. 
diff --git a/Modules/_collectionsmodule.c b/Modules/_collectionsmodule.c index ef77d34b10e47b..4fa76d62bc3f8d 100644 --- a/Modules/_collectionsmodule.c +++ b/Modules/_collectionsmodule.c @@ -1218,8 +1218,9 @@ deque_index_impl(dequeobject *deque, PyObject *v, Py_ssize_t start, n = stop - i; while (--n >= 0) { CHECK_NOT_END(b); - item = b->data[index]; + item = Py_NewRef(b->data[index]); cmp = PyObject_RichCompareBool(item, v, Py_EQ); + Py_DECREF(item); if (cmp > 0) return PyLong_FromSsize_t(stop - n - 1); if (cmp < 0) From 4b2d1786ccf913bc80ff571c32b196be1543ca54 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Wed, 14 Feb 2024 10:29:06 -0600 Subject: [PATCH 121/126] gh-115399: Upgrade bundled libexpat to 2.6.0 (#115431) --- ...-02-13-15-14-39.gh-issue-115399.xT-scP.rst | 1 + Misc/sbom.spdx.json | 67 +-- Modules/expat/expat.h | 28 +- Modules/expat/expat_config.h | 1 + Modules/expat/internal.h | 8 +- Modules/expat/siphash.h | 10 +- Modules/expat/winconfig.h | 7 +- Modules/expat/xmlparse.c | 558 +++++++++++------- Modules/expat/xmlrole.c | 6 +- Modules/expat/xmlrole.h | 6 +- Modules/expat/xmltok.c | 29 +- Modules/expat/xmltok.h | 8 +- Modules/expat/xmltok_impl.c | 2 +- Tools/build/generate_sbom.py | 5 +- Tools/c-analyzer/cpython/_parser.py | 1 + 15 files changed, 431 insertions(+), 306 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2024-02-13-15-14-39.gh-issue-115399.xT-scP.rst diff --git a/Misc/NEWS.d/next/Security/2024-02-13-15-14-39.gh-issue-115399.xT-scP.rst b/Misc/NEWS.d/next/Security/2024-02-13-15-14-39.gh-issue-115399.xT-scP.rst new file mode 100644 index 00000000000000..e8163b6f29c189 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2024-02-13-15-14-39.gh-issue-115399.xT-scP.rst @@ -0,0 +1 @@ +Update bundled libexpat to 2.6.0 diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json index d783d14255e66f..03b2db20553e56 100644 --- a/Misc/sbom.spdx.json +++ b/Misc/sbom.spdx.json @@ -48,29 +48,15 @@ "checksums": [ { "algorithm": "SHA1", - 
"checksumValue": "ab7bb32514d170592dfb3f76e41bbdc075a4e7e0" + "checksumValue": "90c06411f131e777e2b5c3d22b7ccf50bc46f617" }, { "algorithm": "SHA256", - "checksumValue": "f521acdad222644365b0e81a33bcd6939a98c91b225c47582cc84bd73d96febc" + "checksumValue": "3045f9176950aa13a54e53fa096385670c676c492705d636e977f888e4c72d48" } ], "fileName": "Modules/expat/expat.h" }, - { - "SPDXID": "SPDXRef-FILE-Modules-expat-expat-config.h", - "checksums": [ - { - "algorithm": "SHA1", - "checksumValue": "73627287302ee3e84347c4fe21f37a9cb828bc3b" - }, - { - "algorithm": "SHA256", - "checksumValue": "f17e59f9d95eeb05694c02508aa284d332616c22cbe2e6a802d8a0710310eaab" - } - ], - "fileName": "Modules/expat/expat_config.h" - }, { "SPDXID": "SPDXRef-FILE-Modules-expat-expat-external.h", "checksums": [ @@ -104,11 +90,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "2790d37e7de2f13dccc4f4fb352cbdf9ed6abaa2" + "checksumValue": "9f6d9211a7b627785d5c48d10cc8eda66255113f" }, { "algorithm": "SHA256", - "checksumValue": "d2efe5a1018449968a689f444cca432e3d5875aba6ad08ee18ca235d64f41bb9" + "checksumValue": "9f0bdd346dd94ac4359c636a4e60bc768f4ae53ce0e836eb05fb9246ee36c7f2" } ], "fileName": "Modules/expat/internal.h" @@ -160,11 +146,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "2b984f806f10fbfbf72d8d1b7ba2992413c15299" + "checksumValue": "4c49b5df2bc702f663ba3b5a52d1940ec363226b" }, { "algorithm": "SHA256", - "checksumValue": "fbce56cd680e690043bbf572188cc2d0a25dbfc0d47ac8cb98eb3de768d4e694" + "checksumValue": "b5ec29f6560acc183f1ee8ab92bb3aea17b87b4c2120cd2e3f78deba7a12491e" } ], "fileName": "Modules/expat/siphash.h" @@ -188,11 +174,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "e774ae6ee9391aa6ffb8f775fb74e48f4b428959" + "checksumValue": "a3a8c44efd55dbf2cfea8fcee009ec63120ec0a3" }, { "algorithm": "SHA256", - "checksumValue": "3c71cea9a6174718542331971a35db317902b2433be9d8dd1cb24239b635c0cc" + "checksumValue": 
"e70948500d34dfcba4e9f0b305319dfe2a937c7cbfb687905128b56e1a6f8b33" } ], "fileName": "Modules/expat/winconfig.h" @@ -202,11 +188,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "b580e827e16baa6b035586ffcd4d90301e5a353f" + "checksumValue": "3b5de0ed1de33cad85b46230707403247f2851df" }, { "algorithm": "SHA256", - "checksumValue": "483518bbd69338eefc706cd7fc0b6039df2d3e347f64097989059ed6d2385a1e" + "checksumValue": "a03abd531601eef61a87e06113d218ff139b6969e15a3d4668cd85d65fc6f79b" } ], "fileName": "Modules/expat/xmlparse.c" @@ -216,11 +202,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "5ef21312af73deb2428be3fe97a65244608e76de" + "checksumValue": "ef767128d2dda99436712dcf3465dde5dbaab876" }, { "algorithm": "SHA256", - "checksumValue": "6fcf8c72ac0112c1b98bd2039c632a66b4c3dc516ce7c1f981390951121ef3c0" + "checksumValue": "71fb52aa302cf6f56e41943009965804f49ff2210d9bd15b258f70aaf70db772" } ], "fileName": "Modules/expat/xmlrole.c" @@ -230,11 +216,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "c1a4ea6356643d0820edb9c024c20ad2aaf562dc" + "checksumValue": "c961fb1a80f7b0601a63e69fba793fe5f6dff157" }, { "algorithm": "SHA256", - "checksumValue": "2b5d674be6ef20c7e3f69295176d75e68c5616e4dfce0a186fdd5e2ed8315f7a" + "checksumValue": "228470eb9181a9a7575b63137edcb61b817ee4e0923faffdbeba29e07c939713" } ], "fileName": "Modules/expat/xmlrole.h" @@ -244,11 +230,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "e6d66ae9fd61d7950c62c5d87693c30a707e8577" + "checksumValue": "8394790c0199c8f88108542ad78f23095d28a3fe" }, { "algorithm": "SHA256", - "checksumValue": "1110f651bdccfa765ad3d6f3857a35887ab35fc0fe7f3f3488fde2b238b482e3" + "checksumValue": "5b16c671ccc42496374762768e4bf48f614aecfd2025a07925b8d94244aec645" } ], "fileName": "Modules/expat/xmltok.c" @@ -258,11 +244,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "9c2a544875fd08ba9c2397296c97263518a410aa" + "checksumValue": 
"7d2943a0128094455004b1a98007b98734221bae" }, { "algorithm": "SHA256", - "checksumValue": "4299a03828b98bfe47ec6809f6e279252954a9a911dc7e0f19551bd74e3af971" + "checksumValue": "6b8919dc951606dc6f2b0175f8955a9ced901ce8bd08db47f291b6c04227ae7f" } ], "fileName": "Modules/expat/xmltok.h" @@ -272,11 +258,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "aa96882de8e3d1d3083124b595aa911efe44e5ad" + "checksumValue": "7756f7c0d3625ae7dde6cf7d386685ffacb57c7e" }, { "algorithm": "SHA256", - "checksumValue": "0fbcba7931707c60301305dab78d2298d96447d0a5513926d8b18135228c0818" + "checksumValue": "a3fe18ff32b21fbcb7c190895c68158404e1b9fb449db6431bc08b261dc03938" } ], "fileName": "Modules/expat/xmltok_impl.c" @@ -1590,14 +1576,14 @@ "checksums": [ { "algorithm": "SHA256", - "checksumValue": "6b902ab103843592be5e99504f846ec109c1abb692e85347587f237a4ffa1033" + "checksumValue": "a13447b9aa67d7c860783fdf6820f33ebdea996900d6d8bbc50a628f55f099f7" } ], - "downloadLocation": "https://github.com/libexpat/libexpat/releases/download/R_2_5_0/expat-2.5.0.tar.gz", + "downloadLocation": "https://github.com/libexpat/libexpat/releases/download/R_2_6_0/expat-2.6.0.tar.gz", "externalRefs": [ { "referenceCategory": "SECURITY", - "referenceLocator": "cpe:2.3:a:libexpat_project:libexpat:2.5.0:*:*:*:*:*:*:*", + "referenceLocator": "cpe:2.3:a:libexpat_project:libexpat:2.6.0:*:*:*:*:*:*:*", "referenceType": "cpe23Type" } ], @@ -1605,7 +1591,7 @@ "name": "expat", "originator": "Organization: Expat development team", "primaryPackagePurpose": "SOURCE", - "versionInfo": "2.5.0" + "versionInfo": "2.6.0" }, { "SPDXID": "SPDXRef-PACKAGE-hacl-star", @@ -2368,11 +2354,6 @@ "relationshipType": "CONTAINS", "spdxElementId": "SPDXRef-PACKAGE-expat" }, - { - "relatedSpdxElement": "SPDXRef-FILE-Modules-expat-expat-config.h", - "relationshipType": "CONTAINS", - "spdxElementId": "SPDXRef-PACKAGE-expat" - }, { "relatedSpdxElement": "SPDXRef-FILE-Modules-expat-expat-external.h", "relationshipType": "CONTAINS", 
diff --git a/Modules/expat/expat.h b/Modules/expat/expat.h index 1c83563cbf68e7..95464b0dd17735 100644 --- a/Modules/expat/expat.h +++ b/Modules/expat/expat.h @@ -11,11 +11,13 @@ Copyright (c) 2000-2005 Fred L. Drake, Jr. Copyright (c) 2001-2002 Greg Stein Copyright (c) 2002-2016 Karl Waclawek - Copyright (c) 2016-2022 Sebastian Pipping + Copyright (c) 2016-2024 Sebastian Pipping Copyright (c) 2016 Cristian Rodríguez Copyright (c) 2016 Thomas Beutlich Copyright (c) 2017 Rhodri James Copyright (c) 2022 Thijs Schreijer + Copyright (c) 2023 Hanno Böck + Copyright (c) 2023 Sony Corporation / Snild Dolkow Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -269,7 +271,7 @@ XML_ParserCreate_MM(const XML_Char *encoding, const XML_Memory_Handling_Suite *memsuite, const XML_Char *namespaceSeparator); -/* Prepare a parser object to be re-used. This is particularly +/* Prepare a parser object to be reused. This is particularly valuable when memory allocation overhead is disproportionately high, such as when a large number of small documnents need to be parsed. All handlers are cleared from the parser, except for the @@ -951,7 +953,7 @@ XMLPARSEAPI(XML_Index) XML_GetCurrentByteIndex(XML_Parser parser); XMLPARSEAPI(int) XML_GetCurrentByteCount(XML_Parser parser); -/* If XML_CONTEXT_BYTES is defined, returns the input buffer, sets +/* If XML_CONTEXT_BYTES is >=1, returns the input buffer, sets the integer pointed to by offset to the offset within this buffer of the current parse position, and sets the integer pointed to by size to the size of this buffer (the number of input bytes). Otherwise @@ -1025,7 +1027,9 @@ enum XML_FeatureEnum { XML_FEATURE_ATTR_INFO, /* Added in Expat 2.4.0. 
*/ XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, - XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT + XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT, + /* Added in Expat 2.6.0. */ + XML_FEATURE_GE /* Additional features must be added to the end of this enum. */ }; @@ -1038,23 +1042,29 @@ typedef struct { XMLPARSEAPI(const XML_Feature *) XML_GetFeatureList(void); -#ifdef XML_DTD -/* Added in Expat 2.4.0. */ +#if XML_GE == 1 +/* Added in Expat 2.4.0 for XML_DTD defined and + * added in Expat 2.6.0 for XML_GE == 1. */ XMLPARSEAPI(XML_Bool) XML_SetBillionLaughsAttackProtectionMaximumAmplification( XML_Parser parser, float maximumAmplificationFactor); -/* Added in Expat 2.4.0. */ +/* Added in Expat 2.4.0 for XML_DTD defined and + * added in Expat 2.6.0 for XML_GE == 1. */ XMLPARSEAPI(XML_Bool) XML_SetBillionLaughsAttackProtectionActivationThreshold( XML_Parser parser, unsigned long long activationThresholdBytes); #endif +/* Added in Expat 2.6.0. */ +XMLPARSEAPI(XML_Bool) +XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled); + /* Expat follows the semantic versioning convention. - See http://semver.org. + See https://semver.org */ #define XML_MAJOR_VERSION 2 -#define XML_MINOR_VERSION 5 +#define XML_MINOR_VERSION 6 #define XML_MICRO_VERSION 0 #ifdef __cplusplus diff --git a/Modules/expat/expat_config.h b/Modules/expat/expat_config.h index 6671f7b689ba97..e7d9499d9078d9 100644 --- a/Modules/expat/expat_config.h +++ b/Modules/expat/expat_config.h @@ -16,6 +16,7 @@ #define XML_NS 1 #define XML_DTD 1 +#define XML_GE 1 #define XML_CONTEXT_BYTES 1024 // bpo-30947: Python uses best available entropy sources to diff --git a/Modules/expat/internal.h b/Modules/expat/internal.h index e09f533b23c9df..cce71e4c5164b5 100644 --- a/Modules/expat/internal.h +++ b/Modules/expat/internal.h @@ -28,9 +28,10 @@ Copyright (c) 2002-2003 Fred L. Drake, Jr. 
Copyright (c) 2002-2006 Karl Waclawek Copyright (c) 2003 Greg Stein - Copyright (c) 2016-2022 Sebastian Pipping + Copyright (c) 2016-2023 Sebastian Pipping Copyright (c) 2018 Yury Gribov Copyright (c) 2019 David Loffredo + Copyright (c) 2023 Sony Corporation / Snild Dolkow Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -154,12 +155,15 @@ extern "C" { void _INTERNAL_trim_to_complete_utf8_characters(const char *from, const char **fromLimRef); -#if defined(XML_DTD) +#if XML_GE == 1 unsigned long long testingAccountingGetCountBytesDirect(XML_Parser parser); unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser); const char *unsignedCharToPrintable(unsigned char c); #endif +extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c +extern unsigned int g_parseAttempts; // used for testing only + #ifdef __cplusplus } #endif diff --git a/Modules/expat/siphash.h b/Modules/expat/siphash.h index 303283ad2de98d..a1ed99e687bd6e 100644 --- a/Modules/expat/siphash.h +++ b/Modules/expat/siphash.h @@ -106,7 +106,7 @@ * if this code is included and compiled as C++; related GCC warning is: * warning: use of C++11 long long integer constant [-Wlong-long] */ -#define _SIP_ULL(high, low) ((((uint64_t)high) << 32) | (low)) +#define SIP_ULL(high, low) ((((uint64_t)high) << 32) | (low)) #define SIP_ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) @@ -190,10 +190,10 @@ sip_round(struct siphash *H, const int rounds) { static struct siphash * sip24_init(struct siphash *H, const struct sipkey *key) { - H->v0 = _SIP_ULL(0x736f6d65U, 0x70736575U) ^ key->k[0]; - H->v1 = _SIP_ULL(0x646f7261U, 0x6e646f6dU) ^ key->k[1]; - H->v2 = _SIP_ULL(0x6c796765U, 0x6e657261U) ^ key->k[0]; - H->v3 = _SIP_ULL(0x74656462U, 0x79746573U) ^ key->k[1]; + H->v0 = SIP_ULL(0x736f6d65U, 0x70736575U) ^ key->k[0]; + H->v1 = SIP_ULL(0x646f7261U, 0x6e646f6dU) ^ key->k[1]; + H->v2 = SIP_ULL(0x6c796765U, 0x6e657261U) ^ 
key->k[0]; + H->v3 = SIP_ULL(0x74656462U, 0x79746573U) ^ key->k[1]; H->p = H->buf; H->c = 0; diff --git a/Modules/expat/winconfig.h b/Modules/expat/winconfig.h index 2ecd61b5b94820..05805514ec7fa2 100644 --- a/Modules/expat/winconfig.h +++ b/Modules/expat/winconfig.h @@ -9,7 +9,8 @@ Copyright (c) 2000 Clark Cooper Copyright (c) 2002 Greg Stein Copyright (c) 2005 Karl Waclawek - Copyright (c) 2017-2021 Sebastian Pipping + Copyright (c) 2017-2023 Sebastian Pipping + Copyright (c) 2023 Orgad Shaneh Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -35,7 +36,9 @@ #ifndef WINCONFIG_H #define WINCONFIG_H -#define WIN32_LEAN_AND_MEAN +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif #include #undef WIN32_LEAN_AND_MEAN diff --git a/Modules/expat/xmlparse.c b/Modules/expat/xmlparse.c index b6c2eca97567ba..aaf0fa9c8f96d1 100644 --- a/Modules/expat/xmlparse.c +++ b/Modules/expat/xmlparse.c @@ -1,4 +1,4 @@ -/* 5ab094ffadd6edfc94c3eee53af44a86951f9f1f0933ada3114bbce2bfb02c99 (2.5.0+) +/* 628e24d4966bedbd4800f6ed128d06d29703765b4bce12d3b7f099f90f842fc9 (2.6.0+) __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| @@ -13,7 +13,7 @@ Copyright (c) 2002-2016 Karl Waclawek Copyright (c) 2005-2009 Steven Solie Copyright (c) 2016 Eric Rahm - Copyright (c) 2016-2022 Sebastian Pipping + Copyright (c) 2016-2024 Sebastian Pipping Copyright (c) 2016 Gaurav Copyright (c) 2016 Thomas Beutlich Copyright (c) 2016 Gustavo Grieco @@ -32,10 +32,13 @@ Copyright (c) 2019 David Loffredo Copyright (c) 2019-2020 Ben Wagner Copyright (c) 2019 Vadim Zeitlin - Copyright (c) 2021 Dong-hee Na + Copyright (c) 2021 Donghee Na Copyright (c) 2022 Samanta Navarro Copyright (c) 2022 Jeffrey Walton Copyright (c) 2022 Jann Horn + Copyright (c) 2022 Sean McBride + Copyright (c) 2023 Owain Davies + Copyright (c) 2023 Sony Corporation / Snild Dolkow Licensed under the MIT license: Permission is hereby granted, free of charge, to any person 
obtaining @@ -60,10 +63,25 @@ #define XML_BUILDING_EXPAT 1 -#include +#include "expat_config.h" -#if ! defined(_GNU_SOURCE) -# define _GNU_SOURCE 1 /* syscall prototype */ +#if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1) +# error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default) +#endif + +#if defined(XML_DTD) && XML_GE == 0 +# error Either undefine XML_DTD or define XML_GE to 1. +#endif + +#if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \ + || (XML_CONTEXT_BYTES + 0 < 0) +# error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default) +#endif + +#if defined(HAVE_SYSCALL_GETRANDOM) +# if ! defined(_GNU_SOURCE) +# define _GNU_SOURCE 1 /* syscall prototype */ +# endif #endif #ifdef _WIN32 @@ -73,6 +91,7 @@ # endif #endif +#include #include #include /* memset(), memcpy() */ #include @@ -131,8 +150,8 @@ Your options include: \ * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \ * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \ - * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \ - * BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \ + * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \ + * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \ * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \ * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \ * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \ @@ -196,6 +215,8 @@ typedef char ICHAR; /* Do safe (NULL-aware) pointer arithmetic */ #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0) +#define EXPAT_MIN(a, b) (((a) < (b)) ? 
(a) : (b)) + #include "internal.h" #include "xmltok.h" #include "xmlrole.h" @@ -279,7 +300,7 @@ typedef struct { XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to contain the 'raw' name as well. - A parser re-uses these structures, maintaining a list of allocated + A parser reuses these structures, maintaining a list of allocated TAG objects in a free list. */ typedef struct tag { @@ -408,12 +429,12 @@ enum XML_Account { XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */ }; -#ifdef XML_DTD +#if XML_GE == 1 typedef unsigned long long XmlBigCount; typedef struct accounting { XmlBigCount countBytesDirect; XmlBigCount countBytesIndirect; - int debugLevel; + unsigned long debugLevel; float maximumAmplificationFactor; // >=1.0 unsigned long long activationThresholdBytes; } ACCOUNTING; @@ -422,9 +443,9 @@ typedef struct entity_stats { unsigned int countEverOpened; unsigned int currentDepth; unsigned int maximumDepthSeen; - int debugLevel; + unsigned long debugLevel; } ENTITY_STATS; -#endif /* XML_DTD */ +#endif /* XML_GE == 1 */ typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start, const char *end, const char **endPtr); @@ -464,41 +485,47 @@ static enum XML_Error doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, const char *start, const char *end, const char **endPtr, XML_Bool haveMore, enum XML_Account account); -static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *, +static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, const char *end, const char **nextPtr, XML_Bool haveMore, enum XML_Account account); #ifdef XML_DTD -static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *, +static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, const char *end, const char **nextPtr, XML_Bool haveMore); #endif /* XML_DTD */ static void freeBindings(XML_Parser parser, BINDING *bindings); 
-static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, - const char *s, TAG_NAME *tagNamePtr, +static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, + const char *attStr, TAG_NAME *tagNamePtr, BINDING **bindingsPtr, enum XML_Account account); static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr); -static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata, - XML_Bool isId, const XML_Char *dfltValue, - XML_Parser parser); -static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *, - XML_Bool isCdata, const char *, - const char *, STRING_POOL *, +static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, + XML_Bool isCdata, XML_Bool isId, + const XML_Char *value, XML_Parser parser); +static enum XML_Error storeAttributeValue(XML_Parser parser, + const ENCODING *enc, XML_Bool isCdata, + const char *ptr, const char *end, + STRING_POOL *pool, enum XML_Account account); -static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *, - XML_Bool isCdata, const char *, - const char *, STRING_POOL *, +static enum XML_Error appendAttributeValue(XML_Parser parser, + const ENCODING *enc, + XML_Bool isCdata, const char *ptr, + const char *end, STRING_POOL *pool, enum XML_Account account); static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); -static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *); +static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType); +#if XML_GE == 1 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start, const char *end, enum XML_Account account); +#else +static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity); +#endif static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const 
char *end); static int reportComment(XML_Parser parser, const ENCODING *enc, @@ -518,21 +545,22 @@ static void dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms); static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms); -static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *, STRING_POOL *, - const HASH_TABLE *); +static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable, + STRING_POOL *newPool, const HASH_TABLE *oldTable); static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize); -static void FASTCALL hashTableInit(HASH_TABLE *, +static void FASTCALL hashTableInit(HASH_TABLE *table, const XML_Memory_Handling_Suite *ms); -static void FASTCALL hashTableClear(HASH_TABLE *); -static void FASTCALL hashTableDestroy(HASH_TABLE *); -static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *); -static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *); +static void FASTCALL hashTableClear(HASH_TABLE *table); +static void FASTCALL hashTableDestroy(HASH_TABLE *table); +static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter, + const HASH_TABLE *table); +static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter); -static void FASTCALL poolInit(STRING_POOL *, +static void FASTCALL poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms); -static void FASTCALL poolClear(STRING_POOL *); -static void FASTCALL poolDestroy(STRING_POOL *); +static void FASTCALL poolClear(STRING_POOL *pool); +static void FASTCALL poolDestroy(STRING_POOL *pool); static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr, const char *end); static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, @@ -562,7 +590,7 @@ static XML_Parser parserCreate(const XML_Char *encodingName, static void parserInit(XML_Parser parser, const XML_Char *encodingName); -#ifdef XML_DTD +#if XML_GE == 1 static float 
accountingGetCurrentAmplification(XML_Parser rootParser); static void accountingReportStats(XML_Parser originParser, const char *epilog); static void accountingOnAbort(XML_Parser originParser); @@ -585,13 +613,12 @@ static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity, static XML_Parser getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff); -#endif /* XML_DTD */ +#endif /* XML_GE == 1 */ static unsigned long getDebugLevel(const char *variableName, unsigned long defaultDebugLevel); #define poolStart(pool) ((pool)->start) -#define poolEnd(pool) ((pool)->ptr) #define poolLength(pool) ((pool)->ptr - (pool)->start) #define poolChop(pool) ((void)--(pool->ptr)) #define poolLastChar(pool) (((pool)->ptr)[-1]) @@ -602,21 +629,35 @@ static unsigned long getDebugLevel(const char *variableName, ? 0 \ : ((*((pool)->ptr)++ = c), 1)) +XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c +unsigned int g_parseAttempts = 0; // used for testing only + struct XML_ParserStruct { /* The first member must be m_userData so that the XML_GetUserData macro works. 
*/ void *m_userData; void *m_handlerArg; - char *m_buffer; + + // How the four parse buffer pointers below relate in time and space: + // + // m_buffer <= m_bufferPtr <= m_bufferEnd <= m_bufferLim + // | | | | + // <--parsed-->| | | + // <---parsing--->| | + // <--unoccupied-->| + // <---------total-malloced/realloced-------->| + + char *m_buffer; // malloc/realloc base pointer of parse buffer const XML_Memory_Handling_Suite m_mem; - /* first character to be parsed */ - const char *m_bufferPtr; - /* past last character to be parsed */ - char *m_bufferEnd; - /* allocated end of m_buffer */ - const char *m_bufferLim; + const char *m_bufferPtr; // first character to be parsed + char *m_bufferEnd; // past last character to be parsed + const char *m_bufferLim; // allocated end of m_buffer + XML_Index m_parseEndByteIndex; const char *m_parseEndPtr; + size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */ + XML_Bool m_reparseDeferralEnabled; + int m_lastBufferRequestSize; XML_Char *m_dataBuf; XML_Char *m_dataBufEnd; XML_StartElementHandler m_startElementHandler; @@ -703,7 +744,7 @@ struct XML_ParserStruct { enum XML_ParamEntityParsing m_paramEntityParsing; #endif unsigned long m_hash_secret_salt; -#ifdef XML_DTD +#if XML_GE == 1 ACCOUNTING m_accounting; ENTITY_STATS m_entity_stats; #endif @@ -948,6 +989,47 @@ get_hash_secret_salt(XML_Parser parser) { return parser->m_hash_secret_salt; } +static enum XML_Error +callProcessor(XML_Parser parser, const char *start, const char *end, + const char **endPtr) { + const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start); + + if (parser->m_reparseDeferralEnabled + && ! parser->m_parsingStatus.finalBuffer) { + // Heuristic: don't try to parse a partial token again until the amount of + // available data has increased significantly. + const size_t had_before = parser->m_partialTokenBytesBefore; + // ...but *do* try anyway if we're close to causing a reallocation. 
+ size_t available_buffer + = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); +#if XML_CONTEXT_BYTES > 0 + available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES); +#endif + available_buffer + += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd); + // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok + const bool enough + = (have_now >= 2 * had_before) + || ((size_t)parser->m_lastBufferRequestSize > available_buffer); + + if (! enough) { + *endPtr = start; // callers may expect this to be set + return XML_ERROR_NONE; + } + } + g_parseAttempts += 1; + const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr); + if (ret == XML_ERROR_NONE) { + // if we consumed nothing, remember what we had on this parse attempt. + if (*endPtr == start) { + parser->m_partialTokenBytesBefore = have_now; + } else { + parser->m_partialTokenBytesBefore = 0; + } + } + return ret; +} + static XML_Bool /* only valid for root parser */ startParsing(XML_Parser parser) { /* hash functions must be initialized before setContext() is called */ @@ -1129,6 +1211,9 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { parser->m_bufferEnd = parser->m_buffer; parser->m_parseEndByteIndex = 0; parser->m_parseEndPtr = NULL; + parser->m_partialTokenBytesBefore = 0; + parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault; + parser->m_lastBufferRequestSize = 0; parser->m_declElementType = NULL; parser->m_declAttributeId = NULL; parser->m_declEntity = NULL; @@ -1163,7 +1248,7 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { #endif parser->m_hash_secret_salt = 0; -#ifdef XML_DTD +#if XML_GE == 1 memset(&parser->m_accounting, 0, sizeof(ACCOUNTING)); parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u); parser->m_accounting.maximumAmplificationFactor @@ -1298,6 +1383,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, to worry which hash 
secrets each table has. */ unsigned long oldhash_secret_salt; + XML_Bool oldReparseDeferralEnabled; /* Validate the oldParser parameter before we pull everything out of it */ if (oldParser == NULL) @@ -1342,6 +1428,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, to worry which hash secrets each table has. */ oldhash_secret_salt = parser->m_hash_secret_salt; + oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled; #ifdef XML_DTD if (! context) @@ -1394,6 +1481,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities; parser->m_ns_triplets = oldns_triplets; parser->m_hash_secret_salt = oldhash_secret_salt; + parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled; parser->m_parentParser = oldParser; #ifdef XML_DTD parser->m_paramEntityParsing = oldParamEntityParsing; @@ -1848,55 +1936,8 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { parser->m_parsingStatus.parsing = XML_PARSING; } - if (len == 0) { - parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; - if (! isFinal) - return XML_STATUS_OK; - parser->m_positionPtr = parser->m_bufferPtr; - parser->m_parseEndPtr = parser->m_bufferEnd; - - /* If data are left over from last buffer, and we now know that these - data are the final chunk of input, then we have to check them again - to detect errors based on that fact. - */ - parser->m_errorCode - = parser->m_processor(parser, parser->m_bufferPtr, - parser->m_parseEndPtr, &parser->m_bufferPtr); - - if (parser->m_errorCode == XML_ERROR_NONE) { - switch (parser->m_parsingStatus.parsing) { - case XML_SUSPENDED: - /* It is hard to be certain, but it seems that this case - * cannot occur. This code is cleaning up a previous parse - * with no new data (since len == 0). 
Changing the parsing - * state requires getting to execute a handler function, and - * there doesn't seem to be an opportunity for that while in - * this circumstance. - * - * Given the uncertainty, we retain the code but exclude it - * from coverage tests. - * - * LCOV_EXCL_START - */ - XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, - parser->m_bufferPtr, &parser->m_position); - parser->m_positionPtr = parser->m_bufferPtr; - return XML_STATUS_SUSPENDED; - /* LCOV_EXCL_STOP */ - case XML_INITIALIZED: - case XML_PARSING: - parser->m_parsingStatus.parsing = XML_FINISHED; - /* fall through */ - default: - return XML_STATUS_OK; - } - } - parser->m_eventEndPtr = parser->m_eventPtr; - parser->m_processor = errorProcessor; - return XML_STATUS_ERROR; - } -#ifndef XML_CONTEXT_BYTES - else if (parser->m_bufferPtr == parser->m_bufferEnd) { +#if XML_CONTEXT_BYTES == 0 + if (parser->m_bufferPtr == parser->m_bufferEnd) { const char *end; int nLeftOver; enum XML_Status result; @@ -1907,12 +1948,15 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { parser->m_processor = errorProcessor; return XML_STATUS_ERROR; } + // though this isn't a buffer request, we assume that `len` is the app's + // preferred buffer fill size, and therefore save it here. 
+ parser->m_lastBufferRequestSize = len; parser->m_parseEndByteIndex += len; parser->m_positionPtr = s; parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; parser->m_errorCode - = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end); + = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end); if (parser->m_errorCode != XML_ERROR_NONE) { parser->m_eventEndPtr = parser->m_eventPtr; @@ -1939,23 +1983,25 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { &parser->m_position); nLeftOver = s + len - end; if (nLeftOver) { - if (parser->m_buffer == NULL - || nLeftOver > parser->m_bufferLim - parser->m_buffer) { - /* avoid _signed_ integer overflow */ - char *temp = NULL; - const int bytesToAllocate = (int)((unsigned)len * 2U); - if (bytesToAllocate > 0) { - temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate); - } - if (temp == NULL) { - parser->m_errorCode = XML_ERROR_NO_MEMORY; - parser->m_eventPtr = parser->m_eventEndPtr = NULL; - parser->m_processor = errorProcessor; - return XML_STATUS_ERROR; - } - parser->m_buffer = temp; - parser->m_bufferLim = parser->m_buffer + bytesToAllocate; + // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED + // (and XML_ERROR_FINISHED) from XML_GetBuffer. + const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing; + parser->m_parsingStatus.parsing = XML_PARSING; + void *const temp = XML_GetBuffer(parser, nLeftOver); + parser->m_parsingStatus.parsing = originalStatus; + // GetBuffer may have overwritten this, but we want to remember what the + // app requested, not how many bytes were left over after parsing. + parser->m_lastBufferRequestSize = len; + if (temp == NULL) { + // NOTE: parser->m_errorCode has already been set by XML_GetBuffer(). 
+ parser->m_eventPtr = parser->m_eventEndPtr = NULL; + parser->m_processor = errorProcessor; + return XML_STATUS_ERROR; } + // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we + // don't have any data to preserve, and can copy straight into the start + // of the buffer rather than the GetBuffer return pointer (which may be + // pointing further into the allocated buffer). memcpy(parser->m_buffer, end, nLeftOver); } parser->m_bufferPtr = parser->m_buffer; @@ -1966,16 +2012,15 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { parser->m_eventEndPtr = parser->m_bufferPtr; return result; } -#endif /* not defined XML_CONTEXT_BYTES */ - else { - void *buff = XML_GetBuffer(parser, len); - if (buff == NULL) - return XML_STATUS_ERROR; - else { - memcpy(buff, s, len); - return XML_ParseBuffer(parser, len, isFinal); - } +#endif /* XML_CONTEXT_BYTES == 0 */ + void *buff = XML_GetBuffer(parser, len); + if (buff == NULL) + return XML_STATUS_ERROR; + if (len > 0) { + assert(s != NULL); // make sure s==NULL && len!=0 was rejected above + memcpy(buff, s, len); } + return XML_ParseBuffer(parser, len, isFinal); } enum XML_Status XMLCALL @@ -2015,8 +2060,8 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { parser->m_parseEndByteIndex += len; parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; - parser->m_errorCode = parser->m_processor( - parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr); + parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr, + &parser->m_bufferPtr); if (parser->m_errorCode != XML_ERROR_NONE) { parser->m_eventEndPtr = parser->m_eventPtr; @@ -2061,10 +2106,14 @@ XML_GetBuffer(XML_Parser parser, int len) { default:; } - if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) { -#ifdef XML_CONTEXT_BYTES + // whether or not the request succeeds, `len` seems to be the app's preferred + // buffer fill size; remember it. 
+ parser->m_lastBufferRequestSize = len; + if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd) + || parser->m_buffer == NULL) { +#if XML_CONTEXT_BYTES > 0 int keep; -#endif /* defined XML_CONTEXT_BYTES */ +#endif /* XML_CONTEXT_BYTES > 0 */ /* Do not invoke signed arithmetic overflow: */ int neededSize = (int)((unsigned)len + (unsigned)EXPAT_SAFE_PTR_DIFF( @@ -2073,7 +2122,7 @@ XML_GetBuffer(XML_Parser parser, int len) { parser->m_errorCode = XML_ERROR_NO_MEMORY; return NULL; } -#ifdef XML_CONTEXT_BYTES +#if XML_CONTEXT_BYTES > 0 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); if (keep > XML_CONTEXT_BYTES) keep = XML_CONTEXT_BYTES; @@ -2083,10 +2132,11 @@ XML_GetBuffer(XML_Parser parser, int len) { return NULL; } neededSize += keep; -#endif /* defined XML_CONTEXT_BYTES */ - if (neededSize - <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) { -#ifdef XML_CONTEXT_BYTES +#endif /* XML_CONTEXT_BYTES > 0 */ + if (parser->m_buffer && parser->m_bufferPtr + && neededSize + <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) { +#if XML_CONTEXT_BYTES > 0 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) { int offset = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer) @@ -2099,19 +2149,17 @@ XML_GetBuffer(XML_Parser parser, int len) { parser->m_bufferPtr -= offset; } #else - if (parser->m_buffer && parser->m_bufferPtr) { - memmove(parser->m_buffer, parser->m_bufferPtr, - EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); - parser->m_bufferEnd - = parser->m_buffer - + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); - parser->m_bufferPtr = parser->m_buffer; - } -#endif /* not defined XML_CONTEXT_BYTES */ + memmove(parser->m_buffer, parser->m_bufferPtr, + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); + parser->m_bufferEnd + = parser->m_buffer + + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); + parser->m_bufferPtr = 
parser->m_buffer; +#endif /* XML_CONTEXT_BYTES > 0 */ } else { char *newBuf; int bufferSize - = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr); + = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer); if (bufferSize == 0) bufferSize = INIT_BUFFER_SIZE; do { @@ -2128,7 +2176,7 @@ XML_GetBuffer(XML_Parser parser, int len) { return NULL; } parser->m_bufferLim = newBuf + bufferSize; -#ifdef XML_CONTEXT_BYTES +#if XML_CONTEXT_BYTES > 0 if (parser->m_bufferPtr) { memcpy(newBuf, &parser->m_bufferPtr[-keep], EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) @@ -2158,7 +2206,7 @@ XML_GetBuffer(XML_Parser parser, int len) { parser->m_bufferEnd = newBuf; } parser->m_bufferPtr = parser->m_buffer = newBuf; -#endif /* not defined XML_CONTEXT_BYTES */ +#endif /* XML_CONTEXT_BYTES > 0 */ } parser->m_eventPtr = parser->m_eventEndPtr = NULL; parser->m_positionPtr = NULL; @@ -2208,7 +2256,7 @@ XML_ResumeParser(XML_Parser parser) { } parser->m_parsingStatus.parsing = XML_PARSING; - parser->m_errorCode = parser->m_processor( + parser->m_errorCode = callProcessor( parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr); if (parser->m_errorCode != XML_ERROR_NONE) { @@ -2272,7 +2320,7 @@ XML_GetCurrentByteCount(XML_Parser parser) { const char *XMLCALL XML_GetInputContext(XML_Parser parser, int *offset, int *size) { -#ifdef XML_CONTEXT_BYTES +#if XML_CONTEXT_BYTES > 0 if (parser == NULL) return NULL; if (parser->m_eventPtr && parser->m_buffer) { @@ -2286,7 +2334,7 @@ XML_GetInputContext(XML_Parser parser, int *offset, int *size) { (void)parser; (void)offset; (void)size; -#endif /* defined XML_CONTEXT_BYTES */ +#endif /* XML_CONTEXT_BYTES > 0 */ return (const char *)0; } @@ -2506,7 +2554,7 @@ XML_GetFeatureList(void) { #ifdef XML_DTD {XML_FEATURE_DTD, XML_L("XML_DTD"), 0}, #endif -#ifdef XML_CONTEXT_BYTES +#if XML_CONTEXT_BYTES > 0 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), XML_CONTEXT_BYTES}, #endif @@ -2522,8 
+2570,9 @@ XML_GetFeatureList(void) { #ifdef XML_ATTR_INFO {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, #endif -#ifdef XML_DTD - /* Added in Expat 2.4.0. */ +#if XML_GE == 1 + /* Added in Expat 2.4.0 for XML_DTD defined and + * added in Expat 2.6.0 for XML_GE == 1. */ {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, XML_L("XML_BLAP_MAX_AMP"), (long int) @@ -2531,13 +2580,15 @@ XML_GetFeatureList(void) { {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT, XML_L("XML_BLAP_ACT_THRES"), EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT}, + /* Added in Expat 2.6.0. */ + {XML_FEATURE_GE, XML_L("XML_GE"), 0}, #endif {XML_FEATURE_END, NULL, 0}}; return features; } -#ifdef XML_DTD +#if XML_GE == 1 XML_Bool XMLCALL XML_SetBillionLaughsAttackProtectionMaximumAmplification( XML_Parser parser, float maximumAmplificationFactor) { @@ -2559,7 +2610,16 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold( parser->m_accounting.activationThresholdBytes = activationThresholdBytes; return XML_TRUE; } -#endif /* XML_DTD */ +#endif /* XML_GE == 1 */ + +XML_Bool XMLCALL +XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) { + if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) { + parser->m_reparseDeferralEnabled = enabled; + return XML_TRUE; + } + return XML_FALSE; +} /* Initially tag->rawName always points into the parse buffer; for those TAG instances opened while the current parse buffer was @@ -2581,7 +2641,7 @@ storeRawNames(XML_Parser parser) { */ if (tag->rawName == rawNameBuf) break; - /* For re-use purposes we need to ensure that the + /* For reuse purposes we need to ensure that the size of tag->buf is a multiple of sizeof(XML_Char). 
*/ rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char)); @@ -2645,13 +2705,13 @@ externalEntityInitProcessor2(XML_Parser parser, const char *start, int tok = XmlContentTok(parser->m_encoding, start, end, &next); switch (tok) { case XML_TOK_BOM: -#ifdef XML_DTD +#if XML_GE == 1 if (! accountingDiffTolerated(parser, tok, start, next, __LINE__, XML_ACCOUNT_DIRECT)) { accountingOnAbort(parser); return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; } -#endif /* XML_DTD */ +#endif /* XML_GE == 1 */ /* If we are at the end of the buffer, this would cause the next stage, i.e. externalEntityInitProcessor3, to pass control directly to @@ -2765,7 +2825,7 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, for (;;) { const char *next = s; /* XmlContentTok doesn't always set the last arg */ int tok = XmlContentTok(enc, s, end, &next); -#ifdef XML_DTD +#if XML_GE == 1 const char *accountAfter = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR)) ? (haveMore ? s /* i.e. 0 bytes */ : end) @@ -2831,14 +2891,14 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, XML_Char ch = (XML_Char)XmlPredefinedEntityName( enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); if (ch) { -#ifdef XML_DTD +#if XML_GE == 1 /* NOTE: We are replacing 4-6 characters original input for 1 character * so there is no amplification and hence recording without * protection. 
*/ accountingDiffTolerated(parser, tok, (char *)&ch, ((char *)&ch) + sizeof(XML_Char), __LINE__, XML_ACCOUNT_ENTITY_EXPANSION); -#endif /* XML_DTD */ +#endif /* XML_GE == 1 */ if (parser->m_characterDataHandler) parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1); else if (parser->m_defaultHandler) @@ -3039,13 +3099,13 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, if (parser->m_ns && localPart) { /* localPart and prefix may have been overwritten in tag->name.str, since this points to the binding->uri - buffer which gets re-used; so we have to add them again + buffer which gets reused; so we have to add them again */ uri = (XML_Char *)tag->name.str + tag->name.uriLen; /* don't need to check for space - already done in storeAtts() */ while (*localPart) *uri++ = *localPart++; - prefix = (XML_Char *)tag->name.prefix; + prefix = tag->name.prefix; if (parser->m_ns_triplets && prefix) { *uri++ = parser->m_namespaceSeparator; while (*prefix) @@ -3112,7 +3172,7 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, However, now we have a start/endCdataSectionHandler, so it seems easier to let the user deal with this. 
*/ - else if (0 && parser->m_characterDataHandler) + else if ((0) && parser->m_characterDataHandler) parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0); /* END disabled code */ @@ -3141,8 +3201,8 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); } else parser->m_characterDataHandler( - parser->m_handlerArg, (XML_Char *)s, - (int)((XML_Char *)end - (XML_Char *)s)); + parser->m_handlerArg, (const XML_Char *)s, + (int)((const XML_Char *)end - (const XML_Char *)s)); } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, end); /* We are at the end of the final buffer, should we check for @@ -3175,8 +3235,8 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, *eventPP = s; } } else - charDataHandler(parser->m_handlerArg, (XML_Char *)s, - (int)((XML_Char *)next - (XML_Char *)s)); + charDataHandler(parser->m_handlerArg, (const XML_Char *)s, + (int)((const XML_Char *)next - (const XML_Char *)s)); } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); } break; @@ -4040,7 +4100,7 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, for (;;) { const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ int tok = XmlCdataSectionTok(enc, s, end, &next); -#ifdef XML_DTD +#if XML_GE == 1 if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { accountingOnAbort(parser); return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; @@ -4055,7 +4115,7 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, parser->m_endCdataSectionHandler(parser->m_handlerArg); /* BEGIN disabled code */ /* see comment under XML_TOK_CDATA_SECT_OPEN */ - else if (0 && parser->m_characterDataHandler) + else if ((0) && parser->m_characterDataHandler) parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0); /* END disabled code */ @@ -4091,8 +4151,8 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, *eventPP = s; } } else - charDataHandler(parser->m_handlerArg, (XML_Char *)s, - (int)((XML_Char *)next - (XML_Char *)s)); + charDataHandler(parser->m_handlerArg, (const XML_Char *)s, + (int)((const XML_Char *)next - (const XML_Char *)s)); } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); } break; @@ -4192,7 +4252,7 @@ doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, *eventPP = s; *startPtr = NULL; tok = XmlIgnoreSectionTok(enc, s, end, &next); -# ifdef XML_DTD +# if XML_GE == 1 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, XML_ACCOUNT_DIRECT)) { accountingOnAbort(parser); @@ -4284,7 +4344,7 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s, const XML_Char *storedversion = NULL; int standalone = -1; -#ifdef XML_DTD +#if XML_GE == 1 if (! 
accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__, XML_ACCOUNT_DIRECT)) { accountingOnAbort(parser); @@ -4482,16 +4542,16 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, parser->m_processor = entityValueProcessor; return entityValueProcessor(parser, next, end, nextPtr); } - /* If we are at the end of the buffer, this would cause XmlPrologTok to - return XML_TOK_NONE on the next call, which would then cause the - function to exit with *nextPtr set to s - that is what we want for other - tokens, but not for the BOM - we would rather like to skip it; - then, when this routine is entered the next time, XmlPrologTok will - return XML_TOK_INVALID, since the BOM is still in the buffer + /* XmlPrologTok has now set the encoding based on the BOM it found, and we + must move s and nextPtr forward to consume the BOM. + + If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we + would leave the BOM in the buffer and return. On the next call to this + function, our XmlPrologTok call would return XML_TOK_INVALID, since it + is not valid to have multiple BOMs. */ - else if (tok == XML_TOK_BOM && next == end - && ! parser->m_parsingStatus.finalBuffer) { -# ifdef XML_DTD + else if (tok == XML_TOK_BOM) { +# if XML_GE == 1 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, XML_ACCOUNT_DIRECT)) { accountingOnAbort(parser); @@ -4500,7 +4560,7 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, # endif *nextPtr = next; - return XML_ERROR_NONE; + s = next; } /* If we get this token, we have the start of what might be a normal tag, but not a declaration (i.e. 
it doesn't begin with @@ -4707,11 +4767,13 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, } } role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc); -#ifdef XML_DTD +#if XML_GE == 1 switch (role) { case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl - case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl +# ifdef XML_DTD + case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl +# endif break; default: if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { @@ -5029,6 +5091,9 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, break; case XML_ROLE_ENTITY_VALUE: if (dtd->keepProcessing) { +#if XML_GE == 1 + // This will store the given replacement text in + // parser->m_declEntity->textPtr. enum XML_Error result = storeEntityValue(parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar, XML_ACCOUNT_NONE); @@ -5049,6 +5114,25 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, poolDiscard(&dtd->entityValuePool); if (result != XML_ERROR_NONE) return result; +#else + // This will store "&entity123;" in parser->m_declEntity->textPtr + // to end up as "&entity123;" in the handler. 
+ if (parser->m_declEntity != NULL) { + const enum XML_Error result + = storeSelfEntityValue(parser, parser->m_declEntity); + if (result != XML_ERROR_NONE) + return result; + + if (parser->m_entityDeclHandler) { + *eventEndPP = s; + parser->m_entityDeclHandler( + parser->m_handlerArg, parser->m_declEntity->name, + parser->m_declEntity->is_param, parser->m_declEntity->textPtr, + parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0); + handleDefault = XML_FALSE; + } + } +#endif } break; case XML_ROLE_DOCTYPE_SYSTEM_ID: @@ -5107,6 +5191,16 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, } break; case XML_ROLE_ENTITY_COMPLETE: +#if XML_GE == 0 + // This will store "&entity123;" in entity->textPtr + // to end up as "&entity123;" in the handler. + if (parser->m_declEntity != NULL) { + const enum XML_Error result + = storeSelfEntityValue(parser, parser->m_declEntity); + if (result != XML_ERROR_NONE) + return result; + } +#endif if (dtd->keepProcessing && parser->m_declEntity && parser->m_entityDeclHandler) { *eventEndPP = s; @@ -5648,7 +5742,7 @@ epilogProcessor(XML_Parser parser, const char *s, const char *end, for (;;) { const char *next = NULL; int tok = XmlPrologTok(parser->m_encoding, s, end, &next); -#ifdef XML_DTD +#if XML_GE == 1 if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, XML_ACCOUNT_DIRECT)) { accountingOnAbort(parser); @@ -5728,7 +5822,7 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) { return XML_ERROR_NO_MEMORY; } entity->open = XML_TRUE; -#ifdef XML_DTD +#if XML_GE == 1 entityTrackingOnOpen(parser, entity, __LINE__); #endif entity->processed = 0; @@ -5761,10 +5855,10 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) { if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { entity->processed = (int)(next - textStart); parser->m_processor = internalEntityProcessor; - } else { -#ifdef XML_DTD + } else if (parser->m_openInternalEntities->entity == entity) { +#if XML_GE == 1 entityTrackingOnClose(parser, entity, __LINE__); -#endif /* XML_DTD */ +#endif /* XML_GE == 1 */ entity->open = XML_FALSE; parser->m_openInternalEntities = openEntity->next; /* put openEntity back in list of free instances */ @@ -5813,7 +5907,7 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end, return result; } -#ifdef XML_DTD +#if XML_GE == 1 entityTrackingOnClose(parser, entity, __LINE__); #endif entity->open = XML_FALSE; @@ -5892,7 +5986,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, const char *next = ptr; /* XmlAttributeValueTok doesn't always set the last arg */ int tok = XmlAttributeValueTok(enc, ptr, end, &next); -#ifdef XML_DTD +#if XML_GE == 1 if (! 
accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) { accountingOnAbort(parser); return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; @@ -5957,14 +6051,14 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, XML_Char ch = (XML_Char)XmlPredefinedEntityName( enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar); if (ch) { -#ifdef XML_DTD +#if XML_GE == 1 /* NOTE: We are replacing 4-6 characters original input for 1 character * so there is no amplification and hence recording without * protection. */ accountingDiffTolerated(parser, tok, (char *)&ch, ((char *)&ch) + sizeof(XML_Char), __LINE__, XML_ACCOUNT_ENTITY_EXPANSION); -#endif /* XML_DTD */ +#endif /* XML_GE == 1 */ if (! poolAppendChar(pool, ch)) return XML_ERROR_NO_MEMORY; break; @@ -6042,14 +6136,14 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, enum XML_Error result; const XML_Char *textEnd = entity->textPtr + entity->textLen; entity->open = XML_TRUE; -#ifdef XML_DTD +#if XML_GE == 1 entityTrackingOnOpen(parser, entity, __LINE__); #endif result = appendAttributeValue(parser, parser->m_internalEncoding, isCdata, (const char *)entity->textPtr, (const char *)textEnd, pool, XML_ACCOUNT_ENTITY_EXPANSION); -#ifdef XML_DTD +#if XML_GE == 1 entityTrackingOnClose(parser, entity, __LINE__); #endif entity->open = XML_FALSE; @@ -6079,6 +6173,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, /* not reached */ } +#if XML_GE == 1 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *entityTextPtr, const char *entityTextEnd, @@ -6086,12 +6181,12 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, DTD *const dtd = parser->m_dtd; /* save one level of indirection */ STRING_POOL *pool = &(dtd->entityValuePool); enum XML_Error result = XML_ERROR_NONE; -#ifdef XML_DTD +# ifdef XML_DTD int oldInEntityValue = parser->m_prologState.inEntityValue; 
parser->m_prologState.inEntityValue = 1; -#else +# else UNUSED_P(account); -#endif /* XML_DTD */ +# endif /* XML_DTD */ /* never return Null for the value argument in EntityDeclHandler, since this would indicate an external entity; therefore we have to make sure that entityValuePool.start is not null */ @@ -6105,18 +6200,16 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */ int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); -#ifdef XML_DTD if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__, account)) { accountingOnAbort(parser); result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH; goto endEntityValue; } -#endif switch (tok) { case XML_TOK_PARAM_ENTITY_REF: -#ifdef XML_DTD +# ifdef XML_DTD if (parser->m_isParamEntity || enc != parser->m_encoding) { const XML_Char *name; ENTITY *entity; @@ -6178,7 +6271,7 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, } break; } -#endif /* XML_DTD */ +# endif /* XML_DTD */ /* In the internal subset, PE references are not legal within markup declarations, e.g entity values in this case. */ parser->m_eventPtr = entityTextPtr; @@ -6259,12 +6352,38 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, entityTextPtr = next; } endEntityValue: -#ifdef XML_DTD +# ifdef XML_DTD parser->m_prologState.inEntityValue = oldInEntityValue; -#endif /* XML_DTD */ +# endif /* XML_DTD */ return result; } +#else /* XML_GE == 0 */ + +static enum XML_Error +storeSelfEntityValue(XML_Parser parser, ENTITY *entity) { + // This will store "&entity123;" in entity->textPtr + // to end up as "&entity123;" in the handler. + const char *const entity_start = "&"; + const char *const entity_end = ";"; + + STRING_POOL *const pool = &(parser->m_dtd->entityValuePool); + if (! poolAppendString(pool, entity_start) + || ! poolAppendString(pool, entity->name) + || ! 
poolAppendString(pool, entity_end)) { + poolDiscard(pool); + return XML_ERROR_NO_MEMORY; + } + + entity->textPtr = poolStart(pool); + entity->textLen = (int)(poolLength(pool)); + poolFinish(pool); + + return XML_ERROR_NONE; +} + +#endif /* XML_GE == 0 */ + static void FASTCALL normalizeLines(XML_Char *s) { XML_Char *p; @@ -6375,8 +6494,9 @@ reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE)); } else - parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s, - (int)((XML_Char *)end - (XML_Char *)s)); + parser->m_defaultHandler( + parser->m_handlerArg, (const XML_Char *)s, + (int)((const XML_Char *)end - (const XML_Char *)s)); } static int @@ -6480,7 +6600,7 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, name = poolStoreString(&dtd->pool, enc, start, end); if (! name) return NULL; - /* skip quotation mark - its storage will be re-used (like in name[-1]) */ + /* skip quotation mark - its storage will be reused (like in name[-1]) */ ++name; id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID)); @@ -6630,6 +6750,10 @@ getContext(XML_Parser parser) { static XML_Bool setContext(XML_Parser parser, const XML_Char *context) { + if (context == NULL) { + return XML_FALSE; + } + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ const XML_Char *s = context; @@ -7220,7 +7344,7 @@ poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr, return NULL; for (;;) { const enum XML_Convert_Result convert_res = XmlConvert( - enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); + enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end); if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; @@ -7651,7 +7775,7 @@ copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) { return result; } -#ifdef XML_DTD +#if 
XML_GE == 1 static float accountingGetCurrentAmplification(XML_Parser rootParser) { @@ -7672,7 +7796,7 @@ accountingReportStats(XML_Parser originParser, const char *epilog) { const XML_Parser rootParser = getRootParserOf(originParser, NULL); assert(! rootParser->m_parentParser); - if (rootParser->m_accounting.debugLevel < 1) { + if (rootParser->m_accounting.debugLevel == 0u) { return; } @@ -7709,7 +7833,7 @@ accountingReportDiff(XML_Parser rootParser, /* Note: Performance is of no concern here */ const char *walker = before; - if ((rootParser->m_accounting.debugLevel >= 3) + if ((rootParser->m_accounting.debugLevel >= 3u) || (after - before) <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) { for (; walker < after; walker++) { @@ -7774,7 +7898,7 @@ accountingDiffTolerated(XML_Parser originParser, int tok, const char *before, || (amplificationFactor <= rootParser->m_accounting.maximumAmplificationFactor); - if (rootParser->m_accounting.debugLevel >= 2) { + if (rootParser->m_accounting.debugLevel >= 2u) { accountingReportStats(rootParser, ""); accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after, bytesMore, source_line, account); @@ -7801,7 +7925,7 @@ static void entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity, const char *action, int sourceLine) { assert(! 
rootParser->m_parentParser); - if (rootParser->m_entity_stats.debugLevel < 1) + if (rootParser->m_entity_stats.debugLevel == 0u) return; # if defined(XML_UNICODE) @@ -8382,7 +8506,7 @@ unsignedCharToPrintable(unsigned char c) { assert(0); /* never gets here */ } -#endif /* XML_DTD */ +#endif /* XML_GE == 1 */ static unsigned long getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) { @@ -8393,9 +8517,9 @@ getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) { const char *const value = valueOrNull; errno = 0; - char *afterValue = (char *)value; + char *afterValue = NULL; unsigned long debugLevel = strtoul(value, &afterValue, 10); - if ((errno != 0) || (afterValue[0] != '\0')) { + if ((errno != 0) || (afterValue == value) || (afterValue[0] != '\0')) { errno = 0; return defaultDebugLevel; } diff --git a/Modules/expat/xmlrole.c b/Modules/expat/xmlrole.c index 3f0f5c150c6278..2c48bf40867953 100644 --- a/Modules/expat/xmlrole.c +++ b/Modules/expat/xmlrole.c @@ -12,10 +12,10 @@ Copyright (c) 2002-2006 Karl Waclawek Copyright (c) 2002-2003 Fred L. Drake, Jr. Copyright (c) 2005-2009 Steven Solie - Copyright (c) 2016-2021 Sebastian Pipping + Copyright (c) 2016-2023 Sebastian Pipping Copyright (c) 2017 Rhodri James Copyright (c) 2019 David Loffredo - Copyright (c) 2021 Dong-hee Na + Copyright (c) 2021 Donghee Na Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -38,7 +38,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include +#include "expat_config.h" #include diff --git a/Modules/expat/xmlrole.h b/Modules/expat/xmlrole.h index d6e1fa150a108a..a7904274c91d4e 100644 --- a/Modules/expat/xmlrole.h +++ b/Modules/expat/xmlrole.h @@ -10,7 +10,7 @@ Copyright (c) 2000 Clark Cooper Copyright (c) 2002 Karl Waclawek Copyright (c) 2002 Fred L. Drake, Jr. 
- Copyright (c) 2017 Sebastian Pipping + Copyright (c) 2017-2024 Sebastian Pipping Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -127,9 +127,9 @@ typedef struct prolog_state { #endif /* XML_DTD */ } PROLOG_STATE; -void XmlPrologStateInit(PROLOG_STATE *); +void XmlPrologStateInit(PROLOG_STATE *state); #ifdef XML_DTD -void XmlPrologStateInitExternalEntity(PROLOG_STATE *); +void XmlPrologStateInitExternalEntity(PROLOG_STATE *state); #endif /* XML_DTD */ #define XmlTokenRole(state, tok, ptr, end, enc) \ diff --git a/Modules/expat/xmltok.c b/Modules/expat/xmltok.c index 2b7012a58be419..29a66d72ceea5e 100644 --- a/Modules/expat/xmltok.c +++ b/Modules/expat/xmltok.c @@ -12,7 +12,7 @@ Copyright (c) 2002 Greg Stein Copyright (c) 2002-2016 Karl Waclawek Copyright (c) 2005-2009 Steven Solie - Copyright (c) 2016-2022 Sebastian Pipping + Copyright (c) 2016-2024 Sebastian Pipping Copyright (c) 2016 Pascal Cuoq Copyright (c) 2016 Don Lewis Copyright (c) 2017 Rhodri James @@ -20,8 +20,10 @@ Copyright (c) 2017 Benbuck Nason Copyright (c) 2017 José Gutiérrez de la Concha Copyright (c) 2019 David Loffredo - Copyright (c) 2021 Dong-hee Na + Copyright (c) 2021 Donghee Na Copyright (c) 2022 Martin Ettl + Copyright (c) 2022 Sean McBride + Copyright (c) 2023 Hanno Böck Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -44,7 +46,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include +#include "expat_config.h" #include #include /* memcpy */ @@ -76,7 +78,7 @@ #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) #define UCS2_GET_NAMING(pages, hi, lo) \ - (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo)&0x1F))) + (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F))) /* A 2 byte UTF-8 representation splits the characters 11 bits between the bottom 5 and 6 bits of the bytes. 
We need 8 bits to index into @@ -100,7 +102,7 @@ & (1u << (((byte)[2]) & 0x1F))) /* Detection of invalid UTF-8 sequences is based on Table 3.1B - of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/ + of Unicode 3.2: https://www.unicode.org/unicode/reports/tr28/ with the additional restriction of not allowing the Unicode code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE). Implementation details: @@ -225,7 +227,7 @@ struct normal_encoding { /* isNmstrt2 */ NULL, /* isNmstrt3 */ NULL, /* isNmstrt4 */ NULL, \ /* isInvalid2 */ NULL, /* isInvalid3 */ NULL, /* isInvalid4 */ NULL -static int FASTCALL checkCharRefNumber(int); +static int FASTCALL checkCharRefNumber(int result); #include "xmltok_impl.h" #include "ascii.h" @@ -243,7 +245,7 @@ static int FASTCALL checkCharRefNumber(int); #endif #define SB_BYTE_TYPE(enc, p) \ - (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)]) + (((const struct normal_encoding *)(enc))->type[(unsigned char)*(p)]) #ifdef XML_MIN_SIZE static int PTRFASTCALL @@ -407,7 +409,7 @@ utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short *to = *toP; const char *from = *fromP; while (from < fromLim && to < toLim) { - switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { + switch (SB_BYTE_TYPE(enc, from)) { case BT_LEAD2: if (fromLim - from < 2) { res = XML_CONVERT_INPUT_INCOMPLETE; @@ -715,31 +717,26 @@ unicode_byte_type(char hi, char lo) { return res; \ } -#define SET2(ptr, ch) (((ptr)[0] = ((ch)&0xff)), ((ptr)[1] = ((ch) >> 8))) #define GET_LO(ptr) ((unsigned char)(ptr)[0]) #define GET_HI(ptr) ((unsigned char)(ptr)[1]) DEFINE_UTF16_TO_UTF8(little2_) DEFINE_UTF16_TO_UTF16(little2_) -#undef SET2 #undef GET_LO #undef GET_HI -#define SET2(ptr, ch) (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch)&0xFF))) #define GET_LO(ptr) ((unsigned char)(ptr)[1]) #define GET_HI(ptr) ((unsigned char)(ptr)[0]) DEFINE_UTF16_TO_UTF8(big2_) DEFINE_UTF16_TO_UTF16(big2_) -#undef SET2 #undef 
GET_LO #undef GET_HI #define LITTLE2_BYTE_TYPE(enc, p) \ - ((p)[1] == 0 ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \ - : unicode_byte_type((p)[1], (p)[0])) + ((p)[1] == 0 ? SB_BYTE_TYPE(enc, p) : unicode_byte_type((p)[1], (p)[0])) #define LITTLE2_BYTE_TO_ASCII(p) ((p)[1] == 0 ? (p)[0] : -1) #define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == (c)) #define LITTLE2_IS_NAME_CHAR_MINBPC(p) \ @@ -872,9 +869,7 @@ static const struct normal_encoding internal_little2_encoding #endif #define BIG2_BYTE_TYPE(enc, p) \ - ((p)[0] == 0 \ - ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \ - : unicode_byte_type((p)[0], (p)[1])) + ((p)[0] == 0 ? SB_BYTE_TYPE(enc, p + 1) : unicode_byte_type((p)[0], (p)[1])) #define BIG2_BYTE_TO_ASCII(p) ((p)[0] == 0 ? (p)[1] : -1) #define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == (c)) #define BIG2_IS_NAME_CHAR_MINBPC(p) \ diff --git a/Modules/expat/xmltok.h b/Modules/expat/xmltok.h index 6f630c2f9ba96d..c51fce1ec1518b 100644 --- a/Modules/expat/xmltok.h +++ b/Modules/expat/xmltok.h @@ -10,7 +10,7 @@ Copyright (c) 2000 Clark Cooper Copyright (c) 2002 Fred L. Drake, Jr. 
Copyright (c) 2002-2005 Karl Waclawek - Copyright (c) 2016-2017 Sebastian Pipping + Copyright (c) 2016-2024 Sebastian Pipping Copyright (c) 2017 Rhodri James Licensed under the MIT license: @@ -289,7 +289,8 @@ int XmlParseXmlDecl(int isGeneralTextEntity, const ENCODING *enc, const char **encodingNamePtr, const ENCODING **namedEncodingPtr, int *standalonePtr); -int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name); +int XmlInitEncoding(INIT_ENCODING *p, const ENCODING **encPtr, + const char *name); const ENCODING *XmlGetUtf8InternalEncoding(void); const ENCODING *XmlGetUtf16InternalEncoding(void); int FASTCALL XmlUtf8Encode(int charNumber, char *buf); @@ -307,7 +308,8 @@ int XmlParseXmlDeclNS(int isGeneralTextEntity, const ENCODING *enc, const char **encodingNamePtr, const ENCODING **namedEncodingPtr, int *standalonePtr); -int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name); +int XmlInitEncodingNS(INIT_ENCODING *p, const ENCODING **encPtr, + const char *name); const ENCODING *XmlGetUtf8InternalEncodingNS(void); const ENCODING *XmlGetUtf16InternalEncodingNS(void); ENCODING *XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert, diff --git a/Modules/expat/xmltok_impl.c b/Modules/expat/xmltok_impl.c index 1971d74bf8c91f..239a2d06c4512c 100644 --- a/Modules/expat/xmltok_impl.c +++ b/Modules/expat/xmltok_impl.c @@ -126,7 +126,7 @@ # endif # define HAS_CHARS(enc, ptr, end, count) \ - ((end) - (ptr) >= ((count)*MINBPC(enc))) + ((end) - (ptr) >= ((count) * MINBPC(enc))) # define HAS_CHAR(enc, ptr, end) HAS_CHARS(enc, ptr, end, 1) diff --git a/Tools/build/generate_sbom.py b/Tools/build/generate_sbom.py index 442487f2d2546b..82016dc408639d 100644 --- a/Tools/build/generate_sbom.py +++ b/Tools/build/generate_sbom.py @@ -59,7 +59,10 @@ class PackageFiles(typing.NamedTuple): include=["Modules/_decimal/libmpdec/**"] ), "expat": PackageFiles( - include=["Modules/expat/**"] + include=["Modules/expat/**"], + exclude=[ + 
"Modules/expat/expat_config.h", + ] ), "macholib": PackageFiles( include=["Lib/ctypes/macholib/**"], diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index be89a26058e8e8..61cd41ea8f31c1 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -104,6 +104,7 @@ def clean_lines(text): # The problem with xmlparse.c is that something # has gone wrong where # we handle "maybe inline actual" # in Tools/c-analyzer/c_parser/parser/_global.py. +Modules/expat/internal.h Modules/expat/xmlparse.c ''') From a2d4281415e67c62f91363376db97eb66a9fb716 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Thu, 15 Feb 2024 02:00:50 +0900 Subject: [PATCH 122/126] gh-112087: Make __sizeof__ and listiter_{len, next} to be threadsafe (gh-114843) --- Lib/test/support/__init__.py | 27 ++--- Lib/test/test_iter.py | 2 +- ...-02-14-23-50-55.gh-issue-112087.H_4W_v.rst | 2 + Objects/listobject.c | 102 +++++++++--------- Python/bytecodes.c | 8 +- Python/executor_cases.c.h | 3 +- Python/generated_cases.c.h | 5 +- 7 files changed, 80 insertions(+), 69 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-02-14-23-50-55.gh-issue-112087.H_4W_v.rst diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 5b091fb2fd32dc..1d03ec0f5bd12b 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -1727,19 +1727,22 @@ def _check_tracemalloc(): def check_free_after_iterating(test, iter, cls, args=()): - class A(cls): - def __del__(self): - nonlocal done - done = True - try: - next(it) - except StopIteration: - pass - done = False - it = iter(A(*args)) - # Issue 26494: Shouldn't crash - test.assertRaises(StopIteration, next, it) + def wrapper(): + class A(cls): + def __del__(self): + nonlocal done + done = True + try: + next(it) + except StopIteration: + pass + + it = iter(A(*args)) + # Issue 26494: Shouldn't crash + test.assertRaises(StopIteration, next, it) + + wrapper() # The 
sequence should be deallocated just after the end of iterating gc_collect() test.assertTrue(done) diff --git a/Lib/test/test_iter.py b/Lib/test/test_iter.py index 30aedb0db3bb3d..9606d5beab71cb 100644 --- a/Lib/test/test_iter.py +++ b/Lib/test/test_iter.py @@ -302,7 +302,7 @@ def __eq__(self, other): # listiter_reduce_general self.assertEqual( run("reversed", orig["reversed"](list(range(8)))), - (iter, ([],)) + (reversed, ([],)) ) for case in types: diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-02-14-23-50-55.gh-issue-112087.H_4W_v.rst b/Misc/NEWS.d/next/Core and Builtins/2024-02-14-23-50-55.gh-issue-112087.H_4W_v.rst new file mode 100644 index 00000000000000..f92cdafd4ba225 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-02-14-23-50-55.gh-issue-112087.H_4W_v.rst @@ -0,0 +1,2 @@ +For an empty reverse iterator for list will be reduced to :func:`reversed`. +Patch by Donghee Na diff --git a/Objects/listobject.c b/Objects/listobject.c index 93409a82f8a489..96182a42306d95 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -20,6 +20,14 @@ class list "PyListObject *" "&PyList_Type" _Py_DECLARE_STR(list_err, "list index out of range"); +#ifdef Py_GIL_DISABLED +# define LOAD_SSIZE(value) _Py_atomic_load_ssize_relaxed(&value) +# define STORE_SSIZE(value, new_value) _Py_atomic_store_ssize_relaxed(&value, new_value) +#else +# define LOAD_SSIZE(value) value +# define STORE_SSIZE(value, new_value) value = new_value +#endif + #ifdef WITH_FREELISTS static struct _Py_list_freelist * get_list_freelist(void) @@ -2971,7 +2979,8 @@ list___sizeof___impl(PyListObject *self) /*[clinic end generated code: output=3417541f95f9a53e input=b8030a5d5ce8a187]*/ { size_t res = _PyObject_SIZE(Py_TYPE(self)); - res += (size_t)self->allocated * sizeof(void*); + Py_ssize_t allocated = LOAD_SSIZE(self->allocated); + res += (size_t)allocated * sizeof(void*); return PyLong_FromSize_t(res); } @@ -3373,33 +3382,34 @@ static PyObject * listiter_next(PyObject *self) { 
_PyListIterObject *it = (_PyListIterObject *)self; - PyListObject *seq; - PyObject *item; - - assert(it != NULL); - seq = it->it_seq; - if (seq == NULL) + Py_ssize_t index = LOAD_SSIZE(it->it_index); + if (index < 0) { return NULL; - assert(PyList_Check(seq)); - - if (it->it_index < PyList_GET_SIZE(seq)) { - item = PyList_GET_ITEM(seq, it->it_index); - ++it->it_index; - return Py_NewRef(item); } - it->it_seq = NULL; - Py_DECREF(seq); - return NULL; + PyObject *item = list_get_item_ref(it->it_seq, index); + if (item == NULL) { + // out-of-bounds + STORE_SSIZE(it->it_index, -1); +#ifndef Py_GIL_DISABLED + PyListObject *seq = it->it_seq; + it->it_seq = NULL; + Py_DECREF(seq); +#endif + return NULL; + } + STORE_SSIZE(it->it_index, index + 1); + return item; } static PyObject * listiter_len(PyObject *self, PyObject *Py_UNUSED(ignored)) { + assert(self != NULL); _PyListIterObject *it = (_PyListIterObject *)self; - Py_ssize_t len; - if (it->it_seq) { - len = PyList_GET_SIZE(it->it_seq) - it->it_index; + Py_ssize_t index = LOAD_SSIZE(it->it_index); + if (index >= 0) { + Py_ssize_t len = PyList_GET_SIZE(it->it_seq) - index; if (len >= 0) return PyLong_FromSsize_t(len); } @@ -3420,8 +3430,8 @@ listiter_setstate(PyObject *self, PyObject *state) if (index == -1 && PyErr_Occurred()) return NULL; if (it->it_seq != NULL) { - if (index < 0) - index = 0; + if (index < -1) + index = -1; else if (index > PyList_GET_SIZE(it->it_seq)) index = PyList_GET_SIZE(it->it_seq); /* iterator exhausted */ it->it_index = index; @@ -3526,26 +3536,24 @@ static PyObject * listreviter_next(PyObject *self) { listreviterobject *it = (listreviterobject *)self; - PyObject *item; - Py_ssize_t index; - PyListObject *seq; - assert(it != NULL); - seq = it->it_seq; - if (seq == NULL) { - return NULL; - } + PyListObject *seq = it->it_seq; assert(PyList_Check(seq)); - index = it->it_index; - if (index>=0 && index < PyList_GET_SIZE(seq)) { - item = PyList_GET_ITEM(seq, index); - it->it_index--; - return 
Py_NewRef(item); + Py_ssize_t index = LOAD_SSIZE(it->it_index); + if (index < 0) { + return NULL; + } + PyObject *item = list_get_item_ref(seq, index); + if (item != NULL) { + STORE_SSIZE(it->it_index, index - 1); + return item; } - it->it_index = -1; + STORE_SSIZE(it->it_index, -1); +#ifndef Py_GIL_DISABLED it->it_seq = NULL; Py_DECREF(seq); +#endif return NULL; } @@ -3553,7 +3561,8 @@ static PyObject * listreviter_len(PyObject *self, PyObject *Py_UNUSED(ignored)) { listreviterobject *it = (listreviterobject *)self; - Py_ssize_t len = it->it_index + 1; + Py_ssize_t index = LOAD_SSIZE(it->it_index); + Py_ssize_t len = index + 1; if (it->it_seq == NULL || PyList_GET_SIZE(it->it_seq) < len) len = 0; return PyLong_FromSsize_t(len); @@ -3588,6 +3597,7 @@ static PyObject * listiter_reduce_general(void *_it, int forward) { PyObject *list; + PyObject *iter; /* _PyEval_GetBuiltin can invoke arbitrary code, * call must be before access of iterator pointers. @@ -3595,29 +3605,21 @@ listiter_reduce_general(void *_it, int forward) /* the objects are not the same, index is of different types! 
*/ if (forward) { - PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter)); - if (!iter) { - return NULL; - } + iter = _PyEval_GetBuiltin(&_Py_ID(iter)); _PyListIterObject *it = (_PyListIterObject *)_it; - if (it->it_seq) { + if (it->it_index >= 0) { return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index); } - Py_DECREF(iter); } else { - PyObject *reversed = _PyEval_GetBuiltin(&_Py_ID(reversed)); - if (!reversed) { - return NULL; - } + iter = _PyEval_GetBuiltin(&_Py_ID(reversed)); listreviterobject *it = (listreviterobject *)_it; - if (it->it_seq) { - return Py_BuildValue("N(O)n", reversed, it->it_seq, it->it_index); + if (it->it_index >= 0) { + return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index); } - Py_DECREF(reversed); } /* empty iterator, create an empty list */ list = PyList_New(0); if (list == NULL) return NULL; - return Py_BuildValue("N(N)", _PyEval_GetBuiltin(&_Py_ID(iter)), list); + return Py_BuildValue("N(N)", iter, list); } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 96b97ca4be6d93..28ade64e056ad7 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2606,11 +2606,14 @@ dummy_func( assert(Py_TYPE(iter) == &PyListIter_Type); STAT_INC(FOR_ITER, hit); PyListObject *seq = it->it_seq; - if (seq == NULL || it->it_index >= PyList_GET_SIZE(seq)) { + if ((size_t)it->it_index >= (size_t)PyList_GET_SIZE(seq)) { + it->it_index = -1; + #ifndef Py_GIL_DISABLED if (seq != NULL) { it->it_seq = NULL; Py_DECREF(seq); } + #endif Py_DECREF(iter); STACK_SHRINK(1); /* Jump forward oparg, then skip following END_FOR and POP_TOP instructions */ @@ -2624,8 +2627,7 @@ dummy_func( _PyListIterObject *it = (_PyListIterObject *)iter; assert(Py_TYPE(iter) == &PyListIter_Type); PyListObject *seq = it->it_seq; - DEOPT_IF(seq == NULL); - DEOPT_IF(it->it_index >= PyList_GET_SIZE(seq)); + DEOPT_IF((size_t)it->it_index >= (size_t)PyList_GET_SIZE(seq)); } op(_ITER_NEXT_LIST, (iter -- iter, next)) { diff --git a/Python/executor_cases.c.h 
b/Python/executor_cases.c.h index 58d238320276f4..7a0e0e43be019c 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2201,8 +2201,7 @@ _PyListIterObject *it = (_PyListIterObject *)iter; assert(Py_TYPE(iter) == &PyListIter_Type); PyListObject *seq = it->it_seq; - if (seq == NULL) goto deoptimize; - if (it->it_index >= PyList_GET_SIZE(seq)) goto deoptimize; + if ((size_t)it->it_index >= (size_t)PyList_GET_SIZE(seq)) goto deoptimize; break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index a49223e4db5318..177bc327454f63 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2541,11 +2541,14 @@ assert(Py_TYPE(iter) == &PyListIter_Type); STAT_INC(FOR_ITER, hit); PyListObject *seq = it->it_seq; - if (seq == NULL || it->it_index >= PyList_GET_SIZE(seq)) { + if ((size_t)it->it_index >= (size_t)PyList_GET_SIZE(seq)) { + it->it_index = -1; + #ifndef Py_GIL_DISABLED if (seq != NULL) { it->it_seq = NULL; Py_DECREF(seq); } + #endif Py_DECREF(iter); STACK_SHRINK(1); /* Jump forward oparg, then skip following END_FOR and POP_TOP instructions */ From 17773fcb863d5aef299487b07207c2ced8e9477e Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Wed, 14 Feb 2024 12:27:39 -0500 Subject: [PATCH 123/126] gh-115441: Fix missing braces warning (#115460) Removes `_py_object_state_INIT`. We want to initialize the `object_state` field to zero. 
--- Include/internal/pycore_runtime_init.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 571a7d612c94e2..7a05c105d7bf12 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -169,7 +169,6 @@ extern PyTypeObject _PyExc_MemoryError; { .threshold = 10, }, \ }, \ }, \ - .object_state = _py_object_state_INIT(INTERP), \ .dtoa = _dtoa_state_INIT(&(INTERP)), \ .dict_state = _dict_state_INIT, \ .func_state = { \ @@ -206,16 +205,6 @@ extern PyTypeObject _PyExc_MemoryError; .context_ver = 1, \ } -#ifdef Py_TRACE_REFS -# define _py_object_state_INIT(INTERP) \ - { \ - .refchain = NULL, \ - } -#else -# define _py_object_state_INIT(INTERP) \ - { 0 } -#endif - // global objects From 49e8fdc1df41b6547fb3255f9e3a44dfb3b81fe0 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Wed, 14 Feb 2024 19:03:20 +0100 Subject: [PATCH 124/126] Docs: spell out sentence about ndbm/gdbm file formats (#115470) --- Doc/library/dbm.rst | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index b4f83d454ac651..227b55c4315419 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -61,10 +61,6 @@ the Oracle Berkeley DB. The Unix file access mode of the file (default: octal ``0o666``), used only when the database has to be created. -.. |incompat_note| replace:: - The file formats created by :mod:`dbm.gnu` and :mod:`dbm.ndbm` are incompatible - and can not be used interchangeably. - .. function:: open(file, flag='r', mode=0o666) Open a database and return the corresponding database object. @@ -205,7 +201,10 @@ The :mod:`dbm.gnu` module provides an interface to the :abbr:`GDBM (GNU dbm)` library, similar to the :mod:`dbm.ndbm` module, but with additional functionality like crash tolerance. -.. note:: |incompat_note| +.. 
note:: + + The file formats created by :mod:`dbm.gnu` and :mod:`dbm.ndbm` are incompatible + and can not be used interchangeably. .. exception:: error @@ -314,7 +313,10 @@ The :mod:`dbm.ndbm` module provides an interface to the This module can be used with the "classic" NDBM interface or the :abbr:`GDBM (GNU dbm)` compatibility interface. -.. note:: |incompat_note| +.. note:: + + The file formats created by :mod:`dbm.gnu` and :mod:`dbm.ndbm` are incompatible + and can not be used interchangeably. .. warning:: From 889cc43cb14a1b8c532a56680a93636507b9987a Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Wed, 14 Feb 2024 13:47:15 -0600 Subject: [PATCH 125/126] gh-112302: Move pip SBOM discovery to release-tools (#115360) --- Misc/sbom.spdx.json | 670 ----------------------------------- Tools/build/generate_sbom.py | 251 ------------- 2 files changed, 921 deletions(-) diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json index 03b2db20553e56..e28eaea81d6aae 100644 --- a/Misc/sbom.spdx.json +++ b/Misc/sbom.spdx.json @@ -1554,20 +1554,6 @@ } ], "fileName": "Modules/_decimal/libmpdec/vcdiv64.asm" - }, - { - "SPDXID": "SPDXRef-FILE-Lib-ensurepip-bundled-pip-24.0-py3-none-any.whl", - "checksums": [ - { - "algorithm": "SHA1", - "checksumValue": "e44313ae1e6af3c2bd3b60ab2fa8c34308d00555" - }, - { - "algorithm": "SHA256", - "checksumValue": "ba0d021a166865d2265246961bec0152ff124de910c5cc39f1156ce3fa7c69dc" - } - ], - "fileName": "Lib/ensurepip/_bundled/pip-24.0-py3-none-any.whl" } ], "packages": [ @@ -1680,660 +1666,9 @@ "originator": "Organization: bytereef.org", "primaryPackagePurpose": "SOURCE", "versionInfo": "2.5.1" - }, - { - "SPDXID": "SPDXRef-PACKAGE-cachecontrol", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "95dedbec849f46dda3137866dc28b9d133fc9af55f5b805ab1291833e4457aa4" - } - ], - "downloadLocation": 
"https://files.pythonhosted.org/packages/1d/e3/a22348e6226dcd585d5a4b5f0175b3a16dabfd3912cbeb02f321d00e56c7/cachecontrol-0.13.1-py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/cachecontrol@0.13.1", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "cachecontrol", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "0.13.1" - }, - { - "SPDXID": "SPDXRef-PACKAGE-colorama", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" - } - ], - "downloadLocation": "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/colorama@0.4.6", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "colorama", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "0.4.6" - }, - { - "SPDXID": "SPDXRef-PACKAGE-distlib", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784" - } - ], - "downloadLocation": "https://files.pythonhosted.org/packages/8e/41/9307e4f5f9976bc8b7fea0b66367734e8faf3ec84bc0d412d8cfabbb66cd/distlib-0.3.8-py2.py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/distlib@0.3.8", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "distlib", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "0.3.8" - }, - { - "SPDXID": "SPDXRef-PACKAGE-distro", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "99522ca3e365cac527b44bde033f64c6945d90eb9f769703caaec52b09bbd3ff" - } - ], - "downloadLocation": 
"https://files.pythonhosted.org/packages/f4/2c/c90a3adaf0ddb70afe193f5ebfb539612af57cffe677c3126be533df3098/distro-1.8.0-py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/distro@1.8.0", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "distro", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "1.8.0" - }, - { - "SPDXID": "SPDXRef-PACKAGE-msgpack", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "525228efd79bb831cf6830a732e2e80bc1b05436b086d4264814b4b2955b2fa9" - } - ], - "downloadLocation": "https://files.pythonhosted.org/packages/9f/4a/36d936e54cf71e23ad276564465f6a54fb129e3d61520b76e13e0bb29167/msgpack-1.0.5-cp310-cp310-macosx_10_9_universal2.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/msgpack@1.0.5", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "msgpack", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "1.0.5" - }, - { - "SPDXID": "SPDXRef-PACKAGE-packaging", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522" - } - ], - "downloadLocation": "https://files.pythonhosted.org/packages/05/8e/8de486cbd03baba4deef4142bd643a3e7bbe954a784dc1bb17142572d127/packaging-21.3-py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/packaging@21.3", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "packaging", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "21.3" - }, - { - "SPDXID": "SPDXRef-PACKAGE-platformdirs", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "cec7b889196b9144d088e4c57d9ceef7374f6c39694ad1577a0aab50d27ea28c" - } - ], - "downloadLocation": 
"https://files.pythonhosted.org/packages/9e/d8/563a9fc17153c588c8c2042d2f0f84a89057cdb1c30270f589c88b42d62c/platformdirs-3.8.1-py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/platformdirs@3.8.1", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "platformdirs", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "3.8.1" - }, - { - "SPDXID": "SPDXRef-PACKAGE-pyparsing", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "d554a96d1a7d3ddaf7183104485bc19fd80543ad6ac5bdb6426719d766fb06c1" - } - ], - "downloadLocation": "https://files.pythonhosted.org/packages/a4/24/6ae4c9c45cf99d96b06b5d99e25526c060303171fb0aea9da2bfd7dbde93/pyparsing-3.1.0-py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/pyparsing@3.1.0", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "pyparsing", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "3.1.0" - }, - { - "SPDXID": "SPDXRef-PACKAGE-pyproject-hooks", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "283c11acd6b928d2f6a7c73fa0d01cb2bdc5f07c57a2eeb6e83d5e56b97976f8" - } - ], - "downloadLocation": "https://files.pythonhosted.org/packages/d5/ea/9ae603de7fbb3df820b23a70f6aff92bf8c7770043254ad8d2dc9d6bcba4/pyproject_hooks-1.0.0-py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/pyproject-hooks@1.0.0", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "pyproject-hooks", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "1.0.0" - }, - { - "SPDXID": "SPDXRef-PACKAGE-requests", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f" - } - ], - "downloadLocation": 
"https://files.pythonhosted.org/packages/70/8e/0e2d847013cb52cd35b38c009bb167a1a26b2ce6cd6965bf26b47bc0bf44/requests-2.31.0-py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/requests@2.31.0", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "requests", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "2.31.0" - }, - { - "SPDXID": "SPDXRef-PACKAGE-certifi", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9" - } - ], - "downloadLocation": "https://files.pythonhosted.org/packages/4c/dd/2234eab22353ffc7d94e8d13177aaa050113286e93e7b40eae01fbf7c3d9/certifi-2023.7.22-py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/certifi@2023.7.22", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "certifi", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "2023.7.22" - }, - { - "SPDXID": "SPDXRef-PACKAGE-chardet", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "362777fb014af596ad31334fde1e8c327dfdb076e1960d1694662d46a6917ab9" - } - ], - "downloadLocation": "https://files.pythonhosted.org/packages/74/8f/8fc49109009e8d2169d94d72e6b1f4cd45c13d147ba7d6170fb41f22b08f/chardet-5.1.0-py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/chardet@5.1.0", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "chardet", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "5.1.0" - }, - { - "SPDXID": "SPDXRef-PACKAGE-idna", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2" - } - ], - "downloadLocation": 
"https://files.pythonhosted.org/packages/fc/34/3030de6f1370931b9dbb4dad48f6ab1015ab1d32447850b9fc94e60097be/idna-3.4-py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/idna@3.4", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "idna", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "3.4" - }, - { - "SPDXID": "SPDXRef-PACKAGE-rich", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "8f87bc7ee54675732fa66a05ebfe489e27264caeeff3728c945d25971b6485ec" - } - ], - "downloadLocation": "https://files.pythonhosted.org/packages/fc/1e/482e5eec0b89b593e81d78f819a9412849814e22225842b598908e7ac560/rich-13.4.2-py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/rich@13.4.2", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "rich", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "13.4.2" - }, - { - "SPDXID": "SPDXRef-PACKAGE-pygments", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "db2db3deb4b4179f399a09054b023b6a586b76499d36965813c71aa8ed7b5fd1" - } - ], - "downloadLocation": "https://files.pythonhosted.org/packages/34/a7/37c8d68532ba71549db4212cb036dbd6161b40e463aba336770e80c72f84/Pygments-2.15.1-py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/pygments@2.15.1", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "pygments", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "2.15.1" - }, - { - "SPDXID": "SPDXRef-PACKAGE-typing-extensions", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36" - } - ], - "downloadLocation": 
"https://files.pythonhosted.org/packages/ec/6b/63cc3df74987c36fe26157ee12e09e8f9db4de771e0f3404263117e75b95/typing_extensions-4.7.1-py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/typing_extensions@4.7.1", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "typing_extensions", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "4.7.1" - }, - { - "SPDXID": "SPDXRef-PACKAGE-resolvelib", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "d2da45d1a8dfee81bdd591647783e340ef3bcb104b54c383f70d422ef5cc7dbf" - } - ], - "downloadLocation": "https://files.pythonhosted.org/packages/d2/fc/e9ccf0521607bcd244aa0b3fbd574f71b65e9ce6a112c83af988bbbe2e23/resolvelib-1.0.1-py2.py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/resolvelib@1.0.1", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "resolvelib", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "1.0.1" - }, - { - "SPDXID": "SPDXRef-PACKAGE-setuptools", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "11e52c67415a381d10d6b462ced9cfb97066179f0e871399e006c4ab101fc85f" - } - ], - "downloadLocation": "https://files.pythonhosted.org/packages/c7/42/be1c7bbdd83e1bfb160c94b9cafd8e25efc7400346cf7ccdbdb452c467fa/setuptools-68.0.0-py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/setuptools@68.0.0", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "setuptools", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "68.0.0" - }, - { - "SPDXID": "SPDXRef-PACKAGE-six", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" - } - ], - "downloadLocation": 
"https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/six@1.16.0", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "six", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "1.16.0" - }, - { - "SPDXID": "SPDXRef-PACKAGE-tenacity", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "2f277afb21b851637e8f52e6a613ff08734c347dc19ade928e519d7d2d8569b0" - } - ], - "downloadLocation": "https://files.pythonhosted.org/packages/e7/b0/c23bd61e1b32c9b96fbca996c87784e196a812da8d621d8d04851f6c8181/tenacity-8.2.2-py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/tenacity@8.2.2", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "tenacity", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "8.2.2" - }, - { - "SPDXID": "SPDXRef-PACKAGE-tomli", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc" - } - ], - "downloadLocation": "https://files.pythonhosted.org/packages/97/75/10a9ebee3fd790d20926a90a2547f0bf78f371b2f13aa822c759680ca7b9/tomli-2.0.1-py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/tomli@2.0.1", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "tomli", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "2.0.1" - }, - { - "SPDXID": "SPDXRef-PACKAGE-truststore", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "e37a5642ae9fc48caa8f120b6283d77225d600d224965a672c9e8ef49ce4bb4c" - } - ], - "downloadLocation": 
"https://files.pythonhosted.org/packages/20/56/7811d5439b6a56374f274a8672d8f18b4deadadeb3a9f0c86424b98b6f96/truststore-0.8.0-py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/truststore@0.8.0", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "truststore", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "0.8.0" - }, - { - "SPDXID": "SPDXRef-PACKAGE-webencodings", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78" - } - ], - "downloadLocation": "https://files.pythonhosted.org/packages/f4/24/2a3e3df732393fed8b3ebf2ec078f05546de641fe1b667ee316ec1dcf3b7/webencodings-0.5.1-py2.py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/webencodings@0.5.1", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "webencodings", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "0.5.1" - }, - { - "SPDXID": "SPDXRef-PACKAGE-urllib3", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "94a757d178c9be92ef5539b8840d48dc9cf1b2709c9d6b588232a055c524458b" - } - ], - "downloadLocation": "https://files.pythonhosted.org/packages/48/fe/a5c6cc46e9fe9171d7ecf0f33ee7aae14642f8d74baa7af4d7840f9358be/urllib3-1.26.17-py2.py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/urllib3@1.26.17", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "urllib3", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "1.26.17" - }, - { - "SPDXID": "SPDXRef-PACKAGE-pip", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "ba0d021a166865d2265246961bec0152ff124de910c5cc39f1156ce3fa7c69dc" - } - ], - "downloadLocation": 
"https://files.pythonhosted.org/packages/8a/6a/19e9fe04fca059ccf770861c7d5721ab4c2aebc539889e97c7977528a53b/pip-24.0-py3-none-any.whl", - "externalRefs": [ - { - "referenceCategory": "SECURITY", - "referenceLocator": "cpe:2.3:a:pypa:pip:24.0:*:*:*:*:*:*:*", - "referenceType": "cpe23Type" - }, - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": "pkg:pypi/pip@24.0", - "referenceType": "purl" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "pip", - "originator": "Organization: Python Packaging Authority", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "24.0" } ], "relationships": [ - { - "relatedSpdxElement": "SPDXRef-PACKAGE-cachecontrol", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-certifi", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-chardet", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-colorama", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-distlib", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-distro", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-idna", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-msgpack", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-packaging", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-platformdirs", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { 
- "relatedSpdxElement": "SPDXRef-PACKAGE-pygments", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-pyparsing", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-pyproject-hooks", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-requests", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-resolvelib", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-rich", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-setuptools", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-six", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-tenacity", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-tomli", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-truststore", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-typing-extensions", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-urllib3", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, - { - "relatedSpdxElement": "SPDXRef-PACKAGE-webencodings", - "relationshipType": "DEPENDS_ON", - "spdxElementId": "SPDXRef-PACKAGE-pip" - }, { "relatedSpdxElement": 
"SPDXRef-FILE-Modules-expat-COPYING", "relationshipType": "CONTAINS", @@ -2888,11 +2223,6 @@ "relatedSpdxElement": "SPDXRef-FILE-Modules-decimal-libmpdec-vcdiv64.asm", "relationshipType": "CONTAINS", "spdxElementId": "SPDXRef-PACKAGE-mpdecimal" - }, - { - "relatedSpdxElement": "SPDXRef-FILE-Lib-ensurepip-bundled-pip-24.0-py3-none-any.whl", - "relationshipType": "CONTAINS", - "spdxElementId": "SPDXRef-PACKAGE-pip" } ], "spdxVersion": "SPDX-2.3" diff --git a/Tools/build/generate_sbom.py b/Tools/build/generate_sbom.py index 82016dc408639d..201c81c4d14d79 100644 --- a/Tools/build/generate_sbom.py +++ b/Tools/build/generate_sbom.py @@ -53,8 +53,6 @@ class PackageFiles(typing.NamedTuple): # values to 'exclude' if we create new files within tracked # directories that aren't sourced from third-party packages. PACKAGE_TO_FILES = { - # NOTE: pip's entry in this structure is automatically generated in - # the 'discover_pip_sbom_package()' function below. "mpdecimal": PackageFiles( include=["Modules/_decimal/libmpdec/**"] ), @@ -127,264 +125,15 @@ def filter_gitignored_paths(paths: list[str]) -> list[str]: return sorted([line.split()[-1] for line in git_check_ignore_lines if line.startswith("::")]) -def fetch_package_metadata_from_pypi(project: str, version: str, filename: str | None = None) -> tuple[str, str] | None: - """ - Fetches the SHA256 checksum and download location from PyPI. - If we're given a filename then we match with that, otherwise we use wheels. - """ - # Get pip's download location from PyPI. Check that the checksum is correct too. - try: - raw_text = urlopen(f"https://pypi.org/pypi/{project}/{version}/json").read() - release_metadata = json.loads(raw_text) - url: dict[str, typing.Any] - - # Look for a matching artifact filename and then check - # its remote checksum to the local one. - for url in release_metadata["urls"]: - # pip can only use Python-only dependencies, so there's - # no risk of picking the 'incorrect' wheel here. 
- if ( - (filename is None and url["packagetype"] == "bdist_wheel") - or (filename is not None and url["filename"] == filename) - ): - break - else: - raise ValueError(f"No matching filename on PyPI for '{filename}'") - - # Successfully found the download URL for the matching artifact. - download_url = url["url"] - checksum_sha256 = url["digests"]["sha256"] - return download_url, checksum_sha256 - - except (OSError, ValueError) as e: - # Fail if we're running in CI where we should have an internet connection. - error_if( - "CI" in os.environ, - f"Couldn't fetch metadata for project '{project}' from PyPI: {e}" - ) - return None - - -def find_ensurepip_pip_wheel() -> pathlib.Path | None: - """Try to find the pip wheel bundled in ensurepip. If missing return None""" - - ensurepip_bundled_dir = CPYTHON_ROOT_DIR / "Lib/ensurepip/_bundled" - - pip_wheels = [] - try: - for wheel_filename in os.listdir(ensurepip_bundled_dir): - if wheel_filename.startswith("pip-"): - pip_wheels.append(wheel_filename) - else: - print(f"Unexpected wheel in ensurepip: '{wheel_filename}'") - sys.exit(1) - - # Ignore this error, likely caused by downstream distributors - # deleting the 'ensurepip/_bundled' directory. - except FileNotFoundError: - pass - - if len(pip_wheels) == 0: - return None - elif len(pip_wheels) > 1: - print("Multiple pip wheels detected in 'Lib/ensurepip/_bundled'") - sys.exit(1) - # Otherwise return the one pip wheel. - return ensurepip_bundled_dir / pip_wheels[0] - - -def maybe_remove_pip_and_deps_from_sbom(sbom_data: dict[str, typing.Any]) -> None: - """ - Removes pip and its dependencies from the SBOM data - if the pip wheel is removed from ensurepip. This is done - by redistributors of Python and pip. - """ - - # If there's a wheel we don't remove anything. - if find_ensurepip_pip_wheel() is not None: - return - - # Otherwise we traverse the relationships - # to find dependent packages to remove. 
- sbom_pip_spdx_id = spdx_id("SPDXRef-PACKAGE-pip") - sbom_spdx_ids_to_remove = {sbom_pip_spdx_id} - - # Find all package SPDXIDs that pip depends on. - for sbom_relationship in sbom_data["relationships"]: - if ( - sbom_relationship["relationshipType"] == "DEPENDS_ON" - and sbom_relationship["spdxElementId"] == sbom_pip_spdx_id - ): - sbom_spdx_ids_to_remove.add(sbom_relationship["relatedSpdxElement"]) - - # Remove all the packages and relationships. - sbom_data["packages"] = [ - sbom_package for sbom_package in sbom_data["packages"] - if sbom_package["SPDXID"] not in sbom_spdx_ids_to_remove - ] - sbom_data["relationships"] = [ - sbom_relationship for sbom_relationship in sbom_data["relationships"] - if sbom_relationship["relatedSpdxElement"] not in sbom_spdx_ids_to_remove - ] - - -def discover_pip_sbom_package(sbom_data: dict[str, typing.Any]) -> None: - """pip is a part of a packaging ecosystem (Python, surprise!) so it's actually - automatable to discover the metadata we need like the version and checksums - so let's do that on behalf of our friends at the PyPA. This function also - discovers vendored packages within pip and fetches their metadata. - """ - global PACKAGE_TO_FILES - - pip_wheel_filepath = find_ensurepip_pip_wheel() - if pip_wheel_filepath is None: - return # There's no pip wheel, nothing to discover. - - # Add the wheel filename to the list of files so the SBOM file - # and relationship generator can work its magic on the wheel too. - PACKAGE_TO_FILES["pip"] = PackageFiles( - include=[str(pip_wheel_filepath.relative_to(CPYTHON_ROOT_DIR))] - ) - - # Wheel filename format puts the version right after the project name. 
- pip_version = pip_wheel_filepath.name.split("-")[1] - pip_checksum_sha256 = hashlib.sha256( - pip_wheel_filepath.read_bytes() - ).hexdigest() - - pip_metadata = fetch_package_metadata_from_pypi( - project="pip", - version=pip_version, - filename=pip_wheel_filepath.name, - ) - # We couldn't fetch any metadata from PyPI, - # so we give up on verifying if we're not in CI. - if pip_metadata is None: - return - - pip_download_url, pip_actual_sha256 = pip_metadata - if pip_actual_sha256 != pip_checksum_sha256: - raise ValueError("Unexpected") - - # Parse 'pip/_vendor/vendor.txt' from the wheel for sub-dependencies. - with zipfile.ZipFile(pip_wheel_filepath) as whl: - vendor_txt_data = whl.read("pip/_vendor/vendor.txt").decode() - - # With this version regex we're assuming that pip isn't using pre-releases. - # If any version doesn't match we get a failure below, so we're safe doing this. - version_pin_re = re.compile(r"^([a-zA-Z0-9_.-]+)==([0-9.]*[0-9])$") - sbom_pip_dependency_spdx_ids = set() - for line in vendor_txt_data.splitlines(): - line = line.partition("#")[0].strip() # Strip comments and whitespace. - if not line: # Skip empty lines. - continue - - # Non-empty lines we must be able to match. - match = version_pin_re.match(line) - error_if(match is None, f"Couldn't parse line from pip vendor.txt: '{line}'") - assert match is not None # Make mypy happy. - - # Parse out and normalize the project name. - project_name, project_version = match.groups() - project_name = project_name.lower() - - # At this point if pip's metadata fetch succeeded we should - # expect this request to also succeed. - project_metadata = ( - fetch_package_metadata_from_pypi(project_name, project_version) - ) - assert project_metadata is not None - project_download_url, project_checksum_sha256 = project_metadata - - # Update our SBOM data with what we received from PyPI. - # Don't overwrite any existing values. 
- sbom_project_spdx_id = spdx_id(f"SPDXRef-PACKAGE-{project_name}") - sbom_pip_dependency_spdx_ids.add(sbom_project_spdx_id) - for package in sbom_data["packages"]: - if package["SPDXID"] != sbom_project_spdx_id: - continue - - # Only thing missing from this blob is the `licenseConcluded`, - # that needs to be triaged by human maintainers if the list changes. - package.update({ - "SPDXID": sbom_project_spdx_id, - "name": project_name, - "versionInfo": project_version, - "downloadLocation": project_download_url, - "checksums": [ - {"algorithm": "SHA256", "checksumValue": project_checksum_sha256} - ], - "externalRefs": [ - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": f"pkg:pypi/{project_name}@{project_version}", - "referenceType": "purl", - }, - ], - "primaryPackagePurpose": "SOURCE" - }) - break - - PACKAGE_TO_FILES[project_name] = PackageFiles(include=None) - - # Remove pip from the existing SBOM packages if it's there - # and then overwrite its entry with our own generated one. 
- sbom_pip_spdx_id = spdx_id("SPDXRef-PACKAGE-pip") - sbom_data["packages"] = [ - sbom_package - for sbom_package in sbom_data["packages"] - if sbom_package["name"] != "pip" - ] - sbom_data["packages"].append( - { - "SPDXID": sbom_pip_spdx_id, - "name": "pip", - "versionInfo": pip_version, - "originator": "Organization: Python Packaging Authority", - "licenseConcluded": "NOASSERTION", - "downloadLocation": pip_download_url, - "checksums": [ - {"algorithm": "SHA256", "checksumValue": pip_checksum_sha256} - ], - "externalRefs": [ - { - "referenceCategory": "SECURITY", - "referenceLocator": f"cpe:2.3:a:pypa:pip:{pip_version}:*:*:*:*:*:*:*", - "referenceType": "cpe23Type", - }, - { - "referenceCategory": "PACKAGE_MANAGER", - "referenceLocator": f"pkg:pypi/pip@{pip_version}", - "referenceType": "purl", - }, - ], - "primaryPackagePurpose": "SOURCE", - } - ) - for sbom_dep_spdx_id in sorted(sbom_pip_dependency_spdx_ids): - sbom_data["relationships"].append({ - "spdxElementId": sbom_pip_spdx_id, - "relatedSpdxElement": sbom_dep_spdx_id, - "relationshipType": "DEPENDS_ON" - }) - - def main() -> None: sbom_path = CPYTHON_ROOT_DIR / "Misc/sbom.spdx.json" sbom_data = json.loads(sbom_path.read_bytes()) - # Check if pip should be removed if the wheel is missing. - # We can't reset the SBOM relationship data until checking this. - maybe_remove_pip_and_deps_from_sbom(sbom_data) - # We regenerate all of this information. Package information # should be preserved though since that is edited by humans. sbom_data["files"] = [] sbom_data["relationships"] = [] - # Insert pip's SBOM metadata from the wheel. - discover_pip_sbom_package(sbom_data) - # Ensure all packages in this tool are represented also in the SBOM file. 
actual_names = {package["name"] for package in sbom_data["packages"]} expected_names = set(PACKAGE_TO_FILES) From d9f4cbe5e1e3c31518724d87d0d379d7ce6823ca Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Thu, 15 Feb 2024 03:48:11 +0800 Subject: [PATCH 126/126] Add myself to various CODEOWNERS (GH-115481) --- .github/CODEOWNERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 7933d319550576..5dbfbbb8ebaf7e 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -37,6 +37,8 @@ Python/flowgraph.c @markshannon @iritkatriel Python/ast_opt.c @isidentical Python/bytecodes.c @markshannon @gvanrossum Python/optimizer*.c @markshannon @gvanrossum +Python/optimizer_analysis.c @Fidget-Spinner +Python/tier2_redundancy_eliminator_bytecodes.c @Fidget-Spinner Lib/test/test_patma.py @brandtbucher Lib/test/test_type_*.py @JelleZijlstra Lib/test/test_capi/test_misc.py @markshannon @gvanrossum