diff --git a/.github/workflows/artifact.yaml b/.github/workflows/artifact.yaml index eae5339d..1d9e6eed 100644 --- a/.github/workflows/artifact.yaml +++ b/.github/workflows/artifact.yaml @@ -11,6 +11,7 @@ jobs: sdist: runs-on: ubuntu-22.04 + timeout-minutes: 10 strategy: fail-fast: false env: @@ -77,7 +78,7 @@ jobs: { cc: "clang", cflags: "-Os -fstrict-aliasing -fno-plt -flto=full -emit-llvm", - features: "avx512,no-panic,unstable-simd,yyjson", + features: "avx512,unstable-simd,yyjson", ldflags: "-fuse-ld=lld -Wl,-plugin-opt=also-emit-llvm -Wl,--as-needed -Wl,-zrelro,-znow", rustflags: "-C linker=clang -C link-arg=-fuse-ld=lld -C linker-plugin-lto -C lto=fat -C link-arg=-Wl,-zrelro,-znow -Z mir-opt-level=4 -Z threads=4 -D warnings", tag: null, @@ -85,17 +86,17 @@ jobs: }, ] env: - PYTHON: "${{ matrix.python.interpreter }}" - PYTHON_PACKAGE: "${{ matrix.python.package }}" - TARGET: "${{ matrix.arch.target }}" + CARGO_TARGET_DIR: "/tmp/orjson" CC: "${{ matrix.arch.cc }}" - VENV: ".venv" - FEATURES: "${{ matrix.arch.features }}" CFLAGS: "${{ matrix.arch.cflags }}" + COMPATIBILITY: "${{ matrix.python.compatibility }}" + FEATURES: "${{ matrix.arch.features }}" LDFLAGS: "${{ matrix.arch.ldflags }}" + PYTHON: "${{ matrix.python.interpreter }}" + PYTHON_PACKAGE: "${{ matrix.python.package }}" RUSTFLAGS: "${{ matrix.arch.rustflags }}" - CARGO_TARGET_DIR: "/tmp/orjson" - COMPATIBILITY: "${{ matrix.python.compatibility }}" + TARGET: "${{ matrix.arch.target }}" + VENV: ".venv" steps: - name: cpuinfo @@ -167,7 +168,7 @@ jobs: { cc: "clang", cflags: "-Os -fstrict-aliasing -fno-plt -flto=full -emit-llvm", - features: "no-panic,unstable-simd,yyjson", + features: "unstable-simd,yyjson", ldflags: "-fuse-ld=lld -Wl,-plugin-opt=also-emit-llvm -Wl,--as-needed -Wl,-zrelro,-znow", rustflags: "-C linker=clang -C link-arg=-fuse-ld=lld -C linker-plugin-lto -C lto=fat -C link-arg=-Wl,-zrelro,-znow -Z mir-opt-level=4 -Z threads=4 -D warnings", tag: "aarch64", @@ -175,17 +176,17 @@ jobs: }, ] env: - PYTHON: "${{ matrix.python.interpreter }}" - PYTHON_PACKAGE: "${{ matrix.python.package }}" - TARGET: "${{ matrix.arch.target }}" + CARGO_TARGET_DIR: "/tmp/orjson" CC: "${{ matrix.arch.cc }}" - VENV: ".venv" - FEATURES: "${{ matrix.arch.features }}" CFLAGS: "${{ matrix.arch.cflags }}" + COMPATIBILITY: "${{ matrix.python.compatibility }}" + FEATURES: "${{ matrix.arch.features }}" LDFLAGS: "${{ matrix.arch.ldflags }}" + PYTHON: "${{ matrix.python.interpreter }}" + PYTHON_PACKAGE: "${{ matrix.python.package }}" RUSTFLAGS: "${{ matrix.arch.rustflags }}" - CARGO_TARGET_DIR: "/tmp/orjson" - COMPATIBILITY: "${{ matrix.python.compatibility }}" + TARGET: "${{ matrix.arch.target }}" + VENV: ".venv" steps: - name: cpuinfo @@ -241,6 +242,7 @@ jobs: musllinux: runs-on: ubuntu-24.04 + timeout-minutes: 10 strategy: fail-fast: false matrix: @@ -256,11 +258,11 @@ jobs: - target: aarch64-unknown-linux-musl arch: aarch64 platform: linux/arm64 - features: no-panic,unstable-simd,unwind,yyjson + features: unstable-simd,unwind,yyjson - target: x86_64-unknown-linux-musl arch: x86_64 platform: linux/amd64 - features: avx512,no-panic,unstable-simd,unwind,yyjson + features: avx512,unstable-simd,unwind,yyjson steps: - uses: actions/checkout@v4 @@ -317,6 +319,7 @@ jobs: manylinux_non_amd64: runs-on: ubuntu-24.04 + timeout-minutes: 20 strategy: fail-fast: false matrix: @@ -331,28 +334,35 @@ jobs: { arch: 'aarch64', cflags: '-Os -flto=full -fstrict-aliasing', - features: 'no-panic,unstable-simd,yyjson', + features: 'unstable-simd,yyjson', rustflags: '-Z 
mir-opt-level=4 -C lto=fat -D warnings', target: 'aarch64-unknown-linux-gnu', }, + { + arch: 'i686', + cflags: '-Os -flto -fstrict-aliasing', + features: 'unstable-simd,yyjson', + rustflags: '-Z mir-opt-level=4 -C lto=fat -D warnings', + target: 'i686-unknown-linux-gnu', + }, { arch: 'armv7', cflags: '-Os -flto=full -fstrict-aliasing', - features: 'no-panic,yyjson', # no SIMD + features: 'yyjson', # no SIMD rustflags: '-Z mir-opt-level=4 -C lto=fat -D warnings -C opt-level=s', target: 'armv7-unknown-linux-gnueabihf', }, { arch: 'ppc64le', cflags: '-Os -flto=full -fstrict-aliasing', - features: 'no-panic,unstable-simd,yyjson', + features: 'unstable-simd,yyjson', rustflags: '-Z mir-opt-level=4 -C lto=fat -D warnings', target: 'powerpc64le-unknown-linux-gnu', }, { arch: 's390x', cflags: '-Os -flto=full -fstrict-aliasing -march=z10', - features: 'no-panic,yyjson', + features: 'yyjson', rustflags: '-Z mir-opt-level=4 -C lto=fat -D warnings -C target-cpu=z10', target: 's390x-unknown-linux-gnu', }, @@ -389,7 +399,8 @@ jobs: retention-days: 1 macos_aarch64: - runs-on: macos-14 + runs-on: macos-15 + timeout-minutes: 10 strategy: fail-fast: false matrix: @@ -414,7 +425,6 @@ jobs: - uses: actions/setup-python@v5 with: python-version: "${{ matrix.python.version }}" - allow-prereleases: true - uses: dtolnay/rust-toolchain@master with: @@ -426,6 +436,8 @@ jobs: run: | cargo fetch --target aarch64-apple-darwin & + export PATH=$HOME/.cargo/bin:$HOME/.local/bin:$PATH + curl -LsSf https://astral.sh/uv/install.sh | sh uv venv --python python${{ matrix.python.version }} uv pip install --upgrade "maturin>=1,<2" -r test/requirements.txt -r integration/requirements.txt @@ -435,11 +447,12 @@ jobs: - name: maturin run: | - PATH=$HOME/.cargo/bin:$PATH \ + export PATH=$HOME/.cargo/bin:$HOME/.local/bin:$PATH + MACOSX_DEPLOYMENT_TARGET="${{ matrix.python.macosx_target }}" \ PYO3_CROSS_LIB_DIR=$(python -c "import sysconfig;print(sysconfig.get_config_var('LIBDIR'))") \ maturin build --release --strip \ - --features=no-panic,unstable-simd,yyjson \ + --features=unstable-simd,yyjson \ --interpreter python${{ matrix.python.version }} \ --target=universal2-apple-darwin uv pip install target/wheels/orjson*.whl @@ -463,6 +476,7 @@ jobs: macos_amd64: runs-on: macos-13 + timeout-minutes: 10 strategy: fail-fast: false matrix: @@ -496,6 +510,8 @@ jobs: run: | cargo fetch --target aarch64-apple-darwin & + export PATH=$HOME/.cargo/bin:$HOME/.local/bin:$PATH + curl -LsSf https://astral.sh/uv/install.sh | sh uv venv --python python${{ matrix.python.version }} uv pip install --upgrade "maturin>=1,<2" -r test/requirements.txt -r integration/requirements.txt @@ -505,11 +521,12 @@ jobs: - name: maturin run: | - PATH=$HOME/.cargo/bin:$PATH \ + export PATH=$HOME/.cargo/bin:$HOME/.local/bin:$PATH + MACOSX_DEPLOYMENT_TARGET="${{ matrix.python.macosx_target }}" \ PYO3_CROSS_LIB_DIR=$(python -c "import sysconfig;print(sysconfig.get_config_var('LIBDIR'))") \ maturin build --release --strip \ - --features=no-panic,unstable-simd,yyjson \ + --features=unstable-simd,yyjson \ --interpreter python${{ matrix.python.version }} \ --target=universal2-apple-darwin uv pip install target/wheels/orjson*.whl @@ -531,9 +548,75 @@ jobs: overwrite: true retention-days: 1 + windows: + runs-on: windows-2022 + timeout-minutes: 10 + strategy: + fail-fast: false + matrix: + python: [ + { version: '3.13' }, + { version: '3.12' }, + { version: '3.11' }, + { version: '3.10' }, + { version: '3.9' }, + { version: '3.8' }, + ] + platform: [ + { arch: "x64", target: 
"x86_64-pc-windows-msvc" }, + { arch: "x86", target: "i686-pc-windows-msvc" }, + ] + env: + CFLAGS: "-Os" + LDFLAGS: "-Wl,--as-needed" + RUSTFLAGS: "-C lto=fat -Z mir-opt-level=4 -D warnings" + steps: + + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "${{ matrix.python.version }}" + architecture: "${{ matrix.platform.arch }}" + + - uses: dtolnay/rust-toolchain@master + with: + toolchain: "${{ env.RUST_TOOLCHAIN }}" + targets: "${{ matrix.platform.target }}" + components: "rust-src" + + - name: Build environment + run: | + cargo fetch --target "${{ matrix.platform.target }}" & + + python.exe -m pip install --upgrade pip "maturin>=1,<2" wheel + python.exe -m pip install -r test\requirements.txt -r integration\requirements.txt + + mkdir .cargo + cp ci\config.toml .cargo\config.toml + + - name: maturin + run: | + maturin.exe build --release --strip --features=unstable-simd,yyjson --target="${{ matrix.platform.target }}" + python.exe -m pip install orjson --no-index --find-links target\wheels + + - run: python.exe -m pytest -s -rxX -v test + env: + PYTHONMALLOC: "debug" + + - name: Store wheels + if: "startsWith(github.ref, 'refs/tags/')" + uses: actions/upload-artifact@v4 + with: + name: orjson_windows_${{ matrix.platform.arch }}_${{ matrix.python.version }} + path: target\wheels + overwrite: true + retention-days: 1 + pypi: name: PyPI runs-on: ubuntu-24.04 + timeout-minutes: 10 if: "startsWith(github.ref, 'refs/tags/')" needs: [ macos_aarch64, @@ -543,6 +626,7 @@ jobs: manylinux_non_amd64, musllinux, sdist, + windows, ] environment: name: pypi diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index e97ed4b5..6cd1a711 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -8,7 +8,7 @@ jobs: with: python-version: "3.12" - run: curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain=stable --profile=default -y - - run: pip install -U ruff==0.7.1 mypy==1.13.0 + - run: pip install -U ruff==0.8.0 mypy==1.13.0 - uses: actions/checkout@v4 diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml index ef9d5310..3dc0091d 100644 --- a/.github/workflows/stale.yaml +++ b/.github/workflows/stale.yaml @@ -10,7 +10,7 @@ jobs: stale: runs-on: ubuntu-latest steps: - - uses: actions/stale@v8 + - uses: actions/stale@v9 with: days-before-stale: 7 days-before-close: 1 diff --git a/Cargo.lock b/Cargo.lock index fde9db8d..fd22bf98 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -25,9 +25,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.31" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f" +checksum = "fd9de9f2205d5ef3fd67e685b0df337994ddd4495e2a28d185500d0e1edfea47" dependencies = [ "shlex", ] @@ -86,12 +86,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.11" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" -dependencies = [ - "no-panic", -] +checksum = "540654e97a3f4470a492cd30ff187bc95d89557a903a2bbf112e2fae98104ef2" [[package]] name = "itoap" @@ -107,9 +104,9 @@ checksum = "b9d9d414fc817d3e3d62b2598616733f76c4cc74fbac96069674739b881295c8" [[package]] name = "libc" -version = "0.2.161" +version = "0.2.164" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" +checksum = 
"433bfe06b8c75da9b2e3fbea6e5329ff87748f0b144ef75306e674c3f6f7c13f" [[package]] name = "memchr" @@ -117,17 +114,6 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" -[[package]] -name = "no-panic" -version = "0.1.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8540b7d99a20166178b42a05776aef900cdbfec397f861dfc7819bf1d7760b3d" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "once_cell" version = "1.20.2" @@ -163,9 +149,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.89" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] @@ -206,24 +192,21 @@ name = "ryu" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" -dependencies = [ - "no-panic", -] [[package]] name = "serde" -version = "1.0.214" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.214" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", @@ -232,9 +215,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.132" +version = "1.0.133" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" +checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" dependencies = [ "itoa", "memchr", @@ -268,9 +251,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "syn" -version = "2.0.86" +version = "2.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89275301d38033efb81a6e60e3497e734dfcc62571f2854bf4b16690398824c" +checksum = "44d46482f1c1c87acd84dea20c1bf5ebff4c757009ed6bf19cfd36fb10e92c4e" dependencies = [ "proc-macro2", "quote", @@ -285,9 +268,9 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "unicode-ident" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" [[package]] name = "unwinding" diff --git a/Cargo.toml b/Cargo.toml index 516f4d0e..418df3f1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,11 +38,6 @@ unstable-simd = [] # Include runtime-detected functions that use AVX512VL. Requires unstable-simd and amd64. avx512 = [] -no-panic = [ - "itoa/no-panic", - "ryu/no-panic", -] - # Avoid bundling libgcc on musl. unwind = ["unwinding"] @@ -51,6 +46,7 @@ unwind = ["unwinding"] yyjson = [] # Features detected by build.rs. Do not specify. 
+assert_unchecked = [] inline_int = [] intrinsics = [] optimize = [] diff --git a/README.md b/README.md index 942597ef..601b9d68 100644 --- a/README.md +++ b/README.md @@ -9,33 +9,22 @@ third-party libraries. It serializes [numpy](https://github.com/ijl/orjson?tab=readme-ov-file#numpy), and [UUID](https://github.com/ijl/orjson?tab=readme-ov-file#uuid) instances natively. -Its features and drawbacks compared to other Python JSON libraries: - -* serializes `dataclass` instances 40-50x as fast as other libraries -* serializes `datetime`, `date`, and `time` instances to RFC 3339 format, -e.g., "1970-01-01T00:00:00+00:00" -* serializes `numpy.ndarray` instances 4-12x as fast with 0.3x the memory -usage of other libraries -* pretty prints 10x to 20x as fast as the standard library -* serializes to `bytes` rather than `str`, i.e., is not a drop-in replacement -* serializes `str` without escaping unicode to ASCII, e.g., "好" rather than -"\\\u597d" -* serializes `float` 10x as fast and deserializes twice as fast as other -libraries -* serializes subclasses of `str`, `int`, `list`, and `dict` natively, -requiring `default` to specify how to serialize others -* serializes arbitrary types using a `default` hook -* has strict UTF-8 conformance, more correct than the standard library -* has strict JSON conformance in not supporting Nan/Infinity/-Infinity -* has an option for strict JSON conformance on 53-bit integers with default -support for 64-bit -* does not provide `load()` or `dump()` functions for reading from/writing to -file-like objects +[orjson.dumps()](https://github.com/ijl/orjson?tab=readme-ov-file#serialize) is +something like 10x as fast as `json`, serializes +common types and subtypes, has a `default` parameter for the caller to add +support for arbitrary types, and a number of flags controlling output. + +[orjson.loads()](https://github.com/ijl/orjson?tab=readme-ov-file#deserialize) +is something like 2x as fast as `json`, and is strictly compliant with UTF-8 and +RFC 8259. + +Reading from and writing to files, line-delimited JSON files, and so on is +not provided by the library. orjson supports CPython 3.8, 3.9, 3.10, 3.11, 3.12, 3.13, and 3.14. -It distributes amd64/x86_64, aarch64/armv8, arm7, POWER/ppc64le, and s390x -wheels for Linux, amd64 and aarch64 wheels for macOS, and amd64 +It distributes amd64/x86_64, i686/x86, aarch64/armv8, arm7, POWER/ppc64le, +and s390x wheels for Linux, amd64 and aarch64 wheels for macOS, and amd64 and i686/x86 wheels for Windows. orjson does not and will not support PyPy, embedded Python builds for @@ -72,8 +61,7 @@ available in the repository. 3. [Testing](https://github.com/ijl/orjson?tab=readme-ov-file#testing) 4. [Performance](https://github.com/ijl/orjson?tab=readme-ov-file#performance) 1. [Latency](https://github.com/ijl/orjson?tab=readme-ov-file#latency) - 2. [Memory](https://github.com/ijl/orjson?tab=readme-ov-file#memory) - 3. [Reproducing](https://github.com/ijl/orjson?tab=readme-ov-file#reproducing) + 2. [Reproducing](https://github.com/ijl/orjson?tab=readme-ov-file#reproducing) 5. [Questions](https://github.com/ijl/orjson?tab=readme-ov-file#questions) 6. [Packaging](https://github.com/ijl/orjson?tab=readme-ov-file#packaging) 7. [License](https://github.com/ijl/orjson?tab=readme-ov-file#license) @@ -130,13 +118,18 @@ implementations in a `default` function and options enabling them can be removed but do not need to be. There was no change in deserialization. 
To migrate from the standard library, the largest difference is that -`orjson.dumps` returns `bytes` and `json.dumps` returns a `str`. Users with -`dict` objects using non-`str` keys should specify -`option=orjson.OPT_NON_STR_KEYS`. `sort_keys` is replaced by -`option=orjson.OPT_SORT_KEYS`. `indent` is replaced by -`option=orjson.OPT_INDENT_2` and other levels of indentation are not +`orjson.dumps` returns `bytes` and `json.dumps` returns a `str`. + +Users with `dict` objects using non-`str` keys should specify `option=orjson.OPT_NON_STR_KEYS`. + +`sort_keys` is replaced by `option=orjson.OPT_SORT_KEYS`. + +`indent` is replaced by `option=orjson.OPT_INDENT_2` and other levels of indentation are not supported. +`ensure_ascii` is probably not relevant today and UTF-8 characters cannot be +escaped to ASCII. + ### Serialize ```python @@ -222,7 +215,7 @@ Python otherwise implicitly returns `None`, which appears to the caller like a legitimate value and is serialized: ```python ->>> import orjson, json, rapidjson +>>> import orjson, json >>> def default(obj): if isinstance(obj, decimal.Decimal): @@ -232,8 +225,6 @@ def default(obj): b'{"set":null}' >>> json.dumps({"set":{1, 2}}, default=default) '{"set":null}' ->>> rapidjson.dumps({"set":{1, 2}}, default=default) -'{"set":null}' ``` #### option @@ -293,25 +284,19 @@ If displayed, the indentation and linebreaks appear like this: This measures serializing the github.json fixture as compact (52KiB) or pretty (64KiB): -| Library | compact (ms) | pretty (ms) | vs. orjson | -|------------|----------------|---------------|--------------| -| orjson | 0.03 | 0.04 | 1 | -| ujson | 0.18 | 0.19 | 4.6 | -| rapidjson | 0.1 | 0.12 | 2.9 | -| simplejson | 0.25 | 0.89 | 21.4 | -| json | 0.18 | 0.71 | 17 | +| Library | compact (ms) | pretty (ms) | vs. orjson | +|-----------|----------------|---------------|--------------| +| orjson | 0.01 | 0.02 | 1 | +| json | 0.13 | 0.54 | 34 | This measures serializing the citm_catalog.json fixture, more of a worst case due to the amount of nesting and newlines, as compact (489KiB) or pretty (1.1MiB): -| Library | compact (ms) | pretty (ms) | vs. orjson | -|------------|----------------|---------------|--------------| -| orjson | 0.59 | 0.71 | 1 | -| ujson | 2.9 | 3.59 | 5 | -| rapidjson | 1.81 | 2.8 | 3.9 | -| simplejson | 10.43 | 42.13 | 59.1 | -| json | 4.16 | 33.42 | 46.9 | +| Library | compact (ms) | pretty (ms) | vs. orjson | +|-----------|----------------|---------------|--------------| +| orjson | 0.25 | 0.45 | 1 | +| json | 3.01 | 24.42 | 54.4 | This can be reproduced using the `pyindent` script. @@ -386,18 +371,14 @@ single integer. In "str keys", the keys were converted to `str` before serialization, and orjson still specifes `option=orjson.OPT_NON_STR_KEYS` (which is always somewhat slower). -| Library | str keys (ms) | int keys (ms) | int keys sorted (ms) | -|------------|-----------------|-----------------|------------------------| -| orjson | 1.53 | 2.16 | 4.29 | -| ujson | 3.07 | 5.65 | | -| rapidjson | 4.29 | | | -| simplejson | 11.24 | 14.50 | 21.86 | -| json | 7.17 | 8.49 | | +| Library | str keys (ms) | int keys (ms) | int keys sorted (ms) | +|-----------|-----------------|-----------------|------------------------| +| orjson | 0.5 | 0.93 | 2.08 | +| json | 2.72 | 3.59 | | -ujson is blank for sorting because it segfaults. json is blank because it +json is blank because it raises `TypeError` on attempting to sort before converting all keys to `str`. 
-rapidjson is blank because it does not support non-`str` keys. This can -be reproduced using the `pynonstr` script. +This can be reproduced using the `pynonstr` script. ##### OPT_OMIT_MICROSECONDS @@ -539,13 +520,10 @@ b'{"a":3,"b":1,"c":2}' This measures serializing the twitter.json fixture unsorted and sorted: -| Library | unsorted (ms) | sorted (ms) | vs. orjson | -|------------|-----------------|---------------|--------------| -| orjson | 0.32 | 0.54 | 1 | -| ujson | 1.6 | 2.07 | 3.8 | -| rapidjson | 1.12 | 1.65 | 3.1 | -| simplejson | 2.25 | 3.13 | 5.8 | -| json | 1.78 | 2.32 | 4.3 | +| Library | unsorted (ms) | sorted (ms) | vs. orjson | +|-----------|-----------------|---------------|--------------| +| orjson | 0.11 | 0.3 | 1 | +| json | 1.36 | 1.93 | 6.4 | The benchmark can be reproduced using the `pysort` script. @@ -557,8 +535,7 @@ The sorting is not collation/locale-aware: b'{"A":3,"a":1,"\xc3\xa4":2}' ``` -This is the same sorting behavior as the standard library, rapidjson, -simplejson, and ujson. +This is the same sorting behavior as the standard library. `dataclass` also serialize as maps but this has no effect on them. @@ -607,8 +584,6 @@ not valid UTF-8. It otherwise does no validation and it is possible to write invalid JSON. This does not escape characters. The implementation is tested to not crash if given invalid strings or invalid JSON. -This is similar to `RawJSON` in rapidjson. - ### Deserialize ```python @@ -655,13 +630,10 @@ using `__slots__`, frozen dataclasses, those with optional or default attributes, and subclasses. There is a performance benefit to not using `__slots__`. -| Library | dict (ms) | dataclass (ms) | vs. orjson | -|------------|-------------|------------------|--------------| -| orjson | 1.40 | 1.60 | 1 | -| ujson | | | | -| rapidjson | 3.64 | 68.48 | 42 | -| simplejson | 14.21 | 92.18 | 57 | -| json | 13.28 | 94.90 | 59 | +| Library | dict (ms) | dataclass (ms) | vs. orjson | +|-----------|-------------|------------------|--------------| +| orjson | 0.43 | 0.95 | 1 | +| json | 5.81 | 38.32 | 40 | This measures serializing 555KiB of JSON, orjson natively and other libraries using `default` to serialize the output of `dataclasses.asdict()`. This can be @@ -790,13 +762,9 @@ precision and consistent rounding. compliant JSON, as `null`: ```python ->>> import orjson, ujson, rapidjson, json +>>> import orjson, json >>> orjson.dumps([float("NaN"), float("Infinity"), float("-Infinity")]) b'[null,null,null]' ->>> ujson.dumps([float("NaN"), float("Infinity"), float("-Infinity")]) -OverflowError: Invalid Inf value when encoding double ->>> rapidjson.dumps([float("NaN"), float("Infinity"), float("-Infinity")]) -'[NaN,Infinity,-Infinity]' >>> json.dumps([float("NaN"), float("Infinity"), float("-Infinity")]) '[NaN, Infinity, -Infinity]' ``` @@ -887,38 +855,28 @@ If an array is malformed, `orjson.JSONEncodeError` is raised. This measures serializing 92MiB of JSON from an `numpy.ndarray` with dimensions of `(50000, 100)` and `numpy.float64` values: -| Library | Latency (ms) | RSS diff (MiB) | vs. orjson | -|------------|----------------|------------------|--------------| -| orjson | 194 | 99 | 1.0 | -| ujson | | | | -| rapidjson | 3,048 | 309 | 15.7 | -| simplejson | 3,023 | 297 | 15.6 | -| json | 3,133 | 297 | 16.1 | +| Library | Latency (ms) | RSS diff (MiB) | vs. 
orjson | +|-----------|----------------|------------------|--------------| +| orjson | 105 | 105 | 1 | +| json | 1,481 | 295 | 14.2 | This measures serializing 100MiB of JSON from an `numpy.ndarray` with dimensions of `(100000, 100)` and `numpy.int32` values: -| Library | Latency (ms) | RSS diff (MiB) | vs. orjson | -|------------|----------------|------------------|--------------| -| orjson | 178 | 115 | 1.0 | -| ujson | | | | -| rapidjson | 1,512 | 551 | 8.5 | -| simplejson | 1,606 | 504 | 9.0 | -| json | 1,506 | 503 | 8.4 | +| Library | Latency (ms) | RSS diff (MiB) | vs. orjson | +|-----------|----------------|------------------|--------------| +| orjson | 68 | 119 | 1 | +| json | 684 | 501 | 10.1 | This measures serializing 105MiB of JSON from an `numpy.ndarray` with dimensions of `(100000, 200)` and `numpy.bool` values: -| Library | Latency (ms) | RSS diff (MiB) | vs. orjson | -|------------|----------------|------------------|--------------| -| orjson | 157 | 120 | 1.0 | -| ujson | | | | -| rapidjson | 710 | 327 | 4.5 | -| simplejson | 931 | 398 | 5.9 | -| json | 996 | 400 | 6.3 | +| Library | Latency (ms) | RSS diff (MiB) | vs. orjson | +|-----------|----------------|------------------|--------------| +| orjson | 50 | 125 | 1 | +| json | 573 | 398 | 11.5 | -In these benchmarks, orjson serializes natively, ujson is blank because it -does not support a `default` parameter, and the other libraries serialize +In these benchmarks, orjson serializes natively and `json` serializes `ndarray.tolist()` via `default`. The RSS column measures peak memory usage during serialization. This can be reproduced using the `pynumpy` script. @@ -936,25 +894,14 @@ If `orjson.dumps()` is given a `str` that does not contain valid UTF-8, `orjson.JSONEncodeError` is raised. If `loads()` receives invalid UTF-8, `orjson.JSONDecodeError` is raised. -orjson and rapidjson are the only compared JSON libraries to consistently -error on bad input. - ```python ->>> import orjson, ujson, rapidjson, json +>>> import orjson, json >>> orjson.dumps('\ud800') JSONEncodeError: str is not valid UTF-8: surrogates not allowed ->>> ujson.dumps('\ud800') -UnicodeEncodeError: 'utf-8' codec ... ->>> rapidjson.dumps('\ud800') -UnicodeEncodeError: 'utf-8' codec ... >>> json.dumps('\ud800') '"\\ud800"' >>> orjson.loads('"\\ud800"') JSONDecodeError: unexpected end of hex escape at line 1 column 8: line 1 column 1 (char 0) ->>> ujson.loads('"\\ud800"') -'' ->>> rapidjson.loads('"\\ud800"') -ValueError: Parse error at offset 1: The surrogate pair in string is invalid. >>> json.loads('"\\ud800"') '\ud800' ``` @@ -978,8 +925,6 @@ orjson serializes `uuid.UUID` instances to ``` python >>> import orjson, uuid ->>> orjson.dumps(uuid.UUID('f81d4fae-7dec-11d0-a765-00a0c91e6bf6')) -b'"f81d4fae-7dec-11d0-a765-00a0c91e6bf6"' >>> orjson.dumps(uuid.uuid5(uuid.NAMESPACE_DNS, "python.org")) b'"886313e1-3b8a-5372-9b90-0c9aee199e5d"' ``` @@ -1005,9 +950,6 @@ library handles a combined 342 JSON fixtures from the | Library | Invalid JSON documents not rejected | Valid JSON documents not deserialized | |------------|---------------------------------------|-----------------------------------------| | orjson | 0 | 0 | -| ujson | 31 | 0 | -| rapidjson | 6 | 0 | -| simplejson | 10 | 0 | | json | 17 | 0 | This shows that all libraries deserialize valid JSON but only orjson @@ -1018,21 +960,9 @@ The graph above can be reproduced using the `pycorrectness` script. 
## Performance -Serialization and deserialization performance of orjson is better than -ultrajson, rapidjson, simplejson, or json. The benchmarks are done on -fixtures of real data: - -* twitter.json, 631.5KiB, results of a search on Twitter for "一", containing -CJK strings, dictionaries of strings and arrays of dictionaries, indented. - -* github.json, 55.8KiB, a GitHub activity feed, containing dictionaries of -strings and arrays of dictionaries, not indented. - -* citm_catalog.json, 1.7MiB, concert data, containing nested dictionaries of -strings and arrays of integers, indented. - -* canada.json, 2.2MiB, coordinates of the Canadian border in GeoJSON -format, containing floats and arrays, indented. +Serialization and deserialization performance of orjson is consistently better +than the standard library's `json`. The graphs below illustrate a few commonly +used documents. ### Latency @@ -1042,142 +972,66 @@ format, containing floats and arrays, indented. #### twitter.json serialization -| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | -|------------|---------------------------------|-------------------------|----------------------| -| orjson | 0.1 | 8377 | 1 | -| ujson | 0.9 | 1088 | 7.3 | -| rapidjson | 0.8 | 1228 | 6.8 | -| simplejson | 1.9 | 531 | 15.6 | -| json | 1.4 | 744 | 11.3 | +| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | +|-----------|---------------------------------|-------------------------|----------------------| +| orjson | 0.1 | 8453 | 1 | +| json | 1.3 | 765 | 11.1 | #### twitter.json deserialization -| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | -|------------|---------------------------------|-------------------------|----------------------| -| orjson | 0.6 | 1811 | 1 | -| ujson | 1.2 | 814 | 2.1 | -| rapidjson | 2.1 | 476 | 3.8 | -| simplejson | 1.6 | 626 | 3 | -| json | 1.8 | 557 | 3.3 | +| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | +|-----------|---------------------------------|-------------------------|----------------------| +| orjson | 0.5 | 1889 | 1 | +| json | 2.2 | 453 | 4.2 | #### github.json serialization -| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | -|------------|---------------------------------|-------------------------|----------------------| -| orjson | 0.01 | 104424 | 1 | -| ujson | 0.09 | 10594 | 9.8 | -| rapidjson | 0.07 | 13667 | 7.6 | -| simplejson | 0.2 | 5051 | 20.6 | -| json | 0.14 | 7133 | 14.6 | +| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | +|-----------|---------------------------------|-------------------------|----------------------| +| orjson | 0.01 | 103693 | 1 | +| json | 0.13 | 7648 | 13.6 | #### github.json deserialization -| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | -|------------|---------------------------------|-------------------------|----------------------| -| orjson | 0.05 | 20069 | 1 | -| ujson | 0.11 | 8913 | 2.3 | -| rapidjson | 0.13 | 8077 | 2.6 | -| simplejson | 0.11 | 9342 | 2.1 | -| json | 0.11 | 9291 | 2.2 | +| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | +|-----------|---------------------------------|-------------------------|----------------------| +| orjson | 0.04 | 23264 | 1 | +| json | 0.1 | 10430 | 2.2 | #### citm_catalog.json serialization -| Library | Median latency 
(milliseconds) | Operations per second | Relative (latency) | -|------------|---------------------------------|-------------------------|----------------------| -| orjson | 0.3 | 3757 | 1 | -| ujson | 1.7 | 598 | 6.3 | -| rapidjson | 1.3 | 768 | 4.9 | -| simplejson | 8.3 | 120 | 31.1 | -| json | 3 | 331 | 11.3 | +| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | +|-----------|---------------------------------|-------------------------|----------------------| +| orjson | 0.3 | 3975 | 1 | +| json | 3 | 338 | 11.8 | #### citm_catalog.json deserialization -| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | -|------------|---------------------------------|-------------------------|----------------------| -| orjson | 1.4 | 730 | 1 | -| ujson | 2.6 | 384 | 1.9 | -| rapidjson | 4 | 246 | 3 | -| simplejson | 3.7 | 271 | 2.7 | -| json | 3.7 | 267 | 2.7 | +| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | +|-----------|---------------------------------|-------------------------|----------------------| +| orjson | 1.3 | 781 | 1 | +| json | 4 | 250 | 3.1 | #### canada.json serialization -| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | -|------------|---------------------------------|-------------------------|----------------------| -| orjson | 2.4 | 410 | 1 | -| ujson | 9.6 | 104 | 3.9 | -| rapidjson | 28.7 | 34 | 11.8 | -| simplejson | 49.3 | 20 | 20.3 | -| json | 30.6 | 32 | 12.6 | +| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | +|-----------|---------------------------------|-------------------------|----------------------| +| orjson | 2.5 | 399 | 1 | +| json | 29.8 | 33 | 11.9 | #### canada.json deserialization -| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | -|------------|---------------------------------|-------------------------|----------------------| -| orjson | 3 | 336 | 1 | -| ujson | 7.1 | 141 | 2.4 | -| rapidjson | 20.1 | 49 | 6.7 | -| simplejson | 16.8 | 59 | 5.6 | -| json | 18.2 | 55 | 6.1 | - -### Memory - -orjson as of 3.7.0 has higher baseline memory usage than other libraries -due to a persistent buffer used for parsing. Incremental memory usage when -deserializing is similar to the standard library and other third-party -libraries. - -This measures, in the first column, RSS after importing a library and reading -the fixture, and in the second column, increases in RSS after repeatedly -calling `loads()` on the fixture. 
- -#### twitter.json - -| Library | import, read() RSS (MiB) | loads() increase in RSS (MiB) | -|------------|----------------------------|---------------------------------| -| orjson | 15.7 | 3.4 | -| ujson | 16.4 | 3.4 | -| rapidjson | 16.6 | 4.4 | -| simplejson | 14.5 | 1.8 | -| json | 13.9 | 1.8 | - -#### github.json - -| Library | import, read() RSS (MiB) | loads() increase in RSS (MiB) | -|------------|----------------------------|---------------------------------| -| orjson | 15.2 | 0.4 | -| ujson | 15.4 | 0.4 | -| rapidjson | 15.7 | 0.5 | -| simplejson | 13.7 | 0.2 | -| json | 13.3 | 0.1 | - -#### citm_catalog.json - -| Library | import, read() RSS (MiB) | loads() increase in RSS (MiB) | -|------------|----------------------------|---------------------------------| -| orjson | 16.8 | 10.1 | -| ujson | 17.3 | 10.2 | -| rapidjson | 17.6 | 28.7 | -| simplejson | 15.8 | 30.1 | -| json | 14.8 | 20.5 | - -#### canada.json - -| Library | import, read() RSS (MiB) | loads() increase in RSS (MiB) | -|------------|----------------------------|---------------------------------| -| orjson | 17.2 | 22.1 | -| ujson | 17.4 | 18.3 | -| rapidjson | 18 | 23.5 | -| simplejson | 15.7 | 21.4 | -| json | 15.4 | 20.4 | +| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | +|-----------|---------------------------------|-------------------------|----------------------| +| orjson | 3 | 333 | 1 | +| json | 18 | 55 | 6 | ### Reproducing -The above was measured using Python 3.11.9 on Linux (amd64) with -orjson 3.10.6, ujson 5.10.0, python-rapidson 1.18, and simplejson 3.19.2. - -The latency results can be reproduced using the `pybench` and `graph` -scripts. The memory results can be reproduced using the `pymem` script. +The above was measured using Python 3.11.10 in a Fedora 42 container on an +x86-64-v4 machine using the +`orjson-3.10.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl` +artifact on PyPI. The latency results can be reproduced using the `pybench` script. ## Questions @@ -1226,10 +1080,11 @@ example using clang and LTO. The project's own CI tests against `nightly-2024-09-25` and stable 1.72. It is prudent to pin the nightly version because that channel can introduce -breaking changes. +breaking changes. There is a significant performance benefit to using +nightly. -orjson is tested for amd64 on Linux and cross-compiles for aarch64, arm7, -ppc64le, and s390x. It is tested for either aarch64 or amd64 on macOS and +orjson is tested for amd64, aarch64, and i686 on Linux and cross-compiles for +arm7, ppc64le, and s390x. It is tested for either aarch64 or amd64 on macOS and cross-compiles for the other, depending on version. For Windows it is tested on amd64 and i686. 
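A short, hedged illustration of the migration points covered in the README section above (bytes output, `OPT_SORT_KEYS`, `OPT_INDENT_2`, `OPT_NON_STR_KEYS`); the option names are the ones documented in that section, and the snippet simply assumes orjson is installed alongside the standard library:

```python
import json

import orjson

data = {"b": 1, "a": 2.5, "text": "好"}

# json.dumps returns str; orjson.dumps returns UTF-8 bytes and does not
# escape non-ASCII to ASCII. sort_keys/indent become option flags.
as_str = json.dumps(data, sort_keys=True, indent=2)
as_bytes = orjson.dumps(data, option=orjson.OPT_SORT_KEYS | orjson.OPT_INDENT_2)

# Both round-trip to the same object.
assert json.loads(as_str) == orjson.loads(as_bytes) == data

# Non-str dict keys are rejected unless OPT_NON_STR_KEYS is passed.
assert orjson.dumps({1: "x"}, option=orjson.OPT_NON_STR_KEYS) == b'{"1":"x"}'
```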
diff --git a/bench/data.py b/bench/data.py index 079957a6..aa9023ca 100644 --- a/bench/data.py +++ b/bench/data.py @@ -3,39 +3,17 @@ from json import dumps as _json_dumps from json import loads as json_loads -from rapidjson import dumps as _rapidjson_dumps -from rapidjson import loads as rapidjson_loads -from simplejson import dumps as _simplejson_dumps -from simplejson import loads as simplejson_loads -from ujson import dumps as _ujson_dumps -from ujson import loads as ujson_loads - from orjson import dumps as orjson_dumps from orjson import loads as orjson_loads -def ujson_dumps(obj): - return _ujson_dumps(obj).encode("utf-8") - - -def rapidjson_dumps(obj): - return _rapidjson_dumps(obj).encode("utf-8") - - def json_dumps(obj): return _json_dumps(obj).encode("utf-8") -def simplejson_dumps(obj): - return _simplejson_dumps(obj).encode("utf-8") - - libraries = { "orjson": (orjson_dumps, orjson_loads), - "ujson": (ujson_dumps, ujson_loads), "json": (json_dumps, json_loads), - "rapidjson": (rapidjson_dumps, rapidjson_loads), - "simplejson": (simplejson_dumps, simplejson_loads), } diff --git a/bench/requirements.txt b/bench/requirements.txt index 40d524ca..cd245e70 100644 --- a/bench/requirements.txt +++ b/bench/requirements.txt @@ -1,9 +1,6 @@ -memory-profiler +memory-profiler; python_version<"3.13" pandas; python_version<"3.13" pytest-benchmark pytest-random-order -python-rapidjson seaborn; python_version<"3.13" -simplejson tabulate -ujson diff --git a/bench/run_mem b/bench/run_mem index e62e755a..c3324c2b 100755 --- a/bench/run_mem +++ b/bench/run_mem @@ -19,12 +19,6 @@ if lib_name == "json": from json import dumps, loads elif lib_name == "orjson": from orjson import dumps, loads -elif lib_name == "rapidjson": - from rapidjson import dumps, loads -elif lib_name == "simplejson": - from simplejson import dumps, loads -elif lib_name == "ujson": - from ujson import dumps, loads else: raise NotImplementedError diff --git a/build.rs b/build.rs index ad6d5b9c..3e3620d9 100644 --- a/build.rs +++ b/build.rs @@ -36,6 +36,10 @@ fn main() { println!("cargo:rustc-cfg=feature=\"optimize\""); } + if let Some(true) = version_check::supports_feature("hint_assert_unchecked") { + println!("cargo:rustc-cfg=feature=\"assert_unchecked\""); + } + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] if env::var("ORJSON_DISABLE_SIMD").is_err() { // auto build unstable SIMD on nightly @@ -51,11 +55,10 @@ fn main() { } } - #[cfg(all( - target_pointer_width = "64", - any(target_arch = "x86_64", target_arch = "aarch64") - ))] - println!("cargo:rustc-cfg=feature=\"inline_int\""); + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + if let Some(64) = python_config.pointer_width { + println!("cargo:rustc-cfg=feature=\"inline_int\""); + } if env::var("ORJSON_DISABLE_YYJSON").is_ok() { if env::var("CARGO_FEATURE_YYJSON").is_ok() { diff --git a/ci/azure-pipelines.yml b/ci/azure-pipelines.yml deleted file mode 100644 index 8382734e..00000000 --- a/ci/azure-pipelines.yml +++ /dev/null @@ -1,196 +0,0 @@ -variables: - toolchain: nightly-2024-09-25 - -jobs: - -- job: win_python313_amd64 - pool: - vmImage: windows-2022 - variables: - interpreter: C:\hostedtoolcache\windows\Python\3.13.0\x64\python.exe - rustup: https://win.rustup.rs/x86_64 - target: x86_64-pc-windows-msvc - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.13.0' - addToPath: true - architecture: 'x64' - - checkout: self - - template: ./azure-win.yml - -- job: win_python312_amd64 - pool: - vmImage: windows-2022 - variables: - 
interpreter: C:\hostedtoolcache\windows\Python\3.12.2\x64\python.exe - rustup: https://win.rustup.rs/x86_64 - target: x86_64-pc-windows-msvc - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.12.2' - addToPath: true - architecture: 'x64' - - checkout: self - - template: ./azure-win.yml - -- job: win_python311_amd64 - pool: - vmImage: windows-2022 - variables: - interpreter: C:\hostedtoolcache\windows\Python\3.11.4\x64\python.exe - rustup: https://win.rustup.rs/x86_64 - target: x86_64-pc-windows-msvc - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.11.4' - addToPath: true - architecture: 'x64' - - checkout: self - - template: ./azure-win.yml - -- job: win_python310_amd64 - pool: - vmImage: windows-2022 - variables: - interpreter: C:\hostedtoolcache\windows\Python\3.10.8\x64\python.exe - rustup: https://win.rustup.rs/x86_64 - target: x86_64-pc-windows-msvc - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.10.8' - addToPath: true - architecture: 'x64' - - checkout: self - - template: ./azure-win.yml - -- job: win_python39_amd64 - pool: - vmImage: windows-2022 - variables: - interpreter: C:\hostedtoolcache\windows\Python\3.9.13\x64\python.exe - rustup: https://win.rustup.rs/x86_64 - target: x86_64-pc-windows-msvc - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.9.13' - addToPath: true - architecture: 'x64' - - checkout: self - - template: ./azure-win.yml - -- job: win_python38_amd64 - pool: - vmImage: windows-2022 - variables: - interpreter: C:\hostedtoolcache\windows\Python\3.8.10\x64\python.exe - rustup: https://win.rustup.rs/x86_64 - target: x86_64-pc-windows-msvc - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.8.10' - addToPath: true - architecture: 'x64' - - checkout: self - - template: ./azure-win.yml - -- job: win_python313_x86 - pool: - vmImage: windows-2022 - variables: - interpreter: C:\hostedtoolcache\windows\Python\3.13.0\x86\python.exe - rustup: https://win.rustup.rs/x86 - target: i686-pc-windows-msvc - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.13.0' - addToPath: true - architecture: 'x86' - - checkout: self - - template: ./azure-win.yml - -- job: win_python312_x86 - pool: - vmImage: windows-2022 - variables: - interpreter: C:\hostedtoolcache\windows\Python\3.12.2\x86\python.exe - rustup: https://win.rustup.rs/x86 - target: i686-pc-windows-msvc - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.12.2' - addToPath: true - architecture: 'x86' - - checkout: self - - template: ./azure-win.yml - -- job: win_python311_x86 - pool: - vmImage: windows-2022 - variables: - interpreter: C:\hostedtoolcache\windows\Python\3.11.4\x86\python.exe - rustup: https://win.rustup.rs/x86 - target: i686-pc-windows-msvc - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.11.4' - addToPath: true - architecture: 'x86' - - checkout: self - - template: ./azure-win.yml - -- job: win_python310_x86 - pool: - vmImage: windows-2022 - variables: - interpreter: C:\hostedtoolcache\windows\Python\3.10.8\x86\python.exe - rustup: https://win.rustup.rs/x86 - target: i686-pc-windows-msvc - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.10.8' - addToPath: true - architecture: 'x86' - - checkout: self - - template: ./azure-win.yml - -- job: win_python39_x86 - pool: - vmImage: windows-2022 - variables: - interpreter: C:\hostedtoolcache\windows\Python\3.9.13\x86\python.exe - rustup: https://win.rustup.rs/x86 - target: i686-pc-windows-msvc - steps: - - task: UsePythonVersion@0 - 
inputs: - versionSpec: '3.9.13' - addToPath: true - architecture: 'x86' - - checkout: self - - template: ./azure-win.yml - -- job: win_python38_x86 - pool: - vmImage: windows-2022 - variables: - interpreter: C:\hostedtoolcache\windows\Python\3.8.10\x86\python.exe - rustup: https://win.rustup.rs/x86 - target: i686-pc-windows-msvc - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.8.10' - addToPath: true - architecture: 'x86' - - checkout: self - - template: ./azure-win.yml diff --git a/ci/azure-win.yml b/ci/azure-win.yml deleted file mode 100644 index 655eab96..00000000 --- a/ci/azure-win.yml +++ /dev/null @@ -1,41 +0,0 @@ -parameters: - interpreter: '' - rustup: '' - target: '' - toolchain: '' - -steps: -- script: | - curl $(rustup) -o rustup-init.exe - rustup-init.exe -y --default-host $(target) --default-toolchain $(toolchain)-$(target) --profile minimal - set PATH=%PATH%;%USERPROFILE%\.cargo\bin - rustup default $(toolchain)-$(target) - rustup component add rust-src - mkdir .cargo - cp ci/config.toml .cargo/config.toml - echo "##vso[task.setvariable variable=PATH;]%PATH%;%USERPROFILE%\.cargo\bin" - displayName: rustup -- script: python.exe -m pip install --upgrade pip "maturin>=1,<2" wheel - displayName: build dependencies -- script: python.exe -m pip install -r test\requirements.txt -r integration\requirements.txt - displayName: test dependencies -- script: maturin.exe build --release --features=no-panic,unstable-simd,yyjson --strip --interpreter $(interpreter) --target $(target) - displayName: build - env: - CFLAGS: "-Os -flto" - LDFLAGS: "-Wl,--as-needed" - RUSTFLAGS: "-C lto=fat -Z mir-opt-level=4 -D warnings" - CARGO_UNSTABLE_SPARSE_REGISTRY: "true" - UNSAFE_PYO3_SKIP_VERSION_CHECK: "1" -- script: python.exe -m pip install orjson --no-index --find-links=D:\a\1\s\target\wheels - displayName: install -- script: python.exe -m pytest -s -rxX -v test - env: - PYTHONMALLOC: "debug" - displayName: pytest -- script: python.exe integration\thread - displayName: thread -- script: python.exe integration\init - displayName: init -- bash: ./ci/deploy /d/a/1/s/target/wheels/*.whl - displayName: deploy diff --git a/ci/deploy b/ci/deploy deleted file mode 100755 index 1b34b539..00000000 --- a/ci/deploy +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash - -set -eou pipefail - -if [ -z ${DRONE_TAG+x} ]; then - tag=$(git name-rev --tags --name-only $(git rev-parse HEAD)) -else - tag="$DRONE_TAG" -fi - -echo "$tag" - -if [[ "$tag" == "undefined" ]]; then - echo "not on a tag" - exit 0 -fi - -maturin upload --skip-existing "$1" diff --git a/doc/deserialization.png b/doc/deserialization.png index d83f5d51..ad4104b5 100644 Binary files a/doc/deserialization.png and b/doc/deserialization.png differ diff --git a/doc/serialization.png b/doc/serialization.png index be98efd8..5a160234 100644 Binary files a/doc/serialization.png and b/doc/serialization.png differ diff --git a/include/pyo3/pyo3-build-config/src/impl_.rs b/include/pyo3/pyo3-build-config/src/impl_.rs index ec652591..6d232642 100644 --- a/include/pyo3/pyo3-build-config/src/impl_.rs +++ b/include/pyo3/pyo3-build-config/src/impl_.rs @@ -248,6 +248,7 @@ print("executable", sys.executable) print("calcsize_pointer", struct.calcsize("P")) print("mingw", get_platform().startswith("mingw")) print("ext_suffix", get_config_var("EXT_SUFFIX")) +print("gil_disabled", get_config_var("Py_GIL_DISABLED")) "#; let output = run_python_script(interpreter.as_ref(), SCRIPT)?; let map: HashMap = parse_script_output(&output); @@ -290,6 +291,13 @@ 
print("ext_suffix", get_config_var("EXT_SUFFIX")) let implementation = map["implementation"].parse()?; + let gil_disabled = match map["gil_disabled"].as_str() { + "1" => true, + "0" => false, + "None" => false, + _ => panic!("Unknown Py_GIL_DISABLED value"), + }; + let lib_name = if cfg!(windows) { default_lib_name_windows( version, @@ -300,12 +308,14 @@ print("ext_suffix", get_config_var("EXT_SUFFIX")) // on Windows from sysconfig - e.g. ext_suffix may be // `_d.cp312-win_amd64.pyd` for 3.12 debug build map["ext_suffix"].starts_with("_d."), + gil_disabled, ) } else { default_lib_name_unix( version, implementation, map.get("ld_version").map(String::as_str), + gil_disabled, ) }; @@ -375,10 +385,15 @@ print("ext_suffix", get_config_var("EXT_SUFFIX")) _ => false, }; let lib_dir = get_key!(sysconfigdata, "LIBDIR").ok().map(str::to_string); + let gil_disabled = match sysconfigdata.get_value("Py_GIL_DISABLED") { + Some(value) => value == "1", + None => false, + }; let lib_name = Some(default_lib_name_unix( version, implementation, sysconfigdata.get_value("LDVERSION"), + gil_disabled, )); let pointer_width = parse_key!(sysconfigdata, "SIZEOF_VOID_P") .map(|bytes_width: u32| bytes_width * 8) @@ -1106,10 +1121,15 @@ impl BuildFlags { /// the interpreter and printing variables of interest from /// sysconfig.get_config_vars. fn from_interpreter(interpreter: impl AsRef) -> Result { - // sysconfig is missing all the flags on windows, so we can't actually - // query the interpreter directly for its build flags. + // sysconfig is missing all the flags on windows for Python 3.12 and + // older, so we can't actually query the interpreter directly for its + // build flags on those versions. if cfg!(windows) { - return Ok(Self::new()); + let script = String::from("import sys;print(sys.version_info < (3, 13))"); + let stdout = run_python_script(interpreter.as_ref(), &script)?; + if stdout.trim_end() == "True" { + return Ok(Self::new()); + } } let mut script = String::from("import sysconfig\n"); @@ -1528,6 +1548,7 @@ fn default_abi3_config(host: &Triple, version: PythonVersion) -> InterpreterConf abi3, false, false, + false, )) } else { None @@ -1604,9 +1625,10 @@ fn default_lib_name_for_target( abi3, false, false, + false, )) } else if is_linking_libpython_for_target(target) { - Some(default_lib_name_unix(version, implementation, None)) + Some(default_lib_name_unix(version, implementation, None, false)) } else { None } @@ -1618,16 +1640,26 @@ fn default_lib_name_windows( abi3: bool, mingw: bool, debug: bool, + gil_disabled: bool, ) -> String { if debug { // CPython bug: linking against python3_d.dll raises error // https://github.com/python/cpython/issues/101614 - format!("python{}{}_d", version.major, version.minor) + if gil_disabled { + format!("python{}{}t_d", version.major, version.minor) + } else { + format!("python{}{}_d", version.major, version.minor) + } } else if abi3 && !(implementation.is_pypy() || implementation.is_graalpy()) { WINDOWS_ABI3_LIB_NAME.to_owned() } else if mingw { + if gil_disabled { + panic!("MinGW free-threaded builds are not currently tested or supported") + } // https://packages.msys2.org/base/mingw-w64-python format!("python{}.{}", version.major, version.minor) + } else if gil_disabled { + format!("python{}{}t", version.major, version.minor) } else { format!("python{}{}", version.major, version.minor) } @@ -1637,6 +1669,7 @@ fn default_lib_name_unix( version: PythonVersion, implementation: PythonImplementation, ld_version: Option<&str>, + gil_disabled: bool, ) -> String { match 
implementation { PythonImplementation::CPython => match ld_version { @@ -1644,7 +1677,11 @@ fn default_lib_name_unix( None => { if version > PythonVersion::PY37 { // PEP 3149 ABI version tags are finally gone - format!("python{}.{}", version.major, version.minor) + if gil_disabled { + format!("python{}.{}t", version.major, version.minor) + } else { + format!("python{}.{}", version.major, version.minor) + } } else { // Work around https://bugs.python.org/issue36707 format!("python{}.{}m", version.major, version.minor) @@ -2351,6 +2388,7 @@ mod tests { false, false, false, + false, ), "python39", ); @@ -2361,6 +2399,7 @@ mod tests { true, false, false, + false, ), "python3", ); @@ -2371,6 +2410,7 @@ mod tests { false, true, false, + false, ), "python3.9", ); @@ -2381,6 +2421,7 @@ mod tests { true, true, false, + false, ), "python3", ); @@ -2391,6 +2432,7 @@ mod tests { true, false, false, + false, ), "python39", ); @@ -2401,6 +2443,7 @@ mod tests { false, false, true, + false, ), "python39_d", ); @@ -2413,6 +2456,7 @@ mod tests { true, false, true, + false, ), "python39_d", ); @@ -2423,16 +2467,31 @@ mod tests { use PythonImplementation::*; // Defaults to python3.7m for CPython 3.7 assert_eq!( - super::default_lib_name_unix(PythonVersion { major: 3, minor: 7 }, CPython, None), + super::default_lib_name_unix( + PythonVersion { major: 3, minor: 7 }, + CPython, + None, + false + ), "python3.7m", ); // Defaults to pythonX.Y for CPython 3.8+ assert_eq!( - super::default_lib_name_unix(PythonVersion { major: 3, minor: 8 }, CPython, None), + super::default_lib_name_unix( + PythonVersion { major: 3, minor: 8 }, + CPython, + None, + false + ), "python3.8", ); assert_eq!( - super::default_lib_name_unix(PythonVersion { major: 3, minor: 9 }, CPython, None), + super::default_lib_name_unix( + PythonVersion { major: 3, minor: 9 }, + CPython, + None, + false + ), "python3.9", ); // Can use ldversion to override for CPython @@ -2440,19 +2499,25 @@ mod tests { super::default_lib_name_unix( PythonVersion { major: 3, minor: 9 }, CPython, - Some("3.7md") + Some("3.7md"), + false ), "python3.7md", ); // PyPy 3.9 includes ldversion assert_eq!( - super::default_lib_name_unix(PythonVersion { major: 3, minor: 9 }, PyPy, None), + super::default_lib_name_unix(PythonVersion { major: 3, minor: 9 }, PyPy, None, false), "pypy3.9-c", ); assert_eq!( - super::default_lib_name_unix(PythonVersion { major: 3, minor: 9 }, PyPy, Some("3.9d")), + super::default_lib_name_unix( + PythonVersion { major: 3, minor: 9 }, + PyPy, + Some("3.9d"), + false + ), "pypy3.9d-c", ); } diff --git a/include/pyo3/pyo3-build-config/src/lib.rs b/include/pyo3/pyo3-build-config/src/lib.rs index 6e295c99..66f52031 100644 --- a/include/pyo3/pyo3-build-config/src/lib.rs +++ b/include/pyo3/pyo3-build-config/src/lib.rs @@ -138,6 +138,10 @@ fn resolve_cross_compile_config_path() -> Option { pub fn print_feature_cfgs() { let rustc_minor_version = rustc_minor_version().unwrap_or(0); + if rustc_minor_version >= 70 { + println!("cargo:rustc-cfg=rustc_has_once_lock"); + } + // invalid_from_utf8 lint was added in Rust 1.74 if rustc_minor_version >= 74 { println!("cargo:rustc-cfg=invalid_from_utf8_lint"); @@ -175,6 +179,7 @@ pub fn print_expected_cfgs() { println!("cargo:rustc-check-cfg=cfg(pyo3_leak_on_drop_without_reference_pool)"); println!("cargo:rustc-check-cfg=cfg(diagnostic_namespace)"); println!("cargo:rustc-check-cfg=cfg(c_str_lit)"); + println!("cargo:rustc-check-cfg=cfg(rustc_has_once_lock)"); // allow `Py_3_*` cfgs from the minimum supported version up to 
the // maximum minor version (+1 for development for the next) diff --git a/include/pyo3/pyo3-ffi/src/compat/py_3_13.rs b/include/pyo3/pyo3-ffi/src/compat/py_3_13.rs index 9f44ced6..59289cb7 100644 --- a/include/pyo3/pyo3-ffi/src/compat/py_3_13.rs +++ b/include/pyo3/pyo3-ffi/src/compat/py_3_13.rs @@ -83,3 +83,24 @@ compat_function!( 1 } ); + +compat_function!( + originally_defined_for(Py_3_13); + + #[inline] + pub unsafe fn PyList_Extend( + list: *mut crate::PyObject, + iterable: *mut crate::PyObject, + ) -> std::os::raw::c_int { + crate::PyList_SetSlice(list, crate::PY_SSIZE_T_MAX, crate::PY_SSIZE_T_MAX, iterable) + } +); + +compat_function!( + originally_defined_for(Py_3_13); + + #[inline] + pub unsafe fn PyList_Clear(list: *mut crate::PyObject) -> std::os::raw::c_int { + crate::PyList_SetSlice(list, 0, crate::PY_SSIZE_T_MAX, std::ptr::null_mut()) + } +); diff --git a/include/pyo3/pyo3-ffi/src/cpython/critical_section.rs b/include/pyo3/pyo3-ffi/src/cpython/critical_section.rs index 97b2f5e0..3760d224 100644 --- a/include/pyo3/pyo3-ffi/src/cpython/critical_section.rs +++ b/include/pyo3/pyo3-ffi/src/cpython/critical_section.rs @@ -9,6 +9,27 @@ pub struct PyCriticalSection { _cs_mutex: *mut PyMutex, } +// #[cfg(Py_GIL_DISABLED)] +// impl Default for PyCriticalSection { +// fn default() -> Self { +// let mut mutex = crate::PyMutex::new(); +// PyCriticalSection { +// _cs_prev: 0, +// _cs_mutex: core::ptr::addr_of_mut!(mutex), +// } +// } +// } + +#[cfg(Py_GIL_DISABLED)] +impl Default for PyCriticalSection { + fn default() -> Self { + PyCriticalSection { + _cs_prev: 0, + _cs_mutex: core::ptr::null_mut(), + } + } +} + #[repr(C)] #[cfg(Py_GIL_DISABLED)] pub struct PyCriticalSection2 { diff --git a/include/pyo3/pyo3-ffi/src/cpython/dictobject.rs b/include/pyo3/pyo3-ffi/src/cpython/dictobject.rs index 79dcbfdb..f67a7725 100644 --- a/include/pyo3/pyo3-ffi/src/cpython/dictobject.rs +++ b/include/pyo3/pyo3-ffi/src/cpython/dictobject.rs @@ -36,6 +36,15 @@ extern "C" { item: *mut PyObject, hash: crate::Py_hash_t, ) -> c_int; + + #[cfg(Py_3_13)] + pub fn _PyDict_SetItem_KnownHash_LockHeld( + mp: *mut PyDictObject, + name: *mut PyObject, + value: *mut PyObject, + hash: crate::Py_hash_t, + ) -> c_int; + // skipped _PyDict_DelItem_KnownHash // skipped _PyDict_DelItemIf // skipped _PyDict_NewKeysForClass diff --git a/include/pyo3/pyo3-ffi/src/datetime.rs b/include/pyo3/pyo3-ffi/src/datetime.rs index 7283b6d4..76d12151 100644 --- a/include/pyo3/pyo3-ffi/src/datetime.rs +++ b/include/pyo3/pyo3-ffi/src/datetime.rs @@ -4,16 +4,17 @@ //! and covers the various date and time related objects in the Python `datetime` //! standard library module. +#[cfg(not(PyPy))] +use crate::PyCapsule_Import; #[cfg(GraalPy)] use crate::{PyLong_AsLong, PyLong_Check, PyObject_GetAttrString, Py_DecRef}; use crate::{PyObject, PyObject_TypeCheck, PyTypeObject, Py_TYPE}; -use std::cell::UnsafeCell; #[cfg(not(GraalPy))] use std::os::raw::c_char; use std::os::raw::c_int; use std::ptr; -#[cfg(not(PyPy))] -use {crate::PyCapsule_Import, std::ffi::CString}; +use std::sync::Once; +use std::{cell::UnsafeCell, ffi::CStr}; #[cfg(not(any(PyPy, GraalPy)))] use {crate::Py_hash_t, std::os::raw::c_uchar}; // Type struct wrappers @@ -593,6 +594,8 @@ pub struct PyDateTime_CAPI { // Python already shares this object between threads, so it's no more evil for us to do it too! 
unsafe impl Sync for PyDateTime_CAPI {} +pub const PyDateTime_CAPSULE_NAME: &CStr = c_str!("datetime.datetime_CAPI"); + /// Returns a pointer to a `PyDateTime_CAPI` instance /// /// # Note @@ -600,33 +603,38 @@ unsafe impl Sync for PyDateTime_CAPI {} /// `PyDateTime_IMPORT` is called #[inline] pub unsafe fn PyDateTimeAPI() -> *mut PyDateTime_CAPI { - *PyDateTimeAPI_impl.0.get() -} - -#[inline] -pub unsafe fn PyDateTime_TimeZone_UTC() -> *mut PyObject { - (*PyDateTimeAPI()).TimeZone_UTC + *PyDateTimeAPI_impl.ptr.get() } /// Populates the `PyDateTimeAPI` object pub unsafe fn PyDateTime_IMPORT() { - // PyPy expects the C-API to be initialized via PyDateTime_Import, so trying to use - // `PyCapsule_Import` will behave unexpectedly in pypy. - #[cfg(PyPy)] - let py_datetime_c_api = PyDateTime_Import(); - - #[cfg(not(PyPy))] - let py_datetime_c_api = { - // PyDateTime_CAPSULE_NAME is a macro in C - let PyDateTime_CAPSULE_NAME = CString::new("datetime.datetime_CAPI").unwrap(); - - PyCapsule_Import(PyDateTime_CAPSULE_NAME.as_ptr(), 1) as *mut PyDateTime_CAPI - }; + if !PyDateTimeAPI_impl.once.is_completed() { + // PyPy expects the C-API to be initialized via PyDateTime_Import, so trying to use + // `PyCapsule_Import` will behave unexpectedly in pypy. + #[cfg(PyPy)] + let py_datetime_c_api = PyDateTime_Import(); + + #[cfg(not(PyPy))] + let py_datetime_c_api = + PyCapsule_Import(PyDateTime_CAPSULE_NAME.as_ptr(), 1) as *mut PyDateTime_CAPI; + + if py_datetime_c_api.is_null() { + return; + } - *PyDateTimeAPI_impl.0.get() = py_datetime_c_api; + // Protect against race conditions when the datetime API is concurrently + // initialized in multiple threads. UnsafeCell.get() cannot panic so this + // won't panic either. + PyDateTimeAPI_impl.once.call_once(|| { + *PyDateTimeAPI_impl.ptr.get() = py_datetime_c_api; + }); + } } -// skipped non-limited PyDateTime_TimeZone_UTC +#[inline] +pub unsafe fn PyDateTime_TimeZone_UTC() -> *mut PyObject { + (*PyDateTimeAPI()).TimeZone_UTC +} /// Type Check macros /// @@ -739,8 +747,13 @@ extern "C" { // Rust specific implementation details -struct PyDateTimeAPISingleton(UnsafeCell<*mut PyDateTime_CAPI>); +struct PyDateTimeAPISingleton { + once: Once, + ptr: UnsafeCell<*mut PyDateTime_CAPI>, +} unsafe impl Sync for PyDateTimeAPISingleton {} -static PyDateTimeAPI_impl: PyDateTimeAPISingleton = - PyDateTimeAPISingleton(UnsafeCell::new(ptr::null_mut())); +static PyDateTimeAPI_impl: PyDateTimeAPISingleton = PyDateTimeAPISingleton { + once: Once::new(), + ptr: UnsafeCell::new(ptr::null_mut()), +}; diff --git a/include/pyo3/pyo3-ffi/src/lib.rs b/include/pyo3/pyo3-ffi/src/lib.rs index c6157401..7bdba117 100644 --- a/include/pyo3/pyo3-ffi/src/lib.rs +++ b/include/pyo3/pyo3-ffi/src/lib.rs @@ -43,10 +43,39 @@ //! PyO3 uses `rustc`'s `--cfg` flags to enable or disable code used for different Python versions. //! If you want to do this for your own crate, you can do so with the [`pyo3-build-config`] crate. //! -//! - `Py_3_7`, `Py_3_8`, `Py_3_9`, `Py_3_10`: Marks code that is only enabled when -//! compiling for a given minimum Python version. +//! - `Py_3_7`, `Py_3_8`, `Py_3_9`, `Py_3_10`, `Py_3_11`, `Py_3_12`, `Py_3_13`: Marks code that is +//! only enabled when compiling for a given minimum Python version. //! - `Py_LIMITED_API`: Marks code enabled when the `abi3` feature flag is enabled. +//! - `Py_GIL_DISABLED`: Marks code that runs only in the free-threaded build of CPython. //! - `PyPy` - Marks code enabled when compiling for PyPy. +//! 
- `GraalPy` - Marks code enabled when compiling for GraalPy. +//! +//! Additionally, you can query for the values `Py_DEBUG`, `Py_REF_DEBUG`, +//! `Py_TRACE_REFS`, and `COUNT_ALLOCS` from `py_sys_config` to query for the +//! corresponding C build-time defines. For example, to conditionally define +//! debug code using `Py_DEBUG`, you could do: +//! +//! ```rust,ignore +//! #[cfg(py_sys_config = "Py_DEBUG")] +//! println!("only runs if python was compiled with Py_DEBUG") +//! ``` +//! +//! To use these attributes, add [`pyo3-build-config`] as a build dependency in +//! your `Cargo.toml`: +//! +//! ```toml +//! [build-dependencies] +#![doc = concat!("pyo3-build-config =\"", env!("CARGO_PKG_VERSION"), "\"")] +//! ``` +//! +//! And then either create a new `build.rs` file in the project root or modify +//! the existing `build.rs` file to call `use_pyo3_cfgs()`: +//! +//! ```rust,ignore +//! fn main() { +//! pyo3_build_config::use_pyo3_cfgs(); +//! } +//! ``` //! //! # Minimum supported Rust and Python versions //! @@ -79,11 +108,29 @@ //! [dependencies.pyo3-ffi] #![doc = concat!("version = \"", env!("CARGO_PKG_VERSION"), "\"")] //! features = ["extension-module"] +//! +//! [build-dependencies] +//! # This is only necessary if you need to configure your build based on +//! # the Python version or the compile-time configuration for the interpreter. +#![doc = concat!("pyo3_build_config = \"", env!("CARGO_PKG_VERSION"), "\"")] +//! ``` +//! +//! If you need to use conditional compilation based on Python version or how +//! Python was compiled, you need to add `pyo3-build-config` as a +//! `build-dependency` in your `Cargo.toml` as in the example above and either +//! create a new `build.rs` file or modify an existing one so that +//! `pyo3_build_config::use_pyo3_cfgs()` gets called at build time: +//! +//! **`build.rs`** +//! ```rust,ignore +//! fn main() { +//! pyo3_build_config::use_pyo3_cfgs() +//! } //! ``` //! //! **`src/lib.rs`** //! ```rust -//! use std::os::raw::c_char; +//! use std::os::raw::{c_char, c_long}; //! use std::ptr; //! //! use pyo3_ffi::*; @@ -93,14 +140,14 @@ //! m_name: c_str!("string_sum").as_ptr(), //! m_doc: c_str!("A Python module written in Rust.").as_ptr(), //! m_size: 0, -//! m_methods: unsafe { METHODS.as_mut_ptr().cast() }, +//! m_methods: unsafe { METHODS as *const [PyMethodDef] as *mut PyMethodDef }, //! m_slots: std::ptr::null_mut(), //! m_traverse: None, //! m_clear: None, //! m_free: None, //! }; //! -//! static mut METHODS: [PyMethodDef; 2] = [ +//! static mut METHODS: &[PyMethodDef] = &[ //! PyMethodDef { //! ml_name: c_str!("sum_as_string").as_ptr(), //! ml_meth: PyMethodDefPointer { @@ -110,58 +157,99 @@ //! ml_doc: c_str!("returns the sum of two integers as a string").as_ptr(), //! }, //! // A zeroed PyMethodDef to mark the end of the array. -//! PyMethodDef::zeroed() +//! PyMethodDef::zeroed(), //! ]; //! //! // The module initialization function, which must be named `PyInit_`. //! #[allow(non_snake_case)] //! #[no_mangle] //! pub unsafe extern "C" fn PyInit_string_sum() -> *mut PyObject { -//! PyModule_Create(ptr::addr_of_mut!(MODULE_DEF)) +//! let module = PyModule_Create(ptr::addr_of_mut!(MODULE_DEF)); +//! if module.is_null() { +//! return module; +//! } +//! #[cfg(Py_GIL_DISABLED)] +//! { +//! if PyUnstable_Module_SetGIL(module, Py_MOD_GIL_NOT_USED) < 0 { +//! Py_DECREF(module); +//! return std::ptr::null_mut(); +//! } +//! } +//! module //! } //! -//! pub unsafe extern "C" fn sum_as_string( -//! _self: *mut PyObject, -//! 
args: *mut *mut PyObject, -//! nargs: Py_ssize_t, -//! ) -> *mut PyObject { -//! if nargs != 2 { -//! PyErr_SetString( -//! PyExc_TypeError, -//! c_str!("sum_as_string() expected 2 positional arguments").as_ptr(), +//! /// A helper to parse function arguments +//! /// If we used PyO3's proc macros they'd handle all of this boilerplate for us :) +//! unsafe fn parse_arg_as_i32(obj: *mut PyObject, n_arg: usize) -> Option<i32> { +//! if PyLong_Check(obj) == 0 { +//! let msg = format!( +//! "sum_as_string expected an int for positional argument {}\0", +//! n_arg //! ); -//! return std::ptr::null_mut(); +//! PyErr_SetString(PyExc_TypeError, msg.as_ptr().cast::<c_char>()); +//! return None; //! } //! -//! let arg1 = *args; -//! if PyLong_Check(arg1) == 0 { -//! PyErr_SetString( -//! PyExc_TypeError, -//! c_str!("sum_as_string() expected an int for positional argument 1").as_ptr(), -//! ); -//! return std::ptr::null_mut(); +//! // Let's keep the behaviour consistent on platforms where `c_long` is bigger than 32 bits. +//! // In particular, it is an i32 on Windows but i64 on most Linux systems +//! let mut overflow = 0; +//! let i_long: c_long = PyLong_AsLongAndOverflow(obj, &mut overflow); +//! +//! #[allow(irrefutable_let_patterns)] // some platforms have c_long equal to i32 +//! if overflow != 0 { +//! raise_overflowerror(obj); +//! None +//! } else if let Ok(i) = i_long.try_into() { +//! Some(i) +//! } else { +//! raise_overflowerror(obj); +//! None //! } +//! } //! -//! let arg1 = PyLong_AsLong(arg1); -//! if !PyErr_Occurred().is_null() { -//! return ptr::null_mut(); +//! unsafe fn raise_overflowerror(obj: *mut PyObject) { +//! let obj_repr = PyObject_Str(obj); +//! if !obj_repr.is_null() { +//! let mut size = 0; +//! let p = PyUnicode_AsUTF8AndSize(obj_repr, &mut size); +//! if !p.is_null() { +//! let s = std::str::from_utf8_unchecked(std::slice::from_raw_parts( +//! p.cast::<u8>(), +//! size as usize, +//! )); +//! let msg = format!("cannot fit {} in 32 bits\0", s); +//! +//! PyErr_SetString(PyExc_OverflowError, msg.as_ptr().cast::<c_char>()); +//! } +//! Py_DECREF(obj_repr); //! } +//! } //! -//! let arg2 = *args.add(1); -//! if PyLong_Check(arg2) == 0 { +//! pub unsafe extern "C" fn sum_as_string( +//! _self: *mut PyObject, +//! args: *mut *mut PyObject, +//! nargs: Py_ssize_t, +//! ) -> *mut PyObject { +//! if nargs != 2 { //! PyErr_SetString( //! PyExc_TypeError, -//! c_str!("sum_as_string() expected an int for positional argument 2").as_ptr(), +//! c_str!("sum_as_string expected 2 positional arguments").as_ptr(), //! ); //! return std::ptr::null_mut(); //! } //! -//! let arg2 = PyLong_AsLong(arg2); -//! if !PyErr_Occurred().is_null() { -//! return ptr::null_mut(); -//! } +//! let (first, second) = (*args, *args.add(1)); +//! +//! let first = match parse_arg_as_i32(first, 1) { +//! Some(x) => x, +//! None => return std::ptr::null_mut(), +//! }; +//! let second = match parse_arg_as_i32(second, 2) { +//! Some(x) => x, +//! None => return std::ptr::null_mut(), +//! }; //! -//! match arg1.checked_add(arg2) { +//! match first.checked_add(second) { //! Some(sum) => { //! let string = sum.to_string(); //! PyUnicode_FromStringAndSize(string.as_ptr().cast::<c_char>(), string.len() as isize) @@ -201,6 +289,12 @@ //! [manually][manual_builds]. Both offer more flexibility than `maturin` but require further //! configuration. //! +//! This example stores the module definition statically and uses the `PyModule_Create` function +//! in the CPython C API to register the module. This is the "old" style for registering modules +//!
and has the limitation that it cannot support subinterpreters. You can also create a module +//! using the new multi-phase initialization API that does support subinterpreters. See the +//! `sequential` project located in the `examples` directory at the root of the `pyo3-ffi` crate +//! for a worked example of how to do this using `pyo3-ffi`. //! //! # Using Python from Rust //! //! @@ -226,7 +320,7 @@ #![doc = concat!("[manual_builds]: https://pyo3.rs/v", env!("CARGO_PKG_VERSION"), "/building-and-distribution.html#manual-builds \"Manual builds - Building and Distribution - PyO3 user guide\"")] //! [setuptools-rust]: https://github.com/PyO3/setuptools-rust "Setuptools plugin for Rust extensions" //! [PEP 384]: https://www.python.org/dev/peps/pep-0384 "PEP 384 -- Defining a Stable ABI" -#![doc = concat!("[Features chapter of the guide]: https://pyo3.rs/v", env!("CARGO_PKG_VERSION"), "/features.html#features-reference \"Features Reference - PyO3 user guide\"")] +#![doc = concat!("[Features chapter of the guide]: https://pyo3.rs/v", env!("CARGO_PKG_VERSION"), "/features.html#features-reference \"Features Reference - PyO3 user guide\"")] #![allow( missing_docs, non_camel_case_types, diff --git a/include/pyo3/pyo3-ffi/src/listobject.rs b/include/pyo3/pyo3-ffi/src/listobject.rs index 9d8b7ed6..881a8a87 100644 --- a/include/pyo3/pyo3-ffi/src/listobject.rs +++ b/include/pyo3/pyo3-ffi/src/listobject.rs @@ -50,6 +50,10 @@ extern "C" { arg3: Py_ssize_t, arg4: *mut PyObject, ) -> c_int; + #[cfg(Py_3_13)] + pub fn PyList_Extend(list: *mut PyObject, iterable: *mut PyObject) -> c_int; + #[cfg(Py_3_13)] + pub fn PyList_Clear(list: *mut PyObject) -> c_int; #[cfg_attr(PyPy, link_name = "PyPyList_Sort")] pub fn PyList_Sort(arg1: *mut PyObject) -> c_int; #[cfg_attr(PyPy, link_name = "PyPyList_Reverse")] diff --git a/include/pyo3/pyo3-ffi/src/methodobject.rs b/include/pyo3/pyo3-ffi/src/methodobject.rs index bd214409..37e1e206 100644 --- a/include/pyo3/pyo3-ffi/src/methodobject.rs +++ b/include/pyo3/pyo3-ffi/src/methodobject.rs @@ -50,7 +50,7 @@ pub type PyCFunctionFast = unsafe extern "C" fn( ) -> *mut PyObject; #[cfg(any(Py_3_10, not(Py_LIMITED_API)))] -#[deprecated(note = "renamed to `PyCFunctionFast`")] +#[cfg_attr(Py_3_10, deprecated(note = "renamed to `PyCFunctionFast`"))] pub type _PyCFunctionFast = PyCFunctionFast; pub type PyCFunctionWithKeywords = unsafe extern "C" fn( @@ -68,6 +68,7 @@ pub type PyCFunctionFastWithKeywords = unsafe extern "C" fn( ) -> *mut PyObject; #[cfg(any(Py_3_10, not(Py_LIMITED_API)))] +#[cfg_attr(Py_3_10, deprecated(note = "renamed to `PyCFunctionFastWithKeywords`"))] pub type _PyCFunctionFastWithKeywords = PyCFunctionFastWithKeywords; #[cfg(all(Py_3_9, not(Py_LIMITED_API)))] @@ -152,7 +153,7 @@ pub union PyMethodDefPointer { /// This variant corresponds with [`METH_FASTCALL`]. #[cfg(any(Py_3_10, not(Py_LIMITED_API)))] - #[deprecated(note = "renamed to `PyCFunctionFast`")] + #[cfg_attr(Py_3_10, deprecated(note = "renamed to `PyCFunctionFast`"))] pub _PyCFunctionFast: PyCFunctionFast, /// This variant corresponds with [`METH_FASTCALL`]. @@ -161,6 +162,7 @@ pub union PyMethodDefPointer { /// This variant corresponds with [`METH_FASTCALL`] | [`METH_KEYWORDS`]. #[cfg(any(Py_3_10, not(Py_LIMITED_API)))] + #[cfg_attr(Py_3_10, deprecated(note = "renamed to `PyCFunctionFastWithKeywords`"))] pub _PyCFunctionFastWithKeywords: PyCFunctionFastWithKeywords, /// This variant corresponds with [`METH_FASTCALL`] | [`METH_KEYWORDS`].
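The methodobject.rs hunk above only adds deprecation attributes: the underscore-prefixed aliases keep compiling, but on Python 3.10+ they now warn, and new FFI code should use the renamed `PyCFunctionFast` / `PyCFunctionFastWithKeywords` names. As a rough sketch of what a METH_FASTCALL method table looks like with the renamed union field (the `noop` function and its method entry below are illustrative only, not part of this patch; the pattern mirrors the `sum_as_string` doc example above):

use pyo3_ffi::*;

// Hypothetical fastcall function with the signature expected by `PyCFunctionFast`.
unsafe extern "C" fn noop(
    _self: *mut PyObject,
    _args: *mut *mut PyObject,
    _nargs: Py_ssize_t,
) -> *mut PyObject {
    Py_INCREF(Py_None());
    Py_None()
}

static mut METHODS: &[PyMethodDef] = &[
    PyMethodDef {
        ml_name: c_str!("noop").as_ptr(),
        // Use the renamed, non-deprecated union field; `_PyCFunctionFast` now warns on 3.10+.
        ml_meth: PyMethodDefPointer { PyCFunctionFast: noop },
        ml_flags: METH_FASTCALL,
        ml_doc: c_str!("does nothing").as_ptr(),
    },
    // A zeroed PyMethodDef marks the end of the array.
    PyMethodDef::zeroed(),
];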
diff --git a/include/pyo3/pyo3-ffi/src/moduleobject.rs b/include/pyo3/pyo3-ffi/src/moduleobject.rs index ff6458f4..2417664a 100644 --- a/include/pyo3/pyo3-ffi/src/moduleobject.rs +++ b/include/pyo3/pyo3-ffi/src/moduleobject.rs @@ -88,6 +88,10 @@ pub const Py_mod_create: c_int = 1; pub const Py_mod_exec: c_int = 2; #[cfg(Py_3_12)] pub const Py_mod_multiple_interpreters: c_int = 3; +#[cfg(Py_3_13)] +pub const Py_mod_gil: c_int = 4; + +// skipped private _Py_mod_LAST_SLOT #[cfg(Py_3_12)] pub const Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED: *mut c_void = 0 as *mut c_void; @@ -96,7 +100,15 @@ pub const Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED: *mut c_void = 1 as *mut c_void #[cfg(Py_3_12)] pub const Py_MOD_PER_INTERPRETER_GIL_SUPPORTED: *mut c_void = 2 as *mut c_void; -// skipped non-limited _Py_mod_LAST_SLOT +#[cfg(Py_3_13)] +pub const Py_MOD_GIL_USED: *mut c_void = 0 as *mut c_void; +#[cfg(Py_3_13)] +pub const Py_MOD_GIL_NOT_USED: *mut c_void = 1 as *mut c_void; + +#[cfg(all(not(Py_LIMITED_API), Py_GIL_DISABLED))] +extern "C" { + pub fn PyUnstable_Module_SetGIL(module: *mut PyObject, gil: *mut c_void) -> c_int; +} #[repr(C)] pub struct PyModuleDef { diff --git a/integration/requirements.txt b/integration/requirements.txt index e5b290de..2f47d3a7 100644 --- a/integration/requirements.txt +++ b/integration/requirements.txt @@ -1,3 +1,3 @@ flask;sys_platform!="win" gunicorn;sys_platform!="win" -httpx==0.24.1;sys_platform!="win" +httpx==0.27.2;sys_platform!="win" diff --git a/pyproject.toml b/pyproject.toml index f72f842c..56e82030 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,5 +61,5 @@ known-first-party = ["orjson"] python_version = "3.8" [[tool.mypy.overrides]] -module = ["dateutil", "pytz", "simplejson", "ujson"] +module = ["dateutil", "pytz"] ignore_missing_imports = true diff --git a/requirements.txt b/requirements.txt index 9981cc8c..a204d239 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,4 @@ -r test/requirements.txt maturin mypy==1.13.0 -ruff==0.7.1 +ruff==0.8.0 diff --git a/script/graph b/script/graph index 7cf47d0d..5e56e129 100755 --- a/script/graph +++ b/script/graph @@ -13,7 +13,7 @@ from tabulate import tabulate import orjson -LIBRARIES = ("orjson", "ujson", "rapidjson", "simplejson", "json") +LIBRARIES = ("orjson", "json") def aggregate(): diff --git a/script/install-fedora b/script/install-fedora index 19a5a455..6d14860e 100755 --- a/script/install-fedora +++ b/script/install-fedora @@ -14,7 +14,7 @@ export CARGO_TARGET_DIR="${CARGO_TARGET_DIR:-target}" rm /etc/yum.repos.d/fedora-cisco-openh264.repo || true -dnf install --setopt=install_weak_deps=false -y rustup clang lld "${PYTHON_PACKAGE}" +dnf install --setopt=install_weak_deps=false -y rustup clang lld "${PYTHON_PACKAGE}" python3-uv rustup-init --default-toolchain "${RUST_TOOLCHAIN}-${TARGET}" --profile minimal --component rust-src -y source "${HOME}/.cargo/env" @@ -24,7 +24,6 @@ cp ci/config.toml .cargo/config.toml cargo fetch --target="${TARGET}" & -curl -LsSf https://astral.sh/uv/install.sh | sh rm -rf "${VENV}" uv venv --python "${PYTHON}" "${VENV}" source "${VENV}/bin/activate" diff --git a/script/lint b/script/lint index 22f5742e..94a46b1c 100755 --- a/script/lint +++ b/script/lint @@ -2,7 +2,7 @@ set -eou pipefail -to_lint="./bench/*.py ./pysrc/orjson/__init__.pyi ./test/*.py script/pydataclass script/pymem +to_lint="./bench/*.py ./pysrc/orjson/__init__.pyi ./test/*.py script/pydataclass script/pysort script/pynumpy script/pynonstr script/pycorrectness script/graph integration/init 
integration/wsgi.py integration/typestubs.py integration/thread" diff --git a/script/pycorrectness b/script/pycorrectness index 3e9c3f36..de017d89 100755 --- a/script/pycorrectness +++ b/script/pycorrectness @@ -8,23 +8,17 @@ import lzma import os from pathlib import Path -import rapidjson -import simplejson -import ujson from tabulate import tabulate import orjson dirname = os.path.join(os.path.dirname(__file__), "..", "data") -LIBRARIES = ["orjson", "ujson", "rapidjson", "simplejson", "json"] +LIBRARIES = ["orjson", "json"] LIBRARY_FUNC_MAP = { "orjson": orjson.loads, - "ujson": ujson.loads, - "rapidjson": rapidjson.loads, - "simplejson": simplejson.loads, "json": json.loads, } diff --git a/script/pydataclass b/script/pydataclass index b43c95a4..853ea932 100755 --- a/script/pydataclass +++ b/script/pydataclass @@ -8,9 +8,6 @@ import os from timeit import timeit from typing import List -import rapidjson -import simplejson -import ujson from tabulate import tabulate import orjson @@ -50,7 +47,7 @@ def default(__obj): headers = ("Library", "dict (ms)", "dataclass (ms)", "vs. orjson") -LIBRARIES = ("orjson", "ujson", "rapidjson", "simplejson", "json") +LIBRARIES = ("orjson", "json") ITERATIONS = 100 @@ -72,34 +69,6 @@ for lib_name in LIBRARIES: lambda: json.dumps(objects_as_dataclass, default=default).encode("utf-8"), number=ITERATIONS, ) - elif lib_name == "simplejson": - as_dict = timeit( - lambda: simplejson.dumps(objects_as_dict).encode("utf-8"), - number=ITERATIONS, - ) - as_dataclass = timeit( - lambda: simplejson.dumps(objects_as_dataclass, default=default).encode( - "utf-8" - ), - number=ITERATIONS, - ) - elif lib_name == "ujson": - as_dict = timeit( - lambda: ujson.dumps(objects_as_dict).encode("utf-8"), - number=ITERATIONS, - ) - as_dataclass = None - elif lib_name == "rapidjson": - as_dict = timeit( - lambda: rapidjson.dumps(objects_as_dict).encode("utf-8"), - number=ITERATIONS, - ) - as_dataclass = timeit( - lambda: rapidjson.dumps(objects_as_dataclass, default=default).encode( - "utf-8" - ), - number=ITERATIONS, - ) elif lib_name == "orjson": as_dict = timeit(lambda: orjson.dumps(objects_as_dict), number=ITERATIONS) as_dataclass = timeit( diff --git a/script/pyindent b/script/pyindent index 753c855f..f31dc1ad 100755 --- a/script/pyindent +++ b/script/pyindent @@ -9,9 +9,6 @@ import sys from pathlib import Path from timeit import timeit -import rapidjson -import simplejson -import ujson from tabulate import tabulate import orjson @@ -37,7 +34,7 @@ data = read_fixture_obj(f"{filename}.json.xz") headers = ("Library", "compact (ms)", "pretty (ms)", "vs. 
orjson") -LIBRARIES = ("orjson", "ujson", "rapidjson", "simplejson", "json") +LIBRARIES = ("orjson", "json") output_in_kib_compact = len(orjson.dumps(data)) / 1024 output_in_kib_pretty = len(orjson.dumps(data, option=orjson.OPT_INDENT_2)) / 1024 @@ -73,30 +70,6 @@ for lib_name in LIBRARIES: number=ITERATIONS, ) correct = test_correctness(json.dumps(data, indent=2).encode("utf-8")) - elif lib_name == "simplejson": - time_compact = timeit( - lambda: simplejson.dumps(data).encode("utf-8"), - number=ITERATIONS, - ) - time_pretty = timeit( - lambda: simplejson.dumps(data, indent=2).encode("utf-8"), - number=ITERATIONS, - ) - correct = test_correctness(simplejson.dumps(data, indent=2).encode("utf-8")) - elif lib_name == "ujson": - time_compact = timeit( - lambda: ujson.dumps(data).encode("utf-8"), - number=ITERATIONS, - ) - time_pretty = timeit( - lambda: ujson.dumps(data, indent=2).encode("utf-8"), - number=ITERATIONS, - ) - correct = test_correctness(ujson.dumps(data, indent=2).encode("utf-8")) - elif lib_name == "rapidjson": - time_compact = timeit(lambda: rapidjson.dumps(data), number=ITERATIONS) - time_pretty = timeit(lambda: rapidjson.dumps(data, indent=2), number=ITERATIONS) - correct = test_correctness(rapidjson.dumps(data, indent=2)) elif lib_name == "orjson": time_compact = timeit(lambda: orjson.dumps(data), number=ITERATIONS) time_pretty = timeit( diff --git a/script/pymem b/script/pymem deleted file mode 100755 index 6b97bb0d..00000000 --- a/script/pymem +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: (Apache-2.0 OR MIT) - -import io -import subprocess - -from tabulate import tabulate - -buf = io.StringIO() - -headers = ("Library", "import, read() RSS (MiB)", "loads() increase in RSS (MiB)") - -LIBRARIES = ("orjson", "ujson", "rapidjson", "simplejson", "json") - -FIXTURES = ("canada.json", "citm_catalog.json", "github.json", "twitter.json") - -for fixture in sorted(FIXTURES, reverse=True): - table = [] - buf.write("\n" + "#### " + fixture + "\n\n") - for lib_name in LIBRARIES: - proc = subprocess.Popen( - ("bench/run_mem", f"data/{fixture}.xz", lib_name), stdout=subprocess.PIPE - ) - output = proc.stdout.readline().decode("utf-8").strip().split(",") - mem_base = int(output[0]) / 1024 / 1024 - mem_diff = int(output[1]) / 1024 / 1024 - correct = bool(int(output[2])) - if correct: - table.append((lib_name, f"{mem_base:,.1f}", f"{mem_diff:,.1f}")) - else: - table.append((lib_name, "", "")) - buf.write(tabulate(table, headers, tablefmt="github") + "\n") - -print(buf.getvalue()) diff --git a/script/pynonstr b/script/pynonstr index 9670443a..348975ef 100755 --- a/script/pynonstr +++ b/script/pynonstr @@ -9,9 +9,6 @@ import random from time import mktime from timeit import timeit -import rapidjson -import simplejson -import ujson from tabulate import tabulate import orjson @@ -33,7 +30,7 @@ data_as_str = orjson.loads(orjson.dumps(data_as_obj, option=orjson.OPT_NON_STR_K headers = ("Library", "str keys (ms)", "int keys (ms)", "int keys sorted (ms)") -LIBRARIES = ("orjson", "ujson", "rapidjson", "simplejson", "json") +LIBRARIES = ("orjson", "json") ITERATIONS = 500 @@ -69,41 +66,6 @@ for lib_name in LIBRARIES: None # TypeError: '<' not supported between instances of 'str' and 'int' ) correct = False - elif lib_name == "simplejson": - time_as_str = timeit( - lambda: simplejson.dumps(data_as_str).encode("utf-8"), - number=ITERATIONS, - ) - time_as_obj = timeit( - lambda: simplejson.dumps(data_as_obj).encode("utf-8"), - number=ITERATIONS, - ) - 
time_as_obj_sorted = timeit( - lambda: simplejson.dumps(data_as_obj, sort_keys=True).encode("utf-8"), - number=ITERATIONS, - ) - correct = test_correctness( - simplejson.dumps(data_as_obj, sort_keys=True).encode("utf-8") - ) - elif lib_name == "ujson": - time_as_str = timeit( - lambda: ujson.dumps(data_as_str).encode("utf-8"), - number=ITERATIONS, - ) - time_as_obj = timeit( - lambda: ujson.dumps(data_as_obj).encode("utf-8"), - number=ITERATIONS, - ) - time_as_obj_sorted = None # segfault - correct = False - elif lib_name == "rapidjson": - time_as_str = timeit( - lambda: rapidjson.dumps(data_as_str).encode("utf-8"), - number=ITERATIONS, - ) - time_as_obj = None - time_as_obj_sorted = None - correct = False elif lib_name == "orjson": time_as_str = timeit( lambda: orjson.dumps(data_as_str, None, orjson.OPT_NON_STR_KEYS), diff --git a/script/pynumpy b/script/pynumpy index 2ad6d982..db459c22 100755 --- a/script/pynumpy +++ b/script/pynumpy @@ -11,8 +11,6 @@ from timeit import timeit import numpy import psutil -import rapidjson -import simplejson from memory_profiler import memory_usage from tabulate import tabulate @@ -71,7 +69,7 @@ def default(__obj): headers = ("Library", "Latency (ms)", "RSS diff (MiB)", "vs. orjson") -LIBRARIES = ("orjson", "ujson", "rapidjson", "simplejson", "json") +LIBRARIES = ("orjson", "json") ITERATIONS = 10 @@ -80,17 +78,6 @@ def orjson_dumps(): return orjson.dumps(array, option=orjson.OPT_SERIALIZE_NUMPY) -ujson_dumps = None - - -def rapidjson_dumps(): - return rapidjson.dumps(array, default=default).encode("utf-8") - - -def simplejson_dumps(): - return simplejson.dumps(array, default=default).encode("utf-8") - - def json_dumps(): return json.dumps(array, default=default).encode("utf-8") diff --git a/script/pysort b/script/pysort index b1a47204..2e89e3da 100755 --- a/script/pysort +++ b/script/pysort @@ -8,9 +8,6 @@ import os from pathlib import Path from timeit import timeit -import rapidjson -import simplejson -import ujson from tabulate import tabulate import orjson @@ -34,7 +31,7 @@ data = read_fixture_obj("twitter.json.xz") headers = ("Library", "unsorted (ms)", "sorted (ms)", "vs. 
orjson") -LIBRARIES = ("orjson", "ujson", "rapidjson", "simplejson", "json") +LIBRARIES = ("orjson", "json") ITERATIONS = 500 @@ -56,33 +53,6 @@ for lib_name in LIBRARIES: lambda: json.dumps(data, sort_keys=True).encode("utf-8"), number=ITERATIONS, ) - elif lib_name == "simplejson": - time_unsorted = timeit( - lambda: simplejson.dumps(data).encode("utf-8"), - number=ITERATIONS, - ) - time_sorted = timeit( - lambda: simplejson.dumps(data, sort_keys=True).encode("utf-8"), - number=ITERATIONS, - ) - elif lib_name == "ujson": - time_unsorted = timeit( - lambda: ujson.dumps(data).encode("utf-8"), - number=ITERATIONS, - ) - time_sorted = timeit( - lambda: ujson.dumps(data, sort_keys=True).encode("utf-8"), - number=ITERATIONS, - ) - elif lib_name == "rapidjson": - time_unsorted = timeit( - lambda: rapidjson.dumps(data).encode("utf-8"), - number=ITERATIONS, - ) - time_sorted = timeit( - lambda: rapidjson.dumps(data, sort_keys=True).encode("utf-8"), - number=ITERATIONS, - ) elif lib_name == "orjson": time_unsorted = timeit(lambda: orjson.dumps(data), number=ITERATIONS) time_sorted = timeit( diff --git a/src/ffi/fragment.rs b/src/ffi/fragment.rs index 9cfd4d7f..d961e0bc 100644 --- a/src/ffi/fragment.rs +++ b/src/ffi/fragment.rs @@ -36,8 +36,7 @@ pub unsafe extern "C" fn orjson_fragment_tp_new( kwds: *mut PyObject, ) -> *mut PyObject { if Py_SIZE(args) != 1 || !kwds.is_null() { - raise_args_exception(); - null_mut() + raise_args_exception() } else { let contents = PyTuple_GET_ITEM(args, 0); Py_INCREF(contents); @@ -50,12 +49,14 @@ pub unsafe extern "C" fn orjson_fragment_tp_new( } } +const FRAGMENT_LAYOUT: core::alloc::Layout = core::alloc::Layout::new::(); + #[no_mangle] #[cold] #[cfg_attr(feature = "optimize", optimize(size))] pub unsafe extern "C" fn orjson_fragment_dealloc(object: *mut PyObject) { Py_DECREF((*(object as *mut Fragment)).contents); - std::alloc::dealloc(object as *mut u8, std::alloc::Layout::new::()); + std::alloc::dealloc(object as *mut u8, FRAGMENT_LAYOUT); } #[cfg(Py_3_10)] diff --git a/src/lib.rs b/src/lib.rs index 64ace9e1..e65cd7f5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ #![cfg_attr(feature = "intrinsics", feature(core_intrinsics))] #![cfg_attr(feature = "optimize", feature(optimize_attribute))] #![cfg_attr(feature = "unstable-simd", feature(portable_simd))] +#![allow(unknown_lints)] #![allow(internal_features)] // core_intrinsics #![allow(non_camel_case_types)] #![allow(static_mut_refs)] diff --git a/src/str/ffi.rs b/src/str/ffi.rs index 21841a88..9a51aa0c 100644 --- a/src/str/ffi.rs +++ b/src/str/ffi.rs @@ -1,6 +1,5 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use core::ffi::c_void; use pyo3_ffi::*; // see unicodeobject.h for documentation @@ -8,12 +7,12 @@ use pyo3_ffi::*; #[inline] pub fn hash_str(op: *mut PyObject) -> Py_hash_t { unsafe { - let data_ptr: *mut c_void = if (*op.cast::()).compact() == 1 + let data_ptr: *mut core::ffi::c_void = if (*op.cast::()).compact() == 1 && (*op.cast::()).ascii() == 1 { - (op as *mut PyASCIIObject).offset(1) as *mut c_void + (op as *mut PyASCIIObject).offset(1) as *mut core::ffi::c_void } else { - (op as *mut PyCompactUnicodeObject).offset(1) as *mut c_void + (op as *mut PyCompactUnicodeObject).offset(1) as *mut core::ffi::c_void }; let num_bytes = (*(op as *mut PyASCIIObject)).length * ((*(op as *mut PyASCIIObject)).kind()) as isize; diff --git a/src/util.rs b/src/util.rs index 8add0f75..2958d287 100644 --- a/src/util.rs +++ b/src/util.rs @@ -267,7 +267,11 @@ macro_rules! reserve_pretty { macro_rules! 
assume { ($expr:expr) => { debug_assert!($expr); - #[cfg(feature = "intrinsics")] + #[cfg(feature = "assert_unchecked")] + unsafe { + core::hint::assert_unchecked($expr); + }; + #[cfg(all(not(feature = "assert_unchecked"), feature = "intrinsics"))] unsafe { core::intrinsics::assume($expr); }; diff --git a/test/requirements.txt b/test/requirements.txt index 2c04102f..f2791df7 100644 --- a/test/requirements.txt +++ b/test/requirements.txt @@ -7,4 +7,3 @@ psutil;(sys_platform=="linux" or sys_platform == "macos") and platform_machine== pytest pytz typing_extensions;python_version<"3.8" -xxhash==1.4.3;sys_platform=="linux" and platform_machine=="x86_64" and python_version<"3.9" # creates non-compact ASCII for test_str_ascii diff --git a/test/test_type.py b/test/test_type.py index 81c89307..3a3f7a56 100644 --- a/test/test_type.py +++ b/test/test_type.py @@ -5,11 +5,6 @@ import pytest -try: - import xxhash -except ImportError: - xxhash = None - import orjson @@ -260,17 +255,6 @@ def test_str_surrogates_dumps(self): orjson.JSONEncodeError, orjson.dumps, b"\xed\xa0\xbd\xed\xba\x80" ) # \ud83d\ude80 - @pytest.mark.skipif( - xxhash is None, reason="xxhash install broken on win, python3.9, Azure" - ) - def test_str_ascii(self): - """ - str is ASCII but not compact - """ - digest = xxhash.xxh32_hexdigest("12345") - for _ in range(2): - assert orjson.dumps(digest) == b'"b30d56b4"' - def test_bytes_dumps(self): """ bytes dumps not supported
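Returning to the `assume!` change in src/util.rs above: the new branch prefers `core::hint::assert_unchecked`, which works on stable Rust, over the nightly-only `core::intrinsics::assume`, so the optimizer hint no longer requires the `core_intrinsics` feature when the `assert_unchecked` feature is enabled. A minimal standalone sketch of that pattern, assuming the stable-only branch; the macro body mirrors the patch, while `first_element` is an illustrative caller rather than code from this repository:

macro_rules! assume {
    ($expr:expr) => {
        // Checked in debug builds, assumed (and usable by the optimizer) in release builds.
        debug_assert!($expr);
        // SAFETY: the caller guarantees `$expr` holds; violating it is undefined behavior.
        unsafe {
            core::hint::assert_unchecked($expr);
        };
    };
}

fn first_element(values: &[u32]) -> u32 {
    // Caller-provided invariant: `values` is never empty here, so the
    // bounds check on the indexing below can be elided.
    assume!(!values.is_empty());
    values[0]
}

fn main() {
    assert_eq!(first_element(&[7, 8]), 7);
}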