diff --git a/.asf.yaml b/.asf.yaml index f46c437a79..b217fd7807 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -43,7 +43,9 @@ github: dismiss_stale_reviews: false required_linear_history: true - del_branch_on_merge: true + pull_requests: + # auto-delete head branches after being merged + del_branch_on_merge: true features: wiki: false issues: true diff --git a/.cargo/audit.toml b/.cargo/audit.toml index a46052f3b5..d403f0ac5a 100644 --- a/.cargo/audit.toml +++ b/.cargo/audit.toml @@ -25,4 +25,15 @@ ignore = [ # # Introduced by hive_metastore, tracked at https://github.com/cloudwego/pilota/issues/293 "RUSTSEC-2024-0388", + # `paste` is unmaintained; consider using an alternative + # + # Introduced by hive_metastore, tracked at https://github.com/cloudwego/pilota/issues/293 + "RUSTSEC-2024-0436", + # `rustls-pemfile` is unmaintained + # + # Introduced by object_store, see https://github.com/apache/arrow-rs-object-store/issues/564 + "RUSTSEC-2025-0134", + + # Tracked here: https://github.com/paupino/rust-decimal/issues/766 + "RUSTSEC-2026-0001", ] diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index 47b0c47874..fc6b1224e8 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -40,7 +40,7 @@ jobs: security_audit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: @@ -48,4 +48,3 @@ jobs: - uses: rustsec/audit-check@v2.0.0 with: token: ${{ secrets.GITHUB_TOKEN }} - ignore: RUSTSEC-2024-0436 diff --git a/.github/workflows/bindings_python_ci.yml b/.github/workflows/bindings_python_ci.yml index cfc01b01b0..955ed21e16 100644 --- a/.github/workflows/bindings_python_ci.yml +++ b/.github/workflows/bindings_python_ci.yml @@ -41,10 +41,21 @@ concurrency: cancel-in-progress: true jobs: + # check-rust: + # runs-on: ubuntu-latest + # steps: + # - uses: actions/checkout@v6 + # - name: Check format + # working-directory: "bindings/python" + # run: cargo fmt --all -- --check + # - name: Check clippy + # working-directory: "bindings/python" + # run: cargo clippy --all-targets --all-features -- -D warnings + check-python: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: astral-sh/setup-uv@v7 with: version: "0.9.3" @@ -60,3 +71,41 @@ jobs: working-directory: "bindings/python" run: | uvx ruff check . 
+ + # test: + # runs-on: ${{ matrix.os }} + # strategy: + # matrix: + # os: + # - ubuntu-latest + # - macos-latest + # - windows-latest + # steps: + # - uses: actions/checkout@v6 + # - uses: actions/setup-python@v6 + # with: + # python-version: 3.12 + # - uses: PyO3/maturin-action@v1 + # with: + # working-directory: "bindings/python" + # command: build + # args: --out dist --sdist + # - uses: astral-sh/setup-uv@v7 + # with: + # version: "0.9.3" + # enable-cache: true + # - name: Sync dependencies + # working-directory: "bindings/python" + # shell: bash + # run: | + # make install + # - name: Install built wheel + # working-directory: "bindings/python" + # shell: bash + # run: | + # uv pip install --reinstall dist/pyiceberg_core-*.whl + # - name: Run tests + # working-directory: "bindings/python" + # shell: bash + # run: | + # make test diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c0f360b982..f393309bcb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,7 +52,7 @@ jobs: - ubuntu-latest - macos-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder @@ -101,7 +101,7 @@ jobs: root-reserve-mb: 10240 temp-reserve-mb: 10240 - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder @@ -126,7 +126,7 @@ jobs: - macos-latest - windows-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder @@ -143,7 +143,6 @@ jobs: matrix: test-suite: - { name: "default", args: "--all-targets --all-features --workspace" } - - { name: "smol", args: "--all-targets --no-default-features --features smol --features storage-all --workspace" } - { name: "doc", args: "--doc --all-features --workspace" } name: Unit Tests (${{ matrix.test-suite.name }}) steps: @@ -158,7 +157,7 @@ jobs: root-reserve-mb: 10240 temp-reserve-mb: 10240 - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder @@ -180,7 +179,7 @@ jobs: name: Verify MSRV runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Install protoc uses: arduino/setup-protoc@v3 with: diff --git a/.github/workflows/ci_typos.yml b/.github/workflows/ci_typos.yml index 4c60369482..b79ab0c0d1 100644 --- a/.github/workflows/ci_typos.yml +++ b/.github/workflows/ci_typos.yml @@ -40,6 +40,6 @@ jobs: env: FORCE_COLOR: 1 steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Check typos - uses: crate-ci/typos@v1.39.2 + uses: crate-ci/typos@v1.41.0 diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 71d35001da..66c17a668d 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -42,10 +42,11 @@ jobs: - "crates/catalog/glue" - "crates/catalog/hms" - "crates/catalog/rest" + - "crates/catalog/s3tables" - "crates/catalog/sql" - "crates/integrations/datafusion" steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder diff --git a/.github/workflows/release_python.yml b/.github/workflows/release_python.yml index e6b7021c9b..85663fc75f 100644 --- a/.github/workflows/release_python.yml +++ b/.github/workflows/release_python.yml @@ -85,7 +85,7 @@ jobs: runs-on: ubuntu-latest needs: [validate-release-tag] steps: - - uses: actions/checkout@v5 + - uses: 
actions/checkout@v6 - name: Install toml-cli if: ${{ needs.validate-release-tag.outputs.is-rc == 'true' }} @@ -107,7 +107,7 @@ jobs: command: sdist args: -o dist - name: Upload sdist - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@v6 with: name: wheels-sdist path: bindings/python/dist @@ -128,7 +128,7 @@ jobs: } - { os: ubuntu-latest, target: "armv7l" } steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Install toml-cli if: ${{ needs.validate-release-tag.outputs.is-rc == 'true' }} @@ -159,7 +159,7 @@ jobs: command: build args: --release -o dist - name: Upload wheels - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@v6 with: name: wheels-${{ matrix.os }}-${{ matrix.target }} path: bindings/python/dist @@ -178,7 +178,7 @@ jobs: steps: - name: Download all the dists - uses: actions/download-artifact@v6 + uses: actions/download-artifact@v7 with: pattern: wheels-* merge-multiple: true diff --git a/.github/workflows/release_python_nightly.yml b/.github/workflows/release_python_nightly.yml index 9c27554f9a..833b8ee6a6 100644 --- a/.github/workflows/release_python_nightly.yml +++ b/.github/workflows/release_python_nightly.yml @@ -43,7 +43,7 @@ jobs: if: github.repository == 'apache/iceberg-rust' # Only run for apache repo runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: ./.github/actions/overwrite-package-version # Overwrite package version with timestamp with: @@ -56,7 +56,7 @@ jobs: args: -o dist - name: Upload sdist - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@v6 with: name: wheels-sdist path: bindings/python/dist @@ -78,7 +78,7 @@ jobs: } - { os: ubuntu-latest, target: "armv7l" } steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: ./.github/actions/overwrite-package-version # Overwrite package version with timestamp with: @@ -102,7 +102,7 @@ jobs: args: --release -o dist - name: Upload wheels - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@v6 with: name: wheels-${{ matrix.os }}-${{ matrix.target }} path: bindings/python/dist @@ -120,7 +120,7 @@ jobs: steps: - name: Download all the dists - uses: actions/download-artifact@v6 + uses: actions/download-artifact@v7 with: pattern: wheels-* merge-multiple: true @@ -128,9 +128,36 @@ jobs: - name: List downloaded artifacts run: ls -R bindings/python/dist - name: Publish to TestPyPI + id: publish-testpypi + continue-on-error: true uses: pypa/gh-action-pypi-publish@release/v1 with: repository-url: https://test.pypi.org/legacy/ skip-existing: true packages-dir: bindings/python/dist verbose: true + - name: Display error message on publish failure + if: steps.publish-testpypi.outcome == 'failure' + run: | + echo "::error::Failed to publish to TestPyPI" + echo "" + echo "⚠️ TestPyPI Publish Failed" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "" + echo "This may be due to TestPyPI storage limits." 
+ echo "See: https://docs.pypi.org/project-management/storage-limits" + echo "" + echo "To resolve this issue, use the pypi-cleanup utility to clean up old TestPyPI artifacts:" + echo "https://pypi.org/project/pypi-cleanup/" + echo "" + echo " uvx pypi-cleanup --package pyiceberg-core --host https://test.pypi.org/ \\" + echo " --verbose -d 10 --do-it --username " + echo "" + echo "Requirements:" + echo " • Must be a maintainer for pyiceberg-core on TestPyPI" + echo " (https://test.pypi.org/project/pyiceberg-core)" + echo " • Requires TestPyPI password and 2FA" + echo " • ⚠️ ONLY do this for TestPyPI, NOT for production PyPI!" + echo "" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + exit 1 diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 5e653cffe4..95a4fdc256 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -31,7 +31,7 @@ jobs: if: github.repository_owner == 'apache' runs-on: ubuntu-22.04 steps: - - uses: actions/stale@v10.1.0 + - uses: actions/stale@v10.1.1 with: stale-issue-label: 'stale' exempt-issue-labels: 'not-stale' diff --git a/.github/workflows/website.yml b/.github/workflows/website.yml index c5925da6ae..1a52482b08 100644 --- a/.github/workflows/website.yml +++ b/.github/workflows/website.yml @@ -36,7 +36,7 @@ jobs: permissions: contents: write steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup mdBook uses: peaceiris/actions-mdbook@v2 diff --git a/.licenserc.yaml b/.licenserc.yaml index da87374c3b..0bcb65f3b7 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -31,6 +31,7 @@ header: - "**/DEPENDENCIES.*.tsv" # Release distributions - "dist/*" + - "target" - "Cargo.lock" - "bindings/python/uv.lock" - ".github/PULL_REQUEST_TEMPLATE.md" diff --git a/.typos.toml b/.typos.toml index 9363f17c9a..407ce8168c 100644 --- a/.typos.toml +++ b/.typos.toml @@ -15,5 +15,8 @@ # specific language governing permissions and limitations # under the License. +[type.rust] +extend-ignore-identifiers-re = ["^bimap$"] + [files] extend-exclude = ["**/testdata", "CHANGELOG.md"] diff --git a/CHANGELOG.md b/CHANGELOG.md index f66f64f478..bd35e6b5d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,164 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/) and this project adheres to [Semantic Versioning](https://semver.org/). 
+## [v0.8.0] - 2026-01-06 + +### Breaking Changes + +* **API Changes:** + * refactor: Remove redundant parameters from SnapshotProducer validation methods by @Li0k in https://github.com/apache/iceberg-rust/pull/1853 + * chore: Remove deprecated `remove_all` in FileIO by @jonathanc-n in https://github.com/apache/iceberg-rust/pull/1863 + * refactor: Drop smol runtime support by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1900 + +* **Compatibility:** + * chore: bump MSRV to 1.88, fix warnings and clippy errors by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1902 + +* **Dependency Updates:** + * Upgrade opendal to v0.55 by @dentiny in https://github.com/apache/iceberg-rust/pull/1895 + * deps: bump DataFusion to 51, Arrow to 57, pyo3 to 0.26 by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1899 + +* **Other:** + * Remove wildcard pattern in exhaustive enums by @lgingerich in https://github.com/apache/iceberg-rust/pull/1925 + +### All Changes + +* chore(deps): Bump tempfile from 3.22.0 to 3.23.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1717 +* chore(deps): Bump rand from 0.8.5 to 0.9.2 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1716 +* chore(deps): Bump crate-ci/typos from 1.36.2 to 1.36.3 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1715 +* refactor: Improve REST catalog's authenticate method by @imor in https://github.com/apache/iceberg-rust/pull/1712 +* chore(deps): Bump serde_with from 3.14.0 to 3.14.1 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1727 +* refactor(writer): Refactor writers for the future partitioning writers by @CTTY in https://github.com/apache/iceberg-rust/pull/1657 +* Set lock on version of Pydantic by @Fokko in https://github.com/apache/iceberg-rust/pull/1737 +* chore(deps): Bump crate-ci/typos from 1.36.3 to 1.37.2 by @Standing-Man in https://github.com/apache/iceberg-rust/pull/1734 +* feat: support more partition transformations for PartitionSpec::partition_to_path by @mnpw in https://github.com/apache/iceberg-rust/pull/1730 +* chore: Update website for 0.7.0 by @CTTY in https://github.com/apache/iceberg-rust/pull/1738 +* feat(sql-catalog): implement register table for sql catalog by @Standing-Man in https://github.com/apache/iceberg-rust/pull/1724 +* fix: ensure CoalescePartitionsExec is enabled for IcebergCommitExec by @sgrebnov in https://github.com/apache/iceberg-rust/pull/1723 +* chore(deps): Bump regex from 1.11.2 to 1.12.1 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1741 +* chore(deps): Bump crate-ci/typos from 1.37.2 to 1.38.1 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1740 +* Improve `IcebergCommitExec` to correctly populate properties/schema by @sgrebnov in https://github.com/apache/iceberg-rust/pull/1721 +* feat(spec): add `table_properties.rs` to spec by @kaushiksrini in https://github.com/apache/iceberg-rust/pull/1733 +* chore(deps): Bump actions/stale from 10.0.0 to 10.1.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1726 +* docs: remove -src suffix from artifact name by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1743 +* feat(reader): Make ArrowReaderBuilder::new public by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1748 +* feat(writer): Add clustered and fanout writer by @CTTY in https://github.com/apache/iceberg-rust/pull/1735 +* feat(catalog): impl builder for SqlCatalog by @335g in
https://github.com/apache/iceberg-rust/pull/1666 +* fix: fix read parquet file when schema changes by @chenzl25 in https://github.com/apache/iceberg-rust/pull/1750 +* docs: Fix broken orbstack and podman links in CONTRIBUTING.md by @petern48 in https://github.com/apache/iceberg-rust/pull/1757 +* chore(deps): Bump tokio from 1.47.1 to 1.48.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1763 +* chore(deps): Bump backon from 1.5.2 to 1.6.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1762 +* fix: global eq delete matching should apply to only strictly older files, and fix partition scoped matching to consider spec id by @amogh-jahagirdar in https://github.com/apache/iceberg-rust/pull/1758 +* chore(deps): Bump apache/skywalking-eyes from 0.7.0 to 0.8.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1760 +* chore(deps): Bump rust_decimal from 1.38.0 to 1.39.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1761 +* feat(datafusion): implement the project node to add the partition columns by @fvaleye in https://github.com/apache/iceberg-rust/pull/1602 +* fix: snapshot was producing empty summary by @imor in https://github.com/apache/iceberg-rust/pull/1767 +* docs: Add examples for PartitioningWriter by @CTTY in https://github.com/apache/iceberg-rust/pull/1754 +* feat(sqllogictest): Add support for iceberg datafusion sqllogictest integration by @lliangyu-lin in https://github.com/apache/iceberg-rust/pull/1764 +* fix(build): Pin home version after merging #1764 by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1783 +* minor: Update Cargo.lock to add home by @CTTY in https://github.com/apache/iceberg-rust/pull/1785 +* chore(deps): Bump aws-sdk-s3tables from 1.40.0 to 1.41.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1790 +* chore(deps): Bump rand from 0.8.5 to 0.9.2 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1789 +* chore(deps): Bump actions/download-artifact from 5 to 6 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1788 +* chore(deps): Bump actions/upload-artifact from 4 to 5 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1787 +* fix(reader): filter row groups when FileScanTask contains byte ranges by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1779 +* refactor(arrow,datafusion): Reuse PartitionValueCalculator in RecordBatchPartitionSplitter by @CTTY in https://github.com/apache/iceberg-rust/pull/1781 +* feat: Update Datafusion to v49 by @DerGut in https://github.com/apache/iceberg-rust/pull/1704 +* deps: unpin pydantic by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1793 +* feat(reader): Add Date32 support to RecordBatchTransformer create_column by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1792 +* feat(catalog): Implement update_table for S3TablesCatalog by @CTTY in https://github.com/apache/iceberg-rust/pull/1594 +* feat: Update Datafusion to v50 by @DerGut in https://github.com/apache/iceberg-rust/pull/1728 +* ci: Migrate to uv for python by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1796 +* ci: Relax msrv check thanks to rust 2024 by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1795 +* ci: Don't dismiss stale review to make contribution easier by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1799 +* add Makefile to bindings/python by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1800 +* chore: inline format args by @colinmarc
in https://github.com/apache/iceberg-rust/pull/1805 +* refactor: Migrate from tera to minijinja by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1798 +* fix(reader): fix position delete bugs with row group skipping by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1806 +* feat(datafusion): implement the partitioning node for DataFusion to define the partitioning by @fvaleye in https://github.com/apache/iceberg-rust/pull/1620 +* feat(reader): Date32 from days since epoch for Literal::try_from_json by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1803 +* chore(deps): Bump aws-sdk-glue from 1.125.0 to 1.126.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1812 +* chore(deps): Bump astral-sh/setup-uv from 6 to 7 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1811 +* chore(deps): Bump crate-ci/typos from 1.38.1 to 1.39.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1810 +* feat(reader): position-based column projection for Parquet files without field IDs (migrated tables) by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1777 +* fix(reader): Equality delete files with partial schemas (containing only equality columns) by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1782 +* infra: use apache/hive:4.0.0 as hive Dockerfile base image by @geruh in https://github.com/apache/iceberg-rust/pull/1823 +* fix: StructType fails to deserialize JSON with type field by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1822 +* feat: Support for V3 Metadata by @c-thiel in https://github.com/apache/iceberg-rust/pull/1682 +* fix(reader): Support both position and equality delete files on the same FileScanTask by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1778 +* feat(datafusion): Add TaskWriter for DataFusion by @CTTY in https://github.com/apache/iceberg-rust/pull/1769 +* fix: support reading compressed metadata by @colinmarc in https://github.com/apache/iceberg-rust/pull/1802 +* Support deserializing bytes by @Fokko in https://github.com/apache/iceberg-rust/pull/1820 +* fix: Bump CI Spark version to 3.5.7 by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1832 +* infra: use python 3.12 for release by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1836 +* pyiceberg-core: create smaller artifacts by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1841 +* infra: add collaborators to .asf.yaml by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1842 +* pyiceberg-core: use pyo3 abi3-py310 by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1843 +* ci: parallelize unit test with matrix by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1833 +* pyiceberg-core: create even smaller artifacts by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1844 +* chore: Split values.rs into separate files by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1840 +* feat(datafusion): Support `INSERT INTO` partitioned tables by @CTTY in https://github.com/apache/iceberg-rust/pull/1827 +* docs: Add Wrappers project to README by @burmecia in https://github.com/apache/iceberg-rust/pull/1852 +* feat(reader): Add PartitionSpec support to FileScanTask and RecordBatchTransformer by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1821 +* feat(reader): null struct default values in create_column by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1847 +* refactor: Remove redundant parameters from
SnapshotProducer validation methods by @Li0k in https://github.com/apache/iceberg-rust/pull/1853 +* infra: add verbose=true to pypa/gh-action-pypi-publish by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1846 +* use RecordBatchTransformerBuilder instead of RecordBatchTransformer by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1857 +* chore(deps): Bump bytes from 1.10.1 to 1.11.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1861 +* chore(deps): Bump serde_with from 3.15.1 to 3.16.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1859 +* chore(deps): Bump fs-err from 3.1.3 to 3.2.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1860 +* chore(deps): Bump crate-ci/typos from 1.39.0 to 1.39.2 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1858 +* chore: Remove deprecated `remove_all` in FileIO by @jonathanc-n in https://github.com/apache/iceberg-rust/pull/1863 +* infra: notify on github workflow failure by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1870 +* feat(reader): Add binary support to `get_arrow_datum` for equality deletes with binary type by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1848 +* Raise concurrency errors properly for glue tables by @jembishop in https://github.com/apache/iceberg-rust/pull/1875 +* infra: add instructions for cleaning up testpypi artifacts by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1855 +* chore(deps): Bump actions/checkout from 5 to 6 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1883 +* Update apache-avro to v0.21.0 by @N-Boutaib in https://github.com/apache/iceberg-rust/pull/1881 +* docs: Clarify functionality of `SnapshotProduceOperation` by @jonathanc-n in https://github.com/apache/iceberg-rust/pull/1874 +* feat(datafusion): Split IcebergTableProvider into static and non-static table provider by @CTTY in https://github.com/apache/iceberg-rust/pull/1879 +* infra: use new `del_branch_on_merge` in .asf.yaml by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1888 +* Upgrade opendal to v0.55 by @dentiny in https://github.com/apache/iceberg-rust/pull/1895 +* chore(deps): Bump http from 1.3.1 to 1.4.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1892 +* chore(deps): Bump crate-ci/typos from 1.39.2 to 1.40.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1891 +* feat(datafusion): Add `sort_by_partition` to sort the input partitioned data by @CTTY in https://github.com/apache/iceberg-rust/pull/1618 +* rfc: Modularize `iceberg` Implementations by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1854 +* refactor(writer): Make writer builders non-consuming in build by @leonzchang in https://github.com/apache/iceberg-rust/pull/1889 +* fix: Keep snapshot log on replace by @c-thiel in https://github.com/apache/iceberg-rust/pull/1896 +* chore(deps): Bump actions/stale from 10.1.0 to 10.1.1 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1908 +* feat(datafusion): Add sqllogictest for DataFusion INSERT INTO by @CTTY in https://github.com/apache/iceberg-rust/pull/1887 +* refactor: Drop smol runtime support by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1900 +* chore(deps): Bump minijinja from 2.12.0 to 2.13.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1909 +* chore(deps): Bump uuid from 1.18.1 to 1.19.0 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1910 +* 
feat(core): Add support for `_file` column by @gbrgr in https://github.com/apache/iceberg-rust/pull/1824 +* feat: Make `rest` types public, add documentation by @c-thiel in https://github.com/apache/iceberg-rust/pull/1901 +* chore: bump MSRV to 1.88, fix warnings and clippy errors by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1902 +* ci: Make s3tables ready for publish by @Xuanwo in https://github.com/apache/iceberg-rust/pull/1916 +* deps: bump DataFusion to 51, Arrow to 57, pyo3 to 0.26 by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1899 +* fix: Serialize `split_offsets` as null when empty by @AndreaBozzo in https://github.com/apache/iceberg-rust/pull/1906 +* feat(catalog): Implement update_table for SqlCatalog by @lgingerich in https://github.com/apache/iceberg-rust/pull/1911 +* fix: Respect precision and scale for Decimal128 in value.rs by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1921 +* fix: restore no-op logic in constants_map for NULL identity-partitioned columns by @mbutrovich in https://github.com/apache/iceberg-rust/pull/1922 +* fix: stack overflow when loading large equality deletes by @dojiong in https://github.com/apache/iceberg-rust/pull/1915 +* chore(deps): Bump actions/upload-artifact from 5 to 6 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1931 +* chore(deps): Bump actions/download-artifact from 6 to 7 by @dependabot[bot] in https://github.com/apache/iceberg-rust/pull/1932 +* Remove wildcard pattern in exhaustive enums by @lgingerich in https://github.com/apache/iceberg-rust/pull/1925 +* fix: prioritize delete manifests to prevent scan deadlock by @dojiong in https://github.com/apache/iceberg-rust/pull/1937 +* feat: Include statistics for Reserved Fields by @Fokko in https://github.com/apache/iceberg-rust/pull/1849 +* fix(website): Update expected messages by @CTTY in https://github.com/apache/iceberg-rust/pull/1942 +* feat: Implement shared delete file loading and caching for ArrowReader by @dojiong in https://github.com/apache/iceberg-rust/pull/1941 +* infra: license header check ignore target/ dir by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1954 +* infra: release script, validate proper ICEBERG_VERSION variable by @kevinjqliu in https://github.com/apache/iceberg-rust/pull/1956 +* refactor(arrow): Rename parameter in delete_filter for clarity by @robertmu in https://github.com/apache/iceberg-rust/pull/1955 +* feat(sqllogictest): use serde derived structs for schedule parsing by @AndreaBozzo in https://github.com/apache/iceberg-rust/pull/1953 +* fix: follow IEEE 754 totalOrder for `float` and `double` by @Standing-Man in https://github.com/apache/iceberg-rust/pull/1959 +* fix: return proper error rather than persisting error message on snapshot by @Standing-Man in https://github.com/apache/iceberg-rust/pull/1960 +* feat(arrow): Convert Arrow schema to Iceberg schema with auto assigned field ids by @CTTY in https://github.com/apache/iceberg-rust/pull/1928 +* fix: MemoryCatalog to return absolute NamespaceIdents by @eickler in https://github.com/apache/iceberg-rust/pull/1970 +* fix(spec): Include delete file content to V3 manifest by @CTTY in https://github.com/apache/iceberg-rust/pull/1979 +* fix: fix typo check error by @Standing-Man in https://github.com/apache/iceberg-rust/pull/1989 +* Fix ci audit failure by @liurenjie1024 in https://github.com/apache/iceberg-rust/pull/1988 +* feat: make FanoutWriter writer configurable by @Standing-Man in
https://github.com/apache/iceberg-rust/pull/1962 + ## [v0.7.0] - 2025-09-23 ### Breaking Changes diff --git a/Cargo.lock b/Cargo.lock index 301b2b9428..fd172bb5f2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -98,9 +98,9 @@ checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" [[package]] name = "apache-avro" -version = "0.20.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a033b4ced7c585199fb78ef50fca7fe2f444369ec48080c5fd072efa1a03cc7" +checksum = "36fa98bc79671c7981272d91a8753a928ff6a1cd8e4f20a44c45bd5d313840bf" dependencies = [ "bigdecimal", "bon", @@ -303,61 +303,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "async-channel" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "924ed96dd52d1b75e9c1a3e6275715fd320f5f9439fb5a4a11fa51f4221158d2" -dependencies = [ - "concurrent-queue", - "event-listener-strategy", - "futures-core", - "pin-project-lite", -] - -[[package]] -name = "async-executor" -version = "1.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "497c00e0fd83a72a79a39fcbd8e3e2f055d6f6c7e025f3b3d91f4f8e76527fb8" -dependencies = [ - "async-task", - "concurrent-queue", - "fastrand", - "futures-lite", - "pin-project-lite", - "slab", -] - -[[package]] -name = "async-fs" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8034a681df4aed8b8edbd7fbe472401ecf009251c8b40556b304567052e294c5" -dependencies = [ - "async-lock", - "blocking", - "futures-lite", -] - -[[package]] -name = "async-io" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "456b8a8feb6f42d237746d4b3e9a178494627745c3c56c6ea55d92ba50d026fc" -dependencies = [ - "autocfg", - "cfg-if", - "concurrent-queue", - "futures-io", - "futures-lite", - "parking", - "polling", - "rustix", - "slab", - "windows-sys 0.61.2", -] - [[package]] name = "async-lock" version = "3.4.1" @@ -369,35 +314,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "async-net" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b948000fad4873c1c9339d60f2623323a0cfd3816e5181033c6a5cb68b2accf7" -dependencies = [ - "async-io", - "blocking", - "futures-lite", -] - -[[package]] -name = "async-process" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc50921ec0055cdd8a16de48773bfeec5c972598674347252c0399676be7da75" -dependencies = [ - "async-channel", - "async-io", - "async-lock", - "async-signal", - "async-task", - "blocking", - "cfg-if", - "event-listener", - "futures-lite", - "rustix", -] - [[package]] name = "async-recursion" version = "1.1.1" @@ -409,30 +325,6 @@ dependencies = [ "syn 2.0.108", ] -[[package]] -name = "async-signal" -version = "0.2.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43c070bbf59cd3570b6b2dd54cd772527c7c3620fce8be898406dd3ed6adc64c" -dependencies = [ - "async-io", - "async-lock", - "atomic-waker", - "cfg-if", - "futures-core", - "futures-io", - "rustix", - "signal-hook-registry", - "slab", - "windows-sys 0.61.2", -] - -[[package]] -name = "async-task" -version = "4.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" - [[package]] name = "async-trait" version = "0.1.89" @@ -984,19 +876,6 @@ dependencies = [ "generic-array", ] 
-[[package]] -name = "blocking" -version = "1.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e83f8d02be6967315521be875afa792a316e28d57b5a2d401897e2a7921b7f21" -dependencies = [ - "async-channel", - "async-task", - "futures-io", - "futures-lite", - "piper", -] - [[package]] name = "bon" version = "3.8.1" @@ -1807,19 +1686,6 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" -[[package]] -name = "futures-lite" -version = "2.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f78e10609fe0e0b3f4157ffab1876319b5b0db102a2c60dc4626306dc46b44ad" -dependencies = [ - "fastrand", - "futures-core", - "futures-io", - "parking", - "pin-project-lite", -] - [[package]] name = "futures-macro" version = "0.3.31" @@ -2013,12 +1879,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hermit-abi" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" - [[package]] name = "hex" version = "0.4.3" @@ -2263,7 +2123,7 @@ dependencies = [ [[package]] name = "iceberg" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "apache-avro", @@ -2313,7 +2173,6 @@ dependencies = [ "serde_json", "serde_repr", "serde_with", - "smol", "strum", "tempfile", "tokio", @@ -2325,7 +2184,7 @@ dependencies = [ [[package]] name = "iceberg-cache-moka" -version = "0.7.0" +version = "0.8.0" dependencies = [ "iceberg", "moka", @@ -2333,7 +2192,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-glue" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "async-trait", @@ -2350,7 +2209,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-hms" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "async-trait", @@ -2374,7 +2233,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-loader" -version = "0.7.0" +version = "0.8.0" dependencies = [ "async-trait", "iceberg", @@ -2390,7 +2249,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-rest" -version = "0.7.0" +version = "0.8.0" dependencies = [ "async-trait", "chrono", @@ -2414,7 +2273,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-s3tables" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "async-trait", @@ -2428,7 +2287,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-sql" -version = "0.7.0" +version = "0.8.0" dependencies = [ "async-trait", "iceberg", @@ -2442,7 +2301,7 @@ dependencies = [ [[package]] name = "iceberg-examples" -version = "0.7.0" +version = "0.8.0" dependencies = [ "futures", "iceberg", @@ -2452,7 +2311,7 @@ dependencies = [ [[package]] name = "iceberg-integration-tests" -version = "0.7.0" +version = "0.8.0" dependencies = [ "arrow-array", "arrow-schema", @@ -2469,7 +2328,7 @@ dependencies = [ [[package]] name = "iceberg_test_utils" -version = "0.7.0" +version = "0.8.0" dependencies = [ "tracing", "tracing-subscriber", @@ -2663,6 +2522,47 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jiff" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e67e8da4c49d6d9909fe03361f9b620f58898859f5c7aded68351e85e71ecf50" +dependencies = [ + "jiff-static", + "jiff-tzdb-platform", + "log", + "portable-atomic", + "portable-atomic-util", + "serde_core", + "windows-sys 0.61.2", +] + +[[package]] +name = "jiff-static" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0c84ee7f197eca9a86c6fd6cb771e55eb991632f15f2bc3ca6ec838929e6e78" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.108", +] + +[[package]] +name = "jiff-tzdb" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68971ebff725b9e2ca27a601c5eb38a4c5d64422c4cbab0c535f248087eda5c2" + +[[package]] +name = "jiff-tzdb-platform" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8" +dependencies = [ + "jiff-tzdb", +] + [[package]] name = "jobserver" version = "0.1.34" @@ -3211,20 +3111,20 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "opendal" -version = "0.54.1" +version = "0.55.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42afda58fa2cf50914402d132cc1caacff116a85d10c72ab2082bb7c50021754" +checksum = "d075ab8a203a6ab4bc1bce0a4b9fe486a72bf8b939037f4b78d95386384bc80a" dependencies = [ "anyhow", "backon", "base64 0.22.1", "bytes", - "chrono", "crc32c", "futures", "getrandom 0.2.16", "http 1.3.1", "http-body 1.0.1", + "jiff", "log", "md-5", "percent-encoding", @@ -3234,6 +3134,7 @@ dependencies = [ "serde", "serde_json", "tokio", + "url", "uuid", ] @@ -3439,17 +3340,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" -[[package]] -name = "piper" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96c8c490f422ef9a4efd2cb5b42b76c8613d7e7dfc1caf667b8a3350a5acc066" -dependencies = [ - "atomic-waker", - "fastrand", - "futures-io", -] - [[package]] name = "pkcs1" version = "0.7.5" @@ -3494,20 +3384,6 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" -[[package]] -name = "polling" -version = "3.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d0e4f59085d47d8241c88ead0f274e8a0cb551f3625263c05eb8dd897c34218" -dependencies = [ - "cfg-if", - "concurrent-queue", - "hermit-abi", - "pin-project-lite", - "rustix", - "windows-sys 0.61.2", -] - [[package]] name = "port_scanner" version = "0.1.5" @@ -3520,6 +3396,15 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + [[package]] name = "potential_utf" version = "0.1.4" @@ -4567,23 +4452,6 @@ dependencies = [ "serde", ] -[[package]] -name = "smol" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33bd3e260892199c3ccfc487c88b2da2265080acb316cd920da72fdfd7c599f" -dependencies = [ - "async-channel", - "async-executor", - "async-fs", - "async-io", - 
"async-lock", - "async-net", - "async-process", - "blocking", - "futures-lite", -] - [[package]] name = "snap" version = "1.1.1" diff --git a/Cargo.toml b/Cargo.toml index 7d980e6eda..901fbda097 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,16 +35,16 @@ resolver = "2" [workspace.package] edition = "2024" homepage = "https://rust.iceberg.apache.org/" -version = "0.7.0" +version = "0.8.0" license = "Apache-2.0" repository = "https://github.com/apache/iceberg-rust" # Check the MSRV policy in README.md before changing this -rust-version = "1.87" +rust-version = "1.88" [workspace.dependencies] anyhow = "1.0.72" -apache-avro = { version = "0.20", features = ["zstandard"] } +apache-avro = { version = "0.21", features = ["zstandard"] } array-init = "2" arrow-arith = "57.1" arrow-array = "57.1" @@ -66,9 +66,6 @@ bytes = "1.10" chrono = "0.4.41" clap = { version = "4.5.48", features = ["derive", "cargo"] } ctor = "0.2.8" -datafusion = "50" -datafusion-cli = "50" -datafusion-sqllogictest = "50" derive_builder = "0.20" dirs = "6" enum-ordinalize = "4.3.0" @@ -82,13 +79,13 @@ futures = "0.3" hive_metastore = "0.2.0" home = "=0.5.11" http = "1.2" -iceberg = { version = "0.7.0", path = "./crates/iceberg" } -iceberg-catalog-glue = { version = "0.7.0", path = "./crates/catalog/glue" } -iceberg-catalog-hms = { version = "0.7.0", path = "./crates/catalog/hms" } -iceberg-catalog-sql = { version = "0.7.0", path = "./crates/catalog/sql" } -iceberg-catalog-rest = { version = "0.7.0", path = "./crates/catalog/rest" } -iceberg-catalog-s3tables = { version = "0.7.0", path = "./crates/catalog/s3tables" } -iceberg-datafusion = { version = "0.7.0", path = "./crates/integrations/datafusion" } +iceberg = { version = "0.8.0", path = "./crates/iceberg" } +iceberg-catalog-glue = { version = "0.8.0", path = "./crates/catalog/glue" } +iceberg-catalog-hms = { version = "0.8.0", path = "./crates/catalog/hms" } +iceberg-catalog-rest = { version = "0.8.0", path = "./crates/catalog/rest" } +iceberg-catalog-s3tables = { version = "0.8.0", path = "./crates/catalog/s3tables" } +iceberg-catalog-sql = { version = "0.8.0", path = "./crates/catalog/sql" } +iceberg-datafusion = { version = "0.8.0", path = "./crates/integrations/datafusion" } indicatif = "0.18" itertools = "0.13" libtest-mimic = "0.8.1" @@ -103,7 +100,7 @@ motore-macros = "0.4.3" murmur3 = "0.5.2" num-bigint = "0.4.6" once_cell = "1.20" -opendal = "0.54.0" +opendal = "0.55.0" ordered-float = "4" parquet = "57.1" pilota = "0.11.10" @@ -113,14 +110,13 @@ rand = "0.8.5" regex = "1.11.3" reqwest = { version = "0.12.12", default-features = false, features = ["json"] } roaring = { version = "0.11" } -rust_decimal = "1.37.2" +rust_decimal = { version = "1.39", default-features = false, features = ["std"] } serde = { version = "1.0.219", features = ["rc"] } serde_bytes = "0.11.17" serde_derive = "1.0.219" serde_json = "1.0.142" serde_repr = "0.1.16" serde_with = "3.4" -smol = "2.0.2" sqllogictest = "0.28.3" sqlx = { version = "0.8.1", default-features = false } stacker = "0.1.20" @@ -136,4 +132,4 @@ url = "2.5.7" uuid = { version = "1.18", features = ["v7"] } volo = "0.10.6" volo-thrift = "0.10.8" -zstd = "0.13.3" +zstd = "0.13.3" \ No newline at end of file diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index 8249414b8d..d33abed581 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -128,9 +128,9 @@ checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" [[package]] name = "apache-avro" -version = "0.20.0" 
+version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a033b4ced7c585199fb78ef50fca7fe2f444369ec48080c5fd072efa1a03cc7" +checksum = "36fa98bc79671c7981272d91a8753a928ff6a1cd8e4f20a44c45bd5d313840bf" dependencies = [ "bigdecimal", "bon", @@ -180,9 +180,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" +checksum = "4df8bb5b0bd64c0b9bc61317fcc480bad0f00e56d3bc32c69a4c8dada4786bae" dependencies = [ "arrow-arith", "arrow-array", @@ -202,23 +202,23 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" +checksum = "a1a640186d3bd30a24cb42264c2dafb30e236a6f50d510e56d40b708c9582491" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "num", + "num-traits", ] [[package]] name = "arrow-array" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" +checksum = "219fe420e6800979744c8393b687afb0252b3f8a89b91027d27887b72aa36d31" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -228,25 +228,28 @@ dependencies = [ "chrono-tz", "half", "hashbrown 0.16.0", - "num", + "num-complex", + "num-integer", + "num-traits", ] [[package]] name = "arrow-buffer" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" +checksum = "76885a2697a7edf6b59577f568b456afc94ce0e2edc15b784ce3685b6c3c5c27" dependencies = [ "bytes", "half", - "num", + "num-bigint", + "num-traits", ] [[package]] name = "arrow-cast" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" +checksum = "9c9ebb4c987e6b3b236fb4a14b20b34835abfdd80acead3ccf1f9bf399e1f168" dependencies = [ "arrow-array", "arrow-buffer", @@ -259,15 +262,15 @@ dependencies = [ "comfy-table", "half", "lexical-core", - "num", + "num-traits", "ryu", ] [[package]] name = "arrow-csv" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa9bf02705b5cf762b6f764c65f04ae9082c7cfc4e96e0c33548ee3f67012eb" +checksum = "92386159c8d4bce96f8bd396b0642a0d544d471bdc2ef34d631aec80db40a09c" dependencies = [ "arrow-array", "arrow-cast", @@ -280,21 +283,22 @@ dependencies = [ [[package]] name = "arrow-data" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" +checksum = "727681b95de313b600eddc2a37e736dcb21980a40f640314dcf360e2f36bc89b" dependencies = [ "arrow-buffer", "arrow-schema", "half", - "num", + "num-integer", + "num-traits", ] [[package]] name = "arrow-ipc" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" +checksum = "da9ba92e3de170295c98a84e5af22e2b037f0c7b32449445e6c493b5fca27f27" 
dependencies = [ "arrow-array", "arrow-buffer", @@ -308,9 +312,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88cf36502b64a127dc659e3b305f1d993a544eab0d48cce704424e62074dc04b" +checksum = "b969b4a421ae83828591c6bf5450bd52e6d489584142845ad6a861f42fe35df8" dependencies = [ "arrow-array", "arrow-buffer", @@ -320,19 +324,21 @@ dependencies = [ "chrono", "half", "indexmap 2.12.0", + "itoa", "lexical-core", "memchr", - "num", - "serde", + "num-traits", + "ryu", + "serde_core", "serde_json", "simdutf8", ] [[package]] name = "arrow-ord" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" +checksum = "141c05298b21d03e88062317a1f1a73f5ba7b6eb041b350015b1cd6aabc0519b" dependencies = [ "arrow-array", "arrow-buffer", @@ -343,9 +349,9 @@ dependencies = [ [[package]] name = "arrow-pyarrow" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d924b32e96f8bb74d94cd82bd97b313c432fcb0ea331689ef9e7c6b8be4b258" +checksum = "cfcfb2be2e9096236f449c11f425cddde18c4cc540f516d90f066f10a29ed515" dependencies = [ "arrow-array", "arrow-data", @@ -355,9 +361,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" +checksum = "c5f3c06a6abad6164508ed283c7a02151515cef3de4b4ff2cebbcaeb85533db2" dependencies = [ "arrow-array", "arrow-buffer", @@ -368,34 +374,34 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" +checksum = "9cfa7a03d1eee2a4d061476e1840ad5c9867a544ca6c4c59256496af5d0a8be5" dependencies = [ "bitflags", - "serde", + "serde_core", "serde_json", ] [[package]] name = "arrow-select" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" +checksum = "bafa595babaad59f2455f4957d0f26448fb472722c186739f4fac0823a1bdb47" dependencies = [ "ahash 0.8.12", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", - "num", + "num-traits", ] [[package]] name = "arrow-string" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" +checksum = "32f46457dbbb99f2650ff3ac23e46a929e0ab81db809b02aa5511c258348bef2" dependencies = [ "arrow-array", "arrow-buffer", @@ -403,7 +409,7 @@ dependencies = [ "arrow-schema", "arrow-select", "memchr", - "num", + "num-traits", "regex", "regex-syntax", ] @@ -662,8 +668,20 @@ version = "0.6.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" dependencies = [ - "bytecheck_derive", - "ptr_meta", + "bytecheck_derive 0.6.12", + "ptr_meta 0.1.4", + "simdutf8", +] + +[[package]] +name = "bytecheck" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0caa33a2c0edca0419d15ac723dff03f1956f7978329b1e3b5fdaaaed9d3ca8b" +dependencies = [ + 
"bytecheck_derive 0.8.2", + "ptr_meta 0.3.1", + "rancor", "simdutf8", ] @@ -678,6 +696,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "bytecheck_derive" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89385e82b5d1821d2219e0b095efa2cc1f246cbf99080f3be46a1a85c0d392d9" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.108", +] + [[package]] name = "bytemuck" version = "1.24.0" @@ -1028,12 +1057,11 @@ dependencies = [ [[package]] name = "datafusion" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2af15bb3c6ffa33011ef579f6b0bcbe7c26584688bd6c994f548e44df67f011a" +checksum = "8ba7cb113e9c0bedf9e9765926031e132fa05a1b09ba6e93a6d1a4d7044457b8" dependencies = [ "arrow", - "arrow-ipc", "arrow-schema", "async-trait", "bytes", @@ -1044,6 +1072,7 @@ dependencies = [ "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", + "datafusion-datasource-arrow", "datafusion-datasource-csv", "datafusion-datasource-json", "datafusion-datasource-parquet", @@ -1072,6 +1101,7 @@ dependencies = [ "parquet", "rand 0.9.2", "regex", + "rstest", "sqlparser", "tempfile", "tokio", @@ -1083,9 +1113,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "187622262ad8f7d16d3be9202b4c1e0116f1c9aa387e5074245538b755261621" +checksum = "66a3a799f914a59b1ea343906a0486f17061f39509af74e874a866428951130d" dependencies = [ "arrow", "async-trait", @@ -1098,7 +1128,6 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-plan", "datafusion-session", - "datafusion-sql", "futures", "itertools 0.14.0", "log", @@ -1109,9 +1138,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9657314f0a32efd0382b9a46fdeb2d233273ece64baa68a7c45f5a192daf0f83" +checksum = "6db1b113c80d7a0febcd901476a57aef378e717c54517a163ed51417d87621b0" dependencies = [ "arrow", "async-trait", @@ -1121,10 +1150,11 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", "datafusion-physical-plan", - "datafusion-session", "futures", + "itertools 0.14.0", "log", "object_store", "tokio", @@ -1132,14 +1162,13 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a83760d9a13122d025fbdb1d5d5aaf93dd9ada5e90ea229add92aa30898b2d1" +checksum = "7c10f7659e96127d25e8366be7c8be4109595d6a2c3eac70421f380a7006a1b0" dependencies = [ "ahash 0.8.12", "arrow", "arrow-ipc", - "base64", "chrono", "half", "hashbrown 0.14.5", @@ -1157,9 +1186,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b6234a6c7173fe5db1c6c35c01a12b2aa0f803a3007feee53483218817f8b1e" +checksum = "b92065bbc6532c6651e2f7dd30b55cba0c7a14f860c7e1d15f165c41a1868d95" dependencies = [ "futures", "log", @@ -1168,9 +1197,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7256c9cb27a78709dd42d0c80f0178494637209cac6e29d5c93edd09b6721b86" +checksum = "fde13794244bc7581cd82f6fff217068ed79cdc344cafe4ab2c3a1c3510b38d6" dependencies = [ "arrow", "async-compression", @@ -1193,9 +1222,7 @@ dependencies = [ "itertools 0.14.0", "log", "object_store", - "parquet", "rand 0.9.2", - "tempfile", "tokio", "tokio-util", "url", @@ -1203,22 +1230,44 @@ dependencies = [ "zstd", ] +[[package]] +name = "datafusion-datasource-arrow" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "804fa9b4ecf3157982021770617200ef7c1b2979d57bec9044748314775a9aea" +dependencies = [ + "arrow", + "arrow-ipc", + "async-trait", + "bytes", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "itertools 0.14.0", + "object_store", + "tokio", +] + [[package]] name = "datafusion-datasource-csv" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64533a90f78e1684bfb113d200b540f18f268134622d7c96bbebc91354d04825" +checksum = "61a1641a40b259bab38131c5e6f48fac0717bedb7dc93690e604142a849e0568" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", @@ -1230,49 +1279,44 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d7ebeb12c77df0aacad26f21b0d033aeede423a64b2b352f53048a75bf1d6e6" +checksum = "adeacdb00c1d37271176f8fb6a1d8ce096baba16ea7a4b2671840c5c9c64fe85" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", "object_store", - "serde_json", "tokio", ] [[package]] name = "datafusion-datasource-parquet" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09e783c4c7d7faa1199af2df4761c68530634521b176a8d1331ddbc5a5c75133" +checksum = "43d0b60ffd66f28bfb026565d62b0a6cbc416da09814766a3797bba7d85a3cd9" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", - "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-pruning", "datafusion-session", @@ -1282,21 +1326,20 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "rand 0.9.2", "tokio", ] [[package]] name = "datafusion-doc" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99ee6b1d9a80d13f9deb2291f45c07044b8e62fb540dbde2453a18be17a36429" +checksum = "2b99e13947667b36ad713549237362afb054b2d8f8cc447751e23ec61202db07" [[package]] name = "datafusion-execution" -version 
= "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4cec0a57653bec7b933fb248d3ffa3fa3ab3bd33bd140dc917f714ac036f531" +checksum = "63695643190679037bc946ad46a263b62016931547bf119859c511f7ff2f5178" dependencies = [ "arrow", "async-trait", @@ -1314,9 +1357,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef76910bdca909722586389156d0aa4da4020e1631994d50fadd8ad4b1aa05fe" +checksum = "f9a4787cbf5feb1ab351f789063398f67654a6df75c4d37d7f637dc96f951a91" dependencies = [ "arrow", "async-trait", @@ -1328,6 +1371,7 @@ dependencies = [ "datafusion-functions-window-common", "datafusion-physical-expr-common", "indexmap 2.12.0", + "itertools 0.14.0", "paste", "recursive", "serde_json", @@ -1336,9 +1380,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d155ccbda29591ca71a1344dd6bed26c65a4438072b400df9db59447f590bb6" +checksum = "5ce2fb1b8c15c9ac45b0863c30b268c69dc9ee7a1ee13ecf5d067738338173dc" dependencies = [ "arrow", "datafusion-common", @@ -1349,9 +1393,9 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25ddb7c4e645df080c27dad13a198d191da328dd1c98e198664a7a0f64b335cc" +checksum = "ec510e7787641279b0336e8b79e4b7bd1385d5976875ff9b97f4269ce5231a67" dependencies = [ "abi_stable", "arrow", @@ -1359,6 +1403,7 @@ dependencies = [ "async-ffi", "async-trait", "datafusion", + "datafusion-common", "datafusion-functions-aggregate-common", "datafusion-proto", "datafusion-proto-common", @@ -1371,9 +1416,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7de2782136bd6014670fd84fe3b0ca3b3e4106c96403c3ae05c0598577139977" +checksum = "794a9db7f7b96b3346fc007ff25e994f09b8f0511b4cf7dff651fadfe3ebb28f" dependencies = [ "arrow", "arrow-buffer", @@ -1391,6 +1436,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "num-traits", "rand 0.9.2", "regex", "sha2", @@ -1400,9 +1446,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07331fc13603a9da97b74fd8a273f4238222943dffdbbed1c4c6f862a30105bf" +checksum = "1c25210520a9dcf9c2b2cbbce31ebd4131ef5af7fc60ee92b266dc7d159cb305" dependencies = [ "ahash 0.8.12", "arrow", @@ -1421,9 +1467,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5951e572a8610b89968a09b5420515a121fbc305c0258651f318dc07c97ab17" +checksum = "62f4a66f3b87300bb70f4124b55434d2ae3fe80455f3574701d0348da040b55d" dependencies = [ "ahash 0.8.12", "arrow", @@ -1434,9 +1480,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdacca9302c3d8fc03f3e94f338767e786a88a33f5ebad6ffc0e7b50364b9ea3" +checksum = "ae5c06eed03918dc7fe7a9f082a284050f0e9ecf95d72f57712d1496da03b8c4" dependencies = [ "arrow", "arrow-ord", @@ -1444,6 +1490,7 @@ 
dependencies = [ "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-aggregate-common", @@ -1456,9 +1503,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c37ff8a99434fbbad604a7e0669717c58c7c4f14c472d45067c4b016621d981" +checksum = "db4fed1d71738fbe22e2712d71396db04c25de4111f1ec252b8f4c6d3b25d7f5" dependencies = [ "arrow", "async-trait", @@ -1472,9 +1519,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48e2aea7c79c926cffabb13dc27309d4eaeb130f4a21c8ba91cdd241c813652b" +checksum = "1d92206aa5ae21892f1552b4d61758a862a70956e6fd7a95cb85db1de74bc6d1" dependencies = [ "arrow", "datafusion-common", @@ -1490,9 +1537,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fead257ab5fd2ffc3b40fda64da307e20de0040fe43d49197241d9de82a487f" +checksum = "53ae9bcc39800820d53a22d758b3b8726ff84a5a3e24cecef04ef4e5fdf1c7cc" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1500,20 +1547,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec6f637bce95efac05cdfb9b6c19579ed4aa5f6b94d951cfa5bb054b7bb4f730" +checksum = "1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848" dependencies = [ - "datafusion-expr", + "datafusion-doc", "quote", "syn 2.0.108", ] [[package]] name = "datafusion-optimizer" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6583ef666ae000a613a837e69e456681a9faa96347bf3877661e9e89e141d8a" +checksum = "9f35f9ec5d08b87fd1893a30c2929f2559c2f9806ca072d8fefca5009dc0f06a" dependencies = [ "arrow", "chrono", @@ -1531,9 +1578,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8668103361a272cbbe3a61f72eca60c9b7c706e87cc3565bcf21e2b277b84f6" +checksum = "c30cc8012e9eedcb48bbe112c6eff4ae5ed19cf3003cb0f505662e88b7014c5d" dependencies = [ "ahash 0.8.12", "arrow", @@ -1546,7 +1593,6 @@ dependencies = [ "hashbrown 0.14.5", "indexmap 2.12.0", "itertools 0.14.0", - "log", "parking_lot", "paste", "petgraph", @@ -1554,9 +1600,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "815acced725d30601b397e39958e0e55630e0a10d66ef7769c14ae6597298bb0" +checksum = "7f9ff2dbd476221b1f67337699eff432781c4e6e1713d2aefdaa517dfbf79768" dependencies = [ "arrow", "datafusion-common", @@ -1569,9 +1615,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6652fe7b5bf87e85ed175f571745305565da2c0b599d98e697bcbedc7baa47c3" +checksum = "90da43e1ec550b172f34c87ec68161986ced70fd05c8d2a2add66eef9c276f03" dependencies = [ "ahash 0.8.12", "arrow", @@ -1583,9 +1629,9 @@ 
dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b7d623eb6162a3332b564a0907ba00895c505d101b99af78345f1acf929b5c" +checksum = "ce9804f799acd7daef3be7aaffe77c0033768ed8fdbf5fb82fc4c5f2e6bc14e6" dependencies = [ "arrow", "datafusion-common", @@ -1597,15 +1643,14 @@ dependencies = [ "datafusion-physical-plan", "datafusion-pruning", "itertools 0.14.0", - "log", "recursive", ] [[package]] name = "datafusion-physical-plan" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2f7f778a1a838dec124efb96eae6144237d546945587557c9e6936b3414558c" +checksum = "0acf0ad6b6924c6b1aa7d213b181e012e2d3ec0a64ff5b10ee6282ab0f8532ac" dependencies = [ "ahash 0.8.12", "arrow", @@ -1634,15 +1679,26 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7df9f606892e6af45763d94d210634eec69b9bb6ced5353381682ff090028a3" +checksum = "d368093a98a17d1449b1083ac22ed16b7128e4c67789991869480d8c4a40ecb9" dependencies = [ "arrow", "chrono", - "datafusion", + "datafusion-catalog", + "datafusion-catalog-listing", "datafusion-common", + "datafusion-datasource", + "datafusion-datasource-arrow", + "datafusion-datasource-csv", + "datafusion-datasource-json", + "datafusion-datasource-parquet", + "datafusion-execution", "datafusion-expr", + "datafusion-functions-table", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", "datafusion-proto-common", "object_store", "prost", @@ -1650,9 +1706,9 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4b14f288ca4ef77743d9672cafecf3adfffff0b9b04af9af79ecbeaaf736901" +checksum = "3b6aef3d5e5c1d2bc3114c4876730cb76a9bdc5a8df31ef1b6db48f0c1671895" dependencies = [ "arrow", "datafusion-common", @@ -1661,12 +1717,11 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd1e59e2ca14fe3c30f141600b10ad8815e2856caa59ebbd0e3e07cd3d127a65" +checksum = "ac2c2498a1f134a9e11a9f5ed202a2a7d7e9774bd9249295593053ea3be999db" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-datasource", "datafusion-expr-common", @@ -1679,36 +1734,27 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21ef8e2745583619bd7a49474e8f45fbe98ebb31a133f27802217125a7b3d58d" +checksum = "8f96eebd17555386f459037c65ab73aae8df09f464524c709d6a3134ad4f4776" dependencies = [ - "arrow", "async-trait", - "dashmap", "datafusion-common", - "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-plan", - "datafusion-sql", - "futures", - "itertools 0.14.0", - "log", - "object_store", "parking_lot", - "tokio", ] [[package]] name = "datafusion-sql" -version = "50.3.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89abd9868770386fede29e5a4b14f49c0bf48d652c3b9d7a8a0332329b87d50b" +checksum = "3fc195fe60634b2c6ccfd131b487de46dc30eccae8a3c35a13f136e7f440414f" dependencies = [ 
"arrow", "bigdecimal", + "chrono", "datafusion-common", "datafusion-expr", "indexmap 2.12.0", @@ -1993,6 +2039,12 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +[[package]] +name = "futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + [[package]] name = "futures-util" version = "0.3.31" @@ -2127,12 +2179,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hermit-abi" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" - [[package]] name = "hex" version = "0.4.3" @@ -2291,7 +2337,7 @@ dependencies = [ [[package]] name = "iceberg" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "apache-avro", @@ -2313,6 +2359,7 @@ dependencies = [ "chrono", "derive_builder", "expect-test", + "flate2", "fnv", "futures", "itertools 0.13.0", @@ -2326,6 +2373,7 @@ dependencies = [ "rand 0.8.5", "reqsign", "reqwest", + "rkyv 0.8.13", "roaring", "rust_decimal", "serde", @@ -2335,7 +2383,6 @@ dependencies = [ "serde_repr", "serde_with", "strum 0.27.2", - "thrift", "tokio", "typed-builder", "url", @@ -2345,7 +2392,7 @@ dependencies = [ [[package]] name = "iceberg-datafusion" -version = "0.7.0" +version = "0.8.0" dependencies = [ "anyhow", "async-trait", @@ -2543,6 +2590,47 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jiff" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35" +dependencies = [ + "jiff-static", + "jiff-tzdb-platform", + "log", + "portable-atomic", + "portable-atomic-util", + "serde_core", + "windows-sys 0.61.2", +] + +[[package]] +name = "jiff-static" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.108", +] + +[[package]] +name = "jiff-tzdb" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68971ebff725b9e2ca27a601c5eb38a4c5d64422c4cbab0c535f248087eda5c2" + +[[package]] +name = "jiff-tzdb-platform" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8" +dependencies = [ + "jiff-tzdb", +] + [[package]] name = "jobserver" version = "0.1.34" @@ -2778,25 +2866,31 @@ dependencies = [ ] [[package]] -name = "murmur3" -version = "0.5.2" +name = "munge" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9252111cf132ba0929b6f8e030cac2a24b507f3a4d6db6fb2896f27b354c714b" +checksum = "5e17401f259eba956ca16491461b6e8f72913a0a114e39736ce404410f915a0c" +dependencies = [ + "munge_macro", +] [[package]] -name = "num" -version = "0.4.3" +name = "munge_macro" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +checksum = "4568f25ccbd45ab5d5603dc34318c1ec56b117531781260002151b8530a9f931" dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", + "proc-macro2", + "quote", + "syn 2.0.108", ] +[[package]] +name = "murmur3" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9252111cf132ba0929b6f8e030cac2a24b507f3a4d6db6fb2896f27b354c714b" + [[package]] name = "num-bigint" version = "0.4.6" @@ -2832,28 +2926,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-iter" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" -dependencies = [ - "num-bigint", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.19" @@ -2864,16 +2936,6 @@ dependencies = [ "libm", ] -[[package]] -name = "num_cpus" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "object" version = "0.32.2" @@ -2915,20 +2977,20 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "opendal" -version = "0.54.1" +version = "0.55.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42afda58fa2cf50914402d132cc1caacff116a85d10c72ab2082bb7c50021754" +checksum = "d075ab8a203a6ab4bc1bce0a4b9fe486a72bf8b939037f4b78d95386384bc80a" dependencies = [ "anyhow", "backon", "base64", "bytes", - "chrono", "crc32c", "futures", "getrandom 0.2.16", "http", "http-body", + "jiff", "log", "md-5", "percent-encoding", @@ -2938,6 +3000,7 @@ dependencies = [ "serde", "serde_json", "tokio", + "url", "uuid", ] @@ -3000,9 +3063,9 @@ dependencies = [ [[package]] name = "parquet" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27" +checksum = "7a0f31027ef1af7549f7cec603a9a21dce706d3f8d7c2060a68f43c1773be95a" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -3021,11 +3084,11 @@ dependencies = [ "half", "hashbrown 0.16.0", "lz4_flex", - "num", "num-bigint", + "num-integer", + "num-traits", "object_store", "paste", - "ring", "seq-macro", "simdutf8", "snap", @@ -3101,6 +3164,15 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + [[package]] name = "potential_utf" version = "0.1.4" @@ -3155,9 +3227,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +checksum = 
"7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" dependencies = [ "bytes", "prost-derive", @@ -3165,9 +3237,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.5" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" dependencies = [ "anyhow", "itertools 0.14.0", @@ -3192,7 +3264,16 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" dependencies = [ - "ptr_meta_derive", + "ptr_meta_derive 0.1.4", +] + +[[package]] +name = "ptr_meta" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b9a0cf95a1196af61d4f1cbdab967179516d9a4a4312af1f31948f8f6224a79" +dependencies = [ + "ptr_meta_derive 0.3.1", ] [[package]] @@ -3206,23 +3287,35 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "ptr_meta_derive" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7347867d0a7e1208d93b46767be83e2b8f978c3dad35f775ac8d8847551d6fe1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.108", +] + [[package]] name = "pyiceberg_core_rust" -version = "0.7.0" +version = "0.8.0" dependencies = [ "arrow", "datafusion-ffi", "iceberg", "iceberg-datafusion", "pyo3", + "rust_decimal", "tokio", ] [[package]] name = "pyo3" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8970a78afe0628a3e3430376fc5fd76b6b45c4d43360ffd6cdd40bdde72b682a" +checksum = "7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383" dependencies = [ "indoc", "libc", @@ -3237,19 +3330,18 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "458eb0c55e7ece017adeba38f2248ff3ac615e53660d7c71a238d7d2a01c7598" +checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f" dependencies = [ - "once_cell", "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7114fe5457c61b276ab77c5055f206295b812608083644a5c5b2640c3102565c" +checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105" dependencies = [ "libc", "pyo3-build-config", @@ -3257,9 +3349,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8725c0a622b374d6cb051d11a0983786448f7785336139c3c94f5aa6bef7e50" +checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -3269,9 +3361,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4109984c22491085343c05b0dbc54ddc405c3cf7b4374fc533f5c3313a572ccc" +checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf" dependencies = [ "heck", "proc-macro2", @@ -3382,6 +3474,15 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" +[[package]] 
+name = "rancor" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a063ea72381527c2a0561da9c80000ef822bdd7c3241b1cc1b12100e3df081ee" +dependencies = [ + "ptr_meta 0.3.1", +] + [[package]] name = "rand" version = "0.8.5" @@ -3525,13 +3626,28 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +[[package]] +name = "relative-path" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" + [[package]] name = "rend" version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" dependencies = [ - "bytecheck", + "bytecheck 0.6.12", +] + +[[package]] +name = "rend" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cadadef317c2f20755a64d7fdc48f9e7178ee6b0e1f7fce33fa60f1d68a276e6" +dependencies = [ + "bytecheck 0.8.2", ] [[package]] @@ -3634,17 +3750,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" dependencies = [ "bitvec", - "bytecheck", + "bytecheck 0.6.12", "bytes", "hashbrown 0.12.3", - "ptr_meta", - "rend", - "rkyv_derive", + "ptr_meta 0.1.4", + "rend 0.4.2", + "rkyv_derive 0.7.45", "seahash", "tinyvec", "uuid", ] +[[package]] +name = "rkyv" +version = "0.8.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2e88acca7157d83d789836a3987dafc12bc3d88a050e54b8fe9ea4aaa29d20" +dependencies = [ + "bytecheck 0.8.2", + "bytes", + "hashbrown 0.16.0", + "indexmap 2.12.0", + "munge", + "ptr_meta 0.3.1", + "rancor", + "rend 0.5.3", + "rkyv_derive 0.8.13", + "tinyvec", + "uuid", +] + [[package]] name = "rkyv_derive" version = "0.7.45" @@ -3656,6 +3791,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "rkyv_derive" +version = "0.8.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6dffea3c91fa91a3c0fc8a061b0e27fef25c6304728038a6d6bcb1c58ba9bd" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.108", +] + [[package]] name = "roaring" version = "0.11.2" @@ -3666,6 +3812,35 @@ dependencies = [ "byteorder", ] +[[package]] +name = "rstest" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5a3193c063baaa2a95a33f03035c8a72b83d97a54916055ba22d35ed3839d49" +dependencies = [ + "futures-timer", + "futures-util", + "rstest_macros", +] + +[[package]] +name = "rstest_macros" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c845311f0ff7951c5506121a9ad75aec44d083c31583b2ea5a30bcb0b0abba0" +dependencies = [ + "cfg-if", + "glob", + "proc-macro-crate", + "proc-macro2", + "quote", + "regex", + "relative-path", + "rustc_version", + "syn 2.0.108", + "unicode-ident", +] + [[package]] name = "rust-ini" version = "0.21.3" @@ -3687,7 +3862,7 @@ dependencies = [ "bytes", "num-traits", "rand 0.8.5", - "rkyv", + "rkyv 0.7.45", "serde", "serde_json", ] @@ -4007,9 +4182,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.58.0" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec4b661c54b1e4b603b37873a18c59920e4c51ea8ea2cf527d925424dbd4437c" +checksum = 
"4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" dependencies = [ "log", "recursive", @@ -4191,15 +4366,6 @@ dependencies = [ "syn 2.0.108", ] -[[package]] -name = "threadpool" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" -dependencies = [ - "num_cpus", -] - [[package]] name = "thrift" version = "0.17.0" @@ -4208,9 +4374,7 @@ checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" dependencies = [ "byteorder", "integer-encoding", - "log", "ordered-float 2.10.1", - "threadpool", ] [[package]] diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 953d4a98fc..8346d02703 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -19,8 +19,8 @@ edition = "2024" homepage = "https://rust.iceberg.apache.org" name = "pyiceberg_core_rust" -rust-version = "1.87" -version = "0.7.0" +rust-version = "1.88" +version = "0.8.0" # This crate is used to build python bindings, we don't want to publish it publish = false @@ -31,12 +31,14 @@ license = "Apache-2.0" crate-type = ["cdylib"] [dependencies] -arrow = { version = "56", features = ["pyarrow", "chrono-tz"] } +arrow = { version = "57.0", features = ["pyarrow", "chrono-tz"] } iceberg = { path = "../../crates/iceberg" } -pyo3 = { version = "0.25", features = ["extension-module", "abi3-py310"] } +pyo3 = { version = "0.26", features = ["extension-module", "abi3-py310"] } iceberg-datafusion = { path = "../../crates/integrations/datafusion" } -datafusion-ffi = { version = "50" } +datafusion-ffi = { version = "51.0" } tokio = { version = "1.46.1", default-features = false } +# Security: disable rkyv feature to avoid RUSTSEC-2026-0001 (rkyv 0.7.45 vulnerability) +rust_decimal = { version = "1.39", default-features = false, features = ["std"] } [profile.release] codegen-units = 1 @@ -44,3 +46,7 @@ debug = false lto = "thin" opt-level = "z" strip = true + +[package.metadata.cargo-machete] +# rust_decimal is included to override feature flags for security (disable rkyv) +ignored = ["rust_decimal"] diff --git a/bindings/python/DEPENDENCIES.rust.tsv b/bindings/python/DEPENDENCIES.rust.tsv index 7565a13e1d..5fe0da5b2a 100644 --- a/bindings/python/DEPENDENCIES.rust.tsv +++ b/bindings/python/DEPENDENCIES.rust.tsv @@ -1,445 +1,439 @@ -crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 MPL-2.0 Unicode-3.0 Unlicense Zlib -abi_stable@0.11.3 X X -abi_stable_derive@0.11.3 X X -abi_stable_shared@0.11.0 X X -addr2line@0.24.2 X X -adler2@2.0.1 X X X -ahash@0.8.12 X X -aho-corasick@1.1.3 X X -alloc-no-stdlib@2.0.4 X -alloc-stdlib@0.2.2 X -allocator-api2@0.2.21 X X -android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X -array-init@2.1.0 X X -arrayref@0.3.9 X -arrayvec@0.7.6 X X -arrow@55.2.0 X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-csv@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-json@55.2.0 X -arrow-ord@55.2.0 X -arrow-pyarrow@55.2.0 X -arrow-row@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X -as-any@0.3.2 X X -as_derive_utils@0.11.0 X X -async-compression@0.4.19 X X -async-ffi@0.5.0 X -async-lock@3.4.1 X X -async-trait@0.1.89 X X -atoi@2.0.0 X -atomic-waker@1.1.2 X X -autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X -base64@0.22.1 X X -bigdecimal@0.4.8 X X 
-bimap@0.6.3 X X -bitflags@2.9.4 X X -blake2@0.10.6 X X -blake3@1.8.2 X X X -block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X -brotli@8.0.2 X X -brotli-decompressor@5.0.0 X X -bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X -byteorder@1.5.0 X X -bytes@1.10.1 X -bzip2@0.5.2 X X -bzip2-sys@0.1.13+1.0.8 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X -chrono@0.4.42 X X -chrono-tz@0.10.4 X X -comfy-table@7.2.0 X -concurrent-queue@2.5.0 X X -const-oid@0.9.6 X X -const-random@0.1.18 X X -const-random-macro@0.1.16 X X -const_panic@0.2.14 X -constant_time_eq@0.3.1 X X X -core-foundation-sys@0.8.7 X X -core_extensions@1.5.4 X X -core_extensions_proc_macros@1.5.4 X X -cpufeatures@0.2.17 X X -crc32c@0.6.8 X X -crc32fast@1.5.0 X X -crossbeam-channel@0.5.15 X X -crossbeam-epoch@0.9.18 X X -crossbeam-utils@0.8.21 X X -crunchy@0.2.4 X -crypto-common@0.1.6 X X -csv@1.3.1 X X -csv-core@0.1.12 X X -darling@0.20.11 X -darling@0.21.3 X -darling_core@0.20.11 X -darling_core@0.21.3 X -darling_macro@0.20.11 X -darling_macro@0.21.3 X -dashmap@6.1.0 X -datafusion@48.0.1 X -datafusion-catalog@48.0.1 X -datafusion-catalog-listing@48.0.1 X -datafusion-common@48.0.1 X -datafusion-common-runtime@48.0.1 X -datafusion-datasource@48.0.1 X -datafusion-datasource-csv@48.0.1 X -datafusion-datasource-json@48.0.1 X -datafusion-datasource-parquet@48.0.1 X -datafusion-doc@48.0.1 X -datafusion-execution@48.0.1 X -datafusion-expr@48.0.1 X -datafusion-expr-common@48.0.1 X -datafusion-ffi@48.0.1 X -datafusion-functions@48.0.1 X -datafusion-functions-aggregate@48.0.1 X -datafusion-functions-aggregate-common@48.0.1 X -datafusion-functions-nested@48.0.1 X -datafusion-functions-table@48.0.1 X -datafusion-functions-window@48.0.1 X -datafusion-functions-window-common@48.0.1 X -datafusion-macros@48.0.1 X -datafusion-optimizer@48.0.1 X -datafusion-physical-expr@48.0.1 X -datafusion-physical-expr-common@48.0.1 X -datafusion-physical-optimizer@48.0.1 X -datafusion-physical-plan@48.0.1 X -datafusion-proto@48.0.1 X -datafusion-proto-common@48.0.1 X -datafusion-session@48.0.1 X -datafusion-sql@48.0.1 X -derive_builder@0.20.2 X X -derive_builder_core@0.20.2 X X -derive_builder_macro@0.20.2 X X -digest@0.10.7 X X -displaydoc@0.2.5 X X -dissimilar@1.0.10 X -either@1.15.0 X X -equivalent@1.0.2 X X -errno@0.3.13 X X -event-listener@5.4.1 X X -event-listener-strategy@0.5.4 X X -expect-test@1.5.1 X X -fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -fixedbitset@0.5.7 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X -fnv@1.0.7 X X -foldhash@0.1.5 X -form_urlencoded@1.2.2 X X -futures@0.3.31 X X -futures-channel@0.3.31 X X -futures-core@0.3.31 X X -futures-executor@0.3.31 X X -futures-io@0.3.31 X X -futures-macro@0.3.31 X X -futures-sink@0.3.31 X X -futures-task@0.3.31 X X -futures-util@0.3.31 X X -generational-arena@0.2.9 X -generator@0.8.7 X X -generic-array@0.14.7 X -getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X -glob@0.3.3 X X -gloo-timers@0.3.0 X X -half@2.6.0 X X -hashbrown@0.14.5 X X -hashbrown@0.15.5 X X -heck@0.5.0 X X -hermit-abi@0.5.2 X X -hex@0.4.3 X X -hmac@0.12.1 X X -home@0.5.11 X X -http@1.3.1 X X -http-body@1.0.1 X -http-body-util@0.1.3 X -httparse@1.10.1 X X -humantime@2.2.0 X X -hyper@1.7.0 X -hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X -iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-datafusion@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X 
-ident_case@1.0.1 X X -idna@1.1.0 X X -idna_adapter@1.2.1 X X -indexmap@2.11.0 X X -indoc@2.0.6 X X -integer-encoding@3.0.4 X -io-uring@0.7.10 X X -ipnet@2.11.0 X X -iri-string@0.7.8 X X -itertools@0.13.0 X X -itertools@0.14.0 X X -itoa@1.0.15 X X -jobserver@0.1.34 X X -js-sys@0.3.78 X X -lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X -libloading@0.7.4 X -libm@0.2.15 X -libz-rs-sys@0.5.2 X -linux-raw-sys@0.9.4 X X X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -lzma-sys@0.1.20 X X -matchers@0.2.0 X -md-5@0.10.6 X X -memchr@2.7.5 X X -memoffset@0.9.1 X -miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X -murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X -num-bigint@0.4.6 X X -num-complex@0.4.6 X X -num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X -num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X -object_store@0.12.3 X X -once_cell@1.21.3 X X -opendal@0.54.0 X -ordered-float@2.10.1 X -ordered-float@4.6.0 X -parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X -paste@1.0.15 X X -percent-encoding@2.3.2 X X -petgraph@0.8.2 X X -phf@0.12.1 X -phf_shared@0.12.1 X -pin-project-lite@0.2.16 X X -pin-utils@0.1.0 X X -pkg-config@0.3.32 X X -portable-atomic@1.11.1 X X -potential_utf@0.1.3 X -ppv-lite86@0.2.21 X X -prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X -prost@0.13.5 X -prost-derive@0.13.5 X -psm@0.1.26 X X -pyiceberg_core_rust@0.7.0 X -pyo3@0.24.2 X X -pyo3-build-config@0.24.2 X X -pyo3-ffi@0.24.2 X X -pyo3-macros@0.24.2 X X -pyo3-macros-backend@0.24.2 X X -quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X -r-efi@5.3.0 X X X -rand@0.8.5 X X -rand@0.9.2 X X -rand_chacha@0.3.1 X X -rand_chacha@0.9.0 X X -rand_core@0.6.4 X X -rand_core@0.9.3 X X -recursive@0.1.1 X -recursive-proc-macro-impl@0.1.1 X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X -repr_offset@0.2.2 X -reqsign@0.16.5 X -reqwest@0.12.23 X X -ring@0.17.14 X X -roaring@0.11.2 X X -rust_decimal@1.37.2 X -rustc-demangle@0.1.26 X X -rustc_version@0.4.1 X X -rustix@1.0.8 X X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X -rustversion@1.0.22 X X -ryu@1.0.20 X X -same-file@1.0.6 X X -scoped-tls@1.0.1 X X -scopeguard@1.2.0 X X -semver@1.0.26 X X -seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X -serde_repr@0.1.20 X X -serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X -sha1@0.10.6 X X -sha2@0.10.9 X X -sharded-slab@0.1.7 X -shlex@1.3.0 X X -simdutf8@0.1.5 X X -siphasher@1.0.1 X X -slab@0.4.11 X -smallvec@1.15.1 X X -snap@1.1.1 X -socket2@0.6.0 X X -sqlparser@0.55.0 X -sqlparser_derive@0.3.0 X -stable_deref_trait@1.2.0 X X -stacker@0.1.21 X X -static_assertions@1.1.0 X X -strsim@0.11.1 X -strum@0.27.2 X -strum_macros@0.27.2 X -subtle@2.6.1 X -syn@1.0.109 X X -syn@2.0.106 X X -sync_wrapper@1.0.2 X -synstructure@0.13.2 X -tagptr@0.2.0 X X -target-lexicon@0.13.2 X -tempfile@3.21.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X -thread_local@1.1.9 X X -threadpool@1.8.1 X X -thrift@0.17.0 X -tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X 
-tower@0.5.2 X -tower-http@0.6.6 X -tower-layer@0.3.3 X -tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X -tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X -try-lock@0.2.5 X -tstr@0.2.4 X -tstr_proc_macros@0.2.2 X -twox-hash@2.1.2 X -typed-arena@2.0.2 X -typed-builder@0.20.1 X X -typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -typewit@1.14.1 X -unicode-ident@1.0.18 X X X -unicode-segmentation@1.12.0 X X -unicode-width@0.2.1 X X -unindent@0.2.4 X X -untrusted@0.9.0 X -url@2.5.7 X X -utf8_iter@1.0.4 X X -uuid@1.18.1 X X -version_check@0.9.5 X X -walkdir@2.5.0 X X -want@0.3.1 X -wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X -wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -web-time@1.1.0 X X -webpki-roots@1.0.2 X -winapi@0.3.9 X X -winapi-i686-pc-windows-gnu@0.4.0 X X -winapi-util@0.1.11 X X -winapi-x86_64-pc-windows-gnu@0.4.0 X X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X -windows-sys@0.52.0 X X -windows-sys@0.59.0 X X -windows-sys@0.60.2 X X -windows-sys@0.61.0 X X -windows-targets@0.52.6 X X -windows-targets@0.53.3 X X -windows-threading@0.1.0 X X -windows_aarch64_gnullvm@0.52.6 X X -windows_aarch64_gnullvm@0.53.0 X X -windows_aarch64_msvc@0.52.6 X X -windows_aarch64_msvc@0.53.0 X X -windows_i686_gnu@0.52.6 X X -windows_i686_gnu@0.53.0 X X -windows_i686_gnullvm@0.52.6 X X -windows_i686_gnullvm@0.53.0 X X -windows_i686_msvc@0.52.6 X X -windows_i686_msvc@0.53.0 X X -windows_x86_64_gnu@0.52.6 X X -windows_x86_64_gnu@0.53.0 X X -windows_x86_64_gnullvm@0.52.6 X X -windows_x86_64_gnullvm@0.53.0 X X -windows_x86_64_msvc@0.52.6 X X -windows_x86_64_msvc@0.53.0 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -xz2@0.1.7 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X -zerofrom@0.1.6 X -zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X -zstd@0.13.3 X -zstd-safe@7.2.4 X X -zstd-sys@2.0.16+zstd.1.5.7 X X +crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 MPL-2.0 Unicode-3.0 Unlicense Zlib bzip2-1.0.6 +abi_stable@0.11.3 X X +abi_stable_derive@0.11.3 X X +abi_stable_shared@0.11.0 X X +adler2@2.0.1 X X X +ahash@0.8.12 X X +aho-corasick@1.1.3 X X +alloc-no-stdlib@2.0.4 X +alloc-stdlib@0.2.2 X +allocator-api2@0.2.21 X X +android_system_properties@0.1.5 X X +anyhow@1.0.100 X X +apache-avro@0.21.0 X +ar_archive_writer@0.2.0 X +array-init@2.1.0 X X +arrayref@0.3.9 X +arrayvec@0.7.6 X X +arrow@57.0.0 X +arrow-arith@57.0.0 X +arrow-array@57.0.0 X +arrow-buffer@57.0.0 X +arrow-cast@57.0.0 X +arrow-csv@57.0.0 X +arrow-data@57.0.0 X +arrow-ipc@57.0.0 X +arrow-json@57.0.0 X +arrow-ord@57.0.0 X +arrow-pyarrow@57.0.0 X +arrow-row@57.0.0 X +arrow-schema@57.0.0 X +arrow-select@57.0.0 X +arrow-string@57.0.0 X +as-any@0.3.2 X X +as_derive_utils@0.11.0 X X +async-compression@0.4.19 X X +async-ffi@0.5.0 X +async-lock@3.4.1 X X +async-trait@0.1.89 X X +atoi@2.0.0 X +atomic-waker@1.1.2 X X +autocfg@1.5.0 X X +backon@1.6.0 X +base64@0.22.1 X X 
+bigdecimal@0.4.9 X X +bimap@0.6.3 X X +bitflags@2.10.0 X X +blake2@0.10.6 X X +blake3@1.8.2 X X X +block-buffer@0.10.4 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X +brotli@8.0.2 X X +brotli-decompressor@5.0.0 X X +bumpalo@3.19.0 X X +bytemuck@1.24.0 X X X +byteorder@1.5.0 X X +bytes@1.10.1 X +bzip2@0.5.2 X X +bzip2@0.6.1 X X +bzip2-sys@0.1.13+1.0.8 X X +cc@1.2.43 X X +cfg-if@1.0.4 X X +chrono@0.4.42 X X +chrono-tz@0.10.4 X X +comfy-table@7.1.2 X +concurrent-queue@2.5.0 X X +const-oid@0.9.6 X X +const-random@0.1.18 X X +const-random-macro@0.1.16 X X +const_panic@0.2.15 X +constant_time_eq@0.3.1 X X X +core-foundation-sys@0.8.7 X X +core_extensions@1.5.4 X X +core_extensions_proc_macros@1.5.4 X X +cpufeatures@0.2.17 X X +crc32c@0.6.8 X X +crc32fast@1.5.0 X X +crossbeam-channel@0.5.15 X X +crossbeam-epoch@0.9.18 X X +crossbeam-utils@0.8.21 X X +crunchy@0.2.4 X +crypto-common@0.1.6 X X +csv@1.4.0 X X +csv-core@0.1.13 X X +darling@0.20.11 X +darling@0.21.3 X +darling_core@0.20.11 X +darling_core@0.21.3 X +darling_macro@0.20.11 X +darling_macro@0.21.3 X +dashmap@6.1.0 X +datafusion@51.0.0 X +datafusion-catalog@51.0.0 X +datafusion-catalog-listing@51.0.0 X +datafusion-common@51.0.0 X +datafusion-common-runtime@51.0.0 X +datafusion-datasource@51.0.0 X +datafusion-datasource-arrow@51.0.0 X +datafusion-datasource-csv@51.0.0 X +datafusion-datasource-json@51.0.0 X +datafusion-datasource-parquet@51.0.0 X +datafusion-doc@51.0.0 X +datafusion-execution@51.0.0 X +datafusion-expr@51.0.0 X +datafusion-expr-common@51.0.0 X +datafusion-ffi@51.0.0 X +datafusion-functions@51.0.0 X +datafusion-functions-aggregate@51.0.0 X +datafusion-functions-aggregate-common@51.0.0 X +datafusion-functions-nested@51.0.0 X +datafusion-functions-table@51.0.0 X +datafusion-functions-window@51.0.0 X +datafusion-functions-window-common@51.0.0 X +datafusion-macros@51.0.0 X +datafusion-optimizer@51.0.0 X +datafusion-physical-expr@51.0.0 X +datafusion-physical-expr-adapter@51.0.0 X +datafusion-physical-expr-common@51.0.0 X +datafusion-physical-optimizer@51.0.0 X +datafusion-physical-plan@51.0.0 X +datafusion-proto@51.0.0 X +datafusion-proto-common@51.0.0 X +datafusion-pruning@51.0.0 X +datafusion-session@51.0.0 X +datafusion-sql@51.0.0 X +derive_builder@0.20.2 X X +derive_builder_core@0.20.2 X X +derive_builder_macro@0.20.2 X X +digest@0.10.7 X X +displaydoc@0.2.5 X X +dissimilar@1.0.10 X +either@1.15.0 X X +equivalent@1.0.2 X X +errno@0.3.14 X X +event-listener@5.4.1 X X +event-listener-strategy@0.5.4 X X +expect-test@1.5.1 X X +fastrand@2.3.0 X X +find-msvc-tools@0.1.4 X X +fixedbitset@0.5.7 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X +fnv@1.0.7 X X +foldhash@0.1.5 X +form_urlencoded@1.2.2 X X +futures@0.3.31 X X +futures-channel@0.3.31 X X +futures-core@0.3.31 X X +futures-executor@0.3.31 X X +futures-io@0.3.31 X X +futures-macro@0.3.31 X X +futures-sink@0.3.31 X X +futures-task@0.3.31 X X +futures-timer@3.0.3 X X +futures-util@0.3.31 X X +generational-arena@0.2.9 X +generic-array@0.14.9 X +getrandom@0.2.16 X X +getrandom@0.3.4 X X +glob@0.3.3 X X +gloo-timers@0.3.0 X X +half@2.7.1 X X +hashbrown@0.14.5 X X +hashbrown@0.15.5 X X +hashbrown@0.16.0 X X +heck@0.5.0 X X +hex@0.4.3 X X +hmac@0.12.1 X X +home@0.5.11 X X +http@1.3.1 X X +http-body@1.0.1 X +http-body-util@0.1.3 X +httparse@1.10.1 X X +humantime@2.3.0 X X +hyper@1.7.0 X +hyper-rustls@0.27.7 X X X +hyper-util@0.1.17 X +iana-time-zone@0.1.64 X X +iana-time-zone-haiku@0.1.2 X X +iceberg@0.8.0 X +iceberg-datafusion@0.8.0 X +icu_collections@2.1.0 X +icu_locale_core@2.1.0 X 
+icu_normalizer@2.1.0 X +icu_normalizer_data@2.1.0 X +icu_properties@2.1.0 X +icu_properties_data@2.1.0 X +icu_provider@2.1.0 X +ident_case@1.0.1 X X +idna@1.1.0 X X +idna_adapter@1.2.1 X X +indexmap@2.12.0 X X +indoc@2.0.7 X X +integer-encoding@3.0.4 X +ipnet@2.11.0 X X +iri-string@0.7.8 X X +itertools@0.13.0 X X +itertools@0.14.0 X X +itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.5 X X +jiff-tzdb-platform@0.1.3 X X +jobserver@0.1.34 X X +js-sys@0.3.82 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libbz2-rs-sys@0.2.2 X +libc@0.2.177 X X +libloading@0.7.4 X +libm@0.2.15 X +libz-rs-sys@0.5.2 X +linux-raw-sys@0.11.0 X X X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.28 X X +lz4_flex@0.11.5 X +lzma-sys@0.1.20 X X +md-5@0.10.6 X X +memchr@2.7.6 X X +memoffset@0.9.1 X +miniz_oxide@0.8.9 X X X +mio@1.1.0 X +moka@0.12.11 X X +murmur3@0.5.2 X X +num-bigint@0.4.6 X X +num-complex@0.4.6 X X +num-integer@0.1.46 X X +num-traits@0.2.19 X X +object@0.32.2 X X +object_store@0.12.4 X X +once_cell@1.21.3 X X +opendal@0.55.0 X +ordered-float@2.10.1 X +ordered-float@4.6.0 X +parking@2.2.1 X X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.0.0 X +paste@1.0.15 X X +percent-encoding@2.3.2 X X +petgraph@0.8.3 X X +phf@0.12.1 X +phf_shared@0.12.1 X +pin-project-lite@0.2.16 X X +pin-utils@0.1.0 X X +pkg-config@0.3.32 X X +portable-atomic@1.11.1 X X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X +ppv-lite86@0.2.21 X X +prettyplease@0.2.37 X X +proc-macro-crate@3.4.0 X X +proc-macro2@1.0.103 X X +prost@0.14.1 X +prost-derive@0.14.1 X +psm@0.1.28 X X +pyiceberg_core_rust@0.8.0 X +pyo3@0.26.0 X X +pyo3-build-config@0.26.0 X X +pyo3-ffi@0.26.0 X X +pyo3-macros@0.26.0 X X +pyo3-macros-backend@0.26.0 X X +quad-rand@0.2.3 X +quick-xml@0.38.3 X +quote@1.0.41 X X +r-efi@5.3.0 X X X +rand@0.8.5 X X +rand@0.9.2 X X +rand_chacha@0.3.1 X X +rand_chacha@0.9.0 X X +rand_core@0.6.4 X X +rand_core@0.9.3 X X +recursive@0.1.1 X +recursive-proc-macro-impl@0.1.1 X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X +relative-path@1.9.3 X X +repr_offset@0.2.2 X +reqsign@0.16.5 X +reqwest@0.12.24 X X +ring@0.17.14 X X +roaring@0.11.2 X X +rstest@0.26.1 X X +rstest_macros@0.26.1 X X +rust_decimal@1.39.0 X +rustc_version@0.4.1 X X +rustix@1.1.2 X X X +rustls@0.23.34 X X X +rustls-pki-types@1.13.0 X X +rustls-webpki@0.103.7 X +rustversion@1.0.22 X X +ryu@1.0.20 X X +same-file@1.0.6 X X +scopeguard@1.2.0 X X +semver@1.0.27 X X +seq-macro@0.3.6 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X +serde_repr@0.1.20 X X +serde_urlencoded@0.7.1 X X +serde_with@3.15.1 X X +serde_with_macros@3.15.1 X X +sha1@0.10.6 X X +sha2@0.10.9 X X +shlex@1.3.0 X X +simd-adler32@0.3.7 X +simdutf8@0.1.5 X X +siphasher@1.0.1 X X +slab@0.4.11 X +smallvec@1.15.1 X X +snap@1.1.1 X +socket2@0.6.1 X X +sqlparser@0.59.0 X +sqlparser_derive@0.3.0 X +stable_deref_trait@1.2.1 X X +stacker@0.1.22 X X +strsim@0.11.1 X +strum@0.26.3 X +strum@0.27.2 X +strum_macros@0.26.4 X +strum_macros@0.27.2 X +subtle@2.6.1 X +syn@1.0.109 X X +syn@2.0.108 X X +sync_wrapper@1.0.2 X +synstructure@0.13.2 X +tagptr@0.2.0 X X +target-lexicon@0.13.3 X +tempfile@3.23.0 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X +thrift@0.17.0 X +tiny-keccak@2.0.2 X +tinystr@0.8.2 X +tokio@1.48.0 X 
+tokio-macros@2.6.0 X
+tokio-rustls@0.26.4 X X
+tokio-util@0.7.16 X
+toml_datetime@0.7.3 X X
+toml_edit@0.23.7 X X
+toml_parser@1.0.4 X X
+tower@0.5.2 X
+tower-http@0.6.6 X
+tower-layer@0.3.3 X
+tower-service@0.3.3 X
+tracing@0.1.41 X
+tracing-attributes@0.1.30 X
+tracing-core@0.1.34 X
+try-lock@0.2.5 X
+tstr@0.2.4 X
+tstr_proc_macros@0.2.2 X
+twox-hash@2.1.2 X
+typed-arena@2.0.2 X
+typed-builder@0.20.1 X X
+typed-builder-macro@0.20.1 X X
+typenum@1.19.0 X X
+typewit@1.14.2 X
+unicode-ident@1.0.20 X X X
+unicode-segmentation@1.12.0 X X
+unicode-width@0.2.2 X X
+unindent@0.2.4 X X
+untrusted@0.9.0 X
+url@2.5.7 X X
+utf8_iter@1.0.4 X X
+uuid@1.18.1 X X
+version_check@0.9.5 X X
+walkdir@2.5.0 X X
+want@0.3.1 X
+wasi@0.11.1+wasi-snapshot-preview1 X X X
+wasip2@1.0.1+wasi-0.2.4 X X X
+wasm-bindgen@0.2.105 X X
+wasm-bindgen-futures@0.4.55 X X
+wasm-bindgen-macro@0.2.105 X X
+wasm-bindgen-macro-support@0.2.105 X X
+wasm-bindgen-shared@0.2.105 X X
+wasm-streams@0.4.2 X X
+web-sys@0.3.82 X X
+web-time@1.1.0 X X
+webpki-roots@1.0.3 X
+winapi@0.3.9 X X
+winapi-i686-pc-windows-gnu@0.4.0 X X
+winapi-util@0.1.11 X X
+winapi-x86_64-pc-windows-gnu@0.4.0 X X
+windows-core@0.62.2 X X
+windows-implement@0.60.2 X X
+windows-interface@0.59.3 X X
+windows-link@0.2.1 X X
+windows-result@0.4.1 X X
+windows-strings@0.5.1 X X
+windows-sys@0.52.0 X X
+windows-sys@0.59.0 X X
+windows-sys@0.60.2 X X
+windows-sys@0.61.2 X X
+windows-targets@0.52.6 X X
+windows-targets@0.53.5 X X
+windows_aarch64_gnullvm@0.52.6 X X
+windows_aarch64_gnullvm@0.53.1 X X
+windows_aarch64_msvc@0.52.6 X X
+windows_aarch64_msvc@0.53.1 X X
+windows_i686_gnu@0.52.6 X X
+windows_i686_gnu@0.53.1 X X
+windows_i686_gnullvm@0.52.6 X X
+windows_i686_gnullvm@0.53.1 X X
+windows_i686_msvc@0.52.6 X X
+windows_i686_msvc@0.53.1 X X
+windows_x86_64_gnu@0.52.6 X X
+windows_x86_64_gnu@0.53.1 X X
+windows_x86_64_gnullvm@0.52.6 X X
+windows_x86_64_gnullvm@0.53.1 X X
+windows_x86_64_msvc@0.52.6 X X
+windows_x86_64_msvc@0.53.1 X X
+winnow@0.7.13 X
+wit-bindgen@0.46.0 X X X
+writeable@0.6.2 X
+xz2@0.1.7 X X
+yoke@0.8.1 X
+yoke-derive@0.8.1 X
+zerocopy@0.8.27 X X X
+zerocopy-derive@0.8.27 X X X
+zerofrom@0.1.6 X
+zerofrom-derive@0.1.6 X
+zeroize@1.8.2 X X
+zerotrie@0.2.3 X
+zerovec@0.11.5 X
+zerovec-derive@0.11.2 X
+zlib-rs@0.5.2 X
+zstd@0.13.3 X
+zstd-safe@7.2.4 X X
+zstd-sys@2.0.16+zstd.1.5.7 X X
diff --git a/bindings/python/src/data_file.rs b/bindings/python/src/data_file.rs
index 900d6c6014..b0e42e7d73 100644
--- a/bindings/python/src/data_file.rs
+++ b/bindings/python/src/data_file.rs
@@ -143,7 +143,7 @@ impl PyDataFile {
     }
 
     #[getter]
-    fn split_offsets(&self) -> &[i64] {
+    fn split_offsets(&self) -> Option<&[i64]> {
         self.inner.split_offsets()
     }
diff --git a/bindings/python/src/datafusion_table_provider.rs b/bindings/python/src/datafusion_table_provider.rs
index b5e1bf952e..8db7223b34 100644
--- a/bindings/python/src/datafusion_table_provider.rs
+++ b/bindings/python/src/datafusion_table_provider.rs
@@ -23,7 +23,7 @@ use datafusion_ffi::table_provider::FFI_TableProvider;
 use iceberg::TableIdent;
 use iceberg::io::FileIO;
 use iceberg::table::StaticTable;
-use iceberg_datafusion::table::IcebergTableProvider;
+use iceberg_datafusion::table::IcebergStaticTableProvider;
 use pyo3::exceptions::PyRuntimeError;
 use pyo3::prelude::*;
 use pyo3::types::PyCapsule;
@@ -32,7 +32,7 @@ use crate::runtime::runtime;
 
 #[pyclass(name = "IcebergDataFusionTable")]
 pub struct PyIcebergDataFusionTable {
-    inner: Arc<IcebergTableProvider>,
+    inner: Arc<IcebergStaticTableProvider>,
 }
 
 #[pymethods]
@@ -69,7 +69,7 @@ impl PyIcebergDataFusionTable {
                 let table = static_table.into_table();
 
-                IcebergTableProvider::try_new_from_table(table)
+                IcebergStaticTableProvider::try_new_from_table(table)
                     .await
                     .map_err(|e| {
                         PyRuntimeError::new_err(format!("Failed to create table provider: {e}"))
diff --git a/bindings/python/src/transform.rs b/bindings/python/src/transform.rs
index 24e9f061dd..c159d573fc 100644
--- a/bindings/python/src/transform.rs
+++ b/bindings/python/src/transform.rs
@@ -24,46 +24,46 @@ use pyo3::prelude::*;
 use crate::error::to_py_err;
 
 #[pyfunction]
-pub fn identity(py: Python, array: PyObject) -> PyResult<PyObject> {
+pub fn identity(py: Python, array: Py<PyAny>) -> PyResult<Py<PyAny>> {
     apply(py, array, Transform::Identity)
 }
 
 #[pyfunction]
-pub fn void(py: Python, array: PyObject) -> PyResult<PyObject> {
+pub fn void(py: Python, array: Py<PyAny>) -> PyResult<Py<PyAny>> {
     apply(py, array, Transform::Void)
 }
 
 #[pyfunction]
-pub fn year(py: Python, array: PyObject) -> PyResult<PyObject> {
+pub fn year(py: Python, array: Py<PyAny>) -> PyResult<Py<PyAny>> {
     apply(py, array, Transform::Year)
 }
 
 #[pyfunction]
-pub fn month(py: Python, array: PyObject) -> PyResult<PyObject> {
+pub fn month(py: Python, array: Py<PyAny>) -> PyResult<Py<PyAny>> {
     apply(py, array, Transform::Month)
 }
 
 #[pyfunction]
-pub fn day(py: Python, array: PyObject) -> PyResult<PyObject> {
+pub fn day(py: Python, array: Py<PyAny>) -> PyResult<Py<PyAny>> {
     apply(py, array, Transform::Day)
 }
 
 #[pyfunction]
-pub fn hour(py: Python, array: PyObject) -> PyResult<PyObject> {
+pub fn hour(py: Python, array: Py<PyAny>) -> PyResult<Py<PyAny>> {
     apply(py, array, Transform::Hour)
 }
 
 #[pyfunction]
-pub fn bucket(py: Python, array: PyObject, num_buckets: u32) -> PyResult<PyObject> {
+pub fn bucket(py: Python, array: Py<PyAny>, num_buckets: u32) -> PyResult<Py<PyAny>> {
     apply(py, array, Transform::Bucket(num_buckets))
 }
 
 #[pyfunction]
-pub fn truncate(py: Python, array: PyObject, width: u32) -> PyResult<PyObject> {
+pub fn truncate(py: Python, array: Py<PyAny>, width: u32) -> PyResult<Py<PyAny>> {
     apply(py, array, Transform::Truncate(width))
 }
 
-fn apply(py: Python, array: PyObject, transform: Transform) -> PyResult<PyObject> {
+fn apply(py: Python, array: Py<PyAny>, transform: Transform) -> PyResult<Py<PyAny>> {
     // import
     let array = ArrayData::from_pyarrow_bound(array.bind(py))?;
     let array = make_array(array);
@@ -71,7 +71,7 @@ fn apply(py: Python, array: PyObject, transform: Transform) -> PyResult<PyObject> {
 pub fn register_module(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
diff --git a/crates/catalog/glue/DEPENDENCIES.rust.tsv b/crates/catalog/glue/DEPENDENCIES.rust.tsv
index 2d9f686262..e34ff1afc1 100644
--- a/crates/catalog/glue/DEPENDENCIES.rust.tsv
+++ b/crates/catalog/glue/DEPENDENCIES.rust.tsv
@@ -1,77 +1,73 @@
 crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib
-addr2line@0.24.2 X X
 adler2@2.0.1 X X X
 ahash@0.8.12 X X
-aho-corasick@1.1.3 X X
+aho-corasick@1.1.4 X X
 alloc-no-stdlib@2.0.4 X
 alloc-stdlib@0.2.2 X
 android_system_properties@0.1.5 X X
-anyhow@1.0.99 X X
-apache-avro@0.20.0 X
+anyhow@1.0.100 X X
+apache-avro@0.21.0 X
 array-init@2.1.0 X X
 arrayvec@0.7.6 X X
-arrow-arith@55.2.0 X
-arrow-array@55.2.0 X
-arrow-buffer@55.2.0 X
-arrow-cast@55.2.0 X
-arrow-data@55.2.0 X
-arrow-ipc@55.2.0 X
-arrow-ord@55.2.0 X
-arrow-schema@55.2.0 X
-arrow-select@55.2.0 X
-arrow-string@55.2.0 X
+arrow-arith@57.1.0 X
+arrow-array@57.1.0 X
+arrow-buffer@57.1.0 X
+arrow-cast@57.1.0 X
+arrow-data@57.1.0 X
+arrow-ipc@57.1.0 X
+arrow-ord@57.1.0 X
+arrow-schema@57.1.0 X
+arrow-select@57.1.0 X
+arrow-string@57.1.0 X
 as-any@0.3.2 X X
 async-lock@3.4.1 X X
 async-trait@0.1.89 X X
 atoi@2.0.0 X
 atomic-waker@1.1.2 X X
 autocfg@1.5.0 X
X -aws-config@1.8.6 X -aws-credential-types@1.2.6 X -aws-runtime@1.5.10 X -aws-sdk-glue@1.119.0 X -aws-sdk-sso@1.83.0 X -aws-sdk-ssooidc@1.84.0 X -aws-sdk-sts@1.85.0 X -aws-sigv4@1.3.4 X -aws-smithy-async@1.2.5 X -aws-smithy-http@0.62.3 X -aws-smithy-http-client@1.1.1 X -aws-smithy-json@0.61.5 X -aws-smithy-observability@0.1.3 X -aws-smithy-query@0.60.7 X -aws-smithy-runtime@1.9.1 X -aws-smithy-runtime-api@1.9.0 X -aws-smithy-types@1.3.2 X -aws-smithy-xml@0.60.10 X -aws-types@1.3.8 X -backon@1.5.2 X -backtrace@0.3.75 X X -base64@0.21.7 X X +aws-config@1.8.11 X +aws-credential-types@1.2.10 X +aws-runtime@1.5.16 X +aws-sdk-glue@1.132.0 X +aws-sdk-sso@1.90.0 X +aws-sdk-ssooidc@1.92.0 X +aws-sdk-sts@1.94.0 X +aws-sigv4@1.3.6 X +aws-smithy-async@1.2.7 X +aws-smithy-http@0.62.6 X +aws-smithy-http-client@1.1.5 X +aws-smithy-json@0.61.8 X +aws-smithy-observability@0.1.5 X +aws-smithy-query@0.60.9 X +aws-smithy-runtime@1.9.5 X +aws-smithy-runtime-api@1.9.3 X +aws-smithy-types@1.3.5 X +aws-smithy-xml@0.60.13 X +aws-types@1.3.10 X +backon@1.6.0 X base64@0.22.1 X X base64-simd@0.8.0 X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X +bytes@1.11.0 X bytes-utils@0.1.4 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X +cc@1.2.49 X X +cfg-if@1.0.4 X X chrono@0.4.42 X X concurrent-queue@2.5.0 X X const-oid@0.9.6 X X const-random@0.1.18 X X const-random-macro@0.1.16 X X core-foundation@0.10.1 X X -core-foundation@0.9.4 X X core-foundation-sys@0.8.7 X X cpufeatures@0.2.17 X X crc32c@0.6.8 X X @@ -80,14 +76,14 @@ crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X darling_core@0.21.3 X darling_macro@0.20.11 X darling_macro@0.21.3 X -deranged@0.5.3 X X +deranged@0.5.5 X X derive_builder@0.20.2 X X derive_builder_core@0.20.2 X X derive_builder_macro@0.20.2 X X @@ -100,9 +96,9 @@ event-listener@5.4.1 X X event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X fnv@1.0.7 X X form_urlencoded@1.2.2 X X futures@0.3.31 X X @@ -114,114 +110,107 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X h2@0.3.27 X h2@0.4.12 X -half@2.6.0 X X -hashbrown@0.15.5 X X +half@2.7.1 X X +hashbrown@0.16.1 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hmac@0.12.1 X X home@0.5.11 X X http@0.2.12 X X -http@1.3.1 X X +http@1.4.0 X X http-body@0.4.6 X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X httpdate@1.0.3 X X hyper@0.14.32 X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.24.2 X X X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-glue@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X 
-icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-catalog-glue@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X -indexmap@2.11.0 X X +indexmap@2.12.1 X X integer-encoding@3.0.4 X -io-uring@0.7.10 X X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +libz-rs-sys@0.5.3 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-conv@0.1.0 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X openssl-probe@0.1.6 X X ordered-float@2.10.1 X ordered-float@4.6.0 X outref@0.5.2 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X powerfmt@0.2.0 X X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -229,152 +218,150 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc_version@0.4.1 X X rustls@0.21.12 X X X -rustls@0.23.31 X X X -rustls-native-certs@0.6.3 X X X -rustls-native-certs@0.8.1 X X X -rustls-pemfile@1.0.4 X X X -rustls-pki-types@1.12.0 X X +rustls@0.23.35 X X X +rustls-native-certs@0.8.2 X X X +rustls-pki-types@1.13.1 X X rustls-webpki@0.101.7 X -rustls-webpki@0.103.4 X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -schannel@0.1.27 X -scoped-tls@1.0.1 X X +schannel@0.1.28 X scopeguard@1.2.0 X X sct@0.7.1 X X X -security-framework@2.11.1 X X -security-framework@3.4.0 X 
X +security-framework@3.5.1 X X security-framework-sys@2.15.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X -signal-hook-registry@1.4.6 X X +signal-hook-registry@1.4.7 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X socket2@0.5.10 X X -socket2@0.6.0 X X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +socket2@0.6.1 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X -time@0.3.43 X X +time@0.3.44 X X time-core@0.1.6 X X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X tokio-rustls@0.24.1 X X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X urlencoding@2.1.3 X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X version_check@0.9.5 X X vsimd@0.8.0 X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X 
+windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X xmlparser@0.13.6 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/catalog/glue/src/catalog.rs b/crates/catalog/glue/src/catalog.rs index dce287ed6e..37a7996f80 100644 --- a/crates/catalog/glue/src/catalog.rs +++ b/crates/catalog/glue/src/catalog.rs @@ -151,33 +151,33 @@ impl GlueCatalog { async fn new(config: GlueCatalogConfig) -> Result<Self> { let sdk_config = create_sdk_config(&config.props, config.uri.as_ref()).await; let mut file_io_props = config.props.clone(); - if !file_io_props.contains_key(S3_ACCESS_KEY_ID) { - if let Some(access_key_id) = file_io_props.get(AWS_ACCESS_KEY_ID) { - file_io_props.insert(S3_ACCESS_KEY_ID.to_string(), access_key_id.to_string()); - } + if !file_io_props.contains_key(S3_ACCESS_KEY_ID) + && let Some(access_key_id) = file_io_props.get(AWS_ACCESS_KEY_ID) + { + file_io_props.insert(S3_ACCESS_KEY_ID.to_string(), access_key_id.to_string()); } - if !file_io_props.contains_key(S3_SECRET_ACCESS_KEY) { - if let Some(secret_access_key) = file_io_props.get(AWS_SECRET_ACCESS_KEY) { - file_io_props.insert( - S3_SECRET_ACCESS_KEY.to_string(), - secret_access_key.to_string(), - ); - } + if !file_io_props.contains_key(S3_SECRET_ACCESS_KEY) + && let Some(secret_access_key) = file_io_props.get(AWS_SECRET_ACCESS_KEY) + { + file_io_props.insert( + S3_SECRET_ACCESS_KEY.to_string(), + secret_access_key.to_string(), + ); } - if !file_io_props.contains_key(S3_REGION) { - if let Some(region) = file_io_props.get(AWS_REGION_NAME) { - file_io_props.insert(S3_REGION.to_string(), region.to_string()); - } + if !file_io_props.contains_key(S3_REGION) + && let Some(region) = file_io_props.get(AWS_REGION_NAME) + { + file_io_props.insert(S3_REGION.to_string(), region.to_string()); } - if !file_io_props.contains_key(S3_SESSION_TOKEN) { - if let Some(session_token) = file_io_props.get(AWS_SESSION_TOKEN) { - file_io_props.insert(S3_SESSION_TOKEN.to_string(), session_token.to_string()); - } + if !file_io_props.contains_key(S3_SESSION_TOKEN) + && let Some(session_token) = file_io_props.get(AWS_SESSION_TOKEN) + { + file_io_props.insert(S3_SESSION_TOKEN.to_string(), session_token.to_string()); } - if !file_io_props.contains_key(S3_ENDPOINT) { - if let Some(aws_endpoint) = config.uri.as_ref() { - file_io_props.insert(S3_ENDPOINT.to_string(), aws_endpoint.to_string()); - } + if !file_io_props.contains_key(S3_ENDPOINT) + && let Some(aws_endpoint) = config.uri.as_ref() + { + file_io_props.insert(S3_ENDPOINT.to_string(), aws_endpoint.to_string()); } let client = aws_sdk_glue::Client::new(&sdk_config); diff --git a/crates/catalog/glue/src/schema.rs b/crates/catalog/glue/src/schema.rs index cfd7487973..864320dae4 100644 --- a/crates/catalog/glue/src/schema.rs +++
b/crates/catalog/glue/src/schema.rs @@ -165,7 +165,12 @@ impl SchemaVisitor for GlueSchemaBuilder { PrimitiveType::Date => "date".to_string(), PrimitiveType::Timestamp => "timestamp".to_string(), PrimitiveType::TimestampNs => "timestamp_ns".to_string(), - PrimitiveType::TimestamptzNs => "timestamptz_ns".to_string(), + PrimitiveType::Timestamptz | PrimitiveType::TimestamptzNs => { + return Err(Error::new( + ErrorKind::FeatureUnsupported, + format!("Conversion from {p:?} is not supported"), + )); + } PrimitiveType::Time | PrimitiveType::String | PrimitiveType::Uuid => { "string".to_string() } @@ -173,12 +178,6 @@ impl SchemaVisitor for GlueSchemaBuilder { PrimitiveType::Decimal { precision, scale } => { format!("decimal({precision},{scale})") } - _ => { - return Err(Error::new( - ErrorKind::FeatureUnsupported, - "Conversion from 'Timestamptz' is not supported", - )); - } }; Ok(glue_type) diff --git a/crates/catalog/hms/DEPENDENCIES.rust.tsv b/crates/catalog/hms/DEPENDENCIES.rust.tsv index cef38cabcd..5025f7b184 100644 --- a/crates/catalog/hms/DEPENDENCIES.rust.tsv +++ b/crates/catalog/hms/DEPENDENCIES.rust.tsv @@ -1,25 +1,24 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-broadcast@0.7.2 X X async-lock@3.4.1 X X @@ -28,23 +27,22 @@ async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X +backon@1.6.0 X base64@0.22.1 X X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X -cc@1.2.36 X X -cfg-if@1.0.3 X X +bytes@1.11.0 X +cc@1.2.49 X X +cfg-if@1.0.4 X X cfg_aliases@0.2.1 X chrono@0.4.42 X X concurrent-queue@2.5.0 X X @@ -59,7 +57,7 @@ crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X @@ -80,9 +78,9 @@ event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X faststr@0.2.32 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X fnv@1.0.7 X X form_urlencoded@1.2.2 X X futures@0.3.31 X X @@ -94,103 +92,95 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X 
+getrandom@0.3.4 X X gloo-timers@0.3.0 X X -half@2.6.0 X X +half@2.7.1 X X hashbrown@0.14.5 X X -hashbrown@0.15.5 X X +hashbrown@0.16.1 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hive_metastore@0.2.0 X hmac@0.12.1 X X home@0.5.11 X X -http@1.3.1 X X +http@1.4.0 X X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-hms@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-catalog-hms@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X -indexmap@2.11.0 X X +indexmap@2.12.1 X X integer-encoding@3.0.4 X -integer-encoding@4.0.2 X -io-uring@0.7.10 X X +integer-encoding@4.1.0 X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X +libz-rs-sys@0.5.3 X linked-hash-map@0.5.6 X X linkedbytes@0.1.16 X X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X memoffset@0.9.1 X metainfo@0.7.14 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X motore@0.4.1 X X motore-macros@0.4.3 X X mur3@0.1.0 X murmur3@0.5.2 X X nix@0.29.0 X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -num_enum@0.7.4 X X X -num_enum_derive@0.7.4 X X X -object@0.36.7 X X +num_enum@0.7.5 X X X +num_enum_derive@0.7.5 X X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X ordered-float@2.10.1 X ordered-float@4.6.0 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pilota@0.11.10 X X @@ -200,14 +190,15 @@ pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro-crate@3.3.0 X X -proc-macro2@1.0.101 X X +proc-macro-crate@3.4.0 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X 
X @@ -215,148 +206,152 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -ref-cast@1.0.24 X X -ref-cast-impl@1.0.24 X X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +ref-cast@1.0.25 X X +ref-cast-impl@1.0.25 X X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc-hash@2.1.1 X X rustc_version@0.4.1 X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -scoped-tls@1.0.1 X X scopeguard@1.2.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X -signal-hook-registry@1.4.6 X X +signal-hook-registry@1.4.7 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X socket2@0.5.10 X X -socket2@0.6.0 X X +socket2@0.6.1 X X sonic-number@0.1.0 X sonic-rs@0.3.17 X -sonic-simd@0.1.1 X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +sonic-simd@0.1.2 X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X thiserror@1.0.69 X X -thiserror@2.0.16 X X +thiserror@2.0.17 X X thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X tokio-stream@0.1.17 X -tokio-util@0.7.16 X -toml_datetime@0.6.11 X X -toml_edit@0.22.27 X X +tokio-util@0.7.17 X +toml_datetime@0.7.3 X X +toml_edit@0.23.9 X X +toml_parser@1.0.4 X X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X version_check@0.9.5 X X volo@0.10.7 X X volo-thrift@0.10.8 X X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X 
+wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -winnow@0.7.13 X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +windows_x86_64_msvc@0.53.1 X X +winnow@0.7.14 X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/catalog/hms/src/schema.rs b/crates/catalog/hms/src/schema.rs index 8893a80521..c23b48719d 100644 --- a/crates/catalog/hms/src/schema.rs +++ b/crates/catalog/hms/src/schema.rs @@ -122,7 +122,12 @@ impl SchemaVisitor for HiveSchemaBuilder { PrimitiveType::Date => "date".to_string(), PrimitiveType::Timestamp => "timestamp".to_string(), PrimitiveType::TimestampNs => "timestamp_ns".to_string(), - PrimitiveType::TimestamptzNs => "timestamptz_ns".to_string(), + PrimitiveType::Timestamptz | PrimitiveType::TimestamptzNs => { + return Err(Error::new( + ErrorKind::FeatureUnsupported, + format!("Conversion from {p:?} is not supported"), + )); + } PrimitiveType::Time | PrimitiveType::String | PrimitiveType::Uuid => { "string".to_string() } @@ -130,12 +135,6 @@ impl SchemaVisitor for HiveSchemaBuilder { PrimitiveType::Decimal { precision, scale } => { format!("decimal({precision},{scale})") } - _ => { - return Err(Error::new( - ErrorKind::FeatureUnsupported, - "Conversion from 'Timestamptz' is not supported", - )); - } }; Ok(hive_type) diff --git a/crates/catalog/loader/DEPENDENCIES.rust.tsv b/crates/catalog/loader/DEPENDENCIES.rust.tsv index d809a30ca4..02b06c4479 100644 --- a/crates/catalog/loader/DEPENDENCIES.rust.tsv +++ b/crates/catalog/loader/DEPENDENCIES.rust.tsv @@ -1,25 +1,25 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X +allocator-api2@0.2.21 X X 
android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-broadcast@0.7.2 X X async-lock@3.4.1 X X @@ -28,46 +28,44 @@ async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -aws-config@1.8.6 X -aws-credential-types@1.2.6 X -aws-runtime@1.5.10 X -aws-sdk-glue@1.119.0 X -aws-sdk-s3tables@1.37.0 X -aws-sdk-sso@1.83.0 X -aws-sdk-ssooidc@1.84.0 X -aws-sdk-sts@1.85.0 X -aws-sigv4@1.3.4 X -aws-smithy-async@1.2.5 X -aws-smithy-http@0.62.3 X -aws-smithy-http-client@1.1.1 X -aws-smithy-json@0.61.5 X -aws-smithy-observability@0.1.3 X -aws-smithy-query@0.60.7 X -aws-smithy-runtime@1.9.1 X -aws-smithy-runtime-api@1.9.0 X -aws-smithy-types@1.3.2 X -aws-smithy-xml@0.60.10 X -aws-types@1.3.8 X -backon@1.5.2 X -backtrace@0.3.75 X X -base64@0.21.7 X X +aws-config@1.8.11 X +aws-credential-types@1.2.10 X +aws-runtime@1.5.16 X +aws-sdk-glue@1.132.0 X +aws-sdk-s3tables@1.46.0 X +aws-sdk-sso@1.90.0 X +aws-sdk-ssooidc@1.92.0 X +aws-sdk-sts@1.94.0 X +aws-sigv4@1.3.6 X +aws-smithy-async@1.2.7 X +aws-smithy-http@0.62.6 X +aws-smithy-http-client@1.1.5 X +aws-smithy-json@0.61.8 X +aws-smithy-observability@0.1.5 X +aws-smithy-query@0.60.9 X +aws-smithy-runtime@1.9.5 X +aws-smithy-runtime-api@1.9.3 X +aws-smithy-types@1.3.5 X +aws-smithy-xml@0.60.13 X +aws-types@1.3.10 X +backon@1.6.0 X base64@0.22.1 X X base64-simd@0.8.0 X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X +bytes@1.11.0 X bytes-utils@0.1.4 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X +cc@1.2.49 X X +cfg-if@1.0.4 X X cfg_aliases@0.2.1 X chrono@0.4.42 X X concurrent-queue@2.5.0 X X @@ -75,16 +73,18 @@ const-oid@0.9.6 X X const-random@0.1.18 X X const-random-macro@0.1.16 X X core-foundation@0.10.1 X X -core-foundation@0.9.4 X X core-foundation-sys@0.8.7 X X cpufeatures@0.2.17 X X +crc@3.4.0 X X +crc-catalog@2.4.0 X X crc32c@0.6.8 X X crc32fast@1.5.0 X X crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X +crossbeam-queue@0.3.12 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X @@ -92,7 +92,7 @@ darling_core@0.21.3 X darling_macro@0.20.11 X darling_macro@0.21.3 X dashmap@6.1.0 X -deranged@0.5.3 X X +deranged@0.5.5 X X derive_builder@0.20.2 X X derive_builder_core@0.20.2 X X derive_builder_macro@0.20.2 X X @@ -106,131 +106,130 @@ event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X faststr@0.2.32 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X +flume@0.11.1 X X fnv@1.0.7 X X +foldhash@0.1.5 X form_urlencoded@1.2.2 X X futures@0.3.31 X X futures-channel@0.3.31 X X futures-core@0.3.31 X 
X futures-executor@0.3.31 X X +futures-intrusive@0.5.0 X X futures-io@0.3.31 X X futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X h2@0.3.27 X h2@0.4.12 X -half@2.6.0 X X +half@2.7.1 X X hashbrown@0.14.5 X X hashbrown@0.15.5 X X +hashbrown@0.16.1 X X +hashlink@0.10.0 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hive_metastore@0.2.0 X hmac@0.12.1 X X home@0.5.11 X X http@0.2.12 X X -http@1.3.1 X X +http@1.4.0 X X http-body@0.4.6 X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X httpdate@1.0.3 X X hyper@0.14.32 X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.24.2 X X X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-glue@0.7.0 X -iceberg-catalog-hms@0.7.0 X -iceberg-catalog-loader@0.7.0 X -iceberg-catalog-rest@0.7.0 X -iceberg-catalog-s3tables@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-catalog-glue@0.8.0 X +iceberg-catalog-hms@0.8.0 X +iceberg-catalog-loader@0.8.0 X +iceberg-catalog-rest@0.8.0 X +iceberg-catalog-s3tables@0.8.0 X +iceberg-catalog-sql@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X -indexmap@2.11.0 X X +indexmap@2.12.1 X X integer-encoding@3.0.4 X -integer-encoding@4.0.2 X -io-uring@0.7.10 X X +integer-encoding@4.1.0 X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X +libsqlite3-sys@0.30.1 X +libz-rs-sys@0.5.3 X linked-hash-map@0.5.6 X X linkedbytes@0.1.16 X X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X memoffset@0.9.1 X metainfo@0.7.14 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X motore@0.4.1 X X motore-macros@0.4.3 X X mur3@0.1.0 X murmur3@0.5.2 X X nix@0.29.0 X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-conv@0.1.0 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -num_enum@0.7.4 X X X -num_enum_derive@0.7.4 X X X -object@0.36.7 X X +num_enum@0.7.5 X X X +num_enum_derive@0.7.5 X X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X 
openssl-probe@0.1.6 X X ordered-float@2.10.1 X ordered-float@4.6.0 X outref@0.5.2 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pilota@0.11.10 X X @@ -240,15 +239,16 @@ pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X powerfmt@0.2.0 X X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro-crate@3.3.0 X X -proc-macro2@1.0.101 X X +proc-macro-crate@3.4.0 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -256,164 +256,171 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -ref-cast@1.0.24 X X -ref-cast-impl@1.0.24 X X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +ref-cast@1.0.25 X X +ref-cast-impl@1.0.25 X X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc-hash@2.1.1 X X rustc_version@0.4.1 X X rustls@0.21.12 X X X -rustls@0.23.31 X X X -rustls-native-certs@0.6.3 X X X -rustls-native-certs@0.8.1 X X X -rustls-pemfile@1.0.4 X X X -rustls-pki-types@1.12.0 X X +rustls@0.23.35 X X X +rustls-native-certs@0.8.2 X X X +rustls-pki-types@1.13.1 X X rustls-webpki@0.101.7 X -rustls-webpki@0.103.4 X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -schannel@0.1.27 X -scoped-tls@1.0.1 X X +schannel@0.1.28 X scopeguard@1.2.0 X X sct@0.7.1 X X X -security-framework@2.11.1 X X -security-framework@3.4.0 X X +security-framework@3.5.1 X X security-framework-sys@2.15.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X -signal-hook-registry@1.4.6 X X +signal-hook-registry@1.4.7 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X socket2@0.5.10 X X -socket2@0.6.0 X X +socket2@0.6.1 X X sonic-number@0.1.0 X sonic-rs@0.3.17 X -sonic-simd@0.1.1 X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +sonic-simd@0.1.2 X +spin@0.9.8 X +sqlx@0.8.6 X X +sqlx-core@0.8.6 X X +sqlx-sqlite@0.8.6 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X thiserror@1.0.69 X X -thiserror@2.0.16 X X +thiserror@2.0.17 X X thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X -time@0.3.43 X X +time@0.3.44 X X time-core@0.1.6 X X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X 
tokio-rustls@0.24.1 X X -tokio-rustls@0.26.2 X X +tokio-rustls@0.26.4 X X tokio-stream@0.1.17 X -tokio-util@0.7.16 X -toml_datetime@0.6.11 X X -toml_edit@0.22.27 X X +tokio-util@0.7.17 X +toml_datetime@0.7.3 X X +toml_edit@0.23.9 X X +toml_parser@1.0.4 X X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X urlencoding@2.1.3 X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X +vcpkg@0.2.15 X X version_check@0.9.5 X X volo@0.10.7 X X volo-thrift@0.10.8 X X vsimd@0.8.0 X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@0.26.11 X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -winnow@0.7.13 X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X +windows_x86_64_msvc@0.53.1 X X +winnow@0.7.14 X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X xmlparser@0.13.6 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/catalog/rest/DEPENDENCIES.rust.tsv b/crates/catalog/rest/DEPENDENCIES.rust.tsv index cf238f4b4c..c78434fa51 100644 --- a/crates/catalog/rest/DEPENDENCIES.rust.tsv +++ b/crates/catalog/rest/DEPENDENCIES.rust.tsv @@ -1,48 +1,46 @@ crate 
0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-lock@3.4.1 X X async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X +backon@1.6.0 X base64@0.22.1 X X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X -cc@1.2.36 X X -cfg-if@1.0.3 X X +bytes@1.11.0 X +cc@1.2.49 X X +cfg-if@1.0.4 X X chrono@0.4.42 X X concurrent-queue@2.5.0 X X const-oid@0.9.6 X X @@ -56,7 +54,7 @@ crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X @@ -75,9 +73,9 @@ event-listener@5.4.1 X X event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X fnv@1.0.7 X X form_urlencoded@1.2.2 X X futures@0.3.31 X X @@ -89,105 +87,98 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X h2@0.4.12 X -half@2.6.0 X X -hashbrown@0.15.5 X X +half@2.7.1 X X +hashbrown@0.16.1 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hmac@0.12.1 X X home@0.5.11 X X -http@1.3.1 X X +http@1.4.0 X X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X httpdate@1.0.3 X X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-rest@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-catalog-rest@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X -indexmap@2.11.0 X X +indexmap@2.12.1 X X integer-encoding@3.0.4 X -io-uring@0.7.10 X X ipnet@2.11.0 X X -iri-string@0.7.8 X X 
+iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +libz-rs-sys@0.5.3 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X ordered-float@2.10.1 X ordered-float@4.6.0 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -195,134 +186,135 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc_version@0.4.1 X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -scoped-tls@1.0.1 X X scopeguard@1.2.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X -socket2@0.6.0 X X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +socket2@0.6.1 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 
X tagptr@0.2.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X version_check@0.9.5 X X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/catalog/rest/src/catalog.rs b/crates/catalog/rest/src/catalog.rs index c784f32039..e41fe8381b 100644 --- a/crates/catalog/rest/src/catalog.rs +++ b/crates/catalog/rest/src/catalog.rs @@ -43,9 +43,9 @@ use 
crate::client::{ deserialize_unexpected_catalog_error, }; use crate::types::{ - CatalogConfig, CommitTableRequest, CommitTableResponse, CreateTableRequest, - ListNamespaceResponse, ListTableResponse, LoadCredentialsResponse, LoadTableResponse, - NamespaceSerde, RegisterTableRequest, RenameTableRequest, + CatalogConfig, CommitTableRequest, CommitTableResponse, CreateNamespaceRequest, + CreateTableRequest, ListNamespaceResponse, ListTablesResponse, LoadCredentialsResponse, + LoadTableResult, NamespaceResponse, RegisterTableRequest, RenameTableRequest, }; /// REST catalog URI @@ -485,7 +485,7 @@ impl RestCatalog { let response = match http_response.status() { StatusCode::OK | StatusCode::NOT_MODIFIED => { - deserialize_catalog_response::<LoadTableResponse>(http_response).await? + deserialize_catalog_response::<LoadTableResult>(http_response).await? } StatusCode::NOT_FOUND => { return Err(Error::new( @@ -502,7 +502,6 @@ impl RestCatalog { // 3. storage_credentials (vended credentials - highest priority) let mut config: HashMap<String, String> = response .config - .unwrap_or_default() .into_iter() .chain(self.user_config.props.clone()) .collect(); @@ -598,13 +597,7 @@ impl Catalog for RestCatalog { deserialize_catalog_response::<ListNamespaceResponse>(http_response) .await?; - let ns_identifiers = response - .namespaces - .into_iter() - .map(NamespaceIdent::from_vec) - .collect::<Result<Vec<_>>>()?; - - namespaces.extend(ns_identifiers); + namespaces.extend(response.namespaces); match response.next_page_token { Some(token) => next_token = Some(token), @@ -634,9 +627,9 @@ impl Catalog for RestCatalog { let request = context .client .request(Method::POST, context.config.namespaces_endpoint()) - .json(&NamespaceSerde { - namespace: namespace.as_ref().clone(), - properties: Some(properties), + .json(&CreateNamespaceRequest { + namespace: namespace.clone(), + properties, }) .build()?; @@ -645,8 +638,8 @@ impl Catalog for RestCatalog { match http_response.status() { StatusCode::OK => { let response = - deserialize_catalog_response::<NamespaceSerde>(http_response).await?; - Namespace::try_from(response) + deserialize_catalog_response::<NamespaceResponse>(http_response).await?; + Ok(Namespace::from(response)) } StatusCode::CONFLICT => Err(Error::new( ErrorKind::Unexpected, @@ -669,8 +662,8 @@ impl Catalog for RestCatalog { match http_response.status() { StatusCode::OK => { let response = - deserialize_catalog_response::<NamespaceSerde>(http_response).await?; - Namespace::try_from(response) + deserialize_catalog_response::<NamespaceResponse>(http_response).await?; + Ok(Namespace::from(response)) } StatusCode::NOT_FOUND => Err(Error::new( ErrorKind::Unexpected, @@ -746,7 +739,7 @@ impl Catalog for RestCatalog { match http_response.status() { StatusCode::OK => { let response = - deserialize_catalog_response::<ListTableResponse>(http_response).await?; + deserialize_catalog_response::<ListTablesResponse>(http_response).await?; identifiers.extend(response.identifiers); @@ -793,11 +786,7 @@ impl Catalog for RestCatalog { partition_spec: creation.partition_spec, write_order: creation.sort_order, stage_create: Some(false), - properties: if creation.properties.is_empty() { - None - } else { - Some(creation.properties) - }, + properties: creation.properties, }) .build()?; @@ -805,7 +794,7 @@ let response = match http_response.status() { StatusCode::OK => { - deserialize_catalog_response::<LoadTableResponse>(http_response).await? + deserialize_catalog_response::<LoadTableResult>(http_response).await?
} StatusCode::NOT_FOUND => { return Err(Error::new( @@ -829,7 +818,6 @@ let config = response .config - .unwrap_or_default() .into_iter() .chain(self.user_config.props.clone()) .collect(); @@ -951,9 +939,9 @@ let http_response = context.client.query_catalog(request).await?; - let response: LoadTableResponse = match http_response.status() { + let response: LoadTableResult = match http_response.status() { StatusCode::OK => { - deserialize_catalog_response::<LoadTableResponse>(http_response).await? + deserialize_catalog_response::<LoadTableResult>(http_response).await? } StatusCode::NOT_FOUND => { return Err(Error::new( @@ -995,7 +983,7 @@ context.config.table_endpoint(commit.identifier()), ) .json(&CommitTableRequest { - identifier: commit.identifier().clone(), + identifier: Some(commit.identifier().clone()), requirements: commit.take_requirements(), updates: commit.take_updates(), }) @@ -2519,7 +2507,7 @@ mod tests { )) .unwrap(); let reader = BufReader::new(file); - let resp = serde_json::from_reader::<_, LoadTableResponse>(reader).unwrap(); + let resp = serde_json::from_reader::<_, LoadTableResult>(reader).unwrap(); Table::builder() .metadata(resp.metadata) @@ -2659,7 +2647,7 @@ )) .unwrap(); let reader = BufReader::new(file); - let resp = serde_json::from_reader::<_, LoadTableResponse>(reader).unwrap(); + let resp = serde_json::from_reader::<_, LoadTableResult>(reader).unwrap(); Table::builder() .metadata(resp.metadata) @@ -2841,7 +2829,7 @@ let mut props = HashMap::new(); props.insert( "credential".to_string(), - format!("{}:{}", client_id, client_secret), + format!("{client_id}:{client_secret}"), ); props.insert("scope".to_string(), "PRINCIPAL_ROLE:ALL".to_string()); props.insert( @@ -2875,7 +2863,7 @@ assert!(!credentials.storage_credentials.is_empty()); } Err(e) => { - panic!("Failed to load table credentials: {:?}", e); + panic!("Failed to load table credentials: {e:?}"); } } @@ -2907,17 +2895,17 @@ println!(" Batch: {} rows", batch.num_rows()); } Err(e) => { - panic!("Failed to read batch: {:?}", e); + panic!("Failed to read batch: {e:?}"); } } } - println!("Total rows scanned: {}", row_count); + println!("Total rows scanned: {row_count}"); assert_eq!(row_count, 25, "Expected 25 rows in nation table"); println!("✓ Successfully verified 25 rows in table"); } Err(e) => { - panic!("Failed to load table with vended credentials: {:?}", e); + panic!("Failed to load table with vended credentials: {e:?}"); } } @@ -2944,21 +2932,20 @@ } Err(e) => { println!("✓ Scan failed as expected without vended credentials"); - println!("Error: {}", e); + println!("Error: {e}"); // Verify it's a permission/authentication error let error_msg = e.to_string(); assert!( error_msg.contains("PermissionDenied") && error_msg.contains("InvalidAccessKeyId") && error_msg.contains("403"), - "Expected permission/authentication error, got: {}", - error_msg + "Expected permission/authentication error, got: {error_msg}" ); } } } Err(e) => { - panic!("Failed to load table without vended credentials: {:?}", e); + panic!("Failed to load table without vended credentials: {e:?}"); } } } diff --git a/crates/catalog/rest/src/lib.rs b/crates/catalog/rest/src/lib.rs index c8e1b98877..b94ffb0d4f 100644 --- a/crates/catalog/rest/src/lib.rs +++ b/crates/catalog/rest/src/lib.rs @@ -57,3 +57,4 @@ mod types; pub use catalog::*; pub use client::CustomAuthenticator; +pub use types::*; diff --git
a/crates/catalog/rest/src/types.rs b/crates/catalog/rest/src/types.rs index 2acd18e47e..4dbbeabb60 100644 --- a/crates/catalog/rest/src/types.rs +++ b/crates/catalog/rest/src/types.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Request and response types for the Iceberg REST API. + use std::collections::HashMap; use iceberg::spec::{Schema, SortOrder, TableMetadata, UnboundPartitionSpec}; @@ -30,7 +32,8 @@ } #[derive(Debug, Serialize, Deserialize)] -pub(super) struct ErrorResponse { +/// Wrapper for all non-2xx error responses from the REST API +pub struct ErrorResponse { error: ErrorModel, } @@ -41,11 +44,16 @@ impl From<ErrorResponse> for Error { } #[derive(Debug, Serialize, Deserialize)] -pub(super) struct ErrorModel { - pub(super) message: String, - pub(super) r#type: String, - pub(super) code: u16, - pub(super) stack: Option<Vec<String>>, +/// Error payload returned in a response with further details on the error +pub struct ErrorModel { + /// Human-readable error message + pub message: String, + /// Internal type definition of the error + pub r#type: String, + /// HTTP response code + pub code: u16, + /// Optional error stack / context + pub stack: Option<Vec<String>>, } impl From<ErrorModel> for Error { @@ -96,119 +104,263 @@ pub(super) struct TokenResponse { pub(super) issued_token_type: Option<String>, } -#[derive(Debug, Serialize, Deserialize)] -pub(super) struct NamespaceSerde { - pub(super) namespace: Vec<String>, - pub(super) properties: Option<HashMap<String, String>>, -} - -impl TryFrom<NamespaceSerde> for Namespace { - type Error = Error; - fn try_from(value: NamespaceSerde) -> std::result::Result<Self, Self::Error> { - Ok(Namespace::with_properties( - NamespaceIdent::from_vec(value.namespace)?, - value.properties.unwrap_or_default(), - )) - } +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +/// Namespace response +pub struct NamespaceResponse { + /// Namespace identifier + pub namespace: NamespaceIdent, + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + /// Properties stored on the namespace, if supported by the server. + pub properties: HashMap<String, String>, } -impl From<&Namespace> for NamespaceSerde { +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +/// Create namespace request +pub struct CreateNamespaceRequest { + /// Name of the namespace to create + pub namespace: NamespaceIdent, + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + /// Properties to set on the namespace + pub properties: HashMap<String, String>, +} +
+impl From<&Namespace> for NamespaceResponse { fn from(value: &Namespace) -> Self { Self { - namespace: value.name().as_ref().clone(), - properties: Some(value.properties().clone()), + namespace: value.name().clone(), + properties: value.properties().clone(), } } } -#[derive(Debug, Serialize, Deserialize)] +impl From<NamespaceResponse> for Namespace { + fn from(value: NamespaceResponse) -> Self { + Namespace::with_properties(value.namespace, value.properties) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "kebab-case")] -pub(super) struct ListNamespaceResponse { - pub(super) namespaces: Vec<Vec<String>>, - #[serde(default)] - pub(super) next_page_token: Option<String>, +/// Response containing a list of namespace identifiers, with optional pagination support. +pub struct ListNamespaceResponse { + /// List of namespace identifiers returned by the server + pub namespaces: Vec<NamespaceIdent>, + /// Opaque token for pagination. If present, indicates there are more results available. + /// Use this value in subsequent requests to retrieve the next page.
+ pub next_page_token: Option<String>, } -#[allow(dead_code)] -#[derive(Debug, Serialize, Deserialize)] -pub(super) struct UpdateNamespacePropsRequest { - removals: Option<Vec<String>>, - updates: Option<HashMap<String, String>>, +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +/// Request to update properties on a namespace. +/// +/// Properties that are not in the request are not modified or removed by this call. +/// Server implementations are not required to support namespace properties. +pub struct UpdateNamespacePropertiesRequest { + /// List of property keys to remove from the namespace + pub removals: Option<Vec<String>>, + /// Map of property keys to values to set or update on the namespace + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + pub updates: HashMap<String, String>, } -#[allow(dead_code)] -#[derive(Debug, Serialize, Deserialize)] -pub(super) struct UpdateNamespacePropsResponse { - updated: Vec<String>, - removed: Vec<String>, - missing: Option<Vec<String>>, +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +/// Response from updating namespace properties, indicating which properties were changed. +pub struct UpdateNamespacePropertiesResponse { + /// List of property keys that were added or updated + pub updated: Vec<String>, + /// List of properties that were removed + pub removed: Vec<String>, + /// List of properties requested for removal that were not found in the namespace's properties. + /// Represents a partial success response. Servers do not need to implement this. + #[serde(skip_serializing_if = "Option::is_none")] + pub missing: Option<Vec<String>>, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "kebab-case")] -pub(super) struct ListTableResponse { - pub(super) identifiers: Vec<TableIdent>, +/// Response containing a list of table identifiers, with optional pagination support. +pub struct ListTablesResponse { + /// List of table identifiers under the requested namespace + pub identifiers: Vec<TableIdent>, + /// Opaque token for pagination. If present, indicates there are more results available. + /// Use this value in subsequent requests to retrieve the next page. #[serde(default)] - pub(super) next_page_token: Option<String>, + pub next_page_token: Option<String>, } -#[derive(Debug, Serialize, Deserialize)] -pub(super) struct RenameTableRequest { - pub(super) source: TableIdent, - pub(super) destination: TableIdent, +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +/// Request to rename a table from one identifier to another. +/// +/// It's valid to move a table across namespaces, but the server implementation +/// is not required to support it. +pub struct RenameTableRequest { + /// Current table identifier to rename + pub source: TableIdent, + /// New table identifier to rename to + pub destination: TableIdent, } -#[derive(Debug, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "kebab-case")] -pub(super) struct LoadTableResponse { - pub(super) metadata_location: Option<String>, - pub(super) metadata: TableMetadata, - pub(super) config: Option<HashMap<String, String>>, - pub(super) storage_credentials: Option<Vec<StorageCredential>>, +/// Result returned when a table is successfully loaded or created. +/// +/// The table metadata JSON is returned in the `metadata` field. The corresponding file location +/// of table metadata should be returned in the `metadata_location` field, unless the metadata +/// is not yet committed. For example, a create transaction may return metadata that is staged +/// but not committed.
-#[allow(dead_code)]
-#[derive(Debug, Serialize, Deserialize)]
-pub(super) struct UpdateNamespacePropsRequest {
-    removals: Option<Vec<String>>,
-    updates: Option<HashMap<String, String>>,
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+/// Request to update properties on a namespace.
+///
+/// Properties that are not in the request are not modified or removed by this call.
+/// Server implementations are not required to support namespace properties.
+pub struct UpdateNamespacePropertiesRequest {
+    /// List of property keys to remove from the namespace
+    pub removals: Option<Vec<String>>,
+    /// Map of property keys to values to set or update on the namespace
+    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
+    pub updates: HashMap<String, String>,
 }
 
-#[allow(dead_code)]
-#[derive(Debug, Serialize, Deserialize)]
-pub(super) struct UpdateNamespacePropsResponse {
-    updated: Vec<String>,
-    removed: Vec<String>,
-    missing: Option<Vec<String>>,
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+/// Response from updating namespace properties, indicating which properties were changed.
+pub struct UpdateNamespacePropertiesResponse {
+    /// List of property keys that were added or updated
+    pub updated: Vec<String>,
+    /// List of properties that were removed
+    pub removed: Vec<String>,
+    /// List of properties requested for removal that were not found in the namespace's properties.
+    /// Represents a partial success response. Servers do not need to implement this.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub missing: Option<Vec<String>>,
 }
 
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(rename_all = "kebab-case")]
-pub(super) struct ListTableResponse {
-    pub(super) identifiers: Vec<TableIdent>,
+/// Response containing a list of table identifiers, with optional pagination support.
+pub struct ListTablesResponse {
+    /// List of table identifiers under the requested namespace
+    pub identifiers: Vec<TableIdent>,
+    /// Opaque token for pagination. If present, indicates there are more results available.
+    /// Use this value in subsequent requests to retrieve the next page.
     #[serde(default)]
-    pub(super) next_page_token: Option<String>,
+    pub next_page_token: Option<String>,
 }
 
-#[derive(Debug, Serialize, Deserialize)]
-pub(super) struct RenameTableRequest {
-    pub(super) source: TableIdent,
-    pub(super) destination: TableIdent,
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+/// Request to rename a table from one identifier to another.
+///
+/// It's valid to move a table across namespaces, but the server implementation
+/// is not required to support it.
+pub struct RenameTableRequest {
+    /// Current table identifier to rename
+    pub source: TableIdent,
+    /// New table identifier to rename to
+    pub destination: TableIdent,
 }
 
-#[derive(Debug, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(rename_all = "kebab-case")]
-pub(super) struct LoadTableResponse {
-    pub(super) metadata_location: Option<String>,
-    pub(super) metadata: TableMetadata,
-    pub(super) config: Option<HashMap<String, String>>,
-    pub(super) storage_credentials: Option<Vec<StorageCredential>>,
+/// Result returned when a table is successfully loaded or created.
+///
+/// The table metadata JSON is returned in the `metadata` field. The corresponding file location
+/// of table metadata should be returned in the `metadata_location` field, unless the metadata
+/// is not yet committed. For example, a create transaction may return metadata that is staged
+/// but not committed.
+///
+/// The `config` map returns table-specific configuration for the table's resources, including
+/// its HTTP client and FileIO. For example, config may contain a specific FileIO implementation
+/// class for the table depending on its underlying storage.
+pub struct LoadTableResult {
+    /// May be null if the table is staged as part of a transaction
+    pub metadata_location: Option<String>,
+    /// The table's full metadata
+    pub metadata: TableMetadata,
+    /// Table-specific configuration overriding catalog configuration
+    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
+    pub config: HashMap<String, String>,
+    /// Storage credentials for accessing table data. Clients should check this field
+    /// before falling back to credentials in the `config` field.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub storage_credentials: Option<Vec<StorageCredential>>,
 }
 
-#[derive(Debug, Serialize, Deserialize)]
-#[serde(rename_all = "kebab-case")]
-pub(super) struct CreateTableRequest {
-    pub(super) name: String,
-    pub(super) location: Option<String>,
-    pub(super) schema: Schema,
-    pub(super) partition_spec: Option<UnboundPartitionSpec>,
-    pub(super) write_order: Option<SortOrder>,
-    pub(super) stage_create: Option<bool>,
-    pub(super) properties: Option<HashMap<String, String>>,
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+/// Storage credential for a specific location prefix.
+///
+/// Indicates a storage location prefix where the credential is relevant. Clients should
+/// choose the most specific prefix (by selecting the longest prefix) if several credentials
+/// of the same type are available.
+pub struct StorageCredential {
+    /// Storage location prefix where this credential is relevant
+    pub prefix: String,
+    /// Configuration map containing credential information
+    pub config: HashMap<String, String>,
 }
 
-#[derive(Debug, Serialize, Deserialize)]
-pub(super) struct CommitTableRequest {
-    pub(super) identifier: TableIdent,
-    pub(super) requirements: Vec<TableRequirement>,
-    pub(super) updates: Vec<TableUpdate>,
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(rename_all = "kebab-case")]
+/// Response containing vended credentials for a table.
+pub struct LoadCredentialsResponse {
+    /// Storage credentials for accessing table data
+    pub storage_credentials: Vec<StorageCredential>,
 }
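The `StorageCredential` doc encodes a client-side selection rule: among matching credentials, the longest (most specific) prefix wins. A small illustrative helper applying that rule (not part of the diff):

```rust
/// Picks the credential whose `prefix` is the longest one matching `location`,
/// following the "most specific prefix wins" rule documented above.
fn select_credential<'a>(
    creds: &'a [StorageCredential],
    location: &str,
) -> Option<&'a StorageCredential> {
    creds
        .iter()
        .filter(|c| location.starts_with(c.prefix.as_str()))
        .max_by_key(|c| c.prefix.len())
}
```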
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(rename_all = "kebab-case")]
-pub(super) struct CommitTableResponse {
-    pub(super) metadata_location: String,
-    pub(super) metadata: TableMetadata,
+/// Request to create a new table in a namespace.
+///
+/// If `stage_create` is false, the table is created immediately.
+/// If `stage_create` is true, the table is not created, but table metadata is initialized
+/// and returned. The service should prepare as needed for a commit to the table commit
+/// endpoint to complete the create transaction.
+pub struct CreateTableRequest {
+    /// Name of the table to create
+    pub name: String,
+    /// Optional table location. If not provided, the server will choose a location.
+    pub location: Option<String>,
+    /// Table schema
+    pub schema: Schema,
+    /// Optional partition specification. If not provided, the table will be unpartitioned.
+    pub partition_spec: Option<UnboundPartitionSpec>,
+    /// Optional sort order for the table
+    pub write_order: Option<SortOrder>,
+    /// Whether to stage the create for a transaction (true) or create immediately (false)
+    pub stage_create: Option<bool>,
+    /// Optional properties to set on the table
+    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
+    pub properties: HashMap<String, String>,
 }
 
-#[derive(Debug, Serialize, Deserialize)]
-#[serde(rename_all = "kebab-case")]
-pub(super) struct RegisterTableRequest {
-    pub(super) name: String,
-    pub(super) metadata_location: String,
-    pub(super) overwrite: Option<bool>,
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+/// Request to commit updates to a table.
+///
+/// Commits have two parts: requirements and updates. Requirements are assertions that will
+/// be validated before attempting to make and commit changes. Updates are changes to make
+/// to table metadata.
+///
+/// Create table transactions that are started by createTable with `stage-create` set to true
+/// are committed using this request. Transactions should include all changes to the table,
+/// including table initialization, like AddSchemaUpdate and SetCurrentSchemaUpdate.
+pub struct CommitTableRequest {
+    /// Table identifier to update; must be present for CommitTransactionRequest
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub identifier: Option<TableIdent>,
+    /// List of requirements that must be satisfied before committing changes
+    pub requirements: Vec<TableRequirement>,
+    /// List of updates to apply to the table metadata
+    pub updates: Vec<TableUpdate>,
 }
 
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct StorageCredential {
-    pub prefix: String,
-    pub config: HashMap<String, String>,
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(rename_all = "kebab-case")]
+/// Response returned when a table is successfully updated.
+///
+/// The table metadata JSON is returned in the metadata field. The corresponding file location
+/// of table metadata must be returned in the metadata-location field. Clients can check whether
+/// metadata has changed by comparing metadata locations.
+pub struct CommitTableResponse {
+    /// Location of the updated table metadata file
+    pub metadata_location: String,
+    /// The table's updated metadata
+    pub metadata: TableMetadata,
 }
 
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(rename_all = "kebab-case")]
-pub struct LoadCredentialsResponse {
-    pub storage_credentials: Vec<StorageCredential>,
+/// Request to register a table using an existing metadata file location.
+pub struct RegisterTableRequest {
+    /// Name of the table to register
+    pub name: String,
+    /// Location of the metadata file for the table
+    pub metadata_location: String,
+    /// Whether to overwrite table metadata if the table already exists
+    pub overwrite: Option<bool>,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_namespace_response_serde() {
+        let json = serde_json::json!({
+            "namespace": ["nested", "ns"],
+            "properties": {
+                "key1": "value1",
+                "key2": "value2"
+            }
+        });
+        let ns_response: NamespaceResponse =
+            serde_json::from_value(json.clone()).expect("Deserialization failed");
+        assert_eq!(ns_response, NamespaceResponse {
+            namespace: NamespaceIdent::from_vec(vec!["nested".to_string(), "ns".to_string()])
+                .unwrap(),
+            properties: HashMap::from([
+                ("key1".to_string(), "value1".to_string()),
+                ("key2".to_string(), "value2".to_string()),
+            ]),
+        });
+        assert_eq!(
+            serde_json::to_value(&ns_response).expect("Serialization failed"),
+            json
+        );
+
+        // Without properties
+        let json_no_props = serde_json::json!({
+            "namespace": ["db", "schema"]
+        });
+        let ns_response_no_props: NamespaceResponse =
+            serde_json::from_value(json_no_props.clone()).expect("Deserialization failed");
+        assert_eq!(ns_response_no_props, NamespaceResponse {
+            namespace: NamespaceIdent::from_vec(vec!["db".to_string(), "schema".to_string()])
+                .unwrap(),
+            properties: HashMap::new(),
+        });
+        assert_eq!(
+            serde_json::to_value(&ns_response_no_props).expect("Serialization failed"),
+            json_no_props
+        );
+    }
 }
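Requirements and updates travel together in one body so the server can validate, then apply, atomically; that pairing is what gives REST commits their optimistic-concurrency semantics. As a hedged sketch of constructing the commit request above (the exact `TableRequirement`/`TableUpdate` variant shapes are assumed from the `iceberg` crate and may differ):

```rust
use std::collections::HashMap;

use iceberg::{TableIdent, TableRequirement, TableUpdate};
use uuid::Uuid;

// Assumed variant shapes; check the `iceberg` crate's catalog module.
fn build_commit(identifier: TableIdent, table_uuid: Uuid) -> CommitTableRequest {
    CommitTableRequest {
        identifier: Some(identifier),
        // Reject the commit if the table was dropped and recreated since we read it.
        requirements: vec![TableRequirement::UuidMatch { uuid: table_uuid }],
        // Applied only after every requirement has been validated.
        updates: vec![TableUpdate::SetProperties {
            updates: HashMap::from([("owner".to_string(), "analytics".to_string())]),
        }],
    }
}
```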
diff --git a/crates/catalog/rest/tests/rest_catalog_test.rs b/crates/catalog/rest/tests/rest_catalog_test.rs
index 068890b4c8..23dfac30bc 100644
--- a/crates/catalog/rest/tests/rest_catalog_test.rs
+++ b/crates/catalog/rest/tests/rest_catalog_test.rs
@@ -511,8 +511,7 @@ async fn test_authenticator_token_refresh() {
     let count = *token_request_count.lock().unwrap();
     assert!(
         count >= 2,
-        "Authenticator should have been called at least twice, but was called {} times",
-        count
+        "Authenticator should have been called at least twice, but was called {count} times"
     );
 }
 
diff --git a/crates/catalog/s3tables/Cargo.toml b/crates/catalog/s3tables/Cargo.toml
index 66fb70fefc..fde08b9a49 100644
--- a/crates/catalog/s3tables/Cargo.toml
+++ b/crates/catalog/s3tables/Cargo.toml
@@ -21,6 +21,7 @@ homepage = { workspace = true }
 name = "iceberg-catalog-s3tables"
 rust-version = { workspace = true }
 version = { workspace = true }
+readme = "README.md"
 
 categories = ["database"]
 description = "Apache Iceberg Rust S3Tables Catalog"
diff --git a/crates/catalog/s3tables/DEPENDENCIES.rust.tsv b/crates/catalog/s3tables/DEPENDENCIES.rust.tsv
index 13d8eebe7a..7dd182e435 100644
--- a/crates/catalog/s3tables/DEPENDENCIES.rust.tsv
+++ b/crates/catalog/s3tables/DEPENDENCIES.rust.tsv
@@ -1,77 +1,73 @@
 crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib
-addr2line@0.24.2 X X
 adler2@2.0.1 X X X
 ahash@0.8.12 X X
-aho-corasick@1.1.3 X X
+aho-corasick@1.1.4 X X
 alloc-no-stdlib@2.0.4 X
 alloc-stdlib@0.2.2 X
 android_system_properties@0.1.5 X X
-anyhow@1.0.99 X X
-apache-avro@0.20.0 X
+anyhow@1.0.100 X X
+apache-avro@0.21.0 X
 array-init@2.1.0 X X
 arrayvec@0.7.6 X X
-arrow-arith@55.2.0 X
-arrow-array@55.2.0 X
-arrow-buffer@55.2.0 X
-arrow-cast@55.2.0 X
-arrow-data@55.2.0 X
-arrow-ipc@55.2.0 X
-arrow-ord@55.2.0 X
-arrow-schema@55.2.0 X
-arrow-select@55.2.0 X
-arrow-string@55.2.0 X
+arrow-arith@57.1.0 X
+arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-lock@3.4.1 X X async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -aws-config@1.8.6 X -aws-credential-types@1.2.6 X -aws-runtime@1.5.10 X -aws-sdk-s3tables@1.37.0 X -aws-sdk-sso@1.83.0 X -aws-sdk-ssooidc@1.84.0 X -aws-sdk-sts@1.85.0 X -aws-sigv4@1.3.4 X -aws-smithy-async@1.2.5 X -aws-smithy-http@0.62.3 X -aws-smithy-http-client@1.1.1 X -aws-smithy-json@0.61.5 X -aws-smithy-observability@0.1.3 X -aws-smithy-query@0.60.7 X -aws-smithy-runtime@1.9.1 X -aws-smithy-runtime-api@1.9.0 X -aws-smithy-types@1.3.2 X -aws-smithy-xml@0.60.10 X -aws-types@1.3.8 X -backon@1.5.2 X -backtrace@0.3.75 X X -base64@0.21.7 X X +aws-config@1.8.11 X +aws-credential-types@1.2.10 X +aws-runtime@1.5.16 X +aws-sdk-s3tables@1.46.0 X +aws-sdk-sso@1.90.0 X +aws-sdk-ssooidc@1.92.0 X +aws-sdk-sts@1.94.0 X +aws-sigv4@1.3.6 X +aws-smithy-async@1.2.7 X +aws-smithy-http@0.62.6 X +aws-smithy-http-client@1.1.5 X +aws-smithy-json@0.61.8 X +aws-smithy-observability@0.1.5 X +aws-smithy-query@0.60.9 X +aws-smithy-runtime@1.9.5 X +aws-smithy-runtime-api@1.9.3 X +aws-smithy-types@1.3.5 X +aws-smithy-xml@0.60.13 X +aws-types@1.3.10 X +backon@1.6.0 X base64@0.22.1 X X base64-simd@0.8.0 X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X +bytes@1.11.0 X bytes-utils@0.1.4 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X +cc@1.2.49 X X +cfg-if@1.0.4 X X chrono@0.4.42 X X concurrent-queue@2.5.0 X X const-oid@0.9.6 X X const-random@0.1.18 X X const-random-macro@0.1.16 X X core-foundation@0.10.1 X X -core-foundation@0.9.4 X X core-foundation-sys@0.8.7 X X cpufeatures@0.2.17 X X crc32c@0.6.8 X X @@ -80,14 +76,14 @@ crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X darling_core@0.21.3 X darling_macro@0.20.11 X darling_macro@0.21.3 X -deranged@0.5.3 X X +deranged@0.5.5 X X derive_builder@0.20.2 X X derive_builder_core@0.20.2 X X derive_builder_macro@0.20.2 X X @@ -100,9 +96,9 @@ event-listener@5.4.1 X X event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X fnv@1.0.7 X X form_urlencoded@1.2.2 X X futures@0.3.31 X X @@ -114,114 +110,107 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X h2@0.3.27 X h2@0.4.12 X -half@2.6.0 X X -hashbrown@0.15.5 X X +half@2.7.1 X X +hashbrown@0.16.1 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hmac@0.12.1 X X home@0.5.11 X X http@0.2.12 X X -http@1.3.1 X X +http@1.4.0 X X http-body@0.4.6 X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X httpdate@1.0.3 X X hyper@0.14.32 X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.24.2 X X X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X 
-iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-s3tables@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-catalog-s3tables@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X -indexmap@2.11.0 X X +indexmap@2.12.1 X X integer-encoding@3.0.4 X -io-uring@0.7.10 X X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +libz-rs-sys@0.5.3 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-conv@0.1.0 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X openssl-probe@0.1.6 X X ordered-float@2.10.1 X ordered-float@4.6.0 X outref@0.5.2 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X powerfmt@0.2.0 X X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -229,152 +218,150 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc_version@0.4.1 X X rustls@0.21.12 X X X -rustls@0.23.31 X X X -rustls-native-certs@0.6.3 X X X -rustls-native-certs@0.8.1 X X X -rustls-pemfile@1.0.4 X X X -rustls-pki-types@1.12.0 X X +rustls@0.23.35 X X X 
+rustls-native-certs@0.8.2 X X X +rustls-pki-types@1.13.1 X X rustls-webpki@0.101.7 X -rustls-webpki@0.103.4 X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -schannel@0.1.27 X -scoped-tls@1.0.1 X X +schannel@0.1.28 X scopeguard@1.2.0 X X sct@0.7.1 X X X -security-framework@2.11.1 X X -security-framework@3.4.0 X X +security-framework@3.5.1 X X security-framework-sys@2.15.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X -signal-hook-registry@1.4.6 X X +signal-hook-registry@1.4.7 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X socket2@0.5.10 X X -socket2@0.6.0 X X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +socket2@0.6.1 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X -time@0.3.43 X X +time@0.3.44 X X time-core@0.1.6 X X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X tokio-rustls@0.24.1 X X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X urlencoding@2.1.3 X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X version_check@0.9.5 X X vsimd@0.8.0 X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X 
windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X xmlparser@0.13.6 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/catalog/s3tables/LICENSE b/crates/catalog/s3tables/LICENSE new file mode 100644 index 0000000000..261eeb9e9f --- /dev/null +++ b/crates/catalog/s3tables/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/crates/catalog/s3tables/NOTICE b/crates/catalog/s3tables/NOTICE
new file mode 100644
index 0000000000..9340680cbd
--- /dev/null
+++ b/crates/catalog/s3tables/NOTICE
@@ -0,0 +1,5 @@
+Apache Iceberg Rust
+Copyright 2023-2024 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
diff --git a/crates/catalog/s3tables/README.md b/crates/catalog/s3tables/README.md
new file mode 100644
index 0000000000..d9784d5231
--- /dev/null
+++ b/crates/catalog/s3tables/README.md
@@ -0,0 +1,56 @@
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one
+  ~ or more contributor license agreements.  See the NOTICE file
+  ~ distributed with this work for additional information
+  ~ regarding copyright ownership.  The ASF licenses this file
+  ~ to you under the Apache License, Version 2.0 (the
+  ~ "License"); you may not use this file except in compliance
+  ~ with the License.  You may obtain a copy of the License at
+  ~
+  ~   http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing,
+  ~ software distributed under the License is distributed on an
+  ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  ~ KIND, either express or implied.  See the License for the
+  ~ specific language governing permissions and limitations
+  ~ under the License.
+-->
+
+# Apache Iceberg S3Tables Catalog (Rust)
+
+[![crates.io](https://img.shields.io/crates/v/iceberg-catalog-s3tables.svg)](https://crates.io/crates/iceberg-catalog-s3tables)
+[![docs.rs](https://img.shields.io/docsrs/iceberg-catalog-s3tables.svg)](https://docs.rs/iceberg-catalog-s3tables/latest/iceberg_catalog_s3tables/)
+
+Official Native Rust implementation of the Apache Iceberg S3Tables catalog.
+ +## Quick start + +```rust,no_run +use std::collections::HashMap; + +use iceberg::CatalogBuilder; +use iceberg_catalog_s3tables::{ + S3TABLES_CATALOG_PROP_ENDPOINT_URL, S3TABLES_CATALOG_PROP_TABLE_BUCKET_ARN, + S3TablesCatalogBuilder, +}; + +#[tokio::main] +async fn main() { + let catalog = S3TablesCatalogBuilder::default() + .with_endpoint_url("http://localhost:4566") + .load( + "s3tables", + HashMap::from([( + S3TABLES_CATALOG_PROP_TABLE_BUCKET_ARN.to_string(), + "arn:aws:s3tables:us-east-1:123456789012:bucket/my-bucket".to_string(), + )]), + ) + .await + .unwrap(); + + // use `catalog` as any Iceberg Catalog +} +``` + +See the [API documentation](https://docs.rs/iceberg_catalog_s3tables/latest) for the full API surface. diff --git a/crates/catalog/sql/DEPENDENCIES.rust.tsv b/crates/catalog/sql/DEPENDENCIES.rust.tsv index 335c980967..cc48621d0c 100644 --- a/crates/catalog/sql/DEPENDENCIES.rust.tsv +++ b/crates/catalog/sql/DEPENDENCIES.rust.tsv @@ -1,49 +1,47 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X allocator-api2@0.2.21 X X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-lock@3.4.1 X X async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X +backon@1.6.0 X base64@0.22.1 X X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X -cc@1.2.36 X X -cfg-if@1.0.3 X X +bytes@1.11.0 X +cc@1.2.49 X X +cfg-if@1.0.4 X X chrono@0.4.42 X X concurrent-queue@2.5.0 X X const-oid@0.9.6 X X @@ -51,7 +49,7 @@ const-random@0.1.18 X X const-random-macro@0.1.16 X X core-foundation-sys@0.8.7 X X cpufeatures@0.2.17 X X -crc@3.3.0 X X +crc@3.4.0 X X crc-catalog@2.4.0 X X crc32c@0.6.8 X X crc32fast@1.5.0 X X @@ -60,7 +58,7 @@ crossbeam-epoch@0.9.18 X X crossbeam-queue@0.3.12 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X @@ -79,9 +77,9 @@ event-listener@5.4.1 X X event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X flume@0.11.1 X X fnv@1.0.7 X X foldhash@0.1.5 X @@ -96,105 +94,99 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X 
-gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X -half@2.6.0 X X +half@2.7.1 X X hashbrown@0.15.5 X X +hashbrown@0.16.1 X X hashlink@0.10.0 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hmac@0.12.1 X X home@0.5.11 X X -http@1.3.1 X X +http@1.4.0 X X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-sql@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-catalog-sql@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X -indexmap@2.11.0 X X +indexmap@2.12.1 X X integer-encoding@3.0.4 X -io-uring@0.7.10 X X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X libsqlite3-sys@0.30.1 X -libz-rs-sys@0.5.2 X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +libz-rs-sys@0.5.3 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X ordered-float@2.10.1 X ordered-float@4.6.0 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -202,141 +194,142 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X 
+reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc_version@0.4.1 X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -scoped-tls@1.0.1 X X scopeguard@1.2.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X -socket2@0.6.0 X X +socket2@0.6.1 X X spin@0.9.8 X sqlx@0.8.6 X X sqlx-core@0.8.6 X X sqlx-sqlite@0.8.6 X X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X tokio-stream@0.1.17 X -tokio-util@0.7.16 X +tokio-util@0.7.17 X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X vcpkg@0.2.15 X X version_check@0.9.5 X X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X +web-sys@0.3.83 X X webpki-roots@0.26.11 X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X 
+windows-sys@0.61.2 X X
 windows-targets@0.52.6 X X
-windows-threading@0.1.0 X X
+windows-targets@0.53.5 X X
 windows_aarch64_gnullvm@0.52.6 X X
+windows_aarch64_gnullvm@0.53.1 X X
 windows_aarch64_msvc@0.52.6 X X
+windows_aarch64_msvc@0.53.1 X X
 windows_i686_gnu@0.52.6 X X
+windows_i686_gnu@0.53.1 X X
 windows_i686_gnullvm@0.52.6 X X
+windows_i686_gnullvm@0.53.1 X X
 windows_i686_msvc@0.52.6 X X
+windows_i686_msvc@0.53.1 X X
 windows_x86_64_gnu@0.52.6 X X
+windows_x86_64_gnu@0.53.1 X X
 windows_x86_64_gnullvm@0.52.6 X X
+windows_x86_64_gnullvm@0.53.1 X X
 windows_x86_64_msvc@0.52.6 X
-wit-bindgen@0.45.1 X X X
-writeable@0.6.1 X
-yoke@0.8.0 X
-yoke-derive@0.8.0 X
-zerocopy@0.8.27 X X X
+windows_x86_64_msvc@0.53.1 X X
+wit-bindgen@0.46.0 X X X
+writeable@0.6.2 X
+yoke@0.8.1 X
+yoke-derive@0.8.1 X
+zerocopy@0.8.31 X X X
+zerocopy-derive@0.8.31 X X X
 zerofrom@0.1.6 X
 zerofrom-derive@0.1.6 X
-zeroize@1.8.1 X X
-zerotrie@0.2.2 X
-zerovec@0.11.4 X
-zerovec-derive@0.11.1 X
-zlib-rs@0.5.2 X
+zeroize@1.8.2 X X
+zerotrie@0.2.3 X
+zerovec@0.11.5 X
+zerovec-derive@0.11.2 X
+zlib-rs@0.5.3 X
 zstd@0.13.3 X
 zstd-safe@7.2.4 X X
 zstd-sys@2.0.16+zstd.1.5.7 X X
diff --git a/crates/catalog/sql/src/catalog.rs b/crates/catalog/sql/src/catalog.rs
index 77b35a228f..8209cd04c1 100644
--- a/crates/catalog/sql/src/catalog.rs
+++ b/crates/catalog/sql/src/catalog.rs
@@ -917,11 +917,55 @@ impl Catalog for SqlCatalog {
         .build()?)
     }
 
-    async fn update_table(&self, _commit: TableCommit) -> Result<Table> {
-        Err(Error::new(
-            ErrorKind::FeatureUnsupported,
-            "Updating a table is not supported yet",
-        ))
+    /// Updates an existing table within the SQL catalog.
+    async fn update_table(&self, commit: TableCommit) -> Result<Table> {
+        let table_ident = commit.identifier().clone();
+        let current_table = self.load_table(&table_ident).await?;
+        let current_metadata_location = current_table.metadata_location_result()?.to_string();
+
+        let staged_table = commit.apply(current_table)?;
+        let staged_metadata_location = staged_table.metadata_location_result()?;
+
+        staged_table
+            .metadata()
+            .write_to(staged_table.file_io(), &staged_metadata_location)
+            .await?;
+
+        let update_result = self
+            .execute(
+                &format!(
+                    "UPDATE {CATALOG_TABLE_NAME}
+                    SET {CATALOG_FIELD_METADATA_LOCATION_PROP} = ?, {CATALOG_FIELD_PREVIOUS_METADATA_LOCATION_PROP} = ?
+                    WHERE {CATALOG_FIELD_CATALOG_NAME} = ?
+                    AND {CATALOG_FIELD_TABLE_NAME} = ?
+                    AND {CATALOG_FIELD_TABLE_NAMESPACE} = ?
+                    AND (
+                        {CATALOG_FIELD_RECORD_TYPE} = '{CATALOG_FIELD_TABLE_RECORD_TYPE}'
+                        OR {CATALOG_FIELD_RECORD_TYPE} IS NULL
+                    )
+                    AND {CATALOG_FIELD_METADATA_LOCATION_PROP} = ?"
+                ),
+                vec![
+                    Some(staged_metadata_location),
+                    Some(current_metadata_location.as_str()),
+                    Some(&self.name),
+                    Some(table_ident.name()),
+                    Some(&table_ident.namespace().join(".")),
+                    Some(current_metadata_location.as_str()),
+                ],
+                None,
+            )
+            .await?;
+
+        if update_result.rows_affected() == 0 {
+            return Err(Error::new(
+                ErrorKind::CatalogCommitConflicts,
+                format!("Commit conflicted for table: {table_ident}"),
+            )
+            .with_retryable(true));
+        }
+
+        Ok(staged_table)
+    }
 }
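The final `AND {CATALOG_FIELD_METADATA_LOCATION_PROP} = ?` predicate makes this UPDATE a compare-and-swap: the row only changes if the metadata location still matches what this committer read, so zero affected rows means a concurrent writer won, and the error is surfaced as a retryable `CatalogCommitConflicts`. A hedged sketch of how a caller might exploit that (illustrative only; it reuses the `Transaction` API exercised by the test below and assumes `Error::kind()` exposes the error kind for comparison):

```rust
use iceberg::table::Table;
use iceberg::transaction::{ApplyTransactionAction, Transaction};
use iceberg::{Catalog, ErrorKind, Result, TableIdent};

// Reload, rebuild the change, and retry while the catalog reports a commit conflict.
async fn set_property_with_retries(
    catalog: &impl Catalog,
    ident: &TableIdent,
    key: &str,
    value: &str,
    max_attempts: usize,
) -> Result<Table> {
    let mut attempt = 0;
    loop {
        let table = catalog.load_table(ident).await?;
        let tx = Transaction::new(&table);
        let tx = tx
            .update_table_properties()
            .set(key.to_string(), value.to_string())
            .apply(tx)?;
        match tx.commit(catalog).await {
            Err(e) if e.kind() == ErrorKind::CatalogCommitConflicts
                && attempt + 1 < max_attempts =>
            {
                attempt += 1; // another writer won the race; rebuild and retry
            }
            other => return other,
        }
    }
}
```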
@@ -932,6 +976,7 @@ mod tests {
 
     use iceberg::spec::{NestedField, PartitionSpec, PrimitiveType, Schema, SortOrder, Type};
     use iceberg::table::Table;
+    use iceberg::transaction::{ApplyTransactionAction, Transaction};
     use iceberg::{Catalog, CatalogBuilder, Namespace, NamespaceIdent, TableCreation, TableIdent};
     use itertools::Itertools;
     use regex::Regex;
@@ -2293,4 +2338,56 @@ mod tests {
         assert_eq!(table.identifier(), expected_table.identifier());
         assert_eq!(table.metadata_location(), Some(metadata_location.as_str()));
     }
+
+    #[tokio::test]
+    async fn test_update_table() {
+        let warehouse_loc = temp_path();
+        let catalog = new_sql_catalog(warehouse_loc).await;
+
+        // Create a test namespace and table
+        let namespace_ident = NamespaceIdent::new("ns1".into());
+        create_namespace(&catalog, &namespace_ident).await;
+        let table_ident = TableIdent::new(namespace_ident.clone(), "tbl1".into());
+        create_table(&catalog, &table_ident).await;
+
+        let table = catalog.load_table(&table_ident).await.unwrap();
+
+        // Store the original metadata location for comparison
+        let original_metadata_location = table.metadata_location().unwrap().to_string();
+
+        // Create a transaction to update the table
+        let tx = Transaction::new(&table);
+        let tx = tx
+            .update_table_properties()
+            .set("test_property".to_string(), "test_value".to_string())
+            .apply(tx)
+            .unwrap();
+
+        // Commit the transaction to the catalog
+        let updated_table = tx.commit(&catalog).await.unwrap();
+
+        // Verify the update was successful
+        assert_eq!(
+            updated_table.metadata().properties().get("test_property"),
+            Some(&"test_value".to_string())
+        );
+        // Verify the metadata location has been updated
+        assert_ne!(
+            updated_table.metadata_location().unwrap(),
+            original_metadata_location.as_str()
+        );
+
+        // Load the table again from the catalog to verify changes were persisted
+        let reloaded = catalog.load_table(&table_ident).await.unwrap();
+
+        // Verify the reloaded table matches the updated table
+        assert_eq!(
+            reloaded.metadata().properties().get("test_property"),
+            Some(&"test_value".to_string())
+        );
+        assert_eq!(
+            reloaded.metadata_location(),
updated_table.metadata_location() + ); + } } diff --git a/crates/examples/DEPENDENCIES.rust.tsv b/crates/examples/DEPENDENCIES.rust.tsv index 20702597a7..de07f1c650 100644 --- a/crates/examples/DEPENDENCIES.rust.tsv +++ b/crates/examples/DEPENDENCIES.rust.tsv @@ -1,48 +1,46 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-lock@3.4.1 X X async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X +backon@1.6.0 X base64@0.22.1 X X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X -cc@1.2.36 X X -cfg-if@1.0.3 X X +bytes@1.11.0 X +cc@1.2.49 X X +cfg-if@1.0.4 X X chrono@0.4.42 X X concurrent-queue@2.5.0 X X const-oid@0.9.6 X X @@ -56,7 +54,7 @@ crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X @@ -75,9 +73,9 @@ event-listener@5.4.1 X X event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X fnv@1.0.7 X X form_urlencoded@1.2.2 X X futures@0.3.31 X X @@ -89,106 +87,99 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X h2@0.4.12 X -half@2.6.0 X X -hashbrown@0.15.5 X X +half@2.7.1 X X +hashbrown@0.16.1 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hmac@0.12.1 X X home@0.5.11 X X -http@1.3.1 X X +http@1.4.0 X X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X httpdate@1.0.3 X X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-rest@0.7.0 X -iceberg-examples@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-catalog-rest@0.8.0 X +iceberg-examples@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X 
+icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X -indexmap@2.11.0 X X +indexmap@2.12.1 X X integer-encoding@3.0.4 X -io-uring@0.7.10 X X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +libz-rs-sys@0.5.3 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X ordered-float@2.10.1 X ordered-float@4.6.0 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -196,135 +187,136 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc_version@0.4.1 X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -scoped-tls@1.0.1 X X scopeguard@1.2.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X 
-signal-hook-registry@1.4.6 X X +signal-hook-registry@1.4.7 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X -socket2@0.6.0 X X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +socket2@0.6.1 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X version_check@0.9.5 X X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X 
-zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml index d903d4f14d..6f1332a444 100644 --- a/crates/iceberg/Cargo.toml +++ b/crates/iceberg/Cargo.toml @@ -29,7 +29,7 @@ license = { workspace = true } repository = { workspace = true } [features] -default = ["storage-memory", "storage-fs", "storage-s3", "tokio"] +default = ["storage-memory", "storage-fs", "storage-s3"] storage-all = ["storage-memory", "storage-fs", "storage-s3", "storage-gcs"] storage-azdls = ["opendal/services-azdls"] @@ -39,8 +39,6 @@ storage-memory = ["opendal/services-memory"] storage-oss = ["opendal/services-oss"] storage-s3 = ["opendal/services-s3", "reqsign"] -smol = ["dep:smol"] -tokio = ["tokio/rt-multi-thread"] [dependencies] anyhow = { workspace = true } @@ -85,7 +83,6 @@ serde_derive = { workspace = true } serde_json = { workspace = true } serde_repr = { workspace = true } serde_with = { workspace = true } -smol = { workspace = true, optional = true } strum = { workspace = true, features = ["derive"] } tokio = { workspace = true, optional = false, features = ["sync"] } typed-builder = { workspace = true } diff --git a/crates/iceberg/DEPENDENCIES.rust.tsv b/crates/iceberg/DEPENDENCIES.rust.tsv index 460a531644..c446954437 100644 --- a/crates/iceberg/DEPENDENCIES.rust.tsv +++ b/crates/iceberg/DEPENDENCIES.rust.tsv @@ -1,48 +1,46 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-lock@3.4.1 X X async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X +backon@1.6.0 X base64@0.22.1 X X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X -cc@1.2.36 X X -cfg-if@1.0.3 X X +bytes@1.11.0 X +cc@1.2.49 X X +cfg-if@1.0.4 X X chrono@0.4.42 X X concurrent-queue@2.5.0 X X const-oid@0.9.6 X X @@ -56,7 +54,7 @@ crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X @@ -70,13 +68,14 @@ digest@0.10.7 X X displaydoc@0.2.5 X X dissimilar@1.0.10 X either@1.15.0 X X +equivalent@1.0.2 X X event-listener@5.4.1 X X event-listener-strategy@0.5.4 X X 
expect-test@1.5.1 X X fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X fnv@1.0.7 X X form_urlencoded@1.2.2 X X futures@0.3.31 X X @@ -88,101 +87,94 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X -half@2.6.0 X X -hashbrown@0.15.5 X X +half@2.7.1 X X +hashbrown@0.16.1 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hmac@0.12.1 X X home@0.5.11 X X -http@1.3.1 X X +http@1.4.0 X X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X integer-encoding@3.0.4 X -io-uring@0.7.10 X X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +libz-rs-sys@0.5.3 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X ordered-float@2.10.1 X ordered-float@4.6.0 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -190,134 +182,135 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X 
rand_core@0.9.3 X X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc_version@0.4.1 X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -scoped-tls@1.0.1 X X scopeguard@1.2.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X -socket2@0.6.0 X X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +socket2@0.6.1 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X version_check@0.9.5 X X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X 
+windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/iceberg/src/arrow/caching_delete_file_loader.rs b/crates/iceberg/src/arrow/caching_delete_file_loader.rs index 192ca390a8..5d0b1da712 100644 --- a/crates/iceberg/src/arrow/caching_delete_file_loader.rs +++ b/crates/iceberg/src/arrow/caching_delete_file_loader.rs @@ -23,7 +23,7 @@ use arrow_array::{Array, ArrayRef, Int64Array, StringArray, StructArray}; use futures::{StreamExt, TryStreamExt}; use tokio::sync::oneshot::{Receiver, channel}; -use super::delete_filter::DeleteFilter; +use super::delete_filter::{DeleteFilter, PosDelLoadAction}; use crate::arrow::delete_file_loader::BasicDeleteFileLoader; use crate::arrow::{arrow_primitive_to_literal, arrow_schema_to_schema}; use crate::delete_vector::DeleteVector; @@ -42,13 +42,20 @@ use crate::{Error, ErrorKind, Result}; pub(crate) struct CachingDeleteFileLoader { basic_delete_file_loader: BasicDeleteFileLoader, concurrency_limit_data_files: usize, + /// Shared filter state to allow caching loaded deletes across multiple + /// calls to `load_deletes` (e.g., across multiple file scan tasks). + delete_filter: DeleteFilter, } // Intermediate context during processing of a delete file task. 
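 // Each variant below corresponds to one outcome of claiming a delete file through
 // the shared DeleteFilter: an earlier task already loaded it (ExistingEqDel /
 // ExistingPosDel), or the current task won the claim and must load it itself
 // (PosDels / FreshEqDel).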
 enum DeleteFileContext {
     // TODO: Delete Vector loader from Puffin files
     ExistingEqDel,
-    PosDels(ArrowRecordBatchStream),
+    ExistingPosDel,
+    PosDels {
+        file_path: String,
+        stream: ArrowRecordBatchStream,
+    },
     FreshEqDel {
         batch_stream: ArrowRecordBatchStream,
         equality_ids: HashSet<i32>,
@@ -59,8 +66,12 @@ enum DeleteFileContext {
 // Final result of the processing of a delete file task before
 // results are fully merged into the DeleteFileManager's state
 enum ParsedDeleteFileContext {
-    DelVecs(HashMap<String, DeleteVector>),
+    DelVecs {
+        file_path: String,
+        results: HashMap<String, DeleteVector>,
+    },
     EqDel,
+    ExistingPosDel,
 }

 #[allow(unused_variables)]
@@ -69,6 +80,7 @@ impl CachingDeleteFileLoader {
         CachingDeleteFileLoader {
             basic_delete_file_loader: BasicDeleteFileLoader::new(file_io),
             concurrency_limit_data_files,
+            delete_filter: DeleteFilter::default(),
         }
     }
@@ -142,7 +154,6 @@ impl CachingDeleteFileLoader {
         schema: SchemaRef,
     ) -> Receiver<Result<DeleteFilter>> {
         let (tx, rx) = channel();
-        let del_filter = DeleteFilter::default();

         let stream_items = delete_file_entries
             .iter()
@@ -150,14 +161,14 @@
                 (
                     t.clone(),
                     self.basic_delete_file_loader.clone(),
-                    del_filter.clone(),
+                    self.delete_filter.clone(),
                     schema.clone(),
                 )
             })
             .collect::<Vec<_>>();
         let task_stream = futures::stream::iter(stream_items);

-        let del_filter = del_filter.clone();
+        let del_filter = self.delete_filter.clone();
         let concurrency_limit_data_files = self.concurrency_limit_data_files;
         let basic_delete_file_loader = self.basic_delete_file_loader.clone();
         crate::runtime::spawn(async move {
@@ -165,7 +176,7 @@
                 let mut del_filter = del_filter;
                 let basic_delete_file_loader = basic_delete_file_loader.clone();

-                let results: Vec<ParsedDeleteFileContext> = task_stream
+                let mut results_stream = task_stream
                     .map(move |(task, file_io, del_filter, schema)| {
                         let basic_delete_file_loader = basic_delete_file_loader.clone();
                         async move {
@@ -181,15 +192,16 @@
                     .map(move |ctx| {
                         Ok(async { Self::parse_file_content_for_task(ctx.await?).await })
                     })
-                    .try_buffer_unordered(concurrency_limit_data_files)
-                    .try_collect::<Vec<_>>()
-                    .await?;
+                    .try_buffer_unordered(concurrency_limit_data_files);

-                for item in results {
-                    if let ParsedDeleteFileContext::DelVecs(hash_map) = item {
-                        for (data_file_path, delete_vector) in hash_map.into_iter() {
+                while let Some(item) = results_stream.next().await {
+                    let item = item?;
+                    if let ParsedDeleteFileContext::DelVecs { file_path, results } = item {
+                        for (data_file_path, delete_vector) in results.into_iter() {
                             del_filter.upsert_delete_vector(data_file_path, delete_vector);
                         }
+                        // Mark the positional delete file as fully loaded so waiters can proceed
+                        del_filter.finish_pos_del_load(&file_path);
                     }
                 }
@@ -210,11 +222,24 @@
         schema: SchemaRef,
     ) -> Result<DeleteFileContext> {
         match task.file_type {
-            DataContentType::PositionDeletes => Ok(DeleteFileContext::PosDels(
-                basic_delete_file_loader
-                    .parquet_to_batch_stream(&task.file_path)
-                    .await?,
-            )),
+            DataContentType::PositionDeletes => {
+                match del_filter.try_start_pos_del_load(&task.file_path) {
+                    PosDelLoadAction::AlreadyLoaded => Ok(DeleteFileContext::ExistingPosDel),
+                    PosDelLoadAction::WaitFor(notify) => {
+                        // Positional deletes are accessed synchronously by ArrowReader.
+                        // We must wait here to ensure the data is ready before returning,
+                        // otherwise ArrowReader might get an empty/partial result.
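+                        // `notified().await` resolves once the winning task has merged the
+                        // parsed delete vectors and called `finish_pos_del_load`, which
+                        // flips this entry to Loaded and wakes all registered waiters.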
+                        notify.notified().await;
+                        Ok(DeleteFileContext::ExistingPosDel)
+                    }
+                    PosDelLoadAction::Load => Ok(DeleteFileContext::PosDels {
+                        file_path: task.file_path.clone(),
+                        stream: basic_delete_file_loader
+                            .parquet_to_batch_stream(&task.file_path)
+                            .await?,
+                    }),
+                }
+            }
             DataContentType::EqualityDeletes => {
                 let Some(notify) = del_filter.try_start_eq_del_load(&task.file_path) else {
@@ -255,10 +280,13 @@
     ) -> Result<ParsedDeleteFileContext> {
         match ctx {
             DeleteFileContext::ExistingEqDel => Ok(ParsedDeleteFileContext::EqDel),
-            DeleteFileContext::PosDels(batch_stream) => {
-                let del_vecs =
-                    Self::parse_positional_deletes_record_batch_stream(batch_stream).await?;
-                Ok(ParsedDeleteFileContext::DelVecs(del_vecs))
+            DeleteFileContext::ExistingPosDel => Ok(ParsedDeleteFileContext::ExistingPosDel),
+            DeleteFileContext::PosDels { file_path, stream } => {
+                let del_vecs = Self::parse_positional_deletes_record_batch_stream(stream).await?;
+                Ok(ParsedDeleteFileContext::DelVecs {
+                    file_path,
+                    results: del_vecs,
+                })
             }
             DeleteFileContext::FreshEqDel {
                 sender,
@@ -330,7 +358,7 @@
         mut stream: ArrowRecordBatchStream,
         equality_ids: HashSet<i32>,
     ) -> Result<Predicate> {
-        let mut result_predicate = AlwaysTrue;
+        let mut row_predicates = Vec::new();
         let mut batch_schema_iceberg: Option<Schema> = None;
         let accessor = EqDelRecordBatchPartnerAccessor;
@@ -374,10 +402,29 @@
                     row_predicate = row_predicate.and(cell_predicate)
                 }
             }
-            result_predicate = result_predicate.and(row_predicate.not());
+            row_predicates.push(row_predicate.not().rewrite_not());
+            }
+        }
+
+        // All row predicates are combined to a single predicate by creating a balanced binary tree.
+        // Using a simple fold would result in a deeply nested predicate that can cause a stack overflow.
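+        // For example, [p1, p2, p3, p4, p5] reduces level by level:
+        //   [(p1 AND p2), (p3 AND p4), p5]
+        //   [((p1 AND p2) AND (p3 AND p4)), p5]
+        //   [(((p1 AND p2) AND (p3 AND p4)) AND p5)]
+        // so nesting depth grows as O(log n) instead of the O(n) a left fold produces.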
+        while row_predicates.len() > 1 {
+            let mut next_level = Vec::with_capacity(row_predicates.len().div_ceil(2));
+            let mut iter = row_predicates.into_iter();
+            while let Some(p1) = iter.next() {
+                if let Some(p2) = iter.next() {
+                    next_level.push(p1.and(p2));
+                } else {
+                    next_level.push(p1);
+                }
+            }
+            row_predicates = next_level;
+        }
+
+        match row_predicates.pop() {
+            Some(p) => Ok(p),
+            None => Ok(AlwaysTrue),
         }
-        Ok(result_predicate.rewrite_not())
     }
 }
@@ -892,6 +939,7 @@ mod tests {
         partition: None,
         partition_spec: None,
         name_mapping: None,
+        case_sensitive: false,
     };

     // Load the deletes - should handle both types without error
@@ -912,4 +960,90 @@ mod tests {
             result.err()
         );
     }
+
+    #[tokio::test]
+    async fn test_large_equality_delete_batch_stack_overflow() {
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path().as_os_str().to_str().unwrap();
+        let file_io = FileIO::from_path(table_location).unwrap().build().unwrap();
+
+        // Create a large batch of equality deletes
+        let num_rows = 20_000;
+        let col_y_vals: Vec<i64> = (0..num_rows).collect();
+        let col_y = Arc::new(Int64Array::from(col_y_vals)) as ArrayRef;
+
+        let schema = Arc::new(arrow_schema::Schema::new(vec![
+            Field::new("y", arrow_schema::DataType::Int64, false).with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                "2".to_string(),
+            )])),
+        ]));
+
+        let record_batch = RecordBatch::try_new(schema.clone(), vec![col_y]).unwrap();
+
+        // Write to file
+        let path = format!("{}/large-eq-deletes.parquet", &table_location);
+        let file = File::create(&path).unwrap();
+        let props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .build();
+        let mut writer = ArrowWriter::try_new(file, schema, Some(props)).unwrap();
+        writer.write(&record_batch).unwrap();
+        writer.close().unwrap();
+
+        let basic_delete_file_loader = BasicDeleteFileLoader::new(file_io.clone());
+        let record_batch_stream = basic_delete_file_loader
+            .parquet_to_batch_stream(&path)
+            .await
+            .expect("could not get batch stream");
+
+        let eq_ids = HashSet::from_iter(vec![2]);
+
+        let result = CachingDeleteFileLoader::parse_equality_deletes_record_batch_stream(
+            record_batch_stream,
+            eq_ids,
+        )
+        .await;
+
+        assert!(result.is_ok());
+    }
+
+    #[tokio::test]
+    async fn test_caching_delete_file_loader_caches_results() {
+        let tmp_dir = TempDir::new().unwrap();
+        let table_location = tmp_dir.path();
+        let file_io = FileIO::from_path(table_location.as_os_str().to_str().unwrap())
+            .unwrap()
+            .build()
+            .unwrap();

+        let delete_file_loader = CachingDeleteFileLoader::new(file_io.clone(), 10);
+
+        let file_scan_tasks = setup(table_location);
+
+        // Load deletes for the first time
+        let delete_filter_1 = delete_file_loader
+            .load_deletes(&file_scan_tasks[0].deletes, file_scan_tasks[0].schema_ref())
+            .await
+            .unwrap()
+            .unwrap();
+
+        // Load deletes for the second time (same task/files)
+        let delete_filter_2 = delete_file_loader
+            .load_deletes(&file_scan_tasks[0].deletes, file_scan_tasks[0].schema_ref())
+            .await
+            .unwrap()
+            .unwrap();
+
+        let dv1 = delete_filter_1
+            .get_delete_vector(&file_scan_tasks[0])
+            .unwrap();
+        let dv2 = delete_filter_2
+            .get_delete_vector(&file_scan_tasks[0])
+            .unwrap();
+
+        // Verify that the delete vectors point to the same memory location,
+        // confirming that the second load reused the result from the first.
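+        // Arc::ptr_eq compares allocations, not contents: this only holds if the
+        // second load returned the cached delete vector rather than re-parsing the file.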
+        assert!(Arc::ptr_eq(&dv1, &dv2));
+    }
 }
diff --git a/crates/iceberg/src/arrow/delete_filter.rs b/crates/iceberg/src/arrow/delete_filter.rs
index 64df2b800f..ed32b74ac0 100644
--- a/crates/iceberg/src/arrow/delete_filter.rs
+++ b/crates/iceberg/src/arrow/delete_filter.rs
@@ -34,10 +34,23 @@ enum EqDelState {
     Loaded(Predicate),
 }

+/// State tracking for positional delete files.
+/// Unlike equality deletes, positional deletes must be fully loaded before
+/// the ArrowReader proceeds, because retrieval is synchronous and cannot
+/// wait for a load still in progress.
+#[derive(Debug)]
+enum PosDelState {
+    /// The file is currently being loaded by a task.
+    /// The notifier allows other tasks to wait for completion.
+    Loading(Arc<Notify>),
+    /// The file has been fully loaded and merged into the delete vector map.
+    Loaded,
+}
+
 #[derive(Debug, Default)]
 pub(crate) struct DeleteFileFilterState {
     delete_vectors: HashMap<String, Arc<Mutex<DeleteVector>>>,
     equality_deletes: HashMap<String, EqDelState>,
+    positional_deletes: HashMap<String, PosDelState>,
 }

 impl DeleteFileFilterState {
@@ -56,6 +69,18 @@ pub(crate) struct DeleteFilter {
     state: Arc<RwLock<DeleteFileFilterState>>,
 }

+/// Action to take when trying to start loading a positional delete file.
+pub(crate) enum PosDelLoadAction {
+    /// The file is not loaded; the caller should load it.
+    Load,
+    /// The file is already loaded; nothing to do.
+    AlreadyLoaded,
+    /// The file is currently being loaded by another task.
+    /// The caller *must* wait for this notifier to ensure data availability
+    /// before returning, as subsequent access (get_delete_vector) is synchronous.
+    WaitFor(Arc<Notify>),
+}
+
 impl DeleteFilter {
     /// Retrieve a delete vector for the data file associated with a given file scan task
     pub(crate) fn get_delete_vector(
@@ -68,12 +93,12 @@ impl DeleteFilter {
     /// Retrieve a delete vector for a data file
     pub(crate) fn get_delete_vector_for_path(
         &self,
-        delete_file_path: &str,
+        data_file_path: &str,
     ) -> Option<Arc<Mutex<DeleteVector>>> {
         self.state
             .read()
             .ok()
-            .and_then(|st| st.delete_vectors.get(delete_file_path).cloned())
+            .and_then(|st| st.delete_vectors.get(data_file_path).cloned())
     }

     pub(crate) fn with_read<F, T>(&self, f: F) -> Result<T>
     where
         F: FnOnce(&DeleteFileFilterState) -> Result<T>,
     {
         let state = self.state.read().map_err(|e| {
             Error::new(
                 ErrorKind::Unexpected,
-                format!("Failed to acquire read lock: {}", e),
+                format!("Failed to acquire read lock: {e}"),
             )
         })?;
         f(&state)
@@ -92,7 +117,7 @@ impl DeleteFilter {
         let mut state = self.state.write().map_err(|e| {
             Error::new(
                 ErrorKind::Unexpected,
-                format!("Failed to acquire write lock: {}", e),
+                format!("Failed to acquire write lock: {e}"),
             )
         })?;
         f(&mut state)
@@ -115,6 +140,47 @@ impl DeleteFilter {
         Some(notifier)
     }

+    /// Attempts to mark a positional delete file as "loading".
+    ///
+    /// Returns an action dictating whether the caller should load the file,
+    /// wait for another task to load it, or do nothing.
+    pub(crate) fn try_start_pos_del_load(&self, file_path: &str) -> PosDelLoadAction {
+        let mut state = self.state.write().unwrap();
+
+        if let Some(state) = state.positional_deletes.get(file_path) {
+            match state {
+                PosDelState::Loaded => return PosDelLoadAction::AlreadyLoaded,
+                PosDelState::Loading(notify) => return PosDelLoadAction::WaitFor(notify.clone()),
+            }
+        }
+
+        let notifier = Arc::new(Notify::new());
+        state
+            .positional_deletes
+            .insert(file_path.to_string(), PosDelState::Loading(notifier));
+
+        PosDelLoadAction::Load
+    }
+
+    /// Marks a positional delete file as successfully loaded and notifies any waiting tasks.
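+    ///
+    /// Typical sequence (names as defined in this file): the winning loader gets
+    /// `PosDelLoadAction::Load` from `try_start_pos_del_load`, parses the file,
+    /// upserts its delete vectors, and then calls this method; tasks that received
+    /// `PosDelLoadAction::WaitFor(notify)` resume when `notify_waiters` fires here.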
+ pub(crate) fn finish_pos_del_load(&self, file_path: &str) { + let notify = { + let mut state = self.state.write().unwrap(); + if let Some(PosDelState::Loading(notify)) = state + .positional_deletes + .insert(file_path.to_string(), PosDelState::Loaded) + { + Some(notify) + } else { + None + } + }; + + if let Some(notify) = notify { + notify.notify_waiters(); + } + } + /// Retrieve the equality delete predicate for a given eq delete file path pub(crate) async fn get_equality_delete_predicate_for_delete_file_path( &self, @@ -174,8 +240,8 @@ impl DeleteFilter { return Ok(None); } - // TODO: handle case-insensitive case - let bound_predicate = combined_predicate.bind(file_scan_task.schema.clone(), false)?; + let bound_predicate = combined_predicate + .bind(file_scan_task.schema.clone(), file_scan_task.case_sensitive)?; Ok(Some(bound_predicate)) } @@ -244,8 +310,9 @@ pub(crate) mod tests { use super::*; use crate::arrow::caching_delete_file_loader::CachingDeleteFileLoader; + use crate::expr::Reference; use crate::io::FileIO; - use crate::spec::{DataFileFormat, Schema}; + use crate::spec::{DataFileFormat, Datum, NestedField, PrimitiveType, Schema, Type}; type ArrowSchemaRef = Arc; @@ -377,6 +444,7 @@ pub(crate) mod tests { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }, FileScanTask { start: 0, @@ -391,6 +459,7 @@ pub(crate) mod tests { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }, ]; @@ -413,4 +482,57 @@ pub(crate) mod tests { ]; Arc::new(arrow_schema::Schema::new(fields)) } + + #[tokio::test] + async fn test_build_equality_delete_predicate_case_sensitive() { + let schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::required(1, "Id", Type::Primitive(PrimitiveType::Long)).into(), + ]) + .build() + .unwrap(), + ); + + // ---------- fake FileScanTask ---------- + let task = FileScanTask { + start: 0, + length: 0, + record_count: None, + data_file_path: "data.parquet".to_string(), + data_file_format: crate::spec::DataFileFormat::Parquet, + schema: schema.clone(), + project_field_ids: vec![], + predicate: None, + deletes: vec![FileScanTaskDeleteFile { + file_path: "eq-del.parquet".to_string(), + file_type: DataContentType::EqualityDeletes, + partition_spec_id: 0, + equality_ids: None, + }], + partition: None, + partition_spec: None, + name_mapping: None, + case_sensitive: true, + }; + + let filter = DeleteFilter::default(); + + // ---------- insert equality delete predicate ---------- + let pred = Reference::new("id").equal_to(Datum::long(10)); + + let (tx, rx) = tokio::sync::oneshot::channel(); + filter.insert_equality_delete("eq-del.parquet", rx); + + tx.send(pred).unwrap(); + + // ---------- should FAIL ---------- + let result = filter.build_equality_delete_predicate(&task).await; + + assert!( + result.is_err(), + "case_sensitive=true should fail when column case mismatches" + ); + } } diff --git a/crates/iceberg/src/arrow/incremental.rs b/crates/iceberg/src/arrow/incremental.rs index 09bc358896..7c9ed0bd56 100644 --- a/crates/iceberg/src/arrow/incremental.rs +++ b/crates/iceberg/src/arrow/incremental.rs @@ -30,12 +30,13 @@ use crate::arrow::record_batch_transformer::RecordBatchTransformerBuilder; use crate::arrow::{ArrowReader, StreamsInto}; use crate::delete_vector::DeleteVector; use crate::io::FileIO; -use crate::metadata_columns::{RESERVED_FIELD_ID_UNDERSCORE_POS, row_pos_field}; +use crate::metadata_columns::{RESERVED_FIELD_ID_POS, row_pos_field}; use 
crate::runtime::spawn; use crate::scan::ArrowRecordBatchStream; use crate::scan::incremental::{ AppendedFileScanTask, DeleteScanTask, IncrementalFileScanTaskStreams, }; +use crate::spec::{Datum, PrimitiveType}; use crate::{Error, ErrorKind, Result}; /// Default batch size for incremental delete operations. @@ -65,10 +66,7 @@ async fn process_incremental_append_task( let mut virtual_columns = Vec::new(); // Check if _pos column is requested and add it as a virtual column - let has_pos_column = task - .base - .project_field_ids - .contains(&RESERVED_FIELD_ID_UNDERSCORE_POS); + let has_pos_column = task.base.project_field_ids.contains(&RESERVED_FIELD_ID_POS); if has_pos_column { // Add _pos as a virtual column to be produced by the Parquet reader virtual_columns.push(Arc::clone(row_pos_field())); @@ -102,12 +100,13 @@ async fn process_incremental_append_task( // RecordBatchTransformer performs any transformations required on the RecordBatches // that come back from the file, such as type promotion, default column insertion, // column re-ordering, and virtual field addition (like _file) + let datum = Datum::new( + PrimitiveType::String, + crate::spec::PrimitiveLiteral::String(task.base.data_file_path.clone()), + ); let mut record_batch_transformer_builder = RecordBatchTransformerBuilder::new(task.schema_ref(), &task.base.project_field_ids) - .with_constant( - crate::metadata_columns::RESERVED_FIELD_ID_FILE, - crate::spec::PrimitiveLiteral::String(task.base.data_file_path.clone()), - )?; + .with_constant(crate::metadata_columns::RESERVED_FIELD_ID_FILE, datum); if has_pos_column { record_batch_transformer_builder = @@ -184,7 +183,7 @@ fn process_incremental_delete_task( // Create schema with _file column first, then pos (Int64) let schema = Arc::new(ArrowSchema::new(vec![ Arc::clone(crate::metadata_columns::file_path_field()), - Arc::clone(crate::metadata_columns::pos_field()), + Arc::clone(crate::metadata_columns::pos_field_arrow()), ])); let treemap = delete_vector.inner; @@ -206,7 +205,7 @@ fn process_incremental_deleted_file_task( // Create schema with _file column first, then pos (Int64) let schema = Arc::new(ArrowSchema::new(vec![ Arc::clone(crate::metadata_columns::file_path_field()), - Arc::clone(crate::metadata_columns::pos_field()), + Arc::clone(crate::metadata_columns::pos_field_arrow()), ])); // Create a stream of position values from 0 to total_records-1 (0-indexed) diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs index c5d6362ded..9b79c41b36 100644 --- a/crates/iceberg/src/arrow/reader.rs +++ b/crates/iceberg/src/arrow/reader.rs @@ -56,11 +56,11 @@ use crate::expr::visitors::row_group_metrics_evaluator::RowGroupMetricsEvaluator use crate::expr::{BoundPredicate, BoundReference}; use crate::io::{FileIO, FileMetadata, FileRead}; use crate::metadata_columns::{ - RESERVED_FIELD_ID_FILE, RESERVED_FIELD_ID_UNDERSCORE_POS, is_metadata_field, row_pos_field, + RESERVED_FIELD_ID_FILE, RESERVED_FIELD_ID_POS, is_metadata_field, row_pos_field, }; use crate::runtime::spawn; use crate::scan::{ArrowRecordBatchStream, FileScanTask, FileScanTaskStream}; -use crate::spec::{Datum, NameMapping, NestedField, PrimitiveLiteral, PrimitiveType, Schema, Type}; +use crate::spec::{Datum, NameMapping, NestedField, PrimitiveType, Schema, Type}; use crate::utils::available_parallelism; use crate::{Error, ErrorKind}; @@ -133,7 +133,7 @@ impl ArrowReaderBuilder { pub struct ArrowReader { pub(crate) batch_size: Option, pub(crate) file_io: FileIO, - pub(crate) delete_file_loader: 
CachingDeleteFileLoader, + delete_file_loader: CachingDeleteFileLoader, /// the maximum number of data files that can be fetched at the same time pub(crate) concurrency_limit_data_files: usize, @@ -248,10 +248,8 @@ impl ArrowReader { let mut virtual_columns = Vec::new(); - // Check if _pos column is requested and add it as a virtual column - let has_pos_column = task - .project_field_ids - .contains(&RESERVED_FIELD_ID_UNDERSCORE_POS); + // Check if _pos column is requested and prepare virtual columns + let has_pos_column = task.project_field_ids.contains(&RESERVED_FIELD_ID_POS); if has_pos_column { // Add _pos as a virtual column to be produced by the Parquet reader virtual_columns.push(Arc::clone(row_pos_field())); @@ -311,7 +309,7 @@ impl ArrowReader { let options = ArrowReaderOptions::new() .with_schema(arrow_schema) - .with_virtual_columns(virtual_columns)?; + .with_virtual_columns(virtual_columns.clone())?; Self::create_parquet_record_batch_stream_builder( &task.data_file_path, @@ -352,12 +350,16 @@ impl ArrowReader { // that come back from the file, such as type promotion, default column insertion, // column re-ordering, partition constants, and virtual field addition (like _file) let mut record_batch_transformer_builder = - RecordBatchTransformerBuilder::new(task.schema_ref(), task.project_field_ids()) - .with_constant( - RESERVED_FIELD_ID_FILE, - PrimitiveLiteral::String(task.data_file_path.clone()), - )?; + RecordBatchTransformerBuilder::new(task.schema_ref(), task.project_field_ids()); + + // Add the _file metadata column if it's in the projected fields + if task.project_field_ids().contains(&RESERVED_FIELD_ID_FILE) { + let file_datum = Datum::string(task.data_file_path.clone()); + record_batch_transformer_builder = + record_batch_transformer_builder.with_constant(RESERVED_FIELD_ID_FILE, file_datum); + } + // Add the _pos virtual column if it's requested and produced by Parquet reader if has_pos_column { record_batch_transformer_builder = record_batch_transformer_builder.with_virtual_field(Arc::clone(row_pos_field()))?; @@ -580,10 +582,10 @@ impl ArrowReader { // we need to call next() to update the cache with the newly positioned value. 
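            // `advance_to` only repositions the iterator; the value cached in
            // `next_deleted_row_idx_opt` is not refreshed until `next()` is called again,
            // which is why the staleness check below is required.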
delete_vector_iter.advance_to(next_row_group_base_idx); // Only update the cache if the cached value is stale (in the skipped range) - if let Some(cached_idx) = next_deleted_row_idx_opt { - if cached_idx < next_row_group_base_idx { - next_deleted_row_idx_opt = delete_vector_iter.next(); - } + if let Some(cached_idx) = next_deleted_row_idx_opt + && cached_idx < next_row_group_base_idx + { + next_deleted_row_idx_opt = delete_vector_iter.next(); } // still increment the current page base index but then skip to the next row group @@ -937,10 +939,10 @@ impl ArrowReader { }; // If all row groups were filtered out, return an empty RowSelection (select no rows) - if let Some(selected_row_groups) = selected_row_groups { - if selected_row_groups.is_empty() { - return Ok(RowSelection::from(Vec::new())); - } + if let Some(selected_row_groups) = selected_row_groups + && selected_row_groups.is_empty() + { + return Ok(RowSelection::from(Vec::new())); } let mut selected_row_groups_idx = 0; @@ -973,10 +975,10 @@ impl ArrowReader { results.push(selections_for_page); - if let Some(selected_row_groups) = selected_row_groups { - if selected_row_groups_idx == selected_row_groups.len() { - break; - } + if let Some(selected_row_groups) = selected_row_groups + && selected_row_groups_idx == selected_row_groups.len() + { + break; } } @@ -1107,14 +1109,13 @@ fn apply_name_mapping_to_arrow_schema( let mut metadata = field.metadata().clone(); - if let Some(mapped_field) = mapped_field_opt { - if let Some(field_id) = mapped_field.field_id() { - // Field found in mapping with a field_id → assign it - metadata.insert(PARQUET_FIELD_ID_META_KEY.to_string(), field_id.to_string()); - } - // If field_id is None, leave the field without an ID (will be filtered by projection) + if let Some(mapped_field) = mapped_field_opt + && let Some(field_id) = mapped_field.field_id() + { + // Field found in mapping with a field_id → assign it + metadata.insert(PARQUET_FIELD_ID_META_KEY.to_string(), field_id.to_string()); } - // If field not found in mapping, leave it without an ID (will be filtered by projection) + // If field_id is None, leave the field without an ID (will be filtered by projection) Field::new(field.name(), field.data_type().clone(), field.is_nullable()) .with_metadata(metadata) @@ -1991,7 +1992,7 @@ message schema { assert_eq!(err.kind(), ErrorKind::DataInvalid); assert_eq!( err.to_string(), - "DataInvalid => Unsupported Arrow data type: Duration(µs)" + "DataInvalid => Unsupported Arrow data type: Duration(µs)".to_string() ); // Omitting field c2, we still get an error due to c3 being selected @@ -2159,6 +2160,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -2217,7 +2219,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props.clone())).unwrap(); @@ -2398,7 +2400,7 @@ message schema { let tmp_dir = TempDir::new().unwrap(); let table_location = tmp_dir.path().to_str().unwrap().to_string(); - let file_path = format!("{}/multi_row_group.parquet", &table_location); + let file_path = format!("{table_location}/multi_row_group.parquet"); // Force each batch into its own row group for testing byte range filtering. 
let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(Int32Array::from( @@ -2480,6 +2482,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; // Task 2: read the second and third row groups @@ -2496,6 +2499,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; let tasks1 = Box::pin(futures::stream::iter(vec![Ok(task1)])) as FileScanTaskStream; @@ -2602,7 +2606,7 @@ message schema { let props = WriterProperties::builder() .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/old_file.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/old_file.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); writer.close().unwrap(); @@ -2623,6 +2627,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -2708,7 +2713,7 @@ message schema { // Step 1: Create data file with 200 rows in 2 row groups // Row group 0: rows 0-99 (ids 1-100) // Row group 1: rows 100-199 (ids 101-200) - let data_file_path = format!("{}/data.parquet", &table_location); + let data_file_path = format!("{table_location}/data.parquet"); let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( Int32Array::from_iter_values(1..=100), @@ -2742,7 +2747,7 @@ message schema { ); // Step 2: Create position delete file that deletes row 199 (id=200, last row in row group 1) - let delete_file_path = format!("{}/deletes.parquet", &table_location); + let delete_file_path = format!("{table_location}/deletes.parquet"); let delete_schema = Arc::new(ArrowSchema::new(vec![ Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([( @@ -2794,6 +2799,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream; @@ -2807,15 +2813,14 @@ message schema { // Step 4: Verify we got 199 rows (not 200) let total_rows: usize = result.iter().map(|b| b.num_rows()).sum(); - println!("Total rows read: {}", total_rows); + println!("Total rows read: {total_rows}"); println!("Expected: 199 rows (deleted row 199 which had id=200)"); // This assertion will FAIL before the fix and PASS after the fix assert_eq!( total_rows, 199, - "Expected 199 rows after deleting row 199, but got {} rows. \ - The bug causes position deletes in later row groups to be ignored.", - total_rows + "Expected 199 rows after deleting row 199, but got {total_rows} rows. \ + The bug causes position deletes in later row groups to be ignored." 
); // Verify the deleted row (id=200) is not present @@ -2902,7 +2907,7 @@ message schema { // Step 1: Create data file with 200 rows in 2 row groups // Row group 0: rows 0-99 (ids 1-100) // Row group 1: rows 100-199 (ids 101-200) - let data_file_path = format!("{}/data.parquet", &table_location); + let data_file_path = format!("{table_location}/data.parquet"); let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( Int32Array::from_iter_values(1..=100), @@ -2936,7 +2941,7 @@ message schema { ); // Step 2: Create position delete file that deletes row 199 (id=200, last row in row group 1) - let delete_file_path = format!("{}/deletes.parquet", &table_location); + let delete_file_path = format!("{table_location}/deletes.parquet"); let delete_schema = Arc::new(ArrowSchema::new(vec![ Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([( @@ -3012,6 +3017,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream; @@ -3026,16 +3032,15 @@ message schema { // Row group 1 has 100 rows (ids 101-200), minus 1 delete (id=200) = 99 rows let total_rows: usize = result.iter().map(|b| b.num_rows()).sum(); - println!("Total rows read from row group 1: {}", total_rows); + println!("Total rows read from row group 1: {total_rows}"); println!("Expected: 99 rows (row group 1 has 100 rows, 1 delete at position 199)"); // This assertion will FAIL before the fix and PASS after the fix assert_eq!( total_rows, 99, - "Expected 99 rows from row group 1 after deleting position 199, but got {} rows. \ + "Expected 99 rows from row group 1 after deleting position 199, but got {total_rows} rows. \ The bug causes position deletes to be lost when advance_to() is followed by next() \ - when skipping unselected row groups.", - total_rows + when skipping unselected row groups." 
); // Verify the deleted row (id=200) is not present @@ -3124,7 +3129,7 @@ message schema { // Step 1: Create data file with 200 rows in 2 row groups // Row group 0: rows 0-99 (ids 1-100) // Row group 1: rows 100-199 (ids 101-200) - let data_file_path = format!("{}/data.parquet", &table_location); + let data_file_path = format!("{table_location}/data.parquet"); let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( Int32Array::from_iter_values(1..=100), @@ -3158,7 +3163,7 @@ message schema { ); // Step 2: Create position delete file that deletes row 0 (id=1, first row in row group 0) - let delete_file_path = format!("{}/deletes.parquet", &table_location); + let delete_file_path = format!("{table_location}/deletes.parquet"); let delete_schema = Arc::new(ArrowSchema::new(vec![ Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([( @@ -3223,6 +3228,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream; @@ -3304,7 +3310,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); @@ -3317,7 +3323,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2], @@ -3326,6 +3332,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -3401,7 +3408,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); @@ -3414,7 +3421,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 3], @@ -3423,6 +3430,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -3487,7 +3495,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); @@ -3500,7 +3508,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2, 3], @@ -3509,6 +3517,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: 
false, })] .into_iter(), )) as FileScanTaskStream; @@ -3575,7 +3584,7 @@ message schema { .set_max_row_group_size(2) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap(); // Write 6 rows in 3 batches (will create 3 row groups) @@ -3600,7 +3609,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2], @@ -3609,6 +3618,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -3641,7 +3651,7 @@ message schema { assert_eq!(all_values.len(), 6); for i in 0..6 { - assert_eq!(all_names[i], format!("name_{}", i)); + assert_eq!(all_names[i], format!("name_{i}")); assert_eq!(all_values[i], i as i32); } } @@ -3716,7 +3726,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); @@ -3729,7 +3739,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2], @@ -3738,6 +3748,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -3813,7 +3824,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); writer.close().unwrap(); @@ -3825,7 +3836,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 5, 2], @@ -3834,6 +3845,7 @@ message schema { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -3915,7 +3927,7 @@ message schema { .set_compression(Compression::SNAPPY) .build(); - let file = File::create(format!("{}/1.parquet", &table_location)).unwrap(); + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); writer.write(&to_write).expect("Writing batch"); writer.close().unwrap(); @@ -3934,7 +3946,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/1.parquet", table_location), + data_file_path: format!("{table_location}/1.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2, 3], @@ -3943,6 +3955,7 @@ message schema { 
partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; @@ -4073,7 +4086,7 @@ message schema { start: 0, length: 0, record_count: None, - data_file_path: format!("{}/data.parquet", table_location), + data_file_path: format!("{table_location}/data.parquet"), data_file_format: DataFileFormat::Parquet, schema: schema.clone(), project_field_ids: vec![1, 2], @@ -4082,6 +4095,7 @@ message schema { partition: Some(partition_data), partition_spec: Some(partition_spec), name_mapping: None, + case_sensitive: false, })] .into_iter(), )) as FileScanTaskStream; diff --git a/crates/iceberg/src/arrow/record_batch_projector.rs b/crates/iceberg/src/arrow/record_batch_projector.rs index 45de0212e8..7028eee961 100644 --- a/crates/iceberg/src/arrow/record_batch_projector.rs +++ b/crates/iceberg/src/arrow/record_batch_projector.rs @@ -133,25 +133,24 @@ impl RecordBatchProjector { { for (pos, field) in fields.iter().enumerate() { let id = field_id_fetch_func(field)?; - if let Some(id) = id { - if target_field_id == id { - index_vec.push(pos); - return Ok(Some(field.clone())); - } + if let Some(id) = id + && target_field_id == id + { + index_vec.push(pos); + return Ok(Some(field.clone())); } - if let DataType::Struct(inner) = field.data_type() { - if searchable_field_func(field) { - if let Some(res) = Self::fetch_field_index( - inner, - index_vec, - target_field_id, - field_id_fetch_func, - searchable_field_func, - )? { - index_vec.push(pos); - return Ok(Some(res)); - } - } + if let DataType::Struct(inner) = field.data_type() + && searchable_field_func(field) + && let Some(res) = Self::fetch_field_index( + inner, + index_vec, + target_field_id, + field_id_fetch_func, + searchable_field_func, + )? + { + index_vec.push(pos); + return Ok(Some(res)); } } Ok(None) diff --git a/crates/iceberg/src/arrow/record_batch_transformer.rs b/crates/iceberg/src/arrow/record_batch_transformer.rs index 4b4010bbba..63c0cb0b1b 100644 --- a/crates/iceberg/src/arrow/record_batch_transformer.rs +++ b/crates/iceberg/src/arrow/record_batch_transformer.rs @@ -18,26 +18,22 @@ use std::collections::HashMap; use std::sync::Arc; -use arrow_array::{ - Array as ArrowArray, ArrayRef, BinaryArray, BooleanArray, Date32Array, Decimal128Array, - Float32Array, Float64Array, Int32Array, Int64Array, NullArray, RecordBatch, RecordBatchOptions, - RunArray, StringArray, StructArray, -}; -use arrow_buffer::NullBuffer; +use arrow_array::{Array as ArrowArray, ArrayRef, RecordBatch, RecordBatchOptions}; use arrow_cast::cast; use arrow_schema::{ DataType, Field, FieldRef, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, SchemaRef, }; use parquet::arrow::PARQUET_FIELD_ID_META_KEY; -use crate::arrow::schema_to_arrow_schema; +use crate::arrow::value::create_primitive_array_repeated; +use crate::arrow::{datum_to_arrow_type, schema_to_arrow_schema}; use crate::metadata_columns::get_metadata_field; use crate::spec::{ - Literal, PartitionSpec, PrimitiveLiteral, Schema as IcebergSchema, Struct, Transform, + Datum, Literal, PartitionSpec, PrimitiveLiteral, Schema as IcebergSchema, Struct, Transform, }; use crate::{Error, ErrorKind, Result}; -/// Build a map of field ID to constant value for identity-partitioned fields. +/// Build a map of field ID to constant value (as Datum) for identity-partitioned fields. /// /// Implements Iceberg spec "Column Projection" rule #1: use partition metadata constants /// only for identity-transformed fields. Non-identity transforms (bucket, truncate, year, etc.) 
@@ -54,20 +50,61 @@ use crate::{Error, ErrorKind, Result};
 fn constants_map(
     partition_spec: &PartitionSpec,
     partition_data: &Struct,
-) -> HashMap<i32, PrimitiveLiteral> {
+    schema: &IcebergSchema,
+) -> Result<HashMap<i32, Datum>> {
     let mut constants = HashMap::new();

     for (pos, field) in partition_spec.fields().iter().enumerate() {
         // Only identity transforms should use constant values from partition metadata
         if matches!(field.transform, Transform::Identity) {
+            // Get the field from schema to extract its type
+            let iceberg_field = schema.field_by_id(field.source_id).ok_or(Error::new(
+                ErrorKind::Unexpected,
+                format!("Field {} not found in schema", field.source_id),
+            ))?;
+
+            // Ensure the field type is primitive
+            let prim_type = match &*iceberg_field.field_type {
+                crate::spec::Type::Primitive(prim_type) => prim_type,
+                _ => {
+                    return Err(Error::new(
+                        ErrorKind::Unexpected,
+                        format!(
+                            "Partition field {} has non-primitive type {:?}",
+                            field.source_id, iceberg_field.field_type
+                        ),
+                    ));
+                }
+            };
+
             // Get the partition value for this field
-            if let Some(Literal::Primitive(value)) = &partition_data[pos] {
-                constants.insert(field.source_id, value.clone());
+            // Handle both None (null) and Some(Literal::Primitive) cases
+            match &partition_data[pos] {
+                None => {
+                    // Skip null partition values - they will be resolved as null per Iceberg spec rule #4.
+                    // When a partition value is null, we don't add it to the constants map,
+                    // allowing downstream column resolution to handle it correctly.
+                    continue;
+                }
+                Some(Literal::Primitive(value)) => {
+                    // Create a Datum from the primitive type and value
+                    let datum = Datum::new(prim_type.clone(), value.clone());
+                    constants.insert(field.source_id, datum);
+                }
+                Some(literal) => {
+                    return Err(Error::new(
+                        ErrorKind::Unexpected,
+                        format!(
+                            "Partition field {} has non-primitive value: {:?}",
+                            field.source_id, literal
+                        ),
+                    ));
+                }
             }
         }
     }

-    constants
+    Ok(constants)
 }

 /// Indicates how a particular column in a processed RecordBatch should
@@ -153,7 +190,7 @@ enum SchemaComparison {
 pub(crate) struct RecordBatchTransformerBuilder {
     snapshot_schema: Arc<IcebergSchema>,
     projected_iceberg_field_ids: Vec<i32>,
-    constant_fields: HashMap<i32, (DataType, PrimitiveLiteral)>,
+    constant_fields: HashMap<i32, Datum>,
     virtual_fields: HashMap<i32, FieldRef>,
 }

@@ -175,11 +212,10 @@ impl RecordBatchTransformerBuilder {
     ///
     /// # Arguments
     /// * `field_id` - The field ID to associate with the constant
-    /// * `value` - The constant value for this field
-    pub(crate) fn with_constant(mut self, field_id: i32, value: PrimitiveLiteral) -> Result<Self> {
-        let arrow_type = RecordBatchTransformer::primitive_literal_to_arrow_type(&value)?;
-        self.constant_fields.insert(field_id, (arrow_type, value));
-        Ok(self)
+    /// * `datum` - The constant value (with type) for this field
+    pub(crate) fn with_constant(mut self, field_id: i32, datum: Datum) -> Self {
+        self.constant_fields.insert(field_id, datum);
+        self
     }

     /// Add a virtual field for a specific field ID.
@@ -213,13 +249,13 @@ impl RecordBatchTransformerBuilder {
         partition_spec: Arc<PartitionSpec>,
         partition_data: Struct,
     ) -> Result<Self> {
-        // Compute partition constants for identity-transformed fields
-        let partition_constants = constants_map(&partition_spec, &partition_data);
+        // Compute partition constants for identity-transformed fields (already returns Datum)
+        let partition_constants =
+            constants_map(&partition_spec, &partition_data, &self.snapshot_schema)?;

-        // Add partition constants to constant_fields (compute REE types from literals)
-        for (field_id, value) in partition_constants {
-            let arrow_type = RecordBatchTransformer::primitive_literal_to_arrow_type(&value)?;
-            self.constant_fields.insert(field_id, (arrow_type, value));
+        // Add partition constants to constant_fields
+        for (field_id, datum) in partition_constants {
+            self.constant_fields.insert(field_id, datum);
         }

         Ok(self)
@@ -270,10 +306,10 @@ pub(crate) struct RecordBatchTransformer {
     snapshot_schema: Arc<IcebergSchema>,
     projected_iceberg_field_ids: Vec<i32>,
-    // Pre-computed constant field information: field_id -> (arrow_type, value)
+    // Pre-computed constant field information: field_id -> Datum
     // Includes both virtual/metadata fields (like _file) and identity-partitioned fields
-    // Avoids type conversions during batch processing
-    constant_fields: HashMap<i32, (DataType, PrimitiveLiteral)>,
+    // Datum holds both the Iceberg type and the value
+    constant_fields: HashMap<i32, Datum>,
     // Virtual fields are metadata fields that are not present in the snapshot schema,
     // but are present in the source schema (arrow reader produces them)
     // Map from field_id to FieldRef
@@ -339,7 +375,7 @@ impl RecordBatchTransformer {
         source_schema: &ArrowSchemaRef,
         snapshot_schema: &IcebergSchema,
         projected_iceberg_field_ids: &[i32],
-        constant_fields: &HashMap<i32, (DataType, PrimitiveLiteral)>,
+        constant_fields: &HashMap<i32, Datum>,
         virtual_fields: &HashMap<i32, FieldRef>,
     ) -> Result<ArrowSchemaRef> {
         let mapped_unprojected_arrow_schema = Arc::new(schema_to_arrow_schema(snapshot_schema)?);
@@ -356,23 +392,38 @@ impl RecordBatchTransformer {
                 return Ok(Arc::clone(virtual_field));
             }

-            // Check if this is a constant field (metadata/virtual or partition)
+            // Check if this is a constant field
             if constant_fields.contains_key(field_id) {
                 // For metadata/virtual fields (like _file), get name from metadata_columns
                 // For partition fields, get name from schema (they exist in schema)
-                if let Ok(field) = get_metadata_field(*field_id) {
-                    // This is a metadata/virtual field - use the predefined field
-                    Ok(field)
+                if let Ok(iceberg_field) = get_metadata_field(*field_id) {
+                    // This is a metadata/virtual field - convert Iceberg field to Arrow
+                    let datum = constant_fields.get(field_id).ok_or(Error::new(
+                        ErrorKind::Unexpected,
+                        "constant field not found",
+                    ))?;
+                    let arrow_type = datum_to_arrow_type(datum);
+                    let arrow_field =
+                        Field::new(&iceberg_field.name, arrow_type, !iceberg_field.required)
+                            .with_metadata(HashMap::from([(
+                                PARQUET_FIELD_ID_META_KEY.to_string(),
+                                iceberg_field.id.to_string(),
+                            )]));
+                    Ok(Arc::new(arrow_field))
                 } else {
                     // This is a partition constant field (exists in schema but uses constant value)
                     let field = &field_id_to_mapped_schema_map
                         .get(field_id)
                         .ok_or(Error::new(ErrorKind::Unexpected, "field not found"))?
.0; - let (arrow_type, _) = constant_fields.get(field_id).unwrap(); + let datum = constant_fields.get(field_id).ok_or(Error::new( + ErrorKind::Unexpected, + "constant field not found", + ))?; + let arrow_type = datum_to_arrow_type(datum); // Use the type from constant_fields (REE for constants) let constant_field = - Field::new(field.name(), arrow_type.clone(), field.is_nullable()) + Field::new(field.name(), arrow_type, field.is_nullable()) .with_metadata(field.metadata().clone()); Ok(Arc::new(constant_field)) } @@ -456,7 +507,7 @@ impl RecordBatchTransformer { snapshot_schema: &IcebergSchema, projected_iceberg_field_ids: &[i32], field_id_to_mapped_schema_map: HashMap, - constant_fields: &HashMap, + constant_fields: &HashMap, virtual_fields: &HashMap, ) -> Result> { let field_id_to_source_schema_map = @@ -469,10 +520,11 @@ impl RecordBatchTransformer { // Constant fields always use their pre-computed constant values, regardless of whether // they exist in the Parquet file. This is per Iceberg spec rule #1: partition metadata // is authoritative and should be preferred over file data. - if let Some((arrow_type, value)) = constant_fields.get(field_id) { + if let Some(datum) = constant_fields.get(field_id) { + let arrow_type = datum_to_arrow_type(datum); return Ok(ColumnSource::Add { - value: Some(value.clone()), - target_type: arrow_type.clone(), + value: Some(datum.literal().clone()), + target_type: arrow_type, }); } @@ -574,7 +626,7 @@ impl RecordBatchTransformer { let this_field_id = field_id_str.parse().map_err(|e| { Error::new( ErrorKind::DataInvalid, - format!("field id not parseable as an i32: {}", e), + format!("field id not parseable as an i32: {e}"), ) })?; @@ -621,247 +673,18 @@ impl RecordBatchTransformer { prim_lit: &Option, num_rows: usize, ) -> Result { - // Check if this is a RunEndEncoded type (for constant fields) + // For constant columns, create repeated arrays instead of Run-End Encoded arrays + // This avoids the overhead of REE for columns that are entirely constant if let DataType::RunEndEncoded(_, values_field) = target_type { - // Helper to create a Run-End Encoded array - let create_ree_array = |values_array: ArrayRef| -> Result { - let run_ends = if num_rows == 0 { - Int32Array::from(Vec::::new()) - } else { - Int32Array::from(vec![num_rows as i32]) - }; - Ok(Arc::new( - RunArray::try_new(&run_ends, &values_array).map_err(|e| { - Error::new( - ErrorKind::Unexpected, - "Failed to create RunArray for constant value", - ) - .with_source(e) - })?, - )) - }; - - // Create the values array based on the literal value - let values_array: ArrayRef = match (values_field.data_type(), prim_lit) { - (DataType::Boolean, Some(PrimitiveLiteral::Boolean(v))) => { - Arc::new(BooleanArray::from(vec![*v])) - } - (DataType::Boolean, None) => { - Arc::new(BooleanArray::from(vec![Option::::None])) - } - (DataType::Int32, Some(PrimitiveLiteral::Int(v))) => { - Arc::new(Int32Array::from(vec![*v])) - } - (DataType::Int32, None) => Arc::new(Int32Array::from(vec![Option::::None])), - (DataType::Date32, Some(PrimitiveLiteral::Int(v))) => { - Arc::new(Date32Array::from(vec![*v])) - } - (DataType::Date32, None) => Arc::new(Date32Array::from(vec![Option::::None])), - (DataType::Int64, Some(PrimitiveLiteral::Long(v))) => { - Arc::new(Int64Array::from(vec![*v])) - } - (DataType::Int64, None) => Arc::new(Int64Array::from(vec![Option::::None])), - (DataType::Float32, Some(PrimitiveLiteral::Float(v))) => { - Arc::new(Float32Array::from(vec![v.0])) - } - (DataType::Float32, None) => { - 
Arc::new(Float32Array::from(vec![Option::::None])) - } - (DataType::Float64, Some(PrimitiveLiteral::Double(v))) => { - Arc::new(Float64Array::from(vec![v.0])) - } - (DataType::Float64, None) => { - Arc::new(Float64Array::from(vec![Option::::None])) - } - (DataType::Utf8, Some(PrimitiveLiteral::String(v))) => { - Arc::new(StringArray::from(vec![v.as_str()])) - } - (DataType::Utf8, None) => Arc::new(StringArray::from(vec![Option::<&str>::None])), - (DataType::Binary, Some(PrimitiveLiteral::Binary(v))) => { - Arc::new(BinaryArray::from_vec(vec![v.as_slice()])) - } - (DataType::Binary, None) => { - Arc::new(BinaryArray::from_opt_vec(vec![Option::<&[u8]>::None])) - } - (DataType::Decimal128(_, _), Some(PrimitiveLiteral::Int128(v))) => { - Arc::new(arrow_array::Decimal128Array::from(vec![{ *v }])) - } - (DataType::Decimal128(_, _), Some(PrimitiveLiteral::UInt128(v))) => { - Arc::new(arrow_array::Decimal128Array::from(vec![*v as i128])) - } - (DataType::Decimal128(_, _), None) => { - Arc::new(arrow_array::Decimal128Array::from(vec![ - Option::::None, - ])) - } - (DataType::Struct(fields), None) => { - // Create a single-element StructArray with nulls - let null_arrays: Vec = fields - .iter() - .map(|f| { - // Recursively create null arrays for struct fields - // For primitive fields in structs, use simple null arrays (not REE within struct) - match f.data_type() { - DataType::Boolean => { - Arc::new(BooleanArray::from(vec![Option::::None])) - as ArrayRef - } - DataType::Int32 | DataType::Date32 => { - Arc::new(Int32Array::from(vec![Option::::None])) - } - DataType::Int64 => { - Arc::new(Int64Array::from(vec![Option::::None])) - } - DataType::Float32 => { - Arc::new(Float32Array::from(vec![Option::::None])) - } - DataType::Float64 => { - Arc::new(Float64Array::from(vec![Option::::None])) - } - DataType::Utf8 => { - Arc::new(StringArray::from(vec![Option::<&str>::None])) - } - DataType::Binary => { - Arc::new(BinaryArray::from_opt_vec(vec![Option::<&[u8]>::None])) - } - _ => panic!("Unsupported struct field type: {:?}", f.data_type()), - } - }) - .collect(); - Arc::new(arrow_array::StructArray::new( - fields.clone(), - null_arrays, - Some(arrow_buffer::NullBuffer::new_null(1)), - )) - } - _ => { - return Err(Error::new( - ErrorKind::Unexpected, - format!( - "Unsupported constant type combination: {:?} with {:?}", - values_field.data_type(), - prim_lit - ), - )); - } - }; - - // Wrap in Run-End Encoding - create_ree_array(values_array) + // Extract the values type from the RunEndEncoded wrapper + // and create a repeated array instead of REE + let values_type = values_field.data_type(); + create_primitive_array_repeated(values_type, prim_lit, num_rows) } else { // Non-REE type (simple arrays for non-constant fields) - Ok(match (target_type, prim_lit) { - (DataType::Boolean, Some(PrimitiveLiteral::Boolean(value))) => { - Arc::new(BooleanArray::from(vec![*value; num_rows])) - } - (DataType::Boolean, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(BooleanArray::from(vals)) - } - (DataType::Int32, Some(PrimitiveLiteral::Int(value))) => { - Arc::new(Int32Array::from(vec![*value; num_rows])) - } - (DataType::Int32, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(Int32Array::from(vals)) - } - (DataType::Date32, Some(PrimitiveLiteral::Int(value))) => { - Arc::new(Date32Array::from(vec![*value; num_rows])) - } - (DataType::Date32, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(Date32Array::from(vals)) - } - (DataType::Int64, Some(PrimitiveLiteral::Long(value))) 
=> { - Arc::new(Int64Array::from(vec![*value; num_rows])) - } - (DataType::Int64, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(Int64Array::from(vals)) - } - (DataType::Float32, Some(PrimitiveLiteral::Float(value))) => { - Arc::new(Float32Array::from(vec![value.0; num_rows])) - } - (DataType::Float32, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(Float32Array::from(vals)) - } - (DataType::Float64, Some(PrimitiveLiteral::Double(value))) => { - Arc::new(Float64Array::from(vec![value.0; num_rows])) - } - (DataType::Float64, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(Float64Array::from(vals)) - } - (DataType::Utf8, Some(PrimitiveLiteral::String(value))) => { - Arc::new(StringArray::from(vec![value.clone(); num_rows])) - } - (DataType::Utf8, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(StringArray::from(vals)) - } - (DataType::Binary, Some(PrimitiveLiteral::Binary(value))) => { - Arc::new(BinaryArray::from_vec(vec![value; num_rows])) - } - (DataType::Binary, None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(BinaryArray::from_opt_vec(vals)) - } - (DataType::Decimal128(_, _), Some(PrimitiveLiteral::Int128(value))) => { - Arc::new(Decimal128Array::from(vec![*value; num_rows])) - } - (DataType::Decimal128(_, _), Some(PrimitiveLiteral::UInt128(value))) => { - Arc::new(Decimal128Array::from(vec![*value as i128; num_rows])) - } - (DataType::Decimal128(_, _), None) => { - let vals: Vec> = vec![None; num_rows]; - Arc::new(Decimal128Array::from(vals)) - } - (DataType::Struct(fields), None) => { - // Create a StructArray filled with nulls - let null_arrays: Vec = fields - .iter() - .map(|field| Self::create_column(field.data_type(), &None, num_rows)) - .collect::>>()?; - - Arc::new(StructArray::new( - fields.clone(), - null_arrays, - Some(NullBuffer::new_null(num_rows)), - )) - } - (DataType::Null, _) => Arc::new(NullArray::new(num_rows)), - (dt, _) => { - return Err(Error::new( - ErrorKind::Unexpected, - format!("unexpected target column type {}", dt), - )); - } - }) + create_primitive_array_repeated(target_type, prim_lit, num_rows) } } - - /// Converts a PrimitiveLiteral to its corresponding Arrow DataType. - /// This is used for constant fields to determine the Arrow type. - fn primitive_literal_to_arrow_type(literal: &PrimitiveLiteral) -> Result { - Ok(match literal { - PrimitiveLiteral::Boolean(_) => DataType::Boolean, - PrimitiveLiteral::Int(_) => DataType::Int32, - PrimitiveLiteral::Long(_) => DataType::Int64, - PrimitiveLiteral::Float(_) => DataType::Float32, - PrimitiveLiteral::Double(_) => DataType::Float64, - PrimitiveLiteral::String(_) => DataType::Utf8, - PrimitiveLiteral::Binary(_) => DataType::Binary, - PrimitiveLiteral::Int128(_) => DataType::Decimal128(38, 0), - PrimitiveLiteral::UInt128(_) => DataType::Decimal128(38, 0), - PrimitiveLiteral::AboveMax | PrimitiveLiteral::BelowMin => { - return Err(Error::new( - ErrorKind::Unexpected, - "Cannot create arrow type for AboveMax/BelowMin literal", - )); - } - }) - } } #[cfg(test)] @@ -1812,4 +1635,73 @@ mod test { assert_eq!(get_string_value(result.column(4).as_ref(), 0), ""); assert_eq!(get_string_value(result.column(4).as_ref(), 1), ""); } + + /// Test handling of null values in identity-partitioned columns. + /// + /// Reproduces TestPartitionValues.testNullPartitionValue() from iceberg-java, which + /// writes records where the partition column has null values. 
Before the fix in #1922,
+    /// this would error with "Partition field X has null value for identity transform".
+    #[test]
+    fn null_identity_partition_value() {
+        use crate::spec::{Struct, Transform};
+
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(0)
+                .with_fields(vec![
+                    NestedField::optional(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
+                    NestedField::optional(2, "data", Type::Primitive(PrimitiveType::String)).into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        let partition_spec = Arc::new(
+            crate::spec::PartitionSpec::builder(schema.clone())
+                .with_spec_id(0)
+                .add_partition_field("data", "data", Transform::Identity)
+                .unwrap()
+                .build()
+                .unwrap(),
+        );
+
+        // Partition has null value for the data column
+        let partition_data = Struct::from_iter(vec![None]);
+
+        let file_schema = Arc::new(ArrowSchema::new(vec![simple_field(
+            "id",
+            DataType::Int32,
+            true,
+            "1",
+        )]));
+
+        let projected_field_ids = [1, 2];
+
+        let mut transformer = RecordBatchTransformerBuilder::new(schema, &projected_field_ids)
+            .with_partition(partition_spec, partition_data)
+            .expect("Should handle null partition values")
+            .build();
+
+        let file_batch =
+            RecordBatch::try_new(file_schema, vec![Arc::new(Int32Array::from(vec![1, 2, 3]))])
+                .unwrap();
+
+        let result = transformer.process_record_batch(file_batch).unwrap();
+
+        assert_eq!(result.num_columns(), 2);
+        assert_eq!(result.num_rows(), 3);
+
+        let id_col = result
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        assert_eq!(id_col.values(), &[1, 2, 3]);
+
+        // Partition column with null value should produce nulls
+        let data_col = result.column(1);
+        assert!(data_col.is_null(0));
+        assert!(data_col.is_null(1));
+        assert!(data_col.is_null(2));
+    }
 }
diff --git a/crates/iceberg/src/arrow/schema.rs b/crates/iceberg/src/arrow/schema.rs
index ec0135bd77..f5f56454fc 100644
--- a/crates/iceberg/src/arrow/schema.rs
+++ b/crates/iceberg/src/arrow/schema.rs
@@ -35,8 +35,8 @@ use uuid::Uuid;
 
 use crate::error::Result;
 use crate::spec::{
-    Datum, ListType, MapType, NestedField, NestedFieldRef, PrimitiveLiteral, PrimitiveType, Schema,
-    SchemaVisitor, StructType, Type,
+    Datum, FIRST_FIELD_ID, ListType, MapType, NestedField, NestedFieldRef, PrimitiveLiteral,
+    PrimitiveType, Schema, SchemaVisitor, StructType, Type,
 };
 use crate::{Error, ErrorKind};
 
@@ -221,6 +221,19 @@ pub fn arrow_schema_to_schema(schema: &ArrowSchema) -> Result<Schema> {
     visit_schema(schema, &mut visitor)
 }
 
+/// Convert Arrow schema to Iceberg schema with automatically assigned field IDs.
+///
+/// Unlike [`arrow_schema_to_schema`], this function does not require field IDs in the Arrow
+/// schema metadata. Instead, it automatically assigns unique field IDs starting from 1,
+/// following Iceberg's field ID assignment rules.
+///
+/// This is useful when converting Arrow schemas that don't originate from Iceberg tables,
+/// such as schemas from DataFusion or other Arrow-based systems.
+pub fn arrow_schema_to_schema_auto_assign_ids(schema: &ArrowSchema) -> Result<Schema> {
+    let mut visitor = ArrowSchemaConverter::new_with_field_ids_from(FIRST_FIELD_ID);
+    visit_schema(schema, &mut visitor)
+}
+
 /// Convert Arrow type to iceberg type.
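 /// For example (illustrative): `DataType::Int32` converts to `Type::Primitive(PrimitiveType::Int)`,
 /// and `DataType::Utf8` to `Type::Primitive(PrimitiveType::String)`.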
 pub fn arrow_type_to_type(ty: &DataType) -> Result<Type> {
     let mut visitor = ArrowSchemaConverter::new();
@@ -229,7 +242,7 @@ pub fn arrow_type_to_type(ty: &DataType) -> Result<Type> {
 
 const ARROW_FIELD_DOC_KEY: &str = "doc";
 
-pub(super) fn get_field_id(field: &Field) -> Result<i32> {
+pub(super) fn get_field_id_from_metadata(field: &Field) -> Result<i32> {
     if let Some(value) = field.metadata().get(PARQUET_FIELD_ID_META_KEY) {
         return value.parse::<i32>().map_err(|e| {
             Error::new(
@@ -253,19 +266,55 @@ fn get_field_doc(field: &Field) -> Option<String> {
     None
 }
 
-struct ArrowSchemaConverter;
+struct ArrowSchemaConverter {
+    /// When set, the schema builder will reassign field IDs starting from this value
+    /// using level-order traversal (breadth-first).
+    reassign_field_ids_from: Option<i32>,
+    /// Generates unique placeholder IDs for fields before reassignment.
+    /// Required because `ReassignFieldIds` builds an old-to-new ID mapping
+    /// that expects unique input IDs.
+    next_field_id: i32,
+}
 
 impl ArrowSchemaConverter {
     fn new() -> Self {
-        Self {}
+        Self {
+            reassign_field_ids_from: None,
+            next_field_id: 0,
+        }
     }
 
-    fn convert_fields(fields: &Fields, field_results: &[Type]) -> Result<Vec<NestedFieldRef>> {
+    fn new_with_field_ids_from(start_from: i32) -> Self {
+        Self {
+            reassign_field_ids_from: Some(start_from),
+            next_field_id: 0,
+        }
+    }
+
+    fn get_field_id(&mut self, field: &Field) -> Result<i32> {
+        if self.reassign_field_ids_from.is_some() {
+            // Field IDs will be reassigned by the schema builder.
+            // We need unique temporary IDs because ReassignFieldIds builds an
+            // old->new ID mapping that requires unique input IDs.
+            let temp_id = self.next_field_id;
+            self.next_field_id += 1;
+            Ok(temp_id)
+        } else {
+            // Get field ID from arrow field metadata
+            get_field_id_from_metadata(field)
+        }
+    }
+
+    fn convert_fields(
+        &mut self,
+        fields: &Fields,
+        field_results: &[Type],
+    ) -> Result<Vec<NestedFieldRef>> {
         let mut results = Vec::with_capacity(fields.len());
         for i in 0..fields.len() {
             let field = &fields[i];
             let field_type = &field_results[i];
-            let id = get_field_id(field)?;
+            let id = self.get_field_id(field)?;
             let doc = get_field_doc(field);
             let nested_field = NestedField {
                 id,
@@ -287,13 +336,16 @@ impl ArrowSchemaVisitor for ArrowSchemaConverter {
     type U = Schema;
 
     fn schema(&mut self, schema: &ArrowSchema, values: Vec<Type>) -> Result<Schema> {
-        let fields = Self::convert_fields(schema.fields(), &values)?;
-        let builder = Schema::builder().with_fields(fields);
+        let fields = self.convert_fields(schema.fields(), &values)?;
+        let mut builder = Schema::builder().with_fields(fields);
+        if let Some(start_from) = self.reassign_field_ids_from {
+            builder = builder.with_reassigned_field_ids(start_from)
+        }
         builder.build()
     }
 
     fn r#struct(&mut self, fields: &Fields, results: Vec<Type>) -> Result<Type> {
-        let fields = Self::convert_fields(fields, &results)?;
+        let fields = self.convert_fields(fields, &results)?;
         Ok(Type::Struct(StructType::new(fields)))
     }
 
@@ -310,7 +362,7 @@ impl ArrowSchemaVisitor for ArrowSchemaConverter {
             }
         };
 
-        let id = get_field_id(element_field)?;
+        let id = self.get_field_id(element_field)?;
         let doc = get_field_doc(element_field);
         let mut element_field =
             NestedField::list_element(id, value.clone(), !element_field.is_nullable());
@@ -335,7 +387,7 @@ impl ArrowSchemaVisitor for ArrowSchemaConverter {
         let key_field = &fields[0];
         let value_field = &fields[1];
 
-        let key_id = get_field_id(key_field)?;
+        let key_id = self.get_field_id(key_field)?;
         let key_doc = get_field_doc(key_field);
         let mut key_field = NestedField::map_key_element(key_id, key_value.clone());
         if let
Some(doc) = key_doc { @@ -343,7 +395,7 @@ impl ArrowSchemaVisitor for ArrowSchemaConverter { } let key_field = Arc::new(key_field); - let value_id = get_field_id(value_field)?; + let value_id = self.get_field_id(value_field)?; let value_doc = get_field_doc(value_field); let mut value_field = NestedField::map_value_element( value_id, @@ -1019,6 +1071,86 @@ impl TryFrom<&crate::spec::Schema> for ArrowSchema { } } +/// Converts a Datum (Iceberg type + primitive literal) to its corresponding Arrow DataType +/// with Run-End Encoding (REE). +/// +/// This function is used for constant fields in record batches, where all values are the same. +/// Run-End Encoding provides efficient storage for such constant columns. +/// +/// # Arguments +/// * `datum` - The Datum to convert, which contains both type and value information +/// +/// # Returns +/// Arrow DataType with Run-End Encoding applied +/// +/// # Example +/// ``` +/// use iceberg::arrow::datum_to_arrow_type_with_ree; +/// use iceberg::spec::Datum; +/// +/// let datum = Datum::string("test_file.parquet"); +/// let ree_type = datum_to_arrow_type_with_ree(&datum); +/// // Returns: RunEndEncoded(Int32, Utf8) +/// ``` +pub fn datum_to_arrow_type_with_ree(datum: &Datum) -> DataType { + // Helper to create REE type with the given values type. + // Note: values field is nullable as Arrow expects this when building the + // final Arrow schema with `RunArray::try_new`. + let make_ree = |values_type: DataType| -> DataType { + let run_ends_field = Arc::new(Field::new("run_ends", DataType::Int32, false)); + let values_field = Arc::new(Field::new("values", values_type, true)); + DataType::RunEndEncoded(run_ends_field, values_field) + }; + + // Match on the PrimitiveType from the Datum to determine the Arrow type + match datum.data_type() { + PrimitiveType::Boolean => make_ree(DataType::Boolean), + PrimitiveType::Int => make_ree(DataType::Int32), + PrimitiveType::Long => make_ree(DataType::Int64), + PrimitiveType::Float => make_ree(DataType::Float32), + PrimitiveType::Double => make_ree(DataType::Float64), + PrimitiveType::Date => make_ree(DataType::Date32), + PrimitiveType::Time => make_ree(DataType::Int64), + PrimitiveType::Timestamp => make_ree(DataType::Int64), + PrimitiveType::Timestamptz => make_ree(DataType::Int64), + PrimitiveType::TimestampNs => make_ree(DataType::Int64), + PrimitiveType::TimestamptzNs => make_ree(DataType::Int64), + PrimitiveType::String => make_ree(DataType::Utf8), + PrimitiveType::Uuid => make_ree(DataType::Binary), + PrimitiveType::Fixed(_) => make_ree(DataType::Binary), + PrimitiveType::Binary => make_ree(DataType::Binary), + PrimitiveType::Decimal { precision, scale } => { + make_ree(DataType::Decimal128(*precision as u8, *scale as i8)) + } + } +} + +/// Converts a Datum to an Arrow DataType without Run-End Encoding. +/// For constant fields, this returns the plain Arrow type instead of wrapped in REE. 
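+///
+/// For example (illustrative): `datum_to_arrow_type(&Datum::long(42))` returns
+/// `DataType::Int64`, and a decimal datum maps to `DataType::Decimal128(precision, scale)`.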
+pub(crate) fn datum_to_arrow_type(datum: &Datum) -> DataType { + // Match on the PrimitiveType from the Datum to determine the Arrow type + match datum.data_type() { + PrimitiveType::Boolean => DataType::Boolean, + PrimitiveType::Int => DataType::Int32, + PrimitiveType::Long => DataType::Int64, + PrimitiveType::Float => DataType::Float32, + PrimitiveType::Double => DataType::Float64, + PrimitiveType::Date => DataType::Date32, + PrimitiveType::Time => DataType::Int64, + PrimitiveType::Timestamp => DataType::Int64, + PrimitiveType::Timestamptz => DataType::Int64, + PrimitiveType::TimestampNs => DataType::Int64, + PrimitiveType::TimestamptzNs => DataType::Int64, + PrimitiveType::String => DataType::Utf8, + PrimitiveType::Uuid => DataType::Binary, + PrimitiveType::Fixed(_) => DataType::Binary, + PrimitiveType::Binary => DataType::Binary, + PrimitiveType::Decimal { precision, scale } => { + DataType::Decimal128(*precision as u8, *scale as i8) + } + } +} + #[cfg(test)] mod tests { use std::collections::HashMap; @@ -1878,4 +2010,159 @@ mod tests { assert_eq!(array.value(0), [66u8; 16]); } } + + #[test] + fn test_arrow_schema_to_schema_with_field_id() { + // Create a complex Arrow schema without field ID metadata + // Including: primitives, list, nested struct, map, and nested list of structs + let arrow_schema = ArrowSchema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, true), + Field::new("price", DataType::Decimal128(10, 2), false), + Field::new( + "created_at", + DataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into())), + true, + ), + Field::new( + "tags", + DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))), + true, + ), + Field::new( + "address", + DataType::Struct(Fields::from(vec![ + Field::new("street", DataType::Utf8, true), + Field::new("city", DataType::Utf8, false), + Field::new("zip", DataType::Int32, true), + ])), + true, + ), + Field::new( + "attributes", + DataType::Map( + Arc::new(Field::new( + DEFAULT_MAP_FIELD_NAME, + DataType::Struct(Fields::from(vec![ + Field::new("key", DataType::Utf8, false), + Field::new("value", DataType::Utf8, true), + ])), + false, + )), + false, + ), + true, + ), + Field::new( + "orders", + DataType::List(Arc::new(Field::new( + "element", + DataType::Struct(Fields::from(vec![ + Field::new("order_id", DataType::Int64, false), + Field::new("amount", DataType::Float64, false), + ])), + true, + ))), + true, + ), + ]); + + let schema = arrow_schema_to_schema_auto_assign_ids(&arrow_schema).unwrap(); + + // Build expected schema with exact field IDs following level-order assignment: + // Level 0: id=1, name=2, price=3, created_at=4, tags=5, address=6, attributes=7, orders=8 + // Level 1: tags.element=9, address.{street=10,city=11,zip=12}, attributes.{key=13,value=14}, orders.element=15 + // Level 2: orders.element.{order_id=16,amount=17} + let expected = Schema::builder() + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Long)).into(), + NestedField::optional(2, "name", Type::Primitive(PrimitiveType::String)).into(), + NestedField::required( + 3, + "price", + Type::Primitive(PrimitiveType::Decimal { + precision: 10, + scale: 2, + }), + ) + .into(), + NestedField::optional(4, "created_at", Type::Primitive(PrimitiveType::Timestamptz)) + .into(), + NestedField::optional( + 5, + "tags", + Type::List(ListType { + element_field: NestedField::list_element( + 9, + Type::Primitive(PrimitiveType::String), + false, + ) + .into(), + }), + ) + .into(), + 
NestedField::optional( + 6, + "address", + Type::Struct(StructType::new(vec![ + NestedField::optional(10, "street", Type::Primitive(PrimitiveType::String)) + .into(), + NestedField::required(11, "city", Type::Primitive(PrimitiveType::String)) + .into(), + NestedField::optional(12, "zip", Type::Primitive(PrimitiveType::Int)) + .into(), + ])), + ) + .into(), + NestedField::optional( + 7, + "attributes", + Type::Map(MapType { + key_field: NestedField::map_key_element( + 13, + Type::Primitive(PrimitiveType::String), + ) + .into(), + value_field: NestedField::map_value_element( + 14, + Type::Primitive(PrimitiveType::String), + false, + ) + .into(), + }), + ) + .into(), + NestedField::optional( + 8, + "orders", + Type::List(ListType { + element_field: NestedField::list_element( + 15, + Type::Struct(StructType::new(vec![ + NestedField::required( + 16, + "order_id", + Type::Primitive(PrimitiveType::Long), + ) + .into(), + NestedField::required( + 17, + "amount", + Type::Primitive(PrimitiveType::Double), + ) + .into(), + ])), + false, + ) + .into(), + }), + ) + .into(), + ]) + .build() + .unwrap(); + + pretty_assertions::assert_eq!(schema, expected); + assert_eq!(schema.highest_field_id(), 17); + } } diff --git a/crates/iceberg/src/arrow/value.rs b/crates/iceberg/src/arrow/value.rs index f1cf225bb4..77b5620fcb 100644 --- a/crates/iceberg/src/arrow/value.rs +++ b/crates/iceberg/src/arrow/value.rs @@ -15,18 +15,21 @@ // specific language governing permissions and limitations // under the License. +use std::sync::Arc; + use arrow_array::{ Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Decimal128Array, FixedSizeBinaryArray, FixedSizeListArray, Float32Array, Float64Array, Int32Array, Int64Array, LargeBinaryArray, LargeListArray, LargeStringArray, ListArray, MapArray, StringArray, StructArray, Time64MicrosecondArray, TimestampMicrosecondArray, TimestampNanosecondArray, }; +use arrow_buffer::NullBuffer; use arrow_schema::{DataType, FieldRef}; use uuid::Uuid; -use super::get_field_id; +use super::get_field_id_from_metadata; use crate::spec::{ - ListType, Literal, Map, MapType, NestedField, PartnerAccessor, PrimitiveType, + ListType, Literal, Map, MapType, NestedField, PartnerAccessor, PrimitiveLiteral, PrimitiveType, SchemaWithPartnerVisitor, Struct, StructType, Type, visit_struct_with_partner, visit_type_with_partner, }; @@ -258,15 +261,15 @@ impl SchemaWithPartnerVisitor for ArrowArrayToIcebergStructConverter { "The partner is not a decimal128 array", ) })?; - if let DataType::Decimal128(arrow_precision, arrow_scale) = array.data_type() { - if *arrow_precision as u32 != *precision || *arrow_scale as u32 != *scale { - return Err(Error::new( - ErrorKind::DataInvalid, - format!( - "The precision or scale ({arrow_precision},{arrow_scale}) of arrow decimal128 array is not compatible with iceberg decimal type ({precision},{scale})" - ), - )); - } + if let DataType::Decimal128(arrow_precision, arrow_scale) = array.data_type() + && (*arrow_precision as u32 != *precision || *arrow_scale as u32 != *scale) + { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "The precision or scale ({arrow_precision},{arrow_scale}) of arrow decimal128 array is not compatible with iceberg decimal type ({precision},{scale})" + ), + )); } Ok(array.iter().map(|v| v.map(Literal::decimal)).collect()) } @@ -348,10 +351,10 @@ impl SchemaWithPartnerVisitor for ArrowArrayToIcebergStructConverter { } else if let Some(array) = partner.as_any().downcast_ref::() { Ok(array.iter().map(|v| 
v.map(Literal::string)).collect()) } else { - return Err(Error::new( + Err(Error::new( ErrorKind::DataInvalid, "The partner is not a string array", - )); + )) } } PrimitiveType::Uuid => { @@ -415,10 +418,10 @@ impl SchemaWithPartnerVisitor for ArrowArrayToIcebergStructConverter { .map(|v| v.map(|v| Literal::binary(v.to_vec()))) .collect()) } else { - return Err(Error::new( + Err(Error::new( ErrorKind::DataInvalid, "The partner is not a binary array", - )); + )) } } } @@ -447,7 +450,7 @@ impl FieldMatchMode { /// Determines if an Arrow field matches an Iceberg field based on the matching mode. pub fn match_field(&self, arrow_field: &FieldRef, iceberg_field: &NestedField) -> bool { match self { - FieldMatchMode::Id => get_field_id(arrow_field) + FieldMatchMode::Id => get_field_id_from_metadata(arrow_field) .map(|id| id == iceberg_field.id) .unwrap_or(false), FieldMatchMode::Name => arrow_field.name() == &iceberg_field.name, @@ -617,6 +620,274 @@ pub fn arrow_primitive_to_literal( ) } +/// Create a single-element array from a primitive literal. +/// +/// This is used for creating constant arrays (Run-End Encoded arrays) where we need +/// a single value that represents all rows. +#[allow(dead_code)] +pub(crate) fn create_primitive_array_single_element( + data_type: &DataType, + prim_lit: &Option, +) -> Result { + match (data_type, prim_lit) { + (DataType::Boolean, Some(PrimitiveLiteral::Boolean(v))) => { + Ok(Arc::new(BooleanArray::from(vec![*v]))) + } + (DataType::Boolean, None) => Ok(Arc::new(BooleanArray::from(vec![Option::::None]))), + (DataType::Int32, Some(PrimitiveLiteral::Int(v))) => { + Ok(Arc::new(Int32Array::from(vec![*v]))) + } + (DataType::Int32, None) => Ok(Arc::new(Int32Array::from(vec![Option::::None]))), + (DataType::Date32, Some(PrimitiveLiteral::Int(v))) => { + Ok(Arc::new(Date32Array::from(vec![*v]))) + } + (DataType::Date32, None) => Ok(Arc::new(Date32Array::from(vec![Option::::None]))), + (DataType::Int64, Some(PrimitiveLiteral::Long(v))) => { + Ok(Arc::new(Int64Array::from(vec![*v]))) + } + (DataType::Int64, None) => Ok(Arc::new(Int64Array::from(vec![Option::::None]))), + (DataType::Float32, Some(PrimitiveLiteral::Float(v))) => { + Ok(Arc::new(Float32Array::from(vec![v.0]))) + } + (DataType::Float32, None) => Ok(Arc::new(Float32Array::from(vec![Option::::None]))), + (DataType::Float64, Some(PrimitiveLiteral::Double(v))) => { + Ok(Arc::new(Float64Array::from(vec![v.0]))) + } + (DataType::Float64, None) => Ok(Arc::new(Float64Array::from(vec![Option::::None]))), + (DataType::Utf8, Some(PrimitiveLiteral::String(v))) => { + Ok(Arc::new(StringArray::from(vec![v.as_str()]))) + } + (DataType::Utf8, None) => Ok(Arc::new(StringArray::from(vec![Option::<&str>::None]))), + (DataType::Binary, Some(PrimitiveLiteral::Binary(v))) => { + Ok(Arc::new(BinaryArray::from_vec(vec![v.as_slice()]))) + } + (DataType::Binary, None) => Ok(Arc::new(BinaryArray::from_opt_vec(vec![ + Option::<&[u8]>::None, + ]))), + (DataType::Decimal128(precision, scale), Some(PrimitiveLiteral::Int128(v))) => { + let array = Decimal128Array::from(vec![{ *v }]) + .with_precision_and_scale(*precision, *scale) + .map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Failed to create Decimal128Array with precision {precision} and scale {scale}: {e}" + ), + ) + })?; + Ok(Arc::new(array)) + } + (DataType::Decimal128(precision, scale), Some(PrimitiveLiteral::UInt128(v))) => { + let array = Decimal128Array::from(vec![*v as i128]) + .with_precision_and_scale(*precision, *scale) + .map_err(|e| { + Error::new( + 
ErrorKind::DataInvalid, + format!( + "Failed to create Decimal128Array with precision {precision} and scale {scale}: {e}" + ), + ) + })?; + Ok(Arc::new(array)) + } + (DataType::Decimal128(precision, scale), None) => { + let array = Decimal128Array::from(vec![Option::::None]) + .with_precision_and_scale(*precision, *scale) + .map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Failed to create Decimal128Array with precision {precision} and scale {scale}: {e}" + ), + ) + })?; + Ok(Arc::new(array)) + } + (DataType::Struct(fields), None) => { + // Create a single-element StructArray with nulls + let null_arrays: Vec = fields + .iter() + .map(|f| { + // Recursively create null arrays for struct fields + // For primitive fields in structs, use simple null arrays (not REE within struct) + match f.data_type() { + DataType::Boolean => { + Ok(Arc::new(BooleanArray::from(vec![Option::::None])) + as ArrayRef) + } + DataType::Int32 | DataType::Date32 => { + Ok(Arc::new(Int32Array::from(vec![Option::::None])) as ArrayRef) + } + DataType::Int64 => { + Ok(Arc::new(Int64Array::from(vec![Option::::None])) as ArrayRef) + } + DataType::Float32 => { + Ok(Arc::new(Float32Array::from(vec![Option::::None])) as ArrayRef) + } + DataType::Float64 => { + Ok(Arc::new(Float64Array::from(vec![Option::::None])) as ArrayRef) + } + DataType::Utf8 => { + Ok(Arc::new(StringArray::from(vec![Option::<&str>::None])) as ArrayRef) + } + DataType::Binary => { + Ok( + Arc::new(BinaryArray::from_opt_vec(vec![Option::<&[u8]>::None])) + as ArrayRef, + ) + } + _ => Err(Error::new( + ErrorKind::Unexpected, + format!("Unsupported struct field type: {:?}", f.data_type()), + )), + } + }) + .collect::>>()?; + Ok(Arc::new(arrow_array::StructArray::new( + fields.clone(), + null_arrays, + Some(arrow_buffer::NullBuffer::new_null(1)), + ))) + } + _ => Err(Error::new( + ErrorKind::Unexpected, + format!("Unsupported constant type combination: {data_type:?} with {prim_lit:?}"), + )), + } +} + +/// Create a repeated array from a primitive literal for a given number of rows. +/// +/// This is used for creating non-constant arrays where we need the same value +/// repeated for each row. 
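+///
+/// For example (illustrative): repeating `PrimitiveLiteral::Int(7)` over three rows with a
+/// `DataType::Int32` target yields the same values as `Int32Array::from(vec![7, 7, 7])`,
+/// while a `None` literal yields an all-null array of the target type.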
+pub(crate) fn create_primitive_array_repeated(
+    data_type: &DataType,
+    prim_lit: &Option<PrimitiveLiteral>,
+    num_rows: usize,
+) -> Result<ArrayRef> {
+    Ok(match (data_type, prim_lit) {
+        (DataType::Boolean, Some(PrimitiveLiteral::Boolean(value))) => {
+            Arc::new(BooleanArray::from(vec![*value; num_rows]))
+        }
+        (DataType::Boolean, None) => {
+            let vals: Vec<Option<bool>> = vec![None; num_rows];
+            Arc::new(BooleanArray::from(vals))
+        }
+        (DataType::Int32, Some(PrimitiveLiteral::Int(value))) => {
+            Arc::new(Int32Array::from(vec![*value; num_rows]))
+        }
+        (DataType::Int32, None) => {
+            let vals: Vec<Option<i32>> = vec![None; num_rows];
+            Arc::new(Int32Array::from(vals))
+        }
+        (DataType::Date32, Some(PrimitiveLiteral::Int(value))) => {
+            Arc::new(Date32Array::from(vec![*value; num_rows]))
+        }
+        (DataType::Date32, None) => {
+            let vals: Vec<Option<i32>> = vec![None; num_rows];
+            Arc::new(Date32Array::from(vals))
+        }
+        (DataType::Int64, Some(PrimitiveLiteral::Long(value))) => {
+            Arc::new(Int64Array::from(vec![*value; num_rows]))
+        }
+        (DataType::Int64, None) => {
+            let vals: Vec<Option<i64>> = vec![None; num_rows];
+            Arc::new(Int64Array::from(vals))
+        }
+        (DataType::Float32, Some(PrimitiveLiteral::Float(value))) => {
+            Arc::new(Float32Array::from(vec![value.0; num_rows]))
+        }
+        (DataType::Float32, None) => {
+            let vals: Vec<Option<f32>> = vec![None; num_rows];
+            Arc::new(Float32Array::from(vals))
+        }
+        (DataType::Float64, Some(PrimitiveLiteral::Double(value))) => {
+            Arc::new(Float64Array::from(vec![value.0; num_rows]))
+        }
+        (DataType::Float64, None) => {
+            let vals: Vec<Option<f64>> = vec![None; num_rows];
+            Arc::new(Float64Array::from(vals))
+        }
+        (DataType::Utf8, Some(PrimitiveLiteral::String(value))) => {
+            Arc::new(StringArray::from(vec![value.clone(); num_rows]))
+        }
+        (DataType::Utf8, None) => {
+            let vals: Vec<Option<String>> = vec![None; num_rows];
+            Arc::new(StringArray::from(vals))
+        }
+        (DataType::Binary, Some(PrimitiveLiteral::Binary(value))) => {
+            Arc::new(BinaryArray::from_vec(vec![value; num_rows]))
+        }
+        (DataType::Binary, None) => {
+            let vals: Vec<Option<&[u8]>> = vec![None; num_rows];
+            Arc::new(BinaryArray::from_opt_vec(vals))
+        }
+        (DataType::Decimal128(precision, scale), Some(PrimitiveLiteral::Int128(value))) => {
+            Arc::new(
+                Decimal128Array::from(vec![*value; num_rows])
+                    .with_precision_and_scale(*precision, *scale)
+                    .map_err(|e| {
+                        Error::new(
+                            ErrorKind::DataInvalid,
+                            format!(
+                                "Failed to create Decimal128Array with precision {precision} and scale {scale}: {e}"
+                            ),
+                        )
+                    })?,
+            )
+        }
+        (DataType::Decimal128(precision, scale), Some(PrimitiveLiteral::UInt128(value))) => {
+            Arc::new(
+                Decimal128Array::from(vec![*value as i128; num_rows])
+                    .with_precision_and_scale(*precision, *scale)
+                    .map_err(|e| {
+                        Error::new(
+                            ErrorKind::DataInvalid,
+                            format!(
+                                "Failed to create Decimal128Array with precision {precision} and scale {scale}: {e}"
+                            ),
+                        )
+                    })?,
+            )
+        }
+        (DataType::Decimal128(precision, scale), None) => {
+            let vals: Vec<Option<i128>> = vec![None; num_rows];
+            Arc::new(
+                Decimal128Array::from(vals)
+                    .with_precision_and_scale(*precision, *scale)
+                    .map_err(|e| {
+                        Error::new(
+                            ErrorKind::DataInvalid,
+                            format!(
+                                "Failed to create Decimal128Array with precision {precision} and scale {scale}: {e}"
+                            ),
+                        )
+                    })?,
+            )
+        }
+        (DataType::Struct(fields), None) => {
+            // Create a StructArray filled with nulls
+            let null_arrays: Vec<ArrayRef> = fields
+                .iter()
+                .map(|field| create_primitive_array_repeated(field.data_type(), &None, num_rows))
+                .collect::<Result<Vec<_>>>()?;
+
+            Arc::new(StructArray::new(
+                fields.clone(),
+                null_arrays,
+                Some(NullBuffer::new_null(num_rows)),
+            ))
+        }
+        (DataType::Null, _) =>
Arc::new(arrow_array::NullArray::new(num_rows)), + (dt, _) => { + return Err(Error::new( + ErrorKind::Unexpected, + format!("unexpected target column type {dt}"), + )); + } + }) +} + #[cfg(test)] mod test { use std::collections::HashMap; @@ -1467,4 +1738,48 @@ mod test { ]))), ]); } + + #[test] + fn test_create_decimal_array_respects_precision() { + // Decimal128Array::from() uses Arrow's default precision (38) instead of the + // target precision, causing RecordBatch construction to fail when schemas don't match. + let target_precision = 18u8; + let target_scale = 10i8; + let target_type = DataType::Decimal128(target_precision, target_scale); + let value = PrimitiveLiteral::Int128(10000000000); + + let array = create_primitive_array_single_element(&target_type, &Some(value)) + .expect("Failed to create decimal array"); + + match array.data_type() { + DataType::Decimal128(precision, scale) => { + assert_eq!(*precision, target_precision); + assert_eq!(*scale, target_scale); + } + other => panic!("Expected Decimal128, got {other:?}"), + } + } + + #[test] + fn test_create_decimal_array_repeated_respects_precision() { + // Ensure repeated arrays also respect target precision, not Arrow's default. + let target_precision = 18u8; + let target_scale = 10i8; + let target_type = DataType::Decimal128(target_precision, target_scale); + let value = PrimitiveLiteral::Int128(10000000000); + let num_rows = 5; + + let array = create_primitive_array_repeated(&target_type, &Some(value), num_rows) + .expect("Failed to create repeated decimal array"); + + match array.data_type() { + DataType::Decimal128(precision, scale) => { + assert_eq!(*precision, target_precision); + assert_eq!(*scale, target_scale); + } + other => panic!("Expected Decimal128, got {other:?}"), + } + + assert_eq!(array.len(), num_rows); + } } diff --git a/crates/iceberg/src/catalog/memory/catalog.rs b/crates/iceberg/src/catalog/memory/catalog.rs index cfa3dc6b52..df0299acb2 100644 --- a/crates/iceberg/src/catalog/memory/catalog.rs +++ b/crates/iceberg/src/catalog/memory/catalog.rs @@ -163,8 +163,12 @@ impl Catalog for MemoryCatalog { let namespaces = root_namespace_state .list_namespaces_under(parent_namespace_ident)? 
.into_iter() - .map(|name| NamespaceIdent::new(name.to_string())) - .collect_vec(); + .map(|name| { + let mut names = parent_namespace_ident.iter().cloned().collect::>(); + names.push(name.to_string()); + NamespaceIdent::from_vec(names) + }) + .collect::>>()?; Ok(namespaces) } @@ -599,7 +603,7 @@ pub(crate) mod tests { .list_namespaces(Some(&namespace_ident_1)) .await .unwrap(), - vec![NamespaceIdent::new("b".into())] + vec![namespace_ident_2] ); } @@ -628,9 +632,9 @@ pub(crate) mod tests { .unwrap() ), to_set(vec![ - NamespaceIdent::new("a".into()), - NamespaceIdent::new("b".into()), - NamespaceIdent::new("c".into()), + namespace_ident_2, + namespace_ident_3, + namespace_ident_4, ]) ); } diff --git a/crates/iceberg/src/catalog/mod.rs b/crates/iceberg/src/catalog/mod.rs index 27d5edaedb..f3a521379e 100644 --- a/crates/iceberg/src/catalog/mod.rs +++ b/crates/iceberg/src/catalog/mod.rs @@ -1000,13 +1000,13 @@ mod _serde_set_statistics { snapshot_id, statistics, } = SetStatistics::deserialize(deserializer)?; - if let Some(snapshot_id) = snapshot_id { - if snapshot_id != statistics.snapshot_id { - return Err(serde::de::Error::custom(format!( - "Snapshot id to set {snapshot_id} does not match the statistics file snapshot id {}", - statistics.snapshot_id - ))); - } + if let Some(snapshot_id) = snapshot_id + && snapshot_id != statistics.snapshot_id + { + return Err(serde::de::Error::custom(format!( + "Snapshot id to set {snapshot_id} does not match the statistics file snapshot id {}", + statistics.snapshot_id + ))); } Ok(statistics) diff --git a/crates/iceberg/src/delete_vector.rs b/crates/iceberg/src/delete_vector.rs index d2fc33b6f9..035cef12ff 100644 --- a/crates/iceberg/src/delete_vector.rs +++ b/crates/iceberg/src/delete_vector.rs @@ -93,10 +93,10 @@ impl Iterator for DeleteVectorIterator<'_> { type Item = u64; fn next(&mut self) -> Option { - if let Some(inner) = &mut self.inner { - if let Some(inner_next) = inner.bitmap_iter.next() { - return Some(u64::from(inner.high_bits) << 32 | u64::from(inner_next)); - } + if let Some(inner) = &mut self.inner + && let Some(inner_next) = inner.bitmap_iter.next() + { + return Some(u64::from(inner.high_bits) << 32 | u64::from(inner_next)); } if let Some((high_bits, next_bitmap)) = self.outer.next() { diff --git a/crates/iceberg/src/expr/visitors/expression_evaluator.rs b/crates/iceberg/src/expr/visitors/expression_evaluator.rs index 3675ce355f..570c409502 100644 --- a/crates/iceberg/src/expr/visitors/expression_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/expression_evaluator.rs @@ -346,7 +346,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -374,7 +374,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, diff --git a/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs b/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs index 2b65cf12aa..06c92ab3e8 100644 --- a/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs @@ -1995,7 +1995,7 @@ mod test { lower_bounds: Default::default(), upper_bounds: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, 
partition_spec_id: 0, @@ -2021,7 +2021,7 @@ mod test { lower_bounds: Default::default(), upper_bounds: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -2083,7 +2083,7 @@ mod test { column_sizes: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -2114,7 +2114,7 @@ mod test { column_sizes: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -2146,7 +2146,7 @@ mod test { column_sizes: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -2178,7 +2178,7 @@ mod test { column_sizes: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, diff --git a/crates/iceberg/src/expr/visitors/manifest_evaluator.rs b/crates/iceberg/src/expr/visitors/manifest_evaluator.rs index abbd136cb1..770163ae95 100644 --- a/crates/iceberg/src/expr/visitors/manifest_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/manifest_evaluator.rs @@ -161,10 +161,10 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> { _predicate: &BoundPredicate, ) -> crate::Result { let field = self.field_summary_for_reference(reference); - if let Some(contains_nan) = field.contains_nan { - if !contains_nan { - return ROWS_CANNOT_MATCH; - } + if let Some(contains_nan) = field.contains_nan + && !contains_nan + { + return ROWS_CANNOT_MATCH; } if ManifestFilterVisitor::are_all_null(field, &reference.field().field_type) { @@ -389,16 +389,16 @@ impl BoundPredicateVisitor for ManifestFilterVisitor<'_> { return ROWS_MIGHT_MATCH; } - if prefix.as_bytes().eq(&lower_bound[..prefix_len]) { - if let Some(upper_bound) = &field.upper_bound { - // if upper is shorter than the prefix then upper can't start with the prefix - if prefix_len > upper_bound.len() { - return ROWS_MIGHT_MATCH; - } + if prefix.as_bytes().eq(&lower_bound[..prefix_len]) + && let Some(upper_bound) = &field.upper_bound + { + // if upper is shorter than the prefix then upper can't start with the prefix + if prefix_len > upper_bound.len() { + return ROWS_MIGHT_MATCH; + } - if prefix.as_bytes().eq(&upper_bound[..prefix_len]) { - return ROWS_CANNOT_MATCH; - } + if prefix.as_bytes().eq(&upper_bound[..prefix_len]) { + return ROWS_CANNOT_MATCH; } } } diff --git a/crates/iceberg/src/expr/visitors/page_index_evaluator.rs b/crates/iceberg/src/expr/visitors/page_index_evaluator.rs index af13f3af2c..66e2898532 100644 --- a/crates/iceberg/src/expr/visitors/page_index_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/page_index_evaluator.rs @@ -23,7 +23,7 @@ use fnv::FnvHashSet; use ordered_float::OrderedFloat; use parquet::arrow::arrow_reader::{RowSelection, RowSelector}; use parquet::file::metadata::RowGroupMetaData; -use parquet::file::page_index::column_index::ColumnIndexMetaData as Index; +use parquet::file::page_index::column_index::ColumnIndexMetaData; use parquet::file::page_index::offset_index::OffsetIndexMetaData; use crate::expr::visitors::bound_predicate_visitor::{BoundPredicateVisitor, visit}; @@ -59,7 +59,7 @@ impl PageNullCount { } pub(crate) struct PageIndexEvaluator<'a> { - column_index: &'a [Index], + column_index: &'a [ColumnIndexMetaData], offset_index: &'a OffsetIndex, 
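    // Row-group metadata; used with the offset index to derive per-page row counts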
row_group_metadata: &'a RowGroupMetaData, iceberg_field_id_to_parquet_column_index: &'a HashMap, @@ -69,7 +69,7 @@ pub(crate) struct PageIndexEvaluator<'a> { impl<'a> PageIndexEvaluator<'a> { pub(crate) fn new( - column_index: &'a [Index], + column_index: &'a [ColumnIndexMetaData], offset_index: &'a OffsetIndex, row_group_metadata: &'a RowGroupMetaData, field_id_map: &'a HashMap, @@ -92,7 +92,7 @@ impl<'a> PageIndexEvaluator<'a> { /// matching the filter predicate. pub(crate) fn eval( filter: &'a BoundPredicate, - column_index: &'a [Index], + column_index: &'a [ColumnIndexMetaData], offset_index: &'a OffsetIndex, row_group_metadata: &'a RowGroupMetaData, field_id_map: &'a HashMap, @@ -240,143 +240,135 @@ impl<'a> PageIndexEvaluator<'a> { fn apply_predicate_to_column_index( predicate: F, field_type: &PrimitiveType, - column_index: &Index, + column_index: &ColumnIndexMetaData, row_counts: &[usize], ) -> Result>> where F: Fn(Option, Option, PageNullCount) -> Result, { let result: Result> = match column_index { - Index::NONE => { + ColumnIndexMetaData::NONE => { return Ok(None); } - Index::BOOLEAN(idx) => (0..idx.num_pages() as usize) + ColumnIndexMetaData::BOOLEAN(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(page_idx, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - idx.min_value(page_idx).copied().map(|val| { + min.map(|&val| { Datum::new(field_type.clone(), PrimitiveLiteral::Boolean(val)) }), - idx.max_value(page_idx).copied().map(|val| { + max.map(|&val| { Datum::new(field_type.clone(), PrimitiveLiteral::Boolean(val)) }), - PageNullCount::from_row_and_null_counts( - row_count, - idx.null_count(page_idx), - ), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::INT32(idx) => (0..idx.num_pages() as usize) + ColumnIndexMetaData::INT32(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(page_idx, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - idx.min_value(page_idx) - .copied() - .map(|val| Datum::new(field_type.clone(), PrimitiveLiteral::Int(val))), - idx.max_value(page_idx) - .copied() - .map(|val| Datum::new(field_type.clone(), PrimitiveLiteral::Int(val))), - PageNullCount::from_row_and_null_counts( - row_count, - idx.null_count(page_idx), - ), + min.map(|&val| Datum::new(field_type.clone(), PrimitiveLiteral::Int(val))), + max.map(|&val| Datum::new(field_type.clone(), PrimitiveLiteral::Int(val))), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::INT64(idx) => (0..idx.num_pages() as usize) + ColumnIndexMetaData::INT64(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(page_idx, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - idx.min_value(page_idx) - .copied() - .map(|val| Datum::new(field_type.clone(), PrimitiveLiteral::Long(val))), - idx.max_value(page_idx) - .copied() - .map(|val| Datum::new(field_type.clone(), PrimitiveLiteral::Long(val))), - PageNullCount::from_row_and_null_counts( - row_count, - idx.null_count(page_idx), - ), + min.map(|&val| Datum::new(field_type.clone(), PrimitiveLiteral::Long(val))), + max.map(|&val| Datum::new(field_type.clone(), PrimitiveLiteral::Long(val))), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::FLOAT(idx) => (0..idx.num_pages() as usize) + 
ColumnIndexMetaData::FLOAT(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(page_idx, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - idx.min_value(page_idx).copied().map(|val| { + min.map(|&val| { Datum::new( field_type.clone(), PrimitiveLiteral::Float(OrderedFloat::from(val)), ) }), - idx.max_value(page_idx).copied().map(|val| { + max.map(|&val| { Datum::new( field_type.clone(), PrimitiveLiteral::Float(OrderedFloat::from(val)), ) }), - PageNullCount::from_row_and_null_counts( - row_count, - idx.null_count(page_idx), - ), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::DOUBLE(idx) => (0..idx.num_pages() as usize) + ColumnIndexMetaData::DOUBLE(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(page_idx, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - idx.min_value(page_idx).copied().map(|val| { + min.map(|&val| { Datum::new( field_type.clone(), PrimitiveLiteral::Double(OrderedFloat::from(val)), ) }), - idx.max_value(page_idx).copied().map(|val| { + max.map(|&val| { Datum::new( field_type.clone(), PrimitiveLiteral::Double(OrderedFloat::from(val)), ) }), - PageNullCount::from_row_and_null_counts( - row_count, - idx.null_count(page_idx), - ), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::BYTE_ARRAY(idx) => (0..idx.num_pages() as usize) + ColumnIndexMetaData::BYTE_ARRAY(idx) => idx + .min_values_iter() + .zip(idx.max_values_iter()) + .enumerate() .zip(row_counts.iter()) - .map(|(page_idx, &row_count)| { + .map(|((i, (min, max)), &row_count)| { predicate( - idx.min_value(page_idx).map(|val| { + min.map(|val| { Datum::new( field_type.clone(), PrimitiveLiteral::String(String::from_utf8(val.to_vec()).unwrap()), ) }), - idx.max_value(page_idx).map(|val| { + max.map(|val| { Datum::new( field_type.clone(), PrimitiveLiteral::String(String::from_utf8(val.to_vec()).unwrap()), ) }), - PageNullCount::from_row_and_null_counts( - row_count, - idx.null_count(page_idx), - ), + PageNullCount::from_row_and_null_counts(row_count, idx.null_count(i)), ) }) .collect(), - Index::FIXED_LEN_BYTE_ARRAY(_) => { + ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(_) => { return Err(Error::new( ErrorKind::FeatureUnsupported, "unsupported 'FIXED_LEN_BYTE_ARRAY' index type in column_index", )); } - Index::INT96(_) => { + ColumnIndexMetaData::INT96(_) => { return Err(Error::new( ErrorKind::FeatureUnsupported, "unsupported 'INT96' index type in column_index", @@ -553,16 +545,16 @@ impl BoundPredicateVisitor for PageIndexEvaluator<'_> { return Ok(false); } - if let Some(min) = min { - if min.gt(datum) { - return Ok(false); - } + if let Some(min) = min + && min.gt(datum) + { + return Ok(false); } - if let Some(max) = max { - if max.lt(datum) { - return Ok(false); - } + if let Some(max) = max + && max.lt(datum) + { + return Ok(false); } Ok(true) @@ -793,26 +785,164 @@ mod tests { use std::collections::HashMap; use std::sync::Arc; - use parquet::arrow::arrow_reader::RowSelector; - use parquet::basic::{LogicalType as ParquetLogicalType, Type as ParquetPhysicalType}; - use parquet::file::metadata::{ColumnChunkMetaData, ColumnIndexBuilder, RowGroupMetaData}; - use parquet::file::page_index::column_index::ColumnIndexMetaData as Index; - use parquet::file::page_index::offset_index::{OffsetIndexMetaData, PageLocation}; - use parquet::file::statistics::Statistics; - use 
parquet::schema::types::{ - ColumnDescriptor, ColumnPath, SchemaDescriptor, Type as parquetSchemaType, + use arrow_array::{ArrayRef, Float32Array, RecordBatch, StringArray}; + use arrow_schema::{DataType, Field, Schema as ArrowSchema}; + use parquet::arrow::ArrowWriter; + use parquet::arrow::arrow_reader::{ + ArrowReaderOptions, ParquetRecordBatchReaderBuilder, RowSelector, }; + use parquet::file::metadata::ParquetMetaData; + use parquet::file::properties::WriterProperties; use rand::{Rng, thread_rng}; + use tempfile::NamedTempFile; use super::PageIndexEvaluator; use crate::expr::{Bind, Reference}; use crate::spec::{Datum, NestedField, PrimitiveType, Schema, Type}; use crate::{ErrorKind, Result}; + /// Helper function to create a test parquet file with page indexes + /// and return the metadata needed for testing + fn create_test_parquet_file() -> Result<(Arc, NamedTempFile)> { + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("col_float", DataType::Float32, true), + Field::new("col_string", DataType::Utf8, true), + ])); + + let temp_file = NamedTempFile::new().unwrap(); + let file = temp_file.reopen().unwrap(); + + let props = WriterProperties::builder() + .set_data_page_row_count_limit(1024) + .set_write_batch_size(512) + .build(); + + let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap(); + + let mut batches = vec![]; + + // Batch 1: 1024 rows - strings with AARDVARK, BEAR, BISON + let float_vals: Vec> = vec![None; 1024]; + let mut string_vals = vec![]; + string_vals.push(Some("AARDVARK".to_string())); + for _ in 1..1023 { + string_vals.push(Some("BEAR".to_string())); + } + string_vals.push(Some("BISON".to_string())); + + batches.push( + RecordBatch::try_new(arrow_schema.clone(), vec![ + Arc::new(Float32Array::from(float_vals)), + Arc::new(StringArray::from(string_vals)), + ]) + .unwrap(), + ); + + // Batch 2: 1024 rows - all DEER + let float_vals: Vec> = vec![None; 1024]; + let string_vals = vec![Some("DEER".to_string()); 1024]; + + batches.push( + RecordBatch::try_new(arrow_schema.clone(), vec![ + Arc::new(Float32Array::from(float_vals)), + Arc::new(StringArray::from(string_vals)), + ]) + .unwrap(), + ); + + // Batch 3: 1024 rows - float 0-10 + let mut float_vals = vec![]; + for i in 0..1024 { + float_vals.push(Some(i as f32 * 10.0 / 1024.0)); + } + let mut string_vals = vec![]; + string_vals.push(Some("GIRAFFE".to_string())); + string_vals.push(None); + for _ in 2..1024 { + string_vals.push(Some("HIPPO".to_string())); + } + + batches.push( + RecordBatch::try_new(arrow_schema.clone(), vec![ + Arc::new(Float32Array::from(float_vals)), + Arc::new(StringArray::from(string_vals)), + ]) + .unwrap(), + ); + + // Batch 4: 1024 rows - float 10-20 + let mut float_vals = vec![None]; + for i in 1..1024 { + float_vals.push(Some(10.0 + i as f32 * 10.0 / 1024.0)); + } + let string_vals = vec![Some("HIPPO".to_string()); 1024]; + + batches.push( + RecordBatch::try_new(arrow_schema.clone(), vec![ + Arc::new(Float32Array::from(float_vals)), + Arc::new(StringArray::from(string_vals)), + ]) + .unwrap(), + ); + + // Write rows one at a time to give the writer a chance to split into pages + for batch in &batches { + for i in 0..batch.num_rows() { + writer.write(&batch.slice(i, 1)).unwrap(); + } + } + + writer.close().unwrap(); + + let file = temp_file.reopen().unwrap(); + let options = ArrowReaderOptions::new().with_page_index(true); + let reader = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap(); + let metadata = 
reader.metadata().clone(); + + Ok((metadata, temp_file)) + } + + /// Get the test metadata components for testing + fn get_test_metadata( + metadata: &ParquetMetaData, + ) -> ( + Vec, + Vec, + &parquet::file::metadata::RowGroupMetaData, + ) { + let row_group_metadata = metadata.row_group(0); + let column_index = metadata.column_index().unwrap()[0].to_vec(); + let offset_index = metadata.offset_index().unwrap()[0].to_vec(); + (column_index, offset_index, row_group_metadata) + } + #[test] fn eval_matches_no_rows_for_empty_row_group() -> Result<()> { - let row_group_metadata = create_row_group_metadata(0, 0, None, 0, None)?; - let (column_index, offset_index) = create_page_index()?; + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("col_float", DataType::Float32, true), + Field::new("col_string", DataType::Utf8, true), + ])); + + let empty_float: ArrayRef = Arc::new(Float32Array::from(Vec::>::new())); + let empty_string: ArrayRef = Arc::new(StringArray::from(Vec::>::new())); + let empty_batch = + RecordBatch::try_new(arrow_schema.clone(), vec![empty_float, empty_string]).unwrap(); + + let temp_file = NamedTempFile::new().unwrap(); + let file = temp_file.reopen().unwrap(); + + let mut writer = ArrowWriter::try_new(file, arrow_schema, None).unwrap(); + writer.write(&empty_batch).unwrap(); + writer.close().unwrap(); + + let file = temp_file.reopen().unwrap(); + let options = ArrowReaderOptions::new().with_page_index(true); + let reader = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap(); + let metadata = reader.metadata(); + + if metadata.num_row_groups() == 0 || metadata.row_group(0).num_rows() == 0 { + return Ok(()); + } let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; @@ -820,27 +950,28 @@ mod tests { .greater_than(Datum::float(1.0)) .bind(iceberg_schema_ref.clone(), false)?; + let row_group_metadata = metadata.row_group(0); + let column_index = metadata.column_index().unwrap()[0].to_vec(); + let offset_index = metadata.offset_index().unwrap()[0].to_vec(); + let result = PageIndexEvaluator::eval( &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; - let expected = vec![]; - - assert_eq!(result, expected); + assert_eq!(result.len(), 0); Ok(()) } #[test] fn eval_is_null_select_only_pages_with_nulls() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -851,15 +982,15 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; let expected = vec![ - RowSelector::select(1024), - RowSelector::skip(1024), RowSelector::select(2048), + RowSelector::skip(1024), + RowSelector::select(1024), ]; assert_eq!(result, expected); @@ -869,9 +1000,8 @@ mod tests { #[test] fn eval_is_not_null_dont_select_pages_with_all_nulls() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = 
get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -882,12 +1012,12 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; - let expected = vec![RowSelector::skip(1024), RowSelector::select(3072)]; + let expected = vec![RowSelector::skip(2048), RowSelector::select(2048)]; assert_eq!(result, expected); @@ -896,9 +1026,8 @@ mod tests { #[test] fn eval_is_nan_select_all() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -909,7 +1038,7 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; @@ -923,9 +1052,8 @@ mod tests { #[test] fn eval_not_nan_select_all() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -936,7 +1064,7 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; @@ -950,9 +1078,8 @@ mod tests { #[test] fn eval_inequality_nan_datum_all_rows_except_all_null_pages() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -963,12 +1090,12 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; - let expected = vec![RowSelector::skip(1024), RowSelector::select(3072)]; + let expected = vec![RowSelector::skip(2048), RowSelector::select(2048)]; assert_eq!(result, expected); @@ -977,9 +1104,8 @@ mod tests { #[test] fn eval_inequality_pages_containing_value_except_all_null_pages() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -990,16 +1116,15 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; let expected = vec![ - RowSelector::skip(1024), + RowSelector::skip(2048), RowSelector::select(1024), RowSelector::skip(1024), - 
RowSelector::select(1024), ]; assert_eq!(result, expected); @@ -1009,9 +1134,8 @@ mod tests { #[test] fn eval_eq_pages_containing_value_except_all_null_pages() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -1022,16 +1146,18 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; + // Pages 0-1: all null (skip) + // Page 2: 0-10 (select, might contain 5.0) + // Page 3: 10-20 (skip, min > 5.0) let expected = vec![ - RowSelector::skip(1024), + RowSelector::skip(2048), RowSelector::select(1024), RowSelector::skip(1024), - RowSelector::select(1024), ]; assert_eq!(result, expected); @@ -1041,9 +1167,8 @@ mod tests { #[test] fn eval_not_eq_all_rows() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -1054,7 +1179,7 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; @@ -1068,9 +1193,8 @@ mod tests { #[test] fn eval_starts_with_error_float_col() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -1081,7 +1205,7 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), ); @@ -1093,11 +1217,13 @@ mod tests { #[test] fn eval_starts_with_pages_containing_value_except_all_null_pages() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; + // Test starts_with on string column where only some pages match + // Our file has 4 pages: ["AARDVARK".."BISON"], ["DEER"], ["GIRAFFE".."HIPPO"], ["HIPPO"] + // Testing starts_with("B") should select only page 0 let filter = Reference::new("col_string") .starts_with(Datum::string("B")) .bind(iceberg_schema_ref.clone(), false)?; @@ -1106,16 +1232,13 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; - let expected = vec![ - RowSelector::select(512), - RowSelector::skip(3536), - RowSelector::select(48), - ]; + // Page 0 has "BEAR" and "BISON" (starts 
with B), rest don't + let expected = vec![RowSelector::select(1024), RowSelector::skip(3072)]; assert_eq!(result, expected); @@ -1125,11 +1248,13 @@ mod tests { #[test] fn eval_not_starts_with_pages_containing_value_except_pages_with_min_and_max_equal_to_prefix_and_all_null_pages() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; + // Test not_starts_with where one page has ALL values starting with prefix + // Our file has page 1 with all "DEER" (min="DEER", max="DEER") + // Testing not_starts_with("DE") should skip page 1 where all values start with "DE" let filter = Reference::new("col_string") .not_starts_with(Datum::string("DE")) .bind(iceberg_schema_ref.clone(), false)?; @@ -1138,15 +1263,18 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; + // Page 0: mixed values (select) + // Page 1: all "DEER" starting with "DE" (skip) + // Pages 2-3: other values not all starting with "DE" (select) let expected = vec![ - RowSelector::select(512), - RowSelector::skip(512), - RowSelector::select(3072), + RowSelector::select(1024), + RowSelector::skip(1024), + RowSelector::select(2048), ]; assert_eq!(result, expected); @@ -1157,10 +1285,8 @@ mod tests { #[test] fn eval_in_length_of_set_above_limit_all_rows() -> Result<()> { let mut rng = thread_rng(); - - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") @@ -1171,7 +1297,7 @@ mod tests { &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; @@ -1185,30 +1311,32 @@ mod tests { #[test] fn eval_in_valid_set_size_some_rows() -> Result<()> { - let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; - let (column_index, offset_index) = create_page_index()?; - + let (metadata, _temp_file) = create_test_parquet_file()?; + let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; + // Test is_in with multiple values using min/max bounds + // Our file has 4 pages: ["AARDVARK".."BISON"], ["DEER"], ["GIRAFFE".."HIPPO"], ["HIPPO"] + // Testing is_in(["AARDVARK", "GIRAFFE"]) - both are in different pages let filter = Reference::new("col_string") - .is_in([Datum::string("AARDVARK"), Datum::string("ICEBERG")]) + .is_in([Datum::string("AARDVARK"), Datum::string("GIRAFFE")]) .bind(iceberg_schema_ref.clone(), false)?; let result = PageIndexEvaluator::eval( &filter, &column_index, &offset_index, - &row_group_metadata, + row_group_metadata, &field_id_map, iceberg_schema_ref.as_ref(), )?; + // Page 0 contains "AARDVARK", page 1 doesn't contain either, page 2 contains "GIRAFFE", page 3 doesn't let expected = vec![ - RowSelector::select(512), - RowSelector::skip(512), - 
RowSelector::select(2976), - RowSelector::skip(48), - RowSelector::select(48), + RowSelector::select(1024), + RowSelector::skip(1024), + RowSelector::select(1024), + RowSelector::skip(1024), ]; assert_eq!(result, expected); @@ -1239,169 +1367,4 @@ mod tests { Ok((iceberg_schema_ref, field_id_map)) } - - fn build_parquet_schema_descriptor() -> Result<Arc<SchemaDescriptor>> { - let field_1 = Arc::new( - parquetSchemaType::primitive_type_builder("col_float", ParquetPhysicalType::FLOAT) - .with_id(Some(1)) - .build()?, - ); - - let field_2 = Arc::new( - parquetSchemaType::primitive_type_builder( - "col_string", - ParquetPhysicalType::BYTE_ARRAY, - ) - .with_id(Some(2)) - .with_logical_type(Some(ParquetLogicalType::String)) - .build()?, - ); - - let group_type = Arc::new( - parquetSchemaType::group_type_builder("all") - .with_id(Some(1000)) - .with_fields(vec![field_1, field_2]) - .build()?, - ); - - let schema_descriptor = SchemaDescriptor::new(group_type); - let schema_descriptor_arc = Arc::new(schema_descriptor); - Ok(schema_descriptor_arc) - } - - fn create_row_group_metadata( - num_rows: i64, - col_1_num_vals: i64, - col_1_stats: Option<Statistics>, - col_2_num_vals: i64, - col_2_stats: Option<Statistics>, - ) -> Result<RowGroupMetaData> { - let schema_descriptor_arc = build_parquet_schema_descriptor()?; - - let column_1_desc_ptr = Arc::new(ColumnDescriptor::new( - schema_descriptor_arc.column(0).self_type_ptr(), - 1, - 1, - ColumnPath::new(vec!["col_float".to_string()]), - )); - - let column_2_desc_ptr = Arc::new(ColumnDescriptor::new( - schema_descriptor_arc.column(1).self_type_ptr(), - 1, - 1, - ColumnPath::new(vec!["col_string".to_string()]), - )); - - let mut col_1_meta = - ColumnChunkMetaData::builder(column_1_desc_ptr).set_num_values(col_1_num_vals); - if let Some(stats1) = col_1_stats { - col_1_meta = col_1_meta.set_statistics(stats1) - } - - let mut col_2_meta = - ColumnChunkMetaData::builder(column_2_desc_ptr).set_num_values(col_2_num_vals); - if let Some(stats2) = col_2_stats { - col_2_meta = col_2_meta.set_statistics(stats2) - } - - let row_group_metadata = RowGroupMetaData::builder(schema_descriptor_arc) - .set_num_rows(num_rows) - .set_column_metadata(vec![ - col_1_meta.build()?, - // .set_statistics(Statistics::float(None, None, None, 1, false)) - col_2_meta.build()?, - ]) - .build(); - - Ok(row_group_metadata?) - }
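The synthetic metadata builders removed above are superseded by a real-file helper, `create_test_parquet_file`, whose body lies outside these hunks. A hedged sketch of how such a helper could lay out the four 1024-row pages the expectations above rely on — the writer properties and column values here are assumptions for illustration, not the PR's exact code:

```rust
use std::sync::Arc;

use arrow_array::{ArrayRef, Float32Array, RecordBatch, StringArray};
use arrow_schema::{DataType, Field, Schema as ArrowSchema};
use parquet::arrow::ArrowWriter;
use parquet::file::properties::WriterProperties;
use tempfile::NamedTempFile;

// Writes 4096 rows as four 1024-row pages: float pages are
// [all null], [all null], [0..10], [10..20 with one null];
// string pages are [AARDVARK..BISON], [DEER], [GIRAFFE..HIPPO], [HIPPO].
fn write_four_page_file() -> NamedTempFile {
    let schema = Arc::new(ArrowSchema::new(vec![
        Field::new("col_float", DataType::Float32, true),
        Field::new("col_string", DataType::Utf8, true),
    ]));
    // Cap pages at 1024 rows so page boundaries (and their per-page
    // min/max/null-count statistics) are deterministic.
    let props = WriterProperties::builder()
        .set_data_page_row_count_limit(1024)
        .set_write_batch_size(1024)
        .build();
    let floats: ArrayRef = Arc::new(Float32Array::from(
        (0..4096u32)
            .map(|i| (i >= 2048 && i != 4095).then(|| (i - 2048) as f32 * 20.0 / 2048.0))
            .collect::<Vec<Option<f32>>>(),
    ));
    let strings: ArrayRef = Arc::new(StringArray::from(
        (0..4096)
            .map(|i| match i / 1024 {
                0 => if i % 2 == 0 { "AARDVARK" } else { "BISON" },
                1 => "DEER",
                2 => if i % 2 == 0 { "GIRAFFE" } else { "HIPPO" },
                _ => "HIPPO",
            })
            .collect::<Vec<&str>>(),
    ));
    let batch = RecordBatch::try_new(schema.clone(), vec![floats, strings]).unwrap();
    let file = NamedTempFile::new().unwrap();
    let mut writer = ArrowWriter::try_new(file.reopen().unwrap(), schema, Some(props)).unwrap();
    writer.write(&batch).unwrap();
    writer.close().unwrap();
    file
}
```

Pinning `data_page_row_count_limit` is what makes the `RowSelector` expectations stable: every selector boundary in the tests is a multiple of the 1024-row page size.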
- - fn create_page_index() -> Result<(Vec<Index>, Vec<OffsetIndexMetaData>)> { - let mut idx_float_builder = ColumnIndexBuilder::new(ParquetPhysicalType::FLOAT); - idx_float_builder.append(true, vec![], vec![], 1024); - idx_float_builder.append( - false, - 0.0f32.to_le_bytes().to_vec(), - 10.0f32.to_le_bytes().to_vec(), - 0, - ); - idx_float_builder.append( - false, - 10.0f32.to_le_bytes().to_vec(), - 20.0f32.to_le_bytes().to_vec(), - 1, - ); - idx_float_builder.append(true, vec![], vec![], -1); - let idx_float = idx_float_builder.build().unwrap(); - - let mut idx_string_builder = ColumnIndexBuilder::new(ParquetPhysicalType::BYTE_ARRAY); - idx_string_builder.append(false, b"AA".to_vec(), b"DD".to_vec(), 0); - idx_string_builder.append(false, b"DE".to_vec(), b"DE".to_vec(), 0); - idx_string_builder.append(false, b"DF".to_vec(), b"UJ".to_vec(), 1); - idx_string_builder.append(false, vec![], vec![], 48); - idx_string_builder.append(true, vec![], vec![], -1); - let idx_string = idx_string_builder.build().unwrap(); - - let page_locs_float = vec![ - PageLocation { - offset: 0, - compressed_page_size: 1024, - first_row_index: 0, - }, - PageLocation { - offset: 1024, - compressed_page_size: 1024, - first_row_index: 1024, - }, - PageLocation { - offset: 2048, - compressed_page_size: 1024, - first_row_index: 2048, - }, - PageLocation { - offset: 3072, - compressed_page_size: 1024, - first_row_index: 3072, - }, - ]; - - let page_locs_string = vec![ - PageLocation { - offset: 0, - compressed_page_size: 512, - first_row_index: 0, - }, - PageLocation { - offset: 512, - compressed_page_size: 512, - first_row_index: 512, - }, - PageLocation { - offset: 1024, - compressed_page_size: 2976, - first_row_index: 1024, - }, - PageLocation { - offset: 4000, - compressed_page_size: 48, - first_row_index: 4000, - }, - PageLocation { - offset: 4048, - compressed_page_size: 48, - first_row_index: 4048, - }, - ]; - - Ok((vec![idx_float, idx_string], vec![ - OffsetIndexMetaData { - page_locations: page_locs_float, - unencoded_byte_array_data_bytes: None, - }, - OffsetIndexMetaData { - page_locations: page_locs_string, - unencoded_byte_array_data_bytes: None, - }, - ])) - } } diff --git a/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs b/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs index e9bed775ef..a6af2990c8 100644 --- a/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs @@ -129,10 +129,10 @@ impl<'a> StrictMetricsEvaluator<'a> { self.upper_bound(field_id) }; - if let Some(bound) = bound { - if cmp_fn(bound, datum) { - return ROWS_MUST_MATCH; - } + if let Some(bound) = bound + && cmp_fn(bound, datum) + { + return ROWS_MUST_MATCH; } ROWS_MIGHT_NOT_MATCH @@ -219,10 +219,10 @@ impl BoundPredicateVisitor for StrictMetricsEvaluator<'_> { ) -> crate::Result<bool> { let field_id = reference.field().id; - if let Some(&nan_count) = self.nan_count(field_id) { - if nan_count == 0 { - return ROWS_MUST_MATCH; - } + if let Some(&nan_count) = self.nan_count(field_id) + && nan_count == 0 + { + return ROWS_MUST_MATCH; } if self.contains_nulls_only(field_id) { @@ -258,10 +258,10 @@ impl BoundPredicateVisitor for StrictMetricsEvaluator<'_> { ) -> crate::Result<bool> { let field_id = reference.field().id; - if let Some(lower) = self.lower_bound(field_id) { - if lower.is_nan() { - return ROWS_MIGHT_NOT_MATCH; - } + if let Some(lower) = self.lower_bound(field_id) + && lower.is_nan() + { + return ROWS_MIGHT_NOT_MATCH; } self.visit_inequality(reference, datum, PartialOrd::gt, true)
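The three hunks above are mechanical rewrites of nested `if let`/`if` blocks into let-chains; behavior is unchanged. A standalone illustration of the equivalence, with illustrative names rather than the evaluator's:

```rust
// Nested style, as removed above.
fn must_match_nested(bound: Option<i32>, datum: i32) -> bool {
    if let Some(b) = bound {
        if b > datum {
            return true;
        }
    }
    false
}

// Let-chain style, as added above; semantics are identical, one level flatter.
fn must_match_chained(bound: Option<i32>, datum: i32) -> bool {
    if let Some(b) = bound
        && b > datum
    {
        return true;
    }
    false
}
```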
@@ -578,7 +578,7 @@ mod test { ]), column_sizes: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -604,7 +604,7 @@ mod test { lower_bounds: Default::default(), upper_bounds: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -630,7 +630,7 @@ mod test { upper_bounds: HashMap::from([(1, Datum::int(42))]), column_sizes: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -657,7 +657,7 @@ mod test { upper_bounds: HashMap::from([(3, Datum::string("dC"))]), column_sizes: Default::default(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, diff --git a/crates/iceberg/src/io/storage.rs b/crates/iceberg/src/io/storage.rs index d5f2ad8fab..03e43600dd 100644 --- a/crates/iceberg/src/io/storage.rs +++ b/crates/iceberg/src/io/storage.rs @@ -15,6 +15,12 @@ // specific language governing permissions and limitations // under the License. +#[cfg(any( + feature = "storage-s3", + feature = "storage-gcs", + feature = "storage-oss", + feature = "storage-azdls", +))] use std::sync::Arc; use opendal::layers::RetryLayer; @@ -71,6 +77,7 @@ impl Storage { /// Convert iceberg config to opendal config. pub(crate) fn build(file_io_builder: FileIOBuilder) -> crate::Result<Self> { let (scheme_str, props, extensions) = file_io_builder.into_parts(); + let _ = (&props, &extensions); let scheme = Self::parse_scheme(&scheme_str)?; match scheme { @@ -127,6 +134,7 @@ impl Storage { path: &'a impl AsRef<str>, ) -> crate::Result<(Operator, &'a str)> { let path = path.as_ref(); + let _ = path; let (operator, relative_path): (Operator, &str) = match self { #[cfg(feature = "storage-memory")] Storage::Memory(op) => { @@ -175,7 +183,7 @@ impl Storage { } else { Err(Error::new( ErrorKind::DataInvalid, - format!("Invalid gcs url: {}, should start with {}", path, prefix), + format!("Invalid gcs url: {path}, should start with {prefix}"), )) } } @@ -190,7 +198,7 @@ impl Storage { } else { Err(Error::new( ErrorKind::DataInvalid, - format!("Invalid oss url: {}, should start with {}", path, prefix), + format!("Invalid oss url: {path}, should start with {prefix}"), )) } } diff --git a/crates/iceberg/src/io/storage_azdls.rs b/crates/iceberg/src/io/storage_azdls.rs index 7294896551..524bec5e68 100644 --- a/crates/iceberg/src/io/storage_azdls.rs +++ b/crates/iceberg/src/io/storage_azdls.rs @@ -118,14 +118,14 @@ fn find_sas_token( let find_with_prefix = |prefix: &str| { properties .iter() - .filter(|(key, _)| key.as_str() == prefix || key.starts_with(&format!("{}.", prefix))) + .filter(|(key, _)| key.as_str() == prefix || key.starts_with(&format!("{prefix}."))) .min_by_key(|(key, _)| key.len()) .map(|(_, value)| value.strip_prefix('?').unwrap_or(value).to_string()) }; // Try account-specific prefix first if account name is known, then fall back to base if let Some(account) = account_name { - let account_prefix = format!("{}.{}", ADLS_SAS_TOKEN, account); + let account_prefix = format!("{ADLS_SAS_TOKEN}.{account}"); if let Some(token) = find_with_prefix(&account_prefix) { return Some(token); } @@ -204,7 +204,7 @@ impl FromStr for AzureStorageScheme { "wasbs" => Ok(AzureStorageScheme::Wasbs), _ => Err(Error::new( ErrorKind::DataInvalid, -
format!("Unexpected Azure Storage scheme: {}", s), + format!("Unexpected Azure Storage scheme: {s}"), )), } } @@ -517,11 +517,11 @@ mod tests { let config = azdls_config_parse(properties); match expected { Some(expected_config) => { - assert!(config.is_ok(), "Test case {} failed: {:?}", name, config); - assert_eq!(config.unwrap(), expected_config, "Test case: {}", name); + assert!(config.is_ok(), "Test case {name} failed: {config:?}"); + assert_eq!(config.unwrap(), expected_config, "Test case: {name}"); } None => { - assert!(config.is_err(), "Test case {} expected error.", name); + assert!(config.is_err(), "Test case {name} expected error."); } } } @@ -629,14 +629,14 @@ mod tests { let result = azdls_create_operator(input.0, &input.1, &input.2); match expected { Some((expected_filesystem, expected_path)) => { - assert!(result.is_ok(), "Test case {} failed: {:?}", name, result); + assert!(result.is_ok(), "Test case {name} failed: {result:?}"); let (op, relative_path) = result.unwrap(); assert_eq!(op.info().name(), expected_filesystem); assert_eq!(relative_path, expected_path); } None => { - assert!(result.is_err(), "Test case {} expected error.", name); + assert!(result.is_err(), "Test case {name} expected error."); } } } @@ -677,11 +677,11 @@ mod tests { let result = input.parse::(); match expected { Some(expected_path) => { - assert!(result.is_ok(), "Test case {} failed: {:?}", name, result); - assert_eq!(result.unwrap(), expected_path, "Test case: {}", name); + assert!(result.is_ok(), "Test case {name} failed: {result:?}"); + assert_eq!(result.unwrap(), expected_path, "Test case: {name}"); } None => { - assert!(result.is_err(), "Test case {} expected error.", name); + assert!(result.is_err(), "Test case {name} expected error."); } } } @@ -727,7 +727,7 @@ mod tests { for (name, path, expected) in test_cases { let endpoint = path.as_endpoint(); - assert_eq!(endpoint, expected, "Test case: {}", name); + assert_eq!(endpoint, expected, "Test case: {name}"); } } } diff --git a/crates/iceberg/src/io/storage_gcs.rs b/crates/iceberg/src/io/storage_gcs.rs index 8c3d914c86..7718df603f 100644 --- a/crates/iceberg/src/io/storage_gcs.rs +++ b/crates/iceberg/src/io/storage_gcs.rs @@ -71,20 +71,20 @@ pub(crate) fn gcs_config_parse(mut m: HashMap) -> Result Result let bucket = url.host_str().ok_or_else(|| { Error::new( ErrorKind::DataInvalid, - format!("Invalid gcs url: {}, bucket is required", path), + format!("Invalid gcs url: {path}, bucket is required"), ) })?; diff --git a/crates/iceberg/src/io/storage_oss.rs b/crates/iceberg/src/io/storage_oss.rs index 8bfffc6ca8..e82dda23a5 100644 --- a/crates/iceberg/src/io/storage_oss.rs +++ b/crates/iceberg/src/io/storage_oss.rs @@ -56,7 +56,7 @@ pub(crate) fn oss_config_build(cfg: &OssConfig, path: &str) -> Result let bucket = url.host_str().ok_or_else(|| { Error::new( ErrorKind::DataInvalid, - format!("Invalid oss url: {}, missing bucket", path), + format!("Invalid oss url: {path}, missing bucket"), ) })?; diff --git a/crates/iceberg/src/io/storage_s3.rs b/crates/iceberg/src/io/storage_s3.rs index fcf9afed1f..f069e0e2f9 100644 --- a/crates/iceberg/src/io/storage_s3.rs +++ b/crates/iceberg/src/io/storage_s3.rs @@ -134,20 +134,20 @@ pub(crate) fn s3_config_parse(mut m: HashMap) -> Result = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_FILE, + RESERVED_COL_NAME_FILE, + Type::Primitive(PrimitiveType::String), + ) + .with_doc("Path of the file in which a row is stored"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the 
_pos metadata column. +/// This field represents the ordinal position of a row in the source data file. +static POS_FIELD: Lazy<NestedFieldRef> = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_POS, + RESERVED_COL_NAME_POS, + Type::Primitive(PrimitiveType::Long), + ) + .with_doc("Ordinal position of a row in the source data file"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the _deleted metadata column. +/// This field indicates whether a row has been deleted. +static DELETED_FIELD: Lazy<NestedFieldRef> = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_DELETED, + RESERVED_COL_NAME_DELETED, + Type::Primitive(PrimitiveType::Boolean), + ) + .with_doc("Whether the row has been deleted"), + ) +}); -/// Column name for the pos column used in delete file reading (positional deletes) -pub const RESERVED_COL_NAME_POS: &str = "pos"; +/// Lazy-initialized Iceberg field definition for the _spec_id metadata column. +/// This field represents the spec ID used to track the file containing a row. +static SPEC_ID_FIELD: Lazy<NestedFieldRef> = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_SPEC_ID, + RESERVED_COL_NAME_SPEC_ID, + Type::Primitive(PrimitiveType::Int), + ) + .with_doc("Spec ID used to track the file containing a row"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the file_path column in position delete files. +/// This field represents the path of a file in position-based delete files. +static DELETE_FILE_PATH_FIELD: Lazy<NestedFieldRef> = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_DELETE_FILE_PATH, + RESERVED_COL_NAME_DELETE_FILE_PATH, + Type::Primitive(PrimitiveType::String), + ) + .with_doc("Path of a file, used in position-based delete files"), + ) +}); -/// Lazy-initialized Arrow Field definition for the _file metadata column. -static FILE_FIELD: Lazy<Arc<Field>> = Lazy::new(|| { +/// Lazy-initialized Iceberg field definition for the pos column in position delete files. +/// This field represents the ordinal position of a row in position-based delete files. +static DELETE_FILE_POS_FIELD: Lazy<NestedFieldRef> = Lazy::new(|| { Arc::new( - Field::new(RESERVED_COL_NAME_FILE, DataType::Utf8, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - RESERVED_FIELD_ID_FILE.to_string(), - )])), + NestedField::required( + RESERVED_FIELD_ID_DELETE_FILE_POS, + RESERVED_COL_NAME_DELETE_FILE_POS, + Type::Primitive(PrimitiveType::Long), + ) + .with_doc("Ordinal position of a row, used in position-based delete files"), ) }); -/// Returns the Arrow Field definition for the _file metadata column. +/// Lazy-initialized Iceberg field definition for the _change_type metadata column. +/// This field represents the record type in the changelog. +static CHANGE_TYPE_FIELD: Lazy<NestedFieldRef> = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_CHANGE_TYPE, + RESERVED_COL_NAME_CHANGE_TYPE, + Type::Primitive(PrimitiveType::String), + ) + .with_doc( + "The record type in the changelog (INSERT, DELETE, UPDATE_BEFORE, or UPDATE_AFTER)", + ), + ) +}); + +/// Lazy-initialized Iceberg field definition for the _change_ordinal metadata column. +/// This field represents the order of the change. +static CHANGE_ORDINAL_FIELD: Lazy<NestedFieldRef> = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_CHANGE_ORDINAL, + RESERVED_COL_NAME_CHANGE_ORDINAL, + Type::Primitive(PrimitiveType::Int), + ) + .with_doc("The order of the change"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the _commit_snapshot_id metadata column.
+/// This field represents the snapshot ID in which the change occurred. +static COMMIT_SNAPSHOT_ID_FIELD: Lazy<NestedFieldRef> = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_COMMIT_SNAPSHOT_ID, + RESERVED_COL_NAME_COMMIT_SNAPSHOT_ID, + Type::Primitive(PrimitiveType::Long), + ) + .with_doc("The snapshot ID in which the change occurred"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the _row_id metadata column. +/// This field represents a unique long assigned for row lineage. +static ROW_ID_FIELD: Lazy<NestedFieldRef> = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_ROW_ID, + RESERVED_COL_NAME_ROW_ID, + Type::Primitive(PrimitiveType::Long), + ) + .with_doc("A unique long assigned for row lineage"), + ) +}); + +/// Lazy-initialized Iceberg field definition for the _last_updated_sequence_number metadata column. +/// This field represents the sequence number which last updated this row. +static LAST_UPDATED_SEQUENCE_NUMBER_FIELD: Lazy<NestedFieldRef> = Lazy::new(|| { + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_LAST_UPDATED_SEQUENCE_NUMBER, + RESERVED_COL_NAME_LAST_UPDATED_SEQUENCE_NUMBER, + Type::Primitive(PrimitiveType::Long), + ) + .with_doc("The sequence number which last updated this row"), + ) +}); -/// Returns the Arrow Field definition for the _file metadata column. +/// Returns the Iceberg field definition for the _file metadata column. /// /// # Returns -/// A reference to the _file field definition (RunEndEncoded type) -pub fn file_field() -> &'static Arc<Field> { +/// A reference to the _file field definition as an Iceberg NestedField +pub fn file_field() -> &'static NestedFieldRef { &FILE_FIELD } +/// Returns the Iceberg field definition for the _pos metadata column. +/// +/// # Returns +/// A reference to the _pos field definition as an Iceberg NestedField +pub fn pos_field() -> &'static NestedFieldRef { + &POS_FIELD +} + +/// Returns the Iceberg field definition for the _deleted metadata column. +/// +/// # Returns +/// A reference to the _deleted field definition as an Iceberg NestedField +pub fn deleted_field() -> &'static NestedFieldRef { + &DELETED_FIELD +} + +/// Returns the Iceberg field definition for the _spec_id metadata column. +/// +/// # Returns +/// A reference to the _spec_id field definition as an Iceberg NestedField +pub fn spec_id_field() -> &'static NestedFieldRef { + &SPEC_ID_FIELD +} + +/// Returns the Iceberg field definition for the file_path column in position delete files. +/// +/// # Returns +/// A reference to the file_path field definition as an Iceberg NestedField +pub fn delete_file_path_field() -> &'static NestedFieldRef { + &DELETE_FILE_PATH_FIELD +} + +/// Returns the Iceberg field definition for the pos column in position delete files. +/// +/// # Returns +/// A reference to the pos field definition as an Iceberg NestedField +pub fn delete_file_pos_field() -> &'static NestedFieldRef { + &DELETE_FILE_POS_FIELD +} + +/// Returns the Iceberg field definition for the _change_type metadata column. +/// +/// # Returns +/// A reference to the _change_type field definition as an Iceberg NestedField +pub fn change_type_field() -> &'static NestedFieldRef { + &CHANGE_TYPE_FIELD +} + +/// Returns the Iceberg field definition for the _change_ordinal metadata column. +/// +/// # Returns +/// A reference to the _change_ordinal field definition as an Iceberg NestedField +pub fn change_ordinal_field() -> &'static NestedFieldRef { + &CHANGE_ORDINAL_FIELD +}
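Every accessor in this block returns a `&'static NestedFieldRef`, so callers clone an `Arc`, not a field. A hedged sketch of typical use — `Schema::builder()`, `with_fields`, and `as_struct` are the crate's existing schema API, while the function itself (and whether reserved field IDs pass schema validation) is illustrative:

```rust
use iceberg::metadata_columns::{file_field, pos_field};
use iceberg::spec::Schema;

/// Append the _file and _pos metadata columns to a projected schema.
fn with_lineage_columns(base: &Schema) -> iceberg::Result<Schema> {
    let mut fields = base.as_struct().fields().to_vec();
    fields.push(file_field().clone()); // cheap: clones the Arc only
    fields.push(pos_field().clone());
    Schema::builder().with_fields(fields).build()
}
```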
+/// Returns the Iceberg field definition for the _commit_snapshot_id metadata column. +/// +/// # Returns +/// A reference to the _commit_snapshot_id field definition as an Iceberg NestedField +pub fn commit_snapshot_id_field() -> &'static NestedFieldRef { + &COMMIT_SNAPSHOT_ID_FIELD +} + +/// Returns the Iceberg field definition for the _row_id metadata column. +/// +/// # Returns +/// A reference to the _row_id field definition as an Iceberg NestedField +pub fn row_id_field() -> &'static NestedFieldRef { + &ROW_ID_FIELD +} + +/// Returns the Iceberg field definition for the _last_updated_sequence_number metadata column. +/// +/// # Returns +/// A reference to the _last_updated_sequence_number field definition as an Iceberg NestedField +pub fn last_updated_sequence_number_field() -> &'static NestedFieldRef { + &LAST_UPDATED_SEQUENCE_NUMBER_FIELD +} + /// Lazy-initialized Arrow Field definition for the _pos metadata column. -/// Used for row position within a file. +/// Used for row position within a file, with RowNumber extension type for Parquet reader. static ROW_POS_FIELD: Lazy<Arc<Field>> = Lazy::new(|| { Arc::new( - Field::new(RESERVED_COL_NAME_UNDERSCORE_POS, DataType::Int64, false) + Field::new(RESERVED_COL_NAME_POS, DataType::Int64, false) .with_metadata(HashMap::from([( PARQUET_FIELD_ID_META_KEY.to_string(), - RESERVED_FIELD_ID_UNDERSCORE_POS.to_string(), + RESERVED_FIELD_ID_POS.to_string(), )])) .with_extension_type(RowNumber), ) }); /// Returns the Arrow Field definition for the _pos metadata column. +/// This field is used by the Parquet reader to produce row position data. /// /// # Returns -/// A reference to the _pos field definition +/// A reference to the _pos field definition as an Arrow Field pub fn row_pos_field() -> &'static Arc<Field> { &ROW_POS_FIELD } -/// Lazy-initialized Arrow Field definition for the pos metadata column. -/// Used in positional delete records. -static POS_FIELD: Lazy<Arc<Field>> = Lazy::new(|| { +/// Lazy-initialized Arrow Field definition for the file_path metadata column. +/// Used in delete file context for the file path of a deleted file. +static FILE_PATH_FIELD: Lazy<Arc<Field>> = Lazy::new(|| { Arc::new( - Field::new(RESERVED_COL_NAME_POS, DataType::Int64, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - RESERVED_FIELD_ID_POS.to_string(), - )])), + Field::new(RESERVED_COL_NAME_DELETE_FILE_PATH, DataType::Utf8, false).with_metadata( + HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + RESERVED_FIELD_ID_DELETE_FILE_PATH.to_string(), + )]), + ), ) }); -/// Returns the Arrow Field definition for the pos metadata column. -/// -/// # Returns -/// A reference to the pos field definition -pub fn pos_field() -> &'static Arc<Field> { - &POS_FIELD +/// Returns the Arrow Field definition for file_path in delete context. +pub fn file_path_field() -> &'static Arc<Field> { + &FILE_PATH_FIELD } -/// Lazy-initialized Arrow Field definition for the file_path metadata column. -/// Used in positional delete records to track which file each delete applies to. -static FILE_PATH_FIELD: Lazy<Arc<Field>> = Lazy::new(|| { +/// Lazy-initialized Arrow Field definition for the pos metadata column in delete context. +/// Used in delete file context for the position within a deleted file.
+static POS_FIELD_ARROW: Lazy<Arc<Field>> = Lazy::new(|| { Arc::new( - Field::new(RESERVED_COL_NAME_FILE_PATH, DataType::Utf8, false).with_metadata( + Field::new(RESERVED_COL_NAME_DELETE_FILE_POS, DataType::Int64, false).with_metadata( HashMap::from([( PARQUET_FIELD_ID_META_KEY.to_string(), - RESERVED_FIELD_ID_FILE_PATH.to_string(), + RESERVED_FIELD_ID_DELETE_FILE_POS.to_string(), )]), ), ) }); -/// Returns the Arrow Field definition for the file_path metadata column. +/// Returns the Arrow Field definition for pos in delete context. +pub fn pos_field_arrow() -> &'static Arc<Field> { + &POS_FIELD_ARROW +} + +/// Creates the Iceberg field definition for the _partition metadata column. +/// +/// The _partition field is a struct whose fields depend on the partition spec. +/// This function creates the field dynamically with the provided partition fields. +/// +/// # Arguments +/// * `partition_fields` - The fields that make up the partition struct /// /// # Returns -/// A reference to the file_path field definition -pub fn file_path_field() -> &'static Arc<Field> { - &FILE_PATH_FIELD +/// A new _partition field definition as an Iceberg NestedField +/// +/// # Example +/// ``` +/// use std::sync::Arc; +/// +/// use iceberg::metadata_columns::partition_field; +/// use iceberg::spec::{NestedField, PrimitiveType, Type}; +/// +/// let fields = vec![ +/// Arc::new(NestedField::required( +/// 1, +/// "year", +/// Type::Primitive(PrimitiveType::Int), +/// )), +/// Arc::new(NestedField::required( +/// 2, +/// "month", +/// Type::Primitive(PrimitiveType::Int), +/// )), +/// ]; +/// let partition_field = partition_field(fields); +/// ``` +pub fn partition_field(partition_fields: Vec<NestedFieldRef>) -> NestedFieldRef { + use crate::spec::StructType; + + Arc::new( + NestedField::required( + RESERVED_FIELD_ID_PARTITION, + RESERVED_COL_NAME_PARTITION, + Type::Struct(StructType::new(partition_fields)), + ) + .with_doc("Partition to which a row belongs"), + ) } -/// Returns the Arrow Field definition for a metadata field ID. +/// Returns the Iceberg field definition for a metadata field ID. +/// +/// Note: This function does not support `_partition` (field ID `i32::MAX - 5`) because +/// it's a struct field that requires dynamic partition fields. Use `partition_field()` +/// instead to create the `_partition` field with the appropriate partition fields.
/// /// # Arguments /// * `field_id` - The metadata field ID /// /// # Returns -/// The Arrow Field definition for the metadata column, or an error if not a metadata field -pub fn get_metadata_field(field_id: i32) -> Result<Arc<Field>> { +/// The Iceberg field definition for the metadata column, or an error if not a metadata field +pub fn get_metadata_field(field_id: i32) -> Result<&'static NestedFieldRef> { match field_id { - RESERVED_FIELD_ID_FILE => Ok(Arc::clone(file_field())), - RESERVED_FIELD_ID_UNDERSCORE_POS => Ok(Arc::clone(row_pos_field())), - RESERVED_FIELD_ID_FILE_PATH => Ok(Arc::clone(file_path_field())), - RESERVED_FIELD_ID_POS => Ok(Arc::clone(pos_field())), + RESERVED_FIELD_ID_FILE => Ok(file_field()), + RESERVED_FIELD_ID_POS => Ok(pos_field()), + RESERVED_FIELD_ID_DELETED => Ok(deleted_field()), + RESERVED_FIELD_ID_SPEC_ID => Ok(spec_id_field()), + RESERVED_FIELD_ID_PARTITION => Err(Error::new( + ErrorKind::Unexpected, + "The _partition field must be created using partition_field() with appropriate partition fields", + )), + RESERVED_FIELD_ID_DELETE_FILE_PATH => Ok(delete_file_path_field()), + RESERVED_FIELD_ID_DELETE_FILE_POS => Ok(delete_file_pos_field()), + RESERVED_FIELD_ID_CHANGE_TYPE => Ok(change_type_field()), + RESERVED_FIELD_ID_CHANGE_ORDINAL => Ok(change_ordinal_field()), + RESERVED_FIELD_ID_COMMIT_SNAPSHOT_ID => Ok(commit_snapshot_id_field()), + RESERVED_FIELD_ID_ROW_ID => Ok(row_id_field()), + RESERVED_FIELD_ID_LAST_UPDATED_SEQUENCE_NUMBER => Ok(last_updated_sequence_number_field()), + _ if is_metadata_field(field_id) => { + // Future metadata fields can be added here + Err(Error::new( + ErrorKind::Unexpected, + format!( + "Metadata field ID {field_id} recognized but field definition not implemented" + ), + )) + } _ => Err(Error::new( ErrorKind::Unexpected, - format!("Field ID {} is not a (supported) metadata field", field_id), + format!("Field ID {field_id} is not a metadata field"), )), } } @@ -164,12 +494,22 @@ pub fn get_metadata_field(field_id: i32) -> Result<Arc<Field>> { pub fn get_metadata_field_id(column_name: &str) -> Result<i32> { match column_name { RESERVED_COL_NAME_FILE => Ok(RESERVED_FIELD_ID_FILE), - RESERVED_COL_NAME_UNDERSCORE_POS => Ok(RESERVED_FIELD_ID_UNDERSCORE_POS), - RESERVED_COL_NAME_FILE_PATH => Ok(RESERVED_FIELD_ID_FILE_PATH), RESERVED_COL_NAME_POS => Ok(RESERVED_FIELD_ID_POS), + RESERVED_COL_NAME_DELETED => Ok(RESERVED_FIELD_ID_DELETED), + RESERVED_COL_NAME_SPEC_ID => Ok(RESERVED_FIELD_ID_SPEC_ID), + RESERVED_COL_NAME_PARTITION => Ok(RESERVED_FIELD_ID_PARTITION), + RESERVED_COL_NAME_DELETE_FILE_PATH => Ok(RESERVED_FIELD_ID_DELETE_FILE_PATH), + RESERVED_COL_NAME_DELETE_FILE_POS => Ok(RESERVED_FIELD_ID_DELETE_FILE_POS), + RESERVED_COL_NAME_CHANGE_TYPE => Ok(RESERVED_FIELD_ID_CHANGE_TYPE), + RESERVED_COL_NAME_CHANGE_ORDINAL => Ok(RESERVED_FIELD_ID_CHANGE_ORDINAL), + RESERVED_COL_NAME_COMMIT_SNAPSHOT_ID => Ok(RESERVED_FIELD_ID_COMMIT_SNAPSHOT_ID), + RESERVED_COL_NAME_ROW_ID => Ok(RESERVED_FIELD_ID_ROW_ID), + RESERVED_COL_NAME_LAST_UPDATED_SEQUENCE_NUMBER => { + Ok(RESERVED_FIELD_ID_LAST_UPDATED_SEQUENCE_NUMBER) + } _ => Err(Error::new( ErrorKind::Unexpected, - format!("Unknown metadata column name: {column_name}"), + format!("Unknown/unsupported metadata column name: {column_name}"), )), } } @@ -180,14 +520,22 @@ pub fn get_metadata_field_id(column_name: &str) -> Result<i32> { /// * `field_id` - The field ID to check /// /// # Returns -/// `true` if the field ID is a metadata field, `false` otherwise +/// `true` if the field ID is a (currently supported) metadata
field, `false` otherwise pub fn is_metadata_field(field_id: i32) -> bool { matches!( field_id, RESERVED_FIELD_ID_FILE - | RESERVED_FIELD_ID_UNDERSCORE_POS - | RESERVED_FIELD_ID_FILE_PATH | RESERVED_FIELD_ID_POS + | RESERVED_FIELD_ID_DELETED + | RESERVED_FIELD_ID_SPEC_ID + | RESERVED_FIELD_ID_PARTITION + | RESERVED_FIELD_ID_DELETE_FILE_PATH + | RESERVED_FIELD_ID_DELETE_FILE_POS + | RESERVED_FIELD_ID_CHANGE_TYPE + | RESERVED_FIELD_ID_CHANGE_ORDINAL + | RESERVED_FIELD_ID_COMMIT_SNAPSHOT_ID + | RESERVED_FIELD_ID_ROW_ID + | RESERVED_FIELD_ID_LAST_UPDATED_SEQUENCE_NUMBER ) } @@ -201,3 +549,85 @@ pub fn is_metadata_field(field_id: i32) -> bool { pub fn is_metadata_column_name(column_name: &str) -> bool { get_metadata_field_id(column_name).is_ok() } + +#[cfg(test)] +mod tests { + use super::*; + use crate::spec::PrimitiveType; + + #[test] + fn test_partition_field_creation() { + // Create partition fields for a hypothetical year/month partition + let partition_fields = vec![ + Arc::new(NestedField::required( + 1000, + "year", + Type::Primitive(PrimitiveType::Int), + )), + Arc::new(NestedField::required( + 1001, + "month", + Type::Primitive(PrimitiveType::Int), + )), + ]; + + // Create the _partition metadata field + let partition = partition_field(partition_fields); + + // Verify field properties + assert_eq!(partition.id, RESERVED_FIELD_ID_PARTITION); + assert_eq!(partition.name, RESERVED_COL_NAME_PARTITION); + assert!(partition.required); + + // Verify it's a struct type with correct fields + if let Type::Struct(struct_type) = partition.field_type.as_ref() { + assert_eq!(struct_type.fields().len(), 2); + assert_eq!(struct_type.fields()[0].name, "year"); + assert_eq!(struct_type.fields()[1].name, "month"); + } else { + panic!("Expected struct type for _partition field"); + } + } + + #[test] + fn test_partition_field_id_recognized() { + assert!(is_metadata_field(RESERVED_FIELD_ID_PARTITION)); + } + + #[test] + fn test_partition_field_name_recognized() { + assert_eq!( + get_metadata_field_id(RESERVED_COL_NAME_PARTITION).unwrap(), + RESERVED_FIELD_ID_PARTITION + ); + } + + #[test] + fn test_get_metadata_field_returns_error_for_partition() { + // partition field requires dynamic creation, so get_metadata_field should return an error + let result = get_metadata_field(RESERVED_FIELD_ID_PARTITION); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("partition_field()") + ); + } + + #[test] + fn test_all_metadata_field_ids() { + // Test that all non-partition metadata fields can be retrieved + assert!(get_metadata_field(RESERVED_FIELD_ID_FILE).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_POS).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_DELETED).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_SPEC_ID).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_DELETE_FILE_PATH).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_DELETE_FILE_POS).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_CHANGE_TYPE).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_CHANGE_ORDINAL).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_COMMIT_SNAPSHOT_ID).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_ROW_ID).is_ok()); + assert!(get_metadata_field(RESERVED_FIELD_ID_LAST_UPDATED_SEQUENCE_NUMBER).is_ok()); + } +} diff --git a/crates/iceberg/src/runtime/mod.rs b/crates/iceberg/src/runtime/mod.rs index d0a3ce6602..61aa623f58 100644 --- a/crates/iceberg/src/runtime/mod.rs +++ b/crates/iceberg/src/runtime/mod.rs @@ 
-21,28 +21,20 @@ use std::future::Future; use std::pin::Pin; use std::task::{Context, Poll}; -pub enum JoinHandle<T> { - #[cfg(feature = "tokio")] - Tokio(tokio::task::JoinHandle<T>), - #[cfg(all(feature = "smol", not(feature = "tokio")))] - Smol(smol::Task<T>), - #[cfg(all(not(feature = "smol"), not(feature = "tokio")))] - Unimplemented(Box<T>), -} +use tokio::task; + +pub struct JoinHandle<T>(task::JoinHandle<T>); + +impl<T> Unpin for JoinHandle<T> {} impl<T> Future for JoinHandle<T> { type Output = T; fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> { match self.get_mut() { - #[cfg(feature = "tokio")] - JoinHandle::Tokio(handle) => Pin::new(handle) .poll(cx) - .map(|h| h.expect("tokio spawned task failed")), - #[cfg(all(feature = "smol", not(feature = "tokio")))] - JoinHandle::Smol(handle) => Pin::new(handle).poll(cx), - #[cfg(all(not(feature = "smol"), not(feature = "tokio")))] - JoinHandle::Unimplemented(_) => unimplemented!("no runtime has been enabled"), + JoinHandle(handle) => Pin::new(handle) .poll(cx) + .map(|r| r.expect("tokio spawned task failed")), } } } @@ -50,17 +42,10 @@ impl<T> Future for JoinHandle<T> { #[allow(dead_code)] pub fn spawn<F>(f: F) -> JoinHandle<F::Output> where - F: Future + Send + 'static, + F: std::future::Future + Send + 'static, F::Output: Send + 'static, { - #[cfg(feature = "tokio")] - return JoinHandle::Tokio(tokio::task::spawn(f)); - - #[cfg(all(feature = "smol", not(feature = "tokio")))] - return JoinHandle::Smol(smol::spawn(f)); - - #[cfg(all(not(feature = "smol"), not(feature = "tokio")))] - unimplemented!("no runtime has been enabled") + JoinHandle(task::spawn(f)) } #[allow(dead_code)] @@ -69,45 +54,22 @@ where F: FnOnce() -> T + Send + 'static, T: Send + 'static, { - #[cfg(feature = "tokio")] - return JoinHandle::Tokio(tokio::task::spawn_blocking(f)); - - #[cfg(all(feature = "smol", not(feature = "tokio")))] - return JoinHandle::Smol(smol::unblock(f)); - - #[cfg(all(not(feature = "smol"), not(feature = "tokio")))] - unimplemented!("no runtime has been enabled") + JoinHandle(task::spawn_blocking(f)) } #[cfg(test)] mod tests { use super::*; - #[cfg(feature = "tokio")] #[tokio::test] async fn test_tokio_spawn() { let handle = spawn(async { 1 + 1 }); assert_eq!(handle.await, 2); } - #[cfg(feature = "tokio")] #[tokio::test] async fn test_tokio_spawn_blocking() { let handle = spawn_blocking(|| 1 + 1); assert_eq!(handle.await, 2); } - - #[cfg(all(feature = "smol", not(feature = "tokio")))] - #[smol::test] - async fn test_smol_spawn() { - let handle = spawn(async { 1 + 1 }); - assert_eq!(handle.await, 2); - } - - #[cfg(all(feature = "smol", not(feature = "tokio")))] - #[smol::test] - async fn test_smol_spawn_blocking() { - let handle = spawn_blocking(|| 1 + 1); - assert_eq!(handle.await, 2); - } }
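With the smol and no-runtime branches gone, `spawn` and `spawn_blocking` are thin tokio wrappers that unwrap the join `Result`, so callers await a plain `T`. A hedged usage sketch (assumes a tokio runtime is live; the workload is illustrative):

```rust
// Callers await the wrapper exactly like any other future.
async fn demo() -> usize {
    // CPU-bound work hops to tokio's blocking pool...
    let base = crate::runtime::spawn_blocking(|| (1..=1_000).sum::<usize>()).await;
    // ...async work stays on the task set; both yield T, panicking on join errors.
    crate::runtime::spawn(async move { base + 1 }).await
}
```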
diff --git a/crates/iceberg/src/scan/context.rs b/crates/iceberg/src/scan/context.rs index e87dba4a1f..3e7b87b099 100644 --- a/crates/iceberg/src/scan/context.rs +++ b/crates/iceberg/src/scan/context.rs @@ -38,18 +38,19 @@ pub(crate) type ManifestEntryFilterFn = dyn Fn(&ManifestEntryRef) -> bool + Send /// Wraps a [`ManifestFile`] alongside the objects that are needed /// to process it in a thread-safe manner pub(crate) struct ManifestFileContext { - pub manifest_file: ManifestFile, + pub(crate) manifest_file: ManifestFile, - pub sender: Sender<ManifestEntryContext>, + pub(crate) sender: Sender<ManifestEntryContext>, - pub field_ids: Arc<Vec<i32>>, - pub bound_predicates: Option<Arc<BoundPredicates>>, - pub object_cache: Arc<ObjectCache>, - pub snapshot_schema: SchemaRef, - pub expression_evaluator_cache: Arc<ExpressionEvaluatorCache>, - pub delete_file_index: DeleteFileIndex, + pub(crate) field_ids: Arc<Vec<i32>>, + pub(crate) bound_predicates: Option<Arc<BoundPredicates>>, + pub(crate) object_cache: Arc<ObjectCache>, + pub(crate) snapshot_schema: SchemaRef, + pub(crate) expression_evaluator_cache: Arc<ExpressionEvaluatorCache>, + pub(crate) delete_file_index: DeleteFileIndex, + pub(crate) case_sensitive: bool, - pub filter_fn: Option<Arc<ManifestEntryFilterFn>>, + pub(crate) filter_fn: Option<Arc<ManifestEntryFilterFn>>, } /// Wraps a [`ManifestEntryRef`] alongside the objects that are needed @@ -63,6 +64,7 @@ pub(crate) struct ManifestEntryContext { pub partition_spec_id: i32, pub snapshot_schema: SchemaRef, pub delete_file_index: DeleteFileIndex, + pub case_sensitive: bool, } impl ManifestFileContext { @@ -79,7 +81,7 @@ impl ManifestFileContext { expression_evaluator_cache, delete_file_index, filter_fn, - .. + case_sensitive, } = self; let filter_fn = filter_fn.unwrap_or_else(|| Arc::new(|_| true)); @@ -96,6 +98,7 @@ impl ManifestFileContext { bound_predicates: bound_predicates.clone(), snapshot_schema: snapshot_schema.clone(), delete_file_index: delete_file_index.clone(), + case_sensitive, }; sender @@ -142,6 +145,7 @@ impl ManifestEntryContext { partition_spec: None, // TODO: Extract name_mapping from table metadata property "schema.name-mapping.default" name_mapping: None, + case_sensitive: self.case_sensitive, }) } } @@ -201,8 +205,19 @@ impl PlanContext { delete_file_idx: DeleteFileIndex, delete_file_tx: Sender<ManifestEntryContext>, ) -> Result<Box<dyn Iterator<Item = Result<ManifestFileContext>> + 'static>> { - let manifest_files = manifest_list.entries().iter(); - + let mut manifest_files = manifest_list.entries().iter().collect::<Vec<_>>(); + // Sort manifest files to process delete manifests first. + // This avoids a deadlock where the producer blocks on sending data manifest entries + // (because the data channel is full) while the delete manifest consumer is waiting + // for delete manifest entries (which haven't been produced yet). + // By processing delete manifests first, we ensure the delete consumer can finish, + // which then allows the data consumer to start draining the data channel. + manifest_files.sort_by_key(|m| match m.content { + ManifestContentType::Deletes => 0, + ManifestContentType::Data => 1, + }); + + // TODO: Ideally we could ditch this intermediate Vec as we return an iterator. let mut filtered_mfcs = vec![]; for manifest_file in manifest_files { let tx = if manifest_file.content == ManifestContentType::Deletes { @@ -237,7 +252,6 @@ impl PlanContext { partition_bound_predicate, tx, delete_file_idx.clone(), - None, ); filtered_mfcs.push(Ok(mfc)); @@ -252,7 +266,6 @@ impl PlanContext { partition_filter: Option<Arc<BoundPredicate>>, sender: Sender<ManifestEntryContext>, delete_file_index: DeleteFileIndex, - filter_fn: Option<Arc<ManifestEntryFilterFn>>, ) -> ManifestFileContext { let bound_predicates = if let (Some(ref partition_bound_predicate), Some(snapshot_bound_predicate)) = @@ -275,7 +288,8 @@ impl PlanContext { field_ids: self.field_ids.clone(), expression_evaluator_cache: self.expression_evaluator_cache.clone(), delete_file_index, - filter_fn, + case_sensitive: self.case_sensitive, + filter_fn: None, } } }
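The deadlock fix above reduces to a stable sort keyed on manifest content. A self-contained sketch of the idea, with the structs trimmed to just what the sort needs:

```rust
enum ManifestContentType {
    Data,
    Deletes,
}

struct ManifestFile {
    content: ManifestContentType,
}

/// Plan delete manifests before data manifests so the delete-side consumer
/// can finish before the bounded data channel fills up behind it.
fn order_for_planning(manifests: &mut [&ManifestFile]) {
    // sort_by_key is stable: relative order within each group is preserved.
    manifests.sort_by_key(|m| match m.content {
        ManifestContentType::Deletes => 0,
        ManifestContentType::Data => 1,
    });
}
```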
diff --git a/crates/iceberg/src/scan/incremental/context.rs b/crates/iceberg/src/scan/incremental/context.rs index 7cf04224de..c60c3461d8 100644 --- a/crates/iceberg/src/scan/incremental/context.rs +++ b/crates/iceberg/src/scan/incremental/context.rs @@ -55,6 +55,9 @@ pub(crate) struct IncrementalPlanContext { /// The caching delete file loader to use for the scan. pub caching_delete_file_loader: CachingDeleteFileLoader, + + /// Whether to match column names case-sensitively. + pub case_sensitive: bool, } impl IncrementalPlanContext { @@ -123,6 +126,7 @@ impl IncrementalPlanContext { field_ids: self.field_ids.clone(), expression_evaluator_cache: self.expression_evaluator_cache.clone(), delete_file_index: delete_file_idx.clone(), + case_sensitive: self.case_sensitive, filter_fn: filter_fn.clone(), }; diff --git a/crates/iceberg/src/scan/incremental/mod.rs b/crates/iceberg/src/scan/incremental/mod.rs index ca95ca021c..81f37b70cf 100644 --- a/crates/iceberg/src/scan/incremental/mod.rs +++ b/crates/iceberg/src/scan/incremental/mod.rs @@ -29,8 +29,7 @@ use crate::arrow::{ use crate::delete_file_index::DeleteFileIndex; use crate::io::FileIO; use crate::metadata_columns::{ - RESERVED_COL_NAME_FILE, RESERVED_COL_NAME_UNDERSCORE_POS, get_metadata_field_id, - is_metadata_column_name, + RESERVED_COL_NAME_FILE, RESERVED_COL_NAME_POS, get_metadata_field_id, is_metadata_column_name, }; use crate::scan::DeleteFileContext; use crate::scan::cache::ExpressionEvaluatorCache; @@ -62,6 +61,7 @@ pub struct IncrementalTableScanBuilder<'a> { // None means scan to the current/last snapshot to_snapshot_id: Option<i64>, batch_size: Option<usize>, + case_sensitive: bool, concurrency_limit_data_files: usize, concurrency_limit_manifest_entries: usize, concurrency_limit_manifest_files: usize, @@ -80,6 +80,7 @@ impl<'a> IncrementalTableScanBuilder<'a> { from_snapshot_id, to_snapshot_id, batch_size: None, + case_sensitive: true, concurrency_limit_data_files: num_cpus, concurrency_limit_manifest_entries: num_cpus, concurrency_limit_manifest_files: num_cpus, @@ -190,7 +191,7 @@ impl<'a> IncrementalTableScanBuilder<'a> { }); // Add _pos column - columns.push(RESERVED_COL_NAME_UNDERSCORE_POS.to_string()); + columns.push(RESERVED_COL_NAME_POS.to_string()); self.column_names = Some(columns); self @@ -226,6 +227,12 @@ impl<'a> IncrementalTableScanBuilder<'a> { self } + /// Set whether column names should be matched case-sensitively. + pub fn with_case_sensitive(mut self, case_sensitive: bool) -> Self { + self.case_sensitive = case_sensitive; + self + } + /// Build the incremental table scan. pub fn build(self) -> Result<IncrementalTableScan> { let metadata = self.table.metadata(); @@ -245,7 +252,7 @@ impl<'a> IncrementalTableScanBuilder<'a> { .ok_or_else(|| { Error::new( ErrorKind::DataInvalid, - format!("Snapshot with id {} not found", to_snapshot_id), + format!("Snapshot with id {to_snapshot_id} not found"), ) })? .clone(); @@ -258,7 +265,7 @@ impl<'a> IncrementalTableScanBuilder<'a> { let _ = metadata.snapshot_by_id(from_id).ok_or_else(|| { Error::new( ErrorKind::DataInvalid, - format!("Snapshot with id {} not found", from_id), + format!("Snapshot with id {from_id} not found"), ) })?; Some(from_id) @@ -287,7 +294,7 @@ impl<'a> IncrementalTableScanBuilder<'a> { .ok_or_else(|| { Error::new( ErrorKind::DataInvalid, - format!("Snapshot with id {} not found", from_snapshot_id), + format!("Snapshot with id {from_snapshot_id} not found"), ) })? .clone(); @@ -311,10 +318,7 @@ impl<'a> IncrementalTableScanBuilder<'a> { if schema.field_by_name(column_name).is_none() { return Err(Error::new( ErrorKind::DataInvalid, - format!( - "Column {} not found in table. Schema: {}", - column_name, schema - ), + format!("Column {column_name} not found in table. Schema: {schema}"), )); } } @@ -340,10 +344,7 @@ impl<'a> IncrementalTableScanBuilder<'a> { let field_id = schema.field_id_by_name(column_name).ok_or_else(|| { Error::new( ErrorKind::DataInvalid, - format!( - "Column {} not found in table.
Schema: {}", - column_name, schema - ), + format!("Column {column_name} not found in table. Schema: {schema}"), ) })?; @@ -354,8 +355,7 @@ impl<'a> IncrementalTableScanBuilder<'a> { Error::new( ErrorKind::FeatureUnsupported, format!( - "Column {} is not a direct child of schema but a nested field, which is not supported now. Schema: {}", - column_name, schema + "Column {column_name} is not a direct child of schema but a nested field, which is not supported now. Schema: {schema}" ), ) })?; @@ -375,6 +375,7 @@ impl<'a> IncrementalTableScanBuilder<'a> { self.table.file_io().clone(), self.concurrency_limit_data_files, ), + case_sensitive: self.case_sensitive, }; Ok(IncrementalTableScan { @@ -511,7 +512,7 @@ impl IncrementalTableScan { Err(e) => { return Err(Error::new( ErrorKind::Unexpected, - format!("Failed to load positional deletes: {}", e), + format!("Failed to load positional deletes: {e}"), )); } }; @@ -624,7 +625,7 @@ impl IncrementalTableScan { state.remove_delete_vector(&path).ok_or_else(|| { Error::new( ErrorKind::Unexpected, - format!("DeleteVector for path {} not found", path), + format!("DeleteVector for path {path} not found"), ) }) })?; diff --git a/crates/iceberg/src/scan/incremental/tests.rs b/crates/iceberg/src/scan/incremental/tests.rs index b45f0cc823..6ba4774642 100644 --- a/crates/iceberg/src/scan/incremental/tests.rs +++ b/crates/iceberg/src/scan/incremental/tests.rs @@ -31,7 +31,7 @@ use uuid::Uuid; use crate::TableIdent; use crate::io::{FileIO, OutputFile}; -use crate::metadata_columns::{RESERVED_COL_NAME_FILE, RESERVED_COL_NAME_UNDERSCORE_POS}; +use crate::metadata_columns::{RESERVED_COL_NAME_FILE, RESERVED_COL_NAME_POS}; use crate::spec::{ DataContentType, DataFileBuilder, DataFileFormat, ManifestEntry, ManifestListWriter, ManifestStatus, ManifestWriterBuilder, PartitionSpec, SchemaRef, Struct, TableMetadata, @@ -162,11 +162,11 @@ impl IncrementalTestFixture { }; let manifest_list_location = - table_location.join(format!("metadata/snap-{}-manifest-list.avro", snapshot_id)); + table_location.join(format!("metadata/snap-{snapshot_id}-manifest-list.avro")); manifest_list_locations.push(manifest_list_location.clone()); let parent_str = if let Some(pid) = parent_id { - format!(r#""parent-snapshot-id": {},"#, pid) + format!(r#""parent-snapshot-id": {pid},"#) } else { String::new() }; @@ -190,8 +190,7 @@ impl IncrementalTestFixture { )); snapshot_log_json.push(format!( - r#" {{"snapshot-id": {}, "timestamp-ms": {}}}"#, - snapshot_id, timestamp + r#" {{"snapshot-id": {snapshot_id}, "timestamp-ms": {timestamp}}}"# )); } @@ -994,10 +993,10 @@ impl IncrementalTestFixture { .enumerate() { // Skip this record if it was deleted via positional delete - if let Some(deleted) = file_deleted_positions { - if deleted.contains(&(position as i64)) { - continue; - } + if let Some(deleted) = file_deleted_positions + && deleted.contains(&(position as i64)) + { + continue; } compacted_data.push((*n, d.clone())); } @@ -2519,13 +2518,11 @@ async fn test_incremental_scan_with_file_column() { let file_path = string_array.value(i); assert!( file_path.ends_with(".parquet"), - "File path should end with .parquet: {}", - file_path + "File path should end with .parquet: {file_path}" ); assert!( file_path.contains("/data/"), - "File path should contain /data/: {}", - file_path + "File path should contain /data/: {file_path}" ); } } @@ -2553,7 +2550,7 @@ async fn test_incremental_select_with_pos_column() { let scan = fixture .table .incremental_scan(Some(1), Some(2)) - .select(["n", 
RESERVED_COL_NAME_UNDERSCORE_POS]) + .select(["n", RESERVED_COL_NAME_POS]) .build() .unwrap(); @@ -2578,7 +2575,7 @@ async fn test_incremental_select_with_pos_column() { assert!(batch.column_by_name("n").is_some(), "n column should exist"); // Verify the _pos column exists - let pos_col = batch.column_by_name(RESERVED_COL_NAME_UNDERSCORE_POS); + let pos_col = batch.column_by_name(RESERVED_COL_NAME_POS); assert!( pos_col.is_some(), "_pos column should be present in the batch" @@ -2603,9 +2600,7 @@ async fn test_incremental_select_with_pos_column() { assert_eq!( pos_array.value(i), i as i64, - "Row {} should have position {}", - i, - i + "Row {i} should have position {i}" ); } @@ -2640,7 +2635,7 @@ async fn test_incremental_select_with_pos_column() { ); // Verify the _pos column exists - let pos_col = batch.column_by_name(RESERVED_COL_NAME_UNDERSCORE_POS); + let pos_col = batch.column_by_name(RESERVED_COL_NAME_POS); assert!( pos_col.is_some(), "_pos column should be present when using with_pos_column()" @@ -2661,9 +2656,7 @@ async fn test_incremental_select_with_pos_column() { assert_eq!( pos_array.value(i), i as i64, - "Row {} should have position {}", - i, - i + "Row {i} should have position {i}" ); } } @@ -2692,12 +2685,7 @@ async fn test_incremental_select_with_pos_and_file_columns() { let scan = fixture .table .incremental_scan(Some(1), Some(2)) - .select([ - "n", - RESERVED_COL_NAME_FILE, - "data", - RESERVED_COL_NAME_UNDERSCORE_POS, - ]) + .select(["n", RESERVED_COL_NAME_FILE, "data", RESERVED_COL_NAME_POS]) .build() .unwrap(); @@ -2726,16 +2714,10 @@ async fn test_incremental_select_with_pos_and_file_columns() { assert!(batch.column_by_name("n").is_some()); assert!(batch.column_by_name(RESERVED_COL_NAME_FILE).is_some()); assert!(batch.column_by_name("data").is_some()); - assert!( - batch - .column_by_name(RESERVED_COL_NAME_UNDERSCORE_POS) - .is_some() - ); + assert!(batch.column_by_name(RESERVED_COL_NAME_POS).is_some()); // Verify the _pos column has correct data type - let pos_col = batch - .column_by_name(RESERVED_COL_NAME_UNDERSCORE_POS) - .unwrap(); + let pos_col = batch.column_by_name(RESERVED_COL_NAME_POS).unwrap(); assert_eq!( pos_col.data_type(), &arrow_schema::DataType::Int64, @@ -2748,9 +2730,7 @@ async fn test_incremental_select_with_pos_and_file_columns() { assert_eq!( pos_array.value(i), i as i64, - "Row {} should have position {}", - i, - i + "Row {i} should have position {i}" ); } @@ -2761,8 +2741,7 @@ async fn test_incremental_select_with_pos_and_file_columns() { let file_path = string_array.value(i); assert!( file_path.ends_with(".parquet"), - "File path should end with .parquet: {}", - file_path + "File path should end with .parquet: {file_path}" ); } } @@ -2783,8 +2762,8 @@ async fn test_incremental_scan_with_no_deletes() { for i in 0..20 { let start = i * 10 + 1; let end = (i + 1) * 10; - let data: Vec<_> = (start..=end).map(|n| (n, format!("data-{}", n))).collect(); - operations.push(Operation::Add(data, format!("data-{}.parquet", i))); + let data: Vec<_> = (start..=end).map(|n| (n, format!("data-{n}"))).collect(); + operations.push(Operation::Add(data, format!("data-{i}.parquet"))); } let fixture = IncrementalTestFixture::new(operations).await; @@ -2833,13 +2812,13 @@ async fn test_incremental_scan_deadlock_with_deletes_and_appends() { // - Deadlock! 
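The comments above describe the cycle in prose. As a minimal, self-contained sketch of the same bounded-channel hazard (illustrative only: it uses tokio's `mpsc` directly, not this crate's internal channels, and every name in it is hypothetical):

```rust
use tokio::sync::mpsc;

#[tokio::main]
async fn main() {
    // Capacity 1 mirrors a concurrency limit of 1.
    let (tx, mut rx) = mpsc::channel::<u64>(1);

    // Producer: sends 10 entries; the second `send` waits until a consumer
    // drains the channel. If the consumer were only started *after* this
    // task completed, neither side could make progress -- the deadlock.
    let producer = tokio::spawn(async move {
        for i in 0..10u64 {
            tx.send(i).await.expect("receiver dropped");
        }
    });

    // Running the consumer concurrently breaks the cycle.
    while let Some(i) = rx.recv().await {
        println!("entry {i}");
    }
    producer.await.unwrap();
}
```
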
// Snapshot 1: Create table with some rows - let snapshot1_data: Vec<_> = (1..=100).map(|n| (n, format!("initial-{}", n))).collect(); + let snapshot1_data: Vec<_> = (1..=100).map(|n| (n, format!("initial-{n}"))).collect(); // Snapshot 2: Add more rows - let snapshot2_data: Vec<_> = (101..=200).map(|n| (n, format!("second-{}", n))).collect(); + let snapshot2_data: Vec<_> = (101..=200).map(|n| (n, format!("second-{n}"))).collect(); // Snapshot 3: Add even more rows - let snapshot3_data: Vec<_> = (201..=300).map(|n| (n, format!("third-{}", n))).collect(); + let snapshot3_data: Vec<_> = (201..=300).map(|n| (n, format!("third-{n}"))).collect(); // Snapshot 4: Positional delete of 2 rows from first file let deletes = [(0, "data-1.parquet"), (1, "data-1.parquet")]; @@ -2896,10 +2875,7 @@ async fn test_incremental_scan_deadlock_with_deletes_and_appends() { let total_delete_rows: usize = delete_batches.iter().map(|b| b.num_rows()).sum(); let total_append_rows: usize = append_batches.iter().map(|b| b.num_rows()).sum(); - eprintln!( - "Total delete rows: {}, total append rows: {}", - total_delete_rows, total_append_rows - ); + eprintln!("Total delete rows: {total_delete_rows}, total append rows: {total_append_rows}"); // We expect 2 deletes and 300 appends assert_eq!(total_delete_rows, 0, "Should have 0 deleted rows"); diff --git a/crates/iceberg/src/scan/mod.rs b/crates/iceberg/src/scan/mod.rs index 5992ecf704..1e44ed6ee8 100644 --- a/crates/iceberg/src/scan/mod.rs +++ b/crates/iceberg/src/scan/mod.rs @@ -21,9 +21,7 @@ mod cache; use cache::*; mod context; use context::*; - pub mod incremental; - mod task; use std::sync::Arc; @@ -40,8 +38,7 @@ use crate::expr::visitors::inclusive_metrics_evaluator::InclusiveMetricsEvaluato use crate::expr::{Bind, BoundPredicate, Predicate}; use crate::io::FileIO; use crate::metadata_columns::{ - RESERVED_COL_NAME_FILE, RESERVED_COL_NAME_UNDERSCORE_POS, get_metadata_field_id, - is_metadata_column_name, + RESERVED_COL_NAME_FILE, RESERVED_COL_NAME_POS, get_metadata_field_id, is_metadata_column_name, }; use crate::runtime::spawn; use crate::spec::{DataContentType, SnapshotRef}; @@ -206,7 +203,7 @@ impl<'a> TableScanBuilder<'a> { }); // Add _pos column - columns.push(RESERVED_COL_NAME_UNDERSCORE_POS.to_string()); + columns.push(RESERVED_COL_NAME_POS.to_string()); self.column_names = Some(columns); self @@ -681,7 +678,7 @@ pub mod tests { use crate::arrow::ArrowReaderBuilder; use crate::expr::{BoundPredicate, Reference}; use crate::io::{FileIO, OutputFile}; - use crate::metadata_columns::{RESERVED_COL_NAME_FILE, RESERVED_COL_NAME_UNDERSCORE_POS}; + use crate::metadata_columns::{RESERVED_COL_NAME_FILE, RESERVED_COL_NAME_POS}; use crate::scan::FileScanTask; use crate::spec::{ DataContentType, DataFileBuilder, DataFileFormat, Datum, Literal, ManifestEntry, @@ -1263,6 +1260,97 @@ pub mod tests { writer.close().unwrap(); } } + + pub async fn setup_deadlock_manifests(&mut self) { + let current_snapshot = self.table.metadata().current_snapshot().unwrap(); + let _parent_snapshot = current_snapshot + .parent_snapshot(self.table.metadata()) + .unwrap(); + let current_schema = current_snapshot.schema(self.table.metadata()).unwrap(); + let current_partition_spec = self.table.metadata().default_partition_spec(); + + // 1. 
Write DATA manifest with MULTIPLE entries to fill buffer + let mut writer = ManifestWriterBuilder::new( + self.next_manifest_file(), + Some(current_snapshot.snapshot_id()), + None, + current_schema.clone(), + current_partition_spec.as_ref().clone(), + ) + .build_v2_data(); + + // Add 10 data entries + for i in 0..10 { + writer + .add_entry( + ManifestEntry::builder() + .status(ManifestStatus::Added) + .data_file( + DataFileBuilder::default() + .partition_spec_id(0) + .content(DataContentType::Data) + .file_path(format!("{}/{}.parquet", &self.table_location, i)) + .file_format(DataFileFormat::Parquet) + .file_size_in_bytes(100) + .record_count(1) + .partition(Struct::from_iter([Some(Literal::long(100))])) + .key_metadata(None) + .build() + .unwrap(), + ) + .build(), + ) + .unwrap(); + } + let data_manifest = writer.write_manifest_file().await.unwrap(); + + // 2. Write DELETE manifest + let mut writer = ManifestWriterBuilder::new( + self.next_manifest_file(), + Some(current_snapshot.snapshot_id()), + None, + current_schema.clone(), + current_partition_spec.as_ref().clone(), + ) + .build_v2_deletes(); + + writer + .add_entry( + ManifestEntry::builder() + .status(ManifestStatus::Added) + .data_file( + DataFileBuilder::default() + .partition_spec_id(0) + .content(DataContentType::PositionDeletes) + .file_path(format!("{}/del.parquet", &self.table_location)) + .file_format(DataFileFormat::Parquet) + .file_size_in_bytes(100) + .record_count(1) + .partition(Struct::from_iter([Some(Literal::long(100))])) + .build() + .unwrap(), + ) + .build(), + ) + .unwrap(); + let delete_manifest = writer.write_manifest_file().await.unwrap(); + + // Write to manifest list - DATA FIRST then DELETE + // This order is crucial for reproduction + let mut manifest_list_write = ManifestListWriter::v2( + self.table + .file_io() + .new_output(current_snapshot.manifest_list()) + .unwrap(), + current_snapshot.snapshot_id(), + current_snapshot.parent_snapshot_id(), + current_snapshot.sequence_number(), + ); + manifest_list_write + .add_manifests(vec![data_manifest, delete_manifest].into_iter()) + .unwrap(); + manifest_list_write.close().await.unwrap(); + } } #[test] @@ -1887,6 +1975,7 @@ pub mod tests { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; test_fn(task); @@ -1904,6 +1993,7 @@ pub mod tests { partition: None, partition_spec: None, name_mapping: None, + case_sensitive: false, }; test_fn(task); } @@ -1942,7 +2032,7 @@ pub mod tests { "_file column should be present in the batch" ); - // Verify the _file column contains a file path (simple StringArray) + // Verify the _file column contains a file path (non-REE StringArray) let file_col = file_col.unwrap(); assert_eq!( file_col.data_type(), @@ -1955,8 +2045,7 @@ pub mod tests { let file_path = string_array.value(0); assert!( file_path.ends_with(".parquet"), - "File path should end with .parquet, got: {}", - file_path + "File path should end with .parquet, got: {file_path}" ); } @@ -2058,8 +2147,7 @@ pub mod tests { for path in &file_paths { assert!( path.ends_with(".parquet"), - "All file paths should end with .parquet, got: {}", - path + "All file paths should end with .parquet, got: {path}" ); } } @@ -2214,7 +2302,7 @@ pub mod tests { let table_scan = fixture .table .scan() - .select(["x", RESERVED_COL_NAME_UNDERSCORE_POS]) + .select(["x", RESERVED_COL_NAME_POS]) .with_row_selection_enabled(true) .build() .unwrap(); @@ -2231,7 +2319,7 @@ pub mod tests { assert_eq!(x_arr.value(0), 1); // Verify the _pos column exists - let 
pos_col = batches[0].column_by_name(RESERVED_COL_NAME_UNDERSCORE_POS); + let pos_col = batches[0].column_by_name(RESERVED_COL_NAME_POS); assert!( pos_col.is_some(), "_pos column should be present in the batch" @@ -2256,9 +2344,7 @@ pub mod tests { assert_eq!( pos_array.value(i), i as i64, - "Row {} should have position {}", - i, - i + "Row {i} should have position {i}" ); } @@ -2279,7 +2365,7 @@ pub mod tests { assert_eq!(batches[0].num_columns(), 2); // Verify the _pos column exists - let pos_col = batches[0].column_by_name(RESERVED_COL_NAME_UNDERSCORE_POS); + let pos_col = batches[0].column_by_name(RESERVED_COL_NAME_POS); assert!( pos_col.is_some(), "_pos column should be present when using with_pos_column()" @@ -2300,9 +2386,7 @@ pub mod tests { assert_eq!( pos_array.value(i), i as i64, - "Row {} should have position {}", - i, - i + "Row {i} should have position {i}" ); } } @@ -2318,12 +2402,7 @@ pub mod tests { let table_scan = fixture .table .scan() - .select([ - "x", - RESERVED_COL_NAME_FILE, - "y", - RESERVED_COL_NAME_UNDERSCORE_POS, - ]) + .select(["x", RESERVED_COL_NAME_FILE, "y", RESERVED_COL_NAME_POS]) .with_row_selection_enabled(true) .build() .unwrap(); @@ -2345,7 +2424,7 @@ pub mod tests { assert_eq!(schema.field(2).name(), "y", "Column 2 should be y"); assert_eq!( schema.field(3).name(), - RESERVED_COL_NAME_UNDERSCORE_POS, + RESERVED_COL_NAME_POS, "Column 3 should be _pos" ); @@ -2371,18 +2450,14 @@ pub mod tests { ); // Verify _pos column has valid sequential data - let pos_col = batches[0] - .column_by_name(RESERVED_COL_NAME_UNDERSCORE_POS) - .unwrap(); + let pos_col = batches[0].column_by_name(RESERVED_COL_NAME_POS).unwrap(); let pos_array = pos_col.as_primitive::(); assert_eq!(pos_array.value(0), 0, "First row should have position 0"); for i in 1..pos_array.len().min(10) { assert_eq!( pos_array.value(i), i as i64, - "Row {} should have position {}", - i, - i + "Row {i} should have position {i}" ); } @@ -2390,12 +2465,7 @@ pub mod tests { let table_scan = fixture .table .scan() - .select([ - "x", - RESERVED_COL_NAME_UNDERSCORE_POS, - "y", - RESERVED_COL_NAME_FILE, - ]) + .select(["x", RESERVED_COL_NAME_POS, "y", RESERVED_COL_NAME_FILE]) .with_row_selection_enabled(true) .build() .unwrap(); @@ -2411,7 +2481,7 @@ pub mod tests { assert_eq!(schema.field(0).name(), "x", "Column 0 should be x"); assert_eq!( schema.field(1).name(), - RESERVED_COL_NAME_UNDERSCORE_POS, + RESERVED_COL_NAME_POS, "Column 1 should be _pos" ); assert_eq!(schema.field(2).name(), "y", "Column 2 should be y"); @@ -2422,9 +2492,7 @@ pub mod tests { ); // Verify data is still correct - let pos_col = batches[0] - .column_by_name(RESERVED_COL_NAME_UNDERSCORE_POS) - .unwrap(); + let pos_col = batches[0].column_by_name(RESERVED_COL_NAME_POS).unwrap(); let pos_array = pos_col.as_primitive::(); assert_eq!(pos_array.value(0), 0, "First row should have position 0"); @@ -2440,12 +2508,7 @@ pub mod tests { let table_scan = fixture .table .scan() - .select([ - RESERVED_COL_NAME_FILE, - RESERVED_COL_NAME_UNDERSCORE_POS, - "x", - "y", - ]) + .select([RESERVED_COL_NAME_FILE, RESERVED_COL_NAME_POS, "x", "y"]) .with_row_selection_enabled(true) .build() .unwrap(); @@ -2462,42 +2525,69 @@ pub mod tests { ); assert_eq!( schema.field(1).name(), - RESERVED_COL_NAME_UNDERSCORE_POS, + RESERVED_COL_NAME_POS, "Column 1 should be _pos" ); assert_eq!(schema.field(2).name(), "x", "Column 2 should be x"); assert_eq!(schema.field(3).name(), "y", "Column 3 should be y"); - // Test 4: Both at the end + // Verify data types + assert_eq!( 
+ schema.field(0).data_type(), + &arrow_schema::DataType::Utf8, + "_file column should use Utf8 type" + ); + assert_eq!( + schema.field(1).data_type(), + &arrow_schema::DataType::Int64, + "_pos column should use Int64 type" + ); + + // Verify data is correct + let file_col = batches[0].column_by_name(RESERVED_COL_NAME_FILE).unwrap(); + let file_array = file_col.as_string::<i32>(); + let file_path = file_array.value(0); + assert!( + file_path.ends_with(".parquet"), + "File path should end with .parquet" + ); + + let pos_col = batches[0].column_by_name(RESERVED_COL_NAME_POS).unwrap(); + let pos_array = pos_col.as_primitive::<Int64Type>(); + assert_eq!(pos_array.value(0), 0, "First row should have position 0"); + } + + #[tokio::test] + async fn test_scan_deadlock() { + let mut fixture = TableTestFixture::new(); + fixture.setup_deadlock_manifests().await; + + // Create table scan with concurrency limit 1 + // This sets channel size to 1. + // Data manifest has 10 entries -> will block producer. + // Delete manifest is 2nd in list -> won't be processed. + // Consumer 2 (Data) not started -> blocked. + // Consumer 1 (Delete) waiting -> blocked. let table_scan = fixture .table .scan() - .select([ - "x", - "y", - RESERVED_COL_NAME_UNDERSCORE_POS, - RESERVED_COL_NAME_FILE, - ]) - .with_row_selection_enabled(true) + .with_concurrency_limit(1) .build() .unwrap(); - let batch_stream = table_scan.to_arrow().await.unwrap(); - let batches: Vec<_> = batch_stream.try_collect().await.unwrap(); + // If the deadlock exists this call would hang, so guard it with tokio::time::timeout. + let result = tokio::time::timeout(std::time::Duration::from_secs(5), async { + table_scan + .plan_files() + .await + .unwrap() + .try_collect::<Vec<_>>() + .await + }) + .await; - assert_eq!(batches[0].num_columns(), 4); - let schema = batches[0].schema(); - assert_eq!(schema.field(0).name(), "x", "Column 0 should be x"); - assert_eq!(schema.field(1).name(), "y", "Column 1 should be y"); - assert_eq!( - schema.field(2).name(), - RESERVED_COL_NAME_UNDERSCORE_POS, - "Column 2 should be _pos" - ); - assert_eq!( - schema.field(3).name(), - RESERVED_COL_NAME_FILE, - "Column 3 should be _file" - ); + // Assert the scan finished instead of timing out. + assert!(result.is_ok(), "Scan timed out - deadlock detected"); } } diff --git a/crates/iceberg/src/scan/task.rs b/crates/iceberg/src/scan/task.rs index e1ef241a57..5349a9bdd2 100644 --- a/crates/iceberg/src/scan/task.rs +++ b/crates/iceberg/src/scan/task.rs @@ -104,6 +104,9 @@ pub struct FileScanTask { #[serde(serialize_with = "serialize_not_implemented")] #[serde(deserialize_with = "deserialize_not_implemented")] pub name_mapping: Option<Arc<NameMapping>>, + + /// Whether this scan task should treat column names as case-sensitive when binding predicates. 
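A hedged sketch of how the flag is driven from the builder side (`incremental_scan`, `select`, `with_case_sensitive`, and `build` come from this change; the helper function and its arguments are hypothetical):

```rust
use iceberg::Result;
use iceberg::table::Table;

// Plan an incremental scan whose projection is matched case-insensitively:
// "N" and "DATA" bind to columns declared as "n" and "data". With the
// default (case_sensitive = true) the same projection would be rejected.
fn plan_case_insensitive(table: &Table, from_id: i64, to_id: i64) -> Result<()> {
    let _scan = table
        .incremental_scan(Some(from_id), Some(to_id))
        .select(["N", "DATA"])
        .with_case_sensitive(false)
        .build()?;
    Ok(())
}
```
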
+ pub case_sensitive: bool, } impl FileScanTask { diff --git a/crates/iceberg/src/spec/datatypes.rs b/crates/iceberg/src/spec/datatypes.rs index 456b754408..0379465584 100644 --- a/crates/iceberg/src/spec/datatypes.rs +++ b/crates/iceberg/src/spec/datatypes.rs @@ -427,8 +427,7 @@ impl<'de> Deserialize<'de> for StructType { let type_val: String = map.next_value()?; if type_val != "struct" { return Err(serde::de::Error::custom(format!( - "expected type 'struct', got '{}'", - type_val + "expected type 'struct', got '{type_val}'" ))); } } diff --git a/crates/iceberg/src/spec/manifest/_serde.rs b/crates/iceberg/src/spec/manifest/_serde.rs index 7738af46d4..247b6dde5f 100644 --- a/crates/iceberg/src/spec/manifest/_serde.rs +++ b/crates/iceberg/src/spec/manifest/_serde.rs @@ -22,7 +22,7 @@ use serde_with::serde_as; use super::{Datum, ManifestEntry, Schema, Struct}; use crate::spec::{FormatVersion, Literal, RawLiteral, StructType, Type}; -use crate::{Error, ErrorKind}; +use crate::{Error, ErrorKind, metadata_columns}; #[derive(Serialize, Deserialize)] pub(super) struct ManifestEntryV2 { @@ -153,7 +153,7 @@ impl DataFileSerde { lower_bounds: Some(to_bytes_entry(value.lower_bounds)?), upper_bounds: Some(to_bytes_entry(value.upper_bounds)?), key_metadata: value.key_metadata.map(serde_bytes::ByteBuf::from), - split_offsets: Some(value.split_offsets), + split_offsets: value.split_offsets, equality_ids: value.equality_ids, sort_order_id: value.sort_order_id, first_row_id: value.first_row_id, @@ -222,7 +222,7 @@ impl DataFileSerde { .transpose()? .unwrap_or_default(), key_metadata: self.key_metadata.map(|v| v.to_vec()), - split_offsets: self.split_offsets.unwrap_or_default(), + split_offsets: self.split_offsets, equality_ids: self.equality_ids, sort_order_id: self.sort_order_id, partition_spec_id, @@ -245,8 +245,12 @@ struct BytesEntry { fn parse_bytes_entry(v: Vec<BytesEntry>, schema: &Schema) -> Result<HashMap<i32, Datum>, Error> { let mut m = HashMap::with_capacity(v.len()); for entry in v { - // We ignore the entry if the field is not found in the schema, due to schema evolution. - if let Some(field) = schema.field_by_id(entry.key) { + // First try to find the field in the schema, or check if it's a reserved metadata field + let field = schema + .field_by_id(entry.key) + .or_else(|| metadata_columns::get_metadata_field(entry.key).ok()); + + if let Some(field) = field { let data_type = field .field_type .as_primitive_type() @@ -259,6 +263,7 @@ fn parse_bytes_entry(v: Vec<BytesEntry>, schema: &Schema) -> Result<HashMap<i32, Datum>, Error>, pub(crate) split_offsets: Option<Vec<i64>>, /// field id: 135 /// element field id: 136 /// @@ -247,8 +248,9 @@ impl DataFile { } /// Get the split offsets of the data file. /// For example, all row group offsets in a Parquet file. - pub fn split_offsets(&self) -> &[i64] { - &self.split_offsets + /// Returns `None` if no split offsets are present. + pub fn split_offsets(&self) -> Option<&[i64]> { + self.split_offsets.as_deref() } /// Get the equality ids of the data file. /// Field ids used to determine row equality in equality delete files. 
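With the accessor above now returning `Option<&[i64]>`, a caller that previously iterated the slice directly needs a small adaptation. A sketch under that assumption (the helper itself is hypothetical; only `DataFile::split_offsets` comes from this change):

```rust
use iceberg::spec::DataFile;

// Collect split points, treating an absent field the same way a reader
// would treat an empty list. The two cases are now distinguishable:
// `None` means the manifest never recorded offsets (e.g. older writers),
// while `Some(&[])` means offsets were written but the file has none.
fn row_group_starts(file: &DataFile) -> Vec<i64> {
    match file.split_offsets() {
        Some(offsets) => offsets.to_vec(),
        None => Vec::new(),
    }
}
```
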
diff --git a/crates/iceberg/src/spec/manifest/mod.rs b/crates/iceberg/src/spec/manifest/mod.rs index 51219bfdb7..b126396e3c 100644 --- a/crates/iceberg/src/spec/manifest/mod.rs +++ b/crates/iceberg/src/spec/manifest/mod.rs @@ -257,7 +257,7 @@ mod tests { snapshot_id: None, sequence_number: None, file_sequence_number: None, - data_file: DataFile {content:DataContentType::Data,file_path:"s3a://icebergdata/demo/s1/t1/data/00000-0-ba56fbfa-f2ff-40c9-bb27-565ad6dc2be8-00000.parquet".to_string(),file_format:DataFileFormat::Parquet,partition:Struct::empty(),record_count:1,file_size_in_bytes:5442,column_sizes:HashMap::from([(0,73),(6,34),(2,73),(7,61),(3,61),(5,62),(9,79),(10,73),(1,61),(4,73),(8,73)]),value_counts:HashMap::from([(4,1),(5,1),(2,1),(0,1),(3,1),(6,1),(8,1),(1,1),(10,1),(7,1),(9,1)]),null_value_counts:HashMap::from([(1,0),(6,0),(2,0),(8,0),(0,0),(3,0),(5,0),(9,0),(7,0),(4,0),(10,0)]),nan_value_counts:HashMap::new(),lower_bounds:HashMap::new(),upper_bounds:HashMap::new(),key_metadata:None,split_offsets:vec![4],equality_ids:Some(Vec::new()),sort_order_id:None, partition_spec_id: 0,first_row_id: None,referenced_data_file: None,content_offset: None,content_size_in_bytes: None } + data_file: DataFile {content:DataContentType::Data,file_path:"s3a://icebergdata/demo/s1/t1/data/00000-0-ba56fbfa-f2ff-40c9-bb27-565ad6dc2be8-00000.parquet".to_string(),file_format:DataFileFormat::Parquet,partition:Struct::empty(),record_count:1,file_size_in_bytes:5442,column_sizes:HashMap::from([(0,73),(6,34),(2,73),(7,61),(3,61),(5,62),(9,79),(10,73),(1,61),(4,73),(8,73)]),value_counts:HashMap::from([(4,1),(5,1),(2,1),(0,1),(3,1),(6,1),(8,1),(1,1),(10,1),(7,1),(9,1)]),null_value_counts:HashMap::from([(1,0),(6,0),(2,0),(8,0),(0,0),(3,0),(5,0),(9,0),(7,0),(4,0),(10,0)]),nan_value_counts:HashMap::new(),lower_bounds:HashMap::new(),upper_bounds:HashMap::new(),key_metadata:None,split_offsets:Some(vec![4]),equality_ids:Some(Vec::new()),sort_order_id:None, partition_spec_id: 0,first_row_id: None,referenced_data_file: None,content_offset: None,content_size_in_bytes: None } } ]; @@ -435,7 +435,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: Some(Vec::new()), sort_order_id: None, partition_spec_id: 0, @@ -532,7 +532,7 @@ mod tests { lower_bounds: HashMap::from([(1,Datum::int(1)),(2,Datum::string("a")),(3,Datum::string("AC/DC"))]), upper_bounds: HashMap::from([(1,Datum::int(1)),(2,Datum::string("a")),(3,Datum::string("AC/DC"))]), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: Some(0), partition_spec_id: 0, @@ -640,7 +640,7 @@ mod tests { (3, Datum::string("x")) ]), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: Some(0), partition_spec_id: 0, @@ -749,7 +749,7 @@ mod tests { (3, Datum::string("x")) ]), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -840,7 +840,7 @@ mod tests { (2, Datum::int(2)), ]), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -922,7 +922,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -957,7 
+957,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -992,7 +992,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -1027,7 +1027,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -1182,7 +1182,7 @@ mod tests { "lower_bounds": [], "upper_bounds": [], "key_metadata": null, - "split_offsets": [], + "split_offsets": null, "equality_ids": null, "sort_order_id": null, "first_row_id": null, @@ -1213,7 +1213,7 @@ mod tests { "lower_bounds": [], "upper_bounds": [], "key_metadata": null, - "split_offsets": [], + "split_offsets": null, "equality_ids": null, "sort_order_id": null, "first_row_id": null, diff --git a/crates/iceberg/src/spec/manifest/writer.rs b/crates/iceberg/src/spec/manifest/writer.rs index ebb0590bcf..0669651603 100644 --- a/crates/iceberg/src/spec/manifest/writer.rs +++ b/crates/iceberg/src/spec/manifest/writer.rs @@ -388,10 +388,10 @@ impl ManifestWriter { self.existing_rows += entry.data_file.record_count; } } - if entry.is_alive() { - if let Some(seq_num) = entry.sequence_number { - self.min_seq_num = Some(self.min_seq_num.map_or(seq_num, |v| min(v, seq_num))); - } + if entry.is_alive() + && let Some(seq_num) = entry.sequence_number + { + self.min_seq_num = Some(self.min_seq_num.map_or(seq_num, |v| min(v, seq_num))); } self.manifest_entries.push(entry); Ok(()) @@ -437,9 +437,12 @@ impl ManifestWriter { "format-version".to_string(), (self.metadata.format_version as u8).to_string(), )?; - if self.metadata.format_version == FormatVersion::V2 { - avro_writer - .add_user_metadata("content".to_string(), self.metadata.content.to_string())?; + match self.metadata.format_version { + FormatVersion::V1 => {} + FormatVersion::V2 | FormatVersion::V3 => { + avro_writer + .add_user_metadata("content".to_string(), self.metadata.content.to_string())?; + } } let partition_summary = self.construct_partition_summaries(&partition_type)?; @@ -608,7 +611,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: Some(Vec::new()), - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -637,7 +640,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: Some(Vec::new()), - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -666,7 +669,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: Some(Vec::new()), - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -708,4 +711,93 @@ mod tests { entries[0].file_sequence_number = None; assert_eq!(actual_manifest, Manifest::new(metadata, entries)); } + + #[tokio::test] + async fn test_v3_delete_manifest_delete_file_roundtrip() { + let schema = Arc::new( + Schema::builder() + .with_fields(vec![ + Arc::new(NestedField::optional( + 1, + "id", + Type::Primitive(PrimitiveType::Long), + )), + Arc::new(NestedField::optional( + 2, + "data", + 
Type::Primitive(PrimitiveType::String), + )), + ]) + .build() + .unwrap(), + ); + + let partition_spec = PartitionSpec::builder(schema.clone()) + .with_spec_id(0) + .build() + .unwrap(); + + // Create a position delete file entry + let delete_entry = ManifestEntry { + status: ManifestStatus::Added, + snapshot_id: None, + sequence_number: None, + file_sequence_number: None, + data_file: DataFile { + content: DataContentType::PositionDeletes, + file_path: "s3://bucket/table/data/delete-00000.parquet".to_string(), + file_format: DataFileFormat::Parquet, + partition: Struct::empty(), + record_count: 10, + file_size_in_bytes: 1024, + column_sizes: HashMap::new(), + value_counts: HashMap::new(), + null_value_counts: HashMap::new(), + nan_value_counts: HashMap::new(), + lower_bounds: HashMap::new(), + upper_bounds: HashMap::new(), + key_metadata: None, + split_offsets: None, + equality_ids: None, + sort_order_id: None, + partition_spec_id: 0, + first_row_id: None, + referenced_data_file: None, + content_offset: None, + content_size_in_bytes: None, + }, + }; + + // Write a V3 delete manifest + let tmp_dir = TempDir::new().unwrap(); + let path = tmp_dir.path().join("v3_delete_manifest.avro"); + let io = FileIOBuilder::new_fs_io().build().unwrap(); + let output_file = io.new_output(path.to_str().unwrap()).unwrap(); + + let mut writer = ManifestWriterBuilder::new( + output_file, + Some(1), + None, + schema.clone(), + partition_spec.clone(), + ) + .build_v3_deletes(); + + writer.add_entry(delete_entry).unwrap(); + let manifest_file = writer.write_manifest_file().await.unwrap(); + + // The returned ManifestFile correctly reports Deletes content + assert_eq!(manifest_file.content, ManifestContentType::Deletes); + + // Read back the manifest file + let actual_manifest = + Manifest::parse_avro(fs::read(&path).expect("read_file must succeed").as_slice()) + .unwrap(); + + // Verify the content type is correctly preserved as Deletes + assert_eq!( + actual_manifest.metadata().content, + ManifestContentType::Deletes, + ); + } } diff --git a/crates/iceberg/src/spec/mod.rs b/crates/iceberg/src/spec/mod.rs index 44b35e5a6b..a2b540f08b 100644 --- a/crates/iceberg/src/spec/mod.rs +++ b/crates/iceberg/src/spec/mod.rs @@ -49,6 +49,7 @@ pub use snapshot_summary::*; pub use sort::*; pub use statistic_file::*; pub use table_metadata::*; +pub(crate) use table_metadata_builder::FIRST_FIELD_ID; pub use table_properties::*; pub use transform::*; pub use values::*; diff --git a/crates/iceberg/src/spec/schema/mod.rs b/crates/iceberg/src/spec/schema/mod.rs index 7080b6e700..13ad41818b 100644 --- a/crates/iceberg/src/spec/schema/mod.rs +++ b/crates/iceberg/src/spec/schema/mod.rs @@ -102,8 +102,8 @@ impl SchemaBuilder { /// Reassignment starts from the field-id specified in `start_from` (inclusive). /// /// All specified aliases and identifier fields will be updated to the new field-ids. 
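A toy illustration of the contract described above; real reassignment walks nested fields depth-first and also rewrites aliases and identifier-field ids, so this flat helper is purely hypothetical:

```rust
// Renumber a flat list of field ids starting at `start_from`, inclusive,
// returning (old_id, new_id) pairs.
fn reassign_flat(old_ids: &[i32], start_from: i32) -> Vec<(i32, i32)> {
    old_ids
        .iter()
        .enumerate()
        .map(|(i, &old)| (old, start_from + i as i32))
        .collect()
}

// reassign_flat(&[7, 12, 40], 1) == [(7, 1), (12, 2), (40, 3)]
```
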
- pub(crate) fn with_reassigned_field_ids(mut self, start_from: u32) -> Self { - self.reassign_field_ids_from = Some(start_from.try_into().unwrap_or(i32::MAX)); + pub(crate) fn with_reassigned_field_ids(mut self, start_from: i32) -> Self { + self.reassign_field_ids_from = Some(start_from); self } diff --git a/crates/iceberg/src/spec/schema/prune_columns.rs b/crates/iceberg/src/spec/schema/prune_columns.rs index 5a2f0b50fc..14f1bfd25f 100644 --- a/crates/iceberg/src/spec/schema/prune_columns.rs +++ b/crates/iceberg/src/spec/schema/prune_columns.rs @@ -110,19 +110,19 @@ impl SchemaVisitor for PruneColumn { if self.select_full_types { Ok(Some(*field.field_type.clone())) } else if field.field_type.is_struct() { - return Ok(Some(Type::Struct(PruneColumn::project_selected_struct( + Ok(Some(Type::Struct(PruneColumn::project_selected_struct( value, - )?))); + )?))) } else if !field.field_type.is_nested() { - return Ok(Some(*field.field_type.clone())); + Ok(Some(*field.field_type.clone())) } else { - return Err(Error::new( + Err(Error::new( ErrorKind::DataInvalid, "Can't project list or map field directly when not selecting full type." .to_string(), ) .with_context("field_id", field.id.to_string()) - .with_context("field_type", field.field_type.to_string())); + .with_context("field_type", field.field_type.to_string())) } } else { Ok(value) @@ -174,20 +174,20 @@ impl SchemaVisitor for PruneColumn { Ok(Some(Type::List(list.clone()))) } else if list.element_field.field_type.is_struct() { let projected_struct = PruneColumn::project_selected_struct(value).unwrap(); - return Ok(Some(Type::List(PruneColumn::project_list( + Ok(Some(Type::List(PruneColumn::project_list( list, Type::Struct(projected_struct), - )?))); + )?))) } else if list.element_field.field_type.is_primitive() { - return Ok(Some(Type::List(list.clone()))); + Ok(Some(Type::List(list.clone()))) } else { - return Err(Error::new( + Err(Error::new( ErrorKind::DataInvalid, format!( "Cannot explicitly project List or Map types, List element {} of type {} was selected", list.element_field.id, list.element_field.field_type ), - )); + )) } } else if let Some(result) = value { Ok(Some(Type::List(PruneColumn::project_list(list, result)?))) @@ -208,26 +208,26 @@ impl SchemaVisitor for PruneColumn { } else if map.value_field.field_type.is_struct() { let projected_struct = PruneColumn::project_selected_struct(Some(value.unwrap())).unwrap(); - return Ok(Some(Type::Map(PruneColumn::project_map( + Ok(Some(Type::Map(PruneColumn::project_map( map, Type::Struct(projected_struct), - )?))); + )?))) } else if map.value_field.field_type.is_primitive() { - return Ok(Some(Type::Map(map.clone()))); + Ok(Some(Type::Map(map.clone()))) } else { - return Err(Error::new( + Err(Error::new( ErrorKind::DataInvalid, format!( "Cannot explicitly project List or Map types, Map value {} of type {} was selected", map.value_field.id, map.value_field.field_type ), - )); + )) } } else if let Some(value_result) = value { - return Ok(Some(Type::Map(PruneColumn::project_map( + Ok(Some(Type::Map(PruneColumn::project_map( map, value_result, - )?))); + )?))) } else if self.selected.contains(&map.key_field.id) { Ok(Some(Type::Map(map.clone()))) } else { diff --git a/crates/iceberg/src/spec/snapshot.rs b/crates/iceberg/src/spec/snapshot.rs index 5371cf68f2..270279988b 100644 --- a/crates/iceberg/src/spec/snapshot.rs +++ b/crates/iceberg/src/spec/snapshot.rs @@ -266,9 +266,9 @@ pub(super) mod _serde { use serde::{Deserialize, Serialize}; use super::{Operation, Snapshot, Summary}; - use 
crate::Error; use crate::spec::SchemaId; use crate::spec::snapshot::SnapshotRowRange; + use crate::{Error, ErrorKind}; #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "kebab-case")] @@ -408,9 +408,19 @@ timestamp_ms: v1.timestamp_ms, manifest_list: match (v1.manifest_list, v1.manifests) { (Some(file), None) => file, - (Some(_), Some(_)) => "Invalid v1 snapshot, when manifest list provided, manifest files should be omitted".to_string(), - (None, _) => "Unsupported v1 snapshot, only manifest list is supported".to_string() - }, + (Some(_), Some(_)) => { + return Err(Error::new( + ErrorKind::DataInvalid, + "Invalid v1 snapshot, when manifest list provided, manifest files should be omitted", + )); + } + (None, _) => { + return Err(Error::new( + ErrorKind::DataInvalid, + "Unsupported v1 snapshot, only manifest list is supported", + )); + } + }, summary: v1.summary.unwrap_or(Summary { operation: Operation::default(), additional_properties: HashMap::new(), @@ -517,6 +527,7 @@ mod tests { use chrono::{TimeZone, Utc}; + use crate::spec::TableMetadata; use crate::spec::snapshot::_serde::SnapshotV1; use crate::spec::snapshot::{Operation, Snapshot, Summary}; @@ -604,6 +615,84 @@ ); } + #[test] + fn test_v1_snapshot_with_manifest_list_and_manifests() { + { + let metadata = r#" + { + "format-version": 1, + "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c", + "location": "s3://bucket/test/location", + "last-updated-ms": 1700000000000, + "last-column-id": 1, + "schema": { + "type": "struct", + "fields": [ + {"id": 1, "name": "x", "required": true, "type": "long"} + ] + }, + "partition-spec": [], + "properties": {}, + "current-snapshot-id": 111111111, + "snapshots": [ + { + "snapshot-id": 111111111, + "timestamp-ms": 1600000000000, + "summary": {"operation": "append"}, + "manifest-list": "s3://bucket/metadata/snap-123.avro", + "manifests": ["s3://bucket/metadata/manifest-1.avro"] + } + ] + } + "#; + + let result_both_manifest_list_and_manifest_set = + serde_json::from_str::<TableMetadata>(metadata); + assert!(result_both_manifest_list_and_manifest_set.is_err()); + assert_eq!( + result_both_manifest_list_and_manifest_set + .unwrap_err() + .to_string(), + "DataInvalid => Invalid v1 snapshot, when manifest list provided, manifest files should be omitted" + ) + } + + { + let metadata = r#" + { + "format-version": 1, + "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c", + "location": "s3://bucket/test/location", + "last-updated-ms": 1700000000000, + "last-column-id": 1, + "schema": { + "type": "struct", + "fields": [ + {"id": 1, "name": "x", "required": true, "type": "long"} + ] + }, + "partition-spec": [], + "properties": {}, + "current-snapshot-id": 111111111, + "snapshots": [ + { + "snapshot-id": 111111111, + "timestamp-ms": 1600000000000, + "summary": {"operation": "append"}, + "manifests": ["s3://bucket/metadata/manifest-1.avro"] + } + ] + } + "#; + let result_missing_manifest_list = serde_json::from_str::<TableMetadata>(metadata); + assert!(result_missing_manifest_list.is_err()); + assert_eq!( + result_missing_manifest_list.unwrap_err().to_string(), + "DataInvalid => Unsupported v1 snapshot, only manifest list is supported" + ) + } + } + #[test] fn test_snapshot_v1_to_v2_with_missing_summary() { use crate::spec::snapshot::_serde::SnapshotV1; diff --git a/crates/iceberg/src/spec/snapshot_summary.rs b/crates/iceberg/src/spec/snapshot_summary.rs index 4cd3715e06..c67ee37d3e 100644 --- a/crates/iceberg/src/spec/snapshot_summary.rs +++ 
b/crates/iceberg/src/spec/snapshot_summary.rs @@ -767,7 +767,7 @@ mod tests { (3, Datum::string("x")), ]), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: Some(0), partition_spec_id: 0, @@ -799,7 +799,7 @@ mod tests { (3, Datum::string("x")), ]), key_metadata: None, - split_offsets: vec![4], + split_offsets: Some(vec![4]), equality_ids: None, sort_order_id: Some(0), partition_spec_id: 0, @@ -910,7 +910,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -938,7 +938,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, @@ -993,7 +993,7 @@ mod tests { lower_bounds: HashMap::new(), upper_bounds: HashMap::new(), key_metadata: None, - split_offsets: vec![], + split_offsets: None, equality_ids: None, sort_order_id: None, partition_spec_id: 0, diff --git a/crates/iceberg/src/spec/table_metadata.rs b/crates/iceberg/src/spec/table_metadata.rs index 06b32cc847..cfa25deccb 100644 --- a/crates/iceberg/src/spec/table_metadata.rs +++ b/crates/iceberg/src/spec/table_metadata.rs @@ -390,18 +390,18 @@ impl TableMetadata { } fn construct_refs(&mut self) { - if let Some(current_snapshot_id) = self.current_snapshot_id { - if !self.refs.contains_key(MAIN_BRANCH) { - self.refs - .insert(MAIN_BRANCH.to_string(), SnapshotReference { - snapshot_id: current_snapshot_id, - retention: SnapshotRetention::Branch { - min_snapshots_to_keep: None, - max_snapshot_age_ms: None, - max_ref_age_ms: None, - }, - }); - } + if let Some(current_snapshot_id) = self.current_snapshot_id + && !self.refs.contains_key(MAIN_BRANCH) + { + self.refs + .insert(MAIN_BRANCH.to_string(), SnapshotReference { + snapshot_id: current_snapshot_id, + retention: SnapshotRetention::Branch { + min_snapshots_to_keep: None, + max_snapshot_age_ms: None, + max_ref_age_ms: None, + }, + }); } } @@ -506,6 +506,19 @@ impl TableMetadata { /// If the default sort order is unsorted but the sort order is not present, add it fn try_normalize_sort_order(&mut self) -> Result<()> { + // Validate that sort order ID 0 (reserved for unsorted) has no fields + if let Some(sort_order) = self.sort_order_by_id(SortOrder::UNSORTED_ORDER_ID) + && !sort_order.fields.is_empty() + { + return Err(Error::new( + ErrorKind::Unexpected, + format!( + "Sort order ID {} is reserved for unsorted order", + SortOrder::UNSORTED_ORDER_ID + ), + )); + } + if self.sort_order_by_id(self.default_sort_order_id).is_some() { return Ok(()); } @@ -572,17 +585,17 @@ impl TableMetadata { let main_ref = self.refs.get(MAIN_BRANCH); if self.current_snapshot_id.is_some() { - if let Some(main_ref) = main_ref { - if main_ref.snapshot_id != self.current_snapshot_id.unwrap_or_default() { - return Err(Error::new( - ErrorKind::DataInvalid, - format!( - "Current snapshot id does not match main branch ({:?} != {:?})", - self.current_snapshot_id.unwrap_or_default(), - main_ref.snapshot_id - ), - )); - } + if let Some(main_ref) = main_ref + && main_ref.snapshot_id != self.current_snapshot_id.unwrap_or_default() + { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Current snapshot id does not match main branch ({:?} != {:?})", + self.current_snapshot_id.unwrap_or_default(), + main_ref.snapshot_id + ), + )); } } else if 
main_ref.is_some() { return Err(Error::new( @@ -606,22 +619,21 @@ impl TableMetadata { )); } - if self.format_version >= FormatVersion::V2 { - if let Some(snapshot) = self + if self.format_version >= FormatVersion::V2 + && let Some(snapshot) = self .snapshots .values() .find(|snapshot| snapshot.sequence_number() > self.last_sequence_number) - { - return Err(Error::new( - ErrorKind::DataInvalid, - format!( - "Invalid snapshot with id {} and sequence number {} greater than last sequence number {}", - snapshot.snapshot_id(), - snapshot.sequence_number(), - self.last_sequence_number - ), - )); - } + { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Invalid snapshot with id {} and sequence number {} greater than last sequence number {}", + snapshot.snapshot_id(), + snapshot.sequence_number(), + self.last_sequence_number + ), + )); } Ok(()) @@ -3796,4 +3808,57 @@ mod tests { assert!(final_metadata.name_exists_in_any_schema("new_field")); // only in current schema assert!(!final_metadata.name_exists_in_any_schema("never_existed")); } + + #[test] + fn test_invalid_sort_order_id_zero_with_fields() { + let metadata = r#" + { + "format-version": 2, + "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", + "location": "s3://bucket/test/location", + "last-sequence-number": 111, + "last-updated-ms": 1600000000000, + "last-column-id": 3, + "current-schema-id": 1, + "schemas": [ + { + "type": "struct", + "schema-id": 1, + "fields": [ + {"id": 1, "name": "x", "required": true, "type": "long"}, + {"id": 2, "name": "y", "required": true, "type": "long"} + ] + } + ], + "default-spec-id": 0, + "partition-specs": [{"spec-id": 0, "fields": []}], + "last-partition-id": 999, + "default-sort-order-id": 0, + "sort-orders": [ + { + "order-id": 0, + "fields": [ + { + "transform": "identity", + "source-id": 1, + "direction": "asc", + "null-order": "nulls-first" + } + ] + } + ], + "properties": {}, + "current-snapshot-id": -1, + "snapshots": [] + } + "#; + + let result: Result = serde_json::from_str(metadata); + + // Should fail because sort order ID 0 is reserved for unsorted order and cannot have fields + assert!( + result.is_err(), + "Parsing should fail for sort order ID 0 with fields" + ); + } } diff --git a/crates/iceberg/src/spec/table_metadata_builder.rs b/crates/iceberg/src/spec/table_metadata_builder.rs index 6b8ce1e6a5..3db327d48a 100644 --- a/crates/iceberg/src/spec/table_metadata_builder.rs +++ b/crates/iceberg/src/spec/table_metadata_builder.rs @@ -31,7 +31,7 @@ use crate::error::{Error, ErrorKind, Result}; use crate::spec::{EncryptedKey, INITIAL_ROW_ID, MIN_FORMAT_VERSION_ROW_LINEAGE}; use crate::{TableCreation, TableUpdate}; -const FIRST_FIELD_ID: u32 = 1; +pub(crate) const FIRST_FIELD_ID: i32 = 1; /// Manipulating table metadata. 
/// @@ -572,7 +572,6 @@ impl TableMetadataBuilder { pub fn remove_ref(mut self, ref_name: &str) -> Self { if ref_name == MAIN_BRANCH { self.metadata.current_snapshot_id = None; - self.metadata.snapshot_log.clear(); } if self.metadata.refs.remove(ref_name).is_some() || ref_name == MAIN_BRANCH { @@ -2237,6 +2236,73 @@ mod tests { assert_eq!(result.metadata.current_snapshot().unwrap().snapshot_id(), 2); } + #[test] + fn test_remove_main_ref_keeps_snapshot_log() { + let builder = builder_without_changes(FormatVersion::V2); + + let snapshot = Snapshot::builder() + .with_snapshot_id(1) + .with_timestamp_ms(builder.metadata.last_updated_ms + 1) + .with_sequence_number(0) + .with_schema_id(0) + .with_manifest_list("/snap-1.avro") + .with_summary(Summary { + operation: Operation::Append, + additional_properties: HashMap::from_iter(vec![ + ( + "spark.app.id".to_string(), + "local-1662532784305".to_string(), + ), + ("added-data-files".to_string(), "4".to_string()), + ("added-records".to_string(), "4".to_string()), + ("added-files-size".to_string(), "6001".to_string()), + ]), + }) + .build(); + + let result = builder + .add_snapshot(snapshot.clone()) + .unwrap() + .set_ref(MAIN_BRANCH, SnapshotReference { + snapshot_id: 1, + retention: SnapshotRetention::Branch { + min_snapshots_to_keep: Some(10), + max_snapshot_age_ms: None, + max_ref_age_ms: None, + }, + }) + .unwrap() + .build() + .unwrap(); + + // Verify snapshot log was created + assert_eq!(result.metadata.snapshot_log.len(), 1); + assert_eq!(result.metadata.snapshot_log[0].snapshot_id, 1); + assert_eq!(result.metadata.current_snapshot_id, Some(1)); + + // Remove the main ref + let result_after_remove = result + .metadata + .into_builder(Some( + "s3://bucket/test/location/metadata/metadata2.json".to_string(), + )) + .remove_ref(MAIN_BRANCH) + .build() + .unwrap(); + + // Verify snapshot log is kept even after removing main ref + assert_eq!(result_after_remove.metadata.snapshot_log.len(), 1); + assert_eq!(result_after_remove.metadata.snapshot_log[0].snapshot_id, 1); + assert_eq!(result_after_remove.metadata.current_snapshot_id, None); + assert_eq!(result_after_remove.changes.len(), 1); + assert_eq!( + result_after_remove.changes[0], + TableUpdate::RemoveSnapshotRef { + ref_name: MAIN_BRANCH.to_string() + } + ); + } + #[test] fn test_set_branch_snapshot_creates_branch_if_not_exists() { let builder = builder_without_changes(FormatVersion::V2); diff --git a/crates/iceberg/src/spec/table_properties.rs b/crates/iceberg/src/spec/table_properties.rs index 4975456010..413604f51c 100644 --- a/crates/iceberg/src/spec/table_properties.rs +++ b/crates/iceberg/src/spec/table_properties.rs @@ -49,6 +49,8 @@ pub struct TableProperties { pub write_format_default: String, /// The target file size for files. pub write_target_file_size_bytes: usize, + /// Whether to use `FanoutWriter` for partitioned tables. + pub write_datafusion_fanout_enabled: bool, } impl TableProperties { @@ -137,6 +139,11 @@ impl TableProperties { pub const PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES: &str = "write.target-file-size-bytes"; /// Default target file size pub const PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT: usize = 512 * 1024 * 1024; // 512 MB + /// Whether to use `FanoutWriter` for partitioned tables (handles unsorted data). + /// If false, uses `ClusteredWriter` (requires sorted data, more memory efficient). 
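The trade-off this property selects, as a small sketch (the `TableProperties` field comes from this change; the dispatch function is hypothetical):

```rust
use iceberg::spec::TableProperties;

// Choose a writer strategy from the new property.
fn writer_strategy(props: &TableProperties) -> &'static str {
    if props.write_datafusion_fanout_enabled {
        // FanoutWriter: accepts rows in any order but keeps one open file
        // per partition it has seen, so memory grows with cardinality.
        "fanout"
    } else {
        // ClusteredWriter: holds a single open file at a time, so input
        // must already arrive clustered (sorted) by partition value.
        "clustered"
    }
}
```
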
+ pub const PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED: &str = "write.datafusion.fanout.enabled"; + /// Default value for fanout writer enabled + pub const PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED_DEFAULT: bool = true; } impl TryFrom<&HashMap> for TableProperties { @@ -175,6 +182,11 @@ impl TryFrom<&HashMap> for TableProperties { TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES, TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT, )?, + write_datafusion_fanout_enabled: parse_property( + props, + TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED, + TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED_DEFAULT, + )?, }) } } diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 6068716eff..354dc1889c 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -711,10 +711,10 @@ impl Transform { PredicateOperator::GreaterThan => Some(PredicateOperator::GreaterThanOrEq), PredicateOperator::StartsWith => match datum.literal() { PrimitiveLiteral::String(s) => { - if let Some(w) = width { - if s.len() == w as usize { - return Some(PredicateOperator::Eq); - }; + if let Some(w) = width + && s.len() == w as usize + { + return Some(PredicateOperator::Eq); }; Some(*op) } @@ -757,47 +757,45 @@ impl Transform { _ => false, }; - if should_adjust { - if let &PrimitiveLiteral::Int(v) = transformed.literal() { - match op { - PredicateOperator::LessThan - | PredicateOperator::LessThanOrEq - | PredicateOperator::In => { - if v < 0 { + if should_adjust && let &PrimitiveLiteral::Int(v) = transformed.literal() { + match op { + PredicateOperator::LessThan + | PredicateOperator::LessThanOrEq + | PredicateOperator::In => { + if v < 0 { + // # TODO + // An ugly hack to fix. Refine the increment and decrement logic later. + match self { + Transform::Day => { + return Some(AdjustedProjection::Single(Datum::date(v + 1))); + } + _ => { + return Some(AdjustedProjection::Single(Datum::int(v + 1))); + } + } + }; + } + PredicateOperator::Eq => { + if v < 0 { + let new_set = FnvHashSet::from_iter(vec![ + transformed.to_owned(), // # TODO // An ugly hack to fix. Refine the increment and decrement logic later. - match self { - Transform::Day => { - return Some(AdjustedProjection::Single(Datum::date(v + 1))); + { + match self { + Transform::Day => Datum::date(v + 1), + _ => Datum::int(v + 1), } - _ => { - return Some(AdjustedProjection::Single(Datum::int(v + 1))); - } - } - }; - } - PredicateOperator::Eq => { - if v < 0 { - let new_set = FnvHashSet::from_iter(vec![ - transformed.to_owned(), - // # TODO - // An ugly hack to fix. Refine the increment and decrement logic later. 
- { - match self { - Transform::Day => Datum::date(v + 1), - _ => Datum::int(v + 1), - } - }, - ]); - return Some(AdjustedProjection::Set(new_set)); - } - } - _ => { - return None; + }, + ]); + return Some(AdjustedProjection::Set(new_set)); } } - }; - } + _ => { + return None; + } + } + }; None } diff --git a/crates/iceberg/src/spec/values/datum.rs b/crates/iceberg/src/spec/values/datum.rs index cb60fb94e9..88209ae95c 100644 --- a/crates/iceberg/src/spec/values/datum.rs +++ b/crates/iceberg/src/spec/values/datum.rs @@ -166,36 +166,16 @@ impl<'de> Deserialize<'de> for Datum { // Compare following iceberg float ordering rules: // -NaN < -Infinity < -value < -0 < 0 < value < Infinity < NaN -fn iceberg_float_cmp(a: T, b: T) -> Option { - if a.is_nan() && b.is_nan() { - return match (a.is_sign_negative(), b.is_sign_negative()) { - (true, false) => Some(Ordering::Less), - (false, true) => Some(Ordering::Greater), - _ => Some(Ordering::Equal), - }; - } - - if a.is_nan() { - return Some(if a.is_sign_negative() { - Ordering::Less - } else { - Ordering::Greater - }); - } - - if b.is_nan() { - return Some(if b.is_sign_negative() { - Ordering::Greater - } else { - Ordering::Less - }); - } +fn iceberg_float_cmp_f32(a: OrderedFloat, b: OrderedFloat) -> Option { + Some(a.total_cmp(&b)) +} - a.partial_cmp(&b) +fn iceberg_float_cmp_f64(a: OrderedFloat, b: OrderedFloat) -> Option { + Some(a.total_cmp(&b)) } impl PartialOrd for Datum { - fn partial_cmp(&self, other: &Self) -> Option { + fn partial_cmp(&self, other: &Self) -> Option { match (&self.literal, &other.literal, &self.r#type, &other.r#type) { // generate the arm with same type and same literal ( @@ -221,13 +201,13 @@ impl PartialOrd for Datum { PrimitiveLiteral::Float(other_val), PrimitiveType::Float, PrimitiveType::Float, - ) => iceberg_float_cmp(*val, *other_val), + ) => iceberg_float_cmp_f32(*val, *other_val), ( PrimitiveLiteral::Double(val), PrimitiveLiteral::Double(other_val), PrimitiveType::Double, PrimitiveType::Double, - ) => iceberg_float_cmp(*val, *other_val), + ) => iceberg_float_cmp_f64(*val, *other_val), ( PrimitiveLiteral::Int(val), PrimitiveLiteral::Int(other_val), diff --git a/crates/iceberg/src/spec/values/tests.rs b/crates/iceberg/src/spec/values/tests.rs index 0e99d44dfe..bb10701d87 100644 --- a/crates/iceberg/src/spec/values/tests.rs +++ b/crates/iceberg/src/spec/values/tests.rs @@ -447,7 +447,7 @@ fn check_raw_literal_bytes_error_via_avro(input_bytes: Vec, expected_type: & let avro_value = Value::Bytes(input_bytes); let raw_literal: _serde::RawLiteral = apache_avro::from_value(&avro_value).unwrap(); let result = raw_literal.try_into(expected_type); - assert!(result.is_err(), "Expected error but got: {:?}", result); + assert!(result.is_err(), "Expected error but got: {result:?}"); } #[test] @@ -1293,6 +1293,31 @@ fn test_iceberg_float_order() { assert_eq!(double_sorted, double_expected); } +#[test] +fn test_negative_zero_less_than_positive_zero() { + { + let neg_zero = Datum::float(-0.0); + let pos_zero = Datum::float(0.0); + + assert_eq!( + neg_zero.partial_cmp(&pos_zero), + Some(std::cmp::Ordering::Less), + "IEEE 754 totalOrder requires -0.0 < +0.0 on F32" + ); + } + + { + let neg_zero = Datum::double(-0.0); + let pos_zero = Datum::double(0.0); + + assert_eq!( + neg_zero.partial_cmp(&pos_zero), + Some(std::cmp::Ordering::Less), + "IEEE 754 totalOrder requires -0.0 < +0.0 on F64" + ); + } +} + /// Test Date deserialization from JSON as number (days since epoch). 
/// /// This reproduces the scenario from Iceberg Java's TestAddFilesProcedure where: diff --git a/crates/iceberg/src/spec/view_metadata_builder.rs b/crates/iceberg/src/spec/view_metadata_builder.rs index 9f542a7c61..38041ca625 100644 --- a/crates/iceberg/src/spec/view_metadata_builder.rs +++ b/crates/iceberg/src/spec/view_metadata_builder.rs @@ -478,10 +478,10 @@ impl ViewMetadataBuilder { // as it might panic if the metadata is invalid. self.metadata.validate()?; - if let Some(previous) = self.previous_view_version.take() { - if !allow_replace_drop_dialects(&self.metadata.properties) { - require_no_dialect_dropped(&previous, self.metadata.current_version())?; - } + if let Some(previous) = self.previous_view_version.take() + && !allow_replace_drop_dialects(&self.metadata.properties) + { + require_no_dialect_dropped(&previous, self.metadata.current_version())?; } let _expired_versions = self.expire_versions(); diff --git a/crates/iceberg/src/transaction/mod.rs b/crates/iceberg/src/transaction/mod.rs index 4116264a14..8ddaa26698 100644 --- a/crates/iceberg/src/transaction/mod.rs +++ b/crates/iceberg/src/transaction/mod.rs @@ -518,7 +518,7 @@ mod test_row_lineage { fn file_with_rows(record_count: u64) -> DataFile { DataFileBuilder::default() .content(DataContentType::Data) - .file_path(format!("test/{}.parquet", record_count)) + .file_path(format!("test/{record_count}.parquet")) .file_format(DataFileFormat::Parquet) .file_size_in_bytes(100) .record_count(record_count) diff --git a/crates/iceberg/src/transaction/snapshot.rs b/crates/iceberg/src/transaction/snapshot.rs index 6b3d0e4ff4..c8bf26a174 100644 --- a/crates/iceberg/src/transaction/snapshot.rs +++ b/crates/iceberg/src/transaction/snapshot.rs @@ -34,13 +34,53 @@ use crate::{Error, ErrorKind, TableRequirement, TableUpdate}; const META_ROOT_PATH: &str = "metadata"; +/// A trait that defines how different table operations produce new snapshots. +/// +/// `SnapshotProduceOperation` is used by [`SnapshotProducer`] to customize snapshot creation +/// based on the type of operation being performed (e.g., `Append`, `Overwrite`, `Delete`). +/// Each operation type implements this trait to specify: +/// - Which operation type to record in the snapshot summary +/// - Which existing manifest files should be included in the new snapshot +/// - Which manifest entries should be marked as deleted +/// +/// # How it is used +/// +/// This trait is used during the snapshot creation process in [`SnapshotProducer::commit()`]: +/// +/// 1. **Operation Type Recording**: The `operation()` method determines which operation type +/// (e.g., `Operation::Append`, `Operation::Overwrite`) is recorded in the snapshot summary. +/// This metadata helps track what kind of change was made to the table. +/// +/// 2. **Manifest File Selection**: The `existing_manifest()` method determines which existing +/// manifest files from the current snapshot should be carried forward to the new snapshot. +/// For example: +/// - An `Append` operation typically includes all existing manifests plus new ones +/// - An `Overwrite` operation might exclude manifests for partitions being overwritten +/// +/// 3. **Delete Entry Processing**: The `delete_entries()` method is intended for future delete +/// operations to specify which manifest entries should be marked as deleted. pub(crate) trait SnapshotProduceOperation: Send + Sync { + /// Returns the operation type that will be recorded in the snapshot summary. 
+ /// + /// This determines what kind of operation is being performed (e.g., `Append`, `Overwrite`), + /// which is stored in the snapshot metadata for tracking and auditing purposes. fn operation(&self) -> Operation; + + /// Returns manifest entries that should be marked as deleted in the new snapshot. #[allow(unused)] fn delete_entries( &self, snapshot_produce: &SnapshotProducer, ) -> impl Future>> + Send; + + /// Returns existing manifest files that should be included in the new snapshot. + /// + /// This method determines which manifest files from the current snapshot should be + /// carried forward to the new snapshot. The selection depends on the operation type: + /// + /// - **Append operations**: Typically include all existing manifests + /// - **Overwrite operations**: May exclude manifests for partitions being overwritten + /// - **Delete operations**: May exclude manifests for partitions being deleted fn existing_manifest( &self, snapshot_produce: &SnapshotProducer<'_>, @@ -236,13 +276,13 @@ impl<'a> SnapshotProducer<'a> { "Partition field should only be primitive type.", ) })?; - if let Some(value) = value { - if !field.compatible(&value.as_primitive_literal().unwrap()) { - return Err(Error::new( - ErrorKind::DataInvalid, - "Partition value is not compatible partition type", - )); - } + if let Some(value) = value + && !field.compatible(&value.as_primitive_literal().unwrap()) + { + return Err(Error::new( + ErrorKind::DataInvalid, + "Partition value is not compatible partition type", + )); } } Ok(()) diff --git a/crates/iceberg/src/transform/bucket.rs b/crates/iceberg/src/transform/bucket.rs index 8807fb1f79..e6786a70ca 100644 --- a/crates/iceberg/src/transform/bucket.rs +++ b/crates/iceberg/src/transform/bucket.rs @@ -78,12 +78,26 @@ impl Bucket { /// ref: https://iceberg.apache.org/spec/#appendix-b-32-bit-hash-requirements #[inline] fn hash_decimal(v: i128) -> i32 { + if v == 0 { + return Self::hash_bytes(&[0]); + } + let bytes = v.to_be_bytes(); - if let Some(start) = bytes.iter().position(|&x| x != 0) { - Self::hash_bytes(&bytes[start..]) + let start = if v > 0 { + // Positive: skip 0x00 unless next byte would appear negative + bytes + .windows(2) + .position(|w| w[0] != 0x00 || w[1] & 0x80 != 0) + .unwrap_or(15) } else { - Self::hash_bytes(&[0]) - } + // Negative: skip 0xFF only if next byte stays negative + bytes + .windows(2) + .position(|w| w[0] != 0xFF || w[1] & 0x80 == 0) + .unwrap_or(15) + }; + + Self::hash_bytes(&bytes[start..]) } /// def bucket_N(x) = (murmur3_x86_32_hash(x) & Integer.MAX_VALUE) % N @@ -790,6 +804,27 @@ mod test { ); } + #[test] + fn test_hash_decimal_with_negative_value() { + // Test cases from GitHub issue #1981 + assert_eq!(Bucket::hash_decimal(1), -463810133); + assert_eq!(Bucket::hash_decimal(-1), -43192051); + + // Additional test cases for edge case values + assert_eq!(Bucket::hash_decimal(0), Bucket::hash_decimal(0)); + assert_eq!(Bucket::hash_decimal(127), Bucket::hash_decimal(127)); + assert_eq!(Bucket::hash_decimal(-128), Bucket::hash_decimal(-128)); + + // Test minimum representation is used + // -1 should hash as [0xFF] not [0xFF, 0xFF, ..., 0xFF] + // 128 should hash as [0x00, 0x80] not [0x00, 0x00, ..., 0x80] + assert_eq!(Bucket::hash_decimal(128), Bucket::hash_bytes(&[0x00, 0x80])); + assert_eq!( + Bucket::hash_decimal(-129), + Bucket::hash_bytes(&[0xFF, 0x7F]) + ); + } + #[test] fn test_int_literal() { let bucket = Bucket::new(10); diff --git a/crates/iceberg/src/writer/base_writer/data_file_writer.rs 
diff --git a/crates/iceberg/src/writer/base_writer/data_file_writer.rs b/crates/iceberg/src/writer/base_writer/data_file_writer.rs index dcaa56cc97..cb7bd172ea 100644 --- a/crates/iceberg/src/writer/base_writer/data_file_writer.rs +++ b/crates/iceberg/src/writer/base_writer/data_file_writer.rs @@ -27,7 +27,7 @@ use crate::writer::{CurrentFileStatus, IcebergWriter, IcebergWriterBuilder}; use crate::{Error, ErrorKind, Result}; /// Builder for `DataFileWriter`. -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct DataFileWriterBuilder { inner: RollingFileWriterBuilder, } @@ -53,9 +53,9 @@ where { type R = DataFileWriter; - async fn build(self, partition_key: Option<PartitionKey>) -> Result<Self::R> { + async fn build(&self, partition_key: Option<PartitionKey>) -> Result<Self::R> { Ok(DataFileWriter { - inner: Some(self.inner.clone().build()), + inner: Some(self.inner.build()), partition_key, }) }
diff --git a/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs b/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs index 664ea84334..dd8487f9cc 100644 --- a/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs +++ b/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs @@ -34,7 +34,7 @@ use crate::writer::{IcebergWriter, IcebergWriterBuilder}; use crate::{Error, ErrorKind, Result}; /// Builder for `EqualityDeleteWriter`. -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct EqualityDeleteFileWriterBuilder< B: FileWriterBuilder, L: LocationGenerator, @@ -60,7 +60,7 @@ where } /// Config for `EqualityDeleteWriter`. -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct EqualityDeleteWriterConfig { // Field ids used to determine row equality in equality delete files. equality_ids: Vec<i32>, @@ -123,11 +123,11 @@ where { type R = EqualityDeleteFileWriter; - async fn build(self, partition_key: Option<PartitionKey>) -> Result<Self::R> { + async fn build(&self, partition_key: Option<PartitionKey>) -> Result<Self::R> { Ok(EqualityDeleteFileWriter { - inner: Some(self.inner.clone().build()), - projector: self.config.projector, - equality_ids: self.config.equality_ids, + inner: Some(self.inner.build()), + projector: self.config.projector.clone(), + equality_ids: self.config.equality_ids.clone(), partition_key, }) } @@ -293,15 +293,15 @@ mod test { assert_eq!(*data_file.null_value_counts.get(id).unwrap(), expect); } - assert_eq!(data_file.split_offsets.len(), metadata.num_row_groups()); - data_file .split_offsets - .iter() - .enumerate() - .for_each(|(i, &v)| { - let expect = metadata.row_groups()[i].file_offset().unwrap(); - assert_eq!(v, expect); - }); + let split_offsets = data_file .split_offsets .as_ref() .expect("split_offsets should be set"); + assert_eq!(split_offsets.len(), metadata.num_row_groups()); + split_offsets.iter().enumerate().for_each(|(i, &v)| { + let expect = metadata.row_groups()[i].file_offset().unwrap(); + assert_eq!(v, expect); + }); } #[tokio::test]
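The switch from `build(self, ..)` to `build(&self, ..)` in these builders is what lets one builder mint many writers (for example, one per partition) without the caller cloning the whole builder at every call site. A hedged sketch of the calling pattern with a toy builder standing in for `DataFileWriterBuilder` (toy types and names, assuming tokio for the async runtime):

```rust
// Sketch of why `build(&self, ..)` matters: a single builder can be held and
// reused to create a writer per partition key. Not the real iceberg-rust API.
struct ToyWriter {
    partition: Option<String>,
}

struct ToyWriterBuilder {
    target_file_size: usize,
}

impl ToyWriterBuilder {
    // Borrowing `self` means the builder outlives every `build` call.
    async fn build(&self, partition: Option<String>) -> ToyWriter {
        ToyWriter { partition }
    }
}

#[tokio::main]
async fn main() {
    let builder = ToyWriterBuilder { target_file_size: 512 * 1024 * 1024 };
    // One builder, many writers -- no `builder.clone()` per call, mirroring
    // the `.clone()` removals in the fanout/clustered writers further down.
    for key in ["2024-01-01", "2024-01-02"] {
        let writer = builder.build(Some(key.to_string())).await;
        println!("writer for partition {:?}", writer.partition);
    }
    // The builder is still usable afterwards.
    println!("target file size: {}", builder.target_file_size);
}
```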
diff --git a/crates/iceberg/src/writer/file_writer/location_generator.rs b/crates/iceberg/src/writer/file_writer/location_generator.rs index a5cfc28292..0ad4d91ac6 100644 --- a/crates/iceberg/src/writer/file_writer/location_generator.rs +++ b/crates/iceberg/src/writer/file_writer/location_generator.rs @@ -24,7 +24,7 @@ use crate::Result; use crate::spec::{DataFileFormat, PartitionKey, TableMetadata}; /// `LocationGenerator` used to generate the location of data file. -pub trait LocationGenerator: Clone + Send + 'static { +pub trait LocationGenerator: Clone + Send + Sync + 'static { /// Generate an absolute path for the given file name that includes the partition path. /// /// # Arguments @@ -94,7 +94,7 @@ impl LocationGenerator for DefaultLocationGenerator { } /// `FileNameGeneratorTrait` used to generate file name for data file. The file name can be passed to `LocationGenerator` to generate the location of the file. -pub trait FileNameGenerator: Clone + Send + 'static { +pub trait FileNameGenerator: Clone + Send + Sync + 'static { /// Generate a file name. fn generate_file_name(&self) -> String; }
diff --git a/crates/iceberg/src/writer/file_writer/mod.rs b/crates/iceberg/src/writer/file_writer/mod.rs index 2ed6414ce8..101919f5b3 100644 --- a/crates/iceberg/src/writer/file_writer/mod.rs +++ b/crates/iceberg/src/writer/file_writer/mod.rs @@ -36,11 +36,11 @@ pub mod rolling_writer; type DefaultOutput = Vec<DataFile>; /// File writer builder trait. -pub trait FileWriterBuilder: Send + Clone + 'static { +pub trait FileWriterBuilder: Clone + Send + Sync + 'static { /// The associated file writer type. type R: FileWriter; /// Build file writer. - fn build(self, output_file: OutputFile) -> impl Future<Output = Result<Self::R>> + Send; + fn build(&self, output_file: OutputFile) -> impl Future<Output = Result<Self::R>> + Send; } /// File writer focus on writing record batch to different physical file format.(Such as parquet. orc)
diff --git a/crates/iceberg/src/writer/file_writer/parquet_writer.rs b/crates/iceberg/src/writer/file_writer/parquet_writer.rs index 411ea168ee..8fe40df71c 100644 --- a/crates/iceberg/src/writer/file_writer/parquet_writer.rs +++ b/crates/iceberg/src/writer/file_writer/parquet_writer.rs @@ -78,11 +78,11 @@ impl ParquetWriterBuilder { impl FileWriterBuilder for ParquetWriterBuilder { type R = ParquetWriter; - async fn build(self, output_file: OutputFile) -> Result<Self::R> { + async fn build(&self, output_file: OutputFile) -> Result<Self::R> { Ok(ParquetWriter { schema: self.schema.clone(), inner_writer: None, - writer_properties: self.props, + writer_properties: self.props.clone(), current_row_num: 0, output_file, nan_value_count_visitor: NanValueCountVisitor::new_with_match_mode(self.match_mode), @@ -412,13 +412,13 @@ impl ParquetWriter { // - We can ignore implementing distinct_counts due to this: https://lists.apache.org/thread/j52tsojv0x4bopxyzsp7m7bqt23n5fnd .lower_bounds(lower_bounds) .upper_bounds(upper_bounds) - .split_offsets( + .split_offsets(Some( metadata .row_groups() .iter() .filter_map(|group| group.file_offset()) .collect(), - ); + )); Ok(builder) }
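The `.split_offsets(Some(..))` change above makes the offsets optional, distinguishing "no offsets recorded" from an empty list. As a rough, self-contained illustration of where those offsets come from, this sketch writes a tiny Parquet file in memory and collects each row group's file offset the same way (assuming the `arrow-array`, `arrow-schema`, `parquet`, and `bytes` crates; this is not the crate's actual test code):

```rust
use std::sync::Arc;

use arrow_array::{Int32Array, RecordBatch};
use arrow_schema::{DataType, Field, Schema};
use bytes::Bytes;
use parquet::arrow::ArrowWriter;
use parquet::file::reader::{FileReader, SerializedFileReader};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let schema = Arc::new(Schema::new(vec![Field::new("v", DataType::Int32, false)]));
    let batch = RecordBatch::try_new(
        schema.clone(),
        vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
    )?;

    // Write one batch to an in-memory Parquet file.
    let mut buf = Vec::new();
    let mut writer = ArrowWriter::try_new(&mut buf, schema, None)?;
    writer.write(&batch)?;
    writer.close()?;

    // Mirror the builder change above: collect each row group's byte offset,
    // skipping groups that do not report one, and keep the result optional.
    let reader = SerializedFileReader::new(Bytes::from(buf))?;
    let split_offsets: Option<Vec<i64>> = Some(
        reader
            .metadata()
            .row_groups()
            .iter()
            .filter_map(|group| group.file_offset())
            .collect(),
    );
    println!("split offsets: {split_offsets:?}");
    Ok(())
}
```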
diff --git a/crates/iceberg/src/writer/file_writer/rolling_writer.rs b/crates/iceberg/src/writer/file_writer/rolling_writer.rs index 8f03654786..a93e494d48 100644 --- a/crates/iceberg/src/writer/file_writer/rolling_writer.rs +++ b/crates/iceberg/src/writer/file_writer/rolling_writer.rs @@ -103,15 +103,15 @@ where } /// Build a new [`RollingFileWriter`]. - pub fn build(self) -> RollingFileWriter { + pub fn build(&self) -> RollingFileWriter { RollingFileWriter { inner: None, - inner_builder: self.inner_builder, + inner_builder: self.inner_builder.clone(), target_file_size: self.target_file_size, data_file_builders: vec![], - file_io: self.file_io, - location_generator: self.location_generator, - file_name_generator: self.file_name_generator, + file_io: self.file_io.clone(), + location_generator: self.location_generator.clone(), + file_name_generator: self.file_name_generator.clone(), } } } @@ -192,25 +192,23 @@ where // initialize inner writer self.inner = Some( self.inner_builder - .clone() .build(self.new_output_file(partition_key)?) .await?, ); } - if self.should_roll() { - if let Some(inner) = self.inner.take() { - // close the current writer, roll to a new file - self.data_file_builders.extend(inner.close().await?); - - // start a new writer - self.inner = Some( - self.inner_builder - .clone() - .build(self.new_output_file(partition_key)?) - .await?, - ); - } + if self.should_roll() + && let Some(inner) = self.inner.take() + { + // close the current writer, roll to a new file + self.data_file_builders.extend(inner.close().await?); + + // start a new writer + self.inner = Some( + self.inner_builder + .build(self.new_output_file(partition_key)?) + .await?, + ); } // write the input
diff --git a/crates/iceberg/src/writer/mod.rs b/crates/iceberg/src/writer/mod.rs index a7892d49e1..d475230685 100644 --- a/crates/iceberg/src/writer/mod.rs +++ b/crates/iceberg/src/writer/mod.rs @@ -148,7 +148,7 @@ //! impl IcebergWriterBuilder for LatencyRecordWriterBuilder { //! type R = LatencyRecordWriter; //! -//! async fn build(self, partition_key: Option<PartitionKey>) -> Result<Self::R> { +//! async fn build(&self, partition_key: Option<PartitionKey>) -> Result<Self::R> { //! Ok(LatencyRecordWriter { //! inner_writer: self.inner_writer_builder.build(partition_key).await?, //! }) @@ -398,13 +398,11 @@ type DefaultOutput = Vec<DataFile>; /// The builder for iceberg writer. #[async_trait::async_trait] -pub trait IcebergWriterBuilder: - Send + Clone + 'static -{ +pub trait IcebergWriterBuilder: Send + Sync + 'static { /// The associated writer type. type R: IcebergWriter; /// Build the iceberg writer with an optional partition key. - async fn build(self, partition_key: Option<PartitionKey>) -> Result<Self::R>; + async fn build(&self, partition_key: Option<PartitionKey>) -> Result<Self::R>; } /// The iceberg writer used to write data to iceberg table.
diff --git a/crates/iceberg/src/writer/partitioning/clustered_writer.rs b/crates/iceberg/src/writer/partitioning/clustered_writer.rs index 3587723965..01eb452083 100644 --- a/crates/iceberg/src/writer/partitioning/clustered_writer.rs +++ b/crates/iceberg/src/writer/partitioning/clustered_writer.rs @@ -118,7 +118,6 @@ where // Create a new writer for the new partition self.current_writer = Some( self.inner_builder - .clone() .build(Some(partition_key.clone())) .await?, );
diff --git a/crates/iceberg/src/writer/partitioning/fanout_writer.rs b/crates/iceberg/src/writer/partitioning/fanout_writer.rs index 796c1a4888..21a174b0d0 100644 --- a/crates/iceberg/src/writer/partitioning/fanout_writer.rs +++ b/crates/iceberg/src/writer/partitioning/fanout_writer.rs @@ -73,7 +73,6 @@ where if !self.partition_writers.contains_key(partition_key.data()) { let writer = self .inner_builder - .clone() .build(Some(partition_key.clone())) .await?; self.partition_writers
diff --git a/crates/iceberg/src/writer/partitioning/unpartitioned_writer.rs b/crates/iceberg/src/writer/partitioning/unpartitioned_writer.rs index 0fb9cba3f1..29825a5416 100644 --- a/crates/iceberg/src/writer/partitioning/unpartitioned_writer.rs +++ b/crates/iceberg/src/writer/partitioning/unpartitioned_writer.rs @@ -75,7 +75,7 @@ where pub async fn write(&mut self, input: I) -> Result<()> { // Lazily create writer on first write if self.writer.is_none() { - self.writer = Some(self.inner_builder.clone().build(None).await?); + self.writer = Some(self.inner_builder.build(None).await?); } // Write directly to inner writer
diff --git a/crates/iceberg/tests/file_io_gcs_test.rs b/crates/iceberg/tests/file_io_gcs_test.rs index 161285ae6f..9fbcdadd0e 100644 --- a/crates/iceberg/tests/file_io_gcs_test.rs +++ b/crates/iceberg/tests/file_io_gcs_test.rs @@
-68,7 +68,7 @@ mod tests { FileIOBuilder::new("gcs") .with_props(vec![ - (GCS_SERVICE_PATH, format!("http://{}", addr)), + (GCS_SERVICE_PATH, format!("http://{addr}")), (GCS_NO_AUTH, "true".to_string()), ]) .build() @@ -81,13 +81,13 @@ mod tests { bucket_data.insert("name", name); let client = reqwest::Client::new(); - let endpoint = format!("http://{}/storage/v1/b", server_addr); + let endpoint = format!("http://{server_addr}/storage/v1/b"); client.post(endpoint).json(&bucket_data).send().await?; Ok(()) } fn get_gs_path() -> String { - format!("gs://{}", FAKE_GCS_BUCKET) + format!("gs://{FAKE_GCS_BUCKET}") } #[tokio::test] diff --git a/crates/integration_tests/DEPENDENCIES.rust.tsv b/crates/integration_tests/DEPENDENCIES.rust.tsv index 891d23966e..26f6d83820 100644 --- a/crates/integration_tests/DEPENDENCIES.rust.tsv +++ b/crates/integration_tests/DEPENDENCIES.rust.tsv @@ -1,406 +1,414 @@ -crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X -adler2@2.0.1 X X X -ahash@0.8.12 X X -aho-corasick@1.1.3 X X -alloc-no-stdlib@2.0.4 X -alloc-stdlib@0.2.2 X -allocator-api2@0.2.21 X X -android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X -array-init@2.1.0 X X -arrayref@0.3.9 X -arrayvec@0.7.6 X X -arrow@55.2.0 X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-csv@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-json@55.2.0 X -arrow-ord@55.2.0 X -arrow-row@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X -as-any@0.3.2 X X -async-compression@0.4.19 X X -async-lock@3.4.1 X X -async-trait@0.1.89 X X -atoi@2.0.0 X -atomic-waker@1.1.2 X X -autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X -base64@0.22.1 X X -bigdecimal@0.4.8 X X -bimap@0.6.3 X X -bitflags@2.9.4 X X -blake2@0.10.6 X X -blake3@1.8.2 X X X -block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X -brotli@8.0.2 X X -brotli-decompressor@5.0.0 X X -bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X -byteorder@1.5.0 X X -bytes@1.10.1 X -bzip2@0.5.2 X X -bzip2-sys@0.1.13+1.0.8 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X -chrono@0.4.42 X X -chrono-tz@0.10.4 X X -comfy-table@7.2.0 X -concurrent-queue@2.5.0 X X -const-oid@0.9.6 X X -const-random@0.1.18 X X -const-random-macro@0.1.16 X X -constant_time_eq@0.3.1 X X X -core-foundation-sys@0.8.7 X X -cpufeatures@0.2.17 X X -crc32c@0.6.8 X X -crc32fast@1.5.0 X X -crossbeam-channel@0.5.15 X X -crossbeam-epoch@0.9.18 X X -crossbeam-utils@0.8.21 X X -crunchy@0.2.4 X -crypto-common@0.1.6 X X -csv@1.3.1 X X -csv-core@0.1.12 X X -ctor@0.2.9 X X -darling@0.20.11 X -darling@0.21.3 X -darling_core@0.20.11 X -darling_core@0.21.3 X -darling_macro@0.20.11 X -darling_macro@0.21.3 X -dashmap@6.1.0 X -datafusion@48.0.1 X -datafusion-catalog@48.0.1 X -datafusion-catalog-listing@48.0.1 X -datafusion-common@48.0.1 X -datafusion-common-runtime@48.0.1 X -datafusion-datasource@48.0.1 X -datafusion-datasource-csv@48.0.1 X -datafusion-datasource-json@48.0.1 X -datafusion-datasource-parquet@48.0.1 X -datafusion-doc@48.0.1 X -datafusion-execution@48.0.1 X -datafusion-expr@48.0.1 X -datafusion-expr-common@48.0.1 X -datafusion-functions@48.0.1 X -datafusion-functions-aggregate@48.0.1 X -datafusion-functions-aggregate-common@48.0.1 X -datafusion-functions-nested@48.0.1 X -datafusion-functions-table@48.0.1 X -datafusion-functions-window@48.0.1 X -datafusion-functions-window-common@48.0.1 X 
-datafusion-macros@48.0.1 X -datafusion-optimizer@48.0.1 X -datafusion-physical-expr@48.0.1 X -datafusion-physical-expr-common@48.0.1 X -datafusion-physical-optimizer@48.0.1 X -datafusion-physical-plan@48.0.1 X -datafusion-session@48.0.1 X -datafusion-sql@48.0.1 X -derive_builder@0.20.2 X X -derive_builder_core@0.20.2 X X -derive_builder_macro@0.20.2 X X -digest@0.10.7 X X -displaydoc@0.2.5 X X -dissimilar@1.0.10 X -either@1.15.0 X X -equivalent@1.0.2 X X -errno@0.3.13 X X -event-listener@5.4.1 X X -event-listener-strategy@0.5.4 X X -expect-test@1.5.1 X X -fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -fixedbitset@0.5.7 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X -fnv@1.0.7 X X -foldhash@0.1.5 X -form_urlencoded@1.2.2 X X -futures@0.3.31 X X -futures-channel@0.3.31 X X -futures-core@0.3.31 X X -futures-executor@0.3.31 X X -futures-io@0.3.31 X X -futures-macro@0.3.31 X X -futures-sink@0.3.31 X X -futures-task@0.3.31 X X -futures-util@0.3.31 X X -generator@0.8.7 X X -generic-array@0.14.7 X -getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X -glob@0.3.3 X X -gloo-timers@0.3.0 X X -h2@0.4.12 X -half@2.6.0 X X -hashbrown@0.14.5 X X -hashbrown@0.15.5 X X -heck@0.5.0 X X -hermit-abi@0.5.2 X X -hex@0.4.3 X X -hmac@0.12.1 X X -home@0.5.11 X X -http@1.3.1 X X -http-body@1.0.1 X -http-body-util@0.1.3 X -httparse@1.10.1 X X -httpdate@1.0.3 X X -humantime@2.2.0 X X -hyper@1.7.0 X -hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X -iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-rest@0.7.0 X -iceberg-datafusion@0.7.0 X -iceberg-integration-tests@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X -ident_case@1.0.1 X X -idna@1.1.0 X X -idna_adapter@1.2.1 X X -indexmap@2.11.0 X X -integer-encoding@3.0.4 X -io-uring@0.7.10 X X -ipnet@2.11.0 X X -iri-string@0.7.8 X X -itertools@0.13.0 X X -itertools@0.14.0 X X -itoa@1.0.15 X X -jobserver@0.1.34 X X -js-sys@0.3.78 X X -lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X -libm@0.2.15 X -libz-rs-sys@0.5.2 X -linux-raw-sys@0.9.4 X X X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -lzma-sys@0.1.20 X X -matchers@0.2.0 X -md-5@0.10.6 X X -memchr@2.7.5 X X -miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X -murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X -num-bigint@0.4.6 X X -num-complex@0.4.6 X X -num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X -num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X -object_store@0.12.3 X X -once_cell@1.21.3 X X -opendal@0.54.0 X -ordered-float@2.10.1 X -ordered-float@4.6.0 X -parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X -paste@1.0.15 X X -percent-encoding@2.3.2 X X -petgraph@0.8.2 X X -phf@0.12.1 X -phf_shared@0.12.1 X -pin-project-lite@0.2.16 X X -pin-utils@0.1.0 X X -pkg-config@0.3.32 X X -portable-atomic@1.11.1 X X -potential_utf@0.1.3 X -ppv-lite86@0.2.21 X X -prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X -psm@0.1.26 X X -quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X -r-efi@5.3.0 X X X -rand@0.8.5 X X -rand@0.9.2 X X -rand_chacha@0.3.1 X X -rand_chacha@0.9.0 X X -rand_core@0.6.4 X X -rand_core@0.9.3 X X -recursive@0.1.1 
X -recursive-proc-macro-impl@0.1.1 X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X -reqsign@0.16.5 X -reqwest@0.12.23 X X -ring@0.17.14 X X -roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X -rustc_version@0.4.1 X X -rustix@1.0.8 X X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X -rustversion@1.0.22 X X -ryu@1.0.20 X X -same-file@1.0.6 X X -scoped-tls@1.0.1 X X -scopeguard@1.2.0 X X -semver@1.0.26 X X -seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X -serde_repr@0.1.20 X X -serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X -sha1@0.10.6 X X -sha2@0.10.9 X X -sharded-slab@0.1.7 X -shlex@1.3.0 X X -simdutf8@0.1.5 X X -siphasher@1.0.1 X X -slab@0.4.11 X -smallvec@1.15.1 X X -snap@1.1.1 X -socket2@0.6.0 X X -sqlparser@0.55.0 X -sqlparser_derive@0.3.0 X -stable_deref_trait@1.2.0 X X -stacker@0.1.21 X X -static_assertions@1.1.0 X X -strsim@0.11.1 X -strum@0.27.2 X -strum_macros@0.27.2 X -subtle@2.6.1 X -syn@2.0.106 X X -sync_wrapper@1.0.2 X -synstructure@0.13.2 X -tagptr@0.2.0 X X -tempfile@3.22.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X -thread_local@1.1.9 X X -threadpool@1.8.1 X X -thrift@0.17.0 X -tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X -tower@0.5.2 X -tower-http@0.6.6 X -tower-layer@0.3.3 X -tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X -tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X -try-lock@0.2.5 X -twox-hash@2.1.2 X -typed-builder@0.20.1 X X -typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X -unicode-segmentation@1.12.0 X X -unicode-width@0.2.1 X X -untrusted@0.9.0 X -url@2.5.7 X X -utf8_iter@1.0.4 X X -uuid@1.18.1 X X -version_check@0.9.5 X X -walkdir@2.5.0 X X -want@0.3.1 X -wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X -wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -web-time@1.1.0 X X -webpki-roots@1.0.2 X -winapi-util@0.1.11 X X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X -windows-sys@0.52.0 X X -windows-sys@0.59.0 X X -windows-sys@0.61.0 X X -windows-targets@0.52.6 X X -windows-threading@0.1.0 X X -windows_aarch64_gnullvm@0.52.6 X X -windows_aarch64_msvc@0.52.6 X X -windows_i686_gnu@0.52.6 X X -windows_i686_gnullvm@0.52.6 X X -windows_i686_msvc@0.52.6 X X -windows_x86_64_gnu@0.52.6 X X -windows_x86_64_gnullvm@0.52.6 X X -windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -xz2@0.1.7 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X -zerofrom@0.1.6 X -zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X -zstd@0.13.3 X -zstd-safe@7.2.4 X X -zstd-sys@2.0.16+zstd.1.5.7 X X +crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC 
LGPL-2.1-or-later MIT MIT-0 Unicode-3.0 Unlicense Zlib bzip2-1.0.6 +adler2@2.0.1 X X X +ahash@0.8.12 X X +aho-corasick@1.1.4 X X +alloc-no-stdlib@2.0.4 X +alloc-stdlib@0.2.2 X +allocator-api2@0.2.21 X X +android_system_properties@0.1.5 X X +anyhow@1.0.100 X X +apache-avro@0.21.0 X +ar_archive_writer@0.2.0 X +array-init@2.1.0 X X +arrayref@0.3.9 X +arrayvec@0.7.6 X X +arrow@57.1.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-csv@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-json@57.1.0 X +arrow-ord@57.1.0 X +arrow-row@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X +as-any@0.3.2 X X +async-compression@0.4.19 X X +async-lock@3.4.1 X X +async-trait@0.1.89 X X +atoi@2.0.0 X +atomic-waker@1.1.2 X X +autocfg@1.5.0 X X +backon@1.6.0 X +base64@0.22.1 X X +bigdecimal@0.4.9 X X +bimap@0.6.3 X X +bitflags@2.10.0 X X +blake2@0.10.6 X X +blake3@1.8.2 X X X +block-buffer@0.10.4 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X +brotli@8.0.2 X X +brotli-decompressor@5.0.0 X X +bumpalo@3.19.0 X X +bytemuck@1.24.0 X X X +byteorder@1.5.0 X X +bytes@1.11.0 X +bzip2@0.5.2 X X +bzip2@0.6.1 X X +bzip2-sys@0.1.13+1.0.8 X X +cc@1.2.49 X X +cfg-if@1.0.4 X X +chrono@0.4.42 X X +chrono-tz@0.10.4 X X +comfy-table@7.2.1 X +concurrent-queue@2.5.0 X X +const-oid@0.9.6 X X +const-random@0.1.18 X X +const-random-macro@0.1.16 X X +constant_time_eq@0.3.1 X X X +core-foundation-sys@0.8.7 X X +cpufeatures@0.2.17 X X +crc32c@0.6.8 X X +crc32fast@1.5.0 X X +crossbeam-channel@0.5.15 X X +crossbeam-epoch@0.9.18 X X +crossbeam-utils@0.8.21 X X +crunchy@0.2.4 X +crypto-common@0.1.7 X X +csv@1.4.0 X X +csv-core@0.1.13 X X +ctor@0.2.9 X X +darling@0.20.11 X +darling@0.21.3 X +darling_core@0.20.11 X +darling_core@0.21.3 X +darling_macro@0.20.11 X +darling_macro@0.21.3 X +dashmap@6.1.0 X +datafusion@51.0.0 X +datafusion-catalog@51.0.0 X +datafusion-catalog-listing@51.0.0 X +datafusion-common@51.0.0 X +datafusion-common-runtime@51.0.0 X +datafusion-datasource@51.0.0 X +datafusion-datasource-arrow@51.0.0 X +datafusion-datasource-csv@51.0.0 X +datafusion-datasource-json@51.0.0 X +datafusion-datasource-parquet@51.0.0 X +datafusion-doc@51.0.0 X +datafusion-execution@51.0.0 X +datafusion-expr@51.0.0 X +datafusion-expr-common@51.0.0 X +datafusion-functions@51.0.0 X +datafusion-functions-aggregate@51.0.0 X +datafusion-functions-aggregate-common@51.0.0 X +datafusion-functions-nested@51.0.0 X +datafusion-functions-table@51.0.0 X +datafusion-functions-window@51.0.0 X +datafusion-functions-window-common@51.0.0 X +datafusion-macros@51.0.0 X +datafusion-optimizer@51.0.0 X +datafusion-physical-expr@51.0.0 X +datafusion-physical-expr-adapter@51.0.0 X +datafusion-physical-expr-common@51.0.0 X +datafusion-physical-optimizer@51.0.0 X +datafusion-physical-plan@51.0.0 X +datafusion-pruning@51.0.0 X +datafusion-session@51.0.0 X +datafusion-sql@51.0.0 X +derive_builder@0.20.2 X X +derive_builder_core@0.20.2 X X +derive_builder_macro@0.20.2 X X +digest@0.10.7 X X +displaydoc@0.2.5 X X +dissimilar@1.0.10 X +either@1.15.0 X X +equivalent@1.0.2 X X +errno@0.3.14 X X +event-listener@5.4.1 X X +event-listener-strategy@0.5.4 X X +expect-test@1.5.1 X X +fastrand@2.3.0 X X +find-msvc-tools@0.1.5 X X +fixedbitset@0.5.7 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X +fnv@1.0.7 X X +foldhash@0.1.5 X +form_urlencoded@1.2.2 X X +futures@0.3.31 X X +futures-channel@0.3.31 X X +futures-core@0.3.31 X X +futures-executor@0.3.31 X X +futures-io@0.3.31 X X +futures-macro@0.3.31 X X 
+futures-sink@0.3.31 X X +futures-task@0.3.31 X X +futures-timer@3.0.3 X X +futures-util@0.3.31 X X +generic-array@0.14.7 X +getrandom@0.2.16 X X +getrandom@0.3.4 X X +glob@0.3.3 X X +gloo-timers@0.3.0 X X +h2@0.4.12 X +half@2.7.1 X X +hashbrown@0.14.5 X X +hashbrown@0.15.5 X X +hashbrown@0.16.1 X X +heck@0.5.0 X X +hex@0.4.3 X X +hmac@0.12.1 X X +home@0.5.11 X X +http@1.4.0 X X +http-body@1.0.1 X +http-body-util@0.1.3 X +httparse@1.10.1 X X +httpdate@1.0.3 X X +humantime@2.3.0 X X +hyper@1.8.1 X +hyper-rustls@0.27.7 X X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X +iana-time-zone-haiku@0.1.2 X X +iceberg@0.8.0 X +iceberg-catalog-rest@0.8.0 X +iceberg-datafusion@0.8.0 X +iceberg-integration-tests@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X +ident_case@1.0.1 X X +idna@1.1.0 X X +idna_adapter@1.2.1 X X +indexmap@2.12.1 X X +integer-encoding@3.0.4 X +ipnet@2.11.0 X X +iri-string@0.7.9 X X +itertools@0.13.0 X X +itertools@0.14.0 X X +itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X +jobserver@0.1.34 X X +js-sys@0.3.83 X X +lazy_static@1.5.0 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libbz2-rs-sys@0.2.2 X +libc@0.2.178 X X +libm@0.2.15 X +libz-rs-sys@0.5.3 X +linux-raw-sys@0.11.0 X X X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X +lzma-sys@0.1.20 X X +md-5@0.10.6 X X +memchr@2.7.6 X X +miniz_oxide@0.8.9 X X X +mio@1.1.1 X +moka@0.12.11 X X +murmur3@0.5.2 X X +nu-ansi-term@0.50.3 X +num-bigint@0.4.6 X X +num-complex@0.4.6 X X +num-integer@0.1.46 X X +num-traits@0.2.19 X X +object@0.32.2 X X +object_store@0.12.4 X X +once_cell@1.21.3 X X +opendal@0.55.0 X +ordered-float@2.10.1 X +ordered-float@4.6.0 X +parking@2.2.1 X X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X +paste@1.0.15 X X +percent-encoding@2.3.2 X X +petgraph@0.8.3 X X +phf@0.12.1 X +phf_shared@0.12.1 X +pin-project-lite@0.2.16 X X +pin-utils@0.1.0 X X +pkg-config@0.3.32 X X +portable-atomic@1.11.1 X X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X +ppv-lite86@0.2.21 X X +prettyplease@0.2.37 X X +proc-macro-crate@3.4.0 X X +proc-macro2@1.0.103 X X +psm@0.1.28 X X +quad-rand@0.2.3 X +quick-xml@0.38.4 X +quote@1.0.42 X X +r-efi@5.3.0 X X X +rand@0.8.5 X X +rand@0.9.2 X X +rand_chacha@0.3.1 X X +rand_chacha@0.9.0 X X +rand_core@0.6.4 X X +rand_core@0.9.3 X X +recursive@0.1.1 X +recursive-proc-macro-impl@0.1.1 X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X +relative-path@1.9.3 X X +reqsign@0.16.5 X +reqwest@0.12.25 X X +ring@0.17.14 X X +roaring@0.11.2 X X +rstest@0.26.1 X X +rstest_macros@0.26.1 X X +rust_decimal@1.39.0 X +rustc_version@0.4.1 X X +rustix@1.1.2 X X X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X +rustversion@1.0.22 X X +ryu@1.0.20 X X +same-file@1.0.6 X X +scopeguard@1.2.0 X X +semver@1.0.27 X X +seq-macro@0.3.6 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X +serde_repr@0.1.20 X X +serde_urlencoded@0.7.1 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X +sha1@0.10.6 X X +sha2@0.10.9 X X +sharded-slab@0.1.7 X +shlex@1.3.0 X X 
+simd-adler32@0.3.8 X +simdutf8@0.1.5 X X +siphasher@1.0.1 X X +slab@0.4.11 X +smallvec@1.15.1 X X +snap@1.1.1 X +socket2@0.6.1 X X +sqlparser@0.59.0 X +sqlparser_derive@0.3.0 X +stable_deref_trait@1.2.1 X X +stacker@0.1.22 X X +strsim@0.11.1 X +strum@0.27.2 X +strum_macros@0.27.2 X +subtle@2.6.1 X +syn@2.0.111 X X +sync_wrapper@1.0.2 X +synstructure@0.13.2 X +tagptr@0.2.0 X X +tempfile@3.23.0 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X +thread_local@1.1.9 X X +thrift@0.17.0 X +tiny-keccak@2.0.2 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X +toml_datetime@0.7.3 X X +toml_edit@0.23.9 X X +toml_parser@1.0.4 X X +tower@0.5.2 X +tower-http@0.6.8 X +tower-layer@0.3.3 X +tower-service@0.3.3 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X +tracing-log@0.2.0 X +tracing-subscriber@0.3.22 X +try-lock@0.2.5 X +twox-hash@2.1.2 X +typed-builder@0.20.1 X X +typed-builder-macro@0.20.1 X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X +unicode-segmentation@1.12.0 X X +unicode-width@0.2.2 X X +untrusted@0.9.0 X +url@2.5.7 X X +utf8_iter@1.0.4 X X +uuid@1.19.0 X X +version_check@0.9.5 X X +walkdir@2.5.0 X X +want@0.3.1 X +wasi@0.11.1+wasi-snapshot-preview1 X X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X +wasm-streams@0.4.2 X X +web-sys@0.3.83 X X +web-time@1.1.0 X X +webpki-roots@1.0.4 X +winapi-util@0.1.11 X X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X +windows-sys@0.52.0 X X +windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X +windows-targets@0.52.6 X X +windows-targets@0.53.5 X X +windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X +windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X +windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X +windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X +windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X +windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X +windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X +windows_x86_64_msvc@0.52.6 X X +windows_x86_64_msvc@0.53.1 X X +winnow@0.7.14 X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +xz2@0.1.7 X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X +zerofrom@0.1.6 X +zerofrom-derive@0.1.6 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X +zstd@0.13.3 X +zstd-safe@7.2.4 X X +zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/integration_tests/tests/shared_tests/datafusion.rs b/crates/integration_tests/tests/shared_tests/datafusion.rs index 81bbb5f54c..60dd9f36c8 100644 --- a/crates/integration_tests/tests/shared_tests/datafusion.rs +++ b/crates/integration_tests/tests/shared_tests/datafusion.rs @@ -26,7 +26,7 @@ use datafusion::error::DataFusionError; use datafusion::prelude::SessionContext; use iceberg::{Catalog, CatalogBuilder, TableIdent}; use iceberg_catalog_rest::RestCatalogBuilder; -use iceberg_datafusion::IcebergTableProvider; +use iceberg_datafusion::IcebergStaticTableProvider; use parquet::arrow::PARQUET_FIELD_ID_META_KEY; use crate::get_shared_containers; @@ -47,7 +47,7 @@ async fn test_basic_queries() -> Result<(), DataFusionError> { let ctx = SessionContext::new(); 
let table_provider = Arc::new( - IcebergTableProvider::try_new_from_table(table) + IcebergStaticTableProvider::try_new_from_table(table) .await .unwrap(), ); diff --git a/crates/integrations/cache-moka/DEPENDENCIES.rust.tsv b/crates/integrations/cache-moka/DEPENDENCIES.rust.tsv index 59b898d3fe..4763f71ec8 100644 --- a/crates/integrations/cache-moka/DEPENDENCIES.rust.tsv +++ b/crates/integrations/cache-moka/DEPENDENCIES.rust.tsv @@ -1,48 +1,46 @@ crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X adler2@2.0.1 X X X ahash@0.8.12 X X -aho-corasick@1.1.3 X X +aho-corasick@1.1.4 X X alloc-no-stdlib@2.0.4 X alloc-stdlib@0.2.2 X android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X +anyhow@1.0.100 X X +apache-avro@0.21.0 X array-init@2.1.0 X X arrayvec@0.7.6 X X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-ord@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-ord@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X as-any@0.3.2 X X async-lock@3.4.1 X X async-trait@0.1.89 X X atoi@2.0.0 X atomic-waker@1.1.2 X X autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X +backon@1.6.0 X base64@0.22.1 X X -bigdecimal@0.4.8 X X +bigdecimal@0.4.9 X X bimap@0.6.3 X X -bitflags@2.9.4 X X +bitflags@2.10.0 X X block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X brotli@8.0.2 X X brotli-decompressor@5.0.0 X X bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X +bytemuck@1.24.0 X X X byteorder@1.5.0 X X -bytes@1.10.1 X -cc@1.2.36 X X -cfg-if@1.0.3 X X +bytes@1.11.0 X +cc@1.2.49 X X +cfg-if@1.0.4 X X chrono@0.4.42 X X concurrent-queue@2.5.0 X X const-oid@0.9.6 X X @@ -56,7 +54,7 @@ crossbeam-channel@0.5.15 X X crossbeam-epoch@0.9.18 X X crossbeam-utils@0.8.21 X X crunchy@0.2.4 X -crypto-common@0.1.6 X X +crypto-common@0.1.7 X X darling@0.20.11 X darling@0.21.3 X darling_core@0.20.11 X @@ -70,13 +68,14 @@ digest@0.10.7 X X displaydoc@0.2.5 X X dissimilar@1.0.10 X either@1.15.0 X X +equivalent@1.0.2 X X event-listener@5.4.1 X X event-listener-strategy@0.5.4 X X expect-test@1.5.1 X X fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X +find-msvc-tools@0.1.5 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X fnv@1.0.7 X X form_urlencoded@1.2.2 X X futures@0.3.31 X X @@ -88,102 +87,95 @@ futures-macro@0.3.31 X X futures-sink@0.3.31 X X futures-task@0.3.31 X X futures-util@0.3.31 X X -generator@0.8.7 X X generic-array@0.14.7 X getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X +getrandom@0.3.4 X X gloo-timers@0.3.0 X X -half@2.6.0 X X -hashbrown@0.15.5 X X +half@2.7.1 X X +hashbrown@0.16.1 X X heck@0.5.0 X X -hermit-abi@0.5.2 X X hex@0.4.3 X X hmac@0.12.1 X X home@0.5.11 X X -http@1.3.1 X X +http@1.4.0 X X http-body@1.0.1 X http-body-util@0.1.3 X httparse@1.10.1 X X -hyper@1.7.0 X +hyper@1.8.1 X hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-cache-moka@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X 
-icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X +iceberg@0.8.0 X +iceberg-cache-moka@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X ident_case@1.0.1 X X idna@1.1.0 X X idna_adapter@1.2.1 X X integer-encoding@3.0.4 X -io-uring@0.7.10 X X ipnet@2.11.0 X X -iri-string@0.7.8 X X +iri-string@0.7.9 X X itertools@0.13.0 X X itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X jobserver@0.1.34 X X -js-sys@0.3.78 X X +js-sys@0.3.83 X X lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libc@0.2.178 X X libm@0.2.15 X -libz-rs-sys@0.5.2 X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -matchers@0.2.0 X +libz-rs-sys@0.5.3 X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X md-5@0.10.6 X X -memchr@2.7.5 X X +memchr@2.7.6 X X miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X +mio@1.1.1 X +moka@0.12.11 X X murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X +nu-ansi-term@0.50.3 X num-bigint@0.4.6 X X num-complex@0.4.6 X X num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X once_cell@1.21.3 X X -opendal@0.54.0 X +opendal@0.55.0 X ordered-float@2.10.1 X ordered-float@4.6.0 X parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X paste@1.0.15 X X percent-encoding@2.3.2 X X pin-project-lite@0.2.16 X X pin-utils@0.1.0 X X pkg-config@0.3.32 X X portable-atomic@1.11.1 X X -potential_utf@0.1.3 X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X ppv-lite86@0.2.21 X X prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X +proc-macro2@1.0.103 X X quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X +quick-xml@0.38.4 X +quote@1.0.42 X X r-efi@5.3.0 X X X rand@0.8.5 X X rand@0.9.2 X X @@ -191,134 +183,135 @@ rand_chacha@0.3.1 X X rand_chacha@0.9.0 X X rand_core@0.6.4 X X rand_core@0.9.3 X X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X reqsign@0.16.5 X -reqwest@0.12.23 X X +reqwest@0.12.25 X X ring@0.17.14 X X roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X +rust_decimal@1.39.0 X rustc_version@0.4.1 X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X rustversion@1.0.22 X X ryu@1.0.20 X X -scoped-tls@1.0.1 X X scopeguard@1.2.0 X X -semver@1.0.26 X X +semver@1.0.27 X X seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X serde_repr@0.1.20 X X serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X 
-serde_with_macros@3.14.0 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X sha1@0.10.6 X X sha2@0.10.9 X X sharded-slab@0.1.7 X shlex@1.3.0 X X +simd-adler32@0.3.8 X simdutf8@0.1.5 X X slab@0.4.11 X smallvec@1.15.1 X X snap@1.1.1 X -socket2@0.6.0 X X -stable_deref_trait@1.2.0 X X -static_assertions@1.1.0 X X +socket2@0.6.1 X X +stable_deref_trait@1.2.1 X X strsim@0.11.1 X strum@0.27.2 X strum_macros@0.27.2 X subtle@2.6.1 X -syn@2.0.106 X X +syn@2.0.111 X X sync_wrapper@1.0.2 X synstructure@0.13.2 X tagptr@0.2.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X thread_local@1.1.9 X X -threadpool@1.8.1 X X thrift@0.17.0 X tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X tower@0.5.2 X -tower-http@0.6.6 X +tower-http@0.6.8 X tower-layer@0.3.3 X tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X +tracing-subscriber@0.3.22 X try-lock@0.2.5 X twox-hash@2.1.2 X typed-builder@0.20.1 X X typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X untrusted@0.9.0 X url@2.5.7 X X utf8_iter@1.0.4 X X -uuid@1.18.1 X X +uuid@1.19.0 X X version_check@0.9.5 X X want@0.3.1 X wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -webpki-roots@1.0.2 X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X +web-sys@0.3.83 X X +webpki-roots@1.0.4 X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X windows-sys@0.52.0 X X windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X windows-targets@0.52.6 X X -windows-threading@0.1.0 X X +windows-targets@0.53.5 X X windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X +windows_x86_64_msvc@0.53.1 X X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X zerofrom@0.1.6 X 
zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X zstd@0.13.3 X zstd-safe@7.2.4 X X zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/integrations/datafusion/DEPENDENCIES.rust.tsv b/crates/integrations/datafusion/DEPENDENCIES.rust.tsv index 8fb3e04f80..7a0f57e7a0 100644 --- a/crates/integrations/datafusion/DEPENDENCIES.rust.tsv +++ b/crates/integrations/datafusion/DEPENDENCIES.rust.tsv @@ -1,401 +1,409 @@ -crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X -adler2@2.0.1 X X X -ahash@0.8.12 X X -aho-corasick@1.1.3 X X -alloc-no-stdlib@2.0.4 X -alloc-stdlib@0.2.2 X -allocator-api2@0.2.21 X X -android_system_properties@0.1.5 X X -anyhow@1.0.99 X X -apache-avro@0.20.0 X -array-init@2.1.0 X X -arrayref@0.3.9 X -arrayvec@0.7.6 X X -arrow@55.2.0 X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-csv@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-json@55.2.0 X -arrow-ord@55.2.0 X -arrow-row@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X -as-any@0.3.2 X X -async-compression@0.4.19 X X -async-lock@3.4.1 X X -async-trait@0.1.89 X X -atoi@2.0.0 X -atomic-waker@1.1.2 X X -autocfg@1.5.0 X X -backon@1.5.2 X -backtrace@0.3.75 X X -base64@0.22.1 X X -bigdecimal@0.4.8 X X -bimap@0.6.3 X X -bitflags@2.9.4 X X -blake2@0.10.6 X X -blake3@1.8.2 X X X -block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X -brotli@8.0.2 X X -brotli-decompressor@5.0.0 X X -bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X -byteorder@1.5.0 X X -bytes@1.10.1 X -bzip2@0.5.2 X X -bzip2-sys@0.1.13+1.0.8 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X -chrono@0.4.42 X X -chrono-tz@0.10.4 X X -comfy-table@7.2.0 X -concurrent-queue@2.5.0 X X -const-oid@0.9.6 X X -const-random@0.1.18 X X -const-random-macro@0.1.16 X X -constant_time_eq@0.3.1 X X X -core-foundation-sys@0.8.7 X X -cpufeatures@0.2.17 X X -crc32c@0.6.8 X X -crc32fast@1.5.0 X X -crossbeam-channel@0.5.15 X X -crossbeam-epoch@0.9.18 X X -crossbeam-utils@0.8.21 X X -crunchy@0.2.4 X -crypto-common@0.1.6 X X -csv@1.3.1 X X -csv-core@0.1.12 X X -darling@0.20.11 X -darling@0.21.3 X -darling_core@0.20.11 X -darling_core@0.21.3 X -darling_macro@0.20.11 X -darling_macro@0.21.3 X -dashmap@6.1.0 X -datafusion@48.0.1 X -datafusion-catalog@48.0.1 X -datafusion-catalog-listing@48.0.1 X -datafusion-common@48.0.1 X -datafusion-common-runtime@48.0.1 X -datafusion-datasource@48.0.1 X -datafusion-datasource-csv@48.0.1 X -datafusion-datasource-json@48.0.1 X -datafusion-datasource-parquet@48.0.1 X -datafusion-doc@48.0.1 X -datafusion-execution@48.0.1 X -datafusion-expr@48.0.1 X -datafusion-expr-common@48.0.1 X -datafusion-functions@48.0.1 X -datafusion-functions-aggregate@48.0.1 X -datafusion-functions-aggregate-common@48.0.1 X -datafusion-functions-nested@48.0.1 X -datafusion-functions-table@48.0.1 X -datafusion-functions-window@48.0.1 X -datafusion-functions-window-common@48.0.1 X -datafusion-macros@48.0.1 X -datafusion-optimizer@48.0.1 X -datafusion-physical-expr@48.0.1 X -datafusion-physical-expr-common@48.0.1 X -datafusion-physical-optimizer@48.0.1 X -datafusion-physical-plan@48.0.1 X -datafusion-session@48.0.1 X -datafusion-sql@48.0.1 X -derive_builder@0.20.2 X X -derive_builder_core@0.20.2 X X 
-derive_builder_macro@0.20.2 X X -digest@0.10.7 X X -displaydoc@0.2.5 X X -dissimilar@1.0.10 X -either@1.15.0 X X -equivalent@1.0.2 X X -errno@0.3.13 X X -event-listener@5.4.1 X X -event-listener-strategy@0.5.4 X X -expect-test@1.5.1 X X -fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -fixedbitset@0.5.7 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X -fnv@1.0.7 X X -foldhash@0.1.5 X -form_urlencoded@1.2.2 X X -futures@0.3.31 X X -futures-channel@0.3.31 X X -futures-core@0.3.31 X X -futures-executor@0.3.31 X X -futures-io@0.3.31 X X -futures-macro@0.3.31 X X -futures-sink@0.3.31 X X -futures-task@0.3.31 X X -futures-util@0.3.31 X X -generator@0.8.7 X X -generic-array@0.14.7 X -getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X -glob@0.3.3 X X -gloo-timers@0.3.0 X X -half@2.6.0 X X -hashbrown@0.14.5 X X -hashbrown@0.15.5 X X -heck@0.5.0 X X -hermit-abi@0.5.2 X X -hex@0.4.3 X X -hmac@0.12.1 X X -home@0.5.11 X X -http@1.3.1 X X -http-body@1.0.1 X -http-body-util@0.1.3 X -httparse@1.10.1 X X -humantime@2.2.0 X X -hyper@1.7.0 X -hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X -iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-datafusion@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X -ident_case@1.0.1 X X -idna@1.1.0 X X -idna_adapter@1.2.1 X X -indexmap@2.11.0 X X -integer-encoding@3.0.4 X -io-uring@0.7.10 X X -ipnet@2.11.0 X X -iri-string@0.7.8 X X -itertools@0.13.0 X X -itertools@0.14.0 X X -itoa@1.0.15 X X -jobserver@0.1.34 X X -js-sys@0.3.78 X X -lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X -libm@0.2.15 X -libz-rs-sys@0.5.2 X -linux-raw-sys@0.9.4 X X X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -lzma-sys@0.1.20 X X -matchers@0.2.0 X -md-5@0.10.6 X X -memchr@2.7.5 X X -miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X -murmur3@0.5.2 X X -nu-ansi-term@0.50.1 X -num@0.4.3 X X -num-bigint@0.4.6 X X -num-complex@0.4.6 X X -num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X -num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X -object_store@0.12.3 X X -once_cell@1.21.3 X X -opendal@0.54.0 X -ordered-float@2.10.1 X -ordered-float@4.6.0 X -parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X -paste@1.0.15 X X -percent-encoding@2.3.2 X X -petgraph@0.8.2 X X -phf@0.12.1 X -phf_shared@0.12.1 X -pin-project-lite@0.2.16 X X -pin-utils@0.1.0 X X -pkg-config@0.3.32 X X -portable-atomic@1.11.1 X X -potential_utf@0.1.3 X -ppv-lite86@0.2.21 X X -prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X -psm@0.1.26 X X -quad-rand@0.2.3 X -quick-xml@0.37.5 X -quote@1.0.40 X X -r-efi@5.3.0 X X X -rand@0.8.5 X X -rand@0.9.2 X X -rand_chacha@0.3.1 X X -rand_chacha@0.9.0 X X -rand_core@0.6.4 X X -rand_core@0.9.3 X X -recursive@0.1.1 X -recursive-proc-macro-impl@0.1.1 X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X -reqsign@0.16.5 X -reqwest@0.12.23 X X -ring@0.17.14 X X -roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X -rustc_version@0.4.1 X X -rustix@1.0.8 X X X -rustls@0.23.31 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X -rustversion@1.0.22 X 
X -ryu@1.0.20 X X -same-file@1.0.6 X X -scoped-tls@1.0.1 X X -scopeguard@1.2.0 X X -semver@1.0.26 X X -seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X -serde_repr@0.1.20 X X -serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X -sha1@0.10.6 X X -sha2@0.10.9 X X -sharded-slab@0.1.7 X -shlex@1.3.0 X X -simdutf8@0.1.5 X X -siphasher@1.0.1 X X -slab@0.4.11 X -smallvec@1.15.1 X X -snap@1.1.1 X -socket2@0.6.0 X X -sqlparser@0.55.0 X -sqlparser_derive@0.3.0 X -stable_deref_trait@1.2.0 X X -stacker@0.1.21 X X -static_assertions@1.1.0 X X -strsim@0.11.1 X -strum@0.27.2 X -strum_macros@0.27.2 X -subtle@2.6.1 X -syn@2.0.106 X X -sync_wrapper@1.0.2 X -synstructure@0.13.2 X -tagptr@0.2.0 X X -tempfile@3.22.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X -thread_local@1.1.9 X X -threadpool@1.8.1 X X -thrift@0.17.0 X -tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X -tower@0.5.2 X -tower-http@0.6.6 X -tower-layer@0.3.3 X -tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X -tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X -try-lock@0.2.5 X -twox-hash@2.1.2 X -typed-builder@0.20.1 X X -typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X -unicode-segmentation@1.12.0 X X -unicode-width@0.2.1 X X -untrusted@0.9.0 X -url@2.5.7 X X -utf8_iter@1.0.4 X X -uuid@1.18.1 X X -version_check@0.9.5 X X -walkdir@2.5.0 X X -want@0.3.1 X -wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X -wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -web-time@1.1.0 X X -webpki-roots@1.0.2 X -winapi-util@0.1.11 X X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X -windows-sys@0.52.0 X X -windows-sys@0.59.0 X X -windows-sys@0.61.0 X X -windows-targets@0.52.6 X X -windows-threading@0.1.0 X X -windows_aarch64_gnullvm@0.52.6 X X -windows_aarch64_msvc@0.52.6 X X -windows_i686_gnu@0.52.6 X X -windows_i686_gnullvm@0.52.6 X X -windows_i686_msvc@0.52.6 X X -windows_x86_64_gnu@0.52.6 X X -windows_x86_64_gnullvm@0.52.6 X X -windows_x86_64_msvc@0.52.6 X X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -xz2@0.1.7 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X -zerofrom@0.1.6 X -zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X -zstd@0.13.3 X -zstd-safe@7.2.4 X X -zstd-sys@2.0.16+zstd.1.5.7 X X +crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 Unicode-3.0 Unlicense Zlib bzip2-1.0.6 +adler2@2.0.1 X X X +ahash@0.8.12 X X +aho-corasick@1.1.4 X X +alloc-no-stdlib@2.0.4 X +alloc-stdlib@0.2.2 X +allocator-api2@0.2.21 X X +android_system_properties@0.1.5 X X +anyhow@1.0.100 X X +apache-avro@0.21.0 X +ar_archive_writer@0.2.0 X +array-init@2.1.0 X X +arrayref@0.3.9 X +arrayvec@0.7.6 X X +arrow@57.1.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X 
+arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-csv@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-json@57.1.0 X +arrow-ord@57.1.0 X +arrow-row@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X +as-any@0.3.2 X X +async-compression@0.4.19 X X +async-lock@3.4.1 X X +async-trait@0.1.89 X X +atoi@2.0.0 X +atomic-waker@1.1.2 X X +autocfg@1.5.0 X X +backon@1.6.0 X +base64@0.22.1 X X +bigdecimal@0.4.9 X X +bimap@0.6.3 X X +bitflags@2.10.0 X X +blake2@0.10.6 X X +blake3@1.8.2 X X X +block-buffer@0.10.4 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X +brotli@8.0.2 X X +brotli-decompressor@5.0.0 X X +bumpalo@3.19.0 X X +bytemuck@1.24.0 X X X +byteorder@1.5.0 X X +bytes@1.11.0 X +bzip2@0.5.2 X X +bzip2@0.6.1 X X +bzip2-sys@0.1.13+1.0.8 X X +cc@1.2.49 X X +cfg-if@1.0.4 X X +chrono@0.4.42 X X +chrono-tz@0.10.4 X X +comfy-table@7.2.1 X +concurrent-queue@2.5.0 X X +const-oid@0.9.6 X X +const-random@0.1.18 X X +const-random-macro@0.1.16 X X +constant_time_eq@0.3.1 X X X +core-foundation-sys@0.8.7 X X +cpufeatures@0.2.17 X X +crc32c@0.6.8 X X +crc32fast@1.5.0 X X +crossbeam-channel@0.5.15 X X +crossbeam-epoch@0.9.18 X X +crossbeam-utils@0.8.21 X X +crunchy@0.2.4 X +crypto-common@0.1.7 X X +csv@1.4.0 X X +csv-core@0.1.13 X X +darling@0.20.11 X +darling@0.21.3 X +darling_core@0.20.11 X +darling_core@0.21.3 X +darling_macro@0.20.11 X +darling_macro@0.21.3 X +dashmap@6.1.0 X +datafusion@51.0.0 X +datafusion-catalog@51.0.0 X +datafusion-catalog-listing@51.0.0 X +datafusion-common@51.0.0 X +datafusion-common-runtime@51.0.0 X +datafusion-datasource@51.0.0 X +datafusion-datasource-arrow@51.0.0 X +datafusion-datasource-csv@51.0.0 X +datafusion-datasource-json@51.0.0 X +datafusion-datasource-parquet@51.0.0 X +datafusion-doc@51.0.0 X +datafusion-execution@51.0.0 X +datafusion-expr@51.0.0 X +datafusion-expr-common@51.0.0 X +datafusion-functions@51.0.0 X +datafusion-functions-aggregate@51.0.0 X +datafusion-functions-aggregate-common@51.0.0 X +datafusion-functions-nested@51.0.0 X +datafusion-functions-table@51.0.0 X +datafusion-functions-window@51.0.0 X +datafusion-functions-window-common@51.0.0 X +datafusion-macros@51.0.0 X +datafusion-optimizer@51.0.0 X +datafusion-physical-expr@51.0.0 X +datafusion-physical-expr-adapter@51.0.0 X +datafusion-physical-expr-common@51.0.0 X +datafusion-physical-optimizer@51.0.0 X +datafusion-physical-plan@51.0.0 X +datafusion-pruning@51.0.0 X +datafusion-session@51.0.0 X +datafusion-sql@51.0.0 X +derive_builder@0.20.2 X X +derive_builder_core@0.20.2 X X +derive_builder_macro@0.20.2 X X +digest@0.10.7 X X +displaydoc@0.2.5 X X +dissimilar@1.0.10 X +either@1.15.0 X X +equivalent@1.0.2 X X +errno@0.3.14 X X +event-listener@5.4.1 X X +event-listener-strategy@0.5.4 X X +expect-test@1.5.1 X X +fastrand@2.3.0 X X +find-msvc-tools@0.1.5 X X +fixedbitset@0.5.7 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X +fnv@1.0.7 X X +foldhash@0.1.5 X +form_urlencoded@1.2.2 X X +futures@0.3.31 X X +futures-channel@0.3.31 X X +futures-core@0.3.31 X X +futures-executor@0.3.31 X X +futures-io@0.3.31 X X +futures-macro@0.3.31 X X +futures-sink@0.3.31 X X +futures-task@0.3.31 X X +futures-timer@3.0.3 X X +futures-util@0.3.31 X X +generic-array@0.14.7 X +getrandom@0.2.16 X X +getrandom@0.3.4 X X +glob@0.3.3 X X +gloo-timers@0.3.0 X X +half@2.7.1 X X +hashbrown@0.14.5 X X +hashbrown@0.15.5 X X +hashbrown@0.16.1 X X +heck@0.5.0 X X +hex@0.4.3 X X +hmac@0.12.1 X X +home@0.5.11 X X +http@1.4.0 X X +http-body@1.0.1 X +http-body-util@0.1.3 X +httparse@1.10.1 X X +humantime@2.3.0 X X 
+hyper@1.8.1 X +hyper-rustls@0.27.7 X X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X +iana-time-zone-haiku@0.1.2 X X +iceberg@0.8.0 X +iceberg-datafusion@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X +ident_case@1.0.1 X X +idna@1.1.0 X X +idna_adapter@1.2.1 X X +indexmap@2.12.1 X X +integer-encoding@3.0.4 X +ipnet@2.11.0 X X +iri-string@0.7.9 X X +itertools@0.13.0 X X +itertools@0.14.0 X X +itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X +jobserver@0.1.34 X X +js-sys@0.3.83 X X +lazy_static@1.5.0 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libbz2-rs-sys@0.2.2 X +libc@0.2.178 X X +libm@0.2.15 X +libz-rs-sys@0.5.3 X +linux-raw-sys@0.11.0 X X X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X +lzma-sys@0.1.20 X X +md-5@0.10.6 X X +memchr@2.7.6 X X +miniz_oxide@0.8.9 X X X +mio@1.1.1 X +moka@0.12.11 X X +murmur3@0.5.2 X X +nu-ansi-term@0.50.3 X +num-bigint@0.4.6 X X +num-complex@0.4.6 X X +num-integer@0.1.46 X X +num-traits@0.2.19 X X +object@0.32.2 X X +object_store@0.12.4 X X +once_cell@1.21.3 X X +opendal@0.55.0 X +ordered-float@2.10.1 X +ordered-float@4.6.0 X +parking@2.2.1 X X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X +paste@1.0.15 X X +percent-encoding@2.3.2 X X +petgraph@0.8.3 X X +phf@0.12.1 X +phf_shared@0.12.1 X +pin-project-lite@0.2.16 X X +pin-utils@0.1.0 X X +pkg-config@0.3.32 X X +portable-atomic@1.11.1 X X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X +ppv-lite86@0.2.21 X X +prettyplease@0.2.37 X X +proc-macro-crate@3.4.0 X X +proc-macro2@1.0.103 X X +psm@0.1.28 X X +quad-rand@0.2.3 X +quick-xml@0.38.4 X +quote@1.0.42 X X +r-efi@5.3.0 X X X +rand@0.8.5 X X +rand@0.9.2 X X +rand_chacha@0.3.1 X X +rand_chacha@0.9.0 X X +rand_core@0.6.4 X X +rand_core@0.9.3 X X +recursive@0.1.1 X +recursive-proc-macro-impl@0.1.1 X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X +relative-path@1.9.3 X X +reqsign@0.16.5 X +reqwest@0.12.25 X X +ring@0.17.14 X X +roaring@0.11.2 X X +rstest@0.26.1 X X +rstest_macros@0.26.1 X X +rust_decimal@1.39.0 X +rustc_version@0.4.1 X X +rustix@1.1.2 X X X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X +rustversion@1.0.22 X X +ryu@1.0.20 X X +same-file@1.0.6 X X +scopeguard@1.2.0 X X +semver@1.0.27 X X +seq-macro@0.3.6 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X +serde_repr@0.1.20 X X +serde_urlencoded@0.7.1 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X +sha1@0.10.6 X X +sha2@0.10.9 X X +sharded-slab@0.1.7 X +shlex@1.3.0 X X +simd-adler32@0.3.8 X +simdutf8@0.1.5 X X +siphasher@1.0.1 X X +slab@0.4.11 X +smallvec@1.15.1 X X +snap@1.1.1 X +socket2@0.6.1 X X +sqlparser@0.59.0 X +sqlparser_derive@0.3.0 X +stable_deref_trait@1.2.1 X X +stacker@0.1.22 X X +strsim@0.11.1 X +strum@0.27.2 X +strum_macros@0.27.2 X +subtle@2.6.1 X +syn@2.0.111 X X +sync_wrapper@1.0.2 X +synstructure@0.13.2 X +tagptr@0.2.0 X X +tempfile@3.23.0 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X +thread_local@1.1.9 X X +thrift@0.17.0 X +tiny-keccak@2.0.2 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X 
+tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X +toml_datetime@0.7.3 X X +toml_edit@0.23.9 X X +toml_parser@1.0.4 X X +tower@0.5.2 X +tower-http@0.6.8 X +tower-layer@0.3.3 X +tower-service@0.3.3 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X +tracing-log@0.2.0 X +tracing-subscriber@0.3.22 X +try-lock@0.2.5 X +twox-hash@2.1.2 X +typed-builder@0.20.1 X X +typed-builder-macro@0.20.1 X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X +unicode-segmentation@1.12.0 X X +unicode-width@0.2.2 X X +untrusted@0.9.0 X +url@2.5.7 X X +utf8_iter@1.0.4 X X +uuid@1.19.0 X X +version_check@0.9.5 X X +walkdir@2.5.0 X X +want@0.3.1 X +wasi@0.11.1+wasi-snapshot-preview1 X X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X +wasm-streams@0.4.2 X X +web-sys@0.3.83 X X +web-time@1.1.0 X X +webpki-roots@1.0.4 X +winapi-util@0.1.11 X X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X +windows-sys@0.52.0 X X +windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X +windows-targets@0.52.6 X X +windows-targets@0.53.5 X X +windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X +windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X +windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X +windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X +windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X +windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X +windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X +windows_x86_64_msvc@0.52.6 X X +windows_x86_64_msvc@0.53.1 X X +winnow@0.7.14 X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +xz2@0.1.7 X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X +zerofrom@0.1.6 X +zerofrom-derive@0.1.6 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X +zstd@0.13.3 X +zstd-safe@7.2.4 X X +zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/integrations/datafusion/src/physical_plan/mod.rs b/crates/integrations/datafusion/src/physical_plan/mod.rs index eb58082fe5..5a9845cde0 100644 --- a/crates/integrations/datafusion/src/physical_plan/mod.rs +++ b/crates/integrations/datafusion/src/physical_plan/mod.rs @@ -21,6 +21,7 @@ pub(crate) mod metadata_scan; pub(crate) mod project; pub(crate) mod repartition; pub(crate) mod scan; +pub(crate) mod sort; pub(crate) mod write; pub(crate) const DATA_FILES_COL_NAME: &str = "data_files"; diff --git a/crates/integrations/datafusion/src/physical_plan/repartition.rs b/crates/integrations/datafusion/src/physical_plan/repartition.rs index 8ad87fd1cc..2d1d7f862c 100644 --- a/crates/integrations/datafusion/src/physical_plan/repartition.rs +++ b/crates/integrations/datafusion/src/physical_plan/repartition.rs @@ -159,9 +159,8 @@ fn determine_partitioning_strategy( // Case 2: Partitioned table missing _partition column (normally this should not happen) (true, Err(_)) => Err(DataFusionError::Plan(format!( - "Partitioned table input missing {} column. \ - Ensure projection happens before repartitioning.", - PROJECTED_PARTITION_VALUE_COLUMN + "Partitioned table input missing {PROJECTED_PARTITION_VALUE_COLUMN} column. \ + Ensure projection happens before repartitioning." 
))),
        // Case 3: Unpartitioned table, always use RoundRobinBatch
@@ -508,8 +507,7 @@ mod tests {
                 assert!(
                     column_names.contains(&PROJECTED_PARTITION_VALUE_COLUMN.to_string()),
-                    "Should use _partition column, got: {:?}",
-                    column_names
+                    "Should use _partition column, got: {column_names:?}"
                 );
             }
             _ => panic!("Expected Hash partitioning with Identity transform"),
@@ -733,8 +731,7 @@ mod tests {
             .collect();
         assert!(
             column_names.contains(&PROJECTED_PARTITION_VALUE_COLUMN.to_string()),
-            "Should use _partition column for mixed transforms with Identity, got: {:?}",
-            column_names
+            "Should use _partition column for mixed transforms with Identity, got: {column_names:?}"
         );
     }
     _ => panic!("Expected Hash partitioning for table with identity transforms"),
diff --git a/crates/integrations/datafusion/src/physical_plan/scan.rs b/crates/integrations/datafusion/src/physical_plan/scan.rs
index be92e93d25..d627b6a63d 100644
--- a/crates/integrations/datafusion/src/physical_plan/scan.rs
+++ b/crates/integrations/datafusion/src/physical_plan/scan.rs
@@ -51,6 +51,8 @@ pub struct IcebergTableScan {
     projection: Option<Vec<usize>>,
     /// Filters to apply to the table scan
     predicates: Option<Predicate>,
+    /// Optional limit on the number of rows to return
+    limit: Option<usize>,
 }
 
 impl IcebergTableScan {
@@ -61,6 +63,7 @@ impl IcebergTableScan {
         schema: ArrowSchemaRef,
         projection: Option<&Vec<usize>>,
         filters: &[Expr],
+        limit: Option<usize>,
     ) -> Self {
         let output_schema = match projection {
             None => schema.clone(),
@@ -76,6 +79,7 @@
             plan_properties,
             projection,
             predicates,
+            limit,
         }
     }
 
@@ -95,6 +99,10 @@ impl IcebergTableScan {
         self.predicates.as_ref()
     }
 
+    pub fn limit(&self) -> Option<usize> {
+        self.limit
+    }
+
     /// Computes [`PlanProperties`] used in query optimization.
     fn compute_properties(schema: ArrowSchemaRef) -> PlanProperties {
         // TODO:
@@ -146,9 +154,29 @@ impl ExecutionPlan for IcebergTableScan {
         );
         let stream = futures::stream::once(fut).try_flatten();
 
+        // Apply limit if specified
+        let limited_stream: Pin<Box<dyn Stream<Item = DFResult<RecordBatch>> + Send>> =
+            if let Some(limit) = self.limit {
+                let mut remaining = limit;
+                Box::pin(stream.try_filter_map(move |batch| {
+                    futures::future::ready(if remaining == 0 {
+                        Ok(None)
+                    } else if batch.num_rows() <= remaining {
+                        remaining -= batch.num_rows();
+                        Ok(Some(batch))
+                    } else {
+                        let limited_batch = batch.slice(0, remaining);
+                        remaining = 0;
+                        Ok(Some(limited_batch))
+                    })
+                }))
+            } else {
+                Box::pin(stream)
+            };
+
         Ok(Box::pin(RecordBatchStreamAdapter::new(
             self.schema(),
-            stream,
+            limited_stream,
         )))
     }
 }
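Since the limit is applied by truncating the batch stream rather than inside the Iceberg scan itself, the behavior is easiest to see in isolation. Below is a minimal sketch of just the counter arithmetic; `apply_limit` is a hypothetical helper for illustration, not part of this crate:

    // Given per-batch row counts, compute the rows that survive a LIMIT.
    // This mirrors the `remaining` counter in IcebergTableScan::execute above.
    fn apply_limit(batch_sizes: &[usize], limit: usize) -> Vec<usize> {
        let mut remaining = limit;
        batch_sizes
            .iter()
            .filter_map(|&rows| {
                if remaining == 0 {
                    None // limit exhausted: drop the batch entirely
                } else if rows <= remaining {
                    remaining -= rows;
                    Some(rows) // whole batch fits under the limit
                } else {
                    let take = remaining; // corresponds to batch.slice(0, remaining)
                    remaining = 0;
                    Some(take)
                }
            })
            .collect()
    }

    fn main() {
        // Three 4-row batches under LIMIT 10: the third batch is sliced to 2 rows.
        assert_eq!(apply_limit(&[4, 4, 4], 10), vec![4, 4, 2]);
    }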
diff --git a/crates/integrations/datafusion/src/physical_plan/sort.rs b/crates/integrations/datafusion/src/physical_plan/sort.rs
new file mode 100644
index 0000000000..587ab120ca
--- /dev/null
+++ b/crates/integrations/datafusion/src/physical_plan/sort.rs
@@ -0,0 +1,240 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Partition-based sorting for Iceberg tables.
+
+use std::sync::Arc;
+
+use datafusion::arrow::compute::SortOptions;
+use datafusion::common::Result as DFResult;
+use datafusion::error::DataFusionError;
+use datafusion::physical_expr::{LexOrdering, PhysicalSortExpr};
+use datafusion::physical_plan::ExecutionPlan;
+use datafusion::physical_plan::expressions::Column;
+use datafusion::physical_plan::sorts::sort::SortExec;
+use iceberg::arrow::PROJECTED_PARTITION_VALUE_COLUMN;
+
+/// Sorts an ExecutionPlan by partition values for Iceberg tables.
+///
+/// This function takes an input ExecutionPlan that has been extended with partition values
+/// (via `project_with_partition`) and returns a SortExec that sorts by the partition column.
+/// The partition values are expected to be in a struct column named `PROJECTED_PARTITION_VALUE_COLUMN`.
+///
+/// For unpartitioned tables or plans without the partition column, returns an error.
+///
+/// # Arguments
+/// * `input` - The input ExecutionPlan with projected partition values
+///
+/// # Returns
+/// * `Ok(Arc<dyn ExecutionPlan>)` - A SortExec that sorts by partition values
+/// * `Err` - If the partition column is not found
+pub(crate) fn sort_by_partition(input: Arc<dyn ExecutionPlan>) -> DFResult<Arc<dyn ExecutionPlan>> {
+    let schema = input.schema();
+
+    // Find the partition column in the schema
+    let (partition_column_index, _partition_field) = schema
+        .column_with_name(PROJECTED_PARTITION_VALUE_COLUMN)
+        .ok_or_else(|| {
+            DataFusionError::Plan(format!(
+                "Partition column '{PROJECTED_PARTITION_VALUE_COLUMN}' not found in schema. Ensure the plan has been extended with partition values using project_with_partition."
+            ))
+        })?;
+
+    // Create a single sort expression for the partition column
+    let column_expr = Arc::new(Column::new(
+        PROJECTED_PARTITION_VALUE_COLUMN,
+        partition_column_index,
+    ));
+
+    let sort_expr = PhysicalSortExpr {
+        expr: column_expr,
+        options: SortOptions::default(), // Ascending, nulls last
+    };
+
+    // Create a SortExec with preserve_partitioning=true to ensure the output partitioning
+    // is the same as the input partitioning, and the data is sorted within each partition
+    let lex_ordering = LexOrdering::new(vec![sort_expr]).ok_or_else(|| {
+        DataFusionError::Plan("Failed to create LexOrdering from sort expression".to_string())
+    })?;
+
+    let sort_exec = SortExec::new(lex_ordering, input).with_preserve_partitioning(true);
+
+    Ok(Arc::new(sort_exec))
+}
Arc::new(Field::new("id_partition", DataType::Int32, false)), + Arc::new(Int32Array::from(vec![3, 1, 2])) as _, + )])); + + let batch = + RecordBatch::try_new(schema.clone(), vec![id_array, name_array, partition_array]) + .unwrap(); + + let ctx = SessionContext::new(); + let mem_table = MemTable::try_new(schema.clone(), vec![vec![batch]]).unwrap(); + let input = mem_table.scan(&ctx.state(), None, &[], None).await.unwrap(); + + // Apply sort + let sorted_plan = sort_by_partition(input).unwrap(); + + // Execute and verify + let result = datafusion::physical_plan::collect(sorted_plan, ctx.task_ctx()) + .await + .unwrap(); + + assert_eq!(result.len(), 1); + let result_batch = &result[0]; + + let id_col = result_batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + + // Verify data is sorted by partition value + assert_eq!(id_col.value(0), 1); + assert_eq!(id_col.value(1), 2); + assert_eq!(id_col.value(2), 3); + } + + #[tokio::test] + async fn test_sort_by_partition_missing_column() { + let schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, false), + ])); + + let batch = RecordBatch::try_new(schema.clone(), vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StringArray::from(vec!["a", "b", "c"])), + ]) + .unwrap(); + + let ctx = SessionContext::new(); + let mem_table = MemTable::try_new(schema.clone(), vec![vec![batch]]).unwrap(); + let input = mem_table.scan(&ctx.state(), None, &[], None).await.unwrap(); + + let result = sort_by_partition(input); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("Partition column '_partition' not found") + ); + } + + #[tokio::test] + async fn test_sort_by_partition_multi_field() { + // Test with multiple partition fields in the struct + let partition_fields = Fields::from(vec![ + Field::new("year", DataType::Int32, false), + Field::new("month", DataType::Int32, false), + ]); + + let schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("data", DataType::Utf8, false), + Field::new( + PROJECTED_PARTITION_VALUE_COLUMN, + DataType::Struct(partition_fields.clone()), + false, + ), + ])); + + // Create test data with partition values (year, month) + let id_array = Arc::new(Int32Array::from(vec![1, 2, 3, 4])); + let data_array = Arc::new(StringArray::from(vec!["a", "b", "c", "d"])); + + // Partition values: (2024, 2), (2024, 1), (2023, 12), (2024, 1) + let year_array = Arc::new(Int32Array::from(vec![2024, 2024, 2023, 2024])); + let month_array = Arc::new(Int32Array::from(vec![2, 1, 12, 1])); + + let partition_array = Arc::new(StructArray::from(vec![ + ( + Arc::new(Field::new("year", DataType::Int32, false)), + year_array as _, + ), + ( + Arc::new(Field::new("month", DataType::Int32, false)), + month_array as _, + ), + ])); + + let batch = + RecordBatch::try_new(schema.clone(), vec![id_array, data_array, partition_array]) + .unwrap(); + + let ctx = SessionContext::new(); + let mem_table = MemTable::try_new(schema.clone(), vec![vec![batch]]).unwrap(); + let input = mem_table.scan(&ctx.state(), None, &[], None).await.unwrap(); + + // Apply sort + let sorted_plan = sort_by_partition(input).unwrap(); + + // Execute and verify + let result = datafusion::physical_plan::collect(sorted_plan, ctx.task_ctx()) + .await + .unwrap(); + + assert_eq!(result.len(), 1); + let result_batch = &result[0]; + + let id_col = result_batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + + // Verify data is 
diff --git a/crates/integrations/datafusion/src/physical_plan/write.rs b/crates/integrations/datafusion/src/physical_plan/write.rs
index 9eb53c235f..fdfddf877b 100644
--- a/crates/integrations/datafusion/src/physical_plan/write.rs
+++ b/crates/integrations/datafusion/src/physical_plan/write.rs
@@ -266,8 +266,28 @@ impl ExecutionPlan for IcebergWriteExec {
         let data_file_writer_builder = DataFileWriterBuilder::new(rolling_writer_builder);
 
         // Create TaskWriter
-        // TODO: Make fanout_enabled configurable via table properties
-        let fanout_enabled = true;
+        let fanout_enabled = self
+            .table
+            .metadata()
+            .properties()
+            .get(TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED)
+            .map(|value| {
+                value
+                    .parse::<bool>()
+                    .map_err(|e| {
+                        Error::new(
+                            ErrorKind::DataInvalid,
+                            format!(
+                                "Invalid value for {}, expected 'true' or 'false'",
+                                TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED
+                            ),
+                        )
+                        .with_source(e)
+                    })
+                    .map_err(to_datafusion_error)
+            })
+            .transpose()?
+            .unwrap_or(TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED_DEFAULT);
 
         let schema = self.table.metadata().current_schema().clone();
         let partition_spec = self.table.metadata().default_partition_spec().clone();
         let task_writer = TaskWriter::try_new(
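The property handling above boils down to "parse if present, else default"; the same logic reappears in `insert_into` in table/mod.rs below. A condensed, illustrative restatement (the `fanout_enabled_from` helper is hypothetical, not crate API):

    use std::collections::HashMap;

    // `str::parse::<bool>` accepts exactly "true"/"false"; anything else errors,
    // and a missing property falls back to the compiled-in default.
    fn fanout_enabled_from(
        props: &HashMap<String, String>,
        key: &str,     // e.g. TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED
        default: bool, // e.g. TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED_DEFAULT
    ) -> Result<bool, String> {
        match props.get(key) {
            None => Ok(default),
            Some(value) => value
                .parse::<bool>()
                .map_err(|_| format!("Invalid value for {key}, expected 'true' or 'false'")),
        }
    }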
diff --git a/crates/integrations/datafusion/src/schema.rs b/crates/integrations/datafusion/src/schema.rs
index 3920ee73ca..31bbdbd67f 100644
--- a/crates/integrations/datafusion/src/schema.rs
+++ b/crates/integrations/datafusion/src/schema.rs
@@ -28,6 +28,7 @@ use iceberg::inspect::MetadataTableType;
 use iceberg::{Catalog, NamespaceIdent, Result};
 
 use crate::table::IcebergTableProvider;
+use crate::to_datafusion_error;
 
 /// Represents a [`SchemaProvider`] for the Iceberg [`Catalog`], managing
 /// access to table providers within a specific namespace.
@@ -113,7 +114,10 @@ impl SchemaProvider for IcebergSchemaProvider {
             let metadata_table_type =
                 MetadataTableType::try_from(metadata_table_name).map_err(DataFusionError::Plan)?;
             if let Some(table) = self.tables.get(table_name) {
-                let metadata_table = table.metadata_table(metadata_table_type);
+                let metadata_table = table
+                    .metadata_table(metadata_table_type)
+                    .await
+                    .map_err(to_datafusion_error)?;
                 return Ok(Some(Arc::new(metadata_table)));
             } else {
                 return Ok(None);
diff --git a/crates/integrations/datafusion/src/table/mod.rs b/crates/integrations/datafusion/src/table/mod.rs
index 42a3baad3b..ae87342fa5 100644
--- a/crates/integrations/datafusion/src/table/mod.rs
+++ b/crates/integrations/datafusion/src/table/mod.rs
@@ -15,6 +15,16 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Iceberg table providers for DataFusion.
+//!
+//! This module provides two table provider implementations:
+//!
+//! - [`IcebergTableProvider`]: Catalog-backed provider with automatic metadata refresh.
+//!   Use for write operations and when you need to see the latest table state.
+//!
+//! - [`IcebergStaticTableProvider`]: Static provider for read-only access to a specific
+//!   table snapshot. Use for consistent analytical queries or time-travel scenarios.
+
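To make the split concrete, here is a rough usage sketch for the read-only path (the table location, identifiers, and metadata file path are illustrative; `StaticTable::from_metadata_file` is the same constructor the factory and tests below rely on):

    use std::sync::Arc;

    use datafusion::prelude::SessionContext;
    use iceberg::TableIdent;
    use iceberg::io::FileIO;
    use iceberg::table::StaticTable;

    async fn register_static(ctx: &SessionContext) -> Result<(), Box<dyn std::error::Error>> {
        // Pinned, read-only view of one metadata version: no catalog round-trips.
        let table = StaticTable::from_metadata_file(
            "s3://bucket/warehouse/db/t/metadata/v3.metadata.json", // illustrative path
            TableIdent::from_strs(["db", "t"])?,
            FileIO::from_path("s3://bucket/warehouse")?.build()?,
        )
        .await?
        .into_table();
        let provider = IcebergStaticTableProvider::try_new_from_table(table).await?;
        ctx.register_table("t", Arc::new(provider))?;
        Ok(())
    }

The catalog-backed `IcebergTableProvider`, by contrast, is constructed through the catalog provider and re-loads table metadata on every scan and insert.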
 pub mod metadata_table;
 pub mod table_provider_factory;
 
@@ -34,102 +44,67 @@ use datafusion::physical_plan::ExecutionPlan;
 use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec;
 use iceberg::arrow::schema_to_arrow_schema;
 use iceberg::inspect::MetadataTableType;
+use iceberg::spec::TableProperties;
 use iceberg::table::Table;
 use iceberg::{Catalog, Error, ErrorKind, NamespaceIdent, Result, TableIdent};
 use metadata_table::IcebergMetadataTableProvider;
 
+use crate::error::to_datafusion_error;
 use crate::physical_plan::commit::IcebergCommitExec;
 use crate::physical_plan::project::project_with_partition;
 use crate::physical_plan::repartition::repartition;
 use crate::physical_plan::scan::IcebergTableScan;
+use crate::physical_plan::sort::sort_by_partition;
 use crate::physical_plan::write::IcebergWriteExec;
 
-/// Represents a [`TableProvider`] for the Iceberg [`Catalog`],
-/// managing access to a [`Table`].
+/// Catalog-backed table provider with automatic metadata refresh.
+///
+/// This provider loads fresh table metadata from the catalog on every scan and write
+/// operation, ensuring you always see the latest table state. Use this when you need
+/// write operations or want to see the most up-to-date data.
+///
+/// For read-only access to a specific snapshot without catalog overhead, use
+/// [`IcebergStaticTableProvider`] instead.
 #[derive(Debug, Clone)]
 pub struct IcebergTableProvider {
-    /// A table in the catalog.
-    table: Table,
-    /// Table snapshot id that will be queried via this provider.
-    snapshot_id: Option<i64>,
-    /// A reference-counted arrow `Schema`.
+    /// The catalog that manages this table
+    catalog: Arc<dyn Catalog>,
+    /// The table identifier (namespace + name)
+    table_ident: TableIdent,
+    /// A reference-counted arrow `Schema` (cached at construction)
     schema: ArrowSchemaRef,
-    /// The catalog that the table belongs to.
-    catalog: Option<Arc<dyn Catalog>>,
 }
 
 impl IcebergTableProvider {
-    pub(crate) fn new(table: Table, schema: ArrowSchemaRef) -> Self {
-        IcebergTableProvider {
-            table,
-            snapshot_id: None,
-            schema,
-            catalog: None,
-        }
-    }
-    /// Asynchronously tries to construct a new [`IcebergTableProvider`]
-    /// using the given client and table name to fetch an actual [`Table`]
-    /// in the provided namespace.
+    /// Creates a new catalog-backed table provider.
+    ///
+    /// Loads the table once to get the initial schema, then stores the catalog
+    /// reference for future metadata refreshes on each operation.
     pub(crate) async fn try_new(
-        client: Arc<dyn Catalog>,
+        catalog: Arc<dyn Catalog>,
         namespace: NamespaceIdent,
         name: impl Into<String>,
     ) -> Result<Self> {
-        let ident = TableIdent::new(namespace, name.into());
-        let table = client.load_table(&ident).await?;
-
-        let schema = Arc::new(schema_to_arrow_schema(table.metadata().current_schema())?);
+        let table_ident = TableIdent::new(namespace, name.into());
 
-        Ok(IcebergTableProvider {
-            table,
-            snapshot_id: None,
-            schema,
-            catalog: Some(client),
-        })
-    }
-
-    /// Asynchronously tries to construct a new [`IcebergTableProvider`]
-    /// using the given table. Can be used to create a table provider from an existing table regardless of the catalog implementation.
-    pub async fn try_new_from_table(table: Table) -> Result<Self> {
+        // Load table once to get initial schema
+        let table = catalog.load_table(&table_ident).await?;
         let schema = Arc::new(schema_to_arrow_schema(table.metadata().current_schema())?);
-        Ok(IcebergTableProvider {
-            table,
-            snapshot_id: None,
-            schema,
-            catalog: None,
-        })
-    }
 
-    /// Asynchronously tries to construct a new [`IcebergTableProvider`]
-    /// using a specific snapshot of the given table. Can be used to create a table provider from an existing table regardless of the catalog implementation.
-    pub async fn try_new_from_table_snapshot(table: Table, snapshot_id: i64) -> Result<Self> {
-        let snapshot = table
-            .metadata()
-            .snapshot_by_id(snapshot_id)
-            .ok_or_else(|| {
-                Error::new(
-                    ErrorKind::Unexpected,
-                    format!(
-                        "snapshot id {snapshot_id} not found in table {}",
-                        table.identifier().name()
-                    ),
-                )
-            })?;
-        let schema = snapshot.schema(table.metadata())?;
-        let schema = Arc::new(schema_to_arrow_schema(&schema)?);
         Ok(IcebergTableProvider {
-            table,
-            snapshot_id: Some(snapshot_id),
+            catalog,
+            table_ident,
             schema,
-            catalog: None,
         })
     }
 
-    pub(crate) fn metadata_table(&self, r#type: MetadataTableType) -> IcebergMetadataTableProvider {
-        IcebergMetadataTableProvider {
-            table: self.table.clone(),
-            r#type,
-        }
+    pub(crate) async fn metadata_table(
+        &self,
+        r#type: MetadataTableType,
+    ) -> Result<IcebergMetadataTableProvider> {
+        // Load fresh table metadata for metadata table access
+        let table = self.catalog.load_table(&self.table_ident).await?;
+        Ok(IcebergMetadataTableProvider { table, r#type })
     }
 }
 
@@ -152,14 +127,23 @@ impl TableProvider for IcebergTableProvider {
         _state: &dyn Session,
         projection: Option<&Vec<usize>>,
         filters: &[Expr],
-        _limit: Option<usize>,
+        limit: Option<usize>,
     ) -> DFResult<Arc<dyn ExecutionPlan>> {
+        // Load fresh table metadata from catalog
+        let table = self
+            .catalog
+            .load_table(&self.table_ident)
+            .await
+            .map_err(to_datafusion_error)?;
+
+        // Create scan with fresh metadata (always use current snapshot)
         Ok(Arc::new(IcebergTableScan::new(
-            self.table.clone(),
-            self.snapshot_id,
+            table,
+            None, // Always use current snapshot for catalog-backed provider
             self.schema.clone(),
             projection,
             filters,
+            limit,
         )))
     }
 
@@ -177,17 +161,18 @@ impl TableProvider for IcebergTableProvider {
         input: Arc<dyn ExecutionPlan>,
         _insert_op: InsertOp,
     ) -> DFResult<Arc<dyn ExecutionPlan>> {
-        let Some(catalog) = self.catalog.clone() else {
-            return Err(DataFusionError::Execution(
-                "Catalog cannot be none for insert_into".to_string(),
-            ));
-        };
+        // Load fresh table metadata from catalog
+        let table = self
+            .catalog
+            .load_table(&self.table_ident)
+            .await
+            .map_err(to_datafusion_error)?;
 
-        let partition_spec = self.table.metadata().default_partition_spec();
+        let partition_spec = table.metadata().default_partition_spec();
 
         // Step 1: Project partition values for partitioned tables
         let plan_with_partition = if !partition_spec.is_unpartitioned() {
-            project_with_partition(input, &self.table)?
+            project_with_partition(input, &table)?
         } else {
             input
         };
 
@@ -200,15 +185,41 @@
             )
         })?;
 
-        let repartitioned_plan = repartition(
-            plan_with_partition,
-            self.table.metadata_ref(),
-            target_partitions,
-        )?;
+        let repartitioned_plan =
+            repartition(plan_with_partition, table.metadata_ref(), target_partitions)?;
+
+        // Apply a sort node when fanout mode is disabled
+        let fanout_enabled = table
+            .metadata()
+            .properties()
+            .get(TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED)
+            .map(|value| {
+                value
+                    .parse::<bool>()
+                    .map_err(|e| {
+                        Error::new(
+                            ErrorKind::DataInvalid,
+                            format!(
+                                "Invalid value for {}, expected 'true' or 'false'",
+                                TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED
+                            ),
+                        )
+                        .with_source(e)
+                    })
+                    .map_err(to_datafusion_error)
+            })
+            .transpose()?
+            .unwrap_or(TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED_DEFAULT);
+
+        let write_input = if fanout_enabled {
+            repartitioned_plan
+        } else {
+            sort_by_partition(repartitioned_plan)?
+        };
 
         let write_plan = Arc::new(IcebergWriteExec::new(
-            self.table.clone(),
-            repartitioned_plan,
+            table.clone(),
+            write_input,
             self.schema.clone(),
         ));
 
@@ -216,21 +227,141 @@
         let coalesce_partitions = Arc::new(CoalescePartitionsExec::new(write_plan));
 
         Ok(Arc::new(IcebergCommitExec::new(
-            self.table.clone(),
-            catalog,
+            table,
+            self.catalog.clone(),
             coalesce_partitions,
             self.schema.clone(),
         )))
     }
 }
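Because the choice between fanout and sorted writes is driven entirely by a table property, it is set where the table is created or its properties are updated. An illustrative sketch mirroring the partitioned-table test helper further down (`schema` and `partition_spec` are assumed built elsewhere; the table name is made up):

    use std::collections::HashMap;

    use iceberg::TableCreation;
    use iceberg::spec::TableProperties;

    // "false" routes inserts through repartition + sort_by_partition instead of
    // the fanout writer; omitting the property keeps the default (fanout on).
    let properties = HashMap::from([(
        TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED.to_string(),
        "false".to_string(),
    )]);
    let _table_creation = TableCreation::builder()
        .name("events".to_string()) // illustrative
        .schema(schema)
        .partition_spec(partition_spec)
        .properties(properties)
        .build();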
+
+/// Static table provider for read-only snapshot access.
+///
+/// This provider holds a cached table instance and does not refresh metadata or support
+/// write operations. Use this for consistent analytical queries, time-travel scenarios,
+/// or when you want to avoid catalog overhead.
+///
+/// For catalog-backed tables with write support and automatic refresh, use
+/// [`IcebergTableProvider`] instead.
+#[derive(Debug, Clone)]
+pub struct IcebergStaticTableProvider {
+    /// The static table instance (never refreshed)
+    table: Table,
+    /// Optional snapshot ID for this static view
+    snapshot_id: Option<i64>,
+    /// A reference-counted arrow `Schema`
+    schema: ArrowSchemaRef,
+}
+
+impl IcebergStaticTableProvider {
+    /// Creates a static provider from a table instance.
+    ///
+    /// Uses the table's current snapshot for all queries. Does not support write operations.
+    pub async fn try_new_from_table(table: Table) -> Result<Self> {
+        let schema = Arc::new(schema_to_arrow_schema(table.metadata().current_schema())?);
+        Ok(IcebergStaticTableProvider {
+            table,
+            snapshot_id: None,
+            schema,
+        })
+    }
+
+    /// Creates a static provider for a specific table snapshot.
+    ///
+    /// Queries the specified snapshot for all operations. Useful for time-travel queries.
+    /// Does not support write operations.
+    pub async fn try_new_from_table_snapshot(table: Table, snapshot_id: i64) -> Result<Self> {
+        let snapshot = table
+            .metadata()
+            .snapshot_by_id(snapshot_id)
+            .ok_or_else(|| {
+                Error::new(
+                    ErrorKind::Unexpected,
+                    format!(
+                        "snapshot id {snapshot_id} not found in table {}",
+                        table.identifier().name()
+                    ),
+                )
+            })?;
+        let table_schema = snapshot.schema(table.metadata())?;
+        let schema = Arc::new(schema_to_arrow_schema(&table_schema)?);
+        Ok(IcebergStaticTableProvider {
+            table,
+            snapshot_id: Some(snapshot_id),
+            schema,
+        })
+    }
+}
+
+#[async_trait]
+impl TableProvider for IcebergStaticTableProvider {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn schema(&self) -> ArrowSchemaRef {
+        self.schema.clone()
+    }
+
+    fn table_type(&self) -> TableType {
+        TableType::Base
+    }
+
+    async fn scan(
+        &self,
+        _state: &dyn Session,
+        projection: Option<&Vec<usize>>,
+        filters: &[Expr],
+        limit: Option<usize>,
+    ) -> DFResult<Arc<dyn ExecutionPlan>> {
+        // Use cached table (no refresh)
+        Ok(Arc::new(IcebergTableScan::new(
+            self.table.clone(),
+            self.snapshot_id,
+            self.schema.clone(),
+            projection,
+            filters,
+            limit,
+        )))
+    }
+
+    fn supports_filters_pushdown(
+        &self,
+        filters: &[&Expr],
+    ) -> DFResult<Vec<TableProviderFilterPushDown>> {
+        // Push down all filters; as the single source of truth, the scanner will drop
+        // any filters that cannot be pushed down
+        Ok(vec![TableProviderFilterPushDown::Inexact; filters.len()])
+    }
+
+    async fn insert_into(
+        &self,
+        _state: &dyn Session,
+        _input: Arc<dyn ExecutionPlan>,
+        _insert_op: InsertOp,
+    ) -> DFResult<Arc<dyn ExecutionPlan>> {
+        Err(to_datafusion_error(Error::new(
+            ErrorKind::FeatureUnsupported,
+            "Write operations are not supported on IcebergStaticTableProvider. \
+             Use IcebergTableProvider with a catalog for write support."
+                .to_string(),
+        )))
+    }
+}
+
 #[cfg(test)]
 mod tests {
+    use std::collections::HashMap;
+    use std::sync::Arc;
+
+    use datafusion::common::Column;
+    use datafusion::physical_plan::ExecutionPlan;
     use datafusion::prelude::SessionContext;
-    use iceberg::TableIdent;
     use iceberg::io::FileIO;
+    use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder};
+    use iceberg::spec::{NestedField, PrimitiveType, Schema, Type};
     use iceberg::table::{StaticTable, Table};
+    use iceberg::{Catalog, CatalogBuilder, NamespaceIdent, TableCreation, TableIdent};
+    use tempfile::TempDir;
 
     use super::*;
 
@@ -253,10 +384,59 @@
         static_table.into_table()
     }
 
+    async fn get_test_catalog_and_table() -> (Arc<dyn Catalog>, NamespaceIdent, String, TempDir) {
+        let temp_dir = TempDir::new().unwrap();
+        let warehouse_path = temp_dir.path().to_str().unwrap().to_string();
+
+        let catalog = MemoryCatalogBuilder::default()
+            .load(
+                "memory",
+                HashMap::from([(MEMORY_CATALOG_WAREHOUSE.to_string(), warehouse_path.clone())]),
+            )
+            .await
+            .unwrap();
+
+        let namespace = NamespaceIdent::new("test_ns".to_string());
+        catalog
+            .create_namespace(&namespace, HashMap::new())
+            .await
+            .unwrap();
+
+        let schema = Schema::builder()
+            .with_schema_id(0)
+            .with_fields(vec![
+                NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
+                NestedField::required(2, "name", Type::Primitive(PrimitiveType::String)).into(),
+            ])
+            .build()
+            .unwrap();
+
+        let table_creation = TableCreation::builder()
+            .name("test_table".to_string())
+            .location(format!("{warehouse_path}/test_table"))
+            .schema(schema)
+            .properties(HashMap::new())
+            .build();
+
+        catalog
+            .create_table(&namespace, table_creation)
+            .await
+            .unwrap();
+
+        (
+            Arc::new(catalog),
+            namespace,
+            "test_table".to_string(),
+            temp_dir,
+        )
+    }
+
+    // Tests for IcebergStaticTableProvider
+
     #[tokio::test]
-    async fn test_try_new_from_table() {
+    async fn test_static_provider_from_table() {
         let table = get_test_table_from_metadata_file().await;
-        let table_provider = IcebergTableProvider::try_new_from_table(table.clone())
+        let table_provider = IcebergStaticTableProvider::try_new_from_table(table.clone())
             .await
             .unwrap();
         let ctx = SessionContext::new();
@@ -278,11 +458,11 @@
     }
 
     #[tokio::test]
-    async fn test_try_new_from_table_snapshot() {
+    async fn test_static_provider_from_snapshot() {
         let table = get_test_table_from_metadata_file().await;
         let snapshot_id = table.metadata().snapshots().next().unwrap().snapshot_id();
         let table_provider =
-            IcebergTableProvider::try_new_from_table_snapshot(table.clone(), snapshot_id)
+            IcebergStaticTableProvider::try_new_from_table_snapshot(table.clone(), snapshot_id)
                 .await
                 .unwrap();
         let ctx = SessionContext::new();
@@ -304,16 +484,388 @@
     }
 
     #[tokio::test]
-    async fn test_physical_input_schema_consistent_with_logical_input_schema() {
+    async fn test_static_provider_rejects_writes() {
         let table = get_test_table_from_metadata_file().await;
-        let table_provider = IcebergTableProvider::try_new_from_table(table.clone())
+        let table_provider = IcebergStaticTableProvider::try_new_from_table(table.clone())
             .await
             .unwrap();
         let ctx = SessionContext::new();
         ctx.register_table("mytable", Arc::new(table_provider))
             .unwrap();
+
+        // Attempting to insert into the static provider should fail
+        let result = ctx.sql("INSERT INTO mytable VALUES (1, 2, 3)").await;
+
+        // The error should occur during planning or execution
+        // We expect an error indicating write operations are not supported
+        assert!(
+            result.is_err() || {
+                let df = result.unwrap();
+                df.collect().await.is_err()
+            }
+        );
+    }
+
+    #[tokio::test]
+    async fn test_static_provider_scan() {
+        let table = get_test_table_from_metadata_file().await;
+        let table_provider = IcebergStaticTableProvider::try_new_from_table(table.clone())
+            .await
+            .unwrap();
+        let ctx = SessionContext::new();
+        ctx.register_table("mytable", Arc::new(table_provider))
+            .unwrap();
+
+        // Test that scan operations work correctly
         let df = ctx.sql("SELECT count(*) FROM mytable").await.unwrap();
         let physical_plan = df.create_physical_plan().await;
-        assert!(physical_plan.is_ok())
+        assert!(physical_plan.is_ok());
+    }
+
+    // Tests for IcebergTableProvider
+
+    #[tokio::test]
+    async fn test_catalog_backed_provider_creation() {
+        let (catalog, namespace, table_name, _temp_dir) = get_test_catalog_and_table().await;
+
+        // Test creating a catalog-backed provider
+        let provider =
+            IcebergTableProvider::try_new(catalog.clone(), namespace.clone(), table_name.clone())
+                .await
+                .unwrap();
+
+        // Verify the schema is loaded correctly
+        let schema = provider.schema();
+        assert_eq!(schema.fields().len(), 2);
+        assert_eq!(schema.field(0).name(), "id");
+        assert_eq!(schema.field(1).name(), "name");
+    }
+
+    #[tokio::test]
+    async fn test_catalog_backed_provider_scan() {
+        let (catalog, namespace, table_name, _temp_dir) = get_test_catalog_and_table().await;
+
+        let provider =
+            IcebergTableProvider::try_new(catalog.clone(), namespace.clone(), table_name.clone())
+                .await
+                .unwrap();
+
+        let ctx = SessionContext::new();
+        ctx.register_table("test_table", Arc::new(provider))
+            .unwrap();
+
+        // Test that scan operations work correctly
+        let df = ctx.sql("SELECT * FROM test_table").await.unwrap();
+
+        // Verify the schema in the query result
+        let df_schema = df.schema();
+        assert_eq!(df_schema.fields().len(), 2);
assert_eq!(df_schema.field(0).name(), "id"); + assert_eq!(df_schema.field(1).name(), "name"); + + let physical_plan = df.create_physical_plan().await; + assert!(physical_plan.is_ok()); + } + + #[tokio::test] + async fn test_catalog_backed_provider_insert() { + let (catalog, namespace, table_name, _temp_dir) = get_test_catalog_and_table().await; + + let provider = + IcebergTableProvider::try_new(catalog.clone(), namespace.clone(), table_name.clone()) + .await + .unwrap(); + + let ctx = SessionContext::new(); + ctx.register_table("test_table", Arc::new(provider)) + .unwrap(); + + // Test that insert operations work correctly + let result = ctx.sql("INSERT INTO test_table VALUES (1, 'test')").await; + + // Insert should succeed (or at least not fail during planning) + assert!(result.is_ok()); + + // Try to execute the insert plan + let df = result.unwrap(); + let execution_result = df.collect().await; + + // The execution should succeed + assert!(execution_result.is_ok()); + } + + #[tokio::test] + async fn test_physical_input_schema_consistent_with_logical_input_schema() { + let (catalog, namespace, table_name, _temp_dir) = get_test_catalog_and_table().await; + + let provider = + IcebergTableProvider::try_new(catalog.clone(), namespace.clone(), table_name.clone()) + .await + .unwrap(); + + let ctx = SessionContext::new(); + ctx.register_table("test_table", Arc::new(provider)) + .unwrap(); + + // Create a query plan + let df = ctx.sql("SELECT id, name FROM test_table").await.unwrap(); + + // Get logical schema before consuming df + let logical_schema = df.schema().clone(); + + // Get physical plan (this consumes df) + let physical_plan = df.create_physical_plan().await.unwrap(); + let physical_schema = physical_plan.schema(); + + // Verify that logical and physical schemas are consistent + assert_eq!( + logical_schema.fields().len(), + physical_schema.fields().len() + ); + + for (logical_field, physical_field) in logical_schema + .fields() + .iter() + .zip(physical_schema.fields().iter()) + { + assert_eq!(logical_field.name(), physical_field.name()); + assert_eq!(logical_field.data_type(), physical_field.data_type()); + } + } + + async fn get_partitioned_test_catalog_and_table( + fanout_enabled: Option, + ) -> (Arc, NamespaceIdent, String, TempDir) { + use iceberg::spec::{Transform, UnboundPartitionSpec}; + + let temp_dir = TempDir::new().unwrap(); + let warehouse_path = temp_dir.path().to_str().unwrap().to_string(); + + let catalog = MemoryCatalogBuilder::default() + .load( + "memory", + HashMap::from([(MEMORY_CATALOG_WAREHOUSE.to_string(), warehouse_path.clone())]), + ) + .await + .unwrap(); + + let namespace = NamespaceIdent::new("test_ns".to_string()); + catalog + .create_namespace(&namespace, HashMap::new()) + .await + .unwrap(); + + let schema = Schema::builder() + .with_schema_id(0) + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::required(2, "category", Type::Primitive(PrimitiveType::String)).into(), + ]) + .build() + .unwrap(); + + let partition_spec = UnboundPartitionSpec::builder() + .with_spec_id(0) + .add_partition_field(2, "category", Transform::Identity) + .unwrap() + .build(); + + let mut properties = HashMap::new(); + if let Some(enabled) = fanout_enabled { + properties.insert( + iceberg::spec::TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED + .to_string(), + enabled.to_string(), + ); + } + + let table_creation = TableCreation::builder() + .name("partitioned_table".to_string()) + 
.location(format!("{warehouse_path}/partitioned_table")) + .schema(schema) + .partition_spec(partition_spec) + .properties(properties) + .build(); + + catalog + .create_table(&namespace, table_creation) + .await + .unwrap(); + + ( + Arc::new(catalog), + namespace, + "partitioned_table".to_string(), + temp_dir, + ) + } + + /// Helper to check if a plan contains a SortExec node + fn plan_contains_sort(plan: &Arc) -> bool { + if plan.name() == "SortExec" { + return true; + } + for child in plan.children() { + if plan_contains_sort(child) { + return true; + } + } + false + } + + #[tokio::test] + async fn test_insert_plan_fanout_enabled_no_sort() { + use datafusion::datasource::TableProvider; + use datafusion::logical_expr::dml::InsertOp; + use datafusion::physical_plan::empty::EmptyExec; + + // When fanout is enabled (default), no sort node should be added + let (catalog, namespace, table_name, _temp_dir) = + get_partitioned_test_catalog_and_table(Some(true)).await; + + let provider = + IcebergTableProvider::try_new(catalog.clone(), namespace.clone(), table_name.clone()) + .await + .unwrap(); + + let ctx = SessionContext::new(); + let input_schema = provider.schema(); + let input = Arc::new(EmptyExec::new(input_schema)) as Arc; + + let state = ctx.state(); + let insert_plan = provider + .insert_into(&state, input, InsertOp::Append) + .await + .unwrap(); + + // With fanout enabled, there should be no SortExec in the plan + assert!( + !plan_contains_sort(&insert_plan), + "Plan should NOT contain SortExec when fanout is enabled" + ); + } + + #[tokio::test] + async fn test_insert_plan_fanout_disabled_has_sort() { + use datafusion::datasource::TableProvider; + use datafusion::logical_expr::dml::InsertOp; + use datafusion::physical_plan::empty::EmptyExec; + + // When fanout is disabled, a sort node should be added + let (catalog, namespace, table_name, _temp_dir) = + get_partitioned_test_catalog_and_table(Some(false)).await; + + let provider = + IcebergTableProvider::try_new(catalog.clone(), namespace.clone(), table_name.clone()) + .await + .unwrap(); + + let ctx = SessionContext::new(); + let input_schema = provider.schema(); + let input = Arc::new(EmptyExec::new(input_schema)) as Arc; + + let state = ctx.state(); + let insert_plan = provider + .insert_into(&state, input, InsertOp::Append) + .await + .unwrap(); + + // With fanout disabled, there should be a SortExec in the plan + assert!( + plan_contains_sort(&insert_plan), + "Plan should contain SortExec when fanout is disabled" + ); + } + + #[tokio::test] + async fn test_limit_pushdown_static_provider() { + use datafusion::datasource::TableProvider; + + let table = get_test_table_from_metadata_file().await; + let table_provider = IcebergStaticTableProvider::try_new_from_table(table.clone()) + .await + .unwrap(); + + let ctx = SessionContext::new(); + let state = ctx.state(); + + // Test scan with limit + let scan_plan = table_provider + .scan(&state, None, &[], Some(10)) + .await + .unwrap(); + + // Verify that the scan plan is an IcebergTableScan + let iceberg_scan = scan_plan + .as_any() + .downcast_ref::() + .expect("Expected IcebergTableScan"); + + // Verify the limit is set + assert_eq!( + iceberg_scan.limit(), + Some(10), + "Limit should be set to 10 in the scan plan" + ); + } + + #[tokio::test] + async fn test_limit_pushdown_catalog_backed_provider() { + use datafusion::datasource::TableProvider; + + let (catalog, namespace, table_name, _temp_dir) = get_test_catalog_and_table().await; + + let provider = + 
+            IcebergTableProvider::try_new(catalog.clone(), namespace.clone(), table_name.clone())
+                .await
+                .unwrap();
+
+        let ctx = SessionContext::new();
+        let state = ctx.state();
+
+        // Test scan with limit
+        let scan_plan = provider.scan(&state, None, &[], Some(5)).await.unwrap();
+
+        // Verify that the scan plan is an IcebergTableScan
+        let iceberg_scan = scan_plan
+            .as_any()
+            .downcast_ref::<IcebergTableScan>()
+            .expect("Expected IcebergTableScan");
+
+        // Verify the limit is set
+        assert_eq!(
+            iceberg_scan.limit(),
+            Some(5),
+            "Limit should be set to 5 in the scan plan"
+        );
+    }
+
+    #[tokio::test]
+    async fn test_no_limit_pushdown() {
+        use datafusion::datasource::TableProvider;
+
+        let table = get_test_table_from_metadata_file().await;
+        let table_provider = IcebergStaticTableProvider::try_new_from_table(table.clone())
+            .await
+            .unwrap();
+
+        let ctx = SessionContext::new();
+        let state = ctx.state();
+
+        // Test scan without limit
+        let scan_plan = table_provider.scan(&state, None, &[], None).await.unwrap();
+
+        // Verify that the scan plan is an IcebergTableScan
+        let iceberg_scan = scan_plan
+            .as_any()
+            .downcast_ref::<IcebergTableScan>()
+            .expect("Expected IcebergTableScan");
+
+        // Verify the limit is None
+        assert_eq!(
+            iceberg_scan.limit(),
+            None,
+            "Limit should be None when not specified"
+        );
+    }
 }
diff --git a/crates/integrations/datafusion/src/table/table_provider_factory.rs b/crates/integrations/datafusion/src/table/table_provider_factory.rs
index e8e87dd318..8cae597b7b 100644
--- a/crates/integrations/datafusion/src/table/table_provider_factory.rs
+++ b/crates/integrations/datafusion/src/table/table_provider_factory.rs
@@ -24,12 +24,11 @@ use datafusion::catalog::{Session, TableProvider, TableProviderFactory};
 use datafusion::error::Result as DFResult;
 use datafusion::logical_expr::CreateExternalTable;
 use datafusion::sql::TableReference;
-use iceberg::arrow::schema_to_arrow_schema;
 use iceberg::io::FileIO;
 use iceberg::table::StaticTable;
 use iceberg::{Error, ErrorKind, Result, TableIdent};
 
-use super::IcebergTableProvider;
+use super::IcebergStaticTableProvider;
 use crate::to_datafusion_error;
 
 /// A factory that implements DataFusion's `TableProviderFactory` to create `IcebergTableProvider` instances.
@@ -126,10 +125,11 @@
             .map_err(to_datafusion_error)?
.into_table(); - let schema = schema_to_arrow_schema(table.metadata().current_schema()) + let provider = IcebergStaticTableProvider::try_new_from_table(table) + .await .map_err(to_datafusion_error)?; - Ok(Arc::new(IcebergTableProvider::new(table, Arc::new(schema)))) + Ok(Arc::new(provider)) } } @@ -244,6 +244,7 @@ mod tests { constraints: Constraints::default(), column_defaults: Default::default(), if_not_exists: Default::default(), + or_replace: false, temporary: false, definition: Default::default(), unbounded: Default::default(), diff --git a/crates/integrations/datafusion/tests/integration_datafusion_test.rs b/crates/integrations/datafusion/tests/integration_datafusion_test.rs index fdf5b17d18..6f8898abb8 100644 --- a/crates/integrations/datafusion/tests/integration_datafusion_test.rs +++ b/crates/integrations/datafusion/tests/integration_datafusion_test.rs @@ -347,14 +347,14 @@ async fn test_metadata_table() -> Result<()> { check_record_batches( snapshots, expect![[r#" - Field { name: "committed_at", data_type: Timestamp(Microsecond, Some("+00:00")), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "snapshot_id", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "parent_id", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }, - Field { name: "operation", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "4"} }, - Field { name: "manifest_list", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "5"} }, - Field { name: "summary", data_type: Map(Field { name: "key_value", data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, Field { name: "value", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, false), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }"#]], + Field { "committed_at": Timestamp(µs, "+00:00"), metadata: {"PARQUET:field_id": "1"} }, + Field { "snapshot_id": Int64, metadata: {"PARQUET:field_id": "2"} }, + Field { "parent_id": nullable Int64, metadata: {"PARQUET:field_id": "3"} }, + Field { "operation": nullable Utf8, metadata: {"PARQUET:field_id": "4"} }, + Field { "manifest_list": nullable Utf8, metadata: {"PARQUET:field_id": "5"} }, + Field { "summary": nullable Map("key_value": non-null Struct("key": non-null Utf8, metadata: {"PARQUET:field_id": "7"}, "value": Utf8, metadata: {"PARQUET:field_id": "8"}), unsorted), metadata: {"PARQUET:field_id": "6"} }"#]], expect![[r#" - committed_at: PrimitiveArray + committed_at: PrimitiveArray [ ], snapshot_id: PrimitiveArray @@ -386,18 +386,18 @@ async fn test_metadata_table() -> Result<()> { check_record_batches( manifests, expect![[r#" - Field { name: "content", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "14"} }, - Field { name: "path", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "length", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "partition_spec_id", data_type: Int32, 
nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }, - Field { name: "added_snapshot_id", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "4"} }, - Field { name: "added_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "5"} }, - Field { name: "existing_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }, - Field { name: "deleted_data_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, - Field { name: "added_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "15"} }, - Field { name: "existing_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "16"} }, - Field { name: "deleted_delete_files_count", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "17"} }, - Field { name: "partition_summaries", data_type: List(Field { name: "item", data_type: Struct([Field { name: "contains_null", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "10"} }, Field { name: "contains_nan", data_type: Boolean, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "11"} }, Field { name: "lower_bound", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "12"} }, Field { name: "upper_bound", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "13"} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "9"} }), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }"#]], + Field { "content": Int32, metadata: {"PARQUET:field_id": "14"} }, + Field { "path": Utf8, metadata: {"PARQUET:field_id": "1"} }, + Field { "length": Int64, metadata: {"PARQUET:field_id": "2"} }, + Field { "partition_spec_id": Int32, metadata: {"PARQUET:field_id": "3"} }, + Field { "added_snapshot_id": Int64, metadata: {"PARQUET:field_id": "4"} }, + Field { "added_data_files_count": Int32, metadata: {"PARQUET:field_id": "5"} }, + Field { "existing_data_files_count": Int32, metadata: {"PARQUET:field_id": "6"} }, + Field { "deleted_data_files_count": Int32, metadata: {"PARQUET:field_id": "7"} }, + Field { "added_delete_files_count": Int32, metadata: {"PARQUET:field_id": "15"} }, + Field { "existing_delete_files_count": Int32, metadata: {"PARQUET:field_id": "16"} }, + Field { "deleted_delete_files_count": Int32, metadata: {"PARQUET:field_id": "17"} }, + Field { "partition_summaries": List(non-null Struct("contains_null": non-null Boolean, metadata: {"PARQUET:field_id": "10"}, "contains_nan": Boolean, metadata: {"PARQUET:field_id": "11"}, "lower_bound": Utf8, metadata: {"PARQUET:field_id": "12"}, "upper_bound": Utf8, metadata: {"PARQUET:field_id": "13"}), metadata: {"PARQUET:field_id": "9"}), metadata: {"PARQUET:field_id": "8"} }"#]], expect![[r#" content: PrimitiveArray [ @@ -492,10 +492,6 @@ async fn test_insert_into() -> Result<()> { .unwrap(); assert_eq!(rows_inserted.value(0), 2); - // Refresh context to avoid getting stale table - let catalog = Arc::new(IcebergCatalogProvider::try_new(client).await?); - 
ctx.register_catalog("catalog", catalog); - // Query the table to verify the inserted data let df = ctx .sql("SELECT * FROM catalog.test_insert_into.my_table") @@ -508,8 +504,8 @@ async fn test_insert_into() -> Result<()> { check_record_batches( batches, expect![[r#" - Field { name: "foo1", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "foo2", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }"#]], + Field { "foo1": Int32, metadata: {"PARQUET:field_id": "1"} }, + Field { "foo2": Utf8, metadata: {"PARQUET:field_id": "2"} }"#]], expect![[r#" foo1: PrimitiveArray [ @@ -650,10 +646,6 @@ async fn test_insert_into_nested() -> Result<()> { .unwrap(); assert_eq!(rows_inserted.value(0), 2); - // Refresh context to avoid getting stale table - let catalog = Arc::new(IcebergCatalogProvider::try_new(client).await?); - ctx.register_catalog("catalog", catalog); - // Query the table to verify the inserted data let df = ctx .sql("SELECT * FROM catalog.test_insert_nested.nested_table ORDER BY id") @@ -666,9 +658,9 @@ async fn test_insert_into_nested() -> Result<()> { check_record_batches( batches, expect![[r#" - Field { name: "id", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "name", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "profile", data_type: Struct([Field { name: "address", data_type: Struct([Field { name: "street", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }, Field { name: "city", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, Field { name: "zip", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "4"} }, Field { name: "contact", data_type: Struct([Field { name: "email", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "9"} }, Field { name: "phone", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "10"} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "5"} }]), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }"#]], + Field { "id": Int32, metadata: {"PARQUET:field_id": "1"} }, + Field { "name": Utf8, metadata: {"PARQUET:field_id": "2"} }, + Field { "profile": nullable Struct("address": Struct("street": non-null Utf8, metadata: {"PARQUET:field_id": "6"}, "city": non-null Utf8, metadata: {"PARQUET:field_id": "7"}, "zip": non-null Int32, metadata: {"PARQUET:field_id": "8"}), metadata: {"PARQUET:field_id": "4"}, "contact": Struct("email": Utf8, metadata: {"PARQUET:field_id": "9"}, "phone": Utf8, metadata: {"PARQUET:field_id": "10"}), metadata: {"PARQUET:field_id": "5"}), metadata: {"PARQUET:field_id": "3"} }"#]], expect![[r#" id: PrimitiveArray [ @@ -687,7 +679,7 @@ async fn test_insert_into_nested() -> Result<()> { valid, ] [ - -- child 0: "address" (Struct([Field { name: "street", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }, Field { name: "city", data_type: Utf8, nullable: false, dict_id: 0, 
dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, Field { name: "zip", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }])) + -- child 0: "address" (Struct([Field { name: "street", data_type: Utf8, metadata: {"PARQUET:field_id": "6"} }, Field { name: "city", data_type: Utf8, metadata: {"PARQUET:field_id": "7"} }, Field { name: "zip", data_type: Int32, metadata: {"PARQUET:field_id": "8"} }])) StructArray -- validity: [ @@ -714,7 +706,7 @@ async fn test_insert_into_nested() -> Result<()> { 95113, ] ] - -- child 1: "contact" (Struct([Field { name: "email", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "9"} }, Field { name: "phone", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "10"} }])) + -- child 1: "contact" (Struct([Field { name: "email", data_type: Utf8, nullable: true, metadata: {"PARQUET:field_id": "9"} }, Field { name: "phone", data_type: Utf8, nullable: true, metadata: {"PARQUET:field_id": "10"} }])) StructArray -- validity: [ @@ -765,13 +757,13 @@ async fn test_insert_into_nested() -> Result<()> { check_record_batches( batches, expect![[r#" - Field { name: "id", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "name", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "catalog.test_insert_nested.nested_table.profile[address][street]", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "6"} }, - Field { name: "catalog.test_insert_nested.nested_table.profile[address][city]", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "7"} }, - Field { name: "catalog.test_insert_nested.nested_table.profile[address][zip]", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "8"} }, - Field { name: "catalog.test_insert_nested.nested_table.profile[contact][email]", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "9"} }, - Field { name: "catalog.test_insert_nested.nested_table.profile[contact][phone]", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "10"} }"#]], + Field { "id": Int32, metadata: {"PARQUET:field_id": "1"} }, + Field { "name": Utf8, metadata: {"PARQUET:field_id": "2"} }, + Field { "catalog.test_insert_nested.nested_table.profile[address][street]": nullable Utf8, metadata: {"PARQUET:field_id": "6"} }, + Field { "catalog.test_insert_nested.nested_table.profile[address][city]": nullable Utf8, metadata: {"PARQUET:field_id": "7"} }, + Field { "catalog.test_insert_nested.nested_table.profile[address][zip]": nullable Int32, metadata: {"PARQUET:field_id": "8"} }, + Field { "catalog.test_insert_nested.nested_table.profile[contact][email]": nullable Utf8, metadata: {"PARQUET:field_id": "9"} }, + Field { "catalog.test_insert_nested.nested_table.profile[contact][phone]": nullable Utf8, metadata: {"PARQUET:field_id": "10"} }"#]], expect![[r#" id: PrimitiveArray [ @@ -880,10 +872,6 @@ async fn test_insert_into_partitioned() -> Result<()> { .unwrap(); assert_eq!(rows_inserted.value(0), 5); - // Refresh catalog to get updated table - let catalog = Arc::new(IcebergCatalogProvider::try_new(client.clone()).await?); - 
ctx.register_catalog("catalog", catalog); - // Query the table to verify data let df = ctx .sql("SELECT * FROM catalog.test_partitioned_write.partitioned_table ORDER BY id") @@ -896,9 +884,9 @@ async fn test_insert_into_partitioned() -> Result<()> { check_record_batches( batches, expect![[r#" - Field { name: "id", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "1"} }, - Field { name: "category", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "2"} }, - Field { name: "value", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {"PARQUET:field_id": "3"} }"#]], + Field { "id": Int32, metadata: {"PARQUET:field_id": "1"} }, + Field { "category": Utf8, metadata: {"PARQUET:field_id": "2"} }, + Field { "value": Utf8, metadata: {"PARQUET:field_id": "3"} }"#]], expect![[r#" id: PrimitiveArray [ @@ -935,25 +923,22 @@ async fn test_insert_into_partitioned() -> Result<()> { let file_io = table.file_io(); // List files under each expected partition path - let electronics_path = format!("{}/data/category=electronics", table_location); - let books_path = format!("{}/data/category=books", table_location); - let clothing_path = format!("{}/data/category=clothing", table_location); + let electronics_path = format!("{table_location}/data/category=electronics"); + let books_path = format!("{table_location}/data/category=books"); + let clothing_path = format!("{table_location}/data/category=clothing"); // Verify partition directories exist and contain data files assert!( file_io.exists(&electronics_path).await?, - "Expected partition directory: {}", - electronics_path + "Expected partition directory: {electronics_path}" ); assert!( file_io.exists(&books_path).await?, - "Expected partition directory: {}", - books_path + "Expected partition directory: {books_path}" ); assert!( file_io.exists(&clothing_path).await?, - "Expected partition directory: {}", - clothing_path + "Expected partition directory: {clothing_path}" ); Ok(()) diff --git a/crates/integrations/playground/DEPENDENCIES.rust.tsv b/crates/integrations/playground/DEPENDENCIES.rust.tsv index 84dd20ed3e..8f57b1d9ae 100644 --- a/crates/integrations/playground/DEPENDENCIES.rust.tsv +++ b/crates/integrations/playground/DEPENDENCIES.rust.tsv @@ -1,509 +1,492 @@ -crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 MPL-2.0 Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X -adler2@2.0.1 X X X -adler32@1.2.0 X -ahash@0.8.12 X X -aho-corasick@1.1.3 X X -alloc-no-stdlib@2.0.4 X -alloc-stdlib@0.2.2 X -allocator-api2@0.2.21 X X -android_system_properties@0.1.5 X X -anstream@0.6.20 X X -anstyle@1.0.11 X X -anstyle-parse@0.2.7 X X -anstyle-query@1.1.4 X X -anstyle-wincon@3.0.10 X X -anyhow@1.0.99 X X -apache-avro@0.17.0 X -apache-avro@0.20.0 X -array-init@2.1.0 X X -arrayref@0.3.9 X -arrayvec@0.7.6 X X -arrow@55.2.0 X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-csv@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-json@55.2.0 X -arrow-ord@55.2.0 X -arrow-row@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X -as-any@0.3.2 X X -async-compression@0.4.19 X X -async-lock@3.4.1 X X -async-trait@0.1.89 X X -atoi@2.0.0 X -atomic-waker@1.1.2 X X -autocfg@1.5.0 X X -aws-config@1.8.6 X -aws-credential-types@1.2.6 X -aws-runtime@1.5.10 X -aws-sdk-sso@1.83.0 X 
-aws-sdk-ssooidc@1.84.0 X -aws-sdk-sts@1.85.0 X -aws-sigv4@1.3.4 X -aws-smithy-async@1.2.5 X -aws-smithy-http@0.62.3 X -aws-smithy-http-client@1.1.1 X -aws-smithy-json@0.61.5 X -aws-smithy-observability@0.1.3 X -aws-smithy-query@0.60.7 X -aws-smithy-runtime@1.9.1 X -aws-smithy-runtime-api@1.9.0 X -aws-smithy-types@1.3.2 X -aws-smithy-xml@0.60.10 X -aws-types@1.3.8 X -backon@1.5.2 X -backtrace@0.3.75 X X -base64@0.22.1 X X -base64-simd@0.8.0 X -bigdecimal@0.4.8 X X -bimap@0.6.3 X X -bitflags@2.9.4 X X -blake2@0.10.6 X X -blake3@1.8.2 X X X -block-buffer@0.10.4 X X -bon@3.7.2 X X -bon-macros@3.7.2 X X -brotli@8.0.2 X X -brotli-decompressor@5.0.0 X X -bumpalo@3.19.0 X X -bytemuck@1.23.2 X X X -byteorder@1.5.0 X X -bytes@1.10.1 X -bytes-utils@0.1.4 X X -bzip2@0.4.4 X X -bzip2@0.5.2 X X -bzip2-sys@0.1.13+1.0.8 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X -cfg_aliases@0.2.1 X -chrono@0.4.42 X X -chrono-tz@0.10.4 X X -clap@4.5.47 X X -clap_builder@4.5.47 X X -clap_derive@4.5.47 X X -clap_lex@0.7.5 X X -clipboard-win@5.4.1 X -colorchoice@1.0.4 X X -comfy-table@7.2.0 X -concurrent-queue@2.5.0 X X -const-oid@0.9.6 X X -const-random@0.1.18 X X -const-random-macro@0.1.16 X X -constant_time_eq@0.3.1 X X X -core-foundation@0.10.1 X X -core-foundation-sys@0.8.7 X X -core2@0.4.0 X X -cpufeatures@0.2.17 X X -crc32c@0.6.8 X X -crc32fast@1.5.0 X X -crossbeam-channel@0.5.15 X X -crossbeam-epoch@0.9.18 X X -crossbeam-utils@0.8.21 X X -crunchy@0.2.4 X -crypto-common@0.1.6 X X -csv@1.3.1 X X -csv-core@0.1.12 X X -darling@0.20.11 X -darling@0.21.3 X -darling_core@0.20.11 X -darling_core@0.21.3 X -darling_macro@0.20.11 X -darling_macro@0.21.3 X -dary_heap@0.3.7 X X -dashmap@6.1.0 X -datafusion@48.0.1 X -datafusion-catalog@48.0.1 X -datafusion-catalog-listing@48.0.1 X -datafusion-cli@48.0.1 X -datafusion-common@48.0.1 X -datafusion-common-runtime@48.0.1 X -datafusion-datasource@48.0.1 X -datafusion-datasource-avro@48.0.1 X -datafusion-datasource-csv@48.0.1 X -datafusion-datasource-json@48.0.1 X -datafusion-datasource-parquet@48.0.1 X -datafusion-doc@48.0.1 X -datafusion-execution@48.0.1 X -datafusion-expr@48.0.1 X -datafusion-expr-common@48.0.1 X -datafusion-functions@48.0.1 X -datafusion-functions-aggregate@48.0.1 X -datafusion-functions-aggregate-common@48.0.1 X -datafusion-functions-nested@48.0.1 X -datafusion-functions-table@48.0.1 X -datafusion-functions-window@48.0.1 X -datafusion-functions-window-common@48.0.1 X -datafusion-macros@48.0.1 X -datafusion-optimizer@48.0.1 X -datafusion-physical-expr@48.0.1 X -datafusion-physical-expr-common@48.0.1 X -datafusion-physical-optimizer@48.0.1 X -datafusion-physical-plan@48.0.1 X -datafusion-session@48.0.1 X -datafusion-sql@48.0.1 X -deranged@0.5.3 X X -derive_builder@0.20.2 X X -derive_builder_core@0.20.2 X X -derive_builder_macro@0.20.2 X X -digest@0.10.7 X X -dirs@6.0.0 X X -dirs-sys@0.5.0 X X -displaydoc@0.2.5 X X -dissimilar@1.0.10 X -either@1.15.0 X X -endian-type@0.1.2 X -env_filter@0.1.3 X X -env_logger@0.11.8 X X -equivalent@1.0.2 X X -errno@0.3.13 X X -error-code@3.3.2 X -event-listener@5.4.1 X X -event-listener-strategy@0.5.4 X X -expect-test@1.5.1 X X -fastrand@2.3.0 X X -fd-lock@4.0.4 X X -find-msvc-tools@0.1.1 X X -fixedbitset@0.5.7 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X -fnv@1.0.7 X X -foldhash@0.1.5 X -form_urlencoded@1.2.2 X X -fs-err@3.1.1 X X -futures@0.3.31 X X -futures-channel@0.3.31 X X -futures-core@0.3.31 X X -futures-executor@0.3.31 X X -futures-io@0.3.31 X X -futures-macro@0.3.31 X X -futures-sink@0.3.31 X X -futures-task@0.3.31 X X 
-futures-util@0.3.31 X X -generator@0.8.7 X X -generic-array@0.14.7 X -getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X -glob@0.3.3 X X -gloo-timers@0.3.0 X X -h2@0.4.12 X -half@2.6.0 X X -hashbrown@0.14.5 X X -hashbrown@0.15.5 X X -heck@0.5.0 X X -hermit-abi@0.5.2 X X -hex@0.4.3 X X -hmac@0.12.1 X X -home@0.5.11 X X -http@0.2.12 X X -http@1.3.1 X X -http-body@0.4.6 X -http-body@1.0.1 X -http-body-util@0.1.3 X -httparse@1.10.1 X X -httpdate@1.0.3 X X -humantime@2.2.0 X X -hyper@1.7.0 X -hyper-rustls@0.27.7 X X X -hyper-util@0.1.16 X -iana-time-zone@0.1.63 X X -iana-time-zone-haiku@0.1.2 X X -iceberg@0.7.0 X -iceberg-catalog-rest@0.7.0 X -iceberg-datafusion@0.7.0 X -iceberg-playground@0.7.0 X -iceberg_test_utils@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X -ident_case@1.0.1 X X -idna@1.1.0 X X -idna_adapter@1.2.1 X X -indexmap@2.11.0 X X -integer-encoding@3.0.4 X -io-uring@0.7.10 X X -ipnet@2.11.0 X X -iri-string@0.7.8 X X -is_terminal_polyfill@1.70.1 X X -itertools@0.13.0 X X -itertools@0.14.0 X X -itoa@1.0.15 X X -jiff@0.2.15 X X -jobserver@0.1.34 X X -js-sys@0.3.78 X X -lazy_static@1.5.0 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X -lexical-write-integer@1.0.5 X X -libc@0.2.175 X X -libflate@2.1.0 X -libflate_lz77@2.1.0 X -libm@0.2.15 X -libmimalloc-sys@0.1.44 X -libredox@0.1.9 X -libz-rs-sys@0.5.2 X -linux-raw-sys@0.9.4 X X X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -loom@0.7.2 X -lz4_flex@0.11.5 X -lzma-sys@0.1.20 X X -matchers@0.2.0 X -md-5@0.10.6 X X -memchr@2.7.5 X X -mimalloc@0.1.48 X -miniz_oxide@0.8.9 X X X -mio@1.0.4 X -moka@0.12.10 X X -murmur3@0.5.2 X X -nibble_vec@0.1.0 X -nix@0.30.1 X -nu-ansi-term@0.50.1 X -num@0.4.3 X X -num-bigint@0.4.6 X X -num-complex@0.4.6 X X -num-conv@0.1.0 X X -num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X -num-traits@0.2.19 X X -num_cpus@1.17.0 X X -object@0.36.7 X X -object_store@0.12.3 X X -once_cell@1.21.3 X X -once_cell_polyfill@1.70.1 X X -opendal@0.54.0 X -openssl-probe@0.1.6 X X -option-ext@0.2.0 X -ordered-float@2.10.1 X -ordered-float@4.6.0 X -outref@0.5.2 X -parking@2.2.1 X X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X -paste@1.0.15 X X -percent-encoding@2.3.2 X X -petgraph@0.8.2 X X -phf@0.12.1 X -phf_shared@0.12.1 X -pin-project-lite@0.2.16 X X -pin-utils@0.1.0 X X -pkg-config@0.3.32 X X -portable-atomic@1.11.1 X X -portable-atomic-util@0.2.4 X X -potential_utf@0.1.3 X -powerfmt@0.2.0 X X -ppv-lite86@0.2.21 X X -prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X -psm@0.1.26 X X -quad-rand@0.2.3 X -quick-xml@0.37.5 X -quick-xml@0.38.3 X -quote@1.0.40 X X -r-efi@5.3.0 X X X -radix_trie@0.2.1 X -rand@0.8.5 X X -rand@0.9.2 X X -rand_chacha@0.3.1 X X -rand_chacha@0.9.0 X X -rand_core@0.6.4 X X -rand_core@0.9.3 X X -recursive@0.1.1 X -recursive-proc-macro-impl@0.1.1 X -redox_syscall@0.5.17 X -redox_users@0.5.2 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X -reqsign@0.16.5 X -reqwest@0.12.23 X X -ring@0.17.14 X X -rle-decode-fast@1.0.3 X X -roaring@0.11.2 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X -rustc_version@0.4.1 X X -rustix@1.0.8 X X X -rustls@0.23.31 X X X -rustls-native-certs@0.8.1 X X X -rustls-pemfile@2.2.0 X X X -rustls-pki-types@1.12.0 X X -rustls-webpki@0.103.4 X 
-rustversion@1.0.22 X X -rustyline@16.0.0 X -ryu@1.0.20 X X -same-file@1.0.6 X X -schannel@0.1.27 X -scoped-tls@1.0.1 X X -scopeguard@1.2.0 X X -security-framework@3.4.0 X X -security-framework-sys@2.15.0 X X -semver@1.0.26 X X -seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_json@1.0.143 X X -serde_repr@0.1.20 X X -serde_spanned@0.6.9 X X -serde_urlencoded@0.7.1 X X -serde_with@3.14.0 X X -serde_with_macros@3.14.0 X X -sha1@0.10.6 X X -sha2@0.10.9 X X -sharded-slab@0.1.7 X -shlex@1.3.0 X X -signal-hook-registry@1.4.6 X X -simdutf8@0.1.5 X X -siphasher@1.0.1 X X -slab@0.4.11 X -smallvec@1.15.1 X X -snap@1.1.1 X -socket2@0.6.0 X X -sqlparser@0.55.0 X -sqlparser_derive@0.3.0 X -stable_deref_trait@1.2.0 X X -stacker@0.1.21 X X -static_assertions@1.1.0 X X -strsim@0.11.1 X -strum@0.26.3 X -strum@0.27.2 X -strum_macros@0.26.4 X -strum_macros@0.27.2 X -subtle@2.6.1 X -syn@2.0.106 X X -sync_wrapper@1.0.2 X -synstructure@0.13.2 X -tagptr@0.2.0 X X -tempfile@3.22.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X -thread_local@1.1.9 X X -threadpool@1.8.1 X X -thrift@0.17.0 X -time@0.3.43 X X -time-core@0.1.6 X X -tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-rustls@0.26.2 X X -tokio-util@0.7.16 X -toml@0.8.23 X X -toml_datetime@0.6.11 X X -toml_edit@0.22.27 X X -toml_write@0.1.2 X X -tower@0.5.2 X -tower-http@0.6.6 X -tower-layer@0.3.3 X -tower-service@0.3.3 X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X -tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X -try-lock@0.2.5 X -twox-hash@2.1.2 X -typed-builder@0.19.1 X X -typed-builder@0.20.1 X X -typed-builder-macro@0.19.1 X X -typed-builder-macro@0.20.1 X X -typenum@1.18.0 X X -unicode-ident@1.0.18 X X X -unicode-segmentation@1.12.0 X X -unicode-width@0.2.1 X X -untrusted@0.9.0 X -url@2.5.7 X X -urlencoding@2.1.3 X -utf8_iter@1.0.4 X X -utf8parse@0.2.2 X X -uuid@1.18.1 X X -version_check@0.9.5 X X -vsimd@0.8.0 X -walkdir@2.5.0 X X -want@0.3.1 X -wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X -wasm-streams@0.4.2 X X -web-sys@0.3.78 X X -web-time@1.1.0 X X -webpki-roots@1.0.2 X -winapi-util@0.1.11 X X -windows@0.61.3 X X -windows-collections@0.2.0 X X -windows-core@0.61.2 X X -windows-future@0.2.1 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-numerics@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X -windows-sys@0.52.0 X X -windows-sys@0.59.0 X X -windows-sys@0.60.2 X X -windows-sys@0.61.0 X X -windows-targets@0.52.6 X X -windows-targets@0.53.3 X X -windows-threading@0.1.0 X X -windows_aarch64_gnullvm@0.52.6 X X -windows_aarch64_gnullvm@0.53.0 X X -windows_aarch64_msvc@0.52.6 X X -windows_aarch64_msvc@0.53.0 X X -windows_i686_gnu@0.52.6 X X -windows_i686_gnu@0.53.0 X X -windows_i686_gnullvm@0.52.6 X X -windows_i686_gnullvm@0.53.0 X X -windows_i686_msvc@0.52.6 X X -windows_i686_msvc@0.53.0 X X -windows_x86_64_gnu@0.52.6 X X -windows_x86_64_gnu@0.53.0 X X -windows_x86_64_gnullvm@0.52.6 X X -windows_x86_64_gnullvm@0.53.0 X X -windows_x86_64_msvc@0.52.6 X X -windows_x86_64_msvc@0.53.0 X X -winnow@0.7.13 X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -xmlparser@0.13.6 X X -xz2@0.1.7 X X 
-yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X -zerofrom@0.1.6 X -zerofrom-derive@0.1.6 X -zeroize@1.8.1 X X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X -zstd@0.13.3 X -zstd-safe@7.2.4 X X -zstd-sys@2.0.16+zstd.1.5.7 X X +crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 MPL-2.0 Unicode-3.0 Unlicense Zlib bzip2-1.0.6 +adler2@2.0.1 X X X +ahash@0.8.12 X X +aho-corasick@1.1.4 X X +alloc-no-stdlib@2.0.4 X +alloc-stdlib@0.2.2 X +allocator-api2@0.2.21 X X +android_system_properties@0.1.5 X X +anstream@0.6.21 X X +anstyle@1.0.13 X X +anstyle-parse@0.2.7 X X +anstyle-query@1.1.5 X X +anstyle-wincon@3.0.11 X X +anyhow@1.0.100 X X +apache-avro@0.20.0 X +apache-avro@0.21.0 X +ar_archive_writer@0.2.0 X +array-init@2.1.0 X X +arrayref@0.3.9 X +arrayvec@0.7.6 X X +arrow@57.1.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-csv@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-json@57.1.0 X +arrow-ord@57.1.0 X +arrow-row@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X +as-any@0.3.2 X X +async-compression@0.4.19 X X +async-lock@3.4.1 X X +async-trait@0.1.89 X X +atoi@2.0.0 X +atomic-waker@1.1.2 X X +autocfg@1.5.0 X X +aws-config@1.8.11 X +aws-credential-types@1.2.10 X +aws-runtime@1.5.16 X +aws-sdk-sso@1.90.0 X +aws-sdk-ssooidc@1.92.0 X +aws-sdk-sts@1.94.0 X +aws-sigv4@1.3.6 X +aws-smithy-async@1.2.7 X +aws-smithy-http@0.62.6 X +aws-smithy-http-client@1.1.5 X +aws-smithy-json@0.61.8 X +aws-smithy-observability@0.1.5 X +aws-smithy-query@0.60.9 X +aws-smithy-runtime@1.9.5 X +aws-smithy-runtime-api@1.9.3 X +aws-smithy-types@1.3.5 X +aws-smithy-xml@0.60.13 X +aws-types@1.3.10 X +backon@1.6.0 X +base64@0.22.1 X X +base64-simd@0.8.0 X +bigdecimal@0.4.9 X X +bimap@0.6.3 X X +bitflags@2.10.0 X X +blake2@0.10.6 X X +blake3@1.8.2 X X X +block-buffer@0.10.4 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X +brotli@8.0.2 X X +brotli-decompressor@5.0.0 X X +bumpalo@3.19.0 X X +bytemuck@1.24.0 X X X +byteorder@1.5.0 X X +bytes@1.11.0 X +bytes-utils@0.1.4 X X +bzip2@0.5.2 X X +bzip2@0.6.1 X X +bzip2-sys@0.1.13+1.0.8 X X +cc@1.2.49 X X +cfg-if@1.0.4 X X +cfg_aliases@0.2.1 X +chrono@0.4.42 X X +chrono-tz@0.10.4 X X +clap@4.5.53 X X +clap_builder@4.5.53 X X +clap_derive@4.5.49 X X +clap_lex@0.7.6 X X +clipboard-win@5.4.1 X +colorchoice@1.0.4 X X +comfy-table@7.2.1 X +concurrent-queue@2.5.0 X X +const-oid@0.9.6 X X +const-random@0.1.18 X X +const-random-macro@0.1.16 X X +constant_time_eq@0.3.1 X X X +core-foundation@0.10.1 X X +core-foundation-sys@0.8.7 X X +cpufeatures@0.2.17 X X +crc32c@0.6.8 X X +crc32fast@1.5.0 X X +crossbeam-channel@0.5.15 X X +crossbeam-epoch@0.9.18 X X +crossbeam-utils@0.8.21 X X +crunchy@0.2.4 X +crypto-common@0.1.7 X X +csv@1.4.0 X X +csv-core@0.1.13 X X +darling@0.20.11 X +darling@0.21.3 X +darling_core@0.20.11 X +darling_core@0.21.3 X +darling_macro@0.20.11 X +darling_macro@0.21.3 X +dashmap@6.1.0 X +datafusion@51.0.0 X +datafusion-catalog@51.0.0 X +datafusion-catalog-listing@51.0.0 X +datafusion-cli@51.0.0 X +datafusion-common@51.0.0 X +datafusion-common-runtime@51.0.0 X +datafusion-datasource@51.0.0 X +datafusion-datasource-arrow@51.0.0 X +datafusion-datasource-avro@51.0.0 X +datafusion-datasource-csv@51.0.0 X +datafusion-datasource-json@51.0.0 X +datafusion-datasource-parquet@51.0.0 X +datafusion-doc@51.0.0 X +datafusion-execution@51.0.0 X +datafusion-expr@51.0.0 X 
+datafusion-expr-common@51.0.0 X +datafusion-functions@51.0.0 X +datafusion-functions-aggregate@51.0.0 X +datafusion-functions-aggregate-common@51.0.0 X +datafusion-functions-nested@51.0.0 X +datafusion-functions-table@51.0.0 X +datafusion-functions-window@51.0.0 X +datafusion-functions-window-common@51.0.0 X +datafusion-macros@51.0.0 X +datafusion-optimizer@51.0.0 X +datafusion-physical-expr@51.0.0 X +datafusion-physical-expr-adapter@51.0.0 X +datafusion-physical-expr-common@51.0.0 X +datafusion-physical-optimizer@51.0.0 X +datafusion-physical-plan@51.0.0 X +datafusion-pruning@51.0.0 X +datafusion-session@51.0.0 X +datafusion-sql@51.0.0 X +deranged@0.5.5 X X +derive_builder@0.20.2 X X +derive_builder_core@0.20.2 X X +derive_builder_macro@0.20.2 X X +digest@0.10.7 X X +dirs@6.0.0 X X +dirs-sys@0.5.0 X X +displaydoc@0.2.5 X X +dissimilar@1.0.10 X +either@1.15.0 X X +endian-type@0.1.2 X +env_filter@0.1.4 X X +env_logger@0.11.8 X X +equivalent@1.0.2 X X +errno@0.3.14 X X +error-code@3.3.2 X +event-listener@5.4.1 X X +event-listener-strategy@0.5.4 X X +expect-test@1.5.1 X X +fastrand@2.3.0 X X +fd-lock@4.0.4 X X +find-msvc-tools@0.1.5 X X +fixedbitset@0.5.7 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X +fnv@1.0.7 X X +foldhash@0.1.5 X +form_urlencoded@1.2.2 X X +fs-err@3.2.0 X X +futures@0.3.31 X X +futures-channel@0.3.31 X X +futures-core@0.3.31 X X +futures-executor@0.3.31 X X +futures-io@0.3.31 X X +futures-macro@0.3.31 X X +futures-sink@0.3.31 X X +futures-task@0.3.31 X X +futures-timer@3.0.3 X X +futures-util@0.3.31 X X +generic-array@0.14.7 X +getrandom@0.2.16 X X +getrandom@0.3.4 X X +glob@0.3.3 X X +gloo-timers@0.3.0 X X +h2@0.4.12 X +half@2.7.1 X X +hashbrown@0.14.5 X X +hashbrown@0.15.5 X X +hashbrown@0.16.1 X X +heck@0.5.0 X X +hex@0.4.3 X X +hmac@0.12.1 X X +home@0.5.11 X X +http@0.2.12 X X +http@1.4.0 X X +http-body@0.4.6 X +http-body@1.0.1 X +http-body-util@0.1.3 X +httparse@1.10.1 X X +httpdate@1.0.3 X X +humantime@2.3.0 X X +hyper@1.8.1 X +hyper-rustls@0.27.7 X X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X +iana-time-zone-haiku@0.1.2 X X +iceberg@0.8.0 X +iceberg-catalog-rest@0.8.0 X +iceberg-datafusion@0.8.0 X +iceberg-playground@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X +ident_case@1.0.1 X X +idna@1.1.0 X X +idna_adapter@1.2.1 X X +indexmap@2.12.1 X X +integer-encoding@3.0.4 X +ipnet@2.11.0 X X +iri-string@0.7.9 X X +is_terminal_polyfill@1.70.2 X X +itertools@0.13.0 X X +itertools@0.14.0 X X +itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X +jobserver@0.1.34 X X +js-sys@0.3.83 X X +lazy_static@1.5.0 X X +lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libbz2-rs-sys@0.2.2 X +libc@0.2.178 X X +libm@0.2.15 X +libmimalloc-sys@0.1.44 X +libredox@0.1.10 X +libz-rs-sys@0.5.3 X +linux-raw-sys@0.11.0 X X X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X +lzma-sys@0.1.20 X X +md-5@0.10.6 X X +memchr@2.7.6 X X +mimalloc@0.1.48 X +miniz_oxide@0.8.9 X X X +mio@1.1.1 X +moka@0.12.11 X X +murmur3@0.5.2 X X +nibble_vec@0.1.0 X +nix@0.30.1 X +nu-ansi-term@0.50.3 X +num-bigint@0.4.6 X X +num-complex@0.4.6 X X +num-conv@0.1.0 X X +num-integer@0.1.46 X X +num-traits@0.2.19 X X +object@0.32.2 X X +object_store@0.12.4 X X +once_cell@1.21.3 
X X +once_cell_polyfill@1.70.2 X X +opendal@0.55.0 X +openssl-probe@0.1.6 X X +option-ext@0.2.0 X +ordered-float@2.10.1 X +ordered-float@4.6.0 X +outref@0.5.2 X +parking@2.2.1 X X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X +paste@1.0.15 X X +percent-encoding@2.3.2 X X +petgraph@0.8.3 X X +phf@0.12.1 X +phf_shared@0.12.1 X +pin-project-lite@0.2.16 X X +pin-utils@0.1.0 X X +pkg-config@0.3.32 X X +portable-atomic@1.11.1 X X +portable-atomic-util@0.2.4 X X +potential_utf@0.1.4 X +powerfmt@0.2.0 X X +ppv-lite86@0.2.21 X X +prettyplease@0.2.37 X X +proc-macro-crate@3.4.0 X X +proc-macro2@1.0.103 X X +psm@0.1.28 X X +quad-rand@0.2.3 X +quick-xml@0.38.4 X +quote@1.0.42 X X +r-efi@5.3.0 X X X +radix_trie@0.2.1 X +rand@0.8.5 X X +rand@0.9.2 X X +rand_chacha@0.3.1 X X +rand_chacha@0.9.0 X X +rand_core@0.6.4 X X +rand_core@0.9.3 X X +recursive@0.1.1 X +recursive-proc-macro-impl@0.1.1 X +redox_syscall@0.5.18 X +redox_users@0.5.2 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X +relative-path@1.9.3 X X +reqsign@0.16.5 X +reqwest@0.12.25 X X +ring@0.17.14 X X +roaring@0.11.2 X X +rstest@0.26.1 X X +rstest_macros@0.26.1 X X +rust_decimal@1.39.0 X +rustc_version@0.4.1 X X +rustix@1.1.2 X X X +rustls@0.23.35 X X X +rustls-native-certs@0.8.2 X X X +rustls-pemfile@2.2.0 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X +rustversion@1.0.22 X X +rustyline@17.0.2 X +ryu@1.0.20 X X +same-file@1.0.6 X X +schannel@0.1.28 X +scopeguard@1.2.0 X X +security-framework@3.5.1 X X +security-framework-sys@2.15.0 X X +semver@1.0.27 X X +seq-macro@0.3.6 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_json@1.0.145 X X +serde_repr@0.1.20 X X +serde_spanned@0.6.9 X X +serde_urlencoded@0.7.1 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X +sha1@0.10.6 X X +sha2@0.10.9 X X +sharded-slab@0.1.7 X +shlex@1.3.0 X X +signal-hook-registry@1.4.7 X X +simd-adler32@0.3.8 X +simdutf8@0.1.5 X X +siphasher@1.0.1 X X +slab@0.4.11 X +smallvec@1.15.1 X X +snap@1.1.1 X +socket2@0.6.1 X X +sqlparser@0.59.0 X +sqlparser_derive@0.3.0 X +stable_deref_trait@1.2.1 X X +stacker@0.1.22 X X +strsim@0.11.1 X +strum@0.27.2 X +strum_macros@0.27.2 X +subtle@2.6.1 X +syn@2.0.111 X X +sync_wrapper@1.0.2 X +synstructure@0.13.2 X +tagptr@0.2.0 X X +tempfile@3.23.0 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X +thread_local@1.1.9 X X +thrift@0.17.0 X +time@0.3.44 X X +time-core@0.1.6 X X +tiny-keccak@2.0.2 X +tinystr@0.8.2 X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X +toml@0.8.23 X X +toml_datetime@0.6.11 X X +toml_datetime@0.7.3 X X +toml_edit@0.22.27 X X +toml_edit@0.23.9 X X +toml_parser@1.0.4 X X +toml_write@0.1.2 X X +tower@0.5.2 X +tower-http@0.6.8 X +tower-layer@0.3.3 X +tower-service@0.3.3 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X +tracing-log@0.2.0 X +tracing-subscriber@0.3.22 X +try-lock@0.2.5 X +twox-hash@2.1.2 X +typed-builder@0.20.1 X X +typed-builder-macro@0.20.1 X X +typenum@1.19.0 X X +unicode-ident@1.0.22 X X X +unicode-segmentation@1.12.0 X X +unicode-width@0.2.2 X X +untrusted@0.9.0 X +url@2.5.7 X X +urlencoding@2.1.3 X +utf8_iter@1.0.4 X X +utf8parse@0.2.2 X X +uuid@1.19.0 X X +version_check@0.9.5 X X +vsimd@0.8.0 X +walkdir@2.5.0 X X +want@0.3.1 X +wasi@0.11.1+wasi-snapshot-preview1 X X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X 
+wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X +wasm-streams@0.4.2 X X +web-sys@0.3.83 X X +web-time@1.1.0 X X +webpki-roots@1.0.4 X +winapi-util@0.1.11 X X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X +windows-sys@0.52.0 X X +windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X +windows-targets@0.52.6 X X +windows-targets@0.53.5 X X +windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X +windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X +windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X +windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X +windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X +windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X +windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X +windows_x86_64_msvc@0.52.6 X X +windows_x86_64_msvc@0.53.1 X X +winnow@0.7.14 X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +xmlparser@0.13.6 X X +xz2@0.1.7 X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X +zerofrom@0.1.6 X +zerofrom-derive@0.1.6 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X +zstd@0.13.3 X +zstd-safe@7.2.4 X X +zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/integrations/playground/src/main.rs b/crates/integrations/playground/src/main.rs index c522209957..94068bb558 100644 --- a/crates/integrations/playground/src/main.rs +++ b/crates/integrations/playground/src/main.rs @@ -24,6 +24,7 @@ use clap::Parser; use datafusion::execution::runtime_env::RuntimeEnvBuilder; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_cli::exec; +use datafusion_cli::object_storage::instrumented::InstrumentedObjectStoreRegistry; use datafusion_cli::print_format::PrintFormat; use datafusion_cli::print_options::{MaxRows, PrintOptions}; use iceberg_playground::{ICEBERG_PLAYGROUND_VERSION, IcebergCatalogList}; @@ -94,6 +95,7 @@ async fn main_inner() -> anyhow::Result<()> { quiet: args.quiet, maxrows: args.maxrows, color: args.color, + instrumented_registry: Arc::new(InstrumentedObjectStoreRegistry::new()), }; let rc = match args.rc { diff --git a/crates/sqllogictest/DEPENDENCIES.rust.tsv b/crates/sqllogictest/DEPENDENCIES.rust.tsv index e8af062ea3..94bd88e2e6 100644 --- a/crates/sqllogictest/DEPENDENCIES.rust.tsv +++ b/crates/sqllogictest/DEPENDENCIES.rust.tsv @@ -1,398 +1,482 @@ -crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 LGPL-2.1-or-later MIT MIT-0 Unicode-3.0 Unlicense Zlib -addr2line@0.24.2 X X -adler2@2.0.1 X X X -adler32@1.2.0 X -ahash@0.8.12 X X -aho-corasick@1.1.3 X X -alloc-no-stdlib@2.0.4 X -alloc-stdlib@0.2.2 X -allocator-api2@0.2.21 X X -android_system_properties@0.1.5 X X -anstream@0.6.20 X X -anstyle@1.0.11 X X -anstyle-parse@0.2.7 X X -anstyle-query@1.1.4 X X -anstyle-wincon@3.0.10 X X -anyhow@1.0.99 X X -apache-avro@0.17.0 X -arrayref@0.3.9 X -arrayvec@0.7.6 X X -arrow@55.2.0 X -arrow-arith@55.2.0 X -arrow-array@55.2.0 X -arrow-buffer@55.2.0 X -arrow-cast@55.2.0 X -arrow-csv@55.2.0 X -arrow-data@55.2.0 X -arrow-ipc@55.2.0 X -arrow-json@55.2.0 X -arrow-ord@55.2.0 X -arrow-row@55.2.0 X -arrow-schema@55.2.0 X -arrow-select@55.2.0 X -arrow-string@55.2.0 X -async-compression@0.4.19 X X -async-recursion@1.1.1 X X -async-trait@0.1.89 X X -atoi@2.0.0 X -autocfg@1.5.0 X X -backtrace@0.3.75 
X X -base64@0.21.7 X X -base64@0.22.1 X X -bigdecimal@0.4.8 X X -bitflags@2.9.4 X X -blake2@0.10.6 X X -blake3@1.8.2 X X X -block-buffer@0.10.4 X X -brotli@8.0.2 X X -brotli-decompressor@5.0.0 X X -bumpalo@3.19.0 X X -byteorder@1.5.0 X X -bytes@1.10.1 X -bzip2@0.4.4 X X -bzip2@0.5.2 X X -bzip2-sys@0.1.13+1.0.8 X X -cc@1.2.36 X X -cfg-if@1.0.3 X X -chrono@0.4.42 X X -chrono-tz@0.10.4 X X -clap@4.5.47 X X -clap_builder@4.5.47 X X -clap_derive@4.5.47 X X -clap_lex@0.7.5 X X -colorchoice@1.0.4 X X -comfy-table@7.2.0 X -console@0.15.11 X -const-random@0.1.18 X X -const-random-macro@0.1.16 X X -constant_time_eq@0.3.1 X X X -core-foundation-sys@0.8.7 X X -core2@0.4.0 X X -cpufeatures@0.2.17 X X -crc32fast@1.5.0 X X -crossbeam-utils@0.8.21 X X -crunchy@0.2.4 X -crypto-common@0.1.6 X X -csv@1.3.1 X X -csv-core@0.1.12 X X -dary_heap@0.3.7 X X -dashmap@6.1.0 X -datafusion@48.0.1 X -datafusion-catalog@48.0.1 X -datafusion-catalog-listing@48.0.1 X -datafusion-common@48.0.1 X -datafusion-common-runtime@48.0.1 X -datafusion-datasource@48.0.1 X -datafusion-datasource-avro@48.0.1 X -datafusion-datasource-csv@48.0.1 X -datafusion-datasource-json@48.0.1 X -datafusion-datasource-parquet@48.0.1 X -datafusion-doc@48.0.1 X -datafusion-execution@48.0.1 X -datafusion-expr@48.0.1 X -datafusion-expr-common@48.0.1 X -datafusion-functions@48.0.1 X -datafusion-functions-aggregate@48.0.1 X -datafusion-functions-aggregate-common@48.0.1 X -datafusion-functions-nested@48.0.1 X -datafusion-functions-table@48.0.1 X -datafusion-functions-window@48.0.1 X -datafusion-functions-window-common@48.0.1 X -datafusion-macros@48.0.1 X -datafusion-optimizer@48.0.1 X -datafusion-physical-expr@48.0.1 X -datafusion-physical-expr-common@48.0.1 X -datafusion-physical-optimizer@48.0.1 X -datafusion-physical-plan@48.0.1 X -datafusion-session@48.0.1 X -datafusion-spark@48.0.1 X -datafusion-sql@48.0.1 X -datafusion-sqllogictest@48.0.1 X -datafusion-substrait@48.0.1 X -digest@0.10.7 X X -displaydoc@0.2.5 X X -dyn-clone@1.0.20 X X -educe@0.6.0 X -either@1.15.0 X X -encode_unicode@1.0.0 X X -enum-ordinalize@4.3.0 X -enum-ordinalize-derive@4.3.1 X -equivalent@1.0.2 X X -errno@0.3.13 X X -escape8259@0.5.3 X -fallible-iterator@0.2.0 X X -fastrand@2.3.0 X X -find-msvc-tools@0.1.1 X X -fixedbitset@0.5.7 X X -flatbuffers@25.2.10 X -flate2@1.1.2 X X -fnv@1.0.7 X X -foldhash@0.1.5 X -form_urlencoded@1.2.2 X X -fs-err@3.1.1 X X -futures@0.3.31 X X -futures-channel@0.3.31 X X -futures-core@0.3.31 X X -futures-executor@0.3.31 X X -futures-io@0.3.31 X X -futures-macro@0.3.31 X X -futures-sink@0.3.31 X X -futures-task@0.3.31 X X -futures-util@0.3.31 X X -generic-array@0.14.7 X -getrandom@0.2.16 X X -getrandom@0.3.3 X X -gimli@0.31.1 X X -glob@0.3.3 X X -half@2.6.0 X X -hashbrown@0.14.5 X X -hashbrown@0.15.5 X X -heck@0.5.0 X X -hex@0.4.3 X X -hmac@0.12.1 X X -http@1.3.1 X X -humantime@2.2.0 X X -iana-time-zone@0.1.63 X X -iana-time-zone-haiku@0.1.2 X X -iceberg-sqllogictest@0.7.0 X -icu_collections@2.0.0 X -icu_locale_core@2.0.0 X -icu_normalizer@2.0.0 X -icu_normalizer_data@2.0.0 X -icu_properties@2.0.1 X -icu_properties_data@2.0.1 X -icu_provider@2.0.0 X -idna@1.1.0 X X -idna_adapter@1.2.1 X X -indexmap@2.11.0 X X -indicatif@0.17.11 X -integer-encoding@3.0.4 X -io-uring@0.7.10 X X -is_terminal_polyfill@1.70.1 X X -itertools@0.13.0 X X -itertools@0.14.0 X X -itoa@1.0.15 X X -jobserver@0.1.34 X X -js-sys@0.3.78 X X -lexical-core@1.0.5 X X -lexical-parse-float@1.0.5 X X -lexical-parse-integer@1.0.5 X X -lexical-util@1.0.6 X X -lexical-write-float@1.0.5 X X 
-lexical-write-integer@1.0.5 X X -libc@0.2.175 X X -libflate@2.1.0 X -libflate_lz77@2.1.0 X -libm@0.2.15 X -libtest-mimic@0.8.1 X X -libz-rs-sys@0.5.2 X -linux-raw-sys@0.9.4 X X X -litemap@0.8.0 X -lock_api@0.4.13 X X -log@0.4.28 X X -lz4_flex@0.11.5 X -lzma-sys@0.1.20 X X -md-5@0.10.6 X X -memchr@2.7.5 X X -miniz_oxide@0.8.9 X X X -mio@1.0.4 X -multimap@0.10.1 X X -num@0.4.3 X X -num-bigint@0.4.6 X X -num-complex@0.4.6 X X -num-integer@0.1.46 X X -num-iter@0.1.45 X X -num-rational@0.4.2 X X -num-traits@0.2.19 X X -number_prefix@0.4.0 X -object@0.36.7 X X -object_store@0.12.3 X X -once_cell@1.21.3 X X -once_cell_polyfill@1.70.1 X X -ordered-float@2.10.1 X -owo-colors@4.2.2 X -parking_lot@0.12.4 X X -parking_lot_core@0.9.11 X X -parquet@55.2.0 X -paste@1.0.15 X X -pbjson@0.7.0 X -pbjson-build@0.7.0 X -pbjson-types@0.7.0 X -percent-encoding@2.3.2 X X -petgraph@0.7.1 X X -petgraph@0.8.2 X X -phf@0.12.1 X -phf_shared@0.12.1 X -pin-project-lite@0.2.16 X X -pin-utils@0.1.0 X X -pkg-config@0.3.32 X X -portable-atomic@1.11.1 X X -postgres-protocol@0.6.8 X X -postgres-types@0.2.9 X X -potential_utf@0.1.3 X -ppv-lite86@0.2.21 X X -prettyplease@0.2.37 X X -proc-macro2@1.0.101 X X -prost@0.13.5 X -prost-build@0.13.5 X -prost-derive@0.13.5 X -prost-types@0.13.5 X -psm@0.1.26 X X -quad-rand@0.2.3 X -quote@1.0.40 X X -r-efi@5.3.0 X X X -rand@0.8.5 X X -rand@0.9.2 X X -rand_chacha@0.3.1 X X -rand_chacha@0.9.0 X X -rand_core@0.6.4 X X -rand_core@0.9.3 X X -recursive@0.1.1 X -recursive-proc-macro-impl@0.1.1 X -redox_syscall@0.5.17 X -regex@1.11.2 X X -regex-automata@0.4.10 X X -regex-lite@0.1.7 X X -regex-syntax@0.8.6 X X -regress@0.10.4 X X -rle-decode-fast@1.0.3 X X -rust_decimal@1.38.0 X -rustc-demangle@0.1.26 X X -rustc_version@0.4.1 X X -rustix@1.0.8 X X X -rustversion@1.0.22 X X -ryu@1.0.20 X X -same-file@1.0.6 X X -schemars@0.8.22 X -schemars_derive@0.8.22 X -scopeguard@1.2.0 X X -semver@1.0.26 X X -seq-macro@0.3.6 X X -serde@1.0.219 X X -serde_bytes@0.11.17 X X -serde_derive@1.0.219 X X -serde_derive_internals@0.29.1 X X -serde_json@1.0.143 X X -serde_spanned@0.6.9 X X -serde_tokenstream@0.2.2 X -serde_yaml@0.9.34+deprecated X X -sha2@0.10.9 X X -shlex@1.3.0 X X -simdutf8@0.1.5 X X -similar@2.7.0 X -siphasher@1.0.1 X X -slab@0.4.11 X -smallvec@1.15.1 X X -snap@1.1.1 X -sqllogictest@0.28.4 X X -sqlparser@0.55.0 X -sqlparser_derive@0.3.0 X -stable_deref_trait@1.2.0 X X -stacker@0.1.21 X X -static_assertions@1.1.0 X X -stringprep@0.1.5 X X -strsim@0.11.1 X -strum@0.26.3 X -strum_macros@0.26.4 X -subst@0.3.8 X X -substrait@0.56.0 X -subtle@2.6.1 X -syn@2.0.106 X X -synstructure@0.13.2 X -tempfile@3.22.0 X X -thiserror@1.0.69 X X -thiserror@2.0.16 X X -thiserror-impl@1.0.69 X X -thiserror-impl@2.0.16 X X -thrift@0.17.0 X -tiny-keccak@2.0.2 X -tinystr@0.8.1 X -tinyvec@1.10.0 X X X -tinyvec_macros@0.1.1 X X X -tokio@1.47.1 X -tokio-macros@2.5.0 X -tokio-util@0.7.16 X -toml@0.8.23 X X -toml_datetime@0.6.11 X X -toml_edit@0.22.27 X X -toml_write@0.1.2 X X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X -twox-hash@2.1.2 X -typed-builder@0.19.1 X X -typed-builder-macro@0.19.1 X X -typenum@1.18.0 X X -typify@0.4.3 X -typify-impl@0.4.3 X -typify-macro@0.4.3 X -unicode-bidi@0.3.18 X X -unicode-ident@1.0.18 X X X -unicode-normalization@0.1.24 X X -unicode-properties@0.1.3 X X -unicode-segmentation@1.12.0 X X -unicode-width@0.1.14 X X -unicode-width@0.2.1 X X -unsafe-libyaml@0.2.11 X -url@2.5.7 X X -utf8_iter@1.0.4 X X -utf8parse@0.2.2 X X -uuid@1.18.1 X X -version_check@0.9.5 X X 
-walkdir@2.5.0 X X -wasi@0.11.1+wasi-snapshot-preview1 X X X -wasi@0.14.4+wasi-0.2.4 X X X -wasm-bindgen@0.2.101 X X -wasm-bindgen-backend@0.2.101 X X -wasm-bindgen-futures@0.4.51 X X -wasm-bindgen-macro@0.2.101 X X -wasm-bindgen-macro-support@0.2.101 X X -wasm-bindgen-shared@0.2.101 X X -web-sys@0.3.78 X X -web-time@1.1.0 X X -winapi-util@0.1.11 X X -windows-core@0.61.2 X X -windows-implement@0.60.0 X X -windows-interface@0.59.1 X X -windows-link@0.1.3 X X -windows-link@0.2.0 X X -windows-result@0.3.4 X X -windows-strings@0.4.2 X X -windows-sys@0.52.0 X X -windows-sys@0.59.0 X X -windows-sys@0.60.2 X X -windows-sys@0.61.0 X X -windows-targets@0.52.6 X X -windows-targets@0.53.3 X X -windows_aarch64_gnullvm@0.52.6 X X -windows_aarch64_gnullvm@0.53.0 X X -windows_aarch64_msvc@0.52.6 X X -windows_aarch64_msvc@0.53.0 X X -windows_i686_gnu@0.52.6 X X -windows_i686_gnu@0.53.0 X X -windows_i686_gnullvm@0.52.6 X X -windows_i686_gnullvm@0.53.0 X X -windows_i686_msvc@0.52.6 X X -windows_i686_msvc@0.53.0 X X -windows_x86_64_gnu@0.52.6 X X -windows_x86_64_gnu@0.53.0 X X -windows_x86_64_gnullvm@0.52.6 X X -windows_x86_64_gnullvm@0.53.0 X X -windows_x86_64_msvc@0.52.6 X X -windows_x86_64_msvc@0.53.0 X X -winnow@0.7.13 X -wit-bindgen@0.45.1 X X X -writeable@0.6.1 X -xz2@0.1.7 X X -yoke@0.8.0 X -yoke-derive@0.8.0 X -zerocopy@0.8.27 X X X -zerofrom@0.1.6 X -zerofrom-derive@0.1.6 X -zerotrie@0.2.2 X -zerovec@0.11.4 X -zerovec-derive@0.11.1 X -zlib-rs@0.5.2 X -zstd@0.13.3 X -zstd-safe@7.2.4 X X -zstd-sys@2.0.16+zstd.1.5.7 X X +crate 0BSD Apache-2.0 Apache-2.0 WITH LLVM-exception BSD-2-Clause BSD-3-Clause BSL-1.0 CC0-1.0 CDLA-Permissive-2.0 ISC LGPL-2.1-or-later MIT MIT-0 Unicode-3.0 Unlicense Zlib bzip2-1.0.6 +adler2@2.0.1 X X X +ahash@0.8.12 X X +aho-corasick@1.1.4 X X +alloc-no-stdlib@2.0.4 X +alloc-stdlib@0.2.2 X +allocator-api2@0.2.21 X X +android_system_properties@0.1.5 X X +anstream@0.6.21 X X +anstyle@1.0.13 X X +anstyle-parse@0.2.7 X X +anstyle-query@1.1.5 X X +anstyle-wincon@3.0.11 X X +anyhow@1.0.100 X X +apache-avro@0.20.0 X +apache-avro@0.21.0 X +ar_archive_writer@0.2.0 X +array-init@2.1.0 X X +arrayref@0.3.9 X +arrayvec@0.7.6 X X +arrow@57.1.0 X +arrow-arith@57.1.0 X +arrow-array@57.1.0 X +arrow-buffer@57.1.0 X +arrow-cast@57.1.0 X +arrow-csv@57.1.0 X +arrow-data@57.1.0 X +arrow-ipc@57.1.0 X +arrow-json@57.1.0 X +arrow-ord@57.1.0 X +arrow-row@57.1.0 X +arrow-schema@57.1.0 X +arrow-select@57.1.0 X +arrow-string@57.1.0 X +as-any@0.3.2 X X +async-compression@0.4.19 X X +async-lock@3.4.1 X X +async-recursion@1.1.1 X X +async-trait@0.1.89 X X +atoi@2.0.0 X +atomic-waker@1.1.2 X X +autocfg@1.5.0 X X +backon@1.6.0 X +base64@0.22.1 X X +bigdecimal@0.4.9 X X +bimap@0.6.3 X X +bitflags@2.10.0 X X +blake2@0.10.6 X X +blake3@1.8.2 X X X +block-buffer@0.10.4 X X +bon@3.8.1 X X +bon-macros@3.8.1 X X +brotli@8.0.2 X X +brotli-decompressor@5.0.0 X X +bumpalo@3.19.0 X X +bytemuck@1.24.0 X X X +byteorder@1.5.0 X X +bytes@1.11.0 X +bzip2@0.5.2 X X +bzip2@0.6.1 X X +bzip2-sys@0.1.13+1.0.8 X X +cc@1.2.49 X X +cfg-if@1.0.4 X X +chrono@0.4.42 X X +chrono-tz@0.10.4 X X +clap@4.5.53 X X +clap_builder@4.5.53 X X +clap_derive@4.5.49 X X +clap_lex@0.7.6 X X +colorchoice@1.0.4 X X +comfy-table@7.2.1 X +concurrent-queue@2.5.0 X X +console@0.16.1 X +const-oid@0.9.6 X X +const-random@0.1.18 X X +const-random-macro@0.1.16 X X +constant_time_eq@0.3.1 X X X +core-foundation-sys@0.8.7 X X +cpufeatures@0.2.17 X X +crc32c@0.6.8 X X +crc32fast@1.5.0 X X +crossbeam-channel@0.5.15 X X +crossbeam-epoch@0.9.18 X X +crossbeam-utils@0.8.21 
X X +crunchy@0.2.4 X +crypto-common@0.1.7 X X +csv@1.4.0 X X +csv-core@0.1.13 X X +darling@0.20.11 X +darling@0.21.3 X +darling_core@0.20.11 X +darling_core@0.21.3 X +darling_macro@0.20.11 X +darling_macro@0.21.3 X +dashmap@6.1.0 X +datafusion@51.0.0 X +datafusion-catalog@51.0.0 X +datafusion-catalog-listing@51.0.0 X +datafusion-common@51.0.0 X +datafusion-common-runtime@51.0.0 X +datafusion-datasource@51.0.0 X +datafusion-datasource-arrow@51.0.0 X +datafusion-datasource-avro@51.0.0 X +datafusion-datasource-csv@51.0.0 X +datafusion-datasource-json@51.0.0 X +datafusion-datasource-parquet@51.0.0 X +datafusion-doc@51.0.0 X +datafusion-execution@51.0.0 X +datafusion-expr@51.0.0 X +datafusion-expr-common@51.0.0 X +datafusion-functions@51.0.0 X +datafusion-functions-aggregate@51.0.0 X +datafusion-functions-aggregate-common@51.0.0 X +datafusion-functions-nested@51.0.0 X +datafusion-functions-table@51.0.0 X +datafusion-functions-window@51.0.0 X +datafusion-functions-window-common@51.0.0 X +datafusion-macros@51.0.0 X +datafusion-optimizer@51.0.0 X +datafusion-physical-expr@51.0.0 X +datafusion-physical-expr-adapter@51.0.0 X +datafusion-physical-expr-common@51.0.0 X +datafusion-physical-optimizer@51.0.0 X +datafusion-physical-plan@51.0.0 X +datafusion-pruning@51.0.0 X +datafusion-session@51.0.0 X +datafusion-spark@51.0.0 X +datafusion-sql@51.0.0 X +datafusion-sqllogictest@51.0.0 X +datafusion-substrait@51.0.0 X +derive_builder@0.20.2 X X +derive_builder_core@0.20.2 X X +derive_builder_macro@0.20.2 X X +digest@0.10.7 X X +displaydoc@0.2.5 X X +dissimilar@1.0.10 X +dyn-clone@1.0.20 X X +educe@0.6.0 X +either@1.15.0 X X +encode_unicode@1.0.0 X X +enum-ordinalize@4.3.2 X +enum-ordinalize-derive@4.3.2 X +env_filter@0.1.4 X X +env_logger@0.11.8 X X +equivalent@1.0.2 X X +errno@0.3.14 X X +escape8259@0.5.3 X +event-listener@5.4.1 X X +event-listener-strategy@0.5.4 X X +expect-test@1.5.1 X X +fallible-iterator@0.2.0 X X +fastrand@2.3.0 X X +find-msvc-tools@0.1.5 X X +fixedbitset@0.5.7 X X +flatbuffers@25.9.23 X +flate2@1.1.5 X X +fnv@1.0.7 X X +foldhash@0.1.5 X +foldhash@0.2.0 X +form_urlencoded@1.2.2 X X +fs-err@3.2.0 X X +futures@0.3.31 X X +futures-channel@0.3.31 X X +futures-core@0.3.31 X X +futures-executor@0.3.31 X X +futures-io@0.3.31 X X +futures-macro@0.3.31 X X +futures-sink@0.3.31 X X +futures-task@0.3.31 X X +futures-timer@3.0.3 X X +futures-util@0.3.31 X X +generic-array@0.14.7 X +getrandom@0.2.16 X X +getrandom@0.3.4 X X +glob@0.3.3 X X +gloo-timers@0.3.0 X X +half@2.7.1 X X +hashbrown@0.14.5 X X +hashbrown@0.15.5 X X +hashbrown@0.16.1 X X +heck@0.5.0 X X +hex@0.4.3 X X +hmac@0.12.1 X X +home@0.5.11 X X +http@1.4.0 X X +http-body@1.0.1 X +http-body-util@0.1.3 X +httparse@1.10.1 X X +humantime@2.3.0 X X +hyper@1.8.1 X +hyper-rustls@0.27.7 X X X +hyper-util@0.1.19 X +iana-time-zone@0.1.64 X X +iana-time-zone-haiku@0.1.2 X X +iceberg@0.8.0 X +iceberg-datafusion@0.8.0 X +iceberg-sqllogictest@0.8.0 X +iceberg_test_utils@0.8.0 X +icu_collections@2.1.1 X +icu_locale_core@2.1.1 X +icu_normalizer@2.1.1 X +icu_normalizer_data@2.1.1 X +icu_properties@2.1.1 X +icu_properties_data@2.1.1 X +icu_provider@2.1.1 X +ident_case@1.0.1 X X +idna@1.1.0 X X +idna_adapter@1.2.1 X X +indexmap@2.12.1 X X +indicatif@0.18.3 X +integer-encoding@3.0.4 X +ipnet@2.11.0 X X +iri-string@0.7.9 X X +is_terminal_polyfill@1.70.2 X X +itertools@0.13.0 X X +itertools@0.14.0 X X +itoa@1.0.15 X X +jiff@0.2.16 X X +jiff-tzdb@0.1.4 X X +jiff-tzdb-platform@0.1.3 X X +jobserver@0.1.34 X X +js-sys@0.3.83 X X +lazy_static@1.5.0 X X 
+lexical-core@1.0.6 X X +lexical-parse-float@1.0.6 X X +lexical-parse-integer@1.0.6 X X +lexical-util@1.0.7 X X +lexical-write-float@1.0.6 X X +lexical-write-integer@1.0.6 X X +libbz2-rs-sys@0.2.2 X +libc@0.2.178 X X +libm@0.2.15 X +libtest-mimic@0.8.1 X X +libz-rs-sys@0.5.3 X +linux-raw-sys@0.11.0 X X X +litemap@0.8.1 X +lock_api@0.4.14 X X +log@0.4.29 X X +lz4_flex@0.12.0 X +lzma-sys@0.1.20 X X +md-5@0.10.6 X X +memchr@2.7.6 X X +miniz_oxide@0.8.9 X X X +mio@1.1.1 X +moka@0.12.11 X X +multimap@0.10.1 X X +murmur3@0.5.2 X X +nu-ansi-term@0.50.3 X +num-bigint@0.4.6 X X +num-complex@0.4.6 X X +num-integer@0.1.46 X X +num-traits@0.2.19 X X +object@0.32.2 X X +object_store@0.12.4 X X +once_cell@1.21.3 X X +once_cell_polyfill@1.70.2 X X +opendal@0.55.0 X +ordered-float@2.10.1 X +ordered-float@4.6.0 X +owo-colors@4.2.3 X +parking@2.2.1 X X +parking_lot@0.12.5 X X +parking_lot_core@0.9.12 X X +parquet@57.1.0 X +paste@1.0.15 X X +pbjson@0.8.0 X +pbjson-build@0.8.0 X +pbjson-types@0.8.0 X +percent-encoding@2.3.2 X X +petgraph@0.7.1 X X +petgraph@0.8.3 X X +phf@0.12.1 X +phf_shared@0.12.1 X +pin-project-lite@0.2.16 X X +pin-utils@0.1.0 X X +pkg-config@0.3.32 X X +portable-atomic@1.11.1 X X +portable-atomic-util@0.2.4 X X +postgres-protocol@0.6.9 X X +postgres-types@0.2.11 X X +potential_utf@0.1.4 X +ppv-lite86@0.2.21 X X +prettyplease@0.2.37 X X +proc-macro-crate@3.4.0 X X +proc-macro2@1.0.103 X X +prost@0.14.1 X +prost-build@0.14.1 X +prost-derive@0.14.1 X +prost-types@0.14.1 X +psm@0.1.28 X X +quad-rand@0.2.3 X +quick-xml@0.38.4 X +quote@1.0.42 X X +r-efi@5.3.0 X X X +rand@0.8.5 X X +rand@0.9.2 X X +rand_chacha@0.3.1 X X +rand_chacha@0.9.0 X X +rand_core@0.6.4 X X +rand_core@0.9.3 X X +recursive@0.1.1 X +recursive-proc-macro-impl@0.1.1 X +redox_syscall@0.5.18 X +regex@1.12.2 X X +regex-automata@0.4.13 X X +regex-lite@0.1.8 X X +regex-syntax@0.8.8 X X +regress@0.10.5 X X +relative-path@1.9.3 X X +reqsign@0.16.5 X +reqwest@0.12.25 X X +ring@0.17.14 X X +roaring@0.11.2 X X +rstest@0.26.1 X X +rstest_macros@0.26.1 X X +rust_decimal@1.39.0 X +rustc_version@0.4.1 X X +rustix@1.1.2 X X X +rustls@0.23.35 X X X +rustls-pki-types@1.13.1 X X +rustls-webpki@0.103.8 X +rustversion@1.0.22 X X +ryu@1.0.20 X X +same-file@1.0.6 X X +schemars@0.8.22 X +schemars_derive@0.8.22 X +scopeguard@1.2.0 X X +semver@1.0.27 X X +seq-macro@0.3.6 X X +serde@1.0.228 X X +serde_bytes@0.11.19 X X +serde_core@1.0.228 X X +serde_derive@1.0.228 X X +serde_derive_internals@0.29.1 X X +serde_json@1.0.145 X X +serde_repr@0.1.20 X X +serde_spanned@0.6.9 X X +serde_tokenstream@0.2.2 X +serde_urlencoded@0.7.1 X X +serde_with@3.16.1 X X +serde_with_macros@3.16.1 X X +serde_yaml@0.9.34+deprecated X X +sha1@0.10.6 X X +sha2@0.10.9 X X +sharded-slab@0.1.7 X +shlex@1.3.0 X X +simd-adler32@0.3.8 X +simdutf8@0.1.5 X X +similar@2.7.0 X +siphasher@1.0.1 X X +slab@0.4.11 X +smallvec@1.15.1 X X +snap@1.1.1 X +socket2@0.6.1 X X +sqllogictest@0.28.4 X X +sqlparser@0.59.0 X +sqlparser_derive@0.3.0 X +stable_deref_trait@1.2.1 X X +stacker@0.1.22 X X +stringprep@0.1.5 X X +strsim@0.11.1 X +strum@0.27.2 X +strum_macros@0.27.2 X +subst@0.3.8 X X +substrait@0.62.0 X +subtle@2.6.1 X +syn@2.0.111 X X +sync_wrapper@1.0.2 X +synstructure@0.13.2 X +tagptr@0.2.0 X X +tempfile@3.23.0 X X +thiserror@2.0.17 X X +thiserror-impl@2.0.17 X X +thread_local@1.1.9 X X +thrift@0.17.0 X +tiny-keccak@2.0.2 X +tinystr@0.8.2 X +tinyvec@1.10.0 X X X +tinyvec_macros@0.1.1 X X X +tokio@1.48.0 X +tokio-macros@2.6.0 X +tokio-rustls@0.26.4 X X +tokio-util@0.7.17 X +toml@0.8.23 X X 
+toml_datetime@0.6.11 X X +toml_datetime@0.7.3 X X +toml_edit@0.22.27 X X +toml_edit@0.23.9 X X +toml_parser@1.0.4 X X +toml_write@0.1.2 X X +tower@0.5.2 X +tower-http@0.6.8 X +tower-layer@0.3.3 X +tower-service@0.3.3 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X +tracing-log@0.2.0 X +tracing-subscriber@0.3.22 X +try-lock@0.2.5 X +twox-hash@2.1.2 X +typed-builder@0.20.1 X X +typed-builder-macro@0.20.1 X X +typenum@1.19.0 X X +typify@0.5.0 X +typify-impl@0.5.0 X +typify-macro@0.5.0 X +unicode-bidi@0.3.18 X X +unicode-ident@1.0.22 X X X +unicode-normalization@0.1.25 X X +unicode-properties@0.1.4 X X +unicode-segmentation@1.12.0 X X +unicode-width@0.1.14 X X +unicode-width@0.2.2 X X +unit-prefix@0.5.2 X +unsafe-libyaml@0.2.11 X +untrusted@0.9.0 X +url@2.5.7 X X +utf8_iter@1.0.4 X X +utf8parse@0.2.2 X X +uuid@1.19.0 X X +version_check@0.9.5 X X +walkdir@2.5.0 X X +want@0.3.1 X +wasi@0.11.1+wasi-snapshot-preview1 X X X +wasip2@1.0.1+wasi-0.2.4 X X X +wasm-bindgen@0.2.106 X X +wasm-bindgen-futures@0.4.56 X X +wasm-bindgen-macro@0.2.106 X X +wasm-bindgen-macro-support@0.2.106 X X +wasm-bindgen-shared@0.2.106 X X +wasm-streams@0.4.2 X X +web-sys@0.3.83 X X +web-time@1.1.0 X X +webpki-roots@1.0.4 X +winapi-util@0.1.11 X X +windows-core@0.62.2 X X +windows-implement@0.60.2 X X +windows-interface@0.59.3 X X +windows-link@0.2.1 X X +windows-result@0.4.1 X X +windows-strings@0.5.1 X X +windows-sys@0.52.0 X X +windows-sys@0.59.0 X X +windows-sys@0.60.2 X X +windows-sys@0.61.2 X X +windows-targets@0.52.6 X X +windows-targets@0.53.5 X X +windows_aarch64_gnullvm@0.52.6 X X +windows_aarch64_gnullvm@0.53.1 X X +windows_aarch64_msvc@0.52.6 X X +windows_aarch64_msvc@0.53.1 X X +windows_i686_gnu@0.52.6 X X +windows_i686_gnu@0.53.1 X X +windows_i686_gnullvm@0.52.6 X X +windows_i686_gnullvm@0.53.1 X X +windows_i686_msvc@0.52.6 X X +windows_i686_msvc@0.53.1 X X +windows_x86_64_gnu@0.52.6 X X +windows_x86_64_gnu@0.53.1 X X +windows_x86_64_gnullvm@0.52.6 X X +windows_x86_64_gnullvm@0.53.1 X X +windows_x86_64_msvc@0.52.6 X X +windows_x86_64_msvc@0.53.1 X X +winnow@0.7.14 X +wit-bindgen@0.46.0 X X X +writeable@0.6.2 X +xz2@0.1.7 X X +yoke@0.8.1 X +yoke-derive@0.8.1 X +zerocopy@0.8.31 X X X +zerocopy-derive@0.8.31 X X X +zerofrom@0.1.6 X +zerofrom-derive@0.1.6 X +zeroize@1.8.2 X X +zerotrie@0.2.3 X +zerovec@0.11.5 X +zerovec-derive@0.11.2 X +zlib-rs@0.5.3 X +zstd@0.13.3 X +zstd-safe@7.2.4 X X +zstd-sys@2.0.16+zstd.1.5.7 X X diff --git a/crates/sqllogictest/src/engine/datafusion.rs b/crates/sqllogictest/src/engine/datafusion.rs index b3e37d9206..e9f93287d8 100644 --- a/crates/sqllogictest/src/engine/datafusion.rs +++ b/crates/sqllogictest/src/engine/datafusion.rs @@ -22,13 +22,13 @@ use std::sync::Arc; use datafusion::catalog::CatalogProvider; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_sqllogictest::DataFusion; -use iceberg::CatalogBuilder; use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder}; +use iceberg::spec::{NestedField, PrimitiveType, Schema, Transform, Type, UnboundPartitionSpec}; +use iceberg::{Catalog, CatalogBuilder, NamespaceIdent, TableCreation}; use iceberg_datafusion::IcebergCatalogProvider; use indicatif::ProgressBar; -use toml::Table as TomlTable; -use crate::engine::{EngineRunner, run_slt_with_runner}; +use crate::engine::{DatafusionCatalogConfig, EngineRunner, run_slt_with_runner}; use crate::error::Result; pub struct DataFusionEngine { @@ -58,12 +58,15 @@ impl EngineRunner for DataFusionEngine { } impl DataFusionEngine { - pub 
async fn new(config: TomlTable) -> Result<Self> { + pub async fn new(catalog_config: Option<DatafusionCatalogConfig>) -> Result<Self> { let session_config = SessionConfig::new() .with_target_partitions(4) .with_information_schema(true); let ctx = SessionContext::new_with_config(session_config); - ctx.register_catalog("default", Self::create_catalog(&config).await?); + ctx.register_catalog( + "default", + Self::create_catalog(catalog_config.as_ref()).await?, + ); Ok(Self { test_data_path: PathBuf::from("testdata"), @@ -71,9 +74,11 @@ impl DataFusionEngine { }) } - async fn create_catalog(_: &TomlTable) -> anyhow::Result<Arc<dyn CatalogProvider>> { - // TODO: support dynamic catalog configuration - // See: https://github.com/apache/iceberg-rust/issues/1780 + async fn create_catalog( + _catalog_config: Option<&DatafusionCatalogConfig>, + ) -> anyhow::Result<Arc<dyn CatalogProvider>> { + // TODO: Use catalog_config to load different catalog types via iceberg-catalog-loader + // See: https://github.com/apache/iceberg-rust/issues/1780 let catalog = MemoryCatalogBuilder::default() .load( "memory", @@ -84,8 +89,76 @@ impl DataFusionEngine { ) .await?; + // Create a test namespace for INSERT INTO tests + let namespace = NamespaceIdent::new("default".to_string()); + catalog.create_namespace(&namespace, HashMap::new()).await?; + + // Create test tables + Self::create_unpartitioned_table(&catalog, &namespace).await?; + Self::create_partitioned_table(&catalog, &namespace).await?; + Ok(Arc::new( IcebergCatalogProvider::try_new(Arc::new(catalog)).await?, )) } + + /// Create an unpartitioned test table with id and name columns + /// TODO: this can be removed when we support CREATE TABLE + async fn create_unpartitioned_table( + catalog: &impl Catalog, + namespace: &NamespaceIdent, + ) -> anyhow::Result<()> { + let schema = Schema::builder() + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::optional(2, "name", Type::Primitive(PrimitiveType::String)).into(), + ]) + .build()?; + + catalog + .create_table( + namespace, + TableCreation::builder() + .name("test_unpartitioned_table".to_string()) + .schema(schema) + .build(), + ) + .await?; + + Ok(()) + } + + /// Create a partitioned test table with id, category, and value columns + /// Partitioned by category using identity transform + /// TODO: this can be removed when we support CREATE TABLE + async fn create_partitioned_table( + catalog: &impl Catalog, + namespace: &NamespaceIdent, + ) -> anyhow::Result<()> { + let schema = Schema::builder() + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::required(2, "category", Type::Primitive(PrimitiveType::String)).into(), + NestedField::optional(3, "value", Type::Primitive(PrimitiveType::String)).into(), + ]) + .build()?; + + let partition_spec = UnboundPartitionSpec::builder() + .with_spec_id(0) + .add_partition_field(2, "category", Transform::Identity)?
+ .build(); + + catalog + .create_table( + namespace, + TableCreation::builder() + .name("test_partitioned_table".to_string()) + .schema(schema) + .partition_spec(partition_spec) + .build(), + ) + .await?; + + Ok(()) + } } diff --git a/crates/sqllogictest/src/engine/mod.rs b/crates/sqllogictest/src/engine/mod.rs index 724359fbe5..a276671401 100644 --- a/crates/sqllogictest/src/engine/mod.rs +++ b/crates/sqllogictest/src/engine/mod.rs @@ -17,29 +17,45 @@ mod datafusion; +use std::collections::HashMap; use std::path::Path; use anyhow::anyhow; +use serde::Deserialize; use sqllogictest::{AsyncDB, MakeConnection, Runner, parse_file}; -use toml::Table as TomlTable; use crate::engine::datafusion::DataFusionEngine; use crate::error::{Error, Result}; -const TYPE_DATAFUSION: &str = "datafusion"; +/// Configuration for the catalog used by the DataFusion engine +#[derive(Debug, Clone, Deserialize)] +pub struct DatafusionCatalogConfig { + /// Catalog type: "memory", "rest", "glue", "hms", "s3tables", "sql" + #[serde(rename = "type")] + pub catalog_type: String, + /// Catalog properties passed to the catalog loader + #[serde(default)] + pub props: HashMap<String, String>, +} + +/// Engine configuration as a tagged enum +#[derive(Debug, Clone, Deserialize)] +#[serde(tag = "type", rename_all = "lowercase")] +pub enum EngineConfig { + Datafusion { + #[serde(default)] + catalog: Option<DatafusionCatalogConfig>, + }, +} #[async_trait::async_trait] pub trait EngineRunner: Send { async fn run_slt_file(&mut self, path: &Path) -> Result<()>; } -pub async fn load_engine_runner( - engine_type: &str, - cfg: TomlTable, -) -> Result<Box<dyn EngineRunner>> { - match engine_type { - TYPE_DATAFUSION => Ok(Box::new(DataFusionEngine::new(cfg).await?)), - _ => Err(anyhow::anyhow!("Unsupported engine type: {engine_type}").into()), +pub async fn load_engine_runner(config: EngineConfig) -> Result<Box<dyn EngineRunner>> { + match config { + EngineConfig::Datafusion { catalog } => Ok(Box::new(DataFusionEngine::new(catalog).await?)), } } @@ -65,29 +81,63 @@ where #[cfg(test)] mod tests { - use crate::engine::{TYPE_DATAFUSION, load_engine_runner}; + use crate::engine::{DatafusionCatalogConfig, EngineConfig, load_engine_runner}; - #[tokio::test] - async fn test_engine_invalid_type() { + #[test] + fn test_deserialize_engine_config() { + let input = r#"type = "datafusion""#; + + let config: EngineConfig = toml::from_str(input).unwrap(); + assert!(matches!(config, EngineConfig::Datafusion { catalog: None })); + } + + #[test] + fn test_deserialize_engine_config_with_catalog() { + let input = r#" + type = "datafusion" + + [catalog] + type = "rest" + + [catalog.props] + uri = "http://localhost:8181" + "#; + + let config: EngineConfig = toml::from_str(input).unwrap(); + match config { + EngineConfig::Datafusion { catalog: Some(cat) } => { + assert_eq!(cat.catalog_type, "rest"); + assert_eq!( + cat.props.get("uri"), + Some(&"http://localhost:8181".to_string()) + ); + } + _ => panic!("Expected Datafusion with catalog"), + } + } + + #[test] + fn test_deserialize_catalog_config() { let input = r#" - [engines] - random = { type = "random_engine", url = "http://localhost:8181" } + type = "memory" + + [props] + warehouse = "file:///tmp/warehouse" "#; - let tbl = toml::from_str(input).unwrap(); - let result = load_engine_runner("random_engine", tbl).await; - assert!(result.is_err()); + let config: DatafusionCatalogConfig = toml::from_str(input).unwrap(); + assert_eq!(config.catalog_type, "memory"); + assert_eq!( + config.props.get("warehouse"), + Some(&"file:///tmp/warehouse".to_string()) + ); } #[tokio::test] async fn
test_load_datafusion() { - let input = r#" - [engines] - df = { type = "datafusion" } - "#; - let tbl = toml::from_str(input).unwrap(); - let result = load_engine_runner(TYPE_DATAFUSION, tbl).await; + let config = EngineConfig::Datafusion { catalog: None }; + let result = load_engine_runner(config).await; assert!(result.is_ok()); } } diff --git a/crates/sqllogictest/src/schedule.rs b/crates/sqllogictest/src/schedule.rs index 7c13ad4d12..25728a2968 100644 --- a/crates/sqllogictest/src/schedule.rs +++ b/crates/sqllogictest/src/schedule.rs @@ -21,10 +21,18 @@ use std::path::{Path, PathBuf}; use anyhow::{Context, anyhow}; use serde::{Deserialize, Serialize}; -use toml::{Table as TomlTable, Value}; use tracing::info; -use crate::engine::{EngineRunner, load_engine_runner}; +use crate::engine::{EngineConfig, EngineRunner, load_engine_runner}; + +/// Raw configuration parsed from the schedule TOML file +#[derive(Debug, Clone, Deserialize)] +pub struct ScheduleConfig { + /// Engine name to engine configuration + pub engines: HashMap<String, EngineConfig>, + /// List of test steps to run + pub steps: Vec<Step>, +} pub struct Schedule { /// Engine names to engine instances @@ -59,15 +67,27 @@ impl Schedule { pub async fn from_file<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> { let path_str = path.as_ref().to_string_lossy().to_string(); let content = read_to_string(path)?; - let toml_value = content.parse::<Value>()?; - let toml_table = toml_value - .as_table() - .ok_or_else(|| anyhow!("Schedule file must be a TOML table"))?; - let engines = Schedule::parse_engines(toml_table).await?; - let steps = Schedule::parse_steps(toml_table)?; + let config: ScheduleConfig = toml::from_str(&content) + .with_context(|| format!("Failed to parse schedule file: {path_str}"))?; - Ok(Self::new(engines, steps, path_str)) + let engines = Self::instantiate_engines(config.engines).await?; + + Ok(Self::new(engines, config.steps, path_str)) + } + + /// Instantiate engine runners from their configurations + async fn instantiate_engines( + configs: HashMap<String, EngineConfig>, + ) -> anyhow::Result<HashMap<String, Box<dyn EngineRunner>>> { + let mut engines = HashMap::new(); + + for (name, config) in configs { + let engine = load_engine_runner(config).await?; + engines.insert(name, engine); + } + + Ok(engines) } pub async fn run(mut self) -> anyhow::Result<()> { @@ -105,103 +125,131 @@ impl Schedule { } Ok(()) } +} - async fn parse_engines( - table: &TomlTable, - ) -> anyhow::Result<HashMap<String, Box<dyn EngineRunner>>> { - let engines_tbl = table - .get("engines") - .with_context(|| "Schedule file must have an 'engines' table")? - .as_table() - .ok_or_else(|| anyhow!("'engines' must be a table"))?; - - let mut engines = HashMap::new(); - - for (name, engine_val) in engines_tbl { - let cfg_tbl = engine_val - .as_table() - .ok_or_else(|| anyhow!("Config of engine '{name}' is not a table"))? - .clone(); - - let engine_type = cfg_tbl - .get("type") - .ok_or_else(|| anyhow::anyhow!("Engine {name} doesn't have a 'type' field"))?
-                .as_str()
-                .ok_or_else(|| anyhow::anyhow!("Engine {name} type must be a string"))?;
-
-            let engine = load_engine_runner(engine_type, cfg_tbl.clone()).await?;
-
-            if engines.insert(name.clone(), engine).is_some() {
-                return Err(anyhow!("Duplicate engine '{name}'"));
-            }
-        }
+#[cfg(test)]
+mod tests {
+    use crate::engine::EngineConfig;
+    use crate::schedule::ScheduleConfig;
 
-        Ok(engines)
-    }
+    #[test]
+    fn test_deserialize_schedule_config() {
+        let input = r#"
+            [engines]
+            df = { type = "datafusion" }
 
-    fn parse_steps(table: &TomlTable) -> anyhow::Result<Vec<Step>> {
-        let steps_val = table
-            .get("steps")
-            .with_context(|| "Schedule file must have a 'steps' array")?;
+            [[steps]]
+            engine = "df"
+            slt = "test.slt"
+        "#;
 
-        let steps: Vec<Step> = steps_val
-            .clone()
-            .try_into()
-            .with_context(|| "Failed to deserialize steps")?;
+        let config: ScheduleConfig = toml::from_str(input).unwrap();
 
-        Ok(steps)
+        assert_eq!(config.engines.len(), 1);
+        assert!(config.engines.contains_key("df"));
+        assert!(matches!(config.engines["df"], EngineConfig::Datafusion {
+            catalog: None
+        }));
+        assert_eq!(config.steps.len(), 1);
+        assert_eq!(config.steps[0].engine, "df");
+        assert_eq!(config.steps[0].slt, "test.slt");
     }
-}
-
-#[cfg(test)]
-mod tests {
-    use toml::Table as TomlTable;
-
-    use crate::schedule::Schedule;
 
     #[test]
-    fn test_parse_steps() {
+    fn test_deserialize_multiple_steps() {
         let input = r#"
+            [engines]
+            datafusion = { type = "datafusion" }
+
             [[steps]]
             engine = "datafusion"
             slt = "test.slt"
 
             [[steps]]
-            engine = "spark"
+            engine = "datafusion"
             slt = "test2.slt"
         "#;
 
-        let tbl: TomlTable = toml::from_str(input).unwrap();
-        let steps = Schedule::parse_steps(&tbl).unwrap();
+        let config: ScheduleConfig = toml::from_str(input).unwrap();
 
-        assert_eq!(steps.len(), 2);
-        assert_eq!(steps[0].engine, "datafusion");
-        assert_eq!(steps[0].slt, "test.slt");
-        assert_eq!(steps[1].engine, "spark");
-        assert_eq!(steps[1].slt, "test2.slt");
+        assert_eq!(config.steps.len(), 2);
+        assert_eq!(config.steps[0].engine, "datafusion");
+        assert_eq!(config.steps[0].slt, "test.slt");
+        assert_eq!(config.steps[1].engine, "datafusion");
+        assert_eq!(config.steps[1].slt, "test2.slt");
     }
 
     #[test]
-    fn test_parse_steps_empty() {
+    fn test_deserialize_with_catalog_config() {
         let input = r#"
+            [engines.df]
+            type = "datafusion"
+
+            [engines.df.catalog]
+            type = "rest"
+
+            [engines.df.catalog.props]
+            uri = "http://localhost:8181"
+
             [[steps]]
+            engine = "df"
+            slt = "test.slt"
         "#;
 
-        let tbl: TomlTable = toml::from_str(input).unwrap();
-        let steps = Schedule::parse_steps(&tbl);
+        let config: ScheduleConfig = toml::from_str(input).unwrap();
 
-        assert!(steps.is_err());
+        match &config.engines["df"] {
+            EngineConfig::Datafusion { catalog: Some(cat) } => {
+                assert_eq!(cat.catalog_type, "rest");
+                assert_eq!(
+                    cat.props.get("uri"),
+                    Some(&"http://localhost:8181".to_string())
+                );
+            }
+            _ => panic!("Expected Datafusion with catalog config"),
+        }
     }
 
-    #[tokio::test]
-    async fn test_parse_engines_invalid_table() {
-        let toml_content = r#"
-            engines = "not_a_table"
+    #[test]
+    fn test_deserialize_missing_engine_type() {
+        let input = r#"
+            [engines]
+            df = { }
+
+            [[steps]]
+            engine = "df"
+            slt = "test.slt"
         "#;
 
-        let table: TomlTable = toml::from_str(toml_content).unwrap();
-        let result = Schedule::parse_engines(&table).await;
+        let result: Result<ScheduleConfig, _> = toml::from_str(input);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn test_deserialize_invalid_engine_type() {
+        let input = r#"
+            [engines]
+            df = { type = "unknown_engine" }
+
+            [[steps]]
+            engine = "df"
+            slt = "test.slt"
+        "#;
+
+        let result: Result<ScheduleConfig, _> = toml::from_str(input);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn test_deserialize_missing_step_fields() {
+        let input = r#"
+            [engines]
+            df = { type = "datafusion" }
+
+            [[steps]]
+        "#;
+
+        let result: Result<ScheduleConfig, _> = toml::from_str(input);
         assert!(result.is_err());
     }
 }
diff --git a/crates/sqllogictest/testdata/schedules/df_test.toml b/crates/sqllogictest/testdata/schedules/df_test.toml
index 0733744951..df5e638d5a 100644
--- a/crates/sqllogictest/testdata/schedules/df_test.toml
+++ b/crates/sqllogictest/testdata/schedules/df_test.toml
@@ -20,4 +20,8 @@ df = { type = "datafusion" }
 
 [[steps]]
 engine = "df"
-slt = "df_test/show_tables.slt"
\ No newline at end of file
+slt = "df_test/show_tables.slt"
+
+[[steps]]
+engine = "df"
+slt = "df_test/insert_into.slt"
diff --git a/crates/sqllogictest/testdata/slts/df_test/insert_into.slt b/crates/sqllogictest/testdata/slts/df_test/insert_into.slt
new file mode 100644
index 0000000000..2ba33afcd1
--- /dev/null
+++ b/crates/sqllogictest/testdata/slts/df_test/insert_into.slt
@@ -0,0 +1,119 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
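+
+# A note on the sqllogictest directives used below: `query` is followed by
+# a column-type string (I = integer, T = text) and an optional sort mode;
+# `rowsort` sorts result rows before comparison so row order does not
+# matter. Expected output appears after the `----` separator.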
+ +# Verify the table is initially empty +query IT rowsort +SELECT * FROM default.default.test_unpartitioned_table +---- + +# Insert a single row and verify the count +query I +INSERT INTO default.default.test_unpartitioned_table VALUES (1, 'Alice') +---- +1 + +# Verify the inserted row +query IT rowsort +SELECT * FROM default.default.test_unpartitioned_table +---- +1 Alice + +# Insert multiple rows and verify the count +query I +INSERT INTO default.default.test_unpartitioned_table VALUES (2, 'Bob'), (3, 'Charlie') +---- +2 + +# Verify all rows +query IT rowsort +SELECT * FROM default.default.test_unpartitioned_table +---- +1 Alice +2 Bob +3 Charlie + +# Insert with NULL value and verify the count +query I +INSERT INTO default.default.test_unpartitioned_table VALUES (4, NULL) +---- +1 + +# Verify NULL handling +query IT rowsort +SELECT * FROM default.default.test_unpartitioned_table +---- +1 Alice +2 Bob +3 Charlie +4 NULL + +# Test partitioned table - verify initially empty +query ITT rowsort +SELECT * FROM default.default.test_partitioned_table +---- + +# Insert single row into partitioned table +query I +INSERT INTO default.default.test_partitioned_table VALUES (1, 'electronics', 'laptop') +---- +1 + +# Verify the inserted row in partitioned table +query ITT rowsort +SELECT * FROM default.default.test_partitioned_table +---- +1 electronics laptop + +# Insert multiple rows with different partition values +query I +INSERT INTO default.default.test_partitioned_table VALUES (2, 'electronics', 'phone'), (3, 'books', 'novel'), (4, 'books', 'textbook'), (5, 'clothing', 'shirt') +---- +4 + +# Verify all rows in partitioned table +query ITT rowsort +SELECT * FROM default.default.test_partitioned_table +---- +1 electronics laptop +2 electronics phone +3 books novel +4 books textbook +5 clothing shirt + +# Insert with NULL value in optional column +query I +INSERT INTO default.default.test_partitioned_table VALUES (6, 'electronics', NULL) +---- +1 + +# Verify NULL handling in partitioned table +query ITT rowsort +SELECT * FROM default.default.test_partitioned_table +---- +1 electronics laptop +2 electronics phone +3 books novel +4 books textbook +5 clothing shirt +6 electronics NULL + +# Verify partition filtering works +query ITT rowsort +SELECT * FROM default.default.test_partitioned_table WHERE category = 'books' +---- +3 books novel +4 books textbook diff --git a/crates/sqllogictest/testdata/slts/df_test/show_tables.slt b/crates/sqllogictest/testdata/slts/df_test/show_tables.slt index 34709d7359..c5da5f6276 100644 --- a/crates/sqllogictest/testdata/slts/df_test/show_tables.slt +++ b/crates/sqllogictest/testdata/slts/df_test/show_tables.slt @@ -25,6 +25,12 @@ datafusion information_schema routines VIEW datafusion information_schema schemata VIEW datafusion information_schema tables VIEW datafusion information_schema views VIEW +default default test_partitioned_table BASE TABLE +default default test_partitioned_table$manifests BASE TABLE +default default test_partitioned_table$snapshots BASE TABLE +default default test_unpartitioned_table BASE TABLE +default default test_unpartitioned_table$manifests BASE TABLE +default default test_unpartitioned_table$snapshots BASE TABLE default information_schema columns VIEW default information_schema df_settings VIEW default information_schema parameters VIEW diff --git a/crates/test_utils/DEPENDENCIES.rust.tsv b/crates/test_utils/DEPENDENCIES.rust.tsv index ef7c315df7..879ead5921 100644 --- a/crates/test_utils/DEPENDENCIES.rust.tsv +++ 
b/crates/test_utils/DEPENDENCIES.rust.tsv @@ -1,30 +1,22 @@ crate Apache-2.0 MIT Unicode-3.0 -cfg-if@1.0.3 X X -iceberg_test_utils@0.7.0 X +cfg-if@1.0.4 X X +iceberg_test_utils@0.8.0 X lazy_static@1.5.0 X X -log@0.4.28 X X -nu-ansi-term@0.50.1 X +log@0.4.29 X X +nu-ansi-term@0.50.3 X once_cell@1.21.3 X X pin-project-lite@0.2.16 X X -proc-macro2@1.0.101 X X -quote@1.0.40 X X +proc-macro2@1.0.103 X X +quote@1.0.42 X X sharded-slab@0.1.7 X smallvec@1.15.1 X X -syn@2.0.106 X X +syn@2.0.111 X X thread_local@1.1.9 X X -tracing@0.1.41 X -tracing-attributes@0.1.30 X -tracing-core@0.1.34 X +tracing@0.1.43 X +tracing-attributes@0.1.31 X +tracing-core@0.1.35 X tracing-log@0.2.0 X -tracing-subscriber@0.3.20 X -unicode-ident@1.0.18 X X X -windows-sys@0.52.0 X X -windows-targets@0.52.6 X X -windows_aarch64_gnullvm@0.52.6 X X -windows_aarch64_msvc@0.52.6 X X -windows_i686_gnu@0.52.6 X X -windows_i686_gnullvm@0.52.6 X X -windows_i686_msvc@0.52.6 X X -windows_x86_64_gnu@0.52.6 X X -windows_x86_64_gnullvm@0.52.6 X X -windows_x86_64_msvc@0.52.6 X X +tracing-subscriber@0.3.22 X +unicode-ident@1.0.22 X X X +windows-link@0.2.1 X X +windows-sys@0.61.2 X X diff --git a/deny.toml b/deny.toml index 6d75c5d219..0f88ba6d0e 100644 --- a/deny.toml +++ b/deny.toml @@ -26,6 +26,7 @@ allow = [ "CC0-1.0", "Zlib", "CDLA-Permissive-2.0", + "bzip2-1.0.6", # Category-A: https://issues.apache.org/jira/browse/LEGAL-660 "Unicode-3.0", # Boost Software License Version 1.0 is allowed (Category-A): @@ -39,4 +40,4 @@ exceptions = [ { allow = ["MPL-2.0"], crate = "webpki-roots" }, { allow = ["MPL-2.0"], crate = "generational-arena" }, { allow = ["MPL-2.0"], crate = "option-ext" }, -] \ No newline at end of file +] diff --git a/docs/rfcs/0001_modularize_iceberg_implementations.md b/docs/rfcs/0001_modularize_iceberg_implementations.md new file mode 100644 index 0000000000..14bd478270 --- /dev/null +++ b/docs/rfcs/0001_modularize_iceberg_implementations.md @@ -0,0 +1,120 @@ + + +# RFC: Modularize `iceberg` Implementations + +## Background + +Issue #1819 highlighted that the current `iceberg` crate mixes the Iceberg protocol abstractions (catalog/table/plan/transaction) with concrete runtime, storage, and execution code (Tokio runtime wrappers, opendal-based `FileIO`, Arrow helpers, DataFusion glue, etc.). This coupling makes the crate heavy and blocks users from composing their own storage or execution stacks. + +Two principles have been agreed: +1. The `iceberg` crate remains the single source of truth for all protocol traits and data structures. We will not create a separate “kernel” crate or facade layer. +2. Concrete integrations (Tokio runtime, opendal `FileIO`, Arrow/DataFusion glue, catalog adapters, etc.) move out into dedicated companion crates. Users needing a ready path can depend on those crates (e.g., `iceberg-datafusion` or `integrations/local`), while custom stacks depend only on `iceberg`. + +This RFC focuses on modularizing implementations; detailed trait signatures (e.g., `FileIO`, `Runtime`) will be handled in separate RFCs. + +## Goals and Scope + +- Keep `iceberg` as the protocol crate (traits + metadata + planning), without bundling runtimes, storage adapters, or execution glue. +- Relocate concrete code into companion crates under `crates/fileio/*`, `crates/runtime/*`, and `crates/integrations/*`. +- Provide a staged plan for extracting Arrow-dependent APIs to avoid destabilizing file-format code. +- Minimize breaking surfaces: traits stay in `iceberg`; downstream crates mainly adjust dependencies. 
+ +Out of scope: changes to the Iceberg table specification or catalog adapter external behavior; detailed trait method design (covered by follow-up RFCs). + +## Architecture Overview + +### Workspace Layout (target) + +``` +crates/ + iceberg/ # core traits, metadata, planning, transactions + fileio/ + opendal/ # e.g. `iceberg-fileio-opendal` + fs/ # other FileIO implementations + runtime/ + tokio/ # e.g. `iceberg-runtime-tokio` + smol/ + catalog/* # catalog adapters (REST, HMS, Glue, etc.) + integrations/ + local/ # simple local/arrow-based helper crate + datafusion/ # combines core + implementations for DF + cache-moka/ + playground/ +``` + +- `crates/iceberg` drops direct deps on opendal, Tokio, Arrow, and DataFusion. +- Implementation crates depend on `iceberg` to implement the traits. +- Higher-level crates (`integrations/local`, `iceberg-datafusion`) assemble the pieces for ready-to-use scenarios. + +### Core Trait Surfaces + +`FileIO`, `Runtime`, `Catalog`, `Table`, `Transaction`, `TableScan` (plan descriptors) all remain hosted in `iceberg`. Precise method signatures are deferred to dedicated RFCs to avoid locking details prematurely. + +### Usage Modes + +- **Custom stacks**: depend on `iceberg` and provide your own implementations. +- **Pre-built stacks**: depend on `integrations/local` or `iceberg-datafusion`, which bundle `iceberg` with selected runtime/FileIO/Arrow helpers. +- `iceberg` does not re-export companion crates; users compose explicitly. + +## Migration Plan (staged, with Arrow extraction phased) + +1. **Phase 1 – Confirm trait hosting, defer details** + - Keep all protocol traits in `iceberg`; move detailed API design (FileIO, Runtime, etc.) to separate RFCs. + - Add temporary shims/deprecations only when traits are finalized. + +2. **Phase 2 – First Arrow step: move `to_arrow()` out** + - Relocate the public `to_arrow()` API to `integrations/local` (or another higher-level crate). Core no longer exposes Arrow entry points. + - Keep internal Arrow-dependent helpers (e.g., `ArrowFileReader`) temporarily in `iceberg` to avoid breaking file-format flows. + +3. **Phase 3 – Gradual Arrow dependency removal** + - Incrementally migrate/replace Arrow-dependent internals (`ArrowFileReader`, format-specific readers) into `integrations/local` or other helper crates. + - Adjust file-format APIs as needed; expect this to be multi-release work. + +4. **Phase 4 – Dependency cleanup** + - Ensure catalog and integration crates depend only on `iceberg` plus the specific runtime/FileIO/helper crates they need. + - Verify build/test pipelines against the new dependency graph. + +5. **Phase 5 – Docs & release** + - Publish migration guides: where `to_arrow()` moved, how to assemble local/DataFusion stacks. + - Schedule deprecation windows for remaining Arrow helpers; target a breaking release once Arrow is fully removed from `iceberg`. + +## Compatibility + +- Short term: users of `Table::scan().to_arrow()` must switch to `integrations/local` (or another crate that rehosts that API). Other Arrow types stay temporarily but will migrate in later phases. +- Long term: `iceberg` will be Arrow-free; companion crates provide Arrow-based helpers. +- Tests/examples move alongside the implementations they exercise. 
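+
+To make the relocation concrete, the sketch below (all names are illustrative placeholders, not a committed API) shows one way a companion crate such as `integrations/local` could re-host `to_arrow()` through an extension trait, keeping `iceberg` itself free of Arrow dependencies:
+
+```rust
+// Illustrative sketch only: `TableScan` stands in for the core scan type
+// and `RecordBatchStream` for an Arrow batch stream; real names may differ.
+pub struct TableScan;
+pub struct RecordBatchStream;
+
+/// Extension trait defined in the companion crate, not in `iceberg`.
+pub trait ScanToArrowExt {
+    /// Convert a planned scan into a stream of Arrow record batches.
+    fn to_arrow(self) -> RecordBatchStream;
+}
+
+impl ScanToArrowExt for TableScan {
+    fn to_arrow(self) -> RecordBatchStream {
+        // A real implementation would drive the Arrow file readers here.
+        RecordBatchStream
+    }
+}
+```
+
+Because the method resolves through the imported trait, call sites keep the same `to_arrow()` spelling after switching crates; only the `use` import changes.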
+
+## Risks and Mitigations
+
+| Risk | Description | Mitigation |
+| ---- | ----------- | ---------- |
+| Arrow dependency unwinding is complex | File-format readers may rely on Arrow types | Phase the work; move `to_arrow()` first, then refactor readers; document the interim state |
+| Discoverability | Users may not know where Arrow helpers went | Clear docs pointing to `integrations/local` and `iceberg-datafusion`; migration guide |
+| Trait churn | Future trait RFCs may break early adopters | Use deprecation shims and communicate timelines |
+| Duplicate impls | Multiple helper crates could overlap | Provide recommended combinations and feature guidance |
+
+## Open Questions
+
+1. Versioning: align companion crate versions with `iceberg`, or allow independent versions plus a compatibility matrix?
+2. Deprecation schedule: how long do we keep interim Arrow helpers before fully removing them from `iceberg`?
+
+## Conclusion
+
+We will keep `iceberg` as the protocol crate while modularizing concrete implementations. Arrow removal will be phased: first relocating `to_arrow()` to `integrations/local`, then gradually moving Arrow-dependent readers and helpers. This keeps the core lean, lets users compose their preferred runtime/FileIO stacks, and still offers ready-to-use combinations via companion crates.
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
index ff7d1f7fbb..4b20d68e44 100644
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -20,5 +20,5 @@
 #
 # The channel is exactly same day for our MSRV.
 [toolchain]
-channel = "nightly-2025-03-28"
+channel = "nightly-2025-06-23"
 components = ["rustfmt", "clippy"]
diff --git a/scripts/release.sh b/scripts/release.sh
index a790cdd8d1..e4cee342d2 100755
--- a/scripts/release.sh
+++ b/scripts/release.sh
@@ -25,6 +25,12 @@ else
   echo "var is set to '$ICEBERG_VERSION'"
 fi
 
+# Validate version format (e.g., 1.0.0)
+if [[ ! "$ICEBERG_VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
+  echo "Error: ICEBERG_VERSION ($ICEBERG_VERSION) must be in the format: <major>.<minor>.<patch>"
+  exit 1
+fi
+
 # tar source code
 release_version=${ICEBERG_VERSION}
 # rc versions
diff --git a/website/src/release.md b/website/src/release.md
index 78c0e57525..f3a5798a2b 100644
--- a/website/src/release.md
+++ b/website/src/release.md
@@ -325,13 +325,13 @@ After downloading them, here are the instructions on how to verify them.
   ```bash
   gpg --verify apache-iceberg-rust-*.tar.gz.asc
   ```
-  Expects: `"apache-iceberg-rust-0.7.0.tar.gz: OK"`
+  Expects: `gpg: Good signature from ...`
 * Verify the checksums:
   ```bash
   shasum -a 512 -c apache-iceberg-rust-*.tar.gz.sha512
   ```
-  Expects: `gpg: Good signature from ...`
+  Expects: `"apache-iceberg-rust-...tar.gz: OK"`
 * Verify build and test:
   ```bash