diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml deleted file mode 100644 index 762cdbb7e..000000000 --- a/.github/workflows/build-and-test.yml +++ /dev/null @@ -1,295 +0,0 @@ -name: build-and-test - -on: - workflow_call: - inputs: - build-all-platforms: - description: 'Build wheels for all platforms (Linux ARM64, Windows, macOS)' - required: false - type: boolean - default: true -permissions: - contents: read - actions: write -jobs: - check-rust: - runs-on: ubuntu-22.04 - steps: - - name: Check out repository code - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 - - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@6d653acede28d24f02e3cd41383119e8b1b35921 - with: - toolchain: stable - components: rustfmt, clippy - - uses: prefix-dev/setup-pixi@97359467eae22a2d994114fe900db762fea86261 # v0.8.9 - with: - cache: true - - name: Cache - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 - with: - key: ${{ runner.os }}-${{ hashFiles('pixi.lock', 'Cargo.lock', 'vegafusion-python/pyproject.toml') }}-check-rust - path: | - ~/.cargo - target - .pixi - - name: Check fmt - run: pixi run check-rs-fmt - - name: Check warnings - run: pixi run check-rs-warnings - - name: Check clippy - run: pixi run check-rs-clippy - - test-rust: - runs-on: ubuntu-22.04 - steps: - - name: Check out repository code - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 - - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@6d653acede28d24f02e3cd41383119e8b1b35921 - with: - toolchain: stable - - uses: prefix-dev/setup-pixi@97359467eae22a2d994114fe900db762fea86261 # v0.8.9 - - name: Cache - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 - with: - key: ${{ runner.os }}-${{ hashFiles('pixi.lock', 'Cargo.lock', 'vegafusion-python/pyproject.toml') }}-test-rust2 - path: | - ~/.cargo - target - .pixi - - name: start minio server in the background - run: pixi run start-minio & - - name: Test rust - run: | - pixi run test-rs --release - - name: Upload test artifacts - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 - if: failure() - with: - name: vegafusion-rt-test-images-linux - path: | - vegafusion-runtime/tests/output - - check-vegafusion-python: - runs-on: ubuntu-22.04 - steps: - - name: Check out repository code - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 - - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@6d653acede28d24f02e3cd41383119e8b1b35921 - with: - toolchain: stable - - uses: prefix-dev/setup-pixi@97359467eae22a2d994114fe900db762fea86261 # v0.8.9 - - name: Cache - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 - with: - key: ${{ runner.os }}-${{ hashFiles('pixi.lock', 'Cargo.lock', 'vegafusion-python/pyproject.toml') }}-build-vegafusion-wasm - path: | - ~/.cargo - target - .pixi - - name: Dev python - run: | - pixi run dev-py - - name: Check format and lint - run: | - pixi run lint-check-py - - name: Type check - run: | - pixi run type-check-py -v - - build-vegafusion-python-linux-64: - runs-on: ubuntu-latest - steps: - - name: Check out repository code - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 - - name: Clear target/wheels - run: rm -rf target/wheels - - name: Build wheels (Linux) - uses: PyO3/maturin-action@ea5bac0f1ccd0ab11c805e2b804bfcb65dac2eab # v1.45.0 - with: - command: build - manylinux: 2014 - rust-toolchain: stable - args: -m 
vegafusion-python/Cargo.toml --profile release-opt --features=protobuf-src --strip --sdist - - name: Upload artifacts - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 - with: - name: python-wheels-linux-64 - path: | - target/wheels/*.tar.gz - target/wheels/*.whl - retention-days: 1 - if-no-files-found: error - - build-vegafusion-python-linux-arm64: - if: inputs.build-all-platforms - runs-on: ubuntu-latest - steps: - - name: Check out repository code - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 - - name: Clear target/wheels - run: rm -rf target/wheels - - name: Build arm64 wheels - uses: PyO3/maturin-action@ea5bac0f1ccd0ab11c805e2b804bfcb65dac2eab # v1.45.0 - with: - command: build - manylinux: 2_28 - rust-toolchain: stable - args: -m vegafusion-python/Cargo.toml --profile release-opt --features=protobuf-src --strip --target aarch64-unknown-linux-gnu - - name: Upload artifacts - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 - with: - name: python-wheels-linux-arm64 - path: | - target/wheels/*.tar.gz - target/wheels/*.whl - retention-days: 1 - if-no-files-found: error - - build-vegafusion-python-win-64: - if: inputs.build-all-platforms - runs-on: windows-2022 - steps: - - name: Check out repository code - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 - - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@6d653acede28d24f02e3cd41383119e8b1b35921 - with: - toolchain: stable - - uses: prefix-dev/setup-pixi@97359467eae22a2d994114fe900db762fea86261 # v0.8.9 - - name: Cache - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 - with: - key: ${{ runner.os }}-${{ hashFiles('pixi.lock', 'Cargo.lock', 'vegafusion-python/pyproject.toml') }}-build-vegafusion-python-win-64 - path: | - ~/.cargo - target - .pixi - - name: Build vegafusion-python - run: | - pixi run build-py - - name: Upload artifacts - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 - with: - name: python-wheels-win-64 - path: | - target/wheels/*.tar.gz - target/wheels/*.whl - retention-days: 1 - if-no-files-found: error - - build-vegafusion-python-osx-64: - if: inputs.build-all-platforms - runs-on: macos-15-intel - steps: - - name: Check out repository code - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 - - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@6d653acede28d24f02e3cd41383119e8b1b35921 - with: - toolchain: stable - - uses: prefix-dev/setup-pixi@97359467eae22a2d994114fe900db762fea86261 # v0.8.9 - - name: Cache - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 - with: - key: ${{ runner.os }}-${{ hashFiles('pixi.lock', 'Cargo.lock', 'vegafusion-python/pyproject.toml') }}-build-vegafusion-python-osx-64 - path: | - ~/.cargo - target - .pixi - - name: Build vegafusion-python - run: | - pixi run build-py - - name: Upload artifacts - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 - with: - name: python-wheels-osx-64 - path: | - target/wheels/*.tar.gz - target/wheels/*.whl - retention-days: 1 - if-no-files-found: error - - build-vegafusion-python-osx-arm64: - if: inputs.build-all-platforms - runs-on: macos-15 - steps: - - name: Check out repository code - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 - - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@6d653acede28d24f02e3cd41383119e8b1b35921 - with: - toolchain: stable - 
targets: aarch64-apple-darwin - - uses: prefix-dev/setup-pixi@97359467eae22a2d994114fe900db762fea86261 # v0.8.9 - - name: Cache - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 - with: - key: osx-arm64-${{ hashFiles('pixi.lock', 'Cargo.lock', 'vegafusion-python/pyproject.toml') }}-build-vegafusion-python-osx-arm64 - path: | - ~/.cargo - target - .pixi - - name: Build vegafusion-python - run: | - pixi run build-py --target aarch64-apple-darwin - - name: Upload artifacts - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 - with: - name: python-wheels-osx-arm64 - path: | - target/wheels/*.tar.gz - target/wheels/*.whl - retention-days: 1 - if-no-files-found: error - - test-vegafusion-python-linux-64: - runs-on: ubuntu-22.04 - needs: [build-vegafusion-python-linux-64] - steps: - - name: Check out repository code - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 - - name: Setup Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 - with: - python-version: "3.11" - - name: Install Chrome - uses: browser-actions/setup-chrome@c785b87e244131f27c9f19c1a33e2ead956ab7ce # v1.7.3 - with: - chrome-version: stable - - name: Install fonts on Linux - if: runner.os == 'Linux' - run: | - echo ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true | sudo debconf-set-selections - sudo apt-get install ttf-mscorefonts-installer - - name: Download vegafusion-python wheel - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 - with: - name: python-wheels-linux-64 - path: target/wheels/ - - name: install wheels - working-directory: target/wheels/ - run: | - ls -la - python -m pip install deepnote_vegafusion-*manylinux_2_17_x86_64*.whl - - # Optional dependencies - python -m pip install pyarrow pandas polars-lts-cpu "duckdb>=1.0" "vl-convert-python>=1.0.1rc1" scikit-image "pandas>=2.2" jupytext voila anywidget ipywidgets chromedriver-binary-auto - - # Test dependencies - python -m pip install pytest altair vega-datasets scikit-image jupytext voila ipykernel anywidget ipywidgets selenium flaky tenacity chromedriver-binary-auto "pyspark==3.5.*" - - name: Test vegafusion - working-directory: vegafusion-python/ - env: - VEGAFUSION_TEST_HEADLESS: 1 - run: pytest -vv -rA tests/test_spark_e2e.py - - name: Upload test artifacts - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 - if: failure() - with: - name: vegafusion-python-test-failures - path: | - vegafusion-python/tests/output/* - diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml new file mode 100644 index 000000000..53dc1a85f --- /dev/null +++ b/.github/workflows/build_test.yml @@ -0,0 +1,648 @@ +name: build_test +on: + pull_request: + types: [opened, synchronize] +jobs: + check-rust: + runs-on: ubuntu-22.04 + steps: + - name: Check out repository code + uses: actions/checkout@v4.2.2 + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + components: rustfmt, clippy + - uses: prefix-dev/setup-pixi@v0.8.9 + with: + cache: true + - name: Cache + uses: actions/cache@v4 + with: + key: ${{ runner.os }}-${{ hashFiles('pixi.lock', 'Cargo.lock', 'vegafusion-python/pyproject.toml') }}-check-rust + path: | + ~/.cargo + target + .pixi + - name: Check fmt + run: pixi run check-rs-fmt + - name: Check warnings + run: pixi run check-rs-warnings + - name: Check clippy + run: pixi run check-rs-clippy + + test-rust: + 
runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - ubuntu-22.04 + - windows-2022 + - macos-14 + steps: + - name: Check out repository code + uses: actions/checkout@v4.2.2 + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + - uses: prefix-dev/setup-pixi@v0.8.9 + - name: Cache + uses: actions/cache@v4 + with: + key: ${{ runner.os }}-${{ hashFiles('pixi.lock', 'Cargo.lock', 'vegafusion-python/pyproject.toml') }}-test-rust2 + path: | + ~/.cargo + target + .pixi + - name: start minio server in the background + run: pixi run start-minio & + - name: Test rust + run: | + pixi run test-rs --release + - name: Upload test artifacts + uses: actions/upload-artifact@v4 + if: failure() + with: + name: vegafusion-rt-test-images-${{ matrix.os }} + path: | + vegafusion-runtime/tests/output + + build-vegafusion-wasm: + runs-on: ubuntu-22.04 + steps: + - name: Check out repository code + uses: actions/checkout@v4.2.2 + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + targets: wasm32-unknown-unknown + - uses: prefix-dev/setup-pixi@v0.8.9 + - name: Cache + uses: actions/cache@v4 + with: + key: ${{ runner.os }}-${{ hashFiles('pixi.lock', 'Cargo.lock', 'vegafusion-python/pyproject.toml') }}-build-vegafusion-wasm + path: | + ~/.cargo + target + .pixi + - name: Build package + run: | + pixi run pack-wasm + mkdir -p package + cp vegafusion-wasm/pkg/vegafusion-wasm-*.tgz package/ + echo "timestamp: $(date)" > package/info.txt + - name: Upload artifacts + uses: actions/upload-artifact@v4.4.3 + with: + name: vegafusion-wasm-package + path: package + + check-vegafusion-python: + runs-on: ubuntu-22.04 + steps: + - name: Check out repository code + uses: actions/checkout@v4.2.2 + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + - uses: prefix-dev/setup-pixi@v0.8.9 + - name: Cache + uses: actions/cache@v4 + with: + key: ${{ runner.os }}-${{ hashFiles('pixi.lock', 'Cargo.lock', 'vegafusion-python/pyproject.toml') }}-build-vegafusion-wasm + path: | + ~/.cargo + target + .pixi + - name: Dev python + run: | + pixi run dev-py + - name: Check format and lint + run: | + pixi run lint-check-py + - name: Type check + run: | + pixi run type-check-py + + # Use maturin action to build linux wheels within proper manylinux compatible containers + # (This is why we don't use the pixi "build-py" action) + build-vegafusion-python-linux-64: + runs-on: ubuntu-latest + steps: + - name: Check out repository code + uses: actions/checkout@v4.2.2 + - name: Clear target/wheels + run: rm -rf target/wheels + - name: Build wheels (Linux) + uses: messense/maturin-action@v1.45.0 + with: + command: build + manylinux: 2014 + rust-toolchain: stable + args: -m vegafusion-python/Cargo.toml --profile release-opt --features=protobuf-src --strip --sdist + - name: Upload artifacts + uses: actions/upload-artifact@v4.4.3 + with: + name: python-wheels-linux-64 + path: | + target/wheels/*.tar.gz + target/wheels/*.whl + retention-days: 1 + if-no-files-found: error + + build-vegafusion-python-linux-arm64: + runs-on: ubuntu-latest + steps: + - name: Check out repository code + uses: actions/checkout@v4.2.2 + - name: Clear target/wheels + run: rm -rf target/wheels + - name: Build arm64 wheels + uses: messense/maturin-action@v1.45.0 + with: + command: build + manylinux: 2_28 + rust-toolchain: stable + args: -m vegafusion-python/Cargo.toml --profile release-opt --features=protobuf-src --strip 
--target aarch64-unknown-linux-gnu + - name: Upload artifacts + uses: actions/upload-artifact@v4.4.3 + with: + name: python-wheels-linux-arm64 + path: | + target/wheels/*.tar.gz + target/wheels/*.whl + retention-days: 1 + if-no-files-found: error + + build-vegafusion-python-win-64: + runs-on: windows-2022 + steps: + - name: Check out repository code + uses: actions/checkout@v4.2.2 + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + - uses: prefix-dev/setup-pixi@v0.8.9 + - name: Cache + uses: actions/cache@v4 + with: + key: ${{ runner.os }}-${{ hashFiles('pixi.lock', 'Cargo.lock', 'vegafusion-python/pyproject.toml') }}-build-vegafusion-python-win-64 + path: | + ~/.cargo + target + .pixi + - name: Build vegafusion-python + run: | + pixi run build-py + - name: Upload artifacts + uses: actions/upload-artifact@v4.4.3 + with: + name: python-wheels-win-64 + path: | + target/wheels/*.tar.gz + target/wheels/*.whl + retention-days: 1 + if-no-files-found: error + + build-vegafusion-python-osx-64: + runs-on: macos-13 + steps: + - name: Check out repository code + uses: actions/checkout@v4.2.2 + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + - uses: prefix-dev/setup-pixi@v0.8.9 + - name: Cache + uses: actions/cache@v4 + with: + key: ${{ runner.os }}-${{ hashFiles('pixi.lock', 'Cargo.lock', 'vegafusion-python/pyproject.toml') }}-build-vegafusion-python-osx-64 + path: | + ~/.cargo + target + .pixi + - name: Build vegafusion-python + run: | + pixi run build-py + - name: Upload artifacts + uses: actions/upload-artifact@v4.4.3 + with: + name: python-wheels-osx-64 + path: | + target/wheels/*.tar.gz + target/wheels/*.whl + retention-days: 1 + if-no-files-found: error + + build-vegafusion-python-osx-arm64: + runs-on: macos-14 + steps: + - name: Check out repository code + uses: actions/checkout@v4.2.2 + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + targets: aarch64-apple-darwin + - uses: prefix-dev/setup-pixi@v0.8.9 + - name: Cache + uses: actions/cache@v4 + with: + key: osx-arm64-${{ hashFiles('pixi.lock', 'Cargo.lock', 'vegafusion-python/pyproject.toml') }}-build-vegafusion-python-osx-arm64 + path: | + ~/.cargo + target + .pixi + - name: Build vegafusion-python + run: | + pixi run build-py --target aarch64-apple-darwin + - name: Upload artifacts + uses: actions/upload-artifact@v4.4.3 + with: + name: python-wheels-osx-arm64 + path: | + target/wheels/*.tar.gz + target/wheels/*.whl + retention-days: 1 + if-no-files-found: error + + test-vegafusion-python-linux-64: + runs-on: ubuntu-22.04 + needs: [build-vegafusion-python-linux-64] + steps: + - name: Check out repository code + uses: actions/checkout@v4.2.2 + - name: Setup Python + uses: actions/setup-python@v5.3.0 + with: + python-version: "3.11" + - name: Install Chrome + uses: browser-actions/setup-chrome@v1 + with: + chrome-version: stable + - name: Install fonts on Linux + if: runner.os == 'Linux' + run: | + echo ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true | sudo debconf-set-selections + sudo apt-get install ttf-mscorefonts-installer + - name: Download vegafusion-python wheel + uses: actions/download-artifact@v4.1.8 + with: + name: python-wheels-linux-64 + path: target/wheels/ + - name: install wheels + working-directory: target/wheels/ + run: | + ls -la + python -m pip install vegafusion-*manylinux_2_17_x86_64*.whl + + # Optional dependencies + python -m pip install 
pyarrow pandas polars-lts-cpu "duckdb>=1.0" "vl-convert-python>=1.0.1rc1" scikit-image "pandas>=2.2" jupytext voila anywidget ipywidgets chromedriver-binary-auto + + # Test dependencies + python -m pip install pytest altair vega-datasets scikit-image jupytext voila ipykernel anywidget ipywidgets selenium flaky tenacity chromedriver-binary-auto + - name: Test lazy imports + working-directory: vegafusion-python/ + run: python checks/check_lazy_imports.py + - name: Test vegafusion + working-directory: vegafusion-python/ + env: + VEGAFUSION_TEST_HEADLESS: 1 + run: pytest + - name: Upload test artifacts + uses: actions/upload-artifact@v4.4.3 + if: failure() + with: + name: vegafusion-python-test-failures + path: | + vegafusion-python/tests/output/* + + test-vegafusion-python-osx-arm64: + runs-on: macos-14 + needs: [build-vegafusion-python-osx-arm64] + steps: + - name: Check out repository code + uses: actions/checkout@v4.2.2 + - name: Setup Python + uses: actions/setup-python@v5.3.0 + with: + python-version: "3.10" + - name: Download vegafusion-python wheel + uses: actions/download-artifact@v4.1.8 + with: + name: python-wheels-osx-arm64 + path: target/wheels/ + - name: install wheels + working-directory: target/wheels/ + run: | + ls -la + python -m pip install vegafusion-*macosx_11_*_arm64.whl + + # Optional dependencies + python -m pip install pyarrow pandas polars "duckdb>=1.0" vl-convert-python "pandas>=2.2" + + # Test dependencies + python -m pip install pytest altair vega-datasets scikit-image + + - name: Test vegafusion + working-directory: vegafusion-python/ + run: pytest --ignore=tests/test_jupyter_widget.py + + test-vegafusion-python-win-64: + runs-on: windows-2022 + needs: [build-vegafusion-python-win-64] + steps: + - name: Check out repository code + uses: actions/checkout@v4.2.2 + - name: Setup Python + uses: actions/setup-python@v5.3.0 + with: + python-version: "3.9" + - name: Download vegafusion-python wheel + uses: actions/download-artifact@v4.1.8 + with: + name: python-wheels-win-64 + path: target/wheels/ + - name: install wheels + working-directory: target/wheels/ + shell: powershell + run: | + Get-ChildItem -Force + $vegafusion = Get-ChildItem -Name "vegafusion-*win_amd64.whl" | Select-Object -First 1 + + python -m pip install $vegafusion + + # Optional dependencies + python -m pip install pyarrow pandas "numpy<2" polars[timezone] "duckdb>=1.0" vl-convert-python + + # Test dependencies + python -m pip install pytest altair vega-datasets scikit-image + - name: Test vegafusion + working-directory: vegafusion-python/ + run: pytest --ignore=tests/test_jupyter_widget.py + + build-vegafusion-server-linux-64: + runs-on: ubuntu-22.04 + steps: + - name: Check out repository code + uses: actions/checkout@v4.2.2 + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + - uses: prefix-dev/setup-pixi@v0.8.9 + - name: Cache + uses: actions/cache@v4 + with: + key: ${{ runner.os }}-${{ hashFiles('pixi.lock', 'Cargo.lock', 'vegafusion-python/pyproject.toml') }}-build-vegafusion-server-linux-64 + path: | + ~/.cargo + target + .pixi + - name: Build vegafusion-server + run: | + pixi run build-rs-server + - name: zip executable + uses: vimtor/action-zip@v1.2 + with: + files: target/release-opt/vegafusion-server + dest: vegafusion-server-linux-64.zip + - name: Upload artifacts + uses: actions/upload-artifact@v4.4.3 + with: + name: vegafusion-server-linux-64 + path: | + vegafusion-server-* + retention-days: 1 + if-no-files-found: error + + # # Below is an 
attempt to cross compile to linux aarch64 within conda environment. + # # It fails when compiling protobuf-src. So instead we fall back to the direct use of + # # cargo (without involving conda). + # + # build-vegafusion-server-linux-arm64: + #   runs-on: ubuntu-22.04 + #   steps: + #     - name: Check out repository code + #       uses: actions/checkout@v4.2.2 + #     - name: Install pixi + #       run: curl -fsSL https://pixi.sh/install.sh | bash && echo "${HOME}/.pixi/bin" >> $GITHUB_PATH + #     - name: Cache + #       uses: actions/cache@v4 + #       with: + #         key: linux-arm64-${{ hashFiles('pixi.lock', 'Cargo.lock', 'vegafusion-python/pyproject.toml') }}-server + #         path: | + #           ~/.cargo + #           target + #           .pixi + #     - name: Build vegafusion-server + #       run: | + #         pixi run python automation/download_rust_target.py aarch64-unknown-linux-gnu + #         pixi add gcc_linux-aarch64 -p linux-64 + #         export RUSTFLAGS="-C linker=aarch64-linux-gnu-gcc" + #         pixi run build-rs-server --features=protobuf-src --target aarch64-unknown-linux-gnu + #     - name: zip executable + #       uses: vimtor/action-zip@v1.2 + #       with: + #         files: target/aarch64-unknown-linux-gnu/release/vegafusion-server + #         dest: vegafusion-server-linux-arm64.zip + #     - name: Upload artifacts + #       uses: actions/upload-artifact@v4.4.3 + #       with: + #         name: vegafusion-server-linux-arm64 + #         path: | + #           vegafusion-server-* + + build-vegafusion-server-linux-arm64: + runs-on: ubuntu-latest + steps: + - name: Check out repository code + uses: actions/checkout@v4.2.2 + - name: Install latest stable Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + - name: Cache rust dependencies + uses: Swatinem/rust-cache@v2.7.5 + with: + cache-on-failure: True + - name: Download arm64 toolchain + run: | + rustup target add aarch64-unknown-linux-gnu + - name: Download gcc for cross compilation + run: | + sudo apt-get update + sudo apt-get install gcc-aarch64-linux-gnu + - name: Build vegafusion-server + env: + RUSTFLAGS: "-C linker=aarch64-linux-gnu-gcc" + run: | + cargo build -p vegafusion-server --features=protobuf-src --profile release-opt --target=aarch64-unknown-linux-gnu + - name: zip executable + uses: vimtor/action-zip@v1.2 + with: + files: target/aarch64-unknown-linux-gnu/release-opt/vegafusion-server + dest: vegafusion-server-linux-arm64.zip + - name: Upload artifacts + uses: actions/upload-artifact@v4.4.3 + with: + name: vegafusion-server-linux-arm64 + path: | + vegafusion-server-* + retention-days: 1 + if-no-files-found: error + + build-vegafusion-server-win-64: + runs-on: windows-2022 + steps: + - name: Check out repository code + uses: actions/checkout@v4.2.2 + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + - uses: prefix-dev/setup-pixi@v0.8.9 + - name: Cache + uses: actions/cache@v4 + with: + key: ${{ runner.os }}-${{ hashFiles('pixi.lock', 'Cargo.lock', 'vegafusion-python/pyproject.toml') }}-build-vegafusion-server-win-64b + path: | + ~/.cargo + target + .pixi + - name: Build vegafusion-server + run: | + pixi install -vvv + pixi run build-rs-server + - name: zip executable + uses: vimtor/action-zip@v1.2 + with: + files: target/release-opt/vegafusion-server.exe + dest: vegafusion-server-win-64.zip + - name: Upload artifacts + uses: actions/upload-artifact@v4.4.3 + with: + name: vegafusion-server-win-64 + path: | + vegafusion-server-* + retention-days: 1 + if-no-files-found: error + + build-vegafusion-server-osx-64: + runs-on: macos-13 + steps: + - name: Check out repository code + uses: actions/checkout@v4.2.2 + - name:
Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + - uses: prefix-dev/setup-pixi@v0.8.9 + - name: Cache + uses: actions/cache@v4 + with: + key: ${{ runner.os }}-${{ hashFiles('pixi.lock', 'Cargo.lock', 'vegafusion-python/pyproject.toml') }}-build-vegafusion-server-osx-64 + path: | + ~/.cargo + target + .pixi + - name: Build vegafusion-server + run: | + pixi run build-rs-server + - name: zip executable + uses: vimtor/action-zip@v1.2 + with: + files: target/release-opt/vegafusion-server + dest: vegafusion-server-osx-64.zip + - name: Upload artifacts + uses: actions/upload-artifact@v4.4.3 + with: + name: vegafusion-server-osx-64 + path: | + vegafusion-server-* + retention-days: 1 + if-no-files-found: error + + build-vegafusion-server-osx-arm64: + runs-on: macos-14 + steps: + - name: Check out repository code + uses: actions/checkout@v4.2.2 + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + targets: aarch64-apple-darwin + - uses: prefix-dev/setup-pixi@v0.8.9 + - name: Cache + uses: actions/cache@v4 + with: + key: osx-arm64-${{ hashFiles('pixi.lock', 'Cargo.lock', 'vegafusion-python/pyproject.toml') }}-build-vegafusion-server-osx-arm64 + path: | + ~/.cargo + target + .pixi + - name: Build vegafusion-server + run: | + pixi run build-rs-server --target aarch64-apple-darwin + - name: zip executable + uses: vimtor/action-zip@v1.2 + with: + files: target/aarch64-apple-darwin/release-opt/vegafusion-server + dest: vegafusion-server-osx-arm64.zip + - name: Upload artifacts + uses: actions/upload-artifact@v4.4.3 + with: + name: vegafusion-server-osx-arm64 + path: | + vegafusion-server-* + retention-days: 1 + if-no-files-found: error + + # Bundle all server builds into a single artifact for easier downloading + bundle-vegafusion-server-builds: + needs: + [ + build-vegafusion-server-linux-64, + build-vegafusion-server-linux-arm64, + build-vegafusion-server-win-64, + build-vegafusion-server-osx-64, + build-vegafusion-server-osx-arm64, + ] + runs-on: ubuntu-latest + steps: + - name: Merge Server Artifacts + uses: actions/upload-artifact/merge@v4.4.3 + with: + name: vegafusion-server-all + pattern: vegafusion-server-* + delete-merged: true + separate-directories: false + + # Bundle all python wheels into a single artifact for easier downloading + bundle-vegafusion-python-wheels: + needs: [ + # jobs that build but don't have test dependencies + build-vegafusion-python-linux-arm64, + build-vegafusion-python-osx-64, + + # jobs that test the wheels + test-vegafusion-python-linux-64, + test-vegafusion-python-osx-arm64, + test-vegafusion-python-win-64, + ] + runs-on: ubuntu-latest + steps: + - name: Merge Wheel Artifacts + uses: actions/upload-artifact/merge@v4.4.3 + with: + name: vegafusion-python-wheels-all + pattern: python-wheels-* + delete-merged: true + separate-directories: false diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml deleted file mode 100644 index 2962f9f32..000000000 --- a/.github/workflows/pr.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: 'PR checks' -on: - workflow_dispatch: - pull_request: - types: [opened, synchronize] - -permissions: - contents: read - actions: write - -jobs: - build-and-test: - uses: ./.github/workflows/build-and-test.yml - with: - build-all-platforms: false - permissions: - contents: read - actions: write diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml deleted file mode 100644 index 20b98b3c5..000000000 --- a/.github/workflows/release.yml +++ 
/dev/null @@ -1,64 +0,0 @@ -name: 'Release' -on: - release: - types: [published] - -permissions: - contents: read - -jobs: - build-and-test: - uses: ./.github/workflows/build-and-test.yml - permissions: - contents: read - actions: write - - attach-wheels-to-release: - name: Attach wheels to GitHub Release - runs-on: ubuntu-latest - needs: [build-and-test] - permissions: - contents: write - actions: read - steps: - - name: Download all wheel artifacts - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 - with: - pattern: python-wheels-* - path: wheels/ - merge-multiple: true - - name: List downloaded wheels - run: ls -lR wheels/ - - name: Upload wheels to release - env: - GH_TOKEN: ${{ github.token }} - GH_REPO: ${{ github.repository }} - RELEASE_TAG: ${{ github.ref_name }} - run: | - gh release upload "$RELEASE_TAG" wheels/* --clobber - - publish-to-pypi: - name: Publish Python wheels to PyPI - runs-on: ubuntu-latest - needs: [build-and-test] - environment: - name: pypi - url: https://pypi.org/p/deepnote-vegafusion - permissions: - id-token: write - contents: read - actions: read - steps: - - name: Download all wheel artifacts - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 - with: - pattern: python-wheels-* - path: dist/ - merge-multiple: true - - name: List files to be uploaded - run: ls -lR dist/ - - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 - with: - skip-existing: true - verbose: true diff --git a/Cargo.lock b/Cargo.lock index 6c1939582..2641d6fec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4047,6 +4047,7 @@ dependencies = [ name = "rust-examples" version = "0.1.0" dependencies = [ + "async-trait", "datafusion", "reqwest 0.12.22", "serde_json", @@ -5027,7 +5028,7 @@ dependencies = [ [[package]] name = "vegafusion" -version = "2.0.8" +version = "2.0.3" dependencies = [ "arrow", "async-trait", @@ -5052,7 +5053,7 @@ dependencies = [ [[package]] name = "vegafusion-common" -version = "2.0.8" +version = "2.0.3" dependencies = [ "ahash", "arrow", @@ -5076,7 +5077,7 @@ dependencies = [ [[package]] name = "vegafusion-core" -version = "2.0.8" +version = "2.0.3" dependencies = [ "ahash", "async-trait", @@ -5108,7 +5109,7 @@ dependencies = [ [[package]] name = "vegafusion-runtime" -version = "2.0.8" +version = "2.0.3" dependencies = [ "async-lock", "async-mutex", @@ -5130,7 +5131,6 @@ dependencies = [ "datafusion-optimizer", "datafusion-physical-expr", "datafusion-proto", - "datafusion-sql", "env_logger", "float-cmp", "futures", @@ -5165,7 +5165,7 @@ dependencies = [ [[package]] name = "vegafusion-server" -version = "2.0.8" +version = "2.0.3" dependencies = [ "assert_cmd", "clap", @@ -5189,7 +5189,7 @@ dependencies = [ [[package]] name = "vegafusion-wasm" -version = "2.0.8" +version = "2.0.3" dependencies = [ "async-trait", "chrono", diff --git a/examples/python-examples/pre_transform_spec_vendor.py b/examples/python-examples/pre_transform_spec_vendor.py deleted file mode 100644 index a04ed0e5f..000000000 --- a/examples/python-examples/pre_transform_spec_vendor.py +++ /dev/null @@ -1,301 +0,0 @@ -import pandas as pd -import pyarrow as pa -from typing import Any - -try: - from pyspark.sql import SparkSession -except ImportError: - print("PySpark not available. 
Please install pyspark to run this example.") - print("pip install pyspark") - raise - -import vegafusion as vf - - -def create_spark_executor_for_session(spark_session: SparkSession): - """Create a Spark executor function that executes SQL queries.""" - - def spark_executor(sql_query: str) -> pa.Table: - """Execute SQL query using Spark and return Arrow table.""" - print("๐Ÿ”ฅ Executing Spark SQL:") - print(f" {sql_query}") - print("-" * 60) - - spark_df = spark_session.sql(sql_query) - - pandas_df = spark_df.toPandas() - print("Got response from Spark, rows:", len(pandas_df)) - arrow_table = pa.Table.from_pandas(pandas_df) - return arrow_table - - return spark_executor - - -def setup_spark_session() -> SparkSession: - """Initialize a local Spark session with appropriate configuration.""" - print("๐Ÿš€ Setting up Spark session...") - - spark = ( - SparkSession.builder.appName("vegafusion-spec-vendor-example") - .config("spark.sql.execution.arrow.pyspark.enabled", "true") - .config("spark.sql.execution.arrow.pyspark.fallback.enabled", "true") - .config("spark.executor.memory", "2g") - .config("spark.driver.memory", "2g") - .master("local[2]") - .getOrCreate() - ) - - # Set timezone to UTC for consistency - spark.sql("SET TIME ZONE 'UTC'") - print("โœ… Spark session created successfully") - - return spark - - -def load_and_register_movies_data(spark: SparkSession) -> pa.Schema: - """Load movies data and register it as a Spark table.""" - print("๐Ÿ“Š Loading movies data...") - - movies_url = "https://raw.githubusercontent.com/vega/vega-datasets/refs/heads/main/data/movies.json" - movies_df = pd.read_json(movies_url) - - print(f"โœ… Loaded {len(movies_df)} movies") - print(f" Columns: {list(movies_df.columns)}") - - for col in movies_df.columns: - if movies_df[col].dtype == "object": - non_null_values = movies_df[col].dropna() - if len(non_null_values) > 0: - try: - pd.to_numeric(non_null_values, errors="raise") - movies_df[col] = pd.to_numeric(movies_df[col], errors="coerce") - except (ValueError, TypeError): - movies_df[col] = movies_df[col].astype("string") - - spark_df = spark.createDataFrame(movies_df) - - spark_df.createOrReplaceTempView("movies") - print("โœ… Movies data registered as 'movies' table in Spark") - - pandas_df_clean = spark_df.toPandas() - arrow_schema = pa.Schema.from_pandas(pandas_df_clean) - return arrow_schema - - -def main(): - print("=" * 80) - print("VegaFusion pre_transform_spec_vendor Example with Apache Spark") - print("=" * 80) - - spark = setup_spark_session() - - try: - movies_schema = load_and_register_movies_data(spark) - - spark_executor = create_spark_executor_for_session(spark) - - spec = get_spec() - - print("\n๐Ÿ”ง Running pre_transform_spec_vendor...") - print(f" Using Arrow schema with {len(movies_schema)} fields:") - for field in movies_schema: - print(f" - {field.name}: {field.type}") - - # Build a vendor-specific runtime with the provided Spark executor - runtime = vf.VegaFusionRuntime.new_vendor( - vendor="sparksql", - executor=spark_executor, - ) - - transformed_spec, warnings = runtime.pre_transform_spec( - spec=spec, - local_tz="UTC", - preserve_interactivity=False, - inline_datasets={"movies": movies_schema}, - ) - - print("\nโœ… Transformation completed!") - print(f" Warnings: {len(warnings)}") - - if warnings: - print("โš ๏ธ Warnings:") - for warning in warnings: - print(f" - {warning['type']}: {warning['message']}") - - print("\n๐Ÿ“‹ Transformed specification:") - print(f" Data sources: {len(transformed_spec.get('data', []))}") - - for 
data_source in transformed_spec.get("data", []): - if "values" in data_source: - values = data_source["values"] - print(f" - '{data_source['name']}': {len(values)} inline rows") - elif "url" in data_source: - print( - f" - '{data_source['name']}': external URL ({data_source['url']})" - ) - - print("\n๐ŸŽฏ Example completed successfully!") - - finally: - print("\n๐Ÿงน Stopping Spark session...") - spark.stop() - print("โœ… Cleanup completed") - - -def get_spec() -> dict[str, Any]: - """ - Return a Vega specification that creates a histogram of IMDB ratings. - Based on https://vega.github.io/editor/#/examples/vega/histogram-null-values - """ - return { - "$schema": "https://vega.github.io/schema/vega/v5.json", - "description": "A histogram of film IMDB ratings from the movies dataset.", - "width": 500, - "height": 300, - "padding": 5, - "autosize": {"type": "fit", "resize": True}, - "signals": [ - { - "name": "maxbins", - "value": 15, - "bind": {"input": "select", "options": [5, 10, 15, 20]}, - }, - {"name": "binCount", "update": "(bins.stop - bins.start) / bins.step"}, - {"name": "nullGap", "value": 10}, - {"name": "barStep", "update": "(width - nullGap) / (1 + binCount)"}, - ], - "data": [ - { - "name": "movies_table", - "url": "vegafusion+dataset://movies", - "transform": [ - {"type": "extent", "field": "IMDB Rating", "signal": "extent"}, - { - "type": "bin", - "signal": "bins", - "field": "IMDB Rating", - "extent": {"signal": "extent"}, - "maxbins": {"signal": "maxbins"}, - }, - ], - }, - { - "name": "rating_counts", - "source": "movies_table", - "transform": [ - {"type": "filter", "expr": "datum['IMDB Rating'] != null"}, - {"type": "aggregate", "groupby": ["bin0", "bin1"]}, - ], - }, - { - "name": "null_ratings", - "source": "movies_table", - "transform": [ - {"type": "filter", "expr": "datum['IMDB Rating'] == null"}, - {"type": "aggregate", "groupby": []}, - ], - }, - { - "name": "genre_summary", - "source": "movies_table", - "transform": [ - {"type": "filter", "expr": "datum['Major Genre'] != null"}, - { - "type": "aggregate", - "groupby": ["Major Genre"], - "fields": ["IMDB Rating", "Production Budget"], - "ops": ["mean", "mean"], - "as": ["avg_rating", "avg_budget"], - }, - { - "type": "filter", - "expr": "datum.count > 5", - }, - ], - }, - ], - "scales": [ - { - "name": "yscale", - "type": "linear", - "range": "height", - "round": True, - "nice": True, - "domain": { - "fields": [ - {"data": "rating_counts", "field": "count"}, - {"data": "null_ratings", "field": "count"}, - ] - }, - }, - { - "name": "xscale", - "type": "linear", - "range": [{"signal": "barStep + nullGap"}, {"signal": "width"}], - "round": True, - "domain": {"signal": "[bins.start, bins.stop]"}, - "bins": {"signal": "bins"}, - }, - { - "name": "xscale-null", - "type": "band", - "range": [0, {"signal": "barStep"}], - "round": True, - "domain": [None], - }, - ], - "axes": [ - { - "orient": "bottom", - "scale": "xscale", - "title": "IMDB Rating", - "tickMinStep": 0.5, - }, - {"orient": "bottom", "scale": "xscale-null", "title": "Null Values"}, - { - "orient": "left", - "scale": "yscale", - "title": "Number of Movies", - "tickCount": 5, - "offset": 5, - }, - ], - "marks": [ - { - "type": "rect", - "from": {"data": "rating_counts"}, - "encode": { - "update": { - "x": {"scale": "xscale", "field": "bin0", "offset": 1}, - "x2": {"scale": "xscale", "field": "bin1"}, - "y": {"scale": "yscale", "field": "count"}, - "y2": {"scale": "yscale", "value": 0}, - "fill": {"value": "steelblue"}, - "stroke": {"value": "white"}, - 
"strokeWidth": {"value": 1}, - }, - "hover": {"fill": {"value": "firebrick"}}, - }, - }, - { - "type": "rect", - "from": {"data": "null_ratings"}, - "encode": { - "update": { - "x": {"scale": "xscale-null", "value": None, "offset": 1}, - "x2": {"scale": "xscale-null", "band": 1}, - "y": {"scale": "yscale", "field": "count"}, - "y2": {"scale": "yscale", "value": 0}, - "fill": {"value": "#aaa"}, - "stroke": {"value": "white"}, - "strokeWidth": {"value": 1}, - }, - "hover": {"fill": {"value": "firebrick"}}, - }, - }, - ], - } - - -if __name__ == "__main__": - main() diff --git a/examples/rust-examples/Cargo.toml b/examples/rust-examples/Cargo.toml index 3805f20c7..e719058e2 100644 --- a/examples/rust-examples/Cargo.toml +++ b/examples/rust-examples/Cargo.toml @@ -4,6 +4,7 @@ version = "0.1.0" edition = "2021" [dev-dependencies] +async-trait = { workspace = true } serde_json = { workspace = true } vegafusion-common = { path = "../../vegafusion-common" } vegafusion-core = { path = "../../vegafusion-core" } diff --git a/examples/rust-examples/examples/custom_executor.rs b/examples/rust-examples/examples/custom_executor.rs new file mode 100644 index 000000000..717747867 --- /dev/null +++ b/examples/rust-examples/examples/custom_executor.rs @@ -0,0 +1,140 @@ +use datafusion::prelude::SessionContext; +use std::sync::Arc; +use vegafusion_common::data::table::VegaFusionTable; +use vegafusion_common::datafusion_expr::LogicalPlan; +use vegafusion_common::error::Result; +use vegafusion_core::runtime::{PlanExecutor, VegaFusionRuntimeTrait}; +use vegafusion_core::spec::chart::ChartSpec; +use vegafusion_runtime::datafusion::context::make_datafusion_context; +use vegafusion_runtime::plan_executor::DataFusionPlanExecutor; +use vegafusion_runtime::task_graph::runtime::VegaFusionRuntime; + +/// A custom executor that logs plan execution and forwards to DataFusion +#[derive(Clone)] +struct LoggingExecutor { + fallback: Arc, +} + +impl LoggingExecutor { + fn new(ctx: Arc) -> Self { + Self { + fallback: Arc::new(DataFusionPlanExecutor::new(ctx)), + } + } +} + +#[async_trait::async_trait] +impl PlanExecutor for LoggingExecutor { + async fn execute_plan(&self, plan: LogicalPlan) -> Result { + println!("Custom executor received logical plan"); + println!("Plan details:\n{}\n", plan.display_indent()); + + // Forward to DataFusion for actual execution + let result = self.fallback.execute_plan(plan).await?; + + println!( + "Custom executor executed plan, returned {} rows\n", + result.num_rows() + ); + + Ok(result) + } +} + +/// This example demonstrates how to use a custom plan executor with VegaFusion. +/// The custom executor logs each plan execution before forwarding to DataFusion. 
+#[tokio::main] +async fn main() { + let spec = get_spec(); + + // Create a custom executor + let ctx = Arc::new(make_datafusion_context()); + let custom_executor = Arc::new(LoggingExecutor::new(ctx)) as Arc<dyn PlanExecutor>; + + // Create runtime with custom executor + let runtime = VegaFusionRuntime::new(None, Some(custom_executor)); + + println!("Starting pre-transform with custom executor\n"); + + let (_transformed_spec, warnings) = runtime + .pre_transform_spec( + &spec, + &Default::default(), // Inline datasets + &Default::default(), // Options + ) + .await + .unwrap(); + println!("Spec transformed"); + assert_eq!(warnings.len(), 0); +} + +fn get_spec() -> ChartSpec { + let spec_str = r##" + { + "$schema": "https://vega.github.io/schema/vega/v5.json", + "description": "A histogram demonstrating custom executor usage", + "width": 400, + "height": 200, + "padding": 5, + "data": [ + { + "name": "table", + "url": "data/movies.json", + "transform": [ + { + "type": "extent", + "field": "IMDB Rating", + "signal": "extent" + }, + { + "type": "bin", + "signal": "bins", + "field": "IMDB Rating", + "extent": {"signal": "extent"}, + "maxbins": 10 + }, + { + "type": "aggregate", + "groupby": ["bin0", "bin1"], + "ops": ["count"], + "fields": [null], + "as": ["count"] + } + ] + } + ], + "scales": [ + { + "name": "xscale", + "type": "linear", + "range": "width", + "domain": {"signal": "extent"} + }, + { + "name": "yscale", + "type": "linear", + "range": "height", + "round": true, + "domain": {"data": "table", "field": "count"}, + "zero": true, + "nice": true + } + ], + "marks": [ + { + "type": "rect", + "from": {"data": "table"}, + "encode": { + "update": { + "x": {"scale": "xscale", "field": "bin0"}, + "x2": {"scale": "xscale", "field": "bin1"}, + "y": {"scale": "yscale", "field": "count"}, + "y2": {"scale": "yscale", "value": 0} + } + } + } + ] + } + "##; + serde_json::from_str(spec_str).unwrap() +} diff --git a/pixi.lock b/pixi.lock index 11f615746..82cf9e5e2 100644 --- a/pixi.lock +++ b/pixi.lock @@ -204,7 +204,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/nest-asyncio-1.6.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/nodejs-20.5.1-hb753e55_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/notebook-shim-0.2.4-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.4-py310hb13e2d6_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/numpy-2.2.6-py310hefbff90_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/openjdk-20.0.0-hfea2f88_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.2-hb9d3cd8_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/orc-2.0.2-he039a57_2.conda @@ -232,14 +232,12 @@ - conda: https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-hb9d3cd8_1002.conda - conda: https://conda.anaconda.org/conda-forge/noarch/ptyprocess-0.7.0-pyhd3deb0d_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/pure_eval-0.2.3-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/py4j-0.10.9.7-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/pyarrow-16.1.0-py310hb7f781d_6.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/pyarrow-core-16.1.0-py310hac404ae_6_cpu.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyhd8ed1ab_0.conda - conda:
https://conda.anaconda.org/conda-forge/noarch/pydata-sphinx-theme-0.16.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.18.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2 - - conda: https://conda.anaconda.org/conda-forge/noarch/pyspark-3.5.5-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-8.3.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-cov-4.1.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.10.15-h4a871b0_2_cpython.conda @@ -509,7 +507,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/nest-asyncio-1.6.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/nodejs-20.5.1-h119ffd7_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/notebook-shim-0.2.4-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/osx-64/numpy-1.26.4-py310h4bfa8fc_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/numpy-2.2.6-py310h07c5b4d_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/openjdk-20.0.0-h7d26f99_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/openssl-3.3.2-hd23fc13_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/orc-2.0.2-hb8ce1e1_2.conda @@ -534,7 +532,6 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/psygnal-0.11.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/ptyprocess-0.7.0-pyhd3deb0d_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/pure_eval-0.2.3-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/py4j-0.10.9.7-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/osx-64/pyarrow-16.1.0-py310h58fd45c_6.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/pyarrow-core-16.1.0-py310h86202ae_6_cpu.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyhd8ed1ab_0.conda @@ -543,7 +540,6 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-64/pyobjc-core-10.3.1-py310h1c7075f_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/pyobjc-framework-cocoa-10.3.1-py310h1c7075f_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2 - - conda: https://conda.anaconda.org/conda-forge/noarch/pyspark-3.5.5-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-8.3.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-cov-4.1.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/python-3.10.15-hd8744da_2_cpython.conda @@ -823,7 +819,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/nest-asyncio-1.6.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/nodejs-20.5.1-ha2ed473_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/notebook-shim-0.2.4-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/osx-arm64/numpy-1.26.4-py310hd45542a_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/numpy-2.2.6-py310h4d83441_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/openjdk-20.0.0-hbe7ddab_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/openssl-3.3.2-h8359307_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/orc-2.0.1-h47ade37_1.conda @@ -852,7 +848,6 @@ environments: - 
conda: https://conda.anaconda.org/conda-forge/noarch/psygnal-0.11.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/ptyprocess-0.7.0-pyhd3deb0d_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/pure_eval-0.2.3-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/py4j-0.10.9.7-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyarrow-16.1.0-py310h24597f5_4.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyarrow-core-16.1.0-py310h2e300fa_4_cpu.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyhd8ed1ab_0.conda @@ -861,7 +856,6 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyobjc-core-10.3.1-py310hb3dec1a_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyobjc-framework-cocoa-10.3.1-py310hb3dec1a_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2 - - conda: https://conda.anaconda.org/conda-forge/noarch/pyspark-3.5.5-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-8.3.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-cov-4.1.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/python-3.10.15-hdce6c4c_2_cpython.conda @@ -1103,7 +1097,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/nest-asyncio-1.6.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/nodejs-20.5.1-h57928b3_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/notebook-shim-0.2.4-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/win-64/numpy-1.26.4-py310hf667824_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/numpy-2.2.6-py310h4987827_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/openjdk-20.0.0-h57928b3_2.conda - conda: https://conda.anaconda.org/conda-forge/win-64/openssl-3.3.2-h2466b09_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/orc-2.0.2-h34659fe_2.conda @@ -1126,14 +1120,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/psutil-5.9.5-py310h8d17308_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/psygnal-0.11.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pure_eval-0.2.3-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/py4j-0.10.9.7-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/win-64/pyarrow-16.1.0-py310h05ea346_6.conda - conda: https://conda.anaconda.org/conda-forge/win-64/pyarrow-core-16.1.0-py310h399dd74_6_cpu.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pydata-sphinx-theme-0.16.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.18.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyh0701188_6.tar.bz2 - - conda: https://conda.anaconda.org/conda-forge/noarch/pyspark-3.5.5-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-8.3.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-cov-4.1.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/python-3.10.15-hfaddaf0_2_cpython.conda @@ -7479,15 +7471,16 @@ packages: - pkg:pypi/notebook-shim?source=hash-mapping size: 16880 timestamp: 1707957948029 -- conda: 
https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.4-py310hb13e2d6_0.conda - sha256: 028fe2ea8e915a0a032b75165f11747770326f3d767e642880540c60a3256425 - md5: 6593de64c935768b6bad3e19b3e978be +- conda: https://conda.anaconda.org/conda-forge/linux-64/numpy-2.2.6-py310hefbff90_0.conda + sha256: 0ba94a61f91d67413e60fa8daa85627a8f299b5054b0eff8f93d26da83ec755e + md5: b0cea2c364bf65cd19e023040eeab05d depends: + - __glibc >=2.17,<3.0.a0 - libblas >=3.9.0,<4.0a0 - libcblas >=3.9.0,<4.0a0 - - libgcc-ng >=12 + - libgcc >=13 - liblapack >=3.9.0,<4.0a0 - - libstdcxx-ng >=12 + - libstdcxx >=13 - python >=3.10,<3.11.0a0 - python_abi 3.10.* *_cp310 constrains: @@ -7496,15 +7489,16 @@ packages: license_family: BSD purls: - pkg:pypi/numpy?source=hash-mapping - size: 7009070 - timestamp: 1707225917496 -- conda: https://conda.anaconda.org/conda-forge/osx-64/numpy-1.26.4-py310h4bfa8fc_0.conda - sha256: 914476e2d3273fdf9c0419a7bdcb7b31a5ec25949e4afbc847297ff3a50c62c8 - md5: cd6a2298387f558c9ea70ee73a189791 + size: 7893263 + timestamp: 1747545075833 +- conda: https://conda.anaconda.org/conda-forge/osx-64/numpy-2.2.6-py310h07c5b4d_0.conda + sha256: f1851c5726ff1a4de246e385ba442d749a68ef39316c834933ee9b980dbe62df + md5: d79253493dcc76b95221588b98e1eb3c depends: + - __osx >=10.13 - libblas >=3.9.0,<4.0a0 - libcblas >=3.9.0,<4.0a0 - - libcxx >=16 + - libcxx >=18 - liblapack >=3.9.0,<4.0a0 - python >=3.10,<3.11.0a0 - python_abi 3.10.* *_cp310 @@ -7514,15 +7508,16 @@ packages: license_family: BSD purls: - pkg:pypi/numpy?source=hash-mapping - size: 6491938 - timestamp: 1707226191321 -- conda: https://conda.anaconda.org/conda-forge/osx-arm64/numpy-1.26.4-py310hd45542a_0.conda - sha256: e3078108a4973e73c813b89228f4bd8095ec58f96ca29f55d2e45a6223a9a1db - md5: 267ee89a3a0b8c8fa838a2353f9ea0c0 + size: 6988856 + timestamp: 1747545137089 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/numpy-2.2.6-py310h4d83441_0.conda + sha256: 87704bcd5f4a4f88eaf2a97f07e9825803b58a8003a209b91e89669317523faf + md5: f4bd8ac423d04b3c444b96f2463d3519 depends: + - __osx >=11.0 - libblas >=3.9.0,<4.0a0 - libcblas >=3.9.0,<4.0a0 - - libcxx >=16 + - libcxx >=18 - liblapack >=3.9.0,<4.0a0 - python >=3.10,<3.11.0a0 - python >=3.10,<3.11.0a0 *_cpython @@ -7533,11 +7528,11 @@ packages: license_family: BSD purls: - pkg:pypi/numpy?source=hash-mapping - size: 5475744 - timestamp: 1707226187124 -- conda: https://conda.anaconda.org/conda-forge/win-64/numpy-1.26.4-py310hf667824_0.conda - sha256: 20ca447a8f840c01961f2bdf0847fc7b7785a62968e867d7aa4ca8a66d70f9ad - md5: 93e881c391880df90e74e43a4b67c16d + size: 5841650 + timestamp: 1747545043441 +- conda: https://conda.anaconda.org/conda-forge/win-64/numpy-2.2.6-py310h4987827_0.conda + sha256: 6f628e51763b86a535a723664e3aa1e38cb7147a2697f80b75c1980c1ed52f3e + md5: d2596785ac2cf5bab04e2ee9e5d04041 depends: - libblas >=3.9.0,<4.0a0 - libcblas >=3.9.0,<4.0a0 @@ -7553,8 +7548,8 @@ packages: license_family: BSD purls: - pkg:pypi/numpy?source=hash-mapping - size: 5977469 - timestamp: 1707226445438 + size: 6596153 + timestamp: 1747545352390 - conda: https://conda.anaconda.org/conda-forge/linux-64/openjdk-20.0.0-hfea2f88_2.conda sha256: a67450cb4bbffd084c407da95fb0c7a0d845cdecd412b9d9b0793634e1cdddc2 md5: b1d60ca9e1cf8c82214380fc8a8582e0 @@ -8371,17 +8366,6 @@ packages: - pkg:pypi/pure-eval?source=hash-mapping size: 16551 timestamp: 1721585805256 -- conda: https://conda.anaconda.org/conda-forge/noarch/py4j-0.10.9.7-pyhd8ed1ab_0.tar.bz2 - sha256: 
5a1d134f58dbc2c77b7985069a5485fe4aa09d49f1b545087913af62559ff738 - md5: 0f01a52cf858aef86632a8ab08011c0c - depends: - - python >=3.6 - license: BSD-3-Clause - license_family: BSD - purls: - - pkg:pypi/py4j?source=hash-mapping - size: 186208 - timestamp: 1660381672873 - conda: https://conda.anaconda.org/conda-forge/linux-64/pyarrow-16.1.0-py310hb7f781d_6.conda sha256: d417cf50d3090f22ad63f7641fb4d4efc6fd1cb77d2412fac4649b801b9bf585 md5: fdb4acd5dd835ffadde6a8fffe4dff6c @@ -8669,21 +8653,6 @@ packages: - pkg:pypi/pysocks?source=hash-mapping size: 18981 timestamp: 1661604969727 -- conda: https://conda.anaconda.org/conda-forge/noarch/pyspark-3.5.5-pyhd8ed1ab_0.conda - sha256: 0da96ab21bc215a05f78788fc8bcee425dada013307967cc32d240ef51d47fbf - md5: 1216c9ab11a8116330554bf767a0c600 - depends: - - numpy >=1.15,<2 - - pandas >=1.0.5 - - py4j 0.10.9.7 - - pyarrow >=4.0.0 - - python >=3.9 - license: Apache-2.0 - license_family: APACHE - purls: - - pkg:pypi/pyspark?source=hash-mapping - size: 311167998 - timestamp: 1740719235517 - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-8.3.3-pyhd8ed1ab_0.conda sha256: e99376d0068455712109d233f5790458ff861aeceb458bfda74e353338e4d815 md5: c03d61f31f38fdb9facf70c29958bf7a diff --git a/pixi.toml b/pixi.toml index 243dbb07b..d94f2e2aa 100644 --- a/pixi.toml +++ b/pixi.toml @@ -120,7 +120,6 @@ nbval = "0.9.6.*" selenium = "4.11.2.*" toml = "0.10.2.*" pytest = ">=4.6" -pyspark = "3.5.*" click = "8.1.6.*" python-duckdb = ">=1.1.3,<2" pip = "23.2.1.*" diff --git a/vegafusion-common/Cargo.toml b/vegafusion-common/Cargo.toml index 41967a6f3..0a28a8533 100644 --- a/vegafusion-common/Cargo.toml +++ b/vegafusion-common/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "vegafusion-common" -version = "2.0.8" +version = "2.0.3" edition = "2021" description = "Common components required by multiple VegaFusion crates" license = "BSD-3-Clause" diff --git a/vegafusion-common/src/data/table.rs b/vegafusion-common/src/data/table.rs index a5810e831..f456538d5 100644 --- a/vegafusion-common/src/data/table.rs +++ b/vegafusion-common/src/data/table.rs @@ -342,8 +342,8 @@ impl VegaFusionTable { let hash = vf_table.get_hash(); // Now rechunk for better multithreaded efficiency with DataFusion - let seq = PyList::new(py, vec![("max_chunksize", 8096)])?; - let kwargs = PyDict::from_sequence(seq.as_any())?; + let kwargs = PyDict::new(py); + kwargs.set_item("max_chunksize", 8096)?; let rechunked_table = data .call_method("rechunk", (), Some(&kwargs))? 
diff --git a/vegafusion-common/src/error.rs b/vegafusion-common/src/error.rs index 4c7d1647d..4640174b4 100644 --- a/vegafusion-common/src/error.rs +++ b/vegafusion-common/src/error.rs @@ -65,9 +65,6 @@ pub enum VegaFusionError { #[error("Arrow error: {0}\n{1}")] ArrowError(ArrowError, ErrorContext), - #[error("Vendor error: {0}\n{1}")] - VendorError(String, ErrorContext), - #[error("DataFusion error: {0}\n{1}")] DataFusionError(DataFusionError, ErrorContext), @@ -156,10 +153,6 @@ impl VegaFusionError { context.contexts.push(context_fn().into()); VegaFusionError::DataFusionError(err, context) } - VendorError(msg, mut context) => { - context.contexts.push(context_fn().into()); - VegaFusionError::VendorError(msg, context) - } #[cfg(feature = "proto")] DataFusionProtoError(err, mut context) => { context.contexts.push(context_fn().into()); @@ -207,10 +200,6 @@ impl VegaFusionError { } } - pub fn vendor<S: Into<String>>(message: S) -> Self { - Self::VendorError(message.into(), Default::default()) - } - pub fn parse<S: Into<String>>(message: S) -> Self { Self::ParseError(message.into(), Default::default()) } @@ -270,7 +259,6 @@ impl VegaFusionError { DataFusionError(err, context) => { VegaFusionError::ExternalError(err.to_string(), context.clone()) } - VendorError(msg, context) => VegaFusionError::VendorError(msg.clone(), context.clone()), #[cfg(feature = "proto")] DataFusionProtoError(err, context) => { VegaFusionError::ExternalError(err.to_string(), context.clone()) diff --git a/vegafusion-core/Cargo.toml b/vegafusion-core/Cargo.toml index 89538ddcb..6ae3274b4 100644 --- a/vegafusion-core/Cargo.toml +++ b/vegafusion-core/Cargo.toml @@ -2,7 +2,7 @@ name = "vegafusion-core" license = "BSD-3-Clause" edition = "2021" -version = "2.0.8" +version = "2.0.3" description = "Core components required by multiple VegaFusion crates, with WASM compatibility" [features] @@ -48,7 +48,7 @@ workspace = true [dependencies.vegafusion-common] path = "../vegafusion-common" features = ["json", "sqlparser"] -version = "2.0.8" +version = "2.0.3" [dependencies.datafusion-common] workspace = true diff --git a/vegafusion-python/Cargo.toml b/vegafusion-python/Cargo.toml index 7c941d515..9a342f0b5 100644 --- a/vegafusion-python/Cargo.toml +++ b/vegafusion-python/Cargo.toml @@ -2,7 +2,7 @@ name = "vegafusion" license = "BSD-3-Clause" edition = "2021" -version = "2.0.8" +version = "2.0.3" description = "VegaFusion Python interface" [lib] @@ -52,16 +52,16 @@ workspace = true [dependencies.vegafusion-common] path = "../vegafusion-common" features = ["pyo3", "base64"] -version = "2.0.8" +version = "2.0.3" [dependencies.vegafusion-core] path = "../vegafusion-core" features = ["py", "tonic_support"] -version = "2.0.8" +version = "2.0.3" [dependencies.vegafusion-runtime] path = "../vegafusion-runtime" -version = "2.0.8" +version = "2.0.3" [dependencies.datafusion] workspace = true diff --git a/vegafusion-python/checks/README.md b/vegafusion-python/checks/README.md new file mode 100644 index 000000000..5a04f9e7c --- /dev/null +++ b/vegafusion-python/checks/README.md @@ -0,0 +1,4 @@ +## Checks +Python scripts to run on CI that should not be run as part of the pytest test suite. +In particular, to check lazy imports. 
+ diff --git a/vegafusion-python/checks/check_lazy_imports.py new file mode 100644 index 000000000..3ebf86311 --- /dev/null +++ b/vegafusion-python/checks/check_lazy_imports.py @@ -0,0 +1,70 @@ +import sys +from pathlib import Path + +from packaging.version import Version + +root = Path(__file__).parent.parent.parent + + +if __name__ == "__main__": + # Make sure the prominent dependencies are not loaded on import. + # Check narwhals version to see if we should skip pandas/pyarrow checks + import narwhals + import vegafusion as vf  # noqa: F401 + + narwhals_version = Version(narwhals.__version__) + skip_eager_import_check = narwhals_version >= Version("1.43.0") + + for mod in ["polars", "pandas", "pyarrow", "duckdb", "altair"]: + if mod in ["pandas", "pyarrow"] and skip_eager_import_check: + # Skip pandas/pyarrow check for narwhals >= 1.43.0 as it may import + # them eagerly. This appears to be a regression or change in behavior in + # narwhals 1.43.0. We should investigate more and potentially report + # upstream. + print( + f"WARNING: Skipping {mod} lazy import check for narwhals " + f"{narwhals.__version__}" + ) + continue + assert mod not in sys.modules, f"{mod} module should be imported lazily" + + # Create an altair chart with polars and check that pandas and pyarrow are + # not loaded + import altair as alt + import polars as pl + + cars = pl.read_json( + root / "vegafusion-runtime/tests/util/vegajs_runtime/data/cars.json" + ) + + # Build a histogram of horsepower + chart = ( + alt.Chart(cars) + .mark_bar() + .encode( + alt.X("Horsepower:Q", bin=True), + y="count()", + ) + ) + + # Check that the transformed data is a polars DataFrame + transformed = chart.transformed_data() + assert isinstance(transformed, pl.DataFrame) + assert len(transformed["bin_maxbins_10_Horsepower"]) == 10 + + # Do a full pre-transform of the spec + transformed_spec = chart.to_dict(format="vega") + assert isinstance(transformed_spec, dict) + assert "data" in transformed_spec + + # Make sure that pandas and pyarrow were not loaded when using polars + for mod in ["pandas", "pyarrow", "duckdb"]: + if mod in ["pandas", "pyarrow"] and skip_eager_import_check: + # Skip pandas/pyarrow check for narwhals >= 1.43.0 as it may import + # them eagerly + print( + f"WARNING: Skipping {mod} lazy import check for narwhals " + f"{narwhals.__version__}" + ) + continue + assert mod not in sys.modules, f"{mod} module should be imported lazily" diff --git a/vegafusion-python/pyproject.toml index 41e9a0c8e..1abf4ec8f 100644 --- a/vegafusion-python/pyproject.toml +++ b/vegafusion-python/pyproject.toml @@ -1,6 +1,6 @@ [project] -name = "deepnote_vegafusion" -version = "2.0.8" +name = "vegafusion" +version = "2.0.3" description = "Core tools for using VegaFusion from Python" readme = "README.md" requires-python = ">=3.9" @@ -14,12 +14,7 @@ classifiers = [ "License :: OSI Approved :: BSD License", "Topic :: Scientific/Engineering :: Visualization", ] -dependencies = [ - "arro3-core", - "packaging", - "narwhals>=1.42", - "datafusion>=49.0.0,<50.0.0", -] +dependencies = ["arro3-core", "packaging", "narwhals>=1.42"] [[project.authors]] name = "VegaFusion Contributors" [license] file = "LICENSE" [project.urls] Homepage = "https://vegafusion.io" -Repository = "https://github.com/deepnote/vegafusion" +Repository = "https://github.com/hex-inc/vegafusion" Documentation = "https://vegafusion.io" [project.optional-dependencies] diff --git 
a/vegafusion-python/src/lib.rs b/vegafusion-python/src/lib.rs index be08d07b3..525742789 100644 --- a/vegafusion-python/src/lib.rs +++ b/vegafusion-python/src/lib.rs @@ -1,7 +1,5 @@ mod chart_state; mod utils; -mod vendor; - use lazy_static::lazy_static; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; @@ -37,7 +35,6 @@ use vegafusion_runtime::task_graph::cache::VegaFusionCache; use crate::chart_state::PyChartState; use crate::utils::{parse_json_spec, process_inline_datasets}; -use crate::vendor::select_executor_for_vendor; static INIT: Once = Once::new(); @@ -102,19 +99,6 @@ impl PyVegaFusionRuntime { Self::build_with_executor(max_capacity, memory_limit, worker_threads, None) } - #[staticmethod] - #[pyo3(signature = (max_capacity=None, memory_limit=None, worker_threads=None, vendor=None, executor=None))] - pub fn new_embedded_vendor( - max_capacity: Option<usize>, - memory_limit: Option<usize>, - worker_threads: Option<i32>, - vendor: Option<String>, - executor: Option<PyObject>, - ) -> PyResult<Self> { - let rust_executor = select_executor_for_vendor(vendor, executor)?; - Self::build_with_executor(max_capacity, memory_limit, worker_threads, rust_executor) - } - #[staticmethod] pub fn new_grpc(url: &str) -> PyResult<Self> { let tokio_runtime = Arc::new( diff --git a/vegafusion-python/src/vendor.rs b/vegafusion-python/src/vendor.rs deleted file mode 100644 index f3728d669..000000000 --- a/vegafusion-python/src/vendor.rs +++ /dev/null @@ -1,120 +0,0 @@ -use std::sync::Arc; - -use async_trait::async_trait; -use pyo3::exceptions::PyValueError; -use pyo3::prelude::*; -use pyo3::types::PyString; -use tokio::sync::Mutex; -use vegafusion_common::{data::table::VegaFusionTable, datafusion_expr::LogicalPlan}; -use vegafusion_core::runtime::PlanExecutor; -use vegafusion_runtime::sql::logical_plan_to_spark_sql; - -pub struct SparkSqlPlanExecutor { - python_executor: PyObject, - mutex: Arc<Mutex<()>>, -} - -impl SparkSqlPlanExecutor { - pub fn new(python_executor: PyObject) -> Self { - Self { - python_executor, - mutex: Arc::new(Mutex::new(())), - } - } -} - -pub fn select_executor_for_vendor( - vendor: Option<String>, - executor: Option<PyObject>, -) -> PyResult<Option<Arc<dyn PlanExecutor>>> { - match vendor.as_deref() { - Some("sparksql") => { - let py_exec = executor.ok_or_else(|| { - PyValueError::new_err( - "'executor' is required for vendor='sparksql' and must be callable or have execute_plan", - ) - })?; - - Python::with_gil(|py| -> PyResult<()> { - let obj_ref = py_exec.bind(py); - if obj_ref.is_callable() || obj_ref.hasattr("execute_plan")? { - Ok(()) - } else { - Err(PyValueError::new_err( - "Executor must be callable or have an execute_plan method", - )) - } - })?; - - Ok(Some(Arc::new(SparkSqlPlanExecutor::new(py_exec)))) - } - Some("datafusion") | Some("") | None => { - // For DataFusion we don't support passing custom executor from Python (due to issues with - // serialization and deserialization of LogicalPlan). We always use default runtime executor - // by passing None - if executor.is_some() { - return Err(PyValueError::new_err( - "Custom executors are not supported for the default DataFusion runtime. Remove executor parameter or use different vendor.", - )); - } - Ok(None) - } - Some(other) => Err(PyValueError::new_err(format!( - "Unsupported vendor: '{}'. 
Supported vendors: 'datafusion', 'sparksql'", - other - ))), - } -} - -#[async_trait] -impl PlanExecutor for SparkSqlPlanExecutor { - async fn execute_plan( - &self, - plan: LogicalPlan, - ) -> vegafusion_common::error::Result<VegaFusionTable> { - // Acquire mutex lock to ensure only one request runs at a time - let _lock = self.mutex.lock().await; - - // Convert logical plan to SparkSQL - let spark_sql = logical_plan_to_spark_sql(&plan)?; - - let python_executor = &self.python_executor; - let result = tokio::task::spawn_blocking({ - let python_executor = Python::with_gil(|py| python_executor.clone_ref(py)); - let spark_sql = spark_sql.clone(); - - move || { - Python::with_gil(|py| -> PyResult<VegaFusionTable> { - let sql_py = PyString::new(py, &spark_sql); - - let table_result = if python_executor.bind(py).is_callable() { - python_executor.call1(py, (sql_py,)) - } else if python_executor.bind(py).hasattr("execute_plan")? { - let execute_plan_method = - python_executor.bind(py).getattr("execute_plan")?; - execute_plan_method - .call1((sql_py,)) - .map(|result| result.into()) - } else { - return Err(PyValueError::new_err( - "Executor must be callable or have an execute_plan method", - )); - }?; - - VegaFusionTable::from_pyarrow(py, &table_result.bind(py)) - }) - } - }) - .await; - - match result { - Ok(Ok(table)) => Ok(table), - Ok(Err(py_err)) => Err(vegafusion_common::error::VegaFusionError::internal( - format!("Python executor error: {}", py_err), - )), - Err(join_err) => Err(vegafusion_common::error::VegaFusionError::internal( - format!("Failed to execute Python executor: {}", join_err), - )), - } - } -} diff --git a/vegafusion-python/tests/specs/aggregations.json b/vegafusion-python/tests/specs/aggregations.json deleted file mode 100644 index a4cd6ca09..000000000 --- a/vegafusion-python/tests/specs/aggregations.json +++ /dev/null @@ -1,829 +0,0 @@ -{ - "layer": [ - { - "layer": [ - { - "layer": [ - { - "mark": { - "clip": true, - "type": "line", - "color": "#83AFF6", - "tooltip": false - }, - "encoding": { - "x": { - "sort": "descending", - "type": "nominal", - "field": "product_category", - "scale": { - "type": "linear", - "zero": false - } - }, - "y": { - "axis": { - "format": { - "type": "default", - "decimals": null - }, - "formatType": "numberFormatFromNumberType" - }, - "type": "quantitative", - "field": "order_total", - "scale": { - "type": "linear", - "zero": false - }, - "format": { - "type": "default", - "decimals": null - }, - "aggregate": "average", - "formatType": "numberFormatFromNumberType" - }, - "color": { - "type": "nominal", - "datum": "order_total", - "scale": { - "range": [ - "#83AFF6" - ], - "domain": [ - "order_total" - ] - } - }, - "tooltip": [ - { - "type": "quantitative", - "field": "discount", - "aggregate": "average" - } - ], - "opacity": { - "condition": { - "test": { - "and": [ - { - "param": "legend_size_0" - }, - { - "param": "legend_color_0" - } - ] - }, - "value": 1 - }, - "value": 0.2 - } - }, - "transform": [], - "params": [ - { - "name": "legend_size_0", - "select": { - "type": "point", - "encodings": [ - "size" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "legend_color_0", - "select": { - "type": "point", - "encodings": [ - "color" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "interval_selection", - "select": { - "type": "interval", - "encodings": [ - "x", - "y" - ] - } - } - ] - } - ] - }, - { - "layer": [ - { - "mark": { - "clip": true, - "type": "line", - "color": "#2266D3", - "tooltip": false - }, - "encoding": { - "x": { - "sort": 
"descending", - "type": "nominal", - "field": "product_category", - "scale": { - "type": "linear", - "zero": false - } - }, - "y": { - "axis": { - "format": { - "type": "default", - "decimals": null - }, - "formatType": "numberFormatFromNumberType" - }, - "type": "quantitative", - "field": "order_total", - "scale": { - "type": "linear", - "zero": false - }, - "format": { - "type": "default", - "decimals": null - }, - "aggregate": "median", - "formatType": "numberFormatFromNumberType" - }, - "color": { - "type": "nominal", - "datum": "order_total", - "scale": { - "range": [ - "#2266D3" - ], - "domain": [ - "order_total" - ] - } - }, - "tooltip": [ - { - "type": "quantitative", - "field": "discount", - "aggregate": "median" - } - ], - "opacity": { - "condition": { - "test": { - "and": [ - { - "param": "legend_size_1" - }, - { - "param": "legend_color_1" - } - ] - }, - "value": 1 - }, - "value": 0.2 - } - }, - "transform": [], - "params": [ - { - "name": "legend_size_1", - "select": { - "type": "point", - "encodings": [ - "size" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "legend_color_1", - "select": { - "type": "point", - "encodings": [ - "color" - ], - "toggle": "true" - }, - "bind": "legend" - } - ] - } - ] - }, - { - "layer": [ - { - "mark": { - "clip": true, - "type": "line", - "color": "#CC8BFF", - "tooltip": false - }, - "encoding": { - "x": { - "sort": "descending", - "type": "nominal", - "field": "product_category", - "scale": { - "type": "linear", - "zero": false - } - }, - "y": { - "axis": { - "format": { - "type": "default", - "decimals": null - }, - "formatType": "numberFormatFromNumberType" - }, - "type": "quantitative", - "field": "order_total", - "scale": { - "type": "linear", - "zero": false - }, - "format": { - "type": "default", - "decimals": null - }, - "aggregate": "min", - "formatType": "numberFormatFromNumberType" - }, - "color": { - "type": "nominal", - "datum": "order_total", - "scale": { - "range": [ - "#CC8BFF" - ], - "domain": [ - "order_total" - ] - } - }, - "tooltip": [ - { - "type": "quantitative", - "field": "discount", - "aggregate": "min" - } - ], - "opacity": { - "condition": { - "test": { - "and": [ - { - "param": "legend_size_2" - }, - { - "param": "legend_color_2" - } - ] - }, - "value": 1 - }, - "value": 0.2 - } - }, - "transform": [], - "params": [ - { - "name": "legend_size_2", - "select": { - "type": "point", - "encodings": [ - "size" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "legend_color_2", - "select": { - "type": "point", - "encodings": [ - "color" - ], - "toggle": "true" - }, - "bind": "legend" - } - ] - } - ] - }, - { - "layer": [ - { - "mark": { - "clip": true, - "type": "line", - "color": "#8018D3", - "tooltip": false - }, - "encoding": { - "x": { - "sort": "descending", - "type": "nominal", - "field": "product_category", - "scale": { - "type": "linear", - "zero": false - } - }, - "y": { - "axis": { - "format": { - "type": "default", - "decimals": null - }, - "formatType": "numberFormatFromNumberType" - }, - "type": "quantitative", - "field": "order_total", - "scale": { - "type": "linear", - "zero": false - }, - "format": { - "type": "default", - "decimals": null - }, - "aggregate": "max", - "formatType": "numberFormatFromNumberType" - }, - "color": { - "type": "nominal", - "datum": "order_total", - "scale": { - "range": [ - "#8018D3" - ], - "domain": [ - "order_total" - ] - } - }, - "tooltip": [ - { - "type": "quantitative", - "field": "discount", - "aggregate": "max" - } - ], - "opacity": { - 
"condition": { - "test": { - "and": [ - { - "param": "legend_size_3" - }, - { - "param": "legend_color_3" - } - ] - }, - "value": 1 - }, - "value": 0.2 - } - }, - "transform": [], - "params": [ - { - "name": "legend_size_3", - "select": { - "type": "point", - "encodings": [ - "size" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "legend_color_3", - "select": { - "type": "point", - "encodings": [ - "color" - ], - "toggle": "true" - }, - "bind": "legend" - } - ] - } - ] - }, - { - "layer": [ - { - "mark": { - "clip": true, - "type": "line", - "color": "#0E9384", - "tooltip": false - }, - "encoding": { - "x": { - "sort": "descending", - "type": "nominal", - "field": "product_category", - "scale": { - "type": "linear", - "zero": false - } - }, - "y": { - "axis": { - "format": { - "type": "default", - "decimals": null - }, - "formatType": "numberFormatFromNumberType" - }, - "type": "quantitative", - "field": "order_total", - "scale": { - "type": "linear", - "zero": false - }, - "format": { - "type": "default", - "decimals": null - }, - "aggregate": "count", - "formatType": "numberFormatFromNumberType" - }, - "color": { - "type": "nominal", - "datum": "order_total", - "scale": { - "range": [ - "#0E9384" - ], - "domain": [ - "order_total" - ] - } - }, - "tooltip": [ - { - "axis": { - "format": { - "type": "default", - "decimals": null - }, - "formatType": "numberFormatFromNumberType" - }, - "type": "quantitative", - "field": "order_total", - "scale": { - "type": "linear", - "zero": false - }, - "format": { - "type": "default", - "decimals": null - }, - "aggregate": "count", - "formatType": "numberFormatFromNumberType" - }, - { - "type": "quantitative", - "field": "discount", - "aggregate": "sum" - } - ], - "opacity": { - "condition": { - "test": { - "and": [ - { - "param": "legend_size_4" - }, - { - "param": "legend_color_4" - } - ] - }, - "value": 1 - }, - "value": 0.2 - } - }, - "transform": [], - "params": [ - { - "name": "legend_size_4", - "select": { - "type": "point", - "encodings": [ - "size" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "legend_color_4", - "select": { - "type": "point", - "encodings": [ - "color" - ], - "toggle": "true" - }, - "bind": "legend" - } - ] - } - ] - }, - { - "layer": [ - { - "mark": { - "clip": true, - "type": "line", - "color": "#5FD9C3", - "tooltip": false - }, - "encoding": { - "x": { - "sort": "descending", - "type": "nominal", - "field": "product_category", - "scale": { - "type": "linear", - "zero": false - } - }, - "y": { - "axis": { - "format": { - "type": "default", - "decimals": null - }, - "formatType": "numberFormatFromNumberType" - }, - "type": "quantitative", - "field": "customer_name", - "scale": { - "type": "linear", - "zero": false - }, - "format": { - "type": "default", - "decimals": null - }, - "aggregate": "distinct", - "formatType": "numberFormatFromNumberType" - }, - "color": { - "type": "nominal", - "datum": "customer_name", - "scale": { - "range": [ - "#5FD9C3" - ], - "domain": [ - "customer_name" - ] - } - }, - "tooltip": [ - { - "type": "quantitative", - "field": "discount", - "aggregate": "distinct" - } - ], - "opacity": { - "condition": { - "test": { - "and": [ - { - "param": "legend_size_5" - }, - { - "param": "legend_color_5" - } - ] - }, - "value": 1 - }, - "value": 0.2 - } - }, - "transform": [], - "params": [ - { - "name": "legend_size_5", - "select": { - "type": "point", - "encodings": [ - "size" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "legend_color_5", - 
"select": { - "type": "point", - "encodings": [ - "color" - ], - "toggle": "true" - }, - "bind": "legend" - } - ] - } - ] - } - ], - "resolve": { - "scale": { - "color": "independent" - } - } - }, - { - "layer": [ - { - "layer": [ - { - "mark": { - "clip": true, - "type": "line", - "color": "#EEC026", - "tooltip": false - }, - "encoding": { - "x": { - "sort": "descending", - "type": "nominal", - "field": "product_category", - "scale": { - "type": "linear", - "zero": false - } - }, - "y": { - "axis": { - "title": null - }, - "type": "quantitative", - "field": "order_total", - "scale": { - "zero": false - }, - "format": { - "type": "default", - "decimals": null - }, - "aggregate": "sum", - "formatType": "numberFormatFromNumberType" - }, - "color": { - "type": "nominal", - "datum": "order_total", - "scale": { - "range": [ - "#EEC026" - ], - "domain": [ - "order_total" - ] - } - }, - "tooltip": [ - { - "type": "quantitative", - "field": "discount", - "aggregate": "sum" - } - ], - "opacity": { - "condition": { - "test": { - "and": [ - { - "param": "legend_size_6" - }, - { - "param": "legend_color_6" - } - ] - }, - "value": 1 - }, - "value": 0.2 - } - }, - "transform": [], - "params": [ - { - "name": "legend_size_6", - "select": { - "type": "point", - "encodings": [ - "size" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "legend_color_6", - "select": { - "type": "point", - "encodings": [ - "color" - ], - "toggle": "true" - }, - "bind": "legend" - } - ] - } - ] - } - ], - "resolve": { - "scale": { - "color": "independent" - } - } - } - ], - "title": "", - "config": { - "legend": { - "disable": false - }, - "axisQuantitative": { - "tickCount": 5 - }, - "area": { - "line": true - }, - "mark": { - "color": "#2266D3" - }, - "customFormatTypes": true - }, - "$schema": "https://vega.github.io/schema/vega-lite/v5.json", - "resolve": { - "scale": { - "y": "independent" - } - }, - "encoding": {}, - "usermeta": { - "seriesNames": [ - "order_total", - "order_total", - "order_total", - "order_total", - "order_total", - "customer_name", - "order_total" - ], - "seriesOrder": [ - 0, - 1, - 2, - 3, - 4, - 6, - 5 - ], - "specSchemaVersion": 2, - "tooltipDefaultMode": false - }, - "height": "container", - "width": "container", - "autosize": { - "type": "fit" - }, - "data": { - "url": "vegafusion+dataset://sales_data_1kk" - } -} diff --git a/vegafusion-python/tests/specs/bin.json b/vegafusion-python/tests/specs/bin.json deleted file mode 100644 index 55b85f231..000000000 --- a/vegafusion-python/tests/specs/bin.json +++ /dev/null @@ -1,165 +0,0 @@ -{ - "layer": [ - { - "layer": [ - { - "layer": [ - { - "mark": { - "clip": true, - "type": "bar", - "color": "#2266D3", - "tooltip": true - }, - "encoding": { - "x": { - "bin": { - "step": 10 - }, - "sort": "descending", - "type": "quantitative", - "field": "customer_age", - "scale": { - "type": "linear" - } - }, - "y": { - "axis": { - "format": { - "type": "default", - "decimals": null - }, - "formatType": "numberFormatFromNumberType" - }, - "type": "quantitative", - "field": "customer_name", - "scale": { - "type": "linear" - }, - "format": { - "type": "default", - "decimals": null - }, - "aggregate": "count", - "formatType": "numberFormatFromNumberType" - }, - "color": { - "type": "nominal", - "datum": "Orders", - "scale": { - "range": [ - "#2266D3" - ], - "domain": [ - "Orders" - ] - } - }, - "opacity": { - "condition": { - "test": { - "and": [ - { - "param": "legend_size_0" - }, - { - "param": "legend_color_0" - }, - { - "param": 
"interval_selection" - } - ] - }, - "value": 1 - }, - "value": 0.2 - } - }, - "transform": [], - "params": [ - { - "name": "legend_size_0", - "select": { - "type": "point", - "encodings": [ - "size" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "legend_color_0", - "select": { - "type": "point", - "encodings": [ - "color" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "interval_selection", - "select": { - "type": "interval", - "encodings": [ - "x", - "y" - ] - } - } - ] - } - ] - } - ], - "resolve": { - "scale": { - "color": "independent" - } - } - } - ], - "title": "", - "config": { - "legend": { - "disable": false - }, - "axisQuantitative": { - "tickCount": 5 - }, - "area": { - "line": true - }, - "mark": { - "color": "#2266D3" - }, - "customFormatTypes": true - }, - "$schema": "https://vega.github.io/schema/vega-lite/v5.json", - "encoding": {}, - "usermeta": { - "seriesNames": [ - "Orders" - ], - "seriesOrder": [ - 0 - ], - "aditionalTypeInfo": { - "histogramLayerIndexes": [ - 0 - ] - }, - "specSchemaVersion": 2, - "tooltipDefaultMode": true - }, - "height": "container", - "width": "container", - "autosize": { - "type": "fit" - }, - "data": { - "url": "vegafusion+dataset://sales_data_1kk" - } -} diff --git a/vegafusion-python/tests/specs/dual_axis.json b/vegafusion-python/tests/specs/dual_axis.json deleted file mode 100644 index 64b6d67d7..000000000 --- a/vegafusion-python/tests/specs/dual_axis.json +++ /dev/null @@ -1,294 +0,0 @@ -{ - "layer": [ - { - "layer": [ - { - "layer": [ - { - "mark": { - "clip": true, - "type": "bar", - "color": "#2266D3", - "tooltip": false - }, - "encoding": { - "x": { - "sort": { - "op": "sum", - "field": "order_total", - "order": "descending" - }, - "type": "nominal", - "field": "product_category", - "scale": { - "type": "linear" - } - }, - "y": { - "axis": { - "format": { - "type": "default", - "decimals": null - }, - "formatType": "numberFormatFromNumberType" - }, - "type": "quantitative", - "field": "order_total", - "scale": { - "type": "linear" - }, - "format": { - "type": "default", - "decimals": null - }, - "aggregate": "sum", - "formatType": "numberFormatFromNumberType" - }, - "color": { - "type": "nominal", - "datum": "order_total", - "scale": { - "range": [ - "#2266D3" - ], - "domain": [ - "order_total" - ] - } - }, - "tooltip": [ - { - "type": "quantitative", - "field": "discount", - "aggregate": "sum" - } - ], - "xOffset": { - "datum": "series_0" - }, - "opacity": { - "condition": { - "test": { - "and": [ - { - "param": "legend_size_0" - }, - { - "param": "legend_color_0" - }, - { - "param": "interval_selection" - } - ] - }, - "value": 1 - }, - "value": 0.2 - } - }, - "transform": [], - "params": [ - { - "name": "legend_size_0", - "select": { - "type": "point", - "encodings": [ - "size" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "legend_color_0", - "select": { - "type": "point", - "encodings": [ - "color" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "interval_selection", - "select": { - "type": "interval", - "encodings": [ - "x", - "y" - ] - } - } - ] - } - ] - } - ], - "resolve": { - "scale": { - "color": "independent" - } - } - }, - { - "layer": [ - { - "layer": [ - { - "mark": { - "clip": true, - "type": "bar", - "color": "#83AFF6", - "tooltip": false - }, - "encoding": { - "x": { - "sort": { - "op": "sum", - "field": "order_total", - "order": "descending" - }, - "type": "nominal", - "field": "product_category", - "scale": { - "type": "linear" - } - }, - 
"y": { - "axis": { - "title": null - }, - "type": "quantitative", - "field": "discount", - "scale": {}, - "format": { - "type": "default", - "decimals": null - }, - "aggregate": "sum", - "formatType": "numberFormatFromNumberType" - }, - "color": { - "type": "nominal", - "datum": "discount", - "scale": { - "range": [ - "#83AFF6" - ], - "domain": [ - "discount" - ] - } - }, - "tooltip": [ - { - "type": "quantitative", - "field": "discount", - "format": { - "type": "default", - "decimals": null - }, - "aggregate": "sum" - } - ], - "xOffset": { - "datum": "series_1" - }, - "opacity": { - "condition": { - "test": { - "and": [ - { - "param": "legend_size_1" - }, - { - "param": "legend_color_1" - } - ] - }, - "value": 1 - }, - "value": 0.2 - } - }, - "transform": [], - "params": [ - { - "name": "legend_size_1", - "select": { - "type": "point", - "encodings": [ - "size" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "legend_color_1", - "select": { - "type": "point", - "encodings": [ - "color" - ], - "toggle": "true" - }, - "bind": "legend" - } - ] - } - ] - } - ], - "resolve": { - "scale": { - "color": "independent" - } - } - } - ], - "title": "", - "config": { - "legend": { - "disable": false - }, - "axisQuantitative": { - "tickCount": 5 - }, - "area": { - "line": true - }, - "mark": { - "color": "#2266D3" - }, - "customFormatTypes": true - }, - "$schema": "https://vega.github.io/schema/vega-lite/v5.json", - "resolve": { - "scale": { - "y": "independent" - } - }, - "encoding": {}, - "usermeta": { - "seriesNames": [ - "order_total", - "discount" - ], - "seriesOrder": [ - 0, - 1 - ], - "specSchemaVersion": 2, - "tooltipDefaultMode": false - }, - "height": "container", - "width": "container", - "autosize": { - "type": "fit" - }, - "data": { - "url": "vegafusion+dataset://sales_data_1kk" - } -} \ No newline at end of file diff --git a/vegafusion-python/tests/specs/group_by_categorical.json b/vegafusion-python/tests/specs/group_by_categorical.json deleted file mode 100644 index 8fefc90b1..000000000 --- a/vegafusion-python/tests/specs/group_by_categorical.json +++ /dev/null @@ -1,153 +0,0 @@ -{ - "layer": [ - { - "layer": [ - { - "layer": [ - { - "mark": { - "clip": true, - "type": "bar", - "tooltip": true - }, - "encoding": { - "x": { - "sort": "descending", - "type": "nominal", - "field": "product_category", - "scale": { - "type": "linear" - } - }, - "y": { - "axis": { - "format": { - "type": "default", - "decimals": null - }, - "formatType": "numberFormatFromNumberType" - }, - "type": "quantitative", - "field": "order_total", - "scale": { - "type": "linear" - }, - "stack": "zero", - "format": { - "type": "default", - "decimals": null - }, - "aggregate": "sum", - "formatType": "numberFormatFromNumberType" - }, - "color": { - "sort": null, - "type": "nominal", - "field": "customer_segment", - "scale": { - "scheme": "deepnote10" - } - }, - "opacity": { - "condition": { - "test": { - "and": [ - { - "param": "legend_size_0" - }, - { - "param": "legend_color_0" - }, - { - "param": "interval_selection" - } - ] - }, - "value": 1 - }, - "value": 0.2 - } - }, - "transform": [], - "params": [ - { - "name": "legend_size_0", - "select": { - "type": "point", - "encodings": [ - "size" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "legend_color_0", - "select": { - "type": "point", - "encodings": [ - "color" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "interval_selection", - "select": { - "type": "interval", - "encodings": [ - "x", - "y" - ] - } - } - ] 
- } - ] - } - ], - "resolve": { - "scale": { - "color": "independent" - } - } - } - ], - "title": "", - "config": { - "legend": { - "disable": false - }, - "axisQuantitative": { - "tickCount": 5 - }, - "area": { - "line": true - }, - "mark": { - "color": "#2266D3" - }, - "customFormatTypes": true - }, - "$schema": "https://vega.github.io/schema/vega-lite/v5.json", - "encoding": {}, - "usermeta": { - "seriesNames": [ - "Orders" - ], - "seriesOrder": [ - 0 - ], - "specSchemaVersion": 2, - "tooltipDefaultMode": true - }, - "height": "container", - "width": "container", - "autosize": { - "type": "fit" - }, - "data": { - "url": "vegafusion+dataset://sales_data_1kk" - } -} \ No newline at end of file diff --git a/vegafusion-python/tests/specs/group_by_numerical.json b/vegafusion-python/tests/specs/group_by_numerical.json deleted file mode 100644 index 69e3acd7b..000000000 --- a/vegafusion-python/tests/specs/group_by_numerical.json +++ /dev/null @@ -1,154 +0,0 @@ -{ - "layer": [ - { - "layer": [ - { - "layer": [ - { - "mark": { - "clip": true, - "type": "bar", - "tooltip": true - }, - "encoding": { - "x": { - "sort": "descending", - "type": "nominal", - "field": "product_category", - "scale": { - "type": "linear" - } - }, - "y": { - "axis": { - "format": { - "type": "default", - "decimals": null - }, - "formatType": "numberFormatFromNumberType" - }, - "type": "quantitative", - "field": "order_total", - "scale": { - "type": "linear" - }, - "stack": "zero", - "format": { - "type": "default", - "decimals": null - }, - "aggregate": "sum", - "formatType": "numberFormatFromNumberType" - }, - "color": { - "sort": null, - "type": "quantitative", - "field": "customer_age", - "scale": { - "scheme": "deepnote_blues" - }, - "aggregate": "average" - }, - "opacity": { - "condition": { - "test": { - "and": [ - { - "param": "legend_size_0" - }, - { - "param": "legend_color_0" - }, - { - "param": "interval_selection" - } - ] - }, - "value": 1 - }, - "value": 0.2 - } - }, - "transform": [], - "params": [ - { - "name": "legend_size_0", - "select": { - "type": "point", - "encodings": [ - "size" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "legend_color_0", - "select": { - "type": "point", - "encodings": [ - "color" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "interval_selection", - "select": { - "type": "interval", - "encodings": [ - "x", - "y" - ] - } - } - ] - } - ] - } - ], - "resolve": { - "scale": { - "color": "independent" - } - } - } - ], - "title": "", - "config": { - "legend": { - "disable": false - }, - "axisQuantitative": { - "tickCount": 5 - }, - "area": { - "line": true - }, - "mark": { - "color": "#2266D3" - }, - "customFormatTypes": true - }, - "$schema": "https://vega.github.io/schema/vega-lite/v5.json", - "encoding": {}, - "usermeta": { - "seriesNames": [ - "Orders" - ], - "seriesOrder": [ - 0 - ], - "specSchemaVersion": 2, - "tooltipDefaultMode": true - }, - "height": "container", - "width": "container", - "autosize": { - "type": "fit" - }, - "data": { - "url": "vegafusion+dataset://sales_data_1kk" - } -} diff --git a/vegafusion-python/tests/specs/sales_data_1kk.parquet b/vegafusion-python/tests/specs/sales_data_1kk.parquet deleted file mode 100644 index cc359760b..000000000 Binary files a/vegafusion-python/tests/specs/sales_data_1kk.parquet and /dev/null differ diff --git a/vegafusion-python/tests/specs/simple.json b/vegafusion-python/tests/specs/simple.json deleted file mode 100644 index 0960782cc..000000000 --- 
a/vegafusion-python/tests/specs/simple.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "data": { - "url": "vegafusion+dataset://sales_data_1kk" - }, - "layer": [ - { - "layer": [ - { - "layer": [ - { - "mark": { - "clip": true, - "type": "bar", - "color": "#2266D3", - "tooltip": true - }, - "encoding": { - "x": { - "sort": { - "op": "sum", - "field": "order_total", - "order": "descending" - }, - "type": "nominal", - "field": "product_category", - "scale": { - "type": "linear" - } - }, - "y": { - "axis": { - "format": { - "type": "default", - "decimals": null - }, - "formatType": "numberFormatFromNumberType" - }, - "type": "quantitative", - "field": "order_total", - "scale": { - "type": "linear" - }, - "format": { - "type": "default", - "decimals": null - }, - "aggregate": "sum", - "formatType": "numberFormatFromNumberType" - }, - "color": { - "type": "nominal", - "datum": "order_total", - "scale": { - "range": [ - "#2266D3" - ], - "domain": [ - "order_total" - ] - } - }, - "xOffset": { - "datum": "series_0" - } - }, - "transform": [] - } - ] - } - ], - "resolve": { - "scale": { - "color": "independent" - } - } - } - ], - "config": { - "legend": { - "disable": false - } - }, - "$schema": "https://vega.github.io/schema/vega-lite/v5.json", - "encoding": {} - } \ No newline at end of file diff --git a/vegafusion-python/tests/specs/temporal.json b/vegafusion-python/tests/specs/temporal.json deleted file mode 100644 index 4514cb7bc..000000000 --- a/vegafusion-python/tests/specs/temporal.json +++ /dev/null @@ -1,163 +0,0 @@ -{ - "layer": [ - { - "layer": [ - { - "layer": [ - { - "mark": { - "clip": true, - "type": "bar", - "color": "#2266D3", - "tooltip": true - }, - "encoding": { - "x": { - "axis": { - "grid": false - }, - "sort": "descending", - "type": "temporal", - "field": "datetime", - "scale": { - "type": "linear" - }, - "timeUnit": "yearmonth", - "bandPosition": 0 - }, - "y": { - "axis": { - "format": { - "type": "default", - "decimals": null - }, - "formatType": "numberFormatFromNumberType" - }, - "type": "quantitative", - "field": "order_total", - "scale": { - "type": "linear" - }, - "stack": "zero", - "format": { - "type": "default", - "decimals": null - }, - "aggregate": "sum", - "formatType": "numberFormatFromNumberType" - }, - "color": { - "type": "nominal", - "datum": "Orders", - "scale": { - "range": [ - "#2266D3" - ], - "domain": [ - "Orders" - ] - } - }, - "opacity": { - "condition": { - "test": { - "and": [ - { - "param": "legend_size_0" - }, - { - "param": "legend_color_0" - }, - { - "param": "interval_selection" - } - ] - }, - "value": 1 - }, - "value": 0.2 - } - }, - "transform": [], - "params": [ - { - "name": "legend_size_0", - "select": { - "type": "point", - "encodings": [ - "size" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "legend_color_0", - "select": { - "type": "point", - "encodings": [ - "color" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "interval_selection", - "select": { - "type": "interval", - "encodings": [ - "x", - "y" - ] - } - } - ] - } - ] - } - ], - "resolve": { - "scale": { - "color": "independent" - } - } - } - ], - "title": "", - "config": { - "legend": { - "disable": false - }, - "axisQuantitative": { - "tickCount": 5 - }, - "area": { - "line": true - }, - "mark": { - "color": "#2266D3" - }, - "customFormatTypes": true - }, - "$schema": "https://vega.github.io/schema/vega-lite/v5.json", - "encoding": {}, - "usermeta": { - "seriesNames": [ - "Orders" - ], - "seriesOrder": [ - 0 - ], - "specSchemaVersion": 2, - 
"tooltipDefaultMode": true - }, - "height": "container", - "width": "container", - "autosize": { - "type": "fit" - }, - "data": { - "url": "vegafusion+dataset://sales_data_1kk" - } -} \ No newline at end of file diff --git a/vegafusion-python/tests/specs/tooltip_value_labels.json b/vegafusion-python/tests/specs/tooltip_value_labels.json deleted file mode 100644 index b574fc9bc..000000000 --- a/vegafusion-python/tests/specs/tooltip_value_labels.json +++ /dev/null @@ -1,277 +0,0 @@ -{ - "layer": [ - { - "layer": [ - { - "layer": [ - { - "mark": { - "clip": true, - "type": "bar", - "color": "#2266D3", - "tooltip": false - }, - "encoding": { - "x": { - "sort": "descending", - "type": "nominal", - "field": "product_category", - "scale": { - "type": "linear" - } - }, - "y": { - "axis": { - "format": { - "type": "default", - "decimals": null - }, - "formatType": "numberFormatFromNumberType" - }, - "type": "quantitative", - "field": "order_total", - "scale": { - "type": "linear" - }, - "format": { - "type": "default", - "decimals": null - }, - "aggregate": "sum", - "formatType": "numberFormatFromNumberType" - }, - "color": { - "type": "nominal", - "datum": "Orders", - "scale": { - "range": [ - "#2266D3" - ], - "domain": [ - "Orders" - ] - } - }, - "tooltip": [ - { - "type": "quantitative", - "field": "discount", - "aggregate": "sum" - }, - { - "type": "quantitative", - "field": "order_total", - "format": { - "type": "default", - "decimals": null - }, - "aggregate": "sum" - } - ], - "xOffset": { - "datum": "series_0" - }, - "opacity": { - "condition": { - "test": { - "and": [ - { - "param": "legend_size_0" - }, - { - "param": "legend_color_0" - }, - { - "param": "interval_selection" - } - ] - }, - "value": 1 - }, - "value": 0.2 - } - }, - "transform": [], - "params": [ - { - "name": "legend_size_0", - "select": { - "type": "point", - "encodings": [ - "size" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "legend_color_0", - "select": { - "type": "point", - "encodings": [ - "color" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "interval_selection", - "select": { - "type": "interval", - "encodings": [ - "x", - "y" - ] - } - } - ] - }, - { - "mark": { - "dx": 0, - "dy": -1, - "fill": "black", - "type": "text", - "align": "center", - "baseline": "bottom" - }, - "encoding": { - "x": { - "sort": "descending", - "type": "nominal", - "field": "product_category", - "scale": { - "type": "linear" - } - }, - "y": { - "axis": { - "format": { - "type": "default", - "decimals": null - }, - "formatType": "numberFormatFromNumberType" - }, - "type": "quantitative", - "field": "order_total", - "scale": { - "type": "linear" - }, - "format": { - "type": "default", - "decimals": null - }, - "aggregate": "sum", - "formatType": "numberFormatFromNumberType" - }, - "text": { - "type": "quantitative", - "field": "order_total", - "aggregate": "sum" - }, - "color": { - "type": "nominal", - "datum": "Orders", - "scale": { - "range": [ - "#2266D3" - ], - "domain": [ - "Orders" - ] - } - }, - "tooltip": [ - { - "type": "quantitative", - "field": "discount", - "aggregate": "sum" - }, - { - "type": "quantitative", - "field": "order_total", - "format": { - "type": "default", - "decimals": null - }, - "aggregate": "sum" - } - ], - "xOffset": { - "datum": "series_0" - } - }, - "transform": [], - "params": [ - { - "name": "legend_size_0_0", - "select": { - "type": "point", - "encodings": [ - "size" - ], - "toggle": "true" - }, - "bind": "legend" - }, - { - "name": "legend_color_0_0", - "select": 
{ - "type": "point", - "encodings": [ - "color" - ], - "toggle": "true" - }, - "bind": "legend" - } - ] - } - ] - } - ], - "resolve": { - "scale": { - "color": "independent" - } - } - } - ], - "title": "", - "config": { - "legend": { - "disable": false - }, - "axisQuantitative": { - "tickCount": 5 - }, - "area": { - "line": true - }, - "mark": { - "color": "#2266D3" - }, - "customFormatTypes": true - }, - "$schema": "https://vega.github.io/schema/vega-lite/v5.json", - "encoding": {}, - "usermeta": { - "seriesNames": [ - "Orders" - ], - "seriesOrder": [ - 0 - ], - "specSchemaVersion": 2, - "tooltipDefaultMode": false - }, - "height": "container", - "width": "container", - "autosize": { - "type": "fit" - }, - "data": { - "url": "vegafusion+dataset://sales_data_1kk" - } -} diff --git a/vegafusion-python/tests/test_jupyter_widget.py b/vegafusion-python/tests/test_jupyter_widget.py index 3c7a9d0bf..009bdc779 100644 --- a/vegafusion-python/tests/test_jupyter_widget.py +++ b/vegafusion-python/tests/test_jupyter_widget.py @@ -33,7 +33,7 @@ altair_default_template = r""" ```python import altair as alt -alt.renderers.enable('default', embed_options={'actions': False}); +alt.renderers.enable('default', embed_options={'actions': False, 'renderer': 'canvas'}); ``` ```python @@ -50,7 +50,7 @@ ```python import vegafusion import altair as alt -alt.renderers.enable('jupyter', embed_options={'actions': False}); +alt.renderers.enable('jupyter', embed_options={'actions': False, 'renderer': 'canvas'}); alt.data_transformers.enable('vegafusion'); ``` @@ -332,6 +332,11 @@ def test_vegafusion_widget(): # Display with default altair renderer notebook_text_alt = f""" +```python +import altair as alt +alt.renderers.enable('default', embed_options={{'actions': False, 'renderer': 'canvas'}}); +``` + ```python {altair_chart_str} diff --git a/vegafusion-python/tests/test_spark_e2e.py b/vegafusion-python/tests/test_spark_e2e.py deleted file mode 100644 index e4a28a0f8..000000000 --- a/vegafusion-python/tests/test_spark_e2e.py +++ /dev/null @@ -1,138 +0,0 @@ -from __future__ import annotations -from pathlib import Path -import json -import pytest - -import pandas as pd -from pyspark.sql import SparkSession -from pyspark.sql.functions import col, to_utc_timestamp, current_timezone -from pyspark.sql.pandas.types import to_arrow_schema -import vl_convert as vlc -import vegafusion as vf -import pyarrow as pa -from typing import Optional - - -SPEC_ROOT = (Path(__file__).parent / "specs").resolve() -SALES_DATA_PATH = SPEC_ROOT / "sales_data_1kk.parquet" -SALES_DATA_DF = pd.read_parquet(SALES_DATA_PATH) - - -def _discover_spec_files(limit: Optional[int] = None) -> list[Path]: - specs_all = SPEC_ROOT.rglob("*.json") - specs_filtered = [p for p in specs_all if not p.name.startswith("_")] - specs_sorted = sorted(specs_filtered) - return specs_sorted[:limit] if limit is not None else specs_sorted - - -@pytest.fixture(scope="session") -def spark(): - """Initialise a local SparkSession for the duration of the test session.""" - - session: SparkSession = ( - SparkSession.builder.appName("vegafusion-e2e") - .config("spark.sql.execution.arrow.pyspark.fallback.enabled", "true") - .config("spark.sql.legacy.parquet.nanosAsLong", "true") - .config("spark.sql.execution.arrow.pyspark.enabled", "true") - .config("spark.executor.memory", "8g") - .config("spark.driver.memory", "8g") - .master("local[2]") - .getOrCreate() - ) - - # This is required for properly handling temporal. 
Due to how Spark handles dates (which doesn't match - proper SQL standard), we have to set timezone to UTC. We mention this in our docs for users. - session.sql("SET TIME ZONE 'UTC'") - - sales_data_df = session.read.parquet(str(SALES_DATA_PATH)) - - # Convert datetime column from bigint (nanoseconds) to actual timestamp - sales_data_df = sales_data_df.withColumn( - "datetime", - to_utc_timestamp((col("datetime") / 1e9).cast("timestamp"), current_timezone()), - ) - - sales_data_df.createOrReplaceTempView("sales_data_1kk") - - yield session - - session.stop() - - -_SPEC_FILES = [p for p in _discover_spec_files() if isinstance(p, Path)] - - -@pytest.mark.parametrize("spec_path", _SPEC_FILES, ids=[p.stem for p in _SPEC_FILES]) -def test_spec_against_spark(spec_path: Path, spark: SparkSession): - """End-to-end comparison between in-memory evaluation and Spark SQL. - - For every Vega-Lite spec we: - - 1. Load the spec JSON and discover the associated datasets. - 2. Evaluate the spec with the *in-memory* Vegafusion runtime to obtain the - expected result. - 3. Register the datasets as Spark SQL tables. - 4. Ask Vegafusion to generate the equivalent Spark SQL statements. - 5. Execute the SQL with Spark and collect the actual result. - 6. Compare *expected* vs *actual*. - """ - - print(f"Testing {spec_path.name}") - - vegalite_spec = json.loads(spec_path.read_text("utf8")) - vega_spec = vlc.vegalite_to_vega(vegalite_spec) - - print("Generating Pandas-based transformed spec") - inmemory_spec, inmemory_datasets, _ = vf.runtime.pre_transform_extract( - spec=vega_spec, - preserve_interactivity=False, - local_tz="UTC", - default_input_tz="UTC", - inline_datasets={"sales_data_1kk": SALES_DATA_DF}, - extract_threshold=0, - extracted_format="pyarrow", - ) - - print("Generating Spark-backed results via custom executor") - - def spark_executor(sql_query: str) -> pa.Table: - spark_df = spark.sql(sql_query) - return pa.Table.from_pandas(spark_df.toPandas()) - - vf_spark_runtime = vf.VegaFusionRuntime.new_vendor("sparksql", spark_executor) - - sales_schema = to_arrow_schema(spark.table("sales_data_1kk").schema) - - spark_spec, spark_datasets, _ = vf_spark_runtime.pre_transform_extract( - spec=vega_spec, - preserve_interactivity=False, - local_tz="UTC", - default_input_tz="UTC", - inline_datasets={"sales_data_1kk": sales_schema}, - extract_threshold=0, - extracted_format="pyarrow", - ) - - print("Comparing transformed specs (Pandas vs Spark)") - - assert spark_spec == inmemory_spec - - assert len(inmemory_datasets) == len(spark_datasets), ( - f"Dataset count mismatch: in-memory={len(inmemory_datasets)} " - f"vs spark={len(spark_datasets)}" - ) - - for inmemory_ds, spark_ds in zip(inmemory_datasets, spark_datasets): - inmemory_name, _, inmemory_data = inmemory_ds - spark_name, _, spark_data = spark_ds - - assert inmemory_name == spark_name - - pd.testing.assert_frame_equal( - inmemory_data.to_pandas(), - spark_data.to_pandas(), - check_dtype=False, - check_like=True, - atol=1e-6, - rtol=1e-6, - ) diff --git a/vegafusion-python/vegafusion/runtime.py b/vegafusion-python/vegafusion/runtime.py index 760561aa5..84034eae7 100644 --- a/vegafusion-python/vegafusion/runtime.py +++ b/vegafusion-python/vegafusion/runtime.py @@ -5,9 +5,7 @@ from typing import ( TYPE_CHECKING, Any, - Callable, Literal, - Protocol, TypedDict, Union, cast, @@ -37,21 +35,6 @@ ) -class PlanExecutorProtocol(Protocol): - """Protocol for objects with execute_plan method.""" - - def execute_plan(self, logical_plan_json: str) -> pa.Table: - 
"""Execute a logical plan and return an Arrow table.""" - ... - - -# Type alias for plan executors -PlanExecutor = Union[ - # Callable that takes logical plan JSON and returns Arrow table - Callable[[str], "pa.Table"], - PlanExecutorProtocol, # Object with execute_plan method -] - # This type isn't defined in the grpcio package, so let's at least name it UnaryUnaryMultiCallable = Any @@ -246,32 +229,6 @@ def runtime(self) -> PyVegaFusionRuntime: ) return self._runtime - @classmethod - def new_vendor( - cls, - vendor: Literal["datafusion", "sparksql"], - executor: PlanExecutor | None = None, - cache_capacity: int = 64, - memory_limit: int | None = None, - worker_threads: int | None = None, - ) -> VegaFusionRuntime: - from vegafusion._vegafusion import PyVegaFusionRuntime - - inst = cls(cache_capacity, memory_limit, worker_threads) - if inst.memory_limit is None: - inst.memory_limit = get_virtual_memory() // 2 - if inst.worker_threads is None: - inst.worker_threads = get_cpu_count() - - inst._runtime = PyVegaFusionRuntime.new_embedded_vendor( - inst.cache_capacity, - inst.memory_limit, - inst.worker_threads, - vendor, - executor, - ) - return inst - def grpc_connect(self, url: str) -> None: """ Connect to a VegaFusion server over gRPC at the provided gRPC url @@ -324,8 +281,6 @@ def _import_inline_datasets( if (pa is not None and isinstance(value, pa.Schema)) or hasattr( value, "__arrow_c_schema__" ): - # Handle PyArrow Schema - convert to arro3 Schema - # This allows for planning without requiring actual data imported_inline_datasets[name] = Schema.from_arrow(value) elif pd is not None and pa is not None and isinstance(value, pd.DataFrame): # rename to help mypy diff --git a/vegafusion-runtime/Cargo.toml b/vegafusion-runtime/Cargo.toml index 2cf548655..6e2731186 100644 --- a/vegafusion-runtime/Cargo.toml +++ b/vegafusion-runtime/Cargo.toml @@ -6,7 +6,7 @@ harness = false name = "vegafusion-runtime" license = "BSD-3-Clause" edition = "2021" -version = "2.0.8" +version = "2.0.3" description = "VegaFusion Runtime" [features] @@ -115,12 +115,12 @@ workspace = true [dependencies.vegafusion-common] path = "../vegafusion-common" features = ["json", "sqlparser", "prettyprint", "object_store", "url"] -version = "2.0.8" +version = "2.0.3" [dependencies.vegafusion-core] path = "../vegafusion-core" features = ["sqlparser"] -version = "2.0.8" +version = "2.0.3" [dependencies.serde] workspace = true @@ -154,10 +154,6 @@ workspace = true [dependencies.datafusion-functions-window] workspace = true -[dependencies.datafusion-sql] -workspace = true -features = ["unparser"] - [dependencies.datafusion-proto] workspace = true optional = true diff --git a/vegafusion-runtime/src/lib.rs b/vegafusion-runtime/src/lib.rs index 4e5bafa10..8333dab58 100644 --- a/vegafusion-runtime/src/lib.rs +++ b/vegafusion-runtime/src/lib.rs @@ -7,7 +7,6 @@ pub mod datafusion; pub mod expression; pub mod plan_executor; pub mod signal; -pub mod sql; pub mod task_graph; pub mod tokio_runtime; pub mod transform; diff --git a/vegafusion-runtime/src/sql/mod.rs b/vegafusion-runtime/src/sql/mod.rs deleted file mode 100644 index bc46ddaff..000000000 --- a/vegafusion-runtime/src/sql/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub mod spark; - -pub use spark::logical_plan_to_spark_sql; diff --git a/vegafusion-runtime/src/sql/spark.rs b/vegafusion-runtime/src/sql/spark.rs deleted file mode 100644 index b3be3d603..000000000 --- a/vegafusion-runtime/src/sql/spark.rs +++ /dev/null @@ -1,684 +0,0 @@ -use 
datafusion::sql::unparser::dialect::CustomDialectBuilder; -use datafusion::sql::unparser::Unparser; -use datafusion_common::tree_node::{Transformed, TreeNode}; -use datafusion_common::{Column, ScalarValue}; -use datafusion_expr::{expr::ScalarFunction, Expr, LogicalPlan}; -use lazy_static::lazy_static; -use regex; -use sqlparser::ast::{self, visit_expressions_mut}; -use std::collections::HashMap; -use std::ops::ControlFlow; -use vegafusion_common::error::{Result, VegaFusionError}; - -/// This method converts a logical plan, which we get from DataFusion, into a SQL query -/// which is compatible with Spark. -// The SQL generated from the DataFusion plan is not compatible with Spark by default. -// To make it work, we apply changes to both the logical plan itself and to the -// abstract syntax tree generated from this logical plan before converting -// it into an SQL string. This allows us to rewrite parts of the plan or syntax tree to -// be compatible with Spark. -pub fn logical_plan_to_spark_sql(plan: &LogicalPlan) -> Result<String> { - let plan = plan.clone(); - let processed_plan = rewrite_subquery_column_identifiers(plan)?; - let processed_plan = rewrite_datetime_formatting(processed_plan)?; - - let dialect = CustomDialectBuilder::new().build(); - let unparser = Unparser::new(&dialect).with_pretty(true); - let mut statement = unparser.plan_to_sql(&processed_plan).map_err(|e| { - VegaFusionError::vendor(format!( - "Failed to generate SQL AST from logical plan: {}", - e - )) - })?; - - rewrite_row_number(&mut statement); - rewrite_inf_and_nan(&mut statement); - rewrite_date_format(&mut statement); - rewrite_timestamps(&mut statement); - rewrite_intervals(&mut statement); - rewrite_nested_is_null(&mut statement); - rewrite_column_identifiers(&mut statement); - - let spark_sql = statement.to_string(); - - Ok(spark_sql) -} - -/// When adding row_number() DataFusion generates SQL like this: -/// ```sql -/// row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING -/// ``` -/// Which is not compatible with Spark. For Spark we rewrite the AST to -/// ```sql -/// row_number() OVER (ORDER BY monotonically_increasing_id()) -/// ``` -fn rewrite_row_number(statement: &mut ast::Statement) { - let _ = visit_expressions_mut(statement, |expr: &mut ast::Expr| { - if let ast::Expr::Function(func) = expr { - if func.name.to_string().to_lowercase() == "row_number" { - if let Some(ast::WindowType::WindowSpec(ref mut window_spec)) = &mut func.over { - window_spec.window_frame = None; - window_spec.order_by = vec![ast::OrderByExpr { - expr: ast::Expr::Function(ast::Function { - name: ast::ObjectName::from(vec![ast::Ident::new( - "monotonically_increasing_id", - )]), - args: ast::FunctionArguments::List(ast::FunctionArgumentList { - duplicate_treatment: None, - args: vec![], - clauses: vec![], - }), - filter: None, - null_treatment: None, - over: None, - within_group: vec![], - uses_odbc_syntax: false, - parameters: ast::FunctionArguments::None, - }), - options: ast::OrderByOptions { - asc: None, - nulls_first: None, - }, - with_fill: None, - }]; - } - } - } - ControlFlow::<()>::Continue(()) - }); -} -
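The `rewrite_row_number` pass above edits the sqlparser AST, but its net effect is easiest to see on SQL text. A string-level Python sketch of the same before/after (hypothetical query, not actual unparser output):

```python
import re

# Shape of the SQL DataFusion's unparser emits for row_number().
datafusion_sql = (
    "SELECT row_number() OVER "
    "(ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS rn "
    "FROM orders"
)

def rewrite_row_number_sql(sql: str) -> str:
    """String-level stand-in for the AST pass: drop the window frame and
    order by monotonically_increasing_id() so Spark accepts the window."""
    return re.sub(
        r"row_number\(\)\s+OVER\s+\(ROWS BETWEEN UNBOUNDED PRECEDING "
        r"AND UNBOUNDED FOLLOWING\)",
        "row_number() OVER (ORDER BY monotonically_increasing_id())",
        sql,
        flags=re.IGNORECASE,
    )

assert rewrite_row_number_sql(datafusion_sql) == (
    "SELECT row_number() OVER "
    "(ORDER BY monotonically_increasing_id()) AS rn FROM orders"
)
```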
-/// When DataFusion generates SQL, NaN and infinity values are presented as -/// literals, while Spark requires them to be `float('NaN')`, `float('inf')`, etc. -fn rewrite_inf_and_nan(statement: &mut ast::Statement) { - const SPECIAL_VALUES: &[&str] = &[ - "nan", - "inf", - "infinity", - "+inf", - "+infinity", - "-inf", - "-infinity", - ]; - - let _ = visit_expressions_mut(statement, |expr: &mut ast::Expr| { - if let ast::Expr::Value(value) = expr { - if let ast::Value::Number(num_str, _) = &value.value { - if SPECIAL_VALUES.contains(&num_str.to_lowercase().as_str()) { - *expr = ast::Expr::Function(ast::Function { - name: ast::ObjectName::from(vec![ast::Ident::new("float")]), - args: ast::FunctionArguments::List(ast::FunctionArgumentList { - duplicate_treatment: None, - args: vec![ast::FunctionArg::Unnamed(ast::FunctionArgExpr::Expr( - ast::Expr::Value(ast::ValueWithSpan { - value: ast::Value::SingleQuotedString(num_str.clone()), - span: value.span.clone(), - }), - ))], - clauses: vec![], - }), - filter: None, - null_treatment: None, - over: None, - within_group: vec![], - uses_odbc_syntax: false, - parameters: ast::FunctionArguments::None, - }); - } - } - } - ControlFlow::<()>::Continue(()) - }); -} - -/// Rename `to_char` function calls to `date_format` for Spark compatibility -/// Spark <4 doesn't support formatting dates with the `to_char` function -fn rewrite_date_format(statement: &mut ast::Statement) { - let _ = visit_expressions_mut(statement, |expr: &mut ast::Expr| { - if let ast::Expr::Function(func) = expr { - if func.name.to_string().to_lowercase() == "to_char" { - func.name = ast::ObjectName::from(vec![ast::Ident::new("date_format")]); - } - } - ControlFlow::<()>::Continue(()) - }); -} - -/// Timestamps are weird in Spark. -/// First of all, the TIMESTAMP type in SQL is "naive": it doesn't have an associated timezone. But in Spark it actually does. -/// And Spark doesn't support the TIMESTAMP WITH TIME ZONE type, so we rewrite it to just TIMESTAMP. -/// Because of this we also rewrite calls to make_timestamptz into make_timestamp, dropping the milliseconds argument, -/// as it's not supported by Spark. -fn rewrite_timestamps(statement: &mut ast::Statement) { - let _ = visit_expressions_mut(statement, |expr: &mut ast::Expr| { - if let ast::Expr::Function(func) = expr { - let func_name = func.name.to_string().to_lowercase(); - if func_name == "make_timestamptz" { - func.name = ast::ObjectName::from(vec![ast::Ident::new("make_timestamp")]); - - // Remove milliseconds (not supported by Spark) - if let ast::FunctionArguments::List(ref mut arg_list) = &mut func.args { - if arg_list.args.len() >= 7 { - arg_list.args.remove(6); - } - } - } else if func_name.starts_with("to_timestamp") { - // Spark only has the to_timestamp function, no to_timestamp_nanos, etc - if func_name != "to_timestamp" { - func.name = ast::ObjectName::from(vec![ast::Ident::new("to_timestamp")]); - } - - // Spark's `to_timestamp` supports passing only one format, while DataFusion allows - // matching a list of Chrono patterns. So we remove ALL patterns from this func call - if let ast::FunctionArguments::List(ref mut arg_list) = &mut func.args { - if arg_list.args.len() > 1 { - arg_list.args.truncate(1); - } - } - } - } else if let ast::Expr::Cast { data_type, .. } = expr { - // Rewrite TIMESTAMP WITH TIME ZONE to just TIMESTAMP - if let ast::DataType::Timestamp(_, ast::TimezoneInfo::WithTimeZone) = data_type { - *data_type = ast::DataType::Timestamp(None, ast::TimezoneInfo::None); - } - } - ControlFlow::<()>::Continue(()) - }); -} - -/// Rewrite interval expressions to use full names instead of abbreviations for Spark compatibility -/// e.g. 
"1 MONS" -> "1 MONTHS", "2 MINS" -> "2 MINUTES" -fn rewrite_intervals(statement: &mut ast::Statement) { - let _ = visit_expressions_mut(statement, |expr: &mut ast::Expr| { - if let ast::Expr::Interval(interval) = expr { - if let ast::Expr::Value(value_with_span) = interval.value.as_ref() { - if let ast::Value::SingleQuotedString(interval_str) = &value_with_span.value { - *interval.value = ast::Expr::Value(ast::ValueWithSpan { - value: ast::Value::SingleQuotedString(expand_interval_abbreviations( - interval_str, - )), - span: value_with_span.span.clone(), - }); - } - } - } - ControlFlow::<()>::Continue(()) - }); -} - -lazy_static! { - /// Precompiled regexes for expanding interval abbreviations - static ref INTERVAL_ABBREV_REGEXES: Vec<(regex::Regex, &'static str)> = vec![ - (regex::Regex::new(r"\b(\d+)\s+MONS\b").unwrap(), "${1} MONTHS"), - (regex::Regex::new(r"\b(\d+)\s+MON\b").unwrap(), "${1} MONTH"), - (regex::Regex::new(r"\b(\d+)\s+MINS\b").unwrap(), "${1} MINUTES"), - (regex::Regex::new(r"\b(\d+)\s+MIN\b").unwrap(), "${1} MINUTE"), - (regex::Regex::new(r"\b(\d+)\s+SECS\b").unwrap(), "${1} SECONDS"), - (regex::Regex::new(r"\b(\d+)\s+SEC\b").unwrap(), "${1} SECOND"), - (regex::Regex::new(r"\b(\d+)\s+HRS\b").unwrap(), "${1} HOURS"), - (regex::Regex::new(r"\b(\d+)\s+HR\b").unwrap(), "${1} HOUR"), - (regex::Regex::new(r"\b(\d+)\s+YRS\b").unwrap(), "${1} YEARS"), - (regex::Regex::new(r"\b(\d+)\s+YR\b").unwrap(), "${1} YEAR"), - ]; -} - -/// Ensure nested IS NULL/IS NOT NULL predicates are parenthesized for Spark compatibility -/// e.g. `col IS NULL IS NULL` -> `(col IS NULL) IS NULL` -fn rewrite_nested_is_null(statement: &mut ast::Statement) { - let _ = visit_expressions_mut(statement, |expr: &mut ast::Expr| { - if let ast::Expr::IsNull(inner) | ast::Expr::IsNotNull(inner) = expr { - if matches!( - inner.as_ref(), - ast::Expr::IsNull(_) | ast::Expr::IsNotNull(_) - ) { - let inner_clone = inner.as_ref().clone(); - *inner = Box::new(ast::Expr::Nested(Box::new(inner_clone))); - } - } - ControlFlow::<()>::Continue(()) - }); -} - -/// Expand interval abbreviations to full names for Spark compatibility -fn expand_interval_abbreviations(interval_str: &str) -> String { - let mut result = interval_str.to_string(); - for (re, replacement) in INTERVAL_ABBREV_REGEXES.iter() { - result = re.replace_all(&result, *replacement).to_string(); - } - result -} - -/// DataFusion logical plan which uses compound names when selecting from subquery: -/// ```sql -/// SELECT orders.customer_name, orders.customer_age FROM (SELECT orders.customer_name, orders.customer_age FROM orders) -/// ``` -/// This is not a valid SQL, as `orders` isn't available once we get out of first query. -/// So we rewrite logical plan to replace compound names with just the column names in projections -/// that select data from another projection -fn rewrite_subquery_column_identifiers(plan: LogicalPlan) -> Result { - let processed_plan = plan - .transform_up_with_subqueries(|p| { - if let LogicalPlan::Projection(projection) = &p { - // only touch projections that read from another projection - if matches!(*projection.input, LogicalPlan::Projection { .. 
-/// DataFusion generates logical plans that use compound names when selecting
-/// from a subquery:
-/// ```sql
-/// SELECT orders.customer_name, orders.customer_age FROM (SELECT orders.customer_name, orders.customer_age FROM orders)
-/// ```
-/// This is not valid SQL, as `orders` isn't available once we leave the inner query.
-/// So we rewrite the logical plan to replace compound names with bare column names
-/// in projections that select data from another projection.
-fn rewrite_subquery_column_identifiers(plan: LogicalPlan) -> Result<LogicalPlan> {
-    let processed_plan = plan
-        .transform_up_with_subqueries(|p| {
-            if let LogicalPlan::Projection(projection) = &p {
-                // only touch projections that read from another projection
-                if matches!(*projection.input, LogicalPlan::Projection { .. }) {
-                    let rewritten_exprs = projection
-                        .expr
-                        .iter()
-                        .map(|e| {
-                            e.clone()
-                                .transform_up(|mut ex| {
-                                    if let Expr::Column(c) = &mut ex {
-                                        *c = Column::from_name(c.name.clone());
-                                        Ok(Transformed::yes(ex))
-                                    } else {
-                                        Ok(Transformed::no(ex))
-                                    }
-                                })
-                                .map(|t| t.data)
-                        })
-                        .collect::<datafusion_common::Result<Vec<_>>>()?;
-                    let new_plan_node =
-                        p.with_new_exprs(rewritten_exprs, vec![(*projection.input).clone()])?;
-                    return Ok(Transformed::yes(new_plan_node));
-                }
-            }
-
-            Ok(Transformed::no(p))
-        })
-        .map_err(|e| {
-            VegaFusionError::vendor(format!(
-                "Failed to rewrite subquery column identifiers: {}",
-                e
-            ))
-        })?
-        .data;
-
-    Ok(processed_plan)
-}
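-// Illustration (mirroring this module's unit tests):
-//   before: SELECT orders.customer_name FROM (SELECT orders.customer_name FROM orders)
-//   after:  SELECT customer_name FROM (SELECT orders.customer_name FROM orders)
-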
-/// Rewrite datetime formatting expressions to be compatible with Spark
-fn rewrite_datetime_formatting(plan: LogicalPlan) -> Result<LogicalPlan> {
-    let processed_plan = plan
-        .transform_up_with_subqueries(|p| {
-            let p = p
-                .map_expressions(|expr| {
-                    expr.transform(&|e| {
-                        if let Expr::ScalarFunction(sf) = &e {
-                            if sf.name().eq_ignore_ascii_case("to_char") {
-                                let mut new_args = sf.args.clone();
-                                if new_args.len() > 1 {
-                                    if let Expr::Literal(ScalarValue::Utf8(Some(format_str)), _) =
-                                        &new_args[1]
-                                    {
-                                        let spark_format =
-                                            chrono_to_spark(format_str).map_err(|e| {
-                                                datafusion_common::DataFusionError::External(
-                                                    Box::new(e),
-                                                )
-                                            })?;
-                                        new_args[1] = Expr::Literal(
-                                            ScalarValue::Utf8(Some(spark_format)),
-                                            None,
-                                        );
-                                        let new_sf = ScalarFunction {
-                                            func: sf.func.clone(),
-                                            args: new_args,
-                                        };
-                                        return Ok(Transformed::yes(Expr::ScalarFunction(new_sf)));
-                                    }
-                                }
-                            }
-                        }
-                        Ok(Transformed::no(e))
-                    })
-                })?
-                .data;
-            Ok(Transformed::yes(p))
-        })
-        .map_err(|e: datafusion_common::DataFusionError| {
-            VegaFusionError::vendor(format!("Failed to rewrite datetime formatting: {}", e))
-        })?
-        .data;
-
-    Ok(processed_plan)
-}
-
-lazy_static! {
-    /// chrono-strftime → Spark SQL pattern map
-    static ref CHRONO_SPARK_MAP: HashMap<&'static str, &'static str> = {
-        HashMap::from([
-            // year
-            ("Y", "yyyy"), ("y", "yy"),
-            // month
-            ("m", "MM"), ("b", "MMM"), ("h", "MMM"), ("B", "MMMM"),
-            // day
-            ("d", "dd"), ("e", "d"), ("j", "DDD"),
-            // hour / minute / second
-            ("H", "HH"), ("I", "hh"), ("k", "H"), ("l", "h"),
-            ("M", "mm"), ("S", "ss"),
-            // week
-            ("U", "ww"), ("W", "ww"), ("V", "ww"),
-            // weekday names
-            ("a", "EEE"), ("A", "EEEE"),
-            // AM / PM
-            ("p", "a"), ("P", "a"),
-            // timezone
-            ("z", "Z"), ("Z", "z"),
-        ])
-    };
-}
-
-/// Convert a chrono `strftime` pattern (e.g. "%Y-%m-%d %H:%M:%S")
-/// to a Spark SQL `date_format` pattern (e.g. "yyyy-MM-dd HH:mm:ss").
-fn chrono_to_spark(fmt: &str) -> Result<String> {
-    let mut out = String::with_capacity(fmt.len() * 2);
-    let mut chars = fmt.chars().peekable();
-
-    while let Some(c) = chars.next() {
-        if c != '%' {
-            // Check whether this character needs to be quoted.
-            // Common separators like -, :, and space don't need quotes;
-            // letters and other special characters do.
-            if !matches!(c, '-' | ':' | ' ' | '/' | ',' | '.') {
-                // Collect consecutive literal characters that need quoting
-                let mut literal = String::new();
-                literal.push(c);
-
-                // Continue collecting non-% characters that need quoting
-                while let Some(&next_c) = chars.peek() {
-                    if next_c == '%' || matches!(next_c, '-' | ':' | ' ' | '/' | ',' | '.') {
-                        break;
-                    }
-                    literal.push(chars.next().unwrap());
-                }
-
-                // Wrap the literal string in single quotes
-                out.push_str(&format!("\\'{}\\'", &literal));
-            } else {
-                // Characters that don't need quoting (like -, :, space)
-                out.push(c);
-            }
-            continue;
-        }
-
-        // literal %%
-        if chars.peek() == Some(&'%') {
-            out.push('%');
-            chars.next();
-            continue;
-        }
-
-        // collect every char up to and incl. the terminating alpha
-        let mut modifier = String::new(); // '.', ':', '#' …
-        let mut digits = String::new(); // width like 3 in %3f
-        let mut letter = '\0';
-
-        while let Some(&ch) = chars.peek() {
-            chars.next();
-            if ch.is_ascii_alphabetic() {
-                letter = ch;
-                break;
-            } else if ch.is_ascii_digit() {
-                digits.push(ch);
-            } else {
-                modifier.push(ch);
-            }
-        }
-
-        match letter {
-            // -------- fractional seconds --------
-            'f' => {
-                // width: %f   -> 9 (nanoseconds)
-                //        %3f  -> 3 (fixed)
-                //        %.f  -> 9 (leading dot)
-                //        %.3f -> 3 (leading dot, fixed)
-                let width: usize = digits.parse::<usize>().unwrap_or(9).clamp(1, 9);
-                if modifier.contains('.') {
-                    out.push('.');
-                }
-                out.push_str(&"S".repeat(width)); // S, SS, … SSSSSSSSS
-            }
-
-            // -------- time-zone offsets --------
-            'z' if modifier == ":" => out.push_str("XXX"), // %:z -> +09:30
-            'z' if modifier == "::" => out.push_str("XXXXX"), // %::z -> +09:30:00
-            'z' if modifier == ":::" => out.push_str("X"), // %:::z -> +09
-            'z' => out.push_str("Z"), // %z -> +0930
-
-            // -------- everything else that has a direct map --------
-            _ => {
-                let key = &format!("{}{}", modifier, letter); // e.g. "Y", ".f", ":z"
-                match CHRONO_SPARK_MAP.get(key.as_str()) {
-                    Some(rep) => out.push_str(rep),
-                    None => {
-                        return Err(VegaFusionError::vendor(format!(
-                            "unsupported specifier %{}",
-                            key
-                        )))
-                    }
-                }
-            }
-        }
-    }
-    Ok(out)
-}
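-// A few conversions this function performs (the quoted-'T' case mirrors this
-// module's unit tests; the others are assumed):
-//   chrono_to_spark("%Y-%m-%d")?          == "yyyy-MM-dd"
-//   chrono_to_spark("%H:%M:%S%.3f")?      == "HH:mm:ss.SSS"
-//   chrono_to_spark("%Y-%m-%dT%H:%M:%S")? == "yyyy-MM-dd\'T\'HH:mm:ss"
-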
""+"Y", ".f", ":z" - match CHRONO_SPARK_MAP.get(key.as_str()) { - Some(rep) => out.push_str(rep), - None => { - return Err(VegaFusionError::vendor(format!( - "unsupported specifier %{}", - key - ))) - } - } - } - } - } - Ok(out) -} - -/// Rewrite column identifiers to properly quote column names with spaces or special characters -/// Spark SQL requires column names with spaces or special characters to be quoted with backticks -fn rewrite_column_identifiers(statement: &mut ast::Statement) { - // Helper function to quote an identifier if needed - let quote_if_needed = |ident: &mut ast::Ident| { - if needs_quoting(&ident.value) { - ident.quote_style = Some('`'); - } - }; - - // First handle expressions (column references) - let _ = visit_expressions_mut(statement, |expr: &mut ast::Expr| { - match expr { - ast::Expr::Identifier(ident) => { - quote_if_needed(ident); - } - ast::Expr::CompoundIdentifier(identifiers) => { - for ident in identifiers { - quote_if_needed(ident); - } - } - _ => {} - } - ControlFlow::<()>::Continue(()) - }); - - // Then handle aliases in all SELECT projections (including nested subqueries) - quote_aliases_recursively(statement, "e_if_needed); -} - -/// Recursively traverse the AST to quote aliases in all SELECT statements -fn quote_aliases_recursively( - statement: &mut ast::Statement, - quote_if_needed: &impl Fn(&mut ast::Ident), -) { - if let ast::Statement::Query(query) = statement { - quote_aliases_in_query(query, quote_if_needed); - } -} - -/// Quote aliases in a query and all its nested subqueries -fn quote_aliases_in_query(query: &mut ast::Query, quote_if_needed: &impl Fn(&mut ast::Ident)) { - quote_aliases_in_set_expr(&mut query.body, quote_if_needed); -} - -/// Quote aliases in a set expression (SELECT, UNION, etc.) -fn quote_aliases_in_set_expr( - set_expr: &mut ast::SetExpr, - quote_if_needed: &impl Fn(&mut ast::Ident), -) { - match set_expr { - ast::SetExpr::Select(select) => { - quote_aliases_in_select(select, quote_if_needed); - } - ast::SetExpr::Query(query) => { - quote_aliases_in_query(query, quote_if_needed); - } - ast::SetExpr::SetOperation { left, right, .. } => { - quote_aliases_in_set_expr(left, quote_if_needed); - quote_aliases_in_set_expr(right, quote_if_needed); - } - _ => { - // Other set expression types don't have aliases to quote - } - } -} - -/// Quote aliases in a SELECT statement and handle nested subqueries -fn quote_aliases_in_select(select: &mut ast::Select, quote_if_needed: &impl Fn(&mut ast::Ident)) { - // Quote aliases in the projection list - for projection in &mut select.projection { - if let ast::SelectItem::ExprWithAlias { alias, .. } = projection { - quote_if_needed(alias); - } - } - - // Recursively handle subqueries in the FROM clause - for table_with_joins in &mut select.from { - quote_aliases_in_table_with_joins(table_with_joins, quote_if_needed); - } -} - -/// Quote aliases in table references and their joins -fn quote_aliases_in_table_with_joins( - table_with_joins: &mut ast::TableWithJoins, - quote_if_needed: &impl Fn(&mut ast::Ident), -) { - quote_aliases_in_table_factor(&mut table_with_joins.relation, quote_if_needed); - - for join in &mut table_with_joins.joins { - quote_aliases_in_table_factor(&mut join.relation, quote_if_needed); - } -} - -/// Quote aliases in table factors (tables, subqueries, etc.) -fn quote_aliases_in_table_factor( - table_factor: &mut ast::TableFactor, - quote_if_needed: &impl Fn(&mut ast::Ident), -) { - match table_factor { - ast::TableFactor::Derived { subquery, .. 
-        ast::TableFactor::Derived { subquery, .. } => {
-            quote_aliases_in_query(subquery, quote_if_needed);
-        }
-        ast::TableFactor::NestedJoin {
-            table_with_joins, ..
-        } => {
-            quote_aliases_in_table_with_joins(table_with_joins, quote_if_needed);
-        }
-        _ => {
-            // Other table factor types don't have nested queries to process
-        }
-    }
-}
-
-/// Check if a column name needs to be quoted in Spark SQL
-fn needs_quoting(name: &str) -> bool {
-    // Don't quote function calls (contain parentheses)
-    if name.contains('(') || name.contains(')') {
-        return false;
-    }
-
-    // Check if the name contains spaces, hyphens, or starts with a digit
-    name.contains(' ')
-        || name.contains('-')
-        || name.chars().next().map_or(false, |c| c.is_ascii_digit())
-        || has_special_chars_needing_quotes(name)
-        || is_sql_reserved_word(name)
-}
-
-/// Check if a name contains special characters that require quoting.
-/// This is more restrictive than the previous version to avoid quoting function calls.
-fn has_special_chars_needing_quotes(name: &str) -> bool {
-    // Only quote for specific problematic characters, not all non-alphanumeric
-    name.chars().any(|c| {
-        matches!(
-            c,
-            '!' | '@'
-                | '#'
-                | '$'
-                | '%'
-                | '^'
-                | '&'
-                | '*'
-                | '+'
-                | '='
-                | '['
-                | ']'
-                | '{'
-                | '}'
-                | '|'
-                | '\\'
-                | ':'
-                | ';'
-                | '"'
-                | '\''
-                | '<'
-                | '>'
-                | ','
-                | '.'
-                | '?'
-                | '/'
-                | '~'
-                | '`'
-        )
-    })
-}
-
-lazy_static! {
-    /// SQL reserved words that need quoting in Spark SQL.
-    /// This includes both ANSI SQL and Spark SQL specific reserved words.
-    static ref RESERVED_WORDS: std::collections::HashSet<&'static str> = {
-        std::collections::HashSet::from([
-            // ANSI SQL reserved words
-            "SELECT", "FROM", "WHERE", "GROUP", "BY", "HAVING", "ORDER", "LIMIT",
-            "INSERT", "UPDATE", "DELETE", "CREATE", "DROP", "ALTER", "TABLE", "INDEX",
-            "VIEW", "DATABASE", "SCHEMA", "COLUMN", "CONSTRAINT", "PRIMARY", "KEY",
-            "FOREIGN", "REFERENCES", "UNIQUE", "NOT", "NULL", "DEFAULT", "CHECK",
-            "UNION", "INTERSECT", "EXCEPT", "JOIN", "INNER", "LEFT", "RIGHT", "FULL",
-            "OUTER", "CROSS", "ON", "USING", "AS", "DISTINCT", "ALL", "ANY", "SOME",
-            "EXISTS", "IN", "BETWEEN", "LIKE", "IS", "AND", "OR", "CASE", "WHEN",
-            "THEN", "ELSE", "END", "IF", "CAST", "CONVERT", "SUBSTRING", "TRIM",
-            "UPPER", "LOWER", "LENGTH", "CHAR", "VARCHAR", "TEXT", "INTEGER", "INT",
-            "BIGINT", "SMALLINT", "DECIMAL", "NUMERIC", "FLOAT", "DOUBLE", "REAL",
-            "BOOLEAN", "DATE", "TIME", "TIMESTAMP", "INTERVAL", "YEAR", "MONTH",
-            "DAY", "HOUR", "MINUTE", "SECOND", "TRUE", "FALSE", "UNKNOWN",
-
-            // Spark SQL specific reserved words
-            "ARRAY", "MAP", "STRUCT", "BINARY", "TINYINT", "STRING", "PARTITION",
-            "PARTITIONS", "CLUSTER", "DISTRIBUTE", "SORT", "TABLESAMPLE", "LATERAL",
-            "WINDOW", "OVER", "ROW", "ROWS", "RANGE", "PRECEDING", "FOLLOWING",
-            "UNBOUNDED", "CURRENT", "FIRST", "LAST", "IGNORE", "RESPECT", "NULLS",
-            "ROLLUP", "CUBE", "GROUPING", "SETS", "PIVOT", "UNPIVOT", "TRANSFORM",
-            "REDUCE", "AGGREGATE", "FILTER", "WITHIN", "COLLECT_LIST", "COLLECT_SET",
-            "EXPLODE", "INLINE", "POSEXPLODE", "STACK", "JSON_TUPLE", "GET_JSON_OBJECT",
-            "REGEXP_EXTRACT", "REGEXP_REPLACE", "SPLIT", "SIZE", "SORT_ARRAY",
-            "ARRAY_CONTAINS", "MAP_KEYS", "MAP_VALUES", "NAMED_STRUCT", "STRUCT",
-            "DESCRIBE", "DESC", "EXPLAIN", "ANALYZE", "CACHE", "UNCACHE", "REFRESH",
-            "SHOW", "MSCK", "REPAIR", "RECOVER", "EXPORT", "IMPORT", "LOAD", "UNLOAD",
-            "SET", "RESET", "ADD", "LIST", "WITH", "RECURSIVE", "TEMPORARY", "TEMP",
-            "GLOBAL", "LOCAL", "LOCATION", "COMMENT", "TBLPROPERTIES", "SERDEPROPERTIES",
"OUTPUTFORMAT", "SERDE", "DELIMITED", "FIELDS", - "TERMINATED", "ESCAPED", "COLLECTION", "ITEMS", "KEYS", "LINES", "DEFINED", - "RECORDREADER", "RECORDWRITER", "ROW_FORMAT", "STORED_AS", "CLUSTERED", - "SORTED", "INTO", "BUCKETS", "SKEWED", "DIRECTORIES", "PURGE", "ARCHIVE", - "UNARCHIVE", "TOUCH", "COMPACT", "CONCATENATE", "CHANGE", "REPLACE", - "COLUMNS", "RLIKE", "REGEXP", "FUNCTION", "MACRO", "AGGREGATE", "RETURNS", - "LANGUAGE", "DETERMINISTIC", "SQL", "CONTAINS", "READS", "MODIFIES", - "NO", "CALLED", "INPUT", "SPECIFIC", "EXTERNAL", "SECURITY", "DEFINER", - "INVOKER", "PATH", "ISOLATION", "LEVEL", "READ", "WRITE", "ONLY", - "COMMITTED", "UNCOMMITTED", "REPEATABLE", "SERIALIZABLE", "WORK", - "TRANSACTION", "START", "COMMIT", "ROLLBACK", "SAVEPOINT", "RELEASE" - ]) - }; -} - -/// Check if a name is a SQL reserved word that needs quoting in Spark SQL -fn is_sql_reserved_word(name: &str) -> bool { - RESERVED_WORDS.contains(name.to_uppercase().as_str()) -} diff --git a/vegafusion-runtime/src/task_graph/runtime.rs b/vegafusion-runtime/src/task_graph/runtime.rs index 02ad6e7f9..9e047a1c4 100644 --- a/vegafusion-runtime/src/task_graph/runtime.rs +++ b/vegafusion-runtime/src/task_graph/runtime.rs @@ -57,11 +57,10 @@ impl VegaFusionRuntime { task_graph: Arc, node_value_index: &NodeValueIndex, inline_datasets: HashMap, - plan_executor: Option>, ) -> Result { // We shouldn't panic inside get_or_compute_node_value, but since this may be used // in a server context, wrap in catch_unwind just in case. - let executor = plan_executor.unwrap_or_else(|| self.plan_executor()); + let executor = self.plan_executor(); let node_value = AssertUnwindSafe(get_or_compute_node_value( task_graph, node_value_index.node_index as usize, @@ -136,17 +135,11 @@ impl VegaFusionRuntimeTrait for VegaFusionRuntime { // Clone task_graph and task_graph_runtime for use in closure let task_graph_runtime = task_graph_runtime.clone(); let task_graph = task_graph.clone(); - let plan_executor_clone = self.plan_executor(); Ok(async move { let value = task_graph_runtime .clone() - .get_node_value( - task_graph, - node_value_index, - inline_datasets.clone(), - Some(plan_executor_clone), - ) + .get_node_value(task_graph, node_value_index, inline_datasets.clone()) .await?; Ok::<_, VegaFusionError>(NamedTaskValue { diff --git a/vegafusion-runtime/tests/test_image_comparison.rs b/vegafusion-runtime/tests/test_image_comparison.rs index d8d11ea58..332654201 100644 --- a/vegafusion-runtime/tests/test_image_comparison.rs +++ b/vegafusion-runtime/tests/test_image_comparison.rs @@ -1466,12 +1466,7 @@ async fn check_spec_sequence( for var in &spec_plan.comm_plan.server_to_client { let node_index = task_graph_mapping.get(var).unwrap(); let value = runtime - .get_node_value( - Arc::new(task_graph.clone()), - node_index, - Default::default(), - None, - ) + .get_node_value(Arc::new(task_graph.clone()), node_index, Default::default()) .await .expect("Failed to get node value"); @@ -1543,12 +1538,7 @@ async fn check_spec_sequence( let mut server_to_client_value_batch = HashMap::new(); for (var, node_index) in watch_vars.iter().zip(&watch_indices) { let value = runtime - .get_node_value( - Arc::new(task_graph.clone()), - node_index, - Default::default(), - None, - ) + .get_node_value(Arc::new(task_graph.clone()), node_index, Default::default()) .await .unwrap(); diff --git a/vegafusion-runtime/tests/test_planning.rs b/vegafusion-runtime/tests/test_planning.rs index 1f332ed73..8219ee8c0 100644 --- a/vegafusion-runtime/tests/test_planning.rs +++ 
@@ -69,7 +69,6 @@ async fn test_extract_server_data() {
             .get(&(Variable::new_data("data_3"), Vec::new()))
             .unwrap(),
         Default::default(),
-        None,
     )
     .await
     .unwrap();
@@ -86,7 +85,6 @@ async fn test_extract_server_data() {
             ))
             .unwrap(),
         Default::default(),
-        None,
     )
     .await
     .unwrap();
diff --git a/vegafusion-runtime/tests/test_spark_sql.rs b/vegafusion-runtime/tests/test_spark_sql.rs
deleted file mode 100644
index 068be069c..000000000
--- a/vegafusion-runtime/tests/test_spark_sql.rs
+++ /dev/null
@@ -1,334 +0,0 @@
-use datafusion::datasource::{provider_as_source, MemTable};
-use datafusion::prelude::{DataFrame, SessionContext};
-use datafusion_expr::Expr;
-use datafusion_expr::{col, lit, LogicalPlanBuilder};
-use datafusion_functions::expr_fn::{to_char, to_timestamp_seconds};
-use std::sync::Arc;
-use vegafusion_common::arrow::array::RecordBatch;
-use vegafusion_common::arrow::datatypes::{DataType, Field, Schema, TimeUnit};
-use vegafusion_common::column::flat_col;
-use vegafusion_runtime::data::util::DataFrameUtils;
-use vegafusion_runtime::datafusion::udfs::datetime::make_timestamptz::make_timestamptz;
-use vegafusion_runtime::expression::compiler::utils::ExprHelpers;
-use vegafusion_runtime::sql::logical_plan_to_spark_sql;
-
-async fn create_test_dataframe(
-    schema_fields: Vec<Field>,
-) -> Result<DataFrame, Box<dyn std::error::Error>> {
-    let ctx = SessionContext::new();
-
-    let schema = Arc::new(Schema::new(schema_fields));
-
-    let empty_batch = RecordBatch::new_empty(schema.clone());
-    let mem_table = MemTable::try_new(schema.clone(), vec![vec![empty_batch]])?;
-
-    let base_plan =
-        LogicalPlanBuilder::scan("test_table", provider_as_source(Arc::new(mem_table)), None)?
-            .build()?;
-
-    Ok(DataFrame::new(ctx.state(), base_plan))
-}
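-// The helper above produces an empty in-memory table, so the tests below
-// exercise SQL generation only. Hypothetical usage:
-//   let df = create_test_dataframe(vec![Field::new("id", DataType::Int32, false)]).await?;
-//   let sql = logical_plan_to_spark_sql(df.logical_plan())?;
-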
- .filter(col("value").gt(lit(f64::NEG_INFINITY)))?; - - let plan = filtered_df.logical_plan().clone(); - let spark_sql = logical_plan_to_spark_sql(&plan)?; - - let expected_sql = "SELECT * FROM test_table WHERE test_table.value > float('-inf') AND test_table.value < float('inf') AND test_table.value > float('NaN')"; - - assert_eq!( - spark_sql, expected_sql, - "Should wrap NaN and Infinity literals in float()" - ); - Ok(()) -} - -#[tokio::test] -async fn test_logical_plan_to_spark_sql_rewrites_subquery_column_identifiers( -) -> Result<(), Box> { - let schema_fields = vec![ - Field::new("customer_name", DataType::Utf8, false), - Field::new("customer_age", DataType::Float32, false), - ]; - - let df = create_test_dataframe(schema_fields).await?; - - // Create nested projections that would generate compound column names - let nested_df = df - .select(vec![flat_col("customer_name"), flat_col("customer_age")])? - .select(vec![flat_col("customer_name"), flat_col("customer_age")])?; - - let plan = nested_df.logical_plan().clone(); - let spark_sql = logical_plan_to_spark_sql(&plan)?; - - let expected_sql = "SELECT customer_name, customer_age FROM (SELECT test_table.customer_name, test_table.customer_age FROM test_table)"; - - assert_eq!( - spark_sql.trim(), - expected_sql, - "Generated SQL should rewrite subquery column identifiers correctly" - ); - - Ok(()) -} - -#[tokio::test] -async fn test_logical_plan_to_spark_sql_chrono_formatting() -> Result<(), Box> -{ - let schema_fields = vec![Field::new( - "timestamp_col", - DataType::Timestamp(TimeUnit::Millisecond, Some("UTC".into())), - false, - )]; - - let df = create_test_dataframe(schema_fields).await?; - - // Test basic datetime format conversion: %Y-%m-%d %H:%M:%S -> yyyy-MM-dd HH:mm:ss - let df_basic = df.clone().select(vec![to_char( - col("timestamp_col"), - lit("%Y-%m-%d %H:%M:%S"), - )])?; - - let plan_basic = df_basic.logical_plan().clone(); - let spark_sql_basic = logical_plan_to_spark_sql(&plan_basic)?; - - assert!( - spark_sql_basic.contains("yyyy-MM-dd HH:mm:ss"), - "Basic datetime format should be converted to Spark format. Got: {}", - spark_sql_basic - ); - - // Test fractional seconds: %.3f -> .SSS - let df_frac = df - .clone() - .select(vec![to_char(col("timestamp_col"), lit("%.3f"))])?; - - let plan_frac = df_frac.logical_plan().clone(); - let spark_sql_frac = logical_plan_to_spark_sql(&plan_frac)?; - - assert!( - spark_sql_frac.contains(".SSS"), - "Fractional seconds format should be converted to Spark format. Got: {}", - spark_sql_frac - ); - - // Test full ISO format: %Y-%m-%dT%H:%M:%S%.f%:z -> yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSSXXX - let df_iso = df.clone().select(vec![to_char( - col("timestamp_col"), - lit("%Y-%m-%dT%H:%M:%S%.f%:z"), - )])?; - - let plan_iso = df_iso.logical_plan().clone(); - let spark_sql_iso = logical_plan_to_spark_sql(&plan_iso)?; - - assert!( - // Double single quote because it will be inside SQL string and has to be escaped - spark_sql_iso.contains("yyyy-MM-dd\\'T\\'HH:mm:ss.SSSSSSSSSXXX"), - "ISO datetime format should be converted to Spark format. 
Got: {}", - spark_sql_iso - ); - - Ok(()) -} - -#[tokio::test] -async fn test_logical_plan_to_spark_sql_rewrites_timestamps( -) -> Result<(), Box> { - let schema_fields = vec![Field::new( - "order_date", - DataType::Timestamp(TimeUnit::Millisecond, None), - false, - )]; - - let df = create_test_dataframe(schema_fields).await?; - let df_schema = df.schema().clone(); - - // Create operations that will generate make_timestamptz function calls, TIMESTAMP WITH TIME ZONE casts, and to_timestamp_seconds calls - let timestamp_df = df.select(vec![ - // This should create a cast to TIMESTAMP WITH TIME ZONE, which should be rewritten to TIMESTAMP - col("order_date") - .try_cast_to( - &DataType::Timestamp( - TimeUnit::Millisecond, - Some("America/Los_Angeles".to_string().into()), - ), - &df_schema, - )? - .alias("order_date_tz"), - // This should create a make_timestamptz call, which should be rewritten to make_timestamp with 7th arg dropped - make_timestamptz( - lit(2023), - lit(12), - lit(25), - lit(10), - lit(30), - lit(45), - lit(123), // This 7th argument (milliseconds) should be dropped - "UTC", - ) - .alias("made_timestamp") - .into(), - // This should create a to_timestamp_seconds call, which should be rewritten to to_timestamp - to_timestamp_seconds(vec![lit("2023-12-25 10:30:45")]).alias("parsed_timestamp"), - ])?; - - let plan = timestamp_df.logical_plan().clone(); - let spark_sql = logical_plan_to_spark_sql(&plan)?; - - // Check that make_timestamptz was rewritten to make_timestamp and to_timestamp_seconds was rewritten to to_timestamp - assert_eq!( - spark_sql, - "SELECT TRY_CAST(test_table.order_date AS TIMESTAMP) AS order_date_tz, make_timestamp(2023, 12, 25, 10, 30, 45, 'UTC') AS made_timestamp, to_timestamp('2023-12-25 10:30:45') AS parsed_timestamp FROM test_table", - "Generated SQL should rewrite timestamp functions for Spark compatibility" - ); - - Ok(()) -} - -#[tokio::test] -async fn test_logical_plan_to_spark_sql_rewrites_intervals( -) -> Result<(), Box> { - let schema_fields = vec![Field::new( - "timestamp_col", - DataType::Timestamp(TimeUnit::Millisecond, None), - false, - )]; - - let df = create_test_dataframe(schema_fields).await?; - - // Create a query that will generate interval expressions with abbreviated names - let interval_df = df.select(vec![ - col("timestamp_col") + lit(datafusion_common::ScalarValue::IntervalYearMonth(Some(1))), - ])?; - - let plan = interval_df.logical_plan().clone(); - let spark_sql = logical_plan_to_spark_sql(&plan)?; - - let expected_sql = - "SELECT test_table.timestamp_col + INTERVAL '0 YEARS 1 MONTHS' FROM test_table"; - - assert_eq!( - spark_sql, expected_sql, - "Generated SQL should expand interval abbreviations to full names" - ); - - Ok(()) -} - -#[tokio::test] -async fn test_logical_plan_to_spark_sql_quotes_column_identifiers( -) -> Result<(), Box> { - let schema_fields = vec![ - Field::new("customer name", DataType::Utf8, false), // space in name - Field::new("customer-email", DataType::Utf8, false), // hyphen in name - Field::new("select", DataType::Utf8, false), // SQL reserved word - Field::new("from", DataType::Utf8, false), // SQL reserved word - Field::new("Total Amount", DataType::Float64, false), // space and capital letters - Field::new("123field", DataType::Int32, false), // starts with digit - Field::new("normal_field", DataType::Int32, false), // normal field (shouldn't be quoted) - ]; - - let df = create_test_dataframe(schema_fields).await?; - - // Create a nested query structure to test recursive alias quoting - // This 
-#[tokio::test]
-async fn test_logical_plan_to_spark_sql_quotes_column_identifiers(
-) -> Result<(), Box<dyn std::error::Error>> {
-    let schema_fields = vec![
-        Field::new("customer name", DataType::Utf8, false), // space in name
-        Field::new("customer-email", DataType::Utf8, false), // hyphen in name
-        Field::new("select", DataType::Utf8, false),        // SQL reserved word
-        Field::new("from", DataType::Utf8, false),          // SQL reserved word
-        Field::new("Total Amount", DataType::Float64, false), // space and capital letters
-        Field::new("123field", DataType::Int32, false),     // starts with digit
-        Field::new("normal_field", DataType::Int32, false), // normal field (shouldn't be quoted)
-    ];
-
-    let df = create_test_dataframe(schema_fields).await?;
-
-    // Create a nested query structure to test recursive alias quoting. This creates:
-    // SELECT ... FROM (SELECT ... AS "nested alias" FROM (SELECT ... FROM test_table))
-    let inner_df = df.select(vec![
-        flat_col("customer name").alias("inner customer name"), // nested alias with space
-        flat_col("select").alias("inner select"), // nested alias with reserved word
-        flat_col("normal_field").alias("inner_normal"), // normal nested alias
-    ])?;
-
-    let middle_df = inner_df.select(vec![
-        flat_col("inner customer name").alias("middle customer name"), // another level of nesting
-        flat_col("inner select").alias("middle from"), // reserved word alias
-        flat_col("inner_normal"), // pass through
-    ])?;
-
-    let result_df = middle_df.select(vec![
-        flat_col("middle customer name").alias("final name"), // final alias with space
-        flat_col("middle from").alias("final location"), // final alias with space
-        flat_col("inner_normal").alias("final_normal"),
-    ])?;
-
-    let plan = result_df.logical_plan().clone();
-    let spark_sql = logical_plan_to_spark_sql(&plan)?;
-
-    // The expected SQL should have quoted aliases at all levels of nesting
-    let expected_sql = "SELECT `middle customer name` AS `final name`, `middle from` AS `final location`, inner_normal AS final_normal FROM (SELECT `inner customer name` AS `middle customer name`, `inner select` AS `middle from`, inner_normal FROM (SELECT test_table.`customer name` AS `inner customer name`, test_table.`select` AS `inner select`, test_table.normal_field AS inner_normal FROM test_table))";
-
-    assert_eq!(
-        spark_sql.trim(),
-        expected_sql,
-        "Generated SQL should properly quote column names and aliases at all nesting levels"
-    );
-
-    Ok(())
-}
-
-#[tokio::test]
-async fn test_logical_plan_to_spark_sql_parenthesizes_nested_is_null(
-) -> Result<(), Box<dyn std::error::Error>> {
-    let schema_fields = vec![Field::new("IMDB Rating", DataType::Utf8, false)];
-
-    let df = create_test_dataframe(schema_fields).await?;
-
-    // Build nested IS NULL: (test_table.`IMDB Rating` IS NULL) IS NULL
-    let nested_is_null = Expr::IsNull(Box::new(Expr::IsNull(Box::new(col("IMDB Rating")))));
-    let expr = nested_is_null.alias("check_null");
-
-    let df = df.select(vec![expr])?;
-    let plan = df.logical_plan().clone();
-    let spark_sql = logical_plan_to_spark_sql(&plan)?;
-
-    let expected_sql =
-        "SELECT (test_table.`IMDB Rating` IS NULL) IS NULL AS check_null FROM test_table";
-
-    assert_eq!(
-        spark_sql.trim(),
-        expected_sql,
-        "Generated SQL should parenthesize nested IS NULL for Spark compatibility"
-    );
-
-    Ok(())
-}
diff --git a/vegafusion-runtime/tests/test_task_graph_runtime.rs b/vegafusion-runtime/tests/test_task_graph_runtime.rs
index 39c5a5f37..7e0023c0a 100644
--- a/vegafusion-runtime/tests/test_task_graph_runtime.rs
+++ b/vegafusion-runtime/tests/test_task_graph_runtime.rs
@@ -83,12 +83,7 @@ async fn try_it() {
     let graph_runtime = VegaFusionRuntime::default();
     // let result = graph_runtime.get_node_value(graph, 2, None).await.unwrap();
     let result = graph_runtime
-        .get_node_value(
-            graph,
-            &NodeValueIndex::new(2, Some(0)),
-            Default::default(),
-            None,
-        )
+        .get_node_value(graph, &NodeValueIndex::new(2, Some(0)), Default::default())
         .await
         .unwrap();
 
@@ -145,12 +140,7 @@ async fn try_it_from_spec() {
     let graph_runtime = VegaFusionRuntime::default();
     let result = graph_runtime
-        .get_node_value(
-            graph,
-            &NodeValueIndex::new(2, Some(0)),
-            Default::default(),
-            None,
-        )
+        .get_node_value(graph, &NodeValueIndex::new(2, Some(0)), Default::default())
         .await
         .unwrap();
     println!("result: {result:?}");
diff --git a/vegafusion-server/Cargo.toml b/vegafusion-server/Cargo.toml
index d5c10dc2e..0346f01ad 100644
--- a/vegafusion-server/Cargo.toml
+++ b/vegafusion-server/Cargo.toml
@@ -5,7 +5,7 @@ path = "src/main.rs"
 [package]
 name = "vegafusion-server"
 license = "BSD-3-Clause"
-version = "2.0.8"
+version = "2.0.3"
 edition = "2021"
 description = "VegaFusion Server"
 repository = "https://github.com/vega/vegafusion"
@@ -34,16 +34,16 @@ workspace = true
 
 [dependencies.vegafusion-common]
 path = "../vegafusion-common"
-version = "2.0.8"
+version = "2.0.3"
 
 [dependencies.vegafusion-core]
 path = "../vegafusion-core"
 features = ["tonic_support"]
-version = "2.0.8"
+version = "2.0.3"
 
 [dependencies.vegafusion-runtime]
 path = "../vegafusion-runtime"
-version = "2.0.8"
+version = "2.0.3"
 
 [dependencies.tokio]
 workspace = true
diff --git a/vegafusion-wasm/Cargo.toml b/vegafusion-wasm/Cargo.toml
index 9d7db6279..b569484cb 100644
--- a/vegafusion-wasm/Cargo.toml
+++ b/vegafusion-wasm/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "vegafusion-wasm"
 license = "BSD-3-Clause"
-version = "2.0.8"
+version = "2.0.3"
 edition = "2021"
 description = "VegaFusion WASM package for embedding Vega charts in the browser with a connection to a VegaFusion Runtime"
 
@@ -41,15 +41,15 @@ workspace = true
 [dependencies.vegafusion-common]
 path = "../vegafusion-common"
 features = ["json"]
-version = "2.0.8"
+version = "2.0.3"
 
 [dependencies.vegafusion-core]
 path = "../vegafusion-core"
-version = "2.0.8"
+version = "2.0.3"
 
 [dependencies.vegafusion-runtime]
 path = "../vegafusion-runtime"
-version = "2.0.8"
+version = "2.0.3"
 default-features = false
 features = ["http-wasm"]
diff --git a/vegafusion-wasm/package-lock.json b/vegafusion-wasm/package-lock.json
index 69fe23e55..1e5ea54c6 100644
--- a/vegafusion-wasm/package-lock.json
+++ b/vegafusion-wasm/package-lock.json
@@ -1,6 +1,6 @@
 {
   "name": "vegafusion-wasm",
-  "version": "2.0.8",
+  "version": "2.0.3",
   "lockfileVersion": 2,
   "requires": true,
   "packages": {
diff --git a/vegafusion-wasm/package.json b/vegafusion-wasm/package.json
index 581d05791..2784d02c5 100644
--- a/vegafusion-wasm/package.json
+++ b/vegafusion-wasm/package.json
@@ -1,6 +1,6 @@
 {
   "name": "vegafusion-wasm",
-  "version": "2.0.8",
+  "version": "2.0.3",
   "author": "Jon Mease (https://jonmmease.dev)",
   "license": "BSD-3-Clause",
   "description": "Wasm library for interfacing with VegaFusion",