Skip to content

Commit

Permalink
feat: make it possible to build lance without protoc (except on Windo…
Browse files Browse the repository at this point in the history
…ws) (#3363)

Inspired by substrait-io/substrait-validator#356
this PR adds a `protoc` feature to `lance`. If the feature is specified
then we will use
[`protobuf-src`](https://github.com/MaterializeInc/rust-protobuf-native)
to build a vendored copy of `protoc`.

This increases build times slightly (need to compile `protoc` as part of
the build) but removes the need for an external copy of `protoc`.

At the moment it is not possible to build both the `protoc` and
`substrait` features because `datafusion-substrait` does not yet have
its own `protoc` feature (but it will hopefully have one added soon).
  • Loading branch information
westonpace authored Jan 10, 2025
1 parent 226d86f commit f478c46
Show file tree
Hide file tree
Showing 18 changed files with 121 additions and 30 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/cargo-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ on:
workflow_dispatch:
inputs:
tag:
description: 'Tag to publish (e.g., v1.0.0)'
description: "Tag to publish (e.g., v1.0.0)"
required: true
type: string

Expand All @@ -24,7 +24,7 @@ jobs:
env:
# Need up-to-date compilers for kernels
CC: clang-18
CXX: clang-18
CXX: clang++-18
defaults:
run:
working-directory: .
Expand Down Expand Up @@ -53,5 +53,5 @@ jobs:
- uses: albertlockett/publish-crates@v2.2
with:
registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }}
args: '--all-features'
args: "--all-features"
path: .
2 changes: 1 addition & 1 deletion .github/workflows/ci-benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
env:
# Need up-to-date compilers for kernels
CC: clang-18
CXX: clang-18
CXX: clang++-18
defaults:
run:
shell: bash
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
env:
# Need up-to-date compilers for kernels
CC: clang-18
CXX: clang-18
CXX: clang++-18
steps:
- uses: actions/checkout@v4
with:
Expand Down Expand Up @@ -67,8 +67,9 @@ jobs:
sudo apt install -y protobuf-compiler libssl-dev
- name: Lint Rust
run: |
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo fmt --all -- --check
cargo clippy --locked --all-features --tests -- -D warnings
cargo clippy --locked --features ${ALL_FEATURES} --tests -- -D warnings
- name: Build
run: |
python -m venv venv
Expand Down
43 changes: 29 additions & 14 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,9 @@ jobs:
sudo apt install -y protobuf-compiler libssl-dev
- name: Run clippy
run: |
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo clippy --version
cargo clippy --locked --all-features --tests --benches -- -D warnings
cargo clippy --locked --features ${ALL_FEATURES} --tests --benches -- -D warnings
linux-build:
runs-on: "ubuntu-24.04"
timeout-minutes: 45
Expand All @@ -59,7 +60,7 @@ jobs:
env:
# Need up-to-date compilers for kernels
CC: clang
CXX: clang
CXX: clang++
steps:
- uses: actions/checkout@v4
# pin the toolchain version to avoid surprises
Expand All @@ -81,13 +82,18 @@ jobs:
- name: Run tests
if: ${{ matrix.toolchain == 'stable' }}
run: |
cargo llvm-cov --locked --workspace --codecov --output-path coverage.codecov --all-features
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo llvm-cov --locked --workspace --codecov --output-path coverage.codecov --features ${ALL_FEATURES}
- name: Build tests (nightly)
run: cargo test --locked --all-features --workspace --no-run
if: ${{ matrix.toolchain != 'stable' }}
run: |
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo test --locked --features ${ALL_FEATURES} --workspace --no-run
- name: Run tests (nightly)
if: ${{ matrix.toolchain != 'stable' }}
run: |
cargo test --all-features --workspace
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo test --features ${ALL_FEATURES} --workspace
- name: Upload coverage to Codecov
if: ${{ matrix.toolchain == 'stable' }}
uses: codecov/codecov-action@v4
Expand All @@ -113,20 +119,22 @@ jobs:
sudo apt install -y protobuf-compiler libssl-dev pkg-config
- name: Build tests
run: |
cargo test --locked --all-features --no-run
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo test --locked --features ${ALL_FEATURES} --no-run
- name: Start DynamoDB local for tests
run: |
docker run -d -e AWS_ACCESS_KEY_ID=DUMMYKEY -e AWS_SECRET_ACCESS_KEY=DUMMYKEY -p 8000:8000 amazon/dynamodb-local
- name: Run tests
run: |
cargo test --locked --all-features
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo test --locked --features ${ALL_FEATURES}
build-no-lock:
runs-on: ubuntu-24.04
timeout-minutes: 30
env:
# Need up-to-date compilers for kernels
CC: clang
CXX: clang
CXX: clang++
steps:
- uses: actions/checkout@v4
# Remote cargo.lock to force a fresh build
Expand All @@ -139,7 +147,9 @@ jobs:
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- name: Build all
run: cargo build --benches --all-features --tests
run: |
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo build --benches --features ${ALL_FEATURES} --tests
mac-build:
runs-on: "macos-14"
timeout-minutes: 45
Expand All @@ -165,11 +175,14 @@ jobs:
run: |
rustup update ${{ matrix.toolchain }} && rustup default ${{ matrix.toolchain }}
- name: Build tests
run: cargo test --locked --all-features --no-run
run: |
cargo test --locked --features fp16kernels,cli,tensorflow,dynamodb,substrait --no-run
- name: Run tests
run: cargo test --all-features
run: |
cargo test --features fp16kernels,cli,tensorflow,dynamodb,substrait
- name: Check benchmarks
run: cargo check --benches --all-features
run: |
cargo check --benches --features fp16kernels,cli,tensorflow,dynamodb,substrait
windows-build:
runs-on: windows-latest
defaults:
Expand Down Expand Up @@ -203,7 +216,7 @@ jobs:
env:
# Need up-to-date compilers for kernels
CC: clang
CXX: clang
CXX: clang++
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -218,4 +231,6 @@ jobs:
with:
toolchain: ${{ matrix.msrv }}
- name: cargo +${{ matrix.msrv }} check
run: cargo check --workspace --tests --benches --all-features
run: |
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo check --workspace --tests --benches --features ${ALL_FEATURES}
25 changes: 23 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions python/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions rust/lance-encoding-datafusion/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,17 @@ lance-datagen.workspace = true

[build-dependencies]
prost-build.workspace = true
protobuf-src = { version = "2.1", optional = true }

[target.'cfg(target_os = "linux")'.dev-dependencies]
pprof = { workspace = true }

[features]
protoc = ["dep:protobuf-src"]

[package.metadata.docs.rs]
# docs.rs uses an older version of Ubuntu that does not have the necessary protoc version
features = ["protoc"]

[lints]
workspace = true
4 changes: 4 additions & 0 deletions rust/lance-encoding-datafusion/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ use std::io::Result;
fn main() -> Result<()> {
println!("cargo:rerun-if-changed=protos");

#[cfg(feature = "protoc")]
// Use vendored protobuf compiler if requested.
std::env::set_var("PROTOC", protobuf_src::protoc());

let mut prost_build = prost_build::Config::new();
prost_build.extern_path(".lance.encodings", "::lance_encoding::format::pb");
prost_build.protoc_arg("--experimental_allow_proto3_optional");
Expand Down
8 changes: 8 additions & 0 deletions rust/lance-encoding/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,18 @@ rand_xoshiro = "0.6.0"

[build-dependencies]
prost-build.workspace = true
protobuf-src = { version = "2.1", optional = true }

[target.'cfg(target_os = "linux")'.dev-dependencies]
pprof = { workspace = true }

[features]
protoc = ["dep:protobuf-src"]

[package.metadata.docs.rs]
# docs.rs uses an older version of Ubuntu that does not have the necessary protoc version
features = ["protoc"]

[[bench]]
name = "decoder"
harness = false
Expand Down
4 changes: 4 additions & 0 deletions rust/lance-encoding/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ use std::io::Result;
fn main() -> Result<()> {
println!("cargo:rerun-if-changed=protos");

#[cfg(feature = "protoc")]
// Use vendored protobuf compiler if requested.
std::env::set_var("PROTOC", protobuf_src::protoc());

let mut prost_build = prost_build::Config::new();
prost_build.protoc_arg("--experimental_allow_proto3_optional");
prost_build.enable_type_names();
Expand Down
8 changes: 8 additions & 0 deletions rust/lance-file/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,18 @@ test-log.workspace = true

[build-dependencies]
prost-build.workspace = true
protobuf-src = { version = "2.1", optional = true }

[target.'cfg(target_os = "linux")'.dev-dependencies]
pprof = { workspace = true }

[features]
protoc = ["dep:protobuf-src"]

[package.metadata.docs.rs]
# docs.rs uses an older version of Ubuntu that does not have the necessary protoc version
features = ["protoc"]

[[bench]]
name = "reader"
harness = false
Expand Down
4 changes: 4 additions & 0 deletions rust/lance-file/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ use std::io::Result;
fn main() -> Result<()> {
println!("cargo:rerun-if-changed=protos");

#[cfg(feature = "protoc")]
// Use vendored protobuf compiler if requested.
std::env::set_var("PROTOC", protobuf_src::protoc());

let mut prost_build = prost_build::Config::new();
prost_build.protoc_arg("--experimental_allow_proto3_optional");
prost_build.extern_path(".lance.encodings", "::lance_encoding::format::pb");
Expand Down
6 changes: 6 additions & 0 deletions rust/lance-index/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,16 +73,22 @@ datafusion-sql.workspace = true
random_word = { version = "0.4.3", features = ["en"] }

[features]
protoc = ["dep:protobuf-src"]
tokenizer-lindera = ["lindera", "lindera-tantivy", "tokenizer-common"]
tokenizer-jieba = ["jieba-rs", "tokenizer-common"]
tokenizer-common = []

[build-dependencies]
prost-build.workspace = true
protobuf-src = { version = "2.1", optional = true }

[target.'cfg(target_os = "linux")'.dev-dependencies]
pprof.workspace = true

[package.metadata.docs.rs]
# docs.rs uses an older version of Ubuntu that does not have the necessary protoc version
features = ["protoc"]

[[bench]]
name = "find_partitions"
harness = false
Expand Down
4 changes: 4 additions & 0 deletions rust/lance-index/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ use std::io::Result;
fn main() -> Result<()> {
println!("cargo:rerun-if-changed=protos");

#[cfg(feature = "protoc")]
// Use vendored protobuf compiler if requested.
std::env::set_var("PROTOC", protobuf_src::protoc());

let mut prost_build = prost_build::Config::new();
prost_build.protoc_arg("--experimental_allow_proto3_optional");
prost_build.compile_protos(&["./protos/index.proto"], &["./protos"])?;
Expand Down
Loading

0 comments on commit f478c46

Please sign in to comment.