Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Duckdb updates #337

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ duckdb-loadable-macros = { version = "0.1.2", path = "crates/duckdb-loadable-mac
autocfg = "1.0"
bindgen = { version = "0.69", default-features = false }
byteorder = "1.3"
calamine = "0.22.0"
calamine = "0.22"
cast = "0.3"
cc = "1.0"
chrono = "0.4.22"
Expand All @@ -40,8 +40,9 @@ memchr = "2.3"
num = { version = "0.4", default-features = false }
num-integer = "0.1.46"
pkg-config = "0.3.24"
polars = "0.35.4"
polars-core = "0.35.4"
polars = "0.38"
polars-core = "0.38"
derive_more = "0.99"
pretty_assertions = "1.4.0"
proc-macro2 = "1.0.56"
quote = "1.0.21"
Expand All @@ -56,9 +57,14 @@ strum = "0.25"
syn = "2.0.15"
tar = "0.4.38"
tempdir = "0.3.7"
thiserror = "1.0"
tempfile = "3.1.0"
unicase = "2.6.0"
url = "2.1"
uuid = "1.0"
vcpkg = "0.2"
arrow = { version = "52", default-features = false }
rusqlite = "0.31"
arrow_convert = "0.6"
itertools = "0.13"
criterion = { version = "0.5", features = [ "html_reports"] }
18 changes: 16 additions & 2 deletions crates/duckdb/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ description = "Ergonomic wrapper for DuckDB"

[lib]
name = "duckdb"
bench = false

[features]
default = []
Expand Down Expand Up @@ -54,9 +55,13 @@ strum = { workspace = true, features = ["derive"] }
r2d2 = { workspace = true, optional = true }
calamine = { workspace = true, optional = true }
num = { workspace = true, features = ["std"], optional = true }
derive_more = { workspace = true }
duckdb-loadable-macros = { workspace = true, optional = true }
polars = { workspace = true, features = ["dtype-full"], optional = true }
num-integer = { workspace = true }
thiserror = { workspace = true }
arrow_convert = { workspace = true }
itertools = { workspace = true }

[dev-dependencies]
doc-comment = { workspace = true }
Expand All @@ -69,13 +74,13 @@ rand = { workspace = true }
tempdir = { workspace = true }
polars-core = { workspace = true }
pretty_assertions = { workspace = true }
# criterion = "0.3"
rusqlite = { workspace = true }
criterion = { workspace = true }

# [[bench]]
# name = "data_types"
# harness = false


[package.metadata.docs.rs]
features = []
all-features = false
Expand All @@ -90,3 +95,12 @@ all-features = false
name = "hello-ext"
crate-type = ["cdylib"]
required-features = ["vtab-loadable"]

[[example]]
name = "appender"
crate-type = ["cdylib"]
required-features = ["appender-arrow"]

[[bench]]
name = "issue-282"
harness = false
1 change: 1 addition & 0 deletions crates/duckdb/benches/issue-282/data/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
db.*
19 changes: 19 additions & 0 deletions crates/duckdb/benches/issue-282/generate-database.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
#
# Builds the two benchmark databases used by the issue-282 benchmark:
#   data/db.sqlite  (SQLite)
#   data/db.duckdb  (DuckDB)
#
# Both are loaded from `output.csv`, which must have a header row and live in
# the current working directory. Run this script from the directory that
# contains `data/`. It requires the `sqlite3` (>= 3.32, for `.import --skip`)
# and `duckdb` command-line tools.
#
# Re-running the script appends the CSV rows again; delete `data/db.*` first
# to start from scratch.

# Stop at the first failing command instead of silently producing a
# half-populated database.
set -euo pipefail

# SQLite
# The table is created before the import, so `.import` would otherwise treat
# the CSV header line as a data row; `--skip 1` drops it, matching the
# `HEADER` option used for DuckDB below.
sqlite3 data/db.sqlite <<EOF
CREATE TABLE IF NOT EXISTS income (
id INTEGER,
created_at INTEGER,
amount REAL,
category_id INTEGER,
wallet_id INTEGER,
meta TEXT
);
.mode csv
.import --skip 1 output.csv income
EOF

# DuckDB
# `IF NOT EXISTS` mirrors the SQLite schema statement above.
echo "CREATE TABLE IF NOT EXISTS income (id INTEGER, created_at INTEGER, amount REAL, category_id INTEGER, wallet_id INTEGER, meta TEXT);" | duckdb data/db.duckdb
echo "COPY income FROM 'output.csv' (HEADER);" | duckdb data/db.duckdb
126 changes: 126 additions & 0 deletions crates/duckdb/benches/issue-282/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
//! Before running these benchmarks, make sure the databases have been generated with the `generate-database.sh` utility.
use arrow_convert::{ArrowDeserialize, ArrowField, ArrowSerialize};
use criterion::{criterion_group, criterion_main, Criterion};

/// Opens the SQLite benchmark database stored under this crate's
/// `benches/issue-282/data/` directory.
///
/// # Panics
///
/// Panics if the database cannot be opened.
pub fn sqlite_db() -> rusqlite::Connection {
    // Resolved at compile time relative to the crate manifest.
    const SQLITE_FILE: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/benches/issue-282/data/db.sqlite");
    rusqlite::Connection::open(SQLITE_FILE).unwrap()
}

/// Opens the DuckDB benchmark database stored under this crate's
/// `benches/issue-282/data/` directory.
///
/// # Panics
///
/// Panics if the database cannot be opened.
pub fn duck_db() -> duckdb::Connection {
    // Resolved at compile time relative to the crate manifest.
    const DUCKDB_FILE: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/benches/issue-282/data/db.duckdb");
    duckdb::Connection::open(DUCKDB_FILE).unwrap()
}

/// One row of the `income` table as read by the benchmark queries.
///
/// The `select_duckdb` and `select_sqlite` methods fill the fields positionally
/// (column 0 through 4), so the field order here must match the column order of
/// their `SELECT` statements. The Arrow derives are what allow
/// `select_duckdb_arrow` to request rows as `Income` values.
#[derive(Debug, ArrowField, ArrowSerialize, ArrowDeserialize)]
struct Income {
/// Value of the `created_at` column; the queries filter on it with an inclusive range.
created_at: Option<i32>,
/// Value of the `amount` column.
amount: Option<f32>,
/// Value of the `category_id` column; the only field that is not optional.
/// NOTE(review): a NULL here presumably makes row decoding fail — confirm.
category_id: i32,
/// Value of the `wallet_id` column.
wallet_id: Option<i32>,
/// Value of the `meta` column.
meta: Option<String>,
}

impl Income {
    /// Builds the SQL text shared by all three benchmark readers: every
    /// `Income` column for rows whose `created_at` lies within `start..=end`.
    fn range_sql(start: u32, end: u32) -> String {
        format!(
            "SELECT created_at, amount, category_id, wallet_id, meta \
             FROM 'income' \
             WHERE created_at >= {} AND created_at <= {}",
            start, end
        )
    }

    /// Reads the matching rows from DuckDB through the Arrow-deserializing
    /// query path.
    ///
    /// # Errors
    ///
    /// Returns any error raised while preparing or running the statement.
    fn select_duckdb_arrow(
        conn: &duckdb::Connection,
        start: u32,
        end: u32,
    ) -> Result<Vec<Self>, Box<dyn std::error::Error>> {
        let query = Self::range_sql(start, end);
        let mut statement = conn.prepare_cached(&query)?;
        let rows = statement.query_arrow_deserialized::<Income>([])?;
        Ok(rows)
    }

    /// Reads the matching rows from DuckDB one row at a time, decoding each
    /// column positionally.
    ///
    /// # Errors
    ///
    /// Returns the first error raised while preparing the statement, running
    /// it, or decoding a row.
    fn select_duckdb(conn: &duckdb::Connection, start: u32, end: u32) -> Result<Vec<Self>, Box<dyn std::error::Error>> {
        let query = Self::range_sql(start, end);
        let mut statement = conn.prepare_cached(&query)?;
        // Collecting into `Result` stops at the first failing row, exactly
        // like pushing with `?` inside a loop would.
        let rows = statement
            .query_map([], |row| {
                Ok(Self {
                    created_at: row.get(0)?,
                    amount: row.get(1)?,
                    category_id: row.get(2)?,
                    wallet_id: row.get(3)?,
                    meta: row.get(4)?,
                })
            })?
            .collect::<Result<Vec<Self>, _>>()?;
        Ok(rows)
    }

    /// Reads the matching rows from SQLite one row at a time, decoding each
    /// column positionally.
    ///
    /// # Errors
    ///
    /// Returns the first error raised while preparing the statement, running
    /// it, or decoding a row.
    fn select_sqlite(
        conn: &rusqlite::Connection,
        start: u32,
        end: u32,
    ) -> Result<Vec<Self>, Box<dyn std::error::Error>> {
        let query = Self::range_sql(start, end);
        let mut statement = conn.prepare(&query)?;
        // Collecting into `Result` stops at the first failing row, exactly
        // like pushing with `?` inside a loop would.
        let rows = statement
            .query_map([], |row| {
                Ok(Self {
                    created_at: row.get(0)?,
                    amount: row.get(1)?,
                    category_id: row.get(2)?,
                    wallet_id: row.get(3)?,
                    meta: row.get(4)?,
                })
            })?
            .collect::<Result<Vec<Self>, _>>()?;
        Ok(rows)
    }
}

/// Benchmarks reading the fixed `created_at` window from SQLite row by row.
///
/// The connection is opened once, outside the measured closure; each iteration
/// runs the query and returns the number of rows read.
fn bench_sqlite(c: &mut Criterion) {
    let conn = sqlite_db();
    // Inclusive `created_at` bounds queried on every iteration.
    let (window_start, window_end) = (1709292049, 1711375239);
    c.bench_function("sqlite_test", |bencher| {
        bencher.iter(|| {
            let rows = Income::select_sqlite(&conn, window_start, window_end).unwrap();
            rows.len()
        })
    });
}

/// Benchmarks reading the fixed `created_at` window from DuckDB row by row.
///
/// The connection is opened once, outside the measured closure; each iteration
/// runs the query and returns the number of rows read.
fn bench_duckdb(c: &mut Criterion) {
    let conn = duck_db();
    // Inclusive `created_at` bounds queried on every iteration.
    let (window_start, window_end) = (1709292049, 1711375239);
    c.bench_function("duckdb_test", |bencher| {
        bencher.iter(|| {
            let rows = Income::select_duckdb(&conn, window_start, window_end).unwrap();
            rows.len()
        })
    });
}

/// Benchmarks reading the fixed `created_at` window from DuckDB through the
/// Arrow-deserializing query path.
///
/// The connection is opened once, outside the measured closure; each iteration
/// runs the query and returns the number of rows read.
fn bench_duckdb_arrow(c: &mut Criterion) {
    let conn = duck_db();
    // Inclusive `created_at` bounds queried on every iteration.
    let (window_start, window_end) = (1709292049, 1711375239);
    c.bench_function("duckdb_test_arrow", |bencher| {
        bencher.iter(|| {
            let rows = Income::select_duckdb_arrow(&conn, window_start, window_end).unwrap();
            rows.len()
        })
    });
}

// To measure only the Arrow path, replace the group below with:
// criterion_group!(benches, bench_duckdb_arrow);
//
// Registers all three benchmarks in a single group named `benches`, which
// `criterion_main!` then uses as the benchmark binary's entry point.
criterion_group!(benches, bench_sqlite, bench_duckdb, bench_duckdb_arrow);
criterion_main!(benches);
Loading
Loading