diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index be8fc591e..fef6f2720 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -25,40 +25,13 @@ jobs: - uses: actions/cache@v4 name: nightly toolchain cache with: - path: | - ~/.rustup/toolchains/ + path: ~/.rustup/toolchains/ key: ${{ runner.os }}-toolchain-${{ hashFiles('**/rustfmt.toml') }} - run: rustup install nightly - run: rustup component add rustfmt --toolchain nightly - run: just fmt-check - lint: - name: Lint (clippy) - # emperically runtimes are the same for big/small hosts: - runs-on: ubuntu-latest - needs: ["build"] - steps: - - name: checkout - uses: actions/checkout@v4 - - uses: extractions/setup-just@v1 - with: - just-version: "1.23.0" - - uses: actions/cache/restore@v4 - name: toolchain cache - with: - path: | - ~/.rustup/toolchains/ - key: ${{ runner.os }}-toolchain-${{ hashFiles('**/rust-toolchain.toml') }} - - uses: actions/cache/restore@v4 - name: cargo cache - with: - path: | - ~/.cargo/ - key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - - run: just clippy - - build: name: Build runs-on: ubuntu-latest-8-cores @@ -72,23 +45,24 @@ jobs: - uses: actions/cache@v4 name: toolchain cache with: - path: | - ~/.rustup/toolchains/ + path: ~/.rustup/toolchains/ key: ${{ runner.os }}-toolchain-${{ hashFiles('**/rust-toolchain.toml') }} - uses: actions/cache@v4 name: cargo cache with: - path: | - ~/.cargo/ + path: ~/.cargo/ key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - uses: actions/cache@v4 - name: workspace cache + name: build cache with: path: | target/ !target/**/glaredb !target/**/pgprototest - key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + key: ${{ runner.os }}-cargo-build-${{ github.ref_name }} + restore-keys: | + ${{ runner.os }}-cargo-glaredb-build-${{ hashFiles('**/Cargo.lock') }} + ${{ runner.os }}-cargo-glaredb-build- - run: just build - run: cargo build --bin pgprototest - uses: actions/cache/save@v4 @@ 
-103,10 +77,68 @@ jobs: key: ${{ github.run_id }} + cache: + # update a cache that can be used between builds, but do it + # outside of the hot path. + name: cache helper + needs: ["build"] + runs-on: ubuntu-latest + steps: + # checkout so that hashFiles() in the save key below can see Cargo.lock. + - uses: actions/checkout@v4 + - uses: actions/cache/restore@v4 + name: build cache + with: + path: | + target/ + !target/**/glaredb + !target/**/pgprototest + key: ${{ runner.os }}-cargo-build-${{ github.ref_name }} + - uses: actions/cache/save@v4 + name: build cache + with: + path: | + target/ + !target/**/glaredb + !target/**/pgprototest + key: ${{ runner.os }}-cargo-glaredb-build-${{ hashFiles('**/Cargo.lock') }} + + lint: + name: Lint (clippy) + # empirically, runtimes are the same for big/small hosts: + runs-on: ubuntu-latest + needs: ["build"] + steps: + - name: checkout + uses: actions/checkout@v4 + - uses: extractions/setup-just@v1 + with: + just-version: "1.23.0" + - uses: actions/cache/restore@v4 + name: toolchain cache + with: + path: ~/.rustup/toolchains/ + key: ${{ runner.os }}-toolchain-${{ hashFiles('**/rust-toolchain.toml') }} + - uses: actions/cache/restore@v4 + name: cargo cache + with: + path: ~/.cargo/ + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + - uses: actions/cache/restore@v4 + name: build cache + with: + path: | + target/ + !target/**/glaredb + !target/**/pgprototest + key: ${{ runner.os }}-cargo-build-${{ github.ref_name }} + - run: just clippy + + python-binding-tests: name: Python Binding Tests runs-on: ubuntu-latest-4-cores - needs: ["fmt"] + needs: ["build"] steps: - name: checkout uses: actions/checkout@v4 @@ -116,23 +148,21 @@ jobs: - uses: actions/cache/restore@v4 name: toolchain cache with: - path: | - ~/.rustup/toolchains/ + path: ~/.rustup/toolchains/ key: ${{ runner.os }}-toolchain-${{ hashFiles('**/rust-toolchain.toml') }} - uses: actions/cache/restore@v4 name: cargo cache with: - path: | - ~/.cargo/ + path: ~/.cargo/ key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - uses: actions/cache/restore@v4 - 
name: workspace cache + name: build cache with: path: | target/ !target/**/glaredb !target/**/pgprototest - key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + key: ${{ runner.os }}-cargo-build-${{ github.ref_name }} - run: just python build - run: just python test @@ -140,7 +168,7 @@ jobs: nodejs-bindings-tests: name: Node.js Binding Tests runs-on: ubuntu-latest-8-cores - needs: ["fmt"] + needs: ["build"] steps: - uses: actions/checkout@v4 - uses: actions/setup-node@v4 @@ -155,20 +183,20 @@ jobs: path: | ~/.rustup/toolchains/ key: ${{ runner.os }}-toolchain-${{ hashFiles('**/rust-toolchain.toml') }} - - uses: actions/cache@v4 + - uses: actions/cache/restore@v4 name: cargo cache with: path: | ~/.cargo/ key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - uses: actions/cache/restore@v4 - name: workspace cache + name: build cache with: path: | target/ !target/**/glaredb !target/**/pgprototest - key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + key: ${{ runner.os }}-cargo-build-${{ github.ref_name }} - run: just js build-debug - run: just js test @@ -186,15 +214,21 @@ jobs: - uses: actions/cache/restore@v4 name: toolchain cache with: - path: | - ~/.rustup/toolchains/ + path: ~/.rustup/toolchains/ key: ${{ runner.os }}-toolchain-${{ hashFiles('**/rust-toolchain.toml') }} - uses: actions/cache/restore@v4 name: cargo cache with: - path: | - ~/.cargo/ + path: ~/.cargo/ key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + - uses: actions/cache/restore@v4 + name: build cache + with: + path: | + target/ + !target/**/glaredb + !target/**/pgprototest + key: ${{ runner.os }}-cargo-build-${{ github.ref_name }} - uses: actions/cache/restore@v4 name: glaredb cache with: @@ -289,9 +323,8 @@ jobs: - uses: actions/cache@v4 name: py cache with: - path: | - tests/.venv/ - key: ${{ runner.os }}-poetry-${{ hashFiles('**/Cargo.lock') }} + path: tests/.venv/ + key: ${{ runner.os }}-poetry-${{ hashFiles('tests/poetry.lock') }} - run: just venv 
- run: just pytest diff --git a/Cargo.lock b/Cargo.lock index 15714bdf0..915132bc9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -467,7 +467,7 @@ dependencies = [ [[package]] name = "arrow_util" -version = "0.8.1" +version = "0.8.2" dependencies = [ "comfy-table", "datafusion", @@ -1038,7 +1038,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" [[package]] name = "bench_runner" -version = "0.8.1" +version = "0.8.2" dependencies = [ "anyhow", "async-trait", @@ -1352,7 +1352,7 @@ dependencies = [ [[package]] name = "bytesutil" -version = "0.8.1" +version = "0.8.2" dependencies = [ "bytes", ] @@ -1428,7 +1428,7 @@ dependencies = [ [[package]] name = "catalog" -version = "0.8.1" +version = "0.8.2" dependencies = [ "datafusion", "logutil", @@ -2350,7 +2350,7 @@ dependencies = [ [[package]] name = "datafusion_ext" -version = "0.8.1" +version = "0.8.2" dependencies = [ "async-recursion", "async-trait", @@ -2380,7 +2380,7 @@ dependencies = [ [[package]] name = "datasources" -version = "0.8.1" +version = "0.8.2" dependencies = [ "apache-avro", "async-channel", @@ -2441,7 +2441,7 @@ dependencies = [ [[package]] name = "decimal" -version = "0.8.1" +version = "0.8.2" dependencies = [ "num-traits", "regex", @@ -2694,7 +2694,7 @@ dependencies = [ [[package]] name = "distexec" -version = "0.8.1" +version = "0.8.2" dependencies = [ "async-channel", "datafusion", @@ -3331,7 +3331,7 @@ dependencies = [ [[package]] name = "glaredb" -version = "0.8.1" +version = "0.8.2" dependencies = [ "anyhow", "arrow_util", @@ -3755,7 +3755,7 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "ioutil" -version = "0.8.1" +version = "0.8.2" dependencies = [ "bytes", "home", @@ -4353,7 +4353,7 @@ checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" [[package]] name = "logutil" -version = "0.8.1" +version = "0.8.2" dependencies = [ "chrono", "tracing", @@ -4537,7 +4537,7 @@ dependencies = [ 
[[package]] name = "metastore" -version = "0.8.1" +version = "0.8.2" dependencies = [ "async-trait", "bytes", @@ -5136,7 +5136,7 @@ dependencies = [ [[package]] name = "object_store_util" -version = "0.8.1" +version = "0.8.2" dependencies = [ "async-trait", "bytes", @@ -5420,7 +5420,7 @@ dependencies = [ [[package]] name = "pgprototest" -version = "0.8.1" +version = "0.8.2" dependencies = [ "anyhow", "bytes", @@ -5434,7 +5434,7 @@ dependencies = [ [[package]] name = "pgrepr" -version = "0.8.1" +version = "0.8.2" dependencies = [ "bytes", "chrono", @@ -5452,7 +5452,7 @@ dependencies = [ [[package]] name = "pgsrv" -version = "0.8.1" +version = "0.8.2" dependencies = [ "async-trait", "bytes", @@ -5834,7 +5834,7 @@ dependencies = [ [[package]] name = "protogen" -version = "0.8.1" +version = "0.8.2" dependencies = [ "datafusion", "datafusion-proto", @@ -5852,7 +5852,7 @@ dependencies = [ [[package]] name = "proxyutil" -version = "0.8.1" +version = "0.8.2" dependencies = [ "async-trait", "futures", @@ -5958,7 +5958,7 @@ dependencies = [ [[package]] name = "py-glaredb" -version = "0.8.1" +version = "0.8.2" dependencies = [ "anyhow", "arrow_util", @@ -6303,7 +6303,7 @@ dependencies = [ [[package]] name = "repr" -version = "0.8.1" +version = "0.8.2" dependencies = [ "chrono", "decimal", @@ -6457,7 +6457,7 @@ dependencies = [ [[package]] name = "rpcsrv" -version = "0.8.1" +version = "0.8.2" dependencies = [ "arrow-flight", "async-trait", @@ -7330,7 +7330,7 @@ dependencies = [ [[package]] name = "slt" -version = "0.8.1" +version = "0.8.2" dependencies = [ "anyhow", "arrow-flight", @@ -7396,7 +7396,7 @@ checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831" [[package]] name = "snowflake_connector" -version = "0.8.1" +version = "0.8.2" dependencies = [ "base64 0.21.7", "bytes", @@ -7459,7 +7459,7 @@ dependencies = [ [[package]] name = "sqlbuiltins" -version = "0.8.1" +version = "0.8.2" dependencies = [ "async-trait", "datafusion", @@ -7487,7 +7487,7 @@ 
dependencies = [ [[package]] name = "sqlexec" -version = "0.8.1" +version = "0.8.2" dependencies = [ "arrow_util", "async-trait", @@ -7835,7 +7835,7 @@ checksum = "14c39fd04924ca3a864207c66fc2cd7d22d7c016007f9ce846cbb9326331930a" [[package]] name = "telemetry" -version = "0.8.1" +version = "0.8.2" dependencies = [ "segment", "serde_json", @@ -7868,7 +7868,7 @@ dependencies = [ [[package]] name = "terminal_util" -version = "0.8.1" +version = "0.8.2" dependencies = [ "crossterm", ] diff --git a/Cargo.toml b/Cargo.toml index c9b196bed..1f6d17304 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ default-members = ["crates/*", "xtask"] resolver = "2" [workspace.package] -version = "0.8.1" +version = "0.8.2" edition = "2021" [profile.release] diff --git a/bindings/nodejs/package.json b/bindings/nodejs/package.json index eed2ac76d..7dfa44445 100644 --- a/bindings/nodejs/package.json +++ b/bindings/nodejs/package.json @@ -1,6 +1,6 @@ { "name": "@glaredb/glaredb", - "version": "0.8.1", + "version": "0.8.2", "main": "glaredb.js", "types": "index.d.ts", "repository": { diff --git a/crates/datasources/src/bson/builder.rs b/crates/datasources/src/bson/builder.rs index 7df9afd78..fc480d537 100644 --- a/crates/datasources/src/bson/builder.rs +++ b/crates/datasources/src/bson/builder.rs @@ -241,11 +241,16 @@ macro_rules! 
append_scalar { fn append_value(val: RawBsonRef, typ: &DataType, col: &mut dyn ArrayBuilder) -> Result<()> { // So robust match (val, typ) { + // null + (RawBsonRef::Null, _) => append_null(typ, col)?, + (RawBsonRef::Undefined, _) => append_null(typ, col)?, + // Boolean (RawBsonRef::Boolean(v), DataType::Boolean) => append_scalar!(BooleanBuilder, col, v), (RawBsonRef::Boolean(v), DataType::Utf8) => { append_scalar!(StringBuilder, col, v.to_string()) } + // Double (RawBsonRef::Double(v), DataType::Int32) => append_scalar!(Int32Builder, col, v as i32), (RawBsonRef::Double(v), DataType::Int64) => append_scalar!(Int64Builder, col, v as i64), diff --git a/justfile b/justfile index c59860a09..9203d453b 100644 --- a/justfile +++ b/justfile @@ -128,8 +128,9 @@ venv: # Runs pytest in the tests directory. -pytest *args: - {{VENV_BIN}}/poetry -C tests run pytest --rootdir={{invocation_directory()}}/tests {{ if args == "" {'tests'} else {args} }} +pytest *args: + {{VENV_BIN}}/poetry -C tests lock --no-update + {{VENV_BIN}}/poetry -C tests run pytest --rootdir={{invocation_directory()}}/tests {{ if args == "" {'tests'} else {args} }} # private helpers below # --------------------- diff --git a/scripts/create-test-mongo-db.sh b/scripts/create-test-mongo-db.sh index 80d104a00..58566122b 100755 --- a/scripts/create-test-mongo-db.sh +++ b/scripts/create-test-mongo-db.sh @@ -40,6 +40,11 @@ docker exec $CONTAINER_ID mongoimport \ "mongodb://localhost:27017/${DB_NAME}" \ /tmp/bikeshare_stations.csv 1>&2 +# insert fixture data for a null handling regression test. +docker exec $CONTAINER_ID mongosh \ + "mongodb://localhost:27017/${DB_NAME}" \ + --eval "db.null_test.insertMany([{a:1},{a:null}])" 1>&2 + # The mongod docker container is kinda bad. The MONGO_INITDB_... environment vars # might look like the obvious solution, but they don't work as you would expect. 
# diff --git a/testdata/sqllogictests_mongodb/read.slt b/testdata/sqllogictests_mongodb/read.slt index ed1fbe5e8..54850b16f 100644 --- a/testdata/sqllogictests_mongodb/read.slt +++ b/testdata/sqllogictests_mongodb/read.slt @@ -4,3 +4,14 @@ query I SELECT count(*) FROM read_mongodb('${MONGO_CONN_STRING}', 'test', 'bikeshare_stations'); ---- 102 + +query I +SELECT count(*) FROM read_mongodb('${MONGO_CONN_STRING}', 'test', 'null_test'); +---- +2 + +query I +SELECT a FROM read_mongodb('${MONGO_CONN_STRING}', 'test', 'null_test'); +---- +1 +NULL diff --git a/tests/poetry.lock b/tests/poetry.lock index 78e2082ce..dbb2ea6cb 100644 --- a/tests/poetry.lock +++ b/tests/poetry.lock @@ -31,19 +31,6 @@ idna = ["idna (>=2.1)"] trio = ["trio (>=0.14)"] wmi = ["wmi (>=1.5.1)"] -[[package]] -name = "glaredb" -version = "0.8.1" -description = "GlareDB is a fast SQL database for querying and analyzing distributed data." -optional = false -python-versions = ">=3.7" -files = [ - {file = "glaredb-0.8.1-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:a8e261e42982cda0bfd4c79ec1dbeba586b5d68b60462999e89a113f84d608d1"}, - {file = "glaredb-0.8.1-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:b0276f91b27346eab3d3e8ba5b68e30ce4d5fc4a466b853418a70f6398825a42"}, - {file = "glaredb-0.8.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17f62b1550560917c6fd7f61495500216dbd09280900df2e150f3cbd5c2da79b"}, - {file = "glaredb-0.8.1-cp37-abi3-win_amd64.whl", hash = "sha256:0f3150b566d9e558d3fac98c773f337a399cf7a7f14cb4b560918aa2238e07f4"}, -] - [[package]] name = "iniconfig" version = "2.0.0" @@ -111,73 +98,6 @@ files = [ {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, ] -[[package]] -name = "pandas" -version = "2.1.4" -description = "Powerful data structures for data analysis, time series, and statistics" -optional = false -python-versions = ">=3.9" -files = [ - {file = 
"pandas-2.1.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bdec823dc6ec53f7a6339a0e34c68b144a7a1fd28d80c260534c39c62c5bf8c9"}, - {file = "pandas-2.1.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:294d96cfaf28d688f30c918a765ea2ae2e0e71d3536754f4b6de0ea4a496d034"}, - {file = "pandas-2.1.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b728fb8deba8905b319f96447a27033969f3ea1fea09d07d296c9030ab2ed1d"}, - {file = "pandas-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00028e6737c594feac3c2df15636d73ace46b8314d236100b57ed7e4b9ebe8d9"}, - {file = "pandas-2.1.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:426dc0f1b187523c4db06f96fb5c8d1a845e259c99bda74f7de97bd8a3bb3139"}, - {file = "pandas-2.1.4-cp310-cp310-win_amd64.whl", hash = "sha256:f237e6ca6421265643608813ce9793610ad09b40154a3344a088159590469e46"}, - {file = "pandas-2.1.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b7d852d16c270e4331f6f59b3e9aa23f935f5c4b0ed2d0bc77637a8890a5d092"}, - {file = "pandas-2.1.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd7d5f2f54f78164b3d7a40f33bf79a74cdee72c31affec86bfcabe7e0789821"}, - {file = "pandas-2.1.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0aa6e92e639da0d6e2017d9ccff563222f4eb31e4b2c3cf32a2a392fc3103c0d"}, - {file = "pandas-2.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d797591b6846b9db79e65dc2d0d48e61f7db8d10b2a9480b4e3faaddc421a171"}, - {file = "pandas-2.1.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d2d3e7b00f703aea3945995ee63375c61b2e6aa5aa7871c5d622870e5e137623"}, - {file = "pandas-2.1.4-cp311-cp311-win_amd64.whl", hash = "sha256:dc9bf7ade01143cddc0074aa6995edd05323974e6e40d9dbde081021ded8510e"}, - {file = "pandas-2.1.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:482d5076e1791777e1571f2e2d789e940dedd927325cc3cb6d0800c6304082f6"}, - {file = 
"pandas-2.1.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8a706cfe7955c4ca59af8c7a0517370eafbd98593155b48f10f9811da440248b"}, - {file = "pandas-2.1.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0513a132a15977b4a5b89aabd304647919bc2169eac4c8536afb29c07c23540"}, - {file = "pandas-2.1.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9f17f2b6fc076b2a0078862547595d66244db0f41bf79fc5f64a5c4d635bead"}, - {file = "pandas-2.1.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:45d63d2a9b1b37fa6c84a68ba2422dc9ed018bdaa668c7f47566a01188ceeec1"}, - {file = "pandas-2.1.4-cp312-cp312-win_amd64.whl", hash = "sha256:f69b0c9bb174a2342818d3e2778584e18c740d56857fc5cdb944ec8bbe4082cf"}, - {file = "pandas-2.1.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3f06bda01a143020bad20f7a85dd5f4a1600112145f126bc9e3e42077c24ef34"}, - {file = "pandas-2.1.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ab5796839eb1fd62a39eec2916d3e979ec3130509930fea17fe6f81e18108f6a"}, - {file = "pandas-2.1.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edbaf9e8d3a63a9276d707b4d25930a262341bca9874fcb22eff5e3da5394732"}, - {file = "pandas-2.1.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ebfd771110b50055712b3b711b51bee5d50135429364d0498e1213a7adc2be8"}, - {file = "pandas-2.1.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8ea107e0be2aba1da619cc6ba3f999b2bfc9669a83554b1904ce3dd9507f0860"}, - {file = "pandas-2.1.4-cp39-cp39-win_amd64.whl", hash = "sha256:d65148b14788b3758daf57bf42725caa536575da2b64df9964c563b015230984"}, - {file = "pandas-2.1.4.tar.gz", hash = "sha256:fcb68203c833cc735321512e13861358079a96c174a61f5116a1de89c58c0ef7"}, -] - -[package.dependencies] -numpy = [ - {version = ">=1.23.2,<2", markers = "python_version == \"3.11\""}, - {version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""}, -] -python-dateutil = ">=2.8.2" -pytz = ">=2020.1" -tzdata 
= ">=2022.1" - -[package.extras] -all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"] -aws = ["s3fs (>=2022.05.0)"] -clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"] -compression = ["zstandard (>=0.17.0)"] -computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"] -consortium-standard = ["dataframe-api-compat (>=0.1.7)"] -excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"] -feather = ["pyarrow (>=7.0.0)"] -fss = ["fsspec (>=2022.05.0)"] -gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"] -hdf5 = ["tables (>=3.7.0)"] -html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"] -mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"] -output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"] -parquet = ["pyarrow (>=7.0.0)"] -performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"] -plot = ["matplotlib (>=3.6.1)"] -postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"] -spss = ["pyreadstat (>=1.1.5)"] -sql-other = ["SQLAlchemy (>=1.4.36)"] -test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] -xml = ["lxml (>=4.8.0)"] - [[package]] name = "pluggy" version = "1.4.0" @@ -193,43 +113,6 @@ files = [ dev = ["pre-commit", "tox"] 
testing = ["pytest", "pytest-benchmark"] -[[package]] -name = "polars" -version = "0.20.5" -description = "Blazingly fast DataFrame library" -optional = false -python-versions = ">=3.8" -files = [ - {file = "polars-0.20.5-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:19693d0815e7be757b2320a5ed988a209f9a505562ed937084b0c7d59109f6b7"}, - {file = "polars-0.20.5-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:6b8674a18b4915207ae46855e72b188391e341e519a72f24b9591ce5164b837d"}, - {file = "polars-0.20.5-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa5d8139020688b0a8f4cdf765df17fe9fa4c8defac6361412bd4bc80a12433c"}, - {file = "polars-0.20.5-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:d9d069bb4e0cad8efbd7e6211d68e65698d50e77e72490565e52ff035236c08e"}, - {file = "polars-0.20.5-cp38-abi3-win_amd64.whl", hash = "sha256:4d614503f963cd5a8cea3240e7fd9f56b6e574d00ef80091e8689bb6defaf880"}, - {file = "polars-0.20.5.tar.gz", hash = "sha256:fa4abc22cee024b5872961ddcd8a13a0a76150df345e21ce4308c2b1a36b47aa"}, -] - -[package.extras] -adbc = ["adbc_driver_sqlite"] -all = ["polars[adbc,cloudpickle,connectorx,deltalake,fsspec,gevent,numpy,pandas,plot,pyarrow,pydantic,pyiceberg,sqlalchemy,timezone,xlsx2csv,xlsxwriter]"] -cloudpickle = ["cloudpickle"] -connectorx = ["connectorx (>=0.3.2)"] -deltalake = ["deltalake (>=0.14.0)"] -fsspec = ["fsspec"] -gevent = ["gevent"] -matplotlib = ["matplotlib"] -numpy = ["numpy (>=1.16.0)"] -openpyxl = ["openpyxl (>=3.0.0)"] -pandas = ["pandas", "pyarrow (>=7.0.0)"] -plot = ["hvplot (>=0.9.1)"] -pyarrow = ["pyarrow (>=7.0.0)"] -pydantic = ["pydantic"] -pyiceberg = ["pyiceberg (>=0.5.0)"] -pyxlsb = ["pyxlsb (>=1.0)"] -sqlalchemy = ["pandas", "sqlalchemy"] -timezone = ["backports.zoneinfo", "tzdata"] -xlsx2csv = ["xlsx2csv (>=0.8.0)"] -xlsxwriter = ["xlsxwriter"] - [[package]] name = "psycopg2" version = "2.9.9" @@ -446,31 +329,6 @@ pluggy = ">=0.12,<2.0" [package.extras] testing = ["argcomplete", "attrs 
(>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] -[[package]] -name = "python-dateutil" -version = "2.8.2" -description = "Extensions to the standard Python datetime module" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -files = [ - {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, - {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, -] - -[package.dependencies] -six = ">=1.5" - -[[package]] -name = "pytz" -version = "2023.3.post1" -description = "World timezone definitions, modern and historical" -optional = false -python-versions = "*" -files = [ - {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"}, - {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, -] - [[package]] name = "ruff" version = "0.1.14" @@ -497,29 +355,7 @@ files = [ {file = "ruff-0.1.14.tar.gz", hash = "sha256:ad3f8088b2dfd884820289a06ab718cde7d38b94972212cc4ba90d5fbc9955f3"}, ] -[[package]] -name = "six" -version = "1.16.0" -description = "Python 2 and 3 compatibility utilities" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" -files = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, -] - -[[package]] -name = "tzdata" -version = "2023.4" -description = "Provider of IANA time zone data" -optional = false -python-versions = ">=2" -files = [ - {file = "tzdata-2023.4-py2.py3-none-any.whl", hash = "sha256:aa3ace4329eeacda5b7beb7ea08ece826c28d761cda36e747cfbf97996d39bf3"}, - {file = 
"tzdata-2023.4.tar.gz", hash = "sha256:dd54c94f294765522c77399649b4fefd95522479a664a0cec87f41bebc6148c9"}, -] - [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "2ecc6daced23d4f2082829ee33950ee25a36058f791f305c2de8bce9f9ae1ee1" +content-hash = "9f0cb4f7c1b85bf9bb3fe4be27f89f8ca0bf4b5cd7bd8668fc3148fa190ffcbd" diff --git a/tests/tests/test_bson.py b/tests/tests/test_bson.py index f77b0f9dc..2eeed4d8d 100644 --- a/tests/tests/test_bson.py +++ b/tests/tests/test_bson.py @@ -84,3 +84,29 @@ def test_read_bson( assert len(row) == 5 assert row["beatle_name"] in beatles assert beatles.index(row["beatle_name"]) == row["beatle_idx"] - 1 + + +def test_null_handling( + glaredb_connection: psycopg2.extensions.connection, + tmp_path_factory: pytest.TempPathFactory, +): + """Regression test: null values in a BSON file are read as SQL NULLs.""" + tmp_dir = tmp_path_factory.mktemp(basename="null_handling", numbered=True) + data_path = tmp_dir.joinpath("mixed.bson") + + with open(data_path, "wb") as f: + for _ in range(100): + f.write(bson.encode({"a": 1})) + + for _ in range(10): + f.write(bson.encode({"a": None})) + + with glaredb_connection.cursor() as curr: + curr.execute(f"select count(*) from '{data_path}'") + r = curr.fetchone() + assert r[0] == 110 + + # the null documents must surface as NULLs, not be dropped or coerced. + curr.execute(f"select count(*) from '{data_path}' where a is null") + r = curr.fetchone() + assert r[0] == 10