Skip to content

Commit

Permalink
PARTITION KEY support (#122)
Browse files Browse the repository at this point in the history
* initial pass at PARTITION KEY support.

* unit tests

* gha this PR branch

* fixup tests

* doc internal

* fix tests, KNN/rowids in

* define SQLITE_INDEX_CONSTRAINT_OFFSET

* whoops

* update tests, syrupy, use uv

* un ignore pyproject.toml

* dot

* tests/

* type error?

* win: .exe, update error name

* try fix macos python, paren around expr?

* win bash?

* dbg :(

* explicit error

* op

* dbg win

* win ./tests/.venv/Scripts/python.exe

* block UPDATEs on partition key values for now
  • Loading branch information
asg017 authored Nov 20, 2024
1 parent ee36547 commit 6658624
Show file tree
Hide file tree
Showing 16 changed files with 1,522 additions and 245 deletions.
135 changes: 80 additions & 55 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,100 @@ on:
push:
branches:
- main
- partition-by
permissions:
contents: read
jobs:
build-linux-x86_64-extension:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- uses: astral-sh/setup-uv@v3
with:
python-version: "3.12"
enable-cache: true
- run: ./scripts/vendor.sh
- run: make loadable static
- run: pip install pytest numpy; make test-loadable
- run: uv sync --directory tests
- run: make test-loadable python=./tests/.venv/bin/python
- uses: actions/upload-artifact@v4
with:
name: sqlite-vec-linux-x86_64-extension
path: dist/*
build-macos-x86_64-extension:
runs-on: macos-12
steps:
- uses: actions/checkout@v4
- uses: astral-sh/setup-uv@v3
with:
enable-cache: true
- run: uv python install 3.12
- run: ./scripts/vendor.sh
- run: make loadable static
- run: uv sync --directory tests
- run: make test-loadable python=./tests/.venv/bin/python
- uses: actions/upload-artifact@v4
with:
name: sqlite-vec-macos-x86_64-extension
path: dist/*
build-macos-aarch64-extension:
runs-on: macos-14
steps:
- uses: actions/checkout@v4
- uses: astral-sh/setup-uv@v3
with:
enable-cache: true
- run: ./scripts/vendor.sh
- run: make loadable static
- run: uv sync --directory tests
- run: make test-loadable python=./tests/.venv/bin/python
- uses: actions/upload-artifact@v4
with:
name: sqlite-vec-macos-aarch64-extension
path: dist/*
build-windows-x86_64-extension:
runs-on: windows-2019
steps:
- uses: actions/checkout@v4
- uses: ilammy/msvc-dev-cmd@v1
- uses: astral-sh/setup-uv@v3
with:
enable-cache: true
- run: ./scripts/vendor.sh
shell: bash
- run: make sqlite-vec.h
- run: mkdir dist
- run: cl.exe /fPIC -shared /W4 /Ivendor/ /O2 /LD sqlite-vec.c -o dist/vec0.dll
- run: uv sync --directory tests
- run: make test-loadable python=./tests/.venv/Scripts/python.exe
shell: bash
- uses: actions/upload-artifact@v4
with:
name: sqlite-vec-windows-x86_64-extension
path: dist/*
build-linux-aarch64-extension:
runs-on: ubuntu-latest
steps:
- uses: green-coding-solutions/eco-ci-energy-estimation@v4
with:
task: start-measurement
- uses: actions/checkout@v4
with:
version: "latest"
- run: sudo apt-get install gcc-arm-linux-gnueabihf
- run: ./scripts/vendor.sh
- run: make sqlite-vec.h
- run: make CC=arm-linux-gnueabihf-gcc loadable static
- uses: actions/upload-artifact@v4
with:
name: sqlite-vec-linux-aarch64-extension
path: dist/*
- uses: green-coding-solutions/eco-ci-energy-estimation@v4
with:
task: get-measurement
label: "all"
- uses: green-coding-solutions/eco-ci-energy-estimation@v4
with:
task: display-results
build-android-extensions:
runs-on: ubuntu-latest
strategy:
Expand Down Expand Up @@ -98,58 +175,6 @@ jobs:
with:
name: sqlite-vec-${{ matrix.platforms.name }}-extension
path: dist/*
build-macos-x86_64-extension:
runs-on: macos-12
steps:
- uses: actions/checkout@v4
- run: ./scripts/vendor.sh
- run: make loadable static
- run: /usr/local/opt/python@3/libexec/bin/python -m pip install --break-system-packages pytest numpy; make test-loadable python=/usr/local/opt/python@3/libexec/bin/python
- uses: actions/upload-artifact@v4
with:
name: sqlite-vec-macos-x86_64-extension
path: dist/*
build-macos-aarch64-extension:
runs-on: macos-14
steps:
- uses: actions/checkout@v4
- run: ./scripts/vendor.sh
- run: make loadable static
- run: /opt/homebrew/opt/python3/libexec/bin/python -m pip install pytest numpy --break-system-packages; make test-loadable python=/opt/homebrew/opt/python3/libexec/bin/python
- uses: actions/upload-artifact@v4
with:
name: sqlite-vec-macos-aarch64-extension
path: dist/*
build-windows-x86_64-extension:
runs-on: windows-2019
steps:
- uses: actions/checkout@v4
- uses: ilammy/msvc-dev-cmd@v1
- uses: actions/setup-python@v5
with:
python-version: "3.12"
- run: ./scripts/vendor.sh
shell: bash
- run: make sqlite-vec.h
- run: mkdir dist
- run: cl.exe /fPIC -shared /W4 /Ivendor/ /O2 /LD sqlite-vec.c -o dist/vec0.dll
- run: pip install pytest numpy; make test-loadable
- uses: actions/upload-artifact@v4
with:
name: sqlite-vec-windows-x86_64-extension
path: dist/*
build-linux-aarch64-extension:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- run: sudo apt-get install gcc-arm-linux-gnueabihf
- run: ./scripts/vendor.sh
- run: make sqlite-vec.h
- run: make CC=arm-linux-gnueabihf-gcc loadable static
- uses: actions/upload-artifact@v4
with:
name: sqlite-vec-linux-aarch64-extension
path: dist/*
build-wasm32-emscripten:
runs-on: ubuntu-latest
steps:
Expand Down
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,3 @@ sqlite-vec.h
tmp/

poetry.lock
pyproject.toml
54 changes: 54 additions & 0 deletions ARCHITECTURE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
## `vec0`

### idxStr

The `vec0` idxStr is a string composed of a single "header" character followed
by 0 or more "blocks" of 4 characters each.

The "header" character denotes the type of query plan, as determined by the
`enum vec0_query_plan` values. The current possible values are:

| Name | Value | Description |
| -------------------------- | ----- | ---------------------------------------------------------------------- |
| `VEC0_QUERY_PLAN_FULLSCAN` | `'1'` | Perform a full-scan on all rows |
| `VEC0_QUERY_PLAN_POINT` | `'2'` | Perform a single-lookup point query for the provided rowid |
| `VEC0_QUERY_PLAN_KNN` | `'3'` | Perform a KNN-style query on the provided query vector and parameters. |

Each 4-character "block" is associated with a corresponding value in `argv[]`. For example, the 1st block, at byte offsets `1-4` (inclusive), is associated with `argv[1]`. The 2nd block, at byte offsets `5-8` (inclusive), is associated with `argv[2]`, and so on. Each block describes what kind of value or filter the given `argv[i]` value is.


#### `VEC0_IDXSTR_KIND_KNN_MATCH` (`'{'`)

`argv[i]` is the query vector of the KNN query.

The remaining 3 characters of the block are `_` fillers.

#### `VEC0_IDXSTR_KIND_KNN_K` (`'}'`)

`argv[i]` is the limit/k value of the KNN query.

The remaining 3 characters of the block are `_` fillers.

#### `VEC0_IDXSTR_KIND_KNN_ROWID_IN` (`'['`)

`argv[i]` is the optional `rowid in (...)` value, and must be handled with [`sqlite3_vtab_in_first()` /
`sqlite3_vtab_in_next()`](https://www.sqlite.org/c3ref/vtab_in_first.html).

The remaining 3 characters of the block are `_` fillers.

#### `VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT` (`']'`)

`argv[i]` is a "constraint" on a specific partition key.

The second character of the block denotes which partition key to filter on, using `A` to denote the first partition key column, `B` for the second, etc. It is encoded with `'A' + partition_idx` and can be decoded with `c - 'A'`.

The third character of the block denotes which operator is used in the constraint. It will be one of the values of `enum vec0_partition_operator`, as only a subset of operations are supported on partition keys.

The fourth character of the block is a `_` filler.


#### `VEC0_IDXSTR_KIND_POINT_ID` (`'!'`)

`argv[i]` is the value of the rowid or id to match against for the point query.

The remaining 3 characters of the block are `_` fillers.
7 changes: 5 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -185,13 +185,16 @@ publish-release:

# -k test_vec0_update
test-loadable: loadable
$(PYTHON) -m pytest -vv -s -x tests/test-loadable.py
$(PYTHON) -m pytest -vv -s -x tests/test-*.py

test-loadable-snapshot-update: loadable
$(PYTHON) -m pytest -vv tests/test-loadable.py --snapshot-update

test-loadable-watch:
watchexec -w sqlite-vec.c -w tests/test-loadable.py -w Makefile --clear -- make test-loadable
watchexec --exts c,py,Makefile --clear -- make test-loadable

test-unit:
$(CC) tests/test-unit.c sqlite-vec.c -I./ -Ivendor -o $(prefix)/test-unit && $(prefix)/test-unit

site-dev:
npm --prefix site run dev
Expand Down
5 changes: 5 additions & 0 deletions TODO
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# partition

- [ ] UPDATE on partition key values
- remove previous row from chunk, insert into new one?
- [ ] proper sqlite3_vtab_nochange / sqlite3_value_nochange handling
Loading

0 comments on commit 6658624

Please sign in to comment.