From 396580b2bdf4682167b31974ead29c9df06fc43a Mon Sep 17 00:00:00 2001 From: Duncan Blythe Date: Sun, 23 Jun 2024 15:25:27 +0200 Subject: [PATCH] Add parameters to sqlvector and build nightly --- Makefile | 1 + superduperdb/components/vector_index.py | 5 ++++- superduperdb/ext/openai/model.py | 5 ++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 63dbc40b2..81d8b1267 100644 --- a/Makefile +++ b/Makefile @@ -138,6 +138,7 @@ build_sandbox: ## Build superduperdb/sandbox: image (RUNNER=) build_nightly: ## Build superduperdb/nightly: image (EXTRA_REQUIREMENTS_FILE=) (RUNNER=) docker build . -f ./deploy/images/superduperdb/Dockerfile \ --build-arg BUILD_ENV="nightly" \ + --platform linux/amd64 \ --progress=plain \ $(if $(EXTRA_REQUIREMENTS_FILE),--build-arg EXTRA_REQUIREMENTS_FILE=$(EXTRA_REQUIREMENTS_FILE),) \ $(if $(RUNNER),--build-arg RUNNER=$(RUNNER),) \ diff --git a/superduperdb/components/vector_index.py b/superduperdb/components/vector_index.py index 7f910697f..1e221cd95 100644 --- a/superduperdb/components/vector_index.py +++ b/superduperdb/components/vector_index.py @@ -285,16 +285,19 @@ def vector(shape, identifier: t.Optional[str] = None): ) -def sqlvector(shape): +@component() +def sqlvector(shape, bytes_encoding: str = 'Bytes'): """Create an encoder for a vector (list of ints/ floats) of a given shape. This is used for compatibility with SQL databases, as the default vector :param shape: The shape of the vector + :param bytes_encoding: The encoding of the bytes """ return DataType( identifier=f'sqlvector[{str_shape(shape)}]', shape=shape, encoder=EncodeArray(dtype='float64'), decoder=DecodeArray(dtype='float64'), + bytes_encoding=bytes_encoding, ) diff --git a/superduperdb/ext/openai/model.py b/superduperdb/ext/openai/model.py index 9536d5542..a4d69966f 100644 --- a/superduperdb/ext/openai/model.py +++ b/superduperdb/ext/openai/model.py @@ -15,6 +15,7 @@ ) from openai._types import NOT_GIVEN +from superduperdb import CFG from superduperdb.backends.ibis.data_backend import IbisDataBackend from superduperdb.backends.ibis.field_types import dtype from superduperdb.backends.query_dataset import QueryDataset @@ -128,7 +129,9 @@ def pre_create(self, db): super().pre_create(db) if isinstance(db.databackend.type, IbisDataBackend): if self.datatype is None: - self.datatype = sqlvector(self.shape) + self.datatype = sqlvector( + shape=self.shape, bytes_encoding=CFG.bytes_encoding + ) elif self.datatype is None: self.datatype = vector(shape=self.shape)