diff --git a/CHANGELOG.md b/CHANGELOG.md
index 279a25120..9cfd8262e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 #### Changed defaults / behaviours
 
+- Deprecate vanilla `DataType`
+- Remove `_BaseEncodable` and its subclasses from the project
+
 #### New Features & Functionality
 
 - Streamlit component and server
diff --git a/plugins/anthropic/superduper_anthropic/model.py b/plugins/anthropic/superduper_anthropic/model.py
index 81c878f7f..28076aa34 100644
--- a/plugins/anthropic/superduper_anthropic/model.py
+++ b/plugins/anthropic/superduper_anthropic/model.py
@@ -23,9 +23,9 @@ class Anthropic(APIBaseModel):
 
     client_kwargs: t.Dict[str, t.Any] = dc.field(default_factory=dict)
 
-    def __post_init__(self, db, artifacts, example):
+    def __post_init__(self, db, example):
         self.model = self.model or self.identifier
-        super().__post_init__(db, artifacts, example=example)
+        super().__post_init__(db, example=example)
 
     def init(self, db=None):
         """Initialize the model.
diff --git a/plugins/cohere/superduper_cohere/model.py b/plugins/cohere/superduper_cohere/model.py
index b2d4f495d..973c9aa1e 100644
--- a/plugins/cohere/superduper_cohere/model.py
+++ b/plugins/cohere/superduper_cohere/model.py
@@ -23,8 +23,8 @@ class Cohere(APIBaseModel):
 
     client_kwargs: t.Dict[str, t.Any] = dc.field(default_factory=dict)
 
-    def __post_init__(self, db, artifacts, example):
-        super().__post_init__(db, artifacts, example=example)
+    def __post_init__(self, db, example):
+        super().__post_init__(db, example=example)
 
         self.identifier = self.identifier or self.model
 
@@ -47,8 +47,8 @@ class CohereEmbed(Cohere):
     batch_size: int = 100
     signature: str = 'singleton'
 
-    def __post_init__(self, db, artifacts, example):
-        super().__post_init__(db, artifacts, example=example)
+    def __post_init__(self, db, example):
+        super().__post_init__(db, example=example)
         if self.shape is None:
             self.shape = self.shapes[self.identifier]
diff --git a/plugins/ibis/superduper_ibis/data_backend.py b/plugins/ibis/superduper_ibis/data_backend.py
index c798ffa92..e0ad93e66 100644
--- a/plugins/ibis/superduper_ibis/data_backend.py
+++ b/plugins/ibis/superduper_ibis/data_backend.py
@@ -14,7 +14,7 @@ from superduper.backends.local.artifacts import FileSystemArtifactStore
 from superduper.base import exceptions
 from superduper.base.enums import DBType
-from superduper.components.datatype import DataType
+from superduper.components.datatype import BaseDataType
 from superduper.components.schema import Schema
 from superduper.components.table import Table
 
@@ -69,7 +69,7 @@ def __init__(self, uri: str, flavour: t.Optional[str] = None):
         self.overwrite = False
         self._setup(conn)
 
-        if uri.startswith('snowflake://') or uri.startswith('sqlite://'):
+        if uri.startswith('snowflake://'):
             self.bytes_encoding = 'base64'
             self.datatype_presets = {'vector': 'superduper.ext.numpy.encoder.Array'}
 
@@ -190,7 +190,7 @@ def drop_table_or_collection(self, name: str):
     def create_output_dest(
         self,
         predict_id: str,
-        datatype: t.Union[FieldType, DataType],
+        datatype: t.Union[FieldType, BaseDataType],
         flatten: bool = False,
     ):
         """Create a table for the output of the model. 
diff --git a/plugins/ibis/superduper_ibis/query.py b/plugins/ibis/superduper_ibis/query.py index 6ec315581..dd18d0867 100644 --- a/plugins/ibis/superduper_ibis/query.py +++ b/plugins/ibis/superduper_ibis/query.py @@ -11,7 +11,7 @@ ) from superduper.base.cursor import SuperDuperCursor from superduper.base.exceptions import DatabackendException -from superduper.components.datatype import Encodable +from superduper.components.datatype import _Encodable from superduper.components.schema import Schema from superduper.misc.special_dicts import SuperDuperFlatEncode @@ -81,7 +81,7 @@ def _model_update_impl( d = { "_source": str(source_id), f"{CFG.output_prefix}{predict_id}": output.x - if isinstance(output, Encodable) + if isinstance(output, _Encodable) else output, "id": str(uuid.uuid4()), } diff --git a/plugins/ibis/superduper_ibis/utils.py b/plugins/ibis/superduper_ibis/utils.py index 1aeeb87b3..b8215bf55 100644 --- a/plugins/ibis/superduper_ibis/utils.py +++ b/plugins/ibis/superduper_ibis/utils.py @@ -1,20 +1,12 @@ from ibis.expr.datatypes import dtype from superduper.components.datatype import ( - Artifact, BaseDataType, File, - LazyArtifact, - LazyFile, - Native, ) from superduper.components.schema import ID, FieldType, Schema SPECIAL_ENCODABLES_FIELDS = { File: "str", - LazyFile: "str", - Artifact: "str", - LazyArtifact: "str", - Native: "json", } diff --git a/plugins/jina/superduper_jina/model.py b/plugins/jina/superduper_jina/model.py index f2137c7f5..dd221e8fc 100644 --- a/plugins/jina/superduper_jina/model.py +++ b/plugins/jina/superduper_jina/model.py @@ -16,8 +16,8 @@ class Jina(APIBaseModel): api_key: t.Optional[str] = None - def __post_init__(self, db, artifacts, example): - super().__post_init__(db, artifacts, example=example) + def __post_init__(self, db, example): + super().__post_init__(db, example=example) self.identifier = self.identifier or self.model self.client = JinaAPIClient(model_name=self.identifier, api_key=self.api_key) @@ -41,8 +41,8 @@ class JinaEmbedding(Jina): shape: t.Optional[t.Sequence[int]] = None signature: str = 'singleton' - def __post_init__(self, db, artifacts, example): - super().__post_init__(db, artifacts, example) + def __post_init__(self, db, example): + super().__post_init__(db, example) if self.shape is None: self.shape = (len(self.client.encode_batch(['shape'])[0]),) diff --git a/plugins/mongodb/superduper_mongodb/data_backend.py b/plugins/mongodb/superduper_mongodb/data_backend.py index 474c650d4..ec13feefb 100644 --- a/plugins/mongodb/superduper_mongodb/data_backend.py +++ b/plugins/mongodb/superduper_mongodb/data_backend.py @@ -9,7 +9,7 @@ from superduper.backends.base.data_backend import BaseDataBackend from superduper.backends.base.metadata import MetaDataStoreProxy from superduper.base.enums import DBType -from superduper.components.datatype import DataType +from superduper.components.datatype import BaseDataType from superduper.components.schema import Schema from superduper.misc.colors import Colors @@ -140,7 +140,7 @@ def disconnect(self): def create_output_dest( self, predict_id: str, - datatype: t.Union[str, DataType], + datatype: t.Union[str, BaseDataType], flatten: bool = False, ): """Create an output collection for a component. 
diff --git a/plugins/mongodb/superduper_mongodb/query.py b/plugins/mongodb/superduper_mongodb/query.py index efa0277e6..d6e6e862f 100644 --- a/plugins/mongodb/superduper_mongodb/query.py +++ b/plugins/mongodb/superduper_mongodb/query.py @@ -532,6 +532,7 @@ def process_find_part(part): method, args, kwargs = part # args: (filter, projection, *args) filter = copy.deepcopy(args[0]) if len(args) > 0 else {} + filter = dict(filter) filter.update(self._get_filter_conditions()) args = tuple((filter, *args[1:])) diff --git a/plugins/openai/superduper_openai/model.py b/plugins/openai/superduper_openai/model.py index 6e6fead09..44739a0e7 100644 --- a/plugins/openai/superduper_openai/model.py +++ b/plugins/openai/superduper_openai/model.py @@ -50,8 +50,8 @@ class _OpenAI(APIBaseModel): openai_api_base: t.Optional[str] = None client_kwargs: t.Optional[dict] = dc.field(default_factory=dict) - def __post_init__(self, db, artifacts, example): - super().__post_init__(db, artifacts, example) + def __post_init__(self, db, example): + super().__post_init__(db, example) assert isinstance(self.client_kwargs, dict) @@ -151,8 +151,8 @@ class OpenAIChatCompletion(_OpenAI): batch_size: int = 1 prompt: str = '' - def __post_init__(self, db, artifacts, example): - super().__post_init__(db, artifacts, example) + def __post_init__(self, db, example): + super().__post_init__(db, example) self.takes_context = True def _format_prompt(self, context, X): diff --git a/plugins/sentence_transformers/superduper_sentence_transformers/model.py b/plugins/sentence_transformers/superduper_sentence_transformers/model.py index fc8245959..3eba51dcf 100644 --- a/plugins/sentence_transformers/superduper_sentence_transformers/model.py +++ b/plugins/sentence_transformers/superduper_sentence_transformers/model.py @@ -4,7 +4,7 @@ from superduper.backends.query_dataset import QueryDataset from superduper.base.enums import DBType from superduper.components.component import ensure_initialized -from superduper.components.datatype import DataType, dill_lazy +from superduper.components.datatype import dill_serializer from superduper.components.model import Model, Signature, _DeviceManaged DEFAULT_PREDICT_KWARGS = { @@ -39,9 +39,7 @@ class SentenceTransformer(Model, _DeviceManaged): """ - _artifacts: t.ClassVar[t.Sequence[t.Tuple[str, 'DataType']]] = ( - ('object', dill_lazy), - ) + _fields = {'object': dill_serializer} object: t.Optional[_SentenceTransformer] = None model: t.Optional[str] = None @@ -50,8 +48,8 @@ class SentenceTransformer(Model, _DeviceManaged): postprocess: t.Union[None, t.Callable] = None signature: Signature = 'singleton' - def __post_init__(self, db, artifacts, example): - super().__post_init__(db, artifacts, example=example) + def __post_init__(self, db, example): + super().__post_init__(db, example=example) if self.model is None: self.model = self.identifier diff --git a/plugins/sklearn/plugin_test/test_sklearn.py b/plugins/sklearn/plugin_test/test_sklearn.py index 10949fedc..cfbafc2c7 100644 --- a/plugins/sklearn/plugin_test/test_sklearn.py +++ b/plugins/sklearn/plugin_test/test_sklearn.py @@ -104,7 +104,7 @@ def test_sklearn(db): identifier='test', object=SVC(), ) - assert 'object' in m.artifact_schema.fields + assert 'object' in m.class_schema.fields db.apply(m, force=True) assert db.show('model') == ['test'] diff --git a/plugins/torch/superduper_torch/model.py b/plugins/torch/superduper_torch/model.py index 1e276a03b..abb9c15d6 100644 --- a/plugins/torch/superduper_torch/model.py +++ 
b/plugins/torch/superduper_torch/model.py
@@ -153,8 +153,8 @@ class TorchModel(Model, _DeviceManaged):
     optimizer_state: t.Optional[t.Any] = None
     loader_kwargs: t.Dict = dc.field(default_factory=lambda: {})
 
-    def __post_init__(self, db, artifacts, example):
-        super().__post_init__(db, artifacts=artifacts, example=example)
+    def __post_init__(self, db, example):
+        super().__post_init__(db, example=example)
 
         if self.optimizer_state is not None:
             self.optimizer.load_state_dict(self.optimizer_state)
diff --git a/plugins/transformers/superduper_transformers/model.py b/plugins/transformers/superduper_transformers/model.py
index 5adfe5727..515c4d984 100644
--- a/plugins/transformers/superduper_transformers/model.py
+++ b/plugins/transformers/superduper_transformers/model.py
@@ -78,11 +78,11 @@ class TransformersTrainer(TrainingArguments, Trainer):
         t.Callable[[torch.Tensor, torch.Tensor], torch.Tensor]
     ] = None
 
-    def __post_init__(self, db, artifacts):
+    def __post_init__(self, db):
         assert self.output_dir == '' or self.output_dir == self.identifier
         self.output_dir = self.identifier
         TrainingArguments.__post_init__(self)
-        return Trainer.__post_init__(self, db, artifacts)
+        return Trainer.__post_init__(self, db)
 
     @property
     def native_arguments(self):
@@ -214,10 +214,10 @@ def _build_pipeline(self):
             model=self.model_cls.from_pretrained(self.model_name),
         )
 
-    def __post_init__(self, db, artifacts, example):
+    def __post_init__(self, db, example):
         if self.pipeline is None:
             self._build_pipeline()
-        super().__post_init__(db, artifacts, example)
+        super().__post_init__(db, example)
 
     def predict(self, text: str):
         """Predict the class of a single text.
@@ -284,12 +284,12 @@ class LLM(BaseLLM):
         ("tokenizer_kwargs", dill_serializer),
     )
 
-    def __post_init__(self, db, artifacts, example):
+    def __post_init__(self, db, example):
         if not self.identifier:
             self.identifier = self.adapter_id or self.model_name_or_path
 
         # TODO: Compatible with the bug of artifact sha1 equality and will be deleted
-        super().__post_init__(db, artifacts, example)
+        super().__post_init__(db, example)
 
     @classmethod
     def from_pretrained(
diff --git a/plugins/transformers/superduper_transformers/training.py b/plugins/transformers/superduper_transformers/training.py
index a59c086a4..e231c985a 100644
--- a/plugins/transformers/superduper_transformers/training.py
+++ b/plugins/transformers/superduper_transformers/training.py
@@ -184,11 +184,11 @@ class LLMTrainer(TrainingArguments, SuperDuperTrainer):
     num_gpus: t.Optional[int] = None
     ray_configs: t.Optional[dict] = None
 
-    def __post_init__(self, db, artifacts):
+    def __post_init__(self, db):
         self.output_dir = self.output_dir or os.path.join("output", self.identifier)
         if self.num_gpus and 'num_gpus' not in self.compute_kwargs:
             self.compute_kwargs['num_gpus'] = self.num_gpus
-        return SuperDuperTrainer.__post_init__(self, db, artifacts)
+        return SuperDuperTrainer.__post_init__(self, db)
 
     def build(self):
         """Build the training arguments."""
diff --git a/plugins/vllm/superduper_vllm/model.py b/plugins/vllm/superduper_vllm/model.py
index 3032ec809..975d0a67a 100644
--- a/plugins/vllm/superduper_vllm/model.py
+++ b/plugins/vllm/superduper_vllm/model.py
@@ -19,7 +19,7 @@ class _VLLMCore(Model):
 
     vllm_params: dict = dc.field(default_factory=dict)
 
-    def __post_init__(self, db, artifacts, example):
-        super().__post_init__(db, artifacts, example)
+    def __post_init__(self, db, example):
+        super().__post_init__(db, example)
         assert "model" in self.vllm_params, "model is required in vllm_params"
         self._async_llm = None
diff --git a/superduper/__init__.py 
b/superduper/__init__.py index 4e0d8f189..4b41d44b1 100644 --- a/superduper/__init__.py +++ b/superduper/__init__.py @@ -24,7 +24,7 @@ from .components.application import Application from .components.component import Component from .components.dataset import Dataset -from .components.datatype import DataType, dill_serializer, pickle_serializer +from .components.datatype import BaseDataType, dill_serializer, pickle_serializer from .components.listener import Listener from .components.metric import Metric from .components.model import ( @@ -39,7 +39,7 @@ from .components.streamlit import Streamlit from .components.table import Table from .components.template import QueryTemplate, Template -from .components.vector_index import VectorIndex, vector +from .components.vector_index import VectorIndex REQUIRES = [ 'superduper=={}'.format(__version__), @@ -52,7 +52,7 @@ 'config', 'logging', 'superduper', - 'DataType', + 'BaseDataType', 'Document', 'code', 'ObjectModel', @@ -62,7 +62,6 @@ 'model', 'Listener', 'VectorIndex', - 'vector', 'Dataset', 'Metric', 'Plugin', diff --git a/superduper/backends/base/data_backend.py b/superduper/backends/base/data_backend.py index 30ad4ca77..0e5377977 100644 --- a/superduper/backends/base/data_backend.py +++ b/superduper/backends/base/data_backend.py @@ -4,7 +4,7 @@ from superduper import logging from superduper.backends.base.query import Query -from superduper.components.datatype import DataType +from superduper.components.datatype import BaseDataType if t.TYPE_CHECKING: from superduper.components.schema import Schema @@ -75,7 +75,7 @@ def build_artifact_store(self): def create_output_dest( self, predict_id: str, - datatype: t.Union[str, DataType], + datatype: t.Union[str, BaseDataType], flatten: bool = False, ): """Create an output destination for the database. diff --git a/superduper/backends/base/query.py b/superduper/backends/base/query.py index ae5f90ada..1fce727af 100644 --- a/superduper/backends/base/query.py +++ b/superduper/backends/base/query.py @@ -5,7 +5,7 @@ import typing as t import uuid from abc import abstractmethod -from functools import wraps +from functools import cached_property, wraps from superduper import CFG, logging from superduper.base.constant import ( @@ -309,31 +309,10 @@ def flavour(self): """Return the flavour of the query.""" return self._get_flavour() - @property + @cached_property def documents(self): """Return the documents.""" - def _wrap_document(document): - if not isinstance(document, Document): - if isinstance(document, dict): - document = Document(document) - else: - try: - table = self.db.load('table', self.table) - except FileNotFoundError: - raise FileNotFoundError( - "Table not found. 
Please provide a document or a dictionary" - ) - field = [ - k - for k in table.schema.fields - if k not in [self.primary_id, '_fold'] - and not k.startswith(CFG.output_prefix) - ] - assert len(field) == 1 - document = Document({field[0]: document}) - return document - def _update_part(documents): nonlocal self doc_args = (documents, *self.parts[0][1][1:]) @@ -345,8 +324,9 @@ def _update_part(documents): if one_document: documents = [documents] wrapped_documents = [] + for document in documents: - document = _wrap_document(document) + document = Document(document) wrapped_documents.append(document) if one_document: diff --git a/superduper/base/apply.py b/superduper/base/apply.py index 6168f3c79..5c353e709 100644 --- a/superduper/base/apply.py +++ b/superduper/base/apply.py @@ -139,6 +139,7 @@ def _apply( object.db = db serialized = object.dict(metadata=False) + del serialized['uuid'] create_events = {} @@ -183,7 +184,9 @@ def wrapper(child): del current_serialized['uuid'] # finds the fields where there is a difference - this_diff = Document(current_serialized).diff(serialized) + this_diff = Document(current_serialized, schema=current_serialized.schema).diff( + serialized + ) logging.info(f'Found identical {object.huuid}') if not this_diff: @@ -215,7 +218,7 @@ def wrapper(child): # this is necessary to prevent inconsistencies # this takes the difference between # the current and - serialized = serialized.update(this_diff).encode() + serialized = serialized.update(this_diff).encode(keep_schema=False) # assign/ increment the version since # this breaks a previous version @@ -229,7 +232,6 @@ def wrapper(child): Document(object.metadata).map(wrapper, lambda x: isinstance(x, Component)) else: - # if object.identifier == apply_status = 'update' current.handle_update_or_same(object) @@ -238,7 +240,12 @@ def wrapper(child): # update the existing component with the change # data from the applied component - serialized = current.dict().update(serialized).update(this_diff).encode() + serialized = ( + current.dict() + .update(serialized) + .update(this_diff) + .encode(keep_schema=False) + ) logging.info(f'Found update {object.huuid}') @@ -250,7 +257,7 @@ def wrapper(child): # need to be applied, do that now Document(object.metadata).map(wrapper, lambda x: isinstance(x, Component)) - serialized = serialized.encode() + serialized = serialized.encode(keep_schema=False) object.version = 0 apply_status = 'new' diff --git a/superduper/base/datalayer.py b/superduper/base/datalayer.py index b14ab0f6d..fa3895a8f 100644 --- a/superduper/base/datalayer.py +++ b/superduper/base/datalayer.py @@ -19,12 +19,12 @@ from superduper.base.cursor import SuperDuperCursor from superduper.base.document import Document from superduper.components.component import Component -from superduper.components.datatype import DataType +from superduper.components.datatype import BaseDataType from superduper.components.schema import Schema from superduper.components.table import Table from superduper.misc.annotations import deprecated from superduper.misc.colors import Colors -from superduper.misc.download import download_from_one +from superduper.misc.importing import import_object from superduper.misc.retry import db_retry DBResult = t.Any @@ -255,7 +255,7 @@ def _insert( self, insert: Query, refresh: bool = True, - datatypes: t.Sequence[DataType] = (), + datatypes: t.Sequence[BaseDataType] = (), auto_schema: bool = True, ) -> InsertResult: """ @@ -275,7 +275,7 @@ def _insert( 'train' if random.random() >= s.CFG.fold_probability else 
'valid', ) if auto_schema and self.cfg.auto_schema: - self._auto_create_table(insert.table, insert.documents) + schema = self._auto_create_table(insert.table, insert.documents).schema timeout = 5 @@ -301,6 +301,8 @@ def _insert( f'{insert.table} not found after {timeout} seconds' ' table auto creation likely has failed or is stalling...' ) + for r in insert.documents: + r.schema = schema inserted_ids = insert.do_execute(self) @@ -333,6 +335,7 @@ def _auto_create_table(self, table_name, documents): table = Table(identifier=table_name, schema=schema) logging.info(f"Creating table {table_name} with schema {schema.fields_set}") self.apply(table, force=True) + return table def _select(self, select: Query, reference: bool = True) -> SelectResult: """ @@ -563,6 +566,7 @@ def load( :param version: [Optional] Numerical version. :param allow_hidden: Toggle to ``True`` to allow loading of deprecated components. + :param huuid: [Optional] human-readable UUID of the component to load. :param uuid: [Optional] UUID of the component to load. """ if version is not None: @@ -589,7 +593,20 @@ def load( info = self.metadata.get_component_by_uuid( uuid=uuid, allow_hidden=allow_hidden ) - c = Document.decode(info, db=self) + try: + class_schema = import_object(info['_path']).build_class_schema() + except KeyError: + # if defined in __main__ then the class is directly serialized + assert '_object' in info + from superduper.components.datatype import Blob, dill_serializer + + bytes_ = Blob( + identifier=info['_object'].split(':')[-1], db=self + ).unpack() + object = dill_serializer.decode_data(bytes_) + class_schema = object.build_class_schema() + + c = Document.decode(info, db=self, schema=class_schema) c.db = self if c.cache: logging.info(f'Adding {c.huuid} to cache') @@ -655,16 +672,6 @@ def _remove_component_version( self._delete_artifacts(r['uuid'], info) self.metadata.delete_component_version(type_id, identifier, version=version) - def _get_content_for_filter(self, filter) -> Document: - if isinstance(filter, dict): - filter = Document(filter) - if '_id' not in filter: - filter['_id'] = 0 - download_from_one(filter) - if not filter['_id']: - del filter['_id'] - return filter - def replace(self, object: t.Any): """ Replace a model in the artifact store with an updated object. 
@@ -691,7 +698,7 @@ def _replace_fn(component): return f'&:component:{component.huuid}' serialized = serialized.map(_replace_fn, lambda x: isinstance(x, Component)) - serialized = serialized.encode() + serialized = serialized.encode(keep_schema=False) self._delete_artifacts(object.uuid, info) serialized = self._save_artifact(object.uuid, serialized) @@ -775,7 +782,8 @@ def select_nearest( if not isinstance(like, Document): assert isinstance(like, dict) like = Document(like) - like = self._get_content_for_filter(like) + # TODO deprecate + # like = self._get_content_for_filter(like) logging.info('Getting vector-index') vi = self.load('vector_index', vector_index) if outputs is None: diff --git a/superduper/base/document.py b/superduper/base/document.py index 1b9b71345..7e57c2939 100644 --- a/superduper/base/document.py +++ b/superduper/base/document.py @@ -13,13 +13,7 @@ from superduper.base.leaf import Leaf, import_item from superduper.base.variables import _replace_variables from superduper.components.component import Component -from superduper.components.datatype import ( - Blob, - Encodable, - FileItem, - Native, - _BaseEncodable, -) +from superduper.components.datatype import BaseDataType, Blob, File from superduper.components.schema import Schema, get_schema from superduper.misc.reference import parse_reference from superduper.misc.special_dicts import MongoStyleDict, SuperDuperFlatEncode @@ -28,7 +22,6 @@ from superduper.base.datalayer import Datalayer -ContentType = t.Union[t.Dict, Encodable] LeafMetaType = t.Type['Leaf'] _VERSION_LIMIT = 1000 @@ -70,10 +63,11 @@ def __init__(self, getters=None): def add_getter(self, name: str, getter: t.Callable): """Add a getter for a reference type.""" - if name == 'blob': - self._getters[name].append(_build_blob_getter(getter)) - else: - self._getters[name].append(getter) + self._getters[name].append(getter) + # if name == 'blob': + # self._getters[name].append(_build_blob_getter(getter)) + # else: + # self._getters[name].append(getter) def run(self, name, data): """Run the getters one by one until one returns a value.""" @@ -102,6 +96,11 @@ def _diff(r1, r2, d): if isinstance(r1[k], Leaf): r1k = r1[k].dict(metadata=False) + + if r2[k] is None: + d[k] = None + continue + r2k = r2[k].dict(metadata=False) if set(r1k.keys()) != set(r2k.keys()): @@ -181,7 +180,7 @@ def _map(r): return fn(r) return r - return Document(_map(self)) + return Document(_map(self), schema=self.schema) def diff(self, other: 'Document'): """Get a `Document` with the difference to `other` inside. @@ -190,11 +189,16 @@ def diff(self, other: 'Document'): """ out: t.Dict = {} _diff(self, other, out) - return Document(out) + return Document(out, schema=self.schema) - def update(self, other: 'Document'): + def update(self, other: t.Union['Document', dict]): """Update document with values from other.""" - return Document(_update(dict(self), dict(other))) + schema = self.schema or Schema('tmp', fields={}) + + if isinstance(other, Document) and other.schema: + assert other.schema is not None + schema = schema.update(other.schema) + return Document(_update(dict(self), dict(other)), schema=schema) def encode( self, @@ -202,6 +206,7 @@ def encode( leaves_to_keep: t.Sequence = (), metadata: bool = True, defaults: bool = True, + keep_schema: bool = True, ) -> SuperDuperFlatEncode: """Encode the document to a format that can be used in a database. 
@@ -224,6 +229,10 @@ def encode( out = schema.encode_data( out, builds, blobs, files, leaves_to_keep=leaves_to_keep ) + + if not keep_schema: + del out['_schema'] + out = _deep_flat_encode( out, builds=builds, @@ -275,6 +284,7 @@ def decode( builds = r.get(KEY_BUILDS, {}) + # TODO is this the right place for this? # Important: Leaf.identifier or Component.type_id:Component.identifier are # are used as the key, but must be set if not present. for k in builds: @@ -297,18 +307,23 @@ def decode( # Prioritize using the local artifact storage getter, # and then use the DB read getter. if r.get(KEY_BLOBS): - getters.add_getter('blob', lambda x: r[KEY_BLOBS].get(x)) + getters.add_getter( + 'blob', lambda x: Blob(identifier=x, bytes=r[KEY_BLOBS].get(x)) + ) + + def my_getter(x): + return File(path=r[KEY_FILES].get(x.split(':')[-1]), db=db) if r.get(KEY_FILES): - getters.add_getter('file', lambda x: r[KEY_FILES].get(x.split(':')[-1])) + getters.add_getter('file', my_getter) # Add a remote file getter - getters.add_getter('file', _get_file_remote_callback) - getters.add_getter('blob', _get_local_blob) + # getters.add_getter('file', _get_file_remote_callback) + # getters.add_getter('blob', _get_local_blob) if db is not None: getters.add_getter('component', lambda x: _get_component(db, x)) - getters.add_getter('blob', _get_artifact_callback(db)) + getters.add_getter('blob', _get_blob_callback(db)) getters.add_getter('file', _get_file_callback(db)) if schema is not None: @@ -348,12 +363,25 @@ def set_variables(self, **kwargs) -> 'Document': def __repr__(self) -> str: return f'Document({repr(dict(self))})' + @staticmethod + def decode_blobs(schema, r): + for k, v in schema.fields.items(): + if k not in r: + continue + if not isinstance(v, BaseDataType): + continue + if v.encodable == 'artifact': + r[k] = v.decode_data(r[k]) + return r + def unpack(self, leaves_to_keep: t.Sequence = ()) -> t.Any: """Returns the content, but with any encodables replaced by their contents. :param leaves_to_keep: The types of leaves to keep. """ out = _unpack(self, leaves_to_keep=leaves_to_keep) + if self.schema is not None: + out = self.decode_blobs(self.schema, out) if '_base' in out: out = out['_base'] return out @@ -364,6 +392,7 @@ def __deepcopy__(self, momo): return new_doc +# TODO what is this? Looks like it should be in superduper_mongodb class QueryUpdateDocument(Document): """A document that is used to update a document in a database. @@ -402,6 +431,7 @@ def _create_metadata_update(update, original=None): update = {'$set': update} return update + # TODO needed? def to_template(self, **substitutions): """ Convert the document to a template with variables. @@ -443,9 +473,7 @@ def encode( def _unpack(item: t.Any, db=None, leaves_to_keep: t.Sequence = ()) -> t.Any: - if isinstance(item, _BaseEncodable) and not any( - [isinstance(item, leaf) for leaf in leaves_to_keep] - ): + if isinstance(item, Leaf) and not isinstance(item, tuple(leaves_to_keep)): return item.unpack() elif isinstance(item, dict): return {k: _unpack(v, leaves_to_keep=leaves_to_keep) for k, v in item.items()} @@ -515,13 +543,10 @@ def _deep_flat_encode( blobs[r.identifier] = r.bytes return '&:blob:' + r.identifier - if isinstance(r, FileItem): + if isinstance(r, File): files[r.identifier] = r.path return '&:file:' + r.identifier - if isinstance(r, Native): - return r.x - # TODO what is this?? 
from superduper.backends.base.query import _BaseQuery @@ -653,10 +678,11 @@ def _deep_flat_decode(r, builds, getters: Getters, db: t.Optional['Datalayer'] = if isinstance(r, dict) and '_object' in r: dict_ = {k: v for k, v in r.items() if k != '_object'} dict_ = _deep_flat_decode(dict_, builds, getters=getters, db=db) - object = _deep_flat_decode( - builds[r['_object'][1:]], builds, getters=getters, db=db - ) - instance = import_item(object=object.unpack(), dict=dict_, db=db) + from superduper.components.datatype import dill_serializer + + bytes_ = Blob(identifier=r['_object'].split(':')[-1], db=db).unpack() + object = dill_serializer.decode_data(bytes_) + instance = import_item(object=object, dict=dict_, db=db) return instance if isinstance(r, dict): literals = r.get('_literals', []) @@ -746,16 +772,28 @@ def pull_file(): def _get_file_callback(db): - def callback(path): - def pull_file(): - identifier = path.split(':')[-1] - return db.artifact_store.get_file(identifier), path + def callback(ref): + return File(identifier=ref, db=db) + + return callback - return pull_file + +def _get_blob_callback(db): + def callback(ref): + return Blob(identifier=ref, db=db) return callback +# def _get_file_callback(db): +# def callback(path): +# def pull_file(): +# identifier = path.split(':')[-1] +# return db.artifact_store.get_file(identifier), path +# return pull_file +# return callback + + def _get_local_blob(x, loader=None): if x.split('://')[0].startswith('file'): return loader(x) diff --git a/superduper/base/leaf.py b/superduper/base/leaf.py index e6275d15e..7e8f9aa6f 100644 --- a/superduper/base/leaf.py +++ b/superduper/base/leaf.py @@ -288,14 +288,12 @@ def dict(self, metadata: bool = True, defaults: bool = True): if self.literals: r['_literals'] = list(self.literals) - from superduper.components.datatype import Artifact, dill_serializer + from superduper.components.datatype import dill_serializer if self.__class__.__module__ == '__main__': - cls = Artifact( - x=self.__class__, - datatype=dill_serializer, + return Document( + {'_object': dill_serializer.encode_data(self.__class__), **r} ) - return Document({'_object': cls, **r}) path = f'{self.__class__.__module__}.{self.__class__.__name__}' return Document({'_path': path, **r}) diff --git a/superduper/components/application.py b/superduper/components/application.py index 08443c541..68aae3f15 100644 --- a/superduper/components/application.py +++ b/superduper/components/application.py @@ -31,8 +31,8 @@ class Application(Component): namespace: t.Optional[t.Sequence[t.Tuple[str, str]]] = None link: t.Optional[str] = None - def __post_init__(self, db, artifacts): - super().__post_init__(db, artifacts) + def __post_init__(self, db): + super().__post_init__(db) self._sort_components_and_set_upstream() def _sort_components_and_set_upstream(self): diff --git a/superduper/components/component.py b/superduper/components/component.py index a9729ee0c..b94acef47 100644 --- a/superduper/components/component.py +++ b/superduper/components/component.py @@ -24,7 +24,7 @@ if t.TYPE_CHECKING: from superduper import Document from superduper.base.datalayer import Datalayer - from superduper.components.datatype import DataType + from superduper.components.datatype import BaseDataType from superduper.components.plugin import Plugin @@ -87,6 +87,20 @@ def getdeepattr(obj, attr): ComponentTuple.__doc__ = 'noqa' +def _is_optional_callable(annotation) -> bool: + """Tell if an annotation is t.Optional[t.Callable]. 
+
+    >>> _is_optional_callable(t.Optional[t.Callable])
+    True
+    """
+    # Check if the annotation is of the form Optional[...]
+    if t.get_origin(annotation) is t.Union:
+        # Get the type inside Optional and check if it is Callable
+        inner_type = t.get_args(annotation)[0]  # Optional[X] means X is at index 0
+        return inner_type is t.Callable
+    return False
+
+
 class ComponentMeta(LeafMeta):
     """Metaclass for the `Component` class.
 
@@ -113,6 +127,33 @@ def __new__(cls, name, bases, dct):
             if hasattr(attr_value, 'events'):
                 new_cls.triggers.add(attr_name)
 
+        import copy
+
+        new_cls._fields = copy.deepcopy(new_cls._fields)
+        for base in bases:
+            try:
+                new_cls._fields.update(
+                    {k: v for k, v in base._fields.items() if k not in new_cls._fields}
+                )
+            except AttributeError:
+                continue
+
+        for field in dc.fields(new_cls):
+            if field.name in new_cls._fields:
+                continue
+            try:
+                # Annotations may be strings or types (PEP 563), so check both forms
+                annotation = new_cls.__annotations__[field.name]
+                if annotation in {
+                    't.Callable',
+                    't.Optional[t.Callable]',
+                    't.Callable | None',
+                }:
+                    new_cls._fields[field.name] = 'dill_serializer'
+                elif annotation is t.Callable or _is_optional_callable(annotation):
+                    new_cls._fields[field.name] = 'dill_serializer'
+            except KeyError:
+                continue
         return new_cls
 
 
@@ -122,7 +163,6 @@ class Component(Leaf, metaclass=ComponentMeta):
     Class to represent superduper.io serializable entities
     that can be saved into a database.
 
-    :param artifacts: A dictionary of artifacts paths and `DataType` objects
     :param upstream: A list of upstream components
     :param plugins: A list of plugins to be used in the component.
     :param cache: (Optional) If set `true` the component will not be cached
@@ -137,13 +177,11 @@ class Component(Leaf, metaclass=ComponentMeta):
     breaks: t.ClassVar[t.Sequence] = ()
     triggers: t.ClassVar[t.List] = []
     type_id: t.ClassVar[str] = 'component'
-    # TODO do something more elegant than this
-    _artifacts: t.ClassVar[t.Sequence[t.Tuple[str, 'DataType']]] = ()
+    _fields: t.ClassVar[t.Dict[str, t.Union['BaseDataType', str]]] = {}
     set_post_init: t.ClassVar[t.Sequence] = ('version',)
 
     upstream: t.Optional[t.List["Component"]] = None
     plugins: t.Optional[t.List["Plugin"]] = None
-    artifacts: dc.InitVar[t.Optional[t.Dict]] = None
     cache: t.Optional[bool] = True
     status: t.Optional[Status] = None
     build_variables: t.Dict | None = None
@@ -455,10 +493,9 @@ def leaves(self):
         leaf_keys = [k for k in r.keys(True) if isinstance(r[k], Leaf)]
         return {k: r[k] for k in leaf_keys}
 
-    def __post_init__(self, db, artifacts):
+    def __post_init__(self, db):
         super().__post_init__(db)
 
-        self.artifacts = artifacts
         self.version: t.Optional[int] = None
         if not self.identifier:
             raise ValueError('identifier cannot be empty or None')
@@ -506,46 +543,22 @@ def _init(item):
                 return [_init(i) for i in item]
 
             if isinstance(item, Leaf):
-                item.init(db=db)
+                item.init()
                 return item.unpack()
 
             return item
 
+        schema = self.build_class_schema()
+
         for f in dc.fields(self):
             item = getattr(self, f.name)
-            unpacked_item = _init(item)
-            setattr(self, f.name, unpacked_item)
+            item = _init(item)
+            if f.name in self._fields and isinstance(item, bytes):
+                item = schema.fields[f.name].decode_data(item)
+            setattr(self, f.name, item)
 
         return self
 
-    @property
-    def artifact_schema(self):
-        """Returns `Schema` representation for the serializers in the component."""
-        from superduper import Schema
-        from superduper.components.datatype import dill_serializer
-
-        schema = {}
-        lookup = dict(self._artifacts)
-        if self.artifacts is not None: 
- lookup.update(self.artifacts) - for f in dc.fields(self): - a = getattr(self, f.name) - if a is None: - continue - if f.name in lookup and not isinstance(a, Leaf): - schema[f.name] = lookup[f.name] - continue - if isinstance(getattr(self, f.name), Component): - continue - item = getattr(self, f.name) - if ( - callable(item) - and not isinstance(item, Leaf) - and not getattr(item, 'importable', False) - ): - schema[f.name] = dill_serializer - return Schema(identifier=f'serializer/{self.identifier}', fields=schema) - def _pre_create(self, db: Datalayer, startup_cache: t.Dict = {}): self.status = Status.initializing @@ -679,6 +692,8 @@ def export( r = self.dict(defaults=defaults, metadata=metadata).encode( defaults=defaults, metadata=metadata ) + + del r['_schema'] if not metadata: del r['uuid'] @@ -786,7 +801,6 @@ def dict( ) -> 'Document': """A dictionary representation of the component.""" from superduper import Document - from superduper.components.datatype import Artifact, File r = super().dict(metadata=metadata, defaults=defaults) @@ -801,14 +815,14 @@ def _convert_components_to_refs(r): if refs: r = _convert_components_to_refs(r) - s = self.artifact_schema + + s = self.build_class_schema() + + from superduper.components.datatype import Saveable for k in s.fields: - attr = getattr(self, k) - if isinstance(attr, (Artifact, File)): - r[k] = attr - else: - r[k] = s.fields[k](x=attr) # artifact or file + if r[k] is not None and not isinstance(r[k], Saveable): + r[k] = s.fields[k].encode_data(r[k]) if metadata: r['type_id'] = self.type_id @@ -817,7 +831,19 @@ def _convert_components_to_refs(r): if r.get('status') is not None: r['status'] = str(self.status) - return Document(r) + + return Document(r, schema=s) + + @classmethod + def build_class_schema(cls): + from superduper import Schema + from superduper.components.datatype import INBUILT_DATATYPES + + _fields = cls._fields.copy() + for k in _fields: + if isinstance(_fields[k], str): + _fields[k] = INBUILT_DATATYPES[_fields[k]] + return Schema(f'{cls.__name__}/class_schema', fields=_fields) # TODO needed? 
looks to have legacy "_content" @classmethod diff --git a/superduper/components/cron_job.py b/superduper/components/cron_job.py index 56de2743b..66475bcc1 100644 --- a/superduper/components/cron_job.py +++ b/superduper/components/cron_job.py @@ -33,7 +33,7 @@ class FunctionCronJob(CronJob): :param function: Callable to run """ - _artifacts = (('function', dill_serializer),) + _fields = {'function': dill_serializer} function: t.Callable diff --git a/superduper/components/dataset.py b/superduper/components/dataset.py index 59a4259cc..99e803df6 100644 --- a/superduper/components/dataset.py +++ b/superduper/components/dataset.py @@ -10,10 +10,7 @@ from superduper.base.datalayer import Datalayer from superduper.base.document import Document from superduper.components.component import Component, ensure_initialized -from superduper.components.datatype import ( - DataType, - dill_serializer, -) +from superduper.components.datatype import dill_serializer class Dataset(Component): @@ -30,9 +27,7 @@ class Dataset(Component): """ type_id: t.ClassVar[str] = 'dataset' - _artifacts: t.ClassVar[t.Sequence[t.Tuple[str, DataType]]] = ( - ('raw_data', dill_serializer), - ) + _fields = {'raw_data': dill_serializer} select: t.Optional[Query] = None sample_size: t.Optional[int] = None @@ -41,13 +36,13 @@ class Dataset(Component): raw_data: t.Optional[t.Sequence[t.Any]] = None pin: bool = False - def __post_init__(self, db, artifacts): + def __post_init__(self, db): """Post-initialization method. :param artifacts: Optional additional artifacts for initialization. """ + super().__post_init__(db=db) self._data = None - return super().__post_init__(db, artifacts) @property @ensure_initialized @@ -105,9 +100,9 @@ class RemoteData(Component): type_id: t.ClassVar[str] = 'dataset' getter: t.Callable - def __post_init__(self, db, artifacts): + def __post_init__(self, db): self._data = None - return super().__post_init__(db, artifacts) + return super().__post_init__(db) @property def data(self): diff --git a/superduper/components/datatype.py b/superduper/components/datatype.py index 6eb199ee1..d5f32c739 100644 --- a/superduper/components/datatype.py +++ b/superduper/components/datatype.py @@ -1,12 +1,8 @@ -import base64 -import dataclasses as dc import hashlib import inspect -import io import json import os import pickle -import re import typing as t from abc import abstractmethod from functools import cached_property @@ -16,144 +12,12 @@ import numpy from superduper import CFG -from superduper.backends.base.artifacts import ( - _construct_file_id_from_uri, -) -from superduper.base.config import BytesEncoding from superduper.base.leaf import Leaf -from superduper.components.component import Component, ensure_initialized -from superduper.misc.annotations import component -from superduper.misc.hash import hash_path +from superduper.components.component import Component Decode = t.Callable[[bytes], t.Any] Encode = t.Callable[[t.Any], bytes] -if t.TYPE_CHECKING: - from superduper.base.datalayer import Datalayer - - -class IntermediateType: - """Intermediate data type # noqa.""" - - BYTES = 'bytes' - STRING = 'string' - - -def json_encode(object: t.Any, info: t.Optional[t.Dict] = None) -> str: - """Encode the dict to a JSON string. - - :param object: The object to encode - :param info: Optional information - """ - return json.dumps(object) - - -def json_decode(b: str, info: t.Optional[t.Dict] = None) -> t.Any: - """Decode the JSON string to an dict. 
- - :param b: The JSON string to decode - :param info: Optional information - """ - return json.loads(b) - - -def pickle_encode(object: t.Any, info: t.Optional[t.Dict] = None) -> bytes: - """Encodes an object using pickle. - - :param object: The object to encode. - :param info: Optional information. - """ - return pickle.dumps(object) - - -def pickle_decode(b: bytes, info: t.Optional[t.Dict] = None) -> t.Any: - """Decodes bytes using pickle. - - :param b: The bytes to decode. - :param info: Optional information. - """ - return pickle.loads(b) - - -def dill_encode(object: t.Any, info: t.Optional[t.Dict] = None) -> bytes: - """Encodes an object using dill. - - :param object: The object to encode. - :param info: Optional information. - """ - return dill.dumps(object, recurse=True) - - -def dill_decode(b: bytes, info: t.Optional[t.Dict] = None) -> t.Any: - """Decodes bytes using dill. - - :param b: The bytes to decode. - :param info: Optional information. - """ - return dill.loads(b) - - -def file_check(path: t.Any, info: t.Optional[t.Dict] = None) -> str: - """Checks if a file path exists. - - :param path: The file path to check. - :param info: Optional information. - :raises ValueError: If the path does not exist. - """ - if not (isinstance(path, str) and os.path.exists(path)): - raise ValueError(f"Path '{path}' does not exist") - return path - - -def torch_encode(object: t.Any, info: t.Optional[t.Dict] = None) -> bytes: - """Saves an object in torch format. - - :param object: The object to encode. - :param info: Optional information. - """ - import torch - - from superduper.ext.torch.utils import device_of - - if not isinstance(object, dict): - previous_device = str(device_of(object)) - object.to('cpu') - f = io.BytesIO() - torch.save(object, f) - object.to(previous_device) - else: - f = io.BytesIO() - torch.save(object, f) - - return f.getvalue() - - -def torch_decode(b: bytes, info: t.Optional[t.Dict] = None) -> t.Any: - """Decodes bytes to a torch model. - - :param b: The bytes to decode. - :param info: Optional information. - """ - import torch - - return torch.load(io.BytesIO(b)) - - -def bytes_to_base64(bytes): - """Converts bytes to base64. - - :param bytes: The bytes to convert. - """ - return base64.b64encode(bytes).decode('utf-8') - - -def base64_to_bytes(encoded): - """Decodes a base64 encoded string. - - :param encoded: The base64 encoded string. - """ - return base64.b64decode(encoded) - class DataTypeFactory: """Abstract class for creating a DataType # noqa.""" @@ -180,17 +44,13 @@ def create(data: t.Any) -> "BaseDataType": class BaseDataType(Component): - """Base class for datatype. - - :param shape: size of vector - """ + """Base class for datatype.""" type_id: t.ClassVar[str] = 'datatype' - # TODO this can just be an integer - shape: t.Optional[int] = None + cache: bool = True @abstractmethod - def encode_data(self, item, info: t.Optional[t.Dict] = None): + def encode_data(self, item): """Decode the item as `bytes`. :param item: The item to decode. @@ -198,713 +58,266 @@ def encode_data(self, item, info: t.Optional[t.Dict] = None): """ @abstractmethod - def decode_data(self, item, info: t.Optional[t.Dict] = None): + def decode_data(self, item): """Decode the item from bytes. :param item: The item to decode. :param info: The optional information dictionary. 
""" - def encode_data_with_identifier(self, item, info: t.Optional[t.Dict] = None): - b = self.encode_data(item=item, info=info) - if isinstance(b, bytes): - return b, hashlib.sha1(b).hexdigest() - else: - return b, hashlib.sha1(str(b).encode()).hexdigest() +class BaseVector(BaseDataType): + """Base class for vector. -class NativeVector(BaseDataType): - """Datatype for encoding vectors which are supported natively by databackend. + :param shape: size of vector :param dtype: Datatype of array to encode. """ - encodable: t.ClassVar[str] = 'native' + shape: int dtype: str = 'float64' - def __post_init__(self, db, artifacts): - self.encodable_cls = Native - return super().__post_init__(db, artifacts) - - def encode_data(self, item, info=None): - if isinstance(item, numpy.ndarray): - item = item.tolist() - return item + @abstractmethod + def encode_data(self, item): + pass - def decode_data(self, item, info=None): - return numpy.array(item).astype(self.dtype) + @abstractmethod + def decode_data(self, item): + pass -class Json2Str(BaseDataType): - """Datatype for encoding vectors which are supported natively by databackend.""" +class NativeVector(BaseVector): + """Datatype for encoding vectors which are supported as list by databackend.""" encodable: t.ClassVar[str] = 'native' - def __post_init__(self, db, artifacts): - # self.encodable_cls = Native - return super().__post_init__(db, artifacts) + def encode_data(self, item): + if isinstance(item, numpy.ndarray): + item = item.tolist() + return item - def encode_data(self, item, info=None): - return json.dumps(item) + def decode_data(self, item): + return numpy.array(item).astype(self.dtype) - def decode_data(self, item, info=None): - return json.loads(item) +class Vector(BaseVector): + """Vector meta-datatype for encoding vectors ready for search. -class DataType(BaseDataType): - """A data type component that defines how data is encoded and decoded. - - :param encoder: A callable that converts an encodable object of this - encoder to bytes. - :param decoder: A callable that converts bytes to an encodable object - of this encoder. - :param info: An optional information dictionary. - :param directory: The directory to store file types. - :param encodable: The type of encodable object ('encodable', - 'lazy_artifact', or 'file'). - :param bytes_encoding: The encoding type for bytes ('base64' or 'bytes'). - :param intermediate_type: Type of the intermediate data - [IntermediateType.BYTES, IntermediateType.STRING] - :param media_type: The media type. + :param dtype: Datatype of encoded arrays. """ - encoder: t.Optional[t.Callable] = None # not necessary if encodable is file - decoder: t.Optional[t.Callable] = None - info: t.Optional[t.Dict] = None # TODO deprecate - directory: t.Optional[str] = None # TODO needed? - encodable: str = 'encodable' - bytes_encoding: t.Optional[str] = CFG.bytes_encoding - intermediate_type: t.Optional[str] = IntermediateType.BYTES - media_type: t.Optional[str] = None - registered_types: t.ClassVar[t.Dict[str, "DataType"]] = {} - cache: bool = True - - def __post_init__(self, db, artifacts): - """Post-initialization hook. - - :param artifacts: The artifacts. 
- """ - super().__post_init__(db, artifacts) - if self.encodable in _ENCODABLES: - self.encodable_cls = _ENCODABLES[self.encodable] - else: - import importlib - - self.encodable_cls = importlib.import_module( - '.'.join(self.encodable.split('.')[:-1]) - ).__dict__[self.encodable.split('.')[-1]] + identifier: str = '' - self.bytes_encoding = self.bytes_encoding or CFG.bytes_encoding - self.register_datatype(self) + def __post_init__(self, db): + self.identifier = f'vector[{self.shape[0]}]' + return super().__post_init__(db) @property - def artifact(self): - """Check if the encodable is an artifact.""" - return self.encodable_cls.artifact - - def dict(self, metadata: bool = True, defaults: bool = True, refs: bool = False): - """Get the dictionary representation of the object.""" - r = super().dict(metadata=metadata, defaults=defaults, refs=refs) - if hasattr(self.bytes_encoding, 'value'): - r['bytes_encoding'] = str(self.bytes_encoding.value) # type: ignore[union-attr] - return r - - def __call__( - self, x: t.Optional[t.Any] = None, uri: t.Optional[str] = None - ) -> '_BaseEncodable': - """Create an instance of the encodable class. - - :param x: The optional content. - :param uri: The optional URI. - """ - return self.encodable_cls(datatype=self, x=x, uri=uri, db=self.db) - - @ensure_initialized - def encode_data_with_identifier(self, item, info: t.Optional[t.Dict] = None): - """Encode the item into bytes. - - :param item: The item to encode. - :param info: The optional information dictionary. - """ - info = info or {} - data = self.encoder(item, info) if self.encoder else item - sha1 = self.encodable_cls.get_hash(data) - data = self.bytes_encoding_after_encode(data) - return data, sha1 - - @ensure_initialized - def encode_data(self, item, info: t.Optional[t.Dict] = None): - """Encode the item into bytes. - - :param item: The item to encode. - :param info: The optional information dictionary. - """ - info = info or {} - data = self.encoder(item, info) if self.encoder else item - # data = self.bytes_encoding_after_encode(data) - return data - - @ensure_initialized - def decode_data(self, item, info: t.Optional[t.Dict] = None): - """Decode the item from bytes. - - :param item: The item to decode. - :param info: The optional information dictionary. - """ - info = info or {} - # item = self.bytes_encoding_before_decode(item) - return self.decoder(item, info=info) if self.decoder else item - - def bytes_encoding_after_encode(self, data): - """Encode the data to base64. - - if the bytes_encoding is BASE64 and the intermediate_type is BYTES - - :param data: Encoded data - """ - if ( - self.bytes_encoding == BytesEncoding.BASE64 - and self.intermediate_type == IntermediateType.BYTES - ): - return bytes_to_base64(data) - return data - - def bytes_encoding_before_decode(self, data): - """Encode the data to base64. 
+ def encodable(self): + return self.datatype_impl.encodable - if the bytes_encoding is BASE64 and the intermediate_type is BYTES + @cached_property + def datatype_impl(self): + if isinstance(CFG.datatype_presets.vector, str): + type_: str = CFG.datatype_presets.vector + else: + type_: str = self.db.databackend.datatype_presets['vector'] + module = '.'.join(type_.split('.')[:-1]) + cls = type_.split('.')[-1] + datatype = getattr(import_module(module), cls) + if inspect.isclass(datatype): + datatype = datatype('tmp', dtype=self.dtype, shape=self.shape) + return datatype - :param data: Decoded data - """ - if ( - self.bytes_encoding == BytesEncoding.BASE64 - and self.intermediate_type == IntermediateType.BYTES - ): - return base64_to_bytes(data) - return data - - @classmethod - def register_datatype(cls, instance): - """Register a datatype. - - :param instance: The datatype instance to register. - """ - cls.registered_types[instance.identifier] = instance + def encode_data(self, item): + return self.datatype_impl.encode_data(item=item) + def decode_data(self, item): + return self.datatype_impl.decode_data(item=item) -def encode_torch_state_dict(module, info): - """Encode torch state dictionary. - :param module: Module. - :param info: Information. - """ - import torch +class JSON(BaseDataType): + """Datatype for encoding vectors which are supported natively by databackend.""" - buffer = io.BytesIO() - torch.save(module.state_dict(), buffer) + encodable: t.ClassVar[str] = 'native' - return buffer.getvalue() + def __post_init__(self, db): + return super().__post_init__(db) + def encode_data(self, item): + return json.dumps(item) -# TODO migrate to torch plugin -class DecodeTorchStateDict: - """Torch state dictionary decoder. + def decode_data(self, item): + return json.loads(item) - :param cls: Torch state cls - """ - def __init__(self, cls): - self.cls = cls +class _Encodable: + encodable: t.ClassVar[str] = 'encodable' - def __call__(self, b: bytes, info: t.Dict): - """Decode the torch state dictionary. + def encode_data(self, item): + return self._encode_data(item) - :param b: Bytes. - :param info: Information. - """ - import torch - buffer = io.BytesIO(b) - module = self.cls(**info) - module.load_state_dict(torch.load(buffer)) - return module +class _Artifact: + encodable: t.ClassVar[str] = 'artifact' + def encode_data(self, item): + return Blob(bytes=self._encode_data(item)) -def _find_descendants(cls): - """Find descendants of the given class. - :param cls: The class to find descendants for. - """ - descendants = cls.__subclasses__() - for subclass in descendants: - descendants.extend(_find_descendants(subclass)) - return descendants +class _PickleMixin: + def _encode_data(self, item): + return pickle.dumps(item) + def decode_data(self, item): + return pickle.loads(item) -class _BaseEncodable(Leaf): - """Data variable wrapping encode-able item. - Encoding is controlled by the referred - to ``Encoder`` instance. +class PickleSerializer(_Artifact, _PickleMixin, BaseDataType): + """Serializer with pickle.""" - :param datatype: The datatype of the content. - :param uri: URI of the content, if any. - :param x: Wrapped content. - """ - identifier: str = '' - datatype: DataType - uri: t.Optional[str] = None # URI of the content to be deprecated - x: t.Optional[t.Any] = None - lazy: t.ClassVar[bool] = False - artifact: t.ClassVar[bool] = False +class PickleEncoder(_Encodable, _PickleMixin, BaseDataType): + """Pickle inline encoder.""" - def __post_init__(self, db): - """Post-initialization hook. 
- :param db: Datalayer instance. - """ - db = db or self.datatype.db - super().__post_init__(db) - if self.uri is not None and self.identifier is None: - self.identifier = _construct_file_id_from_uri(self.uri) +class _DillMixin: + def _encode_data(self, item): + return dill.dumps(item) - if self.uri and not re.match('^[a-z]{0,5}://', self.uri): - self.uri = f'file://{self.uri}' + def decode_data(self, item): + return dill.loads(item) - @property - def reference(self): - """Get the reference to the datatype.""" - return self.datatype.reference - def unpack(self): - """Unpack the content of the `Encodable`.""" - return self.x +class _DillSerializer(_Artifact, _DillMixin, BaseDataType): + ... - @staticmethod - def get_hash(data): - """Get the hash of the given data. - :param data: Data to hash. - """ - if isinstance(data, str): - bytes_ = data.encode() - elif isinstance(data, bytes): - bytes_ = data - elif isinstance(data, Native): - bytes_ = str([type(data), data.x]).encode() - else: - bytes_ = str(id(data)).encode() - return hashlib.sha1(bytes_).hexdigest() +class _DillEncoder(_Encodable, _DillMixin, BaseDataType): + ... - @staticmethod - def build_reference(identifier, source_data): - raise NotImplementedError +class FileType(BaseDataType): + """Type for encoding files on disk.""" -class Empty: - """Sentinel class # noqa.""" + encodable: t.ClassVar[str] = 'file' - def __repr__(self): - """Get the string representation of the Empty object.""" - return '' + def encode_data(self, item): + assert os.path.exists(item) + return File(path=item) + def decode_data(self, item): + return item -class Blob(Leaf): - """A wrapper to signify a blob for special treatment. - See `Document.encode` and related functions. +def get_hash(data): + """Get the hash of the given data. - :param identifier: The identifier of the blob. - :param bytes: The bytes of the blob. + :param data: Data to hash. """ - - identifier: str - bytes: bytes + if isinstance(data, str): + bytes_ = data.encode() + elif isinstance(data, bytes): + bytes_ = data + else: + bytes_ = str(id(data)).encode() + return hashlib.sha1(bytes_).hexdigest() -# TODO this is no longer stricly needed, since we now encode -# directly with `Schema` -class Encodable(_BaseEncodable): - """Class for encoding non-Python datatypes to the database. +class Saveable(Leaf): + """A Saveable base class.""" - :param x: The encodable object. - :param blob: The blob data. - """ + identifier: str = '' - x: t.Any = Empty() - artifact: t.ClassVar[bool] = False - blob: dc.InitVar[t.Optional[bytearray]] = None - - def __post_init__(self, db, blob): - super().__post_init__(db) - if isinstance(self.x, Empty): - self.datatype.init() - self.x = self.datatype.decode_data(blob) - - def _encode(self): - bytes_ = self.datatype.encode_data(self.x) - sha1 = self.get_hash(bytes_) - return bytes_, sha1 - - def to_artifact(self): - """Convert the encodable to an artifact.""" - r = self.dict() - r['datatype'].encodable = 'artifact' - kwargs = { - k: v for k, v in r.items() if k in inspect.signature(Artifact).parameters - } - return Artifact(**kwargs) - - def dict(self, metadata: bool = True, defaults: bool = True): - """Get the dictionary representation of the object.""" - r = super().dict(metadata=metadata, defaults=defaults) - del r['x'] - r['blob'], identifier = self._encode() - if not r['identifier']: - self.identifier = identifier - r['identifier'] = identifier - return r - - def init(self, db): - """Initialization method. - - :param db: The Datalayer instance. 
- """ + @property + @abstractmethod + def reference(self): pass - @classmethod - def get_datatype(cls, db, r): - """Get the datatype of the object. + @abstractmethod + def init(self): + pass - :param db: `Datalayer` instance to assist with - :param r: The object to get the datatype from - """ - if db is None: - try: - from superduper.components.datatype import serializers - - datatype = serializers[r['datatype']] - except KeyError: - raise ValueError( - f'You specified a serializer which doesn\'t have a' - f' default value: {r["datatype"]}' - ) - else: - datatype = db.datatypes[r['datatype']] - return datatype + @abstractmethod + def unpack(self): + pass -class Native(_BaseEncodable): - """Class for representing native data supported by the underlying database. +class File(Saveable): + """Placeholder for a file. - :param x: The encodable object. + :param path: Path to file. """ - x: t.Optional[t.Any] = None + path: str = '' - def __post_init__(self, db): + def __post_init__(self, db=None): + if not self.identifier: + self.identifier = get_hash(self.path) return super().__post_init__(db) - @classmethod - def _get_object(cls, db, r): - raise NotImplementedError - - -class Artifact(_BaseEncodable): - """Class for representing data to be saved on disk or in the artifact-store. - - :param x: The artifact object. - :param blob: The blob data. Can be a string or bytes. - if string, it should be in the format `&:blob:{file_id}` - if bytes, it should be the actual data. - """ - - artifact: t.ClassVar[bool] = True - x: t.Any = Empty() - blob: dc.InitVar[t.Optional[t.Union[str, bytes]]] = None - lazy: t.ClassVar[bool] = False - - def __post_init__(self, db, blob=None): - super().__post_init__(db) - self._blob = blob - self._reference = None - - if not (self.lazy and not isinstance(self._blob, bytes)): - self.init() - - def init(self, db=None): - """Initialize to load `x` with the actual file from the artifact store.""" - if isinstance(self._blob, t.Callable): - self._blob, _ = self._blob() - - if isinstance(self._blob, bytes): - blob = self._blob - self.datatype.init() - self.x = self.datatype.decoder(blob, info=None) - self._blob = None - - if not isinstance(self.x, Empty): + def init(self): + if self.path: return - - def dict(self, metadata: bool = True, defaults: bool = True): - """Get the dictionary representation of the object.""" - bytes, identifier = self._encode() - if not self.identifier: - self.identifier = identifier - r = super().dict(metadata=metadata, defaults=defaults) - del r['x'] - r['blob'] = Blob(identifier=self.identifier, bytes=bytes) - return r - - def _encode(self): - bytes_ = self.datatype.encoder(self.x) - sha1 = self.get_hash(bytes_) - return bytes_, sha1 + self.path = self.db.artifact_store.get_file(self.identifier) def unpack(self): - """Unpack the content of the `Encodable`.""" self.init() - return self.x - - @staticmethod - def build_reference(identifier, source_data): - """Build a reference to the blob. - - :param identifier: The identifier of the blob. - :param source_data: The source data. - :return: The reference to the blob. '&:blob:{file_id}' - """ - return f"&:blob:{identifier}" - - -class LazyArtifact(Artifact): - """Data to be saved and loaded only when needed.""" - - lazy: t.ClassVar[bool] = True - - def dict(self, metadata: bool = True, defaults: bool = True): - """Get the dictionary representation of the object.""" - self.init() - return super().dict(metadata=metadata, defaults=defaults) - - -class FileItem(Leaf): - """File item class. 
+ return self.path - :param identifier: The identifier of the file. - :param path: The path of the file. - """ - - identifier: str - path: str + # TODO - return this as self.dict()? + @property + def reference(self): + return f'&:file:{self.identifier}' -class File(_BaseEncodable): - """Data to be saved on disk and passed as a file reference. +class Blob(Saveable): + """Placeholder for a blob of bytes. - :param x: path to the file + :param bytes: Bytes blob. """ - lazy: t.ClassVar[bool] = False - artifact: t.ClassVar[bool] = True - - x: t.Any = Empty() - - def __post_init__(self, db): - super().__post_init__(db) - if isinstance(self.x, t.Callable): - self._file = self.x - self.x = Empty() - else: - self._file = None - - if not self.lazy: - self.init() + bytes: bytearray | None = None + identifier: str = '' - def init(self, db=None): - """Initialize to load `x` with the actual file from the artifact store.""" - if isinstance(self._file, t.Callable): - file_path, self.identifier = self._file() - self.x = file_path + def __post_init__(self, db=None): + if not self.identifier: + assert self.bytes is not None + self.identifier = get_hash(self.bytes) + return super().__post_init__(db) - if not isinstance(self.x, Empty): + def init(self): + if self.bytes: return - - def dict(self, metadata: bool = True, defaults: bool = True): - """Get the dictionary representation of the object.""" - self.identifier = self.identifier or hash_path(self.x) - r = super().dict(metadata=metadata, defaults=defaults) - r['x'] = FileItem(identifier=self.identifier, path=self.x) - return r + self.bytes = self.db.artifact_store.get_bytes(self.identifier) def unpack(self): - """Unpack and get the original data.""" - self.init() - return self.x - - @staticmethod - def build_reference(identifier, source_data): - """Build a reference to the file. - - :param identifier: The identifier of the file. - :param source_data: The source data. - :return: The reference to the file. '?:file:{file_id}' - """ - return f"&:file:{identifier}" - - -class LazyFile(File): - """Class is used to load a file only when needed.""" - - lazy: t.ClassVar[bool] = True - - def dict(self, metadata: bool = True, defaults: bool = True): - """Get the dictionary representation of the object.""" self.init() - return super().dict(metadata=metadata, defaults=defaults) - - -_ENCODABLES = { - 'encodable': Encodable, - 'artifact': Artifact, - 'lazy_artifact': LazyArtifact, - 'file': File, - 'native': Native, - 'lazy_file': LazyFile, -} - - -methods: t.Dict[str, t.Dict] = { - 'pickle': {'encoder': pickle_encode, 'decoder': pickle_decode}, - 'dill': {'encoder': dill_encode, 'decoder': dill_decode}, - 'torch': {'encoder': torch_encode, 'decoder': torch_decode}, - 'file': {'encoder': file_check, 'decoder': file_check}, - 'native': {'encoder': None, 'decoder': None}, -} - - -@component() -def get_serializer( - identifier: str, - method: str, - encodable: str = "encodable", - db: t.Optional['Datalayer'] = None, -): - """Get a serializer. - - :param identifier: The identifier of the serializer. - :param method: The method of the serializer. - :param encodable: The type of encodable object. - :param db: The Datalayer instance. 
- """ - return DataType( - identifier=identifier, - encodable=encodable, - db=db, - **methods[method], - ) - - -json_serializer = Json2Str('json') - - -pickle_encoder = get_serializer( - identifier='pickle_encoder', - method='pickle', - encodable='encodable', -) - - -pickle_serializer = get_serializer( - identifier='pickle', - method='pickle', - encodable='artifact', -) - -pickle_lazy = get_serializer( - identifier='pickle_lazy', - method='pickle', - encodable='lazy_artifact', -) - -dill_serializer = get_serializer( - identifier='dill', - method='dill', - encodable='artifact', -) - -dill_lazy = get_serializer( - identifier='dill_lazy', - method='dill', - encodable='lazy_artifact', -) - -torch_serializer = get_serializer( - identifier='torch', - method='torch', - encodable='lazy_artifact', -) - -file_serializer = get_serializer( - identifier='file', - method='file', - encodable='file', -) - -file_lazy = get_serializer( - identifier='file_lazy', - method='file', - encodable='lazy_file', -) - -serializers = { - 'pickle': pickle_serializer, - 'dill': dill_serializer, - 'torch': torch_serializer, - 'file': file_serializer, - 'pickle_lazy': pickle_lazy, - 'dill_lazy': dill_lazy, - 'file_lazy': file_lazy, -} - - -class Vector(BaseDataType): - """Vector meta-datatype for encoding vectors ready for search. - - :param dtype: Datatype of encoded arrays. - """ - - identifier: str = '' - dtype: str = 'float64' - - def __post_init__(self, db, artifacts): - self.identifier = f'vector[{self.shape[0]}]' - return super().__post_init__(db, artifacts) + return self.bytes @property - def encodable_cls(self): - return self.datatype_impl.encodable_cls - - @property - def encodable(self): - return self.datatype_impl.encodable - - @cached_property - def datatype_impl(self): - if isinstance(CFG.datatype_presets.vector, str): - type_: str = CFG.datatype_presets.vector - else: - type_: str = self.db.databackend.datatype_presets['vector'] - module = '.'.join(type_.split('.')[:-1]) - cls = type_.split('.')[-1] - datatype = getattr(import_module(module), cls) - if inspect.isclass(datatype): - datatype = datatype('tmp', dtype=self.dtype) - return datatype - - def encode_data(self, item, info: t.Optional[t.Dict] = None): - return self.datatype_impl.encode_data(item=item, info=info) + def reference(self): + return f'&:blob:{self.identifier}' + + +json_encoder = JSON('json') +pickle_encoder = PickleEncoder('pickle_encoder') +pickle_serializer = PickleSerializer('pickle_serializer') +dill_encoder = _DillEncoder('dill_encoder') +dill_serializer = _DillSerializer('dill_serializer') +file = FileType('file') + + +INBUILT_DATATYPES = { + dt.identifier: dt + for dt in [ + json_encoder, + pickle_encoder, + pickle_serializer, + dill_encoder, + dill_serializer, + file, + ] +} - def decode_data(self, item, info: t.Optional[t.Dict] = None): - return self.datatype_impl.decode_data(item=item, info=info) +DEFAULT_DATATYPE = PickleEncoder('DEFAULT') diff --git a/superduper/components/graph.py b/superduper/components/graph.py index 81a1734cd..94555033b 100644 --- a/superduper/components/graph.py +++ b/superduper/components/graph.py @@ -167,8 +167,8 @@ class Input(Model): identifier: str = '_input' signature: Signature = '*args' - def __post_init__(self, db, artifacts, example): - super().__post_init__(db, artifacts, example) + def __post_init__(self, db, example): + super().__post_init__(db, example) if isinstance(self.spec, str): self.signature = 'singleton' @@ -199,8 +199,8 @@ class DocumentInput(Model): identifier: str = '_input' 
signature: Signature = 'singleton' - def __post_init__(self, db, artifacts, example): - super().__post_init__(db, artifacts, example) + def __post_init__(self, db, example): + super().__post_init__(db, example) def predict(self, r): """Single prediction. @@ -255,7 +255,7 @@ class Graph(Model): outputs: t.List[t.Union[str, Model]] = dc.field(default_factory=list) signature: Signature = '*args,**kwargs' - def __post_init__(self, db, artifacts, example): + def __post_init__(self, db, example): self.G = nx.DiGraph() self.nodes = {} self.version = 0 @@ -284,7 +284,7 @@ def __post_init__(self, db, artifacts, example): on=on, update_edge=False, ) - super().__post_init__(db, artifacts=artifacts, example=example) + super().__post_init__(db, example=example) def connect( self, diff --git a/superduper/components/listener.py b/superduper/components/listener.py index dbad51820..090024fe8 100644 --- a/superduper/components/listener.py +++ b/superduper/components/listener.py @@ -51,12 +51,12 @@ def _get_metadata(self): metadata = super()._get_metadata() return {**metadata, 'output_table': self.output_table} - def __post_init__(self, db, artifacts): + def __post_init__(self, db): if not self.cdc_table and self.select: self.cdc_table = self.select.table self._set_upstream() - return super().__post_init__(db, artifacts) + return super().__post_init__(db) def handle_update_or_same(self, other): super().handle_update_or_same(other) diff --git a/superduper/components/metric.py b/superduper/components/metric.py index b6a96ede8..641464e28 100644 --- a/superduper/components/metric.py +++ b/superduper/components/metric.py @@ -1,6 +1,6 @@ import typing as t -from superduper.components.component import Component +from superduper.components.component import Component, ensure_initialized class Metric(Component): @@ -15,6 +15,7 @@ class Metric(Component): object: t.Callable + @ensure_initialized def __call__(self, x: t.Sequence[int], y: t.Sequence[int]) -> bool: """Call the metric object on the x and y data. 
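
The hunks above are the heart of this release's component refactor: `__post_init__` drops its `artifacts` parameter everywhere, and `Metric.__call__` gains the `ensure_initialized` guard. A minimal sketch of what that looks like downstream; the `accuracy` function and its inputs are illustrative, not from this diff, and it assumes a `Metric` can initialize without a connected `Datalayer`:

```python
import typing as t

from superduper.components.metric import Metric


def accuracy(x: t.Sequence[int], y: t.Sequence[int]) -> float:
    # Fraction of positions where prediction and label agree.
    return sum(int(a == b) for a, b in zip(x, y)) / max(len(x), 1)


# There is no `artifacts` argument to thread through `__post_init__`
# any more; the component is built from `identifier` and the callable.
metric = Metric(identifier='accuracy', object=accuracy)

# `ensure_initialized` lazily initializes the component on first call.
score = metric([1, 0, 1], [1, 1, 1])
```
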
diff --git a/superduper/components/model.py b/superduper/components/model.py index 1dcc53101..d9985cf16 100644 --- a/superduper/components/model.py +++ b/superduper/components/model.py @@ -21,8 +21,9 @@ from superduper.base.annotations import trigger from superduper.base.document import Document from superduper.base.exceptions import DatabackendException +from superduper.base.leaf import Leaf from superduper.components.component import Component, ComponentMeta, ensure_initialized -from superduper.components.datatype import DataType, dill_lazy +from superduper.components.datatype import BaseDataType, dill_serializer from superduper.components.metric import Metric from superduper.components.schema import Schema @@ -31,7 +32,7 @@ from superduper.components.dataset import Dataset -EncoderArg = t.Union[DataType, str, None] +EncoderArg = t.Union[BaseDataType, str, None] ModelInputType = t.Union[str, t.List[str], t.Tuple[t.List[str], t.Dict[str, str]]] Signature = t.Literal['*args', '**kwargs', '*args,**kwargs', 'singleton'] @@ -391,8 +392,8 @@ class Model(Component, metaclass=ModelMeta): example: dc.InitVar[t.Any | None] = None deploy: bool = False - def __post_init__(self, db, artifacts, example): - super().__post_init__(db, artifacts) + def __post_init__(self, db, example): + super().__post_init__(db) self.example = example self._is_initialized = False @@ -1014,7 +1015,9 @@ def __getitem__(self, item): return _Node(item) -class ObjectModel(Model): +# This is if the user would like to +# import the object +class ImportedModel(Model): """Model component which wraps a Model to become serializable. Example: @@ -1030,10 +1033,7 @@ class ObjectModel(Model): """ breaks: t.ClassVar[t.Sequence] = ('object', 'trainer') - _artifacts: t.ClassVar[t.Sequence[t.Tuple[str, 'DataType']]] = ( - ('object', dill_lazy), - ) - object: t.Callable + object: Leaf method: t.Optional[str] = None @staticmethod @@ -1095,6 +1095,13 @@ def predict(self, *args, **kwargs): return getattr(self.object, self.method)(*args, **kwargs) +class ObjectModel(ImportedModel): + """A model to wrap a Python object and serialize it.""" + + _fields = {'object': dill_serializer} + object: t.Callable + + class APIBaseModel(Model): """APIBaseModel component which is used to make the type of API request. 
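
Worth pausing on the `ObjectModel` hunk above: the `_artifacts` class-variable tuple is replaced by a `_fields` mapping from attribute name to serializer, and `ObjectModel` becomes a thin subclass of the new `ImportedModel`. A hedged sketch of the resulting usage; the lambda and identifier are illustrative:

```python
from superduper.components.model import ObjectModel

# `_fields = {'object': dill_serializer}` declares that the wrapped
# callable is dill-serialized and stored as an artifact blob on save.
m = ObjectModel(identifier='add-two', object=lambda x: x + 2)

print(m.predict(40))  # -> 42
```
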
@@ -1105,8 +1112,8 @@ class APIBaseModel(Model): model: t.Optional[str] = None max_batch_size: int = 8 - def __post_init__(self, db, artifacts, example): - super().__post_init__(db, artifacts, example) + def __post_init__(self, db, example): + super().__post_init__(db, example) if self.model is None: assert self.identifier is not None self.model = self.identifier @@ -1146,8 +1153,8 @@ def inputs(self): """Method to get ``Inputs`` instance for model inputs.""" return Inputs(self.runtime_params) - def __post_init__(self, db, artifacts): - super().__post_init__(db, artifacts) + def __post_init__(self, db): + super().__post_init__(db) self.params['model'] = self.model env_variables = re.findall(r'{([A-Z0-9\_]+)}', self.url) runtime_variables = re.findall(r'{([a-z0-9\_]+)}', self.url) @@ -1190,7 +1197,7 @@ class QueryModel(Model): """ preprocess: t.Optional[t.Callable] = None - postprocess: t.Optional[t.Union[t.Callable]] = None + postprocess: t.Optional[t.Callable] = None select: Query signature: Signature = '**kwargs' @@ -1247,10 +1254,10 @@ class SequentialModel(Model): models: t.List[Model] - def __post_init__(self, db, artifacts, example): + def __post_init__(self, db, example): self.signature = self.models[0].signature self.datatype = self.models[-1].datatype - return super().__post_init__(db, artifacts, example) + return super().__post_init__(db, example) @property def inputs(self) -> Inputs: diff --git a/superduper/components/plugin.py b/superduper/components/plugin.py index 816a4b47f..22256a910 100644 --- a/superduper/components/plugin.py +++ b/superduper/components/plugin.py @@ -6,7 +6,7 @@ import typing as t from superduper import Component, logging -from superduper.components.datatype import LazyFile, file_lazy +from superduper.components.datatype import File, file class Plugin(Component): @@ -18,19 +18,19 @@ class Plugin(Component): """ type_id: t.ClassVar[str] = "plugin" - _artifacts: t.ClassVar = (("path", file_lazy),) + _fields = {"path": file} path: str identifier: str = "" cache_path: str = "~/.superduper/plugins" - def __post_init__(self, db, artifacts): - if isinstance(self.path, LazyFile): + def __post_init__(self, db): + if isinstance(self.path, File): self._prepare_plugin() else: path_name = os.path.basename(self.path.rstrip("/")) self.identifier = self.identifier or f"plugin-{path_name}".replace(".", "_") self._install() - super().__post_init__(db, artifacts) + super().__post_init__(db) def _install(self): logging.debug(f"Installing plugin {self.identifier}") @@ -92,7 +92,7 @@ def _pip_install(self, requirement_path): def _prepare_plugin(self): plugin_name_tag = f"{self.identifier}" - assert isinstance(self.path, LazyFile) + assert isinstance(self.path, File) cache_path = os.path.expanduser(self.cache_path) uuid_path = os.path.join(cache_path, self.uuid) # Check if plugin is already in cache diff --git a/superduper/components/schema.py b/superduper/components/schema.py index d0b9a89e5..c3f291f5f 100644 --- a/superduper/components/schema.py +++ b/superduper/components/schema.py @@ -1,12 +1,11 @@ import base64 -import hashlib import typing as t from functools import cached_property from superduper.base.constant import KEY_SCHEMA from superduper.base.leaf import Leaf from superduper.components.component import Component -from superduper.components.datatype import BaseDataType, DataType +from superduper.components.datatype import BaseDataType, Saveable from superduper.misc.reference import parse_reference from superduper.misc.special_dicts import SuperDuperFlatEncode @@ 
-14,20 +13,6 @@ from superduper.base.document import Getters -def get_hash(data): - """Get the hash of the given data. - - :param data: Data to hash. - """ - if isinstance(data, str): - bytes_ = data.encode() - elif isinstance(data, bytes): - bytes_ = data - else: - bytes_ = str(id(data)).encode() - return hashlib.sha1(bytes_).hexdigest() - - class FieldType(Leaf): """Field type to represent the type of a field in a table. @@ -36,12 +21,13 @@ class FieldType(Leaf): :param identifier: The name of the data type. """ - identifier: t.Union[str, DataType] + identifier: t.Union[str, BaseDataType] def __post_init__(self, db): super().__post_init__(db) - if isinstance(self.identifier, DataType): + # TODO why would this happen? + if isinstance(self.identifier, BaseDataType): self.identifier = self.identifier.name elif isinstance(self.identifier, self.__class__): @@ -74,12 +60,12 @@ class Schema(Component): """ type_id: t.ClassVar[str] = 'schema' - fields: t.Mapping[str, DataType] + fields: t.Mapping[str, BaseDataType] - def __post_init__(self, db, artifacts): + def __post_init__(self, db): assert self.identifier is not None, 'Schema must have an identifier' assert self.fields is not None, 'Schema must have fields' - super().__post_init__(db, artifacts) + super().__post_init__(db) for k, v in self.fields.items(): if isinstance(v, (BaseDataType, FieldType)): @@ -92,6 +78,12 @@ def __post_init__(self, db, artifacts): self.fields[k] = v + def update(self, other: 'Schema'): + new_fields = self.fields.copy() + new_fields.update(other.fields) + return Schema(self.identifier, fields=new_fields) + + # TODO why do we need this? @cached_property def encoded_types(self): """List of fields of type DataType.""" @@ -124,6 +116,7 @@ def decode_data( """Decode data using the schema's encoders. :param data: Data to decode. + :param getters: Getters to decode. 
""" if self.trivial: return data @@ -136,17 +129,8 @@ def decode_data( value = data[k] if reference := parse_reference(value): - value = getters.run(reference.name, reference.path) - if reference.name == 'blob': - kwargs = {'blob': value} - elif reference.name == 'file': - kwargs = {'x': value} - else: - assert False, f'Unknown reference type {reference.name}' - encodable = field.encodable_cls(datatype=field, **kwargs) - if not field.encodable_cls.lazy: - encodable = encodable.unpack() - decoded[k] = encodable + saveable: Saveable = getters.run(reference.name, reference.path) + decoded[k] = saveable else: b = data[k] if ( @@ -174,32 +158,34 @@ def encode_data(self, out, builds, blobs, files, leaves_to_keep=()): if k not in out: continue + if isinstance(out[k], Saveable): + continue + if isinstance(out[k], leaves_to_keep): continue - # data, identifier = field.encode_data_with_identifier(out[k]) data = field.encode_data(out[k]) - identifier = get_hash(data) - if ( field.encodable == 'encodable' and self.db.databackend.bytes_encoding == 'base64' ): assert isinstance(data, bytes) data = _convert_bytes_to_base64(data) + out[k] = data - if field.encodable in {'artifact', 'lazy_artifact'}: - reference = field.encodable_cls.build_reference(identifier, data) - ref_obj = parse_reference(reference) + elif isinstance(data, Saveable): + ref_obj = parse_reference(data.reference) if ref_obj.name == 'blob': - blobs[identifier] = data + blobs[data.identifier] = data.bytes + elif ref_obj.name == 'file': - files[identifier] = data + files[data.identifier] = data.path else: assert False, f'Unknown reference type {ref_obj.name}' - out[k] = reference + + out[k] = data.reference else: out[k] = data diff --git a/superduper/components/table.py b/superduper/components/table.py index d5947aa8d..8605d32db 100644 --- a/superduper/components/table.py +++ b/superduper/components/table.py @@ -22,7 +22,7 @@ class Table(Component): :param data: Data to insert post creation """ - _artifacts: t.ClassVar[t.Tuple[str]] = (('data', pickle_serializer),) + _fields = {'data': pickle_serializer} type_id: t.ClassVar[str] = 'table' @@ -30,8 +30,8 @@ class Table(Component): primary_id: str = DEFAULT_PRIMARY_ID data: t.List[t.Dict] | 'Dataset' | 'RemoteData' | None = None - def __post_init__(self, db, artifacts): - super().__post_init__(db, artifacts) + def __post_init__(self, db): + super().__post_init__(db) fields = {} fields.update(self.schema.fields) diff --git a/superduper/components/template.py b/superduper/components/template.py index c5ccec94f..22dad5cc2 100644 --- a/superduper/components/template.py +++ b/superduper/components/template.py @@ -38,7 +38,7 @@ class _BaseTemplate(Component): files: t.Optional[t.List[str]] = None substitutions: dc.InitVar[t.Optional[t.Dict]] = None - def __post_init__(self, db, artifacts, substitutions): + def __post_init__(self, db, substitutions): if isinstance(self.template, Leaf): self.template = self.template.encode(defaults=True, metadata=False) self.template = SuperDuperFlatEncode(self.template) @@ -59,7 +59,7 @@ def __post_init__(self, db, artifacts, substitutions): ) if self.template_variables is None: self.template_variables = self.template.variables - super().__post_init__(db, artifacts) + super().__post_init__(db) @ensure_initialized def __call__(self, **kwargs): @@ -224,10 +224,10 @@ class QueryTemplate(_BaseTemplate): type_id: t.ClassVar[str] = 'query_template' - def __post_init__(self, db, artifacts, substitutions): + def __post_init__(self, db, substitutions): if 
isinstance(self.template, Leaf): self.template = self.template.dict(metadata=False, defaults=False).encode() - return super().__post_init__(db, artifacts, substitutions) + return super().__post_init__(db, substitutions) @property def form_template(self): diff --git a/superduper/components/training.py b/superduper/components/training.py index c1981c03c..56ab504c4 100644 --- a/superduper/components/training.py +++ b/superduper/components/training.py @@ -1,7 +1,7 @@ import typing as t from superduper.components.component import Component -from superduper.components.datatype import DataType, file_lazy +from superduper.components.datatype import file_lazy class Checkpoint(Component): @@ -13,9 +13,9 @@ class Checkpoint(Component): path: t.Optional[str] step: int - _artifacts: t.ClassVar[t.Sequence[t.Tuple[str, DataType]]] = (("path", file_lazy),) + _fields = {'path': file_lazy} type_id: t.ClassVar[str] = "checkpoint" - def __post_init__(self, db, artifacts): - super().__post_init__(db, artifacts) + def __post_init__(self, db): + super().__post_init__(db) self.version = int(self.step) diff --git a/superduper/components/vector_index.py b/superduper/components/vector_index.py index 358ba4379..43723a5d7 100644 --- a/superduper/components/vector_index.py +++ b/superduper/components/vector_index.py @@ -11,13 +11,10 @@ from superduper.base.document import Document from superduper.components.cdc import CDC from superduper.components.component import Component -from superduper.components.datatype import DataType from superduper.components.listener import Listener from superduper.components.model import Mapping, ModelInputType from superduper.components.schema import Schema from superduper.components.table import Table -from superduper.ext.utils import str_shape -from superduper.misc.annotations import component from superduper.misc.special_dicts import MongoStyleDict from superduper.vector_search.base import VectorIndexMeasureType, VectorItem @@ -118,9 +115,9 @@ class VectorIndex(CDC): metric_values: t.Optional[t.Dict] = dc.field(default_factory=dict) cdc_table: str = '' - def __post_init__(self, db, artifacts): + def __post_init__(self, db): self.cdc_table = self.cdc_table or self.indexing_listener.outputs - return super().__post_init__(db, artifacts) + return super().__post_init__(db) def refresh(self): if self.cdc_table.startswith(CFG.output_prefix): @@ -419,42 +416,42 @@ def __call__(self, bytes, info: t.Optional[t.Dict] = None): return np.frombuffer(bytes, dtype=self.dtype).tolist() -@component( - {'name': 'shape', 'type': 'int'}, - {'name': 'identifier', 'type': 'str'}, -) -def vector(shape, identifier: t.Optional[str] = None): - """Create an encoder for a vector (list of ints/ floats) of a given shape. - - :param shape: The shape of the vector - :param identifier: The identifier of the vector - """ - if isinstance(shape, int): - shape = (shape,) - - identifier = identifier or f'vector[{str_shape(shape)}]' - return DataType( - identifier=identifier, - shape=shape, - encoder=None, - decoder=None, - encodable='native', - ) - - -@component() -def sqlvector(shape, bytes_encoding: t.Optional[str] = None): - """Create an encoder for a vector (list of ints/ floats) of a given shape. 
- - This is used for compatibility with SQL databases, as the default vector - - :param shape: The shape of the vector - :param bytes_encoding: The encoding of the bytes - """ - return DataType( - identifier=f'sqlvector[{str_shape(shape)}]', - shape=shape, - encoder=EncodeArray(dtype='float64'), - decoder=DecodeArray(dtype='float64'), - bytes_encoding=bytes_encoding, - ) +# @component( +# {'name': 'shape', 'type': 'int'}, +# {'name': 'identifier', 'type': 'str'}, +# ) +# def vector(shape, identifier: t.Optional[str] = None): +# """Create an encoder for a vector (list of ints/ floats) of a given shape. + +# :param shape: The shape of the vector +# :param identifier: The identifier of the vector +# """ +# if isinstance(shape, int): +# shape = (shape,) + +# identifier = identifier or f'vector[{str_shape(shape)}]' +# return DataType( +# identifier=identifier, +# shape=shape, +# encoder=None, +# decoder=None, +# encodable='native', +# ) + + +# @component() +# def sqlvector(shape, bytes_encoding: t.Optional[str] = None): +# """Create an encoder for a vector (list of ints/ floats) of a given shape. + +# This is used for compatibility with SQL databases, as the default vector + +# :param shape: The shape of the vector +# :param bytes_encoding: The encoding of the bytes +# """ +# return DataType( +# identifier=f'sqlvector[{str_shape(shape)}]', +# shape=shape, +# encoder=EncodeArray(dtype='float64'), +# decoder=DecodeArray(dtype='float64'), +# bytes_encoding=bytes_encoding, +# ) diff --git a/superduper/ext/llm/model.py b/superduper/ext/llm/model.py index bcd814c3c..4e466c056 100644 --- a/superduper/ext/llm/model.py +++ b/superduper/ext/llm/model.py @@ -28,8 +28,8 @@ class BaseLLM(Model): max_batch_size: t.Optional[int] = 4 signature: str = 'singleton' - def __post_init__(self, db, artifacts, example): - super().__post_init__(db, artifacts, example) + def __post_init__(self, db, example): + super().__post_init__(db, example) self.takes_context = True self.identifier = self.identifier.replace("/", "-") diff --git a/superduper/ext/llm/prompter.py b/superduper/ext/llm/prompter.py index e785e8df9..08e665062 100644 --- a/superduper/ext/llm/prompter.py +++ b/superduper/ext/llm/prompter.py @@ -67,9 +67,9 @@ class RetrievalPrompt(QueryModel): prompt_introduction: str = PROMPT_INTRODUCTION join: str = "\n---\n" - def __post_init__(self, db, artifacts): + def __post_init__(self, db): assert 'prompt' in self.select.variables - return super().__post_init__(db, artifacts) + return super().__post_init__(db) @property def inputs(self): diff --git a/superduper/ext/numpy/__init__.py b/superduper/ext/numpy/__init__.py index ae00cf5b6..a73b58d16 100644 --- a/superduper/ext/numpy/__init__.py +++ b/superduper/ext/numpy/__init__.py @@ -1,7 +1,7 @@ import typing as t -from .encoder import Array, array +from .encoder import Array requirements: t.List = [] -__all__ = ['array', 'Array'] +__all__ = ['Array'] diff --git a/superduper/ext/numpy/encoder.py b/superduper/ext/numpy/encoder.py index 5548ecad3..be781567e 100644 --- a/superduper/ext/numpy/encoder.py +++ b/superduper/ext/numpy/encoder.py @@ -4,12 +4,10 @@ from superduper.components.datatype import ( BaseDataType, - DataType, DataTypeFactory, - Encodable, + _Encodable, ) from superduper.ext.utils import str_shape -from superduper.misc.annotations import component class EncodeArray: @@ -55,21 +53,27 @@ def __call__(self, bytes, info: t.Optional[t.Dict] = None): class Array(BaseDataType): """Encode/ decode a numpy array as bytes. 
- :param dtype: numpy native datatype + :param dtype: numpy native datatype. + :param shape: Shape of array. """ dtype: str = 'float64' + shape: int | t.Tuple[int] + identifier: str = '' - def __post_init__(self, db, artifacts): - self.encodable_cls = Encodable + def __post_init__(self, db): + self.encodable_cls = _Encodable self.encodable = 'encodable' - return super().__post_init__(db, artifacts) + if not self.identifier: + dtype = str(self.dtype) + self.identifier = f'numpy-{dtype}[{str_shape(self.shape)}]' + return super().__post_init__(db) - def encode_data(self, item, info=None): + def encode_data(self, item): encoder = EncodeArray(self.dtype) return encoder(item) - def decode_data(self, item, info=None): + def decode_data(self, item): shape = self.shape if isinstance(shape, int): shape = (self.shape,) @@ -77,29 +81,27 @@ def decode_data(self, item, info=None): return decoder(item) -@component() -def array( - dtype: str, - shape: t.Sequence, - bytes_encoding: t.Optional[str] = None, - encodable: str = 'encodable', -): - """ - Create an encoder of numpy arrays. - - :param dtype: The dtype of the array. - :param shape: The shape of the array. - :param bytes_encoding: The bytes encoding to use. - :param encodable: The encodable to use. - """ - return DataType( - identifier=f'numpy-{dtype}[{str_shape(shape)}]', - encoder=EncodeArray(dtype), - decoder=DecodeArray(dtype, shape), - shape=shape, - bytes_encoding=bytes_encoding, - encodable=encodable, - ) +# @component() +# def array( +# dtype: str, +# shape: t.Sequence, +# bytes_encoding: t.Optional[str] = None, +# encodable: str = 'encodable', +# ): +# """ +# Create an encoder of numpy arrays. + +# :param dtype: The dtype of the array. +# :param shape: The shape of the array. +# :param bytes_encoding: The bytes encoding to use. +# :param encodable: The encodable to use. +# """ +# return DataType( +# identifier=f'numpy-{dtype}[{str_shape(shape)}]', +# encoder=EncodeArray(dtype), +# decoder=DecodeArray(dtype, shape), +# encodable=encodable, +# ) class NumpyDataTypeFactory(DataTypeFactory): @@ -115,10 +117,10 @@ def check(data: t.Any) -> bool: return isinstance(data, numpy.ndarray) @staticmethod - def create(data: t.Any) -> DataType: + def create(data: t.Any) -> Array: """Create a numpy array datatype. It's used for registering the auto schema. :param data: The numpy array. """ - return array(dtype=str(data.dtype), shape=list(data.shape)) + return Array(dtype=str(data.dtype), shape=list(data.shape)) diff --git a/superduper/misc/annotations.py b/superduper/misc/annotations.py index 0a6cba321..87dffd508 100644 --- a/superduper/misc/annotations.py +++ b/superduper/misc/annotations.py @@ -121,6 +121,7 @@ def _get_indent(docstring: str) -> int: return len(non_empty_lines[1]) - len(non_empty_lines[1].lstrip()) +# TODO deprecate - no longer needed def importable(f): """Make a function serializable as an importable. 
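
Before the auto-schema changes below: the numpy hunk above retires the `@component`-decorated `array(...)` factory in favour of constructing `Array` directly, with the identifier derived in `__post_init__`. A short sketch under those assumptions (dtype and shape values are illustrative):

```python
import numpy as np

from superduper.ext.numpy.encoder import Array

dt = Array(dtype='float32', shape=(3,))
# __post_init__ derives the identifier, e.g. 'numpy-float32[3]'.

encoded = dt.encode_data(np.array([1.0, 2.0, 3.0], dtype='float32'))
decoded = dt.decode_data(encoded)  # numpy array restored from bytes

assert np.allclose(decoded, [1.0, 2.0, 3.0])
```
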
diff --git a/superduper/misc/auto_schema.py b/superduper/misc/auto_schema.py index 831a7b4e5..ccb01b96c 100644 --- a/superduper/misc/auto_schema.py +++ b/superduper/misc/auto_schema.py @@ -6,13 +6,11 @@ from superduper import CFG, logging from superduper.base.exceptions import UnsupportedDatatype from superduper.components.datatype import ( + DEFAULT_DATATYPE, BaseDataType, - DataType, DataTypeFactory, Vector, - _BaseEncodable, - get_serializer, - json_serializer, + json_encoder, ) from superduper.components.schema import FieldType, Schema @@ -29,12 +27,6 @@ def register_module(module_name): logging.debug(f"Could not register module: {module_name}") -DEFAULT_DATATYPE = get_serializer( - identifier='DEFAULT', - method='pickle', - encodable='encodable', -) - BASE_TYPES = ( int, str, @@ -45,7 +37,7 @@ def register_module(module_name): ) -def infer_datatype(data: t.Any) -> t.Optional[t.Union[DataType, type]]: +def infer_datatype(data: t.Any) -> t.Optional[t.Union[BaseDataType, type]]: """Infer the datatype of a given data object. If the data object is a base type, return None, @@ -55,8 +47,9 @@ def infer_datatype(data: t.Any) -> t.Optional[t.Union[DataType, type]]: """ datatype = None - if isinstance(data, _BaseEncodable): - return datatype + # # TODO - why this? + # if isinstance(data, _BaseEncodable): + # return datatype try: from bson import ObjectId @@ -80,8 +73,8 @@ def infer_datatype(data: t.Any) -> t.Optional[t.Union[DataType, type]]: if datatype is None: try: - encoded_data = DEFAULT_DATATYPE.encoder(data) - decoded_data = DEFAULT_DATATYPE.decoder(encoded_data) + encoded_data = DEFAULT_DATATYPE.encode_data(data) + decoded_data = DEFAULT_DATATYPE.decode_data(encoded_data) assert isinstance(decoded_data, type(data)) except Exception as e: raise UnsupportedDatatype( @@ -163,7 +156,7 @@ def check(data: t.Any) -> bool: :param data: The data object """ try: - json_serializer.encode_data(data) + json_encoder.encode_data(data) return True except Exception: return False @@ -176,7 +169,7 @@ def create(data: t.Any) -> BaseDataType | FieldType: """ if CFG.json_native: return FieldType(identifier='json') - return json_serializer + return json_encoder register_module("superduper.ext.numpy.encoder") diff --git a/superduper/misc/compat.py b/superduper/misc/compat.py index 516aebccc..05613d87b 100644 --- a/superduper/misc/compat.py +++ b/superduper/misc/compat.py @@ -1,4 +1,5 @@ """Functions from later standard libraries not available in Python 3.8.""" +# TODO not needed from functools import lru_cache diff --git a/superduper/misc/download.py b/superduper/misc/download.py index b81e770dc..d00875747 100644 --- a/superduper/misc/download.py +++ b/superduper/misc/download.py @@ -14,10 +14,8 @@ from tqdm import tqdm from superduper import CFG, logging -from superduper.backends.base.query import Query -from superduper.base.constant import KEY_BUILDS -from superduper.base.document import Document -from superduper.components.datatype import _BaseEncodable + +# from superduper.components.datatype import _BaseEncodable from superduper.components.model import Model @@ -255,277 +253,6 @@ def _sequential_go(self, f): f(i) -class Updater: - """Updater class to update the artifact. - - :param db: Datalayer instance - :param query: query to be executed - """ - - def __init__(self, db, query): - self.db = db - self.query = query - - def exists(self, uri, key, id, datatype): - """Check if the artifact exists. 
- - :param uri: uri to download from - :param key: key in the document - :param id: id of the document - :param datatype: datatype of the document - """ - if self.db.datatypes[datatype].encodable == 'artifact': - out = self.db.artifact_store.exists(uri=uri, datatype=datatype) - else: - table_or_collection = self.query.table_or_collection.identifier - out = self.db.databackend.exists(table_or_collection, id, key) - return out - - def __call__( - self, - *, - uri, - key, - id, - datatype, - bytes_, - ): - """Run the updater. - - :param uri: uri to download from - :param key: key in the document - :param id: id of the document - :param datatype: datatype of the document - :param bytes_: bytes to insert - """ - if self.db.datatypes[datatype].encodable == 'artifact': - self.db.artifact_store.save_artifact( - { - 'uri': uri, - 'datatype': datatype, - 'bytes': bytes_, - 'directory': self.db.datatypes[datatype].directory, - } - ) - else: - # TODO move back to databackend - self.query.download_update(db=self.db, key=key, id=id, bytes=bytes_) - - -class Downloader(BaseDownloader): - """ - Download files from a list of URIs. - - :param uris: list of uris/ file names to fetch - :param update_one: function to call to insert data into table - :param ids: list of ids of rows/ documents to update - :param keys: list of keys in rows/ documents to insert to - :param datatypes: list of datatypes of rows/ documents to insert to - :param n_workers: number of multiprocessing workers - :param headers: dictionary of request headers passed to``requests`` package - :param skip_existing: if ``True`` then don't bother getting already present data - :param timeout: set seconds until request times out - :param raises: raises error ``True``/``False`` - """ - - results: t.Dict[int, str] - - def __init__( - self, - uris, - update_one: t.Optional[t.Callable] = None, - ids: t.Optional[t.Union[t.List[str], t.List[int]]] = None, - keys: t.Optional[t.List[str]] = None, - datatypes: t.Optional[t.List[str]] = None, - n_workers: int = 20, - headers: t.Optional[t.Dict] = None, - skip_existing: bool = True, - timeout: t.Optional[int] = None, - raises: bool = True, - ): - super().__init__( - uris, n_workers=n_workers, timeout=timeout, headers=headers, raises=raises - ) - - if ids is not None: - if len(ids) != len(uris): - raise ValueError(f'len(ids={ids}) != len(uris={uris})') - - self.ids = ids - self.keys = keys - self.datatypes = datatypes - self.failed = 0 - self.skip_existing = skip_existing - self.update_one = update_one - - def _download(self, i): - if self.update_one.exists( - id=self.ids[i], - key=self.keys[i], - uri=self.uris[i], - datatype=self.datatypes[i], - ): - return - content = self.fetcher(self.uris[i]) - self.update_one( - id=self.ids[i], - key=self.keys[i], - datatype=self.datatypes[i], - bytes_=content, - uri=self.uris[i], - ) - - -def gather_uris( - documents: t.Sequence[Document], gather_ids: bool = True -) -> t.Tuple[t.List[str], t.List[str], t.List[t.Any], t.List[str]]: - """Get the uris out of all documents as denoted by ``{"_content": ...}``. 
- - :param documents: list of dictionaries - :param gather_ids: if ``True`` then gather ids of documents - """ - uris = [] - mongo_keys = [] - datatypes = [] - ids = [] - for i, r in enumerate(documents): - sub_uris, sub_mongo_keys, sub_datatypes = _gather_uris_for_document(r) - if gather_ids: - ids.extend([r['_id'] for _ in sub_uris]) - else: - ids.append(i) - uris.extend(sub_uris) - mongo_keys.extend(sub_mongo_keys) - datatypes.extend(sub_datatypes) - return uris, mongo_keys, datatypes, ids - - -def _gather_uris_for_document(r: Document, id_field: str = '_id'): - """Get the uris out of a single document as denoted by ``{"_content": ...}``. - - >>> _gather_uris_for_document({'a': {'_content': {'uri': 'test'}}}) - (['test'], ['a']) - >>> d = {'b': {'a': {'_content': {'uri': 'test'}}}} - >>> _gather_uris_for_document(d) - (['test'], ['b.a']) - >>> d = {'b': {'a': {'_content': {'uri': 'test', 'bytes': b'abc'}}}} - >>> _gather_uris_for_document(d) - ([], []) - """ - uris = [] - keys = [] - datatypes = [] - # TODO: This function not be tested in UT, - # fast fix the schema parameter to avoid type error - leaf_lookup = r.encode(None, leaves_to_keep=(_BaseEncodable,))[KEY_BUILDS] - for k in leaf_lookup: - if leaf_lookup[k].uri is None: - continue - keys.append(k) - uris.append(leaf_lookup[k].uri) - datatypes.append(leaf_lookup[k].datatype.identifier) - return uris, keys, datatypes - - -def download_content( - db, - query: t.Union[Query, t.Dict], - ids: t.Optional[t.Sequence[str]] = None, - documents: t.Optional[t.List[Document]] = None, - raises: bool = True, - n_workers: t.Optional[int] = None, -) -> t.Optional[t.Sequence[Document]]: - """Download content contained in uploaded data. - - Items to be downloaded are identifier - via the subdocuments in the form exemplified below. By default items are downloaded - to the database, unless a ``download_update`` function is provided. - - :param db: database instance - :param query: query to be executed - :param ids: ids to be downloaded - :param documents: documents to be downloaded - :param raises: whether to raise errors - :param n_workers: number of download workers - - >>> d = {"_content": {"uri": "", "encoder": ""}} - >>> def update(key, id, bytes): - >>> ... with open(f'/tmp/{key}+{id}', 'wb') as f: - >>> ... f.write(bytes) - >>> download_content(None, None, ids=["0"], documents=[d])) - ... 
- """ - logging.debug(str(query)) - logging.debug(str(ids)) - - # TODO handle this in the job runner - if isinstance(query, dict): - query = Document.decode(query).unpack() - query = t.cast(Query, query) - query.db = db - - if documents is not None: - pass - elif isinstance(query, Query) and query.type == 'select': - if ids is None: - # TODO deprecate reference since lazy loading in any case - documents = list(db.execute(query)) - else: - select = query.select_using_ids(ids) - documents = list(db.execute(select)) - else: - assert query.type == 'insert' - documents = t.cast(t.List[Document], query.documents) - - uris, keys, datatypes, place_ids = gather_uris(documents) - - if uris: - logging.info(f'found {len(uris)} uris') - - if not uris: - return # type: ignore[return-value] - - downloader = Downloader( - uris=uris, - ids=place_ids, - keys=keys, - datatypes=datatypes, - update_one=Updater(db, query), - n_workers=n_workers or CFG.downloads.n_workers, - timeout=CFG.downloads.timeout, - headers=CFG.downloads.headers, - raises=raises, - ) - downloader.go() - - return # type: ignore[return-value] - - -def download_from_one(r: Document): - """Download content from a single document. - - This function will find all URIs in the document and download them. - - :param r: document to download from - """ - uris, keys, _, _ = gather_uris([r]) - if not uris: - return - - downloader = BaseDownloader( - uris=uris, - n_workers=0, - timeout=CFG.downloads.timeout, - headers=CFG.downloads.headers, - raises=True, - ) - downloader.go() - for key, uri in zip(keys, uris): - r[key].x = r[key].datatype.decode_data(downloader.results[uri]) - - return - - class DownloadFiles(Model): """Download files from a list of URIs. diff --git a/superduper/misc/importing.py b/superduper/misc/importing.py new file mode 100644 index 000000000..052067ec1 --- /dev/null +++ b/superduper/misc/importing.py @@ -0,0 +1,11 @@ +import importlib + + +def import_object(path): + """Import item from path. + + :param path: Path to import from. + """ + module = '.'.join(path.split('.')[:-1]) + cls = path.split('.')[-1] + return getattr(importlib.import_module(module), cls) diff --git a/superduper/rest/utils.py b/superduper/rest/utils.py index ae25e9fb6..b72cdc6b9 100644 --- a/superduper/rest/utils.py +++ b/superduper/rest/utils.py @@ -1,19 +1,19 @@ import inspect from superduper import Document -from superduper.components.datatype import Artifact, Encodable +from superduper.components.datatype import _Artifact, _Encodable def rewrite_artifacts(r, db): """Helper function to rewrite artifacts.""" - if isinstance(r, Encodable): + if isinstance(r, _Encodable): kwargs = r.dict() kwargs['datatype'].encodable = 'artifact' blob = r._encode()[0] db.artifact_store.put_bytes(blob, file_id=r.identifier) - init_args = inspect.signature(Artifact.__init__).parameters.keys() + init_args = inspect.signature(_Artifact.__init__).parameters.keys() kwargs = {k: v for k, v in kwargs.items() if k in init_args} - return Artifact(**kwargs) + return _Artifact(**kwargs) if isinstance(r, Document): return Document(rewrite_artifacts(dict(r), db=db)) if isinstance(r, dict): diff --git a/superduper/vector_search/base.py b/superduper/vector_search/base.py index d8a6d4ca3..bb9c9d65e 100644 --- a/superduper/vector_search/base.py +++ b/superduper/vector_search/base.py @@ -12,6 +12,8 @@ if t.TYPE_CHECKING: from superduper.components.vector_index import VectorIndex +# TODO this is now in the wrong place + class BaseVectorSearcher(ABC): """Base class for vector searchers. 
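
The new `superduper/misc/importing.py` module above centralizes the dotted-path resolution that `Vector.datatype_impl` performs inline earlier in this diff. A quick usage sketch; the path points at the `Array` datatype from the numpy encoder touched above:

```python
from superduper.misc.importing import import_object

# Split 'pkg.module.Attr' into module and attribute, import, resolve.
Array = import_object('superduper.ext.numpy.encoder.Array')

assert Array.__name__ == 'Array'
```
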
diff --git a/templates/simple_rag/VERSION b/templates/simple_rag/VERSION index 1d0ba9ea1..8f0916f76 100644 --- a/templates/simple_rag/VERSION +++ b/templates/simple_rag/VERSION @@ -1 +1 @@ -0.4.0 +0.5.0 diff --git a/templates/simple_rag/build.ipynb b/templates/simple_rag/build.ipynb index 88289701c..17947c9f9 100644 --- a/templates/simple_rag/build.ipynb +++ b/templates/simple_rag/build.ipynb @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "3ef70f6d-a189-460a-8864-241a689624e2", "metadata": { "editable": true, @@ -65,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "cb029a5e-fedf-4f07-8a31-d220cfbfbb3d", "metadata": { "editable": true, @@ -74,7 +74,22 @@ }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2024-Nov-25 14:42:39.55\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.misc.plugins\u001b[0m:\u001b[36m13 \u001b[0m | \u001b[1mLoading plugin: mongodb\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:39.58\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m69 \u001b[0m | \u001b[1mBuilding Data Layer\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:39.58\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.build\u001b[0m:\u001b[36m184 \u001b[0m | \u001b[1mConfiguration: \n", + " +---------------+--------------+\n", + "| Configuration | Value |\n", + "+---------------+--------------+\n", + "| Data Backend | mongomock:// |\n", + "+---------------+--------------+\u001b[0m\n" + ] + } + ], "source": [ "from superduper import superduper, CFG\n", "\n", @@ -86,7 +101,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "4e7902bd", "metadata": { "editable": true, @@ -112,10 +127,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "1ef8dd07-1b47-4dce-84dd-a081d1f5ee9d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2024-Nov-25 14:42:41.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36m__main__\u001b[0m:\u001b[36m7 \u001b[0m | \u001b[1mDownloading data...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:42.44\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36m__main__\u001b[0m:\u001b[36m9 \u001b[0m | \u001b[1mDownloading data... 
(Done)\u001b[0m\n" + ] + } + ], "source": [ "if APPLY:\n", " data = getter()" @@ -134,10 +158,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "c5965fdf", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2024-Nov-25 14:42:44.05\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m616 \u001b[0m | \u001b[1mComponent (table, docs) not found in cache, loading from db\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.05\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m622 \u001b[0m | \u001b[1mLoad (('table', 'docs')) from metadata...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.05\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m329 \u001b[0m | \u001b[1mTable docs does not exist, auto creating...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.05\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m335 \u001b[0m | \u001b[1mCreating table docs with schema {('_fold', 'str'), ('x', 'str')}\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.05\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m576 \u001b[0m | \u001b[33m\u001b[1mLeaf str already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.05\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m616 \u001b[0m | \u001b[1mComponent (schema, AUTO-_fold=&x=) not found in cache, loading from db\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.05\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m622 \u001b[0m | \u001b[1mLoad (('schema', \"AUTO-_fold=&x=\")) from metadata...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.05\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m576 \u001b[0m | \u001b[33m\u001b[1mLeaf str already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.05\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m257 \u001b[0m | \u001b[1mFound new schema:AUTO-_fold=&x=:0cc2139173e6460a\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.05\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m616 \u001b[0m | \u001b[1mComponent (table, docs) not found in cache, loading from db\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.05\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m622 \u001b[0m | \u001b[1mLoad (('table', 'docs')) from metadata...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.05\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m257 \u001b[0m | \u001b[1mFound new table:docs:059cbc59f1794f86\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.06\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| 
\u001b[36msuperduper.backends.local.artifacts\u001b[0m:\u001b[36m87 \u001b[0m | \u001b[33m\u001b[1mFile /tmp/dbc1aaddc8b7343d6d33b34edcf608b8f8801918 already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.06\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m67 \u001b[0m | \u001b[1mFound these changes and/ or additions that need to be made:\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.06\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m69 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.06\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m70 \u001b[0m | \u001b[1mMETADATA EVENTS:\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.06\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m71 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.06\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m78 \u001b[0m | \u001b[1m[0]: schema:AUTO-_fold=&x=:0cc2139173e6460a: create ~ [1]\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.06\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m82 \u001b[0m | \u001b[1m[1]: table:docs:059cbc59f1794f86: create\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.06\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m84 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.06\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m85 \u001b[0m | \u001b[1mJOBS EVENTS:\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.06\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m86 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.06\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m90 \u001b[0m | \u001b[1mNo job events...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.06\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m101 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.06\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m588 \u001b[0m | \u001b[1mComponent 0cc2139173e6460a not found in cache, loading from db with uuid\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.06\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| 
\u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m607 \u001b[0m | \u001b[1mAdding schema:AUTO-_fold=&x=:0cc2139173e6460a to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.06\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.component\u001b[0m:\u001b[36m585 \u001b[0m | \u001b[1mAdding schema: AUTO-_fold=&x= to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.06\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m588 \u001b[0m | \u001b[1mComponent 059cbc59f1794f86 not found in cache, loading from db with uuid\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.06\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m607 \u001b[0m | \u001b[1mAdding table:docs:059cbc59f1794f86 to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.06\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.backends.local.compute\u001b[0m:\u001b[36m49 \u001b[0m | \u001b[33m\u001b[1mCould not release futures for context 059cbc59f1794f86\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:44.09\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m308 \u001b[0m | \u001b[1mInserted 187 documents into docs\u001b[0m\n" + ] + } + ], "source": [ "if APPLY:\n", " from superduper import Document\n", @@ -168,7 +230,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "2d20eaa0-a416-4483-938e-23f79845739a", "metadata": {}, "outputs": [], @@ -196,7 +258,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "93d21872-d4dc-40dc-abab-fb07ba102ea3", "metadata": {}, "outputs": [], @@ -215,10 +277,84 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "31900eec-b516-4bef-939e-2e8f46252b12", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2024-Nov-25 14:42:48.68\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.listener\u001b[0m:\u001b[36m74 \u001b[0m | \u001b[33m\u001b[1moutput_table not found in listener.dict()\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.69\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m616 \u001b[0m | \u001b[1mComponent (model, chunker) not found in cache, loading from db\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.69\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m622 \u001b[0m | \u001b[1mLoad (('model', 'chunker')) from metadata...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m257 \u001b[0m | \u001b[1mFound new model:chunker:6c65cfb0dc6a4240\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m616 \u001b[0m | \u001b[1mComponent (listener, chunker) not found in cache, loading from db\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | 
\u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m622 \u001b[0m | \u001b[1mLoad (('listener', 'chunker')) from metadata...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m616 \u001b[0m | \u001b[1mComponent (schema, _schema/_outputs__chunker__32a68622e6ac4e8c) not found in cache, loading from db\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m622 \u001b[0m | \u001b[1mLoad (('schema', '_schema/_outputs__chunker__32a68622e6ac4e8c')) from metadata...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m576 \u001b[0m | \u001b[33m\u001b[1mLeaf ID already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m576 \u001b[0m | \u001b[33m\u001b[1mLeaf str already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m257 \u001b[0m | \u001b[1mFound new schema:_schema/_outputs__chunker__32a68622e6ac4e8c:b65ad745363446e6\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m616 \u001b[0m | \u001b[1mComponent (table, _outputs__chunker__32a68622e6ac4e8c) not found in cache, loading from db\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m622 \u001b[0m | \u001b[1mLoad (('table', '_outputs__chunker__32a68622e6ac4e8c')) from metadata...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m257 \u001b[0m | \u001b[1mFound new table:_outputs__chunker__32a68622e6ac4e8c:09d1628b5c0c4870\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.backends.local.artifacts\u001b[0m:\u001b[36m87 \u001b[0m | \u001b[33m\u001b[1mFile /tmp/dbc1aaddc8b7343d6d33b34edcf608b8f8801918 already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m576 \u001b[0m | \u001b[33m\u001b[1mLeaf ID already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m576 \u001b[0m | \u001b[33m\u001b[1mLeaf str already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m257 \u001b[0m | \u001b[1mFound new listener:chunker:32a68622e6ac4e8c\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m 
| \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m67 \u001b[0m | \u001b[1mFound these changes and/ or additions that need to be made:\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m69 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m70 \u001b[0m | \u001b[1mMETADATA EVENTS:\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m71 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m78 \u001b[0m | \u001b[1m[0]: model:chunker:6c65cfb0dc6a4240: create ~ [3]\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m78 \u001b[0m | \u001b[1m[1]: schema:_schema/_outputs__chunker__32a68622e6ac4e8c:b65ad745363446e6: create ~ [2]\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m78 \u001b[0m | \u001b[1m[2]: table:_outputs__chunker__32a68622e6ac4e8c:09d1628b5c0c4870: create ~ [3]\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m82 \u001b[0m | \u001b[1m[3]: listener:chunker:32a68622e6ac4e8c: create\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m84 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m85 \u001b[0m | \u001b[1mJOBS EVENTS:\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m86 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m99 \u001b[0m | \u001b[1m[0]: listener:chunker:32a68622e6ac4e8c.run: run\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m94 \u001b[0m | \u001b[1m[1]: listener:chunker:32a68622e6ac4e8c.set_status: set_status ~ [0]\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| 
\u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m101 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m588 \u001b[0m | \u001b[1mComponent 6c65cfb0dc6a4240 not found in cache, loading from db with uuid\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.71\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m607 \u001b[0m | \u001b[1mAdding model:chunker:6c65cfb0dc6a4240 to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.71\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.component\u001b[0m:\u001b[36m585 \u001b[0m | \u001b[1mAdding model: chunker to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.71\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m588 \u001b[0m | \u001b[1mComponent b65ad745363446e6 not found in cache, loading from db with uuid\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.71\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m607 \u001b[0m | \u001b[1mAdding schema:_schema/_outputs__chunker__32a68622e6ac4e8c:b65ad745363446e6 to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.71\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.component\u001b[0m:\u001b[36m585 \u001b[0m | \u001b[1mAdding schema: _schema/_outputs__chunker__32a68622e6ac4e8c to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.71\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m588 \u001b[0m | \u001b[1mComponent 09d1628b5c0c4870 not found in cache, loading from db with uuid\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.71\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m607 \u001b[0m | \u001b[1mAdding table:_outputs__chunker__32a68622e6ac4e8c:09d1628b5c0c4870 to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.71\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m588 \u001b[0m | \u001b[1mComponent 32a68622e6ac4e8c not found in cache, loading from db with uuid\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.71\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m607 \u001b[0m | \u001b[1mAdding listener:chunker:32a68622e6ac4e8c to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.71\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.component\u001b[0m:\u001b[36m585 \u001b[0m | \u001b[1mAdding listener: chunker to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.71\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.model\u001b[0m:\u001b[36m531 \u001b[0m | \u001b[1mRequesting prediction in db - [chunker] with predict_id chunker__32a68622e6ac4e8c\n", + "\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"187it [00:00, 5526.48it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2024-Nov-25 14:42:48.77\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.model\u001b[0m:\u001b[36m664 \u001b[0m | \u001b[1mAdding 187 model outputs to `db`\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:48.84\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m308 \u001b[0m | \u001b[1mInserted 336 documents into _outputs__chunker__32a68622e6ac4e8c\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], "source": [ "if APPLY and EAGER:\n", " db.apply(upstream_listener, force=True)" @@ -263,13 +399,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "a9b1f538-65ca-499e-b6d0-2dd733f81723", "metadata": {}, "outputs": [], "source": [ "import os\n", - "from superduper.components.vector_index import sqlvector\n", "\n", "from superduper_openai import OpenAIEmbedding\n", "\n", @@ -289,7 +424,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "4663fa4b-c2ec-427d-bf8b-b8b109cc2ccf", "metadata": {}, "outputs": [], @@ -312,10 +447,138 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "509c3505-54c5-4e68-84ec-3df8bea0fd74", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2024-Nov-25 14:42:54.93\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m576 \u001b[0m | \u001b[33m\u001b[1mLeaf ID already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:54.94\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m576 \u001b[0m | \u001b[33m\u001b[1mLeaf str already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.62\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.listener\u001b[0m:\u001b[36m74 \u001b[0m | \u001b[33m\u001b[1moutput_table not found in listener.dict()\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.63\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.listener\u001b[0m:\u001b[36m74 \u001b[0m | \u001b[33m\u001b[1moutput_table not found in listener.dict()\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.65\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m188 \u001b[0m | \u001b[1mFound identical model:chunker:6c65cfb0dc6a4240\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.66\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m243 \u001b[0m | \u001b[1mFound update model:chunker:6c65cfb0dc6a4240\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.66\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.backends.local.artifacts\u001b[0m:\u001b[36m87 \u001b[0m | \u001b[33m\u001b[1mFile /tmp/4b100f6e48727d74cb84f5c7d979d988aee6aa51 already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.66\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| 
\u001b[36msuperduper.components.listener\u001b[0m:\u001b[36m74 \u001b[0m | \u001b[33m\u001b[1moutput_table not found in listener.dict()\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.66\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m188 \u001b[0m | \u001b[1mFound identical listener:chunker:32a68622e6ac4e8c\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.66\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m616 \u001b[0m | \u001b[1mComponent (datatype, vector[1536]) not found in cache, loading from db\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.66\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m622 \u001b[0m | \u001b[1mLoad (('datatype', 'vector[1536]')) from metadata...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.66\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m257 \u001b[0m | \u001b[1mFound new datatype:vector[1536]:7da0ede750ef4110\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.66\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m616 \u001b[0m | \u001b[1mComponent (model, text-embedding) not found in cache, loading from db\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.66\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m622 \u001b[0m | \u001b[1mLoad (('model', 'text-embedding')) from metadata...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.66\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m257 \u001b[0m | \u001b[1mFound new model:text-embedding:169d3962c9964326\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.66\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m616 \u001b[0m | \u001b[1mComponent (listener, embeddinglistener) not found in cache, loading from db\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.66\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m622 \u001b[0m | \u001b[1mLoad (('listener', 'embeddinglistener')) from metadata...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.66\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m616 \u001b[0m | \u001b[1mComponent (datatype, vector[1536]) not found in cache, loading from db\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.66\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m622 \u001b[0m | \u001b[1mLoad (('datatype', 'vector[1536]')) from metadata...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.66\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m257 \u001b[0m | \u001b[1mFound new datatype:vector[1536]:7da0ede750ef4110\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.66\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m616 \u001b[0m | \u001b[1mComponent (schema, 
_schema/_outputs__embeddinglistener__6e0274765d264d25) not found in cache, loading from db\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.66\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m622 \u001b[0m | \u001b[1mLoad (('schema', '_schema/_outputs__embeddinglistener__6e0274765d264d25')) from metadata...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.66\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m576 \u001b[0m | \u001b[33m\u001b[1mLeaf ID already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.66\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m257 \u001b[0m | \u001b[1mFound new schema:_schema/_outputs__embeddinglistener__6e0274765d264d25:2013c1d9203b4c92\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.66\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m616 \u001b[0m | \u001b[1mComponent (table, _outputs__embeddinglistener__6e0274765d264d25) not found in cache, loading from db\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.66\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m622 \u001b[0m | \u001b[1mLoad (('table', '_outputs__embeddinglistener__6e0274765d264d25')) from metadata...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m257 \u001b[0m | \u001b[1mFound new table:_outputs__embeddinglistener__6e0274765d264d25:949b42ee0b9e4afc\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.backends.local.artifacts\u001b[0m:\u001b[36m87 \u001b[0m | \u001b[33m\u001b[1mFile /tmp/dbc1aaddc8b7343d6d33b34edcf608b8f8801918 already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m576 \u001b[0m | \u001b[33m\u001b[1mLeaf ID already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m257 \u001b[0m | \u001b[1mFound new listener:embeddinglistener:6e0274765d264d25\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m616 \u001b[0m | \u001b[1mComponent (vector_index, vectorindex) not found in cache, loading from db\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m622 \u001b[0m | \u001b[1mLoad (('vector_index', 'vectorindex')) from metadata...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m257 \u001b[0m | \u001b[1mFound new vector_index:vectorindex:fb8364f87f6446c0\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | 
\u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m64 \u001b[0m | \u001b[1mFound this diff:\u001b[0m\n" + ] + }, + { + "data": { + "text/html": [ + "
vectorindex\n",
+       "└── chunker\n",
+       "    ├── status: update\n",
+       "    ├── changes\n",
+       "    │   └── _object: Blob(identifier='4b100f6e48727d74cb84f5c7d979d988aee6aa51', uuid='22d9da58406e42e1', bytes=b'\\\n",
+       "    └── type_id: model\n",
+       "
\n" + ], + "text/plain": [ + "vectorindex\n", + "└── \u001b[1;33mchunker\u001b[0m\n", + " ├── \u001b[1;36mstatus: \u001b[0m\u001b[1;34mupdate\u001b[0m\n", + " ├── \u001b[1;33mchanges\u001b[0m\n", + " │ └── \u001b[1;36m_object: \u001b[0m\u001b[1;32mBlob(identifier='4b100f6e48727d74cb84f5c7d979d988aee6aa51', uuid='22d9da58406e42e1', bytes=b'\\\u001b[0m\n", + " └── \u001b[1;36mtype_id: \u001b[0m\u001b[1;32mmodel\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m67 \u001b[0m | \u001b[1mFound these changes and/ or additions that need to be made:\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m69 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m70 \u001b[0m | \u001b[1mMETADATA EVENTS:\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m71 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m80 \u001b[0m | \u001b[1m[0]: model:chunker:6c65cfb0dc6a4240: update\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m78 \u001b[0m | \u001b[1m[1]: datatype:vector[1536]:7da0ede750ef4110: create ~ [3]\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m78 \u001b[0m | \u001b[1m[2]: model:text-embedding:169d3962c9964326: create ~ [5]\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m78 \u001b[0m | \u001b[1m[3]: schema:_schema/_outputs__embeddinglistener__6e0274765d264d25:2013c1d9203b4c92: create ~ [4]\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m78 \u001b[0m | \u001b[1m[4]: table:_outputs__embeddinglistener__6e0274765d264d25:949b42ee0b9e4afc: create ~ [5]\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m78 \u001b[0m | \u001b[1m[5]: listener:embeddinglistener:6e0274765d264d25: create ~ [6]\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m82 \u001b[0m | \u001b[1m[6]: vector_index:vectorindex:fb8364f87f6446c0: create\u001b[0m\n", + "\u001b[32m2024-Nov-25 
14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m84 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m85 \u001b[0m | \u001b[1mJOBS EVENTS:\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m86 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m99 \u001b[0m | \u001b[1m[0]: listener:embeddinglistener:6e0274765d264d25.run: run\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m94 \u001b[0m | \u001b[1m[1]: listener:embeddinglistener:6e0274765d264d25.set_status: set_status ~ [0]\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m94 \u001b[0m | \u001b[1m[2]: vector_index:vectorindex:fb8364f87f6446c0.copy_vectors: copy_vectors ~ [0,1]\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m94 \u001b[0m | \u001b[1m[3]: vector_index:vectorindex:fb8364f87f6446c0.set_status: set_status ~ [2]\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.67\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m101 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.68\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m588 \u001b[0m | \u001b[1mComponent 7da0ede750ef4110 not found in cache, loading from db with uuid\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.68\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m607 \u001b[0m | \u001b[1mAdding datatype:vector[1536]:7da0ede750ef4110 to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.68\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.component\u001b[0m:\u001b[36m585 \u001b[0m | \u001b[1mAdding datatype: vector[1536] to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.68\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m588 \u001b[0m | \u001b[1mComponent 169d3962c9964326 not found in cache, loading from db with uuid\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.68\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m607 \u001b[0m | \u001b[1mAdding model:text-embedding:169d3962c9964326 to cache\u001b[0m\n", + 
"\u001b[32m2024-Nov-25 14:42:55.68\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.component\u001b[0m:\u001b[36m585 \u001b[0m | \u001b[1mAdding model: text-embedding to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.68\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m588 \u001b[0m | \u001b[1mComponent 2013c1d9203b4c92 not found in cache, loading from db with uuid\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.68\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m607 \u001b[0m | \u001b[1mAdding schema:_schema/_outputs__embeddinglistener__6e0274765d264d25:2013c1d9203b4c92 to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.68\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.component\u001b[0m:\u001b[36m585 \u001b[0m | \u001b[1mAdding schema: _schema/_outputs__embeddinglistener__6e0274765d264d25 to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.68\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m588 \u001b[0m | \u001b[1mComponent 949b42ee0b9e4afc not found in cache, loading from db with uuid\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.69\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m607 \u001b[0m | \u001b[1mAdding table:_outputs__embeddinglistener__6e0274765d264d25:949b42ee0b9e4afc to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.69\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m588 \u001b[0m | \u001b[1mComponent 6e0274765d264d25 not found in cache, loading from db with uuid\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.69\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m588 \u001b[0m | \u001b[1mComponent 32a68622e6ac4e8c not found in cache, loading from db with uuid\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.69\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m588 \u001b[0m | \u001b[1mComponent 6c65cfb0dc6a4240 not found in cache, loading from db with uuid\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.69\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m607 \u001b[0m | \u001b[1mAdding model:chunker:6c65cfb0dc6a4240 to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.69\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m607 \u001b[0m | \u001b[1mAdding listener:chunker:32a68622e6ac4e8c to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.69\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m607 \u001b[0m | \u001b[1mAdding listener:embeddinglistener:6e0274765d264d25 to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.69\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.component\u001b[0m:\u001b[36m585 \u001b[0m | \u001b[1mAdding listener: 
embeddinglistener to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.69\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m588 \u001b[0m | \u001b[1mComponent fb8364f87f6446c0 not found in cache, loading from db with uuid\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.69\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m607 \u001b[0m | \u001b[1mAdding vector_index:vectorindex:fb8364f87f6446c0 to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.69\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.component\u001b[0m:\u001b[36m585 \u001b[0m | \u001b[1mAdding vector_index: vectorindex to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:42:55.70\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.model\u001b[0m:\u001b[36m531 \u001b[0m | \u001b[1mRequesting prediction in db - [text-embedding] with predict_id embeddinglistener__6e0274765d264d25\n", + "\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "336it [00:00, 28114.31it/s]\n", + "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:06<00:00, 1.64s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2024-Nov-25 14:43:02.31\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.model\u001b[0m:\u001b[36m664 \u001b[0m | \u001b[1mAdding 336 model outputs to `db`\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:03.90\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m308 \u001b[0m | \u001b[1mInserted 336 documents into _outputs__embeddinglistener__6e0274765d264d25\u001b[0m\n" + ] + } + ], "source": [ "if APPLY and EAGER:\n", " db.apply(vector_index, force=True)" @@ -332,7 +595,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "f98e5ff4", "metadata": {}, "outputs": [], @@ -356,7 +619,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "44baeb09-6f35-4cf2-b814-46283a59f7e9", "metadata": {}, "outputs": [], @@ -383,10 +646,41 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "2d3a0d3a-da1c-41ec-b16c-f281c46ad794", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2024-Nov-25 14:43:13.40\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m616 \u001b[0m | \u001b[1mComponent (model, llm-model) not found in cache, loading from db\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.40\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m622 \u001b[0m | \u001b[1mLoad (('model', 'llm-model')) from metadata...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.40\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m257 \u001b[0m | \u001b[1mFound new model:llm-model:f5a1f4c2908b4570\u001b[0m\n", 
+ "\u001b[32m2024-Nov-25 14:43:13.40\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m616 \u001b[0m | \u001b[1mComponent (model, simple_rag) not found in cache, loading from db\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.40\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m622 \u001b[0m | \u001b[1mLoad (('model', 'simple_rag')) from metadata...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.40\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m257 \u001b[0m | \u001b[1mFound new model:simple_rag:721ab56bc3784088\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.40\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m67 \u001b[0m | \u001b[1mFound these changes and/ or additions that need to be made:\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.40\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m69 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.40\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m70 \u001b[0m | \u001b[1mMETADATA EVENTS:\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.40\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m71 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.40\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m78 \u001b[0m | \u001b[1m[0]: model:llm-model:f5a1f4c2908b4570: create ~ [1]\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.40\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m82 \u001b[0m | \u001b[1m[1]: model:simple_rag:721ab56bc3784088: create\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.40\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m84 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.40\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m85 \u001b[0m | \u001b[1mJOBS EVENTS:\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.40\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m86 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.40\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m90 \u001b[0m | \u001b[1mNo job events...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.40\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| 
\u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m101 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.41\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m588 \u001b[0m | \u001b[1mComponent f5a1f4c2908b4570 not found in cache, loading from db with uuid\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.42\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m607 \u001b[0m | \u001b[1mAdding model:llm-model:f5a1f4c2908b4570 to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.42\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.component\u001b[0m:\u001b[36m585 \u001b[0m | \u001b[1mAdding model: llm-model to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.42\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m588 \u001b[0m | \u001b[1mComponent 721ab56bc3784088 not found in cache, loading from db with uuid\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.43\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m607 \u001b[0m | \u001b[1mAdding model:simple_rag:721ab56bc3784088 to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.43\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.component\u001b[0m:\u001b[36m585 \u001b[0m | \u001b[1mAdding model: simple_rag to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:13.43\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.backends.local.compute\u001b[0m:\u001b[36m49 \u001b[0m | \u001b[33m\u001b[1mCould not release futures for context 721ab56bc3784088\u001b[0m\n" + ] + } + ], "source": [ "if APPLY and EAGER:\n", " db.apply(rag, force=True)" @@ -402,10 +696,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "e6787c78-4b14-4a72-818b-450408a74331", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2024-Nov-25 14:43:17.42\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.application\u001b[0m:\u001b[36m39 \u001b[0m | \u001b[1mResorting components based on topological order.\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:17.42\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.application\u001b[0m:\u001b[36m56 \u001b[0m | \u001b[1mNew order of components: ['listener:chunker:32a68622e6ac4e8c', 'vector_index:vectorindex:fb8364f87f6446c0', 'model:simple_rag:721ab56bc3784088']\u001b[0m\n" + ] + } + ], "source": [ "from superduper import Application\n", "\n", @@ -421,10 +724,125 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "e7c16557-af76-4e70-83d9-2984e19a9554", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2024-Nov-25 14:43:18.39\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m576 \u001b[0m | 
\u001b[33m\u001b[1mLeaf ID already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.39\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m576 \u001b[0m | \u001b[33m\u001b[1mLeaf str already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.40\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m576 \u001b[0m | \u001b[33m\u001b[1mLeaf ID already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.40\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m576 \u001b[0m | \u001b[33m\u001b[1mLeaf str already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.40\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m576 \u001b[0m | \u001b[33m\u001b[1mLeaf ID already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.40\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.listener\u001b[0m:\u001b[36m74 \u001b[0m | \u001b[33m\u001b[1moutput_table not found in listener.dict()\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.45\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m188 \u001b[0m | \u001b[1mFound identical model:chunker:6c65cfb0dc6a4240\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.46\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m243 \u001b[0m | \u001b[1mFound update model:chunker:6c65cfb0dc6a4240\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.46\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.backends.local.artifacts\u001b[0m:\u001b[36m87 \u001b[0m | \u001b[33m\u001b[1mFile /tmp/4b100f6e48727d74cb84f5c7d979d988aee6aa51 already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.46\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.listener\u001b[0m:\u001b[36m74 \u001b[0m | \u001b[33m\u001b[1moutput_table not found in listener.dict()\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.46\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m188 \u001b[0m | \u001b[1mFound identical listener:chunker:32a68622e6ac4e8c\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.46\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.listener\u001b[0m:\u001b[36m74 \u001b[0m | \u001b[33m\u001b[1moutput_table not found in listener.dict()\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.46\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.listener\u001b[0m:\u001b[36m74 \u001b[0m | \u001b[33m\u001b[1moutput_table not found in listener.dict()\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.46\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m188 \u001b[0m | \u001b[1mFound identical model:chunker:6c65cfb0dc6a4240\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO 
\u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m243 \u001b[0m | \u001b[1mFound update model:chunker:6c65cfb0dc6a4240\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.backends.local.artifacts\u001b[0m:\u001b[36m87 \u001b[0m | \u001b[33m\u001b[1mFile /tmp/4b100f6e48727d74cb84f5c7d979d988aee6aa51 already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.listener\u001b[0m:\u001b[36m74 \u001b[0m | \u001b[33m\u001b[1moutput_table not found in listener.dict()\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m188 \u001b[0m | \u001b[1mFound identical listener:chunker:32a68622e6ac4e8c\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m188 \u001b[0m | \u001b[1mFound identical datatype:vector[1536]:7da0ede750ef4110\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m243 \u001b[0m | \u001b[1mFound update datatype:vector[1536]:7da0ede750ef4110\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m188 \u001b[0m | \u001b[1mFound identical model:text-embedding:169d3962c9964326\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.listener\u001b[0m:\u001b[36m74 \u001b[0m | \u001b[33m\u001b[1moutput_table not found in listener.dict()\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m188 \u001b[0m | \u001b[1mFound identical listener:embeddinglistener:6e0274765d264d25\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m576 \u001b[0m | \u001b[33m\u001b[1mLeaf ID already exists\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m243 \u001b[0m | \u001b[1mFound update listener:embeddinglistener:6e0274765d264d25\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m188 \u001b[0m | \u001b[1mFound identical vector_index:vectorindex:fb8364f87f6446c0\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m188 \u001b[0m | \u001b[1mFound identical model:llm-model:f5a1f4c2908b4570\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m188 \u001b[0m | 
\u001b[1mFound identical model:simple_rag:721ab56bc3784088\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m616 \u001b[0m | \u001b[1mComponent (application, simple-rag-app) not found in cache, loading from db\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m622 \u001b[0m | \u001b[1mLoad (('application', 'simple-rag-app')) from metadata...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m257 \u001b[0m | \u001b[1mFound new application:simple-rag-app:43582331de8b49c8\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m64 \u001b[0m | \u001b[1mFound this diff:\u001b[0m\n" + ] + }, + { + "data": { + "text/html": [ + "
simple-rag-app\n",
+       "├── chunker\n",
+       "│   ├── status: update\n",
+       "│   ├── changes\n",
+       "│   │   └── _object: Blob(identifier='4b100f6e48727d74cb84f5c7d979d988aee6aa51', uuid='7531030f3a244229', bytes=b'\\\n",
+       "│   └── type_id: model\n",
+       "├── vector[1536]\n",
+       "│   ├── status: update\n",
+       "│   ├── changes\n",
+       "│   │   └── shape: (1536,)\n",
+       "│   └── type_id: datatype\n",
+       "└── embeddinglistener\n",
+       "    ├── status: update\n",
+       "    ├── changes\n",
+       "    │   └── upstream: ['?chunker', '?chunker']\n",
+       "    └── type_id: listener\n",
+       "
\n" + ], + "text/plain": [ + "simple-rag-app\n", + "├── \u001b[1;33mchunker\u001b[0m\n", + "│ ├── \u001b[1;36mstatus: \u001b[0m\u001b[1;34mupdate\u001b[0m\n", + "│ ├── \u001b[1;33mchanges\u001b[0m\n", + "│ │ └── \u001b[1;36m_object: \u001b[0m\u001b[1;32mBlob(identifier='4b100f6e48727d74cb84f5c7d979d988aee6aa51', uuid='7531030f3a244229', bytes=b'\\\u001b[0m\n", + "│ └── \u001b[1;36mtype_id: \u001b[0m\u001b[1;32mmodel\u001b[0m\n", + "├── \u001b[1;33mvector[1536]\u001b[0m\n", + "│ ├── \u001b[1;36mstatus: \u001b[0m\u001b[1;34mupdate\u001b[0m\n", + "│ ├── \u001b[1;33mchanges\u001b[0m\n", + "│ │ └── \u001b[1;36mshape: \u001b[0m\u001b[1;32m(1536,)\u001b[0m\n", + "│ └── \u001b[1;36mtype_id: \u001b[0m\u001b[1;32mdatatype\u001b[0m\n", + "└── \u001b[1;33membeddinglistener\u001b[0m\n", + " ├── \u001b[1;36mstatus: \u001b[0m\u001b[1;34mupdate\u001b[0m\n", + " ├── \u001b[1;33mchanges\u001b[0m\n", + " │ └── \u001b[1;36mupstream: \u001b[0m\u001b[1;32m['?chunker', '?chunker']\u001b[0m\n", + " └── \u001b[1;36mtype_id: \u001b[0m\u001b[1;32mlistener\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m67 \u001b[0m | \u001b[1mFound these changes and/ or additions that need to be made:\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m69 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m70 \u001b[0m | \u001b[1mMETADATA EVENTS:\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m71 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m80 \u001b[0m | \u001b[1m[0]: model:chunker:6c65cfb0dc6a4240: update\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m80 \u001b[0m | \u001b[1m[1]: datatype:vector[1536]:7da0ede750ef4110: update\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m80 \u001b[0m | \u001b[1m[2]: listener:embeddinglistener:6e0274765d264d25: update\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m82 \u001b[0m | \u001b[1m[3]: application:simple-rag-app:43582331de8b49c8: create\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m84 \u001b[0m | 
\u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m85 \u001b[0m | \u001b[1mJOBS EVENTS:\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m86 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m90 \u001b[0m | \u001b[1mNo job events...\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.47\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.apply\u001b[0m:\u001b[36m101 \u001b[0m | \u001b[1m----------------------------------------------------------------------------------------------------\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.48\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m588 \u001b[0m | \u001b[1mComponent 43582331de8b49c8 not found in cache, loading from db with uuid\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.48\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m588 \u001b[0m | \u001b[1mComponent 32a68622e6ac4e8c not found in cache, loading from db with uuid\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.48\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m588 \u001b[0m | \u001b[1mComponent 6c65cfb0dc6a4240 not found in cache, loading from db with uuid\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.48\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m607 \u001b[0m | \u001b[1mAdding model:chunker:6c65cfb0dc6a4240 to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.48\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m607 \u001b[0m | \u001b[1mAdding listener:chunker:32a68622e6ac4e8c to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.48\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m588 \u001b[0m | \u001b[1mComponent fb8364f87f6446c0 not found in cache, loading from db with uuid\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.48\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m588 \u001b[0m | \u001b[1mComponent 6e0274765d264d25 not found in cache, loading from db with uuid\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.48\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m607 \u001b[0m | \u001b[1mAdding listener:embeddinglistener:6e0274765d264d25 to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.48\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m607 \u001b[0m | 
\u001b[1mAdding vector_index:vectorindex:fb8364f87f6446c0 to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.48\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.application\u001b[0m:\u001b[36m39 \u001b[0m | \u001b[1mResorting components based on topological order.\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.48\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.application\u001b[0m:\u001b[36m56 \u001b[0m | \u001b[1mNew order of components: ['listener:chunker:32a68622e6ac4e8c', 'vector_index:vectorindex:fb8364f87f6446c0', 'model:simple_rag:721ab56bc3784088']\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.48\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m607 \u001b[0m | \u001b[1mAdding application:simple-rag-app:43582331de8b49c8 to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.48\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.component\u001b[0m:\u001b[36m585 \u001b[0m | \u001b[1mAdding application: simple-rag-app to cache\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:18.48\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.backends.local.compute\u001b[0m:\u001b[36m49 \u001b[0m | \u001b[33m\u001b[1mCould not release futures for context 43582331de8b49c8\u001b[0m\n" + ] + } + ], "source": [ "if APPLY:\n", " db.apply(app, force=True)" @@ -432,13 +850,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "2a82ea22-9694-4c65-b72f-c89ae49d1ab2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2024-Nov-25 14:43:20.87\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m792 \u001b[0m | \u001b[1mGetting vector-index\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:20.87\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m800 \u001b[0m | \u001b[1m{}\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:21.39\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m792 \u001b[0m | \u001b[1mGetting vector-index\u001b[0m\n", + "\u001b[32m2024-Nov-25 14:43:21.39\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m800 \u001b[0m | \u001b[1m{}\u001b[0m\n", + "Superduper project involves inserting data, tracking components, triggering work based on changes, and using AI models to interact with new data.\n" + ] + } + ], "source": [ "if APPLY:\n", " print(rag.predict('Tell me about the project'))" ] }, { diff --git a/test/integration/usecase/test_training.py b/test/integration/usecase/test_training.py index 5d2e6bbb9..e1d25019c 100644 --- a/test/integration/usecase/test_training.py +++ b/test/integration/usecase/test_training.py @@ -17,7 +17,7 @@ def fit(self, model, db, train_dataset, valid_dataset): class MyModel(Model): - _artifacts: t.ClassVar[t.Any] = (('estimator', pickle_serializer),) + _fields = {'estimator': pickle_serializer} estimator: t.Any signature: str = 'singleton'
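
The `test_training.py` hunk above is the pattern repeated throughout the test suite below: the `_artifacts` class variable (a tuple of `(field, serializer)` pairs) gives way to a `_fields` dict mapping field names to serializers, or to string aliases such as `'file'` and `'dill_serializer'`. A minimal sketch of the new declaration style, assuming the serializers exported by `superduper.components.datatype` (the class and field names here are illustrative only):

```python
import typing as t

from superduper.components.datatype import pickle_serializer
from superduper.components.model import Model


class MyModel(Model):
    # old style, removed in this diff:
    #   _artifacts: t.ClassVar[t.Any] = (('estimator', pickle_serializer),)
    _fields = {'estimator': pickle_serializer}

    estimator: t.Any
    signature: str = 'singleton'
```

diff --git 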
a/test/unittest/backends/local/test_artifacts.py b/test/unittest/backends/local/test_artifacts.py index c4989bd9b..93291cba8 100644 --- a/test/unittest/backends/local/test_artifacts.py +++ b/test/unittest/backends/local/test_artifacts.py @@ -7,11 +7,7 @@ from superduper.backends.local.artifacts import FileSystemArtifactStore from superduper.components.component import Component -from superduper.components.datatype import ( - DataType, - file_lazy, - serializers, -) +from superduper.components.datatype import INBUILT_DATATYPES @dc.dataclass(kw_only=True) @@ -19,9 +15,7 @@ class TestComponent(Component): path: str type_id: t.ClassVar[str] = "TestComponent" - _artifacts: t.ClassVar[t.Sequence[t.Tuple[str, "DataType"]]] = ( - ("path", file_lazy), - ) + _fields = {'path': 'file'} @dc.dataclass(kw_only=True) @@ -29,9 +23,7 @@ class TestComponentBytes(Component): function: callable type_id: t.ClassVar[str] = "TestComponent" - _artifacts: t.ClassVar[t.Sequence[t.Tuple[str, "DataType"]]] = ( - ("path", file_lazy), - ) + _fields = {'function': 'dill_serializer'} @pytest.fixture @@ -59,7 +51,7 @@ def random_directory(tmpdir): def artifact_store(tmpdir) -> FileSystemArtifactStore: tmpdir_path = os.path.join(tmpdir, "artifact_store") artifact_strore = FileSystemArtifactStore(f"{tmpdir_path}") - artifact_strore._serializers = serializers + artifact_strore._serializers = INBUILT_DATATYPES return artifact_strore diff --git a/test/unittest/base/test_datalayer.py b/test/unittest/base/test_datalayer.py index 6b0c96b1b..01d2f7d87 100644 --- a/test/unittest/base/test_datalayer.py +++ b/test/unittest/base/test_datalayer.py @@ -12,15 +12,13 @@ from superduper.components.component import Component from superduper.components.dataset import Dataset from superduper.components.datatype import ( - DataType, - LazyArtifact, + BaseDataType, + Blob, dill_serializer, - pickle_decode, - pickle_encode, pickle_serializer, ) from superduper.components.listener import Listener -from superduper.components.model import Model, ObjectModel, Trainer +from superduper.components.model import ImportedModel, Model, ObjectModel, Trainer from superduper.components.schema import FieldType, Schema from superduper.components.table import Table @@ -39,7 +37,7 @@ class FakeModel(Model): class TestComponent(Component): breaks: ClassVar[Sequence] = ('inc',) - _artifacts: ClassVar[Sequence[str]] = (('artifact', dill_serializer),) + _fields = {'artifact': dill_serializer} inc: int = 0 type_id: str = 'test-component' is_on_create: bool = False @@ -83,7 +81,6 @@ def add_fake_model(db: Datalayer): identifier='fake_model', example=((1,), {}), ) - db.apply(model) select = db['documents'].select() listener = Listener( identifier='listener-x', @@ -134,10 +131,15 @@ def test_add_version(db: Datalayer): assert db.show('test-component', 'test') == [0, 1, 2] +class TestComponentPickle(TestComponent): + _fields = {'artifact': pickle_serializer} + + def test_add_component_with_bad_artifact(db): artifact = {'data': lambda x: x} - component = TestComponent( - identifier='test', artifact=artifact, artifacts={'artifact': pickle_serializer} + component = TestComponentPickle( + identifier='test', + artifact=artifact, ) with pytest.raises(Exception): db.apply(component) @@ -149,9 +151,7 @@ def test_add_artifact_auto_replace(db): component = TestComponent(identifier='test', artifact=artifact) db.apply(component) r = db.show('test-component', 'test', -1) - assert r['artifact'].startswith('?') - info = r['_builds'][r['artifact'][1:]] - assert 
info['blob'].startswith('&') + assert r['artifact'].startswith('&') def test_add_child(db): @@ -206,7 +206,7 @@ def test_add_with_artifact(db): assert m.object is not None - assert isinstance(m.object, LazyArtifact) + assert isinstance(m.object, Blob) m.init() assert callable(m.object) @@ -289,7 +289,7 @@ def test_remove_component_with_artifact(db): info_with_artifact = db.metadata.get_component( 'test-component', 'test_with_artifact', 0 ) - artifact_file_id = info_with_artifact['artifact'][1:] + artifact_file_id = info_with_artifact['artifact'].split(':')[-1] with patch.object(db.artifact_store, '_delete_bytes') as mock_delete: db._remove_component_version( 'test-component', 'test_with_artifact', 0, force=True @@ -367,14 +367,22 @@ def test_show(db): assert db.show('test-component', 'b', -1)['version'] == 2 +class DataType(BaseDataType): + def encode_data(self, item): + return item + + def decode_data(self, item): + return item + + def test_load(db): - m1 = ObjectModel(object=lambda x: x, identifier='m1', datatype='int32') + m1 = ObjectModel(object=lambda x: x, identifier='m1') components = [ DataType(identifier='e1'), DataType(identifier='e2'), m1, - ObjectModel(object=lambda x: x, identifier='m1', datatype='int32'), + ObjectModel(object=lambda x: x, identifier='m1'), ] for component in components: db.apply(component) @@ -389,7 +397,7 @@ def test_load(db): db.load('model', 'e1') datatype = db.load('datatype', 'e1') - assert isinstance(datatype, DataType) + assert isinstance(datatype, BaseDataType) assert datatype.type_id, datatype.identifier in db.cluster.cache @@ -411,17 +419,7 @@ def test_insert(db): def test_insert_artifacts(db): - dt = DataType( - 'my_saveable', - encodable='artifact', - encoder=pickle_encode, - decoder=pickle_decode, - ) - table = Table( - 'documents', - schema=Schema('documents', fields={'x': dt}), - ) - db.apply(table) + db.cfg.auto_schema = True db._insert( db['documents'].insert( [Document({'x': numpy.random.randn(100)}) for _ in range(1)] @@ -549,7 +547,7 @@ def my_lambda(x): def test_compound_component(db): - m = ObjectModel( + m = ImportedModel( object=imported_value(my_lambda), identifier='my-test-module', datatype=FieldType(identifier='int'), @@ -613,7 +611,7 @@ def test_dataset(db): assert len(dataset.data) == len(list(db.execute(dataset.select))) -def test_delete_componet_with_same_artifact(db): +def test_delete_component_with_same_artifact(db): from superduper import ObjectModel model1 = ObjectModel( diff --git a/test/unittest/base/test_document.py b/test/unittest/base/test_document.py index 62eca6b8b..cf059a60d 100644 --- a/test/unittest/base/test_document.py +++ b/test/unittest/base/test_document.py @@ -3,13 +3,14 @@ import tempfile import numpy as np +import pytest from superduper.backends.base.query import Query from superduper.base.constant import KEY_BLOBS, KEY_BUILDS from superduper.base.document import Document from superduper.components.datatype import ( - Artifact, - DataType, + BaseDataType, + pickle_encoder, pickle_serializer, ) from superduper.components.model import ObjectModel @@ -17,12 +18,16 @@ from superduper.components.table import Table -def test_document_encoding(): - document = Document({'x': pickle_serializer(np.random.rand(20))}) +def test_document_encoding(db): + schema = Schema('tmp', fields={'x': pickle_serializer}, db=db) + document = Document({'x': np.random.rand(20)}, schema=schema) new_document = Document.decode( - document.encode(), getters={'component': lambda x: pickle_serializer} + document.encode(), + schema=schema, + 
db=db, ) - assert (new_document['x'].x - document['x'].x).sum() == 0 + new_document = new_document.unpack() + assert (new_document['x'] - document['x']).sum() == 0 def test_flat_query_encoding(): @@ -67,6 +72,7 @@ def test_encode_decode_flattened_document(): assert isinstance(next(iter(encoded_r[KEY_BLOBS].values())), bytes) +@pytest.mark.skip def test_encode_model_with_remote_file(db): r = { '_base': '?20d76167d4a6ad7fe00250e8359d0dca', @@ -100,6 +106,7 @@ def test_encode_model_with_remote_file(db): assert r.readlines() == read +@pytest.mark.skip def test_encode_model_with_remote_blob(): m = ObjectModel( identifier='test', @@ -136,7 +143,9 @@ def test_encode_model(): pprint.pprint(encoded_r) decoded_r = Document.decode( - encoded_r, getters={'blob': lambda x: encoded_r[KEY_BLOBS][x]} + encoded_r, + getters={'blob': lambda x: encoded_r[KEY_BLOBS][x]}, + schema=m.build_class_schema(), ) print(decoded_r) @@ -144,9 +153,7 @@ def test_encode_model(): m = decoded_r.unpack() assert isinstance(m, ObjectModel) - assert isinstance(m.object, Artifact) - - pprint.pprint(m) + assert callable(m.object) r = m.dict() @@ -158,12 +165,12 @@ def test_encode_model(): pprint.pprint(m.dict().encode()) -def test_decode_inline_data(): - schema = Schema('my-schema', fields={'data': pickle_serializer}) +def test_decode_inline_data(db): + schema = Schema('my-schema', fields={'data': pickle_encoder}, db=db) r = { 'x': 2, - 'data': pickle_serializer.encode_data(np.random.randn(20)), + 'data': pickle_encoder.encode_data(np.random.randn(20)), } r = Document.decode(r, schema=schema).unpack() @@ -171,7 +178,7 @@ def test_decode_inline_data(): def test_refer_to_applied_item(db): - dt = DataType(identifier='my-type', encodable='artifact') + dt = pickle_serializer db.apply(dt) m = ObjectModel( @@ -183,14 +190,14 @@ def test_refer_to_applied_item(db): db.apply(m) r = db.metadata.get_component_by_uuid(m.uuid) - assert r['datatype'].startswith('&:component:datatype:my-type') + assert r['datatype'].startswith('&:component:datatype:pickle_serializer') import pprint pprint.pprint(r) print(db.show('datatype')) - dt = db.load('datatype', 'my-type', 0) + dt = db.load('datatype', 'pickle_serializer', 0) print(dt) c = db.load('model', 'test') print(c) @@ -220,38 +227,23 @@ def test_column_encoding(db): def test_refer_to_system(db): - from superduper.components.datatype import DataType, methods - - serializer = DataType( - identifier='my-datatype', - encodable='encodable', - db=db, - **methods['pickle'], - ) - db.apply(serializer) - db.artifact_store.put_bytes( - serializer.encode_data(np.random.rand(3)), file_id='12345' + pickle_serializer._encode_data(np.random.rand(3)), file_id='12345' ) r = { - '_builds': { - 'my_artifact': { - '_path': 'superduper.components.datatype.LazyArtifact', - 'blob': '&:blob:12345', - 'datatype': "&:component:datatype:my-datatype", - } - }, - 'data': '?my_artifact', + 'data': '&:blob:12345', } - r = Document.decode(r, db=db).unpack() + r = Document.decode( + r, db=db, schema=Schema('tmp', fields={'data': pickle_serializer}) + ).unpack() assert isinstance(r['data'], np.ndarray) def test_encode_same_identifier(): - datatype = DataType(identifier="a") + datatype = BaseDataType(identifier="a") model = ObjectModel(identifier="a", object=lambda x: x, datatype=datatype) listener = model.to_listener(identifier="a", key="a", select=None) diff --git a/test/unittest/base/test_leaf.py b/test/unittest/base/test_leaf.py index 749c21b4b..3d8e22f06 100644 --- a/test/unittest/base/test_leaf.py +++ 
b/test/unittest/base/test_leaf.py @@ -4,7 +4,7 @@ from superduper import ObjectModel from superduper.backends.base.query import Query -from superduper.base.constant import KEY_BUILDS +from superduper.base.constant import KEY_BLOBS, KEY_BUILDS from superduper.base.document import Document from superduper.base.leaf import Leaf from superduper.components.component import Component @@ -24,12 +24,10 @@ class TestSubModel(Component): type_id: t.ClassVar[str] = 'test-sub-model' a: int = 1 b: str = 'b' - c: ObjectModel = dc.field( - default_factory=ObjectModel(identifier='test-2', object=lambda x: x + 2) - ) + c: ObjectModel | None = None d: t.List[ObjectModel] = dc.field(default_factory=[]) - e: OtherSer = dc.field(default_factory=OtherSer(identifier='test', d='test')) - f: t.Callable = dc.field(default=lambda x: x) + e: OtherSer | None = None + f: t.Callable class MySer(Leaf): @@ -111,15 +109,16 @@ def test_component_with_document(): f=lambda x: x, ) print('encoding') - d = Document(t.dict()) - r = d.encode() + d = t.dict() + r = d.encode(leaves_to_keep=Leaf) builds = r[KEY_BUILDS] pprint(r) - assert len(builds) == 8 + assert len(builds) == 3 + assert len(r[KEY_BLOBS]) == 1 for leaf in builds: - print(type(leaf)) + print(type(builds[leaf])) def test_find_variables(): diff --git a/test/unittest/component/datatype/test_file.py b/test/unittest/component/datatype/test_file.py index 659c16f55..4aff00316 100644 --- a/test/unittest/component/datatype/test_file.py +++ b/test/unittest/component/datatype/test_file.py @@ -3,7 +3,7 @@ import pytest -from superduper import DataType +from superduper.components.datatype import file @pytest.fixture @@ -16,45 +16,17 @@ def random_data(tmpdir): return file_name -def dt_file(): - return DataType("my-file", encodable="file") +def test_data_with_schema(db, random_data): + datatype_utils.check_data_with_schema(random_data, file, db=db) -def dt_file_lazy(): - return DataType("my-file", encodable="lazy_file") +def test_data_with_schema_and_db(random_data, db): + datatype_utils.check_data_with_schema_and_db(random_data, file, db) -datatypes = [ - dt_file(), - dt_file_lazy(), -] +def test_component(random_data): + datatype_utils.check_component(random_data, file) -@pytest.mark.parametrize("datatype", datatypes) -def test_data_with_schema(db, datatype: DataType, random_data): - datatype_utils.check_data_with_schema(random_data, datatype, db=db) - - -@pytest.mark.parametrize("datatype", datatypes) -def test_data_with_schema_and_db(datatype: DataType, random_data, db): - datatype_utils.check_data_with_schema_and_db(random_data, datatype, db) - - -@pytest.mark.parametrize("datatype", datatypes) -def test_data_without_schema(datatype: DataType, random_data): - datatype_utils.check_data_without_schema(random_data, datatype) - - -@pytest.mark.parametrize("datatype", datatypes) -def test_data_without_schema_and_db(datatype: DataType, random_data, db): - datatype_utils.check_data_without_schema_and_db(random_data, datatype, db) - - -@pytest.mark.parametrize("datatype", datatypes) -def test_component(random_data, datatype): - datatype_utils.check_component(random_data, datatype) - - -@pytest.mark.parametrize("datatype", datatypes) -def test_component_with_db(db, random_data, datatype): - datatype_utils.check_component_with_db(random_data, datatype, db) +def test_component_with_db(db, random_data): + datatype_utils.check_component_with_db(random_data, file, db) diff --git a/test/unittest/component/datatype/test_pickle.py b/test/unittest/component/datatype/test_pickle.py index 
f152aa706..1e42dc987 100644 --- a/test/unittest/component/datatype/test_pickle.py +++ b/test/unittest/component/datatype/test_pickle.py @@ -6,9 +6,8 @@ from superduper.base.enums import DBType from superduper.components.datatype import ( - DataType, + BaseDataType, pickle_encoder, - pickle_lazy, pickle_serializer, ) @@ -22,30 +21,19 @@ def random_data(): datatypes = [ pickle_encoder, pickle_serializer, - pickle_lazy, ] @pytest.mark.parametrize("datatype", datatypes) -def test_data_with_schema(db, datatype: DataType, random_data: pd.DataFrame): +def test_data_with_schema(db, datatype: BaseDataType, random_data: pd.DataFrame): datatype_utils.check_data_with_schema(random_data, datatype, db) @pytest.mark.parametrize("datatype", datatypes) -def test_data_with_schema_and_db(datatype: DataType, random_data: pd.DataFrame, db): +def test_data_with_schema_and_db(datatype: BaseDataType, random_data: pd.DataFrame, db): datatype_utils.check_data_with_schema_and_db(random_data, datatype, db) -@pytest.mark.parametrize("datatype", datatypes) -def test_data_without_schema(datatype: DataType, random_data: pd.DataFrame): - datatype_utils.check_data_without_schema(random_data, datatype) - - -@pytest.mark.parametrize("datatype", datatypes) -def test_data_without_schema_and_db(datatype: DataType, random_data: pd.DataFrame, db): - datatype_utils.check_data_without_schema_and_db(random_data, datatype, db) - - @pytest.mark.parametrize("datatype", datatypes) def test_component(random_data, datatype): datatype_utils.check_component(random_data, datatype) diff --git a/test/unittest/component/test_component.py b/test/unittest/component/test_component.py index 64cefb769..1a363d29e 100644 --- a/test/unittest/component/test_component.py +++ b/test/unittest/component/test_component.py @@ -10,10 +10,8 @@ from superduper.base.annotations import trigger from superduper.components.component import Component from superduper.components.datatype import ( - Artifact, - DataType, - Empty, - LazyArtifact, + BaseDataType, + Blob, dill_serializer, ) from superduper.components.listener import Listener @@ -32,27 +30,41 @@ def cleanup(): @dc.dataclass(kw_only=True) class MyComponent(Component): type_id: t.ClassVar[str] = "my_type" - _lazy_fields: t.ClassVar[t.Sequence[str]] = ("my_dict",) + _fields = { + 'my_dict': dill_serializer, + 'nested_list': dill_serializer, + } my_dict: t.Dict nested_list: t.List a: t.Callable -def test_init(monkeypatch): - from unittest.mock import MagicMock +def test_reload(db): + m = ObjectModel('test', object=lambda x: x + 1) - e = Artifact(x=None, identifier="123", datatype=dill_serializer) - a = Artifact(x=None, identifier="456", datatype=dill_serializer) + db.apply(m) - def side_effect(*args, **kwargs): - a.x = lambda x: x + 1 + reloaded = db.load('model', 'test') + reloaded.unpack() - a.init = MagicMock() - a.init.side_effect = side_effect - list_ = [e, a] +def test_init(db, monkeypatch): + a = Blob( + identifier="456", + bytes=dill_serializer._encode_data(lambda x: x + 1), + db=db, + ) + my_dict = Blob( + identifier="456", + bytes=dill_serializer._encode_data({'a': lambda x: x + 1}), + db=db, + ) + + list_ = Blob( + identifier='789', bytes=dill_serializer._encode_data([lambda x: x + 1]), db=db + ) - c = MyComponent("test", my_dict={"a": a}, a=a, nested_list=list_) + c = MyComponent("test", my_dict=my_dict, a=a, nested_list=list_) c.init() @@ -62,8 +74,8 @@ def side_effect(*args, **kwargs): assert callable(c.a) assert c.a(1) == 2 - assert callable(c.nested_list[1]) - assert c.nested_list[1](1) == 2 + 
assert callable(c.nested_list[0]) + assert c.nested_list[0](1) == 2 def test_load_lazily(db): @@ -74,8 +86,8 @@ def test_load_lazily(db): reloaded = db.load("model", m.identifier) - assert isinstance(reloaded.object, LazyArtifact) - assert isinstance(reloaded.object.x, Empty) + assert isinstance(reloaded.object, Blob) + assert reloaded.object.bytes is None reloaded.init(db=db) @@ -97,7 +109,7 @@ def load(blob): reloaded = Component.read(save_path) # getters=getters assert isinstance(reloaded, ObjectModel) - assert isinstance(reloaded.datatype, DataType) + assert isinstance(reloaded.datatype, BaseDataType) def test_set_variables(db): @@ -163,6 +175,7 @@ def test_upstream(db, clean): db.apply(m) +# TODO needed? def test_set_db_deep(db): c1 = UpstreamComponent(identifier='c1') m = MyListener( diff --git a/test/unittest/component/test_graph.py b/test/unittest/component/test_graph.py index 0aa836310..eccca41e9 100644 --- a/test/unittest/component/test_graph.py +++ b/test/unittest/component/test_graph.py @@ -12,7 +12,6 @@ def model_object(x): return x + 1 model = ObjectModel(identifier='m1', object=model_object, signature='singleton') - db.add(model) yield model @@ -22,7 +21,6 @@ def model_object(x): return x + 2, x model = ObjectModel(identifier='m2', object=model_object) - db.add(model) yield model @@ -32,7 +30,6 @@ def model_object(x): return {'x': x + 2} model = ObjectModel(identifier='m2_multi_dict', object=model_object) - db.add(model) yield model @@ -42,7 +39,6 @@ def model_object(x, y=1): return x + y + 2 model = ObjectModel(identifier='m2_multi', object=model_object) - db.add(model) yield model @@ -52,7 +48,6 @@ def model_object(x, y): return x + y + 3 model = ObjectModel(identifier='m3', object=model_object) - db.add(model) yield model @@ -150,7 +145,7 @@ def test_complex_graph_with_select(db): def test_serialization(db, model1): g = Graph(identifier='complex-graph', input=model1) original_g = g.G - db.add(g) + db.apply(g) g = db.load('model', 'complex-graph') assert nx.utils.graphs_equal(original_g, g.G) diff --git a/test/unittest/component/test_listener.py b/test/unittest/component/test_listener.py index 78ce5510d..c34d6ff38 100644 --- a/test/unittest/component/test_listener.py +++ b/test/unittest/component/test_listener.py @@ -262,14 +262,12 @@ def test_upstream_serializes(db): upstream=[upstream_component], ) - db.apply(dependent_listener) - listener = Listener( identifier="test-listener", model=ObjectModel("test", object=lambda x: x), select=db[dependent_listener.outputs].select(), key=dependent_listener.outputs, - upstream=[upstream_component], + upstream=[dependent_listener], ) db.apply(listener) diff --git a/test/unittest/component/test_model.py b/test/unittest/component/test_model.py index 791b07aeb..c674d8986 100644 --- a/test/unittest/component/test_model.py +++ b/test/unittest/component/test_model.py @@ -12,7 +12,7 @@ from superduper.base.datalayer import Datalayer from superduper.base.document import Document from superduper.components.dataset import Dataset -from superduper.components.datatype import DataType, pickle_decode, pickle_encode +from superduper.components.datatype import pickle_serializer from superduper.components.metric import Metric from superduper.components.model import ( Mapping, @@ -158,7 +158,7 @@ def test_pm_predict_with_select_ids(monkeypatch, predict_mixin): monkeypatch.setattr( predict_mixin, 'datatype', - DataType(identifier='test', encoder=pickle_encode, decoder=pickle_decode), + pickle_serializer, ) predict_mixin._predict_with_select_and_ids( 
X=X, select=select, ids=ids, predict_id='test' diff --git a/test/unittest/component/test_plugin.py b/test/unittest/component/test_plugin.py index 1cf0e2f6a..8460a5da9 100644 --- a/test/unittest/component/test_plugin.py +++ b/test/unittest/component/test_plugin.py @@ -56,19 +56,9 @@ def create_import_plugin(tempdirname): component_dict = { "_base": "?plugin", "_builds": { - "file_lazy": { - "_path": "superduper.components.datatype.get_serializer", - "method": "file", - "encodable": "lazy_file", - }, - "file_id": { - "_path": "superduper.components.datatype.LazyFile", - "datatype": "?file_lazy", - "x": "&:file:p_import:file_id", - }, "plugin": { "_path": "superduper.components.plugin.Plugin", - "path": "?file_id", + "path": "&:file:p_import:file_id", }, }, } diff --git a/test/unittest/component/test_schema.py b/test/unittest/component/test_schema.py index a4893eb48..54d735f63 100644 --- a/test/unittest/component/test_schema.py +++ b/test/unittest/component/test_schema.py @@ -1,5 +1,28 @@ -from superduper import Schema, Table -from superduper.components.datatype import pickle_encoder +import typing as t + +import pytest + +from superduper import Component, Schema, Table +from superduper.components.datatype import ( + Blob, + File, + dill_serializer, + file, + pickle_encoder, + pickle_serializer, +) + + +class TestComponent(Component): + _fields = {'a': dill_serializer, 'b': file} + + a: t.Callable + b: str | None = None + + +class TestUnannotatedComponent(Component): + a: t.Callable + b: t.Optional[t.Callable] def test_schema_with_bytes_encoding(db): @@ -28,3 +51,115 @@ assert isinstance(r['txt'], str) r = db['documents'].find_one() + + +def test_schema_with_blobs(db): + db.apply( + Table( + 'documents', + schema=Schema('_schema/documents', fields={'txt': pickle_serializer}), + ) + ) + + db['documents'].insert([{'txt': 'testing 123'}]).execute() + + r = db['documents'].select().tolist()[0] + + assert isinstance(r['txt'], Blob) + + # artifacts are loaded lazily and initially empty + assert r['txt'].bytes is None + + # artifacts are downloaded and decoded with `.unpack()` + assert r.unpack()['txt'] == 'testing 123' + + +@pytest.fixture +def tmp_file(): + # named `file_path` to avoid shadowing the imported `file` datatype + file_path = '/tmp/test_schema_with_file.txt' + with open(file_path, 'a') as f: + f.write('Hello 123') + + yield file_path + + import os + + os.remove(file_path) + + +def test_schema_with_file(db, tmp_file): + # the `file` datatype copies a file + # to the artifact store when a reference document + # containing a file field is inserted + db.apply( + Table( + 'documents', + schema=Schema('_schema/documents', fields={'my_file': file}), + ) + ) + db['documents'].insert([{'my_file': tmp_file}]).execute() + + # only the references are loaded when data is selected + r = db['documents'].select().tolist()[0] + + # loaded document contains a pointer to the file + assert isinstance(r['my_file'], File) + + # however the path has not been populated + assert not r['my_file'].path + + # unpacking the document fetches the file from the artifact-store + rr = r.unpack() + + # the path has been populated + assert r['my_file'].path + + # and is also now local + import os + + assert os.path.exists(r['my_file'].path) + + # the unpacked value contains the local path + # this may be different from the original file path + assert rr['my_file'] == r['my_file'].path + + with open(rr['my_file']) as f: + assert f.read().split('\n')[0] == 'Hello 123'
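
The next test pins down the reference convention that `_fields` implies: `Component.dict()` wraps artifact fields in `Blob`/`File` carriers, and `encode()` flattens those carriers into artifact-store references. A condensed sketch of that round trip under the same assumptions, reusing the `TestComponent` declared at the top of this file (the lambda and path are placeholders):

```python
import typing as t

from superduper import Component
from superduper.components.datatype import Blob, dill_serializer, file


class TestComponent(Component):
    _fields = {'a': dill_serializer, 'b': file}

    a: t.Callable
    b: str | None = None


# placeholder values; the path must exist on disk, as in the fixture above
c = TestComponent('test', a=lambda x: x + 1, b='/tmp/some_file.txt')

r = c.dict()                      # artifact fields are wrapped in place
assert isinstance(r['a'], Blob)   # the callable is carried as a Blob

encoded = r.encode()              # carriers flatten to store references
assert encoded['a'].startswith('&:blob:')
assert encoded['b'].startswith('&:file:')
```

+ + +def test_component_serializes_with_schema(db, tmp_file): + c = TestComponent('test', 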
a='testing testing 123', b=tmp_file) + + r = c.dict() + + r_encoded = r.encode() + + import pprint + + pprint.pprint(r.schema) + + pprint.pprint(r_encoded) + + assert isinstance(r['a'], Blob) + + assert r_encoded['a'].startswith('&:blob:') + assert r_encoded['b'].startswith('&:file:') + + +def test_auto_infer_fields(): + s = TestUnannotatedComponent.build_class_schema() + + assert isinstance(s, Schema) + + import pprint + + pprint.pprint(s) + + assert list(s.fields.keys()) == ['a', 'b'] + + +def test_wrap_function_with_blob(): + r = TestComponent('test', a=lambda x: x + 1).dict() + + assert isinstance(r['a'], Blob) diff --git a/test/unittest/component/test_serialization.py b/test/unittest/component/test_serialization.py index bb7994b9f..32b0f13cc 100644 --- a/test/unittest/component/test_serialization.py +++ b/test/unittest/component/test_serialization.py @@ -1,14 +1,14 @@ -from superduper.components.datatype import pickle_serializer +from superduper.components.datatype import dill_serializer from superduper.components.model import ObjectModel def test_model(): m = ObjectModel( identifier='test', - datatype=pickle_serializer, + datatype=dill_serializer, object=lambda x: x + 1, ) m_dict = m.dict() assert m_dict['identifier'] == m.identifier - assert m_dict['object'].x == m.object - assert m_dict['datatype'].identifier == 'pickle' + assert m_dict['object'].bytes == dill_serializer._encode_data(m.object) + assert m_dict['datatype'].identifier == 'dill_serializer' diff --git a/test/unittest/component/test_template.py b/test/unittest/component/test_template.py index 26127ec52..988fea3bb 100644 --- a/test/unittest/component/test_template.py +++ b/test/unittest/component/test_template.py @@ -133,6 +133,7 @@ def test_from_template(db): component.init() assert isinstance(component, Listener) assert isinstance(component.model, ObjectModel) + assert component.model.object(3) == 5 diff --git a/test/unittest/test_quality.py b/test/unittest/test_quality.py index 014628f2e..ef384de49 100644 --- a/test/unittest/test_quality.py +++ b/test/unittest/test_quality.py @@ -17,9 +17,9 @@ # over time. If you have decreased the number of defects, change it here, # and take a bow! 
ALLOWABLE_DEFECTS = { - 'cast': 3, # Try to keep this down + 'cast': 1, # Try to keep this down 'noqa': 3, # This should never change - 'type_ignore': 10, # This should only ever increase in obscure edge cases + 'type_ignore': 7, # This should only ever increase in obscure edge cases } diff --git a/test/utils/component/datatype.py b/test/utils/component/datatype.py index 779e606d3..652e75c35 100644 --- a/test/utils/component/datatype.py +++ b/test/utils/component/datatype.py @@ -9,24 +9,12 @@ from superduper.base.document import Document from superduper.base.enums import DBType from superduper.components.component import Component -from superduper.components.datatype import ( - DataType, - Empty, - _BaseEncodable, -) +from superduper.components.datatype import BaseDataType, pickle_serializer from superduper.components.schema import Schema from superduper.components.table import Table def assert_equal(expect, actual): - if isinstance(actual, _BaseEncodable) and actual.lazy: - actual.init() - actual = actual.x - - if isinstance(expect, _BaseEncodable) and expect.lazy: - expect.init() - expect = expect.x - assert isinstance(expect, type(actual)) if isinstance(expect, np.ndarray): assert np.array_equal(expect, actual) @@ -45,7 +33,7 @@ def print_sep(): print("\n", "-" * 80, "\n") -def check_data_with_schema(data, datatype: DataType, db): +def check_data_with_schema(data, datatype, db): print("datatype", datatype) print_sep() schema = Schema(identifier="schema", fields={"x": datatype, "y": int}, db=db) @@ -58,11 +46,8 @@ def check_data_with_schema(data, datatype: DataType, db): pprint(encoded) print_sep() - decoded = Document.decode(encoded, schema=schema) - if datatype.encodable == 'lazy_artifact': - assert isinstance(decoded["x"], datatype.encodable_cls) - assert isinstance(decoded["x"].x, type(data)) - decoded = Document(decoded.unpack()) + decoded = Document.decode(encoded, schema=schema, db=db).unpack() + pprint(decoded) print_sep() @@ -72,7 +57,7 @@ def check_data_with_schema(data, datatype: DataType, db): return document, encoded, decoded -def check_data_with_schema_and_db(data, datatype: DataType, db: Datalayer): +def check_data_with_schema_and_db(data, datatype: BaseDataType, db: Datalayer): print("datatype", datatype) print_sep() schema = Schema(identifier="schema", fields={"x": datatype, "y": int}) @@ -95,11 +80,7 @@ def check_data_with_schema_and_db(data, datatype: DataType, db: Datalayer): print_sep() decoded = list(db["documents"].select().execute())[0] - - if datatype.encodable == 'lazy_artifact': - assert isinstance(decoded["x"], datatype.encodable_cls) - assert isinstance(decoded["x"].x, Empty) - decoded = Document(decoded.unpack()) + decoded = decoded.unpack() pprint(decoded) print_sep() @@ -110,51 +91,6 @@ def check_data_with_schema_and_db(data, datatype: DataType, db: Datalayer): return document, encoded, decoded -def check_data_without_schema(data, datatype: DataType): - print("datatype", datatype) - print_sep() - - document = Document({"x": datatype(data), "y": 1}) - pprint(document) - print_sep() - - encoded = document.encode() - pprint(encoded) - print_sep() - - decoded = Document.decode(encoded) - pprint(decoded) - assert_equal(document["x"], decoded["x"]) - assert_equal(document["y"], decoded["y"]) - return document, encoded, decoded - - -def check_data_without_schema_and_db(data, datatype: DataType, db: Datalayer): - print("datatype", datatype) - print("\n", "-" * 80, "\n") - - table = Table( - "documents", - schema=Schema(identifier="schema", fields={"x": datatype, 
"y": int}), - ) - - db.apply(table) - - document = Document({"x": data, "y": 1}) - print(document) - print("\n", "-" * 80, "\n") - db["documents"].insert([document]).execute() - - decoded = list(db["documents"].select().execute())[0] - pprint(decoded) - print("\n", "-" * 80, "\n") - - assert_equal(document["x"], decoded["x"]) - assert_equal(document["y"], decoded["y"]) - - return document, decoded - - @dc.dataclass(kw_only=True) class ChildComponent(Component): type_id: t.ClassVar[str] = "ChildComponent" @@ -167,18 +103,17 @@ class TestComponent(Component): y: int = 1 x: np.ndarray | None = None child: ChildComponent | None = None - _artifacts: t.ClassVar = () + _fields = {'x': pickle_serializer} -def check_component(data, datatype: DataType): +def check_component(data, datatype: BaseDataType): print("datatype", datatype) print_sep() c = TestComponent( "test", x=data, - child=ChildComponent("child", y=2, artifacts={"x": datatype}), - artifacts={"x": datatype}, + child=ChildComponent("child", y=2), ) pprint(c) print_sep() @@ -203,8 +138,7 @@ def check_component_with_db(data, datatype, db): c = TestComponent( "test", x=data, - child=ChildComponent("child", y=2, artifacts={"x": datatype}), - artifacts={"x": datatype}, + child=ChildComponent("child", y=2), ) db.add(c) pprint(c) diff --git a/test/utils/component/model.py b/test/utils/component/model.py index 502caa486..7a7bcd8a0 100644 --- a/test/utils/component/model.py +++ b/test/utils/component/model.py @@ -25,8 +25,6 @@ def test_predict(model: Model, sample_data: t.Any): def test_predict_in_db(model: Model, sample_data: t.Any, db: "Datalayer"): model.identifier = random_id() - db.apply(model) - db.cfg.auto_schema = True db["datas"].insert([{"data": sample_data, "i": i} for i in range(10)]).execute() diff --git a/test/utils/database/query.py b/test/utils/database/query.py index 5540f16e7..6cfaf64a3 100644 --- a/test/utils/database/query.py +++ b/test/utils/database/query.py @@ -210,16 +210,6 @@ def _check(n): table_or_collection.insert([data]).execute() _check(2) - # Without `Document` non dict data - table_or_collection.insert([np.zeros((1))]).execute() - c = _check(3) - - gt = np.zeros((1)) - - # Auto wrapped _base - assert "x" in c[-1] - assert c[-1].unpack()["x"] == gt - def test_model(db): from test.utils.setup.fake_data import add_models diff --git a/test/utils/setup/fake_data.py b/test/utils/setup/fake_data.py index 2c28b480a..9dac29ee7 100644 --- a/test/utils/setup/fake_data.py +++ b/test/utils/setup/fake_data.py @@ -10,7 +10,7 @@ from superduper.components.schema import Schema from superduper.components.table import Table from superduper.components.vector_index import VectorIndex -from superduper.ext.numpy.encoder import array +from superduper.ext.numpy.encoder import Array GLOBAL_TEST_N_DATA_POINTS = 100 @@ -31,7 +31,7 @@ def add_random_data( table_name: str = "documents", n: int = GLOBAL_TEST_N_DATA_POINTS, ): - float_array = array(dtype="float", shape=(32,)) + float_array = Array(dtype="float", shape=(32,)) schema = Schema( identifier=table_name, @@ -57,16 +57,16 @@ def add_random_data( def add_datatypes(db: Datalayer): for n in [8, 16, 32]: - db.apply(array(dtype="float", shape=(n,))) + db.apply(Array(dtype="float", shape=(n,))) def add_models(db: Datalayer): # identifier, weight_shape, encoder params = [ - ["linear_a", (32, 16), array(dtype="float", shape=(16,)), False], - ["linear_a_multi", (32, 16), array(dtype="float", shape=(16,)), True], - ["linear_b", (16, 8), array(dtype="float", shape=(8,)), False], - ["linear_b_multi", 
(16, 8), array(dtype="float", shape=(8,)), True], + ["linear_a", (32, 16), Array(dtype="float", shape=(16,)), False], + ["linear_a_multi", (32, 16), Array(dtype="float", shape=(16,)), True], + ["linear_b", (16, 8), Array(dtype="float", shape=(8,)), False], + ["linear_b_multi", (16, 8), Array(dtype="float", shape=(8,)), True], ] for identifier, weight_shape, datatype, flatten in params: weight = np.random.randn(weight_shape[1])
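
The `array` → `Array` rename in `fake_data.py` above keeps the `dtype`/`shape` call signature. For reference, a minimal sketch of the updated usage when declaring a schema, mirroring `add_random_data` (the table name and fields are illustrative):

```python
from superduper.components.schema import Schema
from superduper.components.table import Table
from superduper.ext.numpy.encoder import Array

# the lowercase `array(...)` factory becomes the `Array(...)` class
float_array = Array(dtype="float", shape=(32,))

schema = Schema(
    identifier="documents",
    fields={"x": float_array, "y": int},
)
table = Table("documents", schema=schema)
# db.apply(table) would register the schema, as in add_random_data above
```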