Skip to content

Commit

Permalink
Add more flexible default data
Browse files (browse the repository at this point in the history)
  • Loading branch information
blythed committed Dec 6, 2024
1 parent 8f2a9e4 commit 6f91ba6
Show file tree
Hide file tree
Showing 8 changed files with 46 additions and 45 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add schema to `Template`
- Low-code form builder for frontend
- Add snowflake vector search engine
- Add a meta-datatype `Vector` to handle different databackend requirements
- Add a meta-datatype `Vector` to handle different databackend requirements

#### Bug Fixes

Expand Down
6 changes: 5 additions & 1 deletion superduper/components/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,12 @@ def decode_data(
decoded = {}
for k, value in data.items():
field = self.fields.get(k)
if not isinstance(field, BaseDataType):
if not isinstance(field, BaseDataType) or value is None:
decoded[k] = value
continue

value = data[k]

if reference := parse_reference(value):
saveable: Saveable = getters.run(reference.name, reference.path)
decoded[k] = saveable
Expand Down Expand Up @@ -164,6 +165,9 @@ def encode_data(self, out, builds, blobs, files, leaves_to_keep=()):
if isinstance(out[k], leaves_to_keep):
continue

if out[k] is None:
continue

data = field.encode_data(out[k])

if (
Expand Down
5 changes: 4 additions & 1 deletion superduper/components/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def __post_init__(self, db, substitutions):
def __call__(self, **kwargs):
"""Method to create component from the given template and `kwargs`."""
kwargs.update({k: v for k, v in self.default_values.items() if k not in kwargs})

assert set(kwargs.keys()) == (
set(self.template_variables) - {'output_prefix', 'databackend'}
)
Expand Down Expand Up @@ -103,10 +104,12 @@ class Template(_BaseTemplate):
:param queries: `QueryTemplate` instances to be used with the template.
"""

_fields = {'staged_file': 'file'}
type_id: t.ClassVar[str] = "template"

requirements: t.List[str] | None = None
default_table: Table | None = None
default_tables: t.List[Table] | None = None
staged_file: str | None = None
queries: t.List['QueryTemplate'] | None = None

def _pre_create(self, db: Datalayer) -> None:
Expand Down
19 changes: 4 additions & 15 deletions superduper/rest/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,20 +147,6 @@ def db_upload(raw: bytes = File(...), db: 'Datalayer' = DatalayerDependency()):
# Blob objects to be displayed on the upload
return {"component": component, "artifacts": blob_objects}

# Test helper: prints 'Testing {i}' for i in range(100), sleeping 0.1 s via
# time.sleep, and prints a '[DONE]' marker.
# NOTE(review): indentation is not preserved in this diff rendering, so the
# exact extent of the loop body cannot be read off here -- confirm against the
# repository before relying on it.
def _print_to_screen():
for i in range(100):
print(f'Testing {i}')
time.sleep(0.1)
print('[DONE]')

# Handler registered with app.add at '/test/log' using method='post'.
# Calls _print_to_screen() under the redirect_stdout_to_file("/tmp/test.log")
# context manager, removes that log file with os.remove, and returns
# {'status': 'ok'}.
# NOTE(review): redirect_stdout_to_file is defined outside this view;
# presumably it sends stdout to the given path -- verify. Indentation is not
# preserved in this rendering, so the extent of the `with` body is unconfirmed.
@app.add('/test/log', method='post')
def test_log():
log_file = "/tmp/test.log"
with redirect_stdout_to_file(log_file):
_print_to_screen()
os.remove(log_file)
return {'status': 'ok'}

def _process_db_apply(db, component, id: str | None = None):
if id:
log_file = f"/tmp/{id}.log"
Expand All @@ -169,6 +155,10 @@ def _process_db_apply(db, component, id: str | None = None):
else:
db.apply(component, force=True)

# Handler registered with app.add at '/describe_tables' (no `method` argument
# is passed, so app.add's default applies -- not visible here).
# `db` defaults to DatalayerDependency(); the handler returns the result of
# db.databackend.list_tables_or_collections() unchanged.
@app.add('/describe_tables')
def describe_tables(db: 'Datalayer' = DatalayerDependency()):
return db.databackend.list_tables_or_collections()

@app.add('/db/apply', method='post')
async def db_apply(
info: t.Dict,
Expand All @@ -185,7 +175,6 @@ async def db_apply(
return {'status': 'ok'}

import subprocess
import time

from fastapi.responses import StreamingResponse

Expand Down
Binary file not shown.
Binary file not shown.
42 changes: 22 additions & 20 deletions templates/simple_rag/build.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2024-Dec-06 10:23:17.28\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.misc.plugins\u001b[0m:\u001b[36m13 \u001b[0m | \u001b[1mLoading plugin: mongodb\u001b[0m\n",
"\u001b[32m2024-Dec-06 10:23:17.33\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m68 \u001b[0m | \u001b[1mBuilding Data Layer\u001b[0m\n",
"\u001b[32m2024-Dec-06 10:23:17.33\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.build\u001b[0m:\u001b[36m184 \u001b[0m | \u001b[1mConfiguration: \n",
"\u001b[32m2024-Dec-06 12:39:20.75\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.misc.plugins\u001b[0m:\u001b[36m13 \u001b[0m | \u001b[1mLoading plugin: mongodb\u001b[0m\n",
"\u001b[32m2024-Dec-06 12:39:20.79\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.datalayer\u001b[0m:\u001b[36m68 \u001b[0m | \u001b[1mBuilding Data Layer\u001b[0m\n",
"\u001b[32m2024-Dec-06 12:39:20.79\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.build\u001b[0m:\u001b[36m184 \u001b[0m | \u001b[1mConfiguration: \n",
" +---------------+--------------+\n",
"| Configuration | Value |\n",
"+---------------+--------------+\n",
Expand Down Expand Up @@ -425,8 +425,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2024-Dec-06 10:23:18.28\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.application\u001b[0m:\u001b[36m39 \u001b[0m | \u001b[1mResorting components based on topological order.\u001b[0m\n",
"\u001b[32m2024-Dec-06 10:23:18.28\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.application\u001b[0m:\u001b[36m56 \u001b[0m | \u001b[1mNew order of components: ['listener:chunker:f0fb8346e3214611', 'vector_index:vectorindex:120d874f9ec6438a', 'model:simple_rag:bd9f4b83299b4a95']\u001b[0m\n"
"\u001b[32m2024-Dec-06 12:39:21.71\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.application\u001b[0m:\u001b[36m39 \u001b[0m | \u001b[1mResorting components based on topological order.\u001b[0m\n",
"\u001b[32m2024-Dec-06 12:39:21.72\u001b[0m| \u001b[1mINFO \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.application\u001b[0m:\u001b[36m56 \u001b[0m | \u001b[1mNew order of components: ['listener:chunker:820939c155214480', 'vector_index:vectorindex:54bf35fc8c5d41b0', 'model:simple_rag:0cf2be05b78e49bf']\u001b[0m\n"
]
}
],
Expand Down Expand Up @@ -492,12 +492,12 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2024-Dec-06 10:23:18.29\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.listener\u001b[0m:\u001b[36m74 \u001b[0m | \u001b[33m\u001b[1moutput_table not found in listener.dict()\u001b[0m\n",
"\u001b[32m2024-Dec-06 10:23:18.30\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.listener\u001b[0m:\u001b[36m74 \u001b[0m | \u001b[33m\u001b[1moutput_table not found in listener.dict()\u001b[0m\n",
"\u001b[32m2024-Dec-06 10:23:18.30\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m558 \u001b[0m | \u001b[33m\u001b[1mLeaf listener:chunker already exists\u001b[0m\n",
"\u001b[32m2024-Dec-06 10:23:18.30\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.listener\u001b[0m:\u001b[36m74 \u001b[0m | \u001b[33m\u001b[1moutput_table not found in listener.dict()\u001b[0m\n",
"\u001b[32m2024-Dec-06 10:23:18.30\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m558 \u001b[0m | \u001b[33m\u001b[1mLeaf model:chunker already exists\u001b[0m\n",
"\u001b[32m2024-Dec-06 10:23:18.30\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m558 \u001b[0m | \u001b[33m\u001b[1mLeaf var-table-name-select-var-id-field-x already exists\u001b[0m\n"
"\u001b[32m2024-Dec-06 12:39:21.73\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.listener\u001b[0m:\u001b[36m74 \u001b[0m | \u001b[33m\u001b[1moutput_table not found in listener.dict()\u001b[0m\n",
"\u001b[32m2024-Dec-06 12:39:21.73\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.listener\u001b[0m:\u001b[36m74 \u001b[0m | \u001b[33m\u001b[1moutput_table not found in listener.dict()\u001b[0m\n",
"\u001b[32m2024-Dec-06 12:39:21.73\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m558 \u001b[0m | \u001b[33m\u001b[1mLeaf listener:chunker already exists\u001b[0m\n",
"\u001b[32m2024-Dec-06 12:39:21.73\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.components.listener\u001b[0m:\u001b[36m74 \u001b[0m | \u001b[33m\u001b[1moutput_table not found in listener.dict()\u001b[0m\n",
"\u001b[32m2024-Dec-06 12:39:21.73\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m558 \u001b[0m | \u001b[33m\u001b[1mLeaf model:chunker already exists\u001b[0m\n",
"\u001b[32m2024-Dec-06 12:39:21.74\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m558 \u001b[0m | \u001b[33m\u001b[1mLeaf var-table-name-select-var-id-field-x already exists\u001b[0m\n"
]
}
],
Expand All @@ -514,14 +514,16 @@
" 'gpt-3.5-turbo': 'llm_model',\n",
" },\n",
" template_variables=['table_name', 'id_field', 'embedding_model', 'llm_model'],\n",
" default_table=Table(\n",
" 'sample_simple_rag',\n",
" schema=Schema('sample_simple_rag/schema', fields={'x': 'str'}),\n",
" data=RemoteData(\n",
" 'superduper-docs',\n",
" getter=getter,\n",
" )\n",
" ),\n",
" default_tables=[\n",
" Table(\n",
" 'sample_simple_rag',\n",
" schema=Schema('sample_simple_rag/schema', fields={'x': 'str'}),\n",
" data=RemoteData(\n",
" 'superduper-docs',\n",
" getter=getter,\n",
" )\n",
" ),\n",
" ],\n",
" types={\n",
" 'id_field': {\n",
" 'type': 'str',\n",
Expand Down Expand Up @@ -562,7 +564,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2024-Dec-06 10:23:18.31\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m558 \u001b[0m | \u001b[33m\u001b[1mLeaf str already exists\u001b[0m\n"
"\u001b[32m2024-Dec-06 12:39:21.74\u001b[0m| \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mDuncans-MBP.fritz.box\u001b[0m| \u001b[36msuperduper.base.document\u001b[0m:\u001b[36m558 \u001b[0m | \u001b[33m\u001b[1mLeaf str already exists\u001b[0m\n"
]
}
],
Expand Down
17 changes: 10 additions & 7 deletions templates/simple_rag/component.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"_base": "?simple-rag-app",
"_builds": {
"model:chunker": {
"_object": "&:blob:d00327140f722a84c4d86a6f34b80d38ada07db9",
"_object": "&:blob:26b5d7fea3a717415a6a26fe5553e6fe24c035f1",
"upstream": null,
"plugins": null,
"cache": true,
Expand Down Expand Up @@ -228,19 +228,22 @@
"blobs": null,
"files": null,
"requirements": null,
"default_table": "?table:sample_simple_rag",
"default_tables": [
"?table:sample_simple_rag"
],
"staged_file": null,
"queries": null,
"_literals": [
"template"
],
"_builds": {
"str": {
"_path": "superduper.components.schema.FieldType",
"uuid": "2de054f155fb4fa9"
"uuid": "87367228add245bf"
},
"schema:sample_simple_rag/schema": {
"_path": "superduper.components.schema.Schema",
"uuid": "5ec62406d85a4db9",
"uuid": "7d4160707dd9433e",
"upstream": null,
"plugins": null,
"cache": true,
Expand All @@ -253,17 +256,17 @@
},
"dataset:superduper-docs": {
"_path": "superduper.components.dataset.RemoteData",
"uuid": "ed07f690b6834b96",
"uuid": "4a00c2a7d1834ca3",
"upstream": null,
"plugins": null,
"cache": true,
"build_variables": null,
"build_template": null,
"getter": "&:blob:68c35853625b61260ff74705fd1cf8d537caa79e"
"getter": "&:blob:79ce3dfb3c6c84bb7337688e86681e4848847bbb"
},
"table:sample_simple_rag": {
"_path": "superduper.components.table.Table",
"uuid": "dc1821abae974b67",
"uuid": "c616f83b95fd46ef",
"upstream": null,
"plugins": null,
"cache": true,
Expand Down

0 comments on commit 6f91ba6

Please sign in to comment.