Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modify the apply endpoint in the REST API to an asynchronous interface #2572

Merged
merged 1 commit into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add handling for SQL table lengths that exceed the limit, and UUID truncation.
- Add CI workflow to test templates.
- Add deploy flag in model.
- Modify the apply endpoint in the REST API to an asynchronous interface.

#### Bug Fixes

Expand Down
5 changes: 1 addition & 4 deletions superduper/base/config_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,7 @@
USER_CONFIG: str = (
str(Path(CONFIG_FILE).expanduser())
if CONFIG_FILE
else (
f'{HOME}/.superduper/config.yaml' if HOME
else None
)
else (f'{HOME}/.superduper/config.yaml' if HOME else None)
)
PREFIX = 'SUPERDUPER_'
ROOT = Path(__file__).parents[2]
Expand Down
28 changes: 17 additions & 11 deletions superduper/rest/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import magic
except ImportError:
magic = None
from fastapi import File, Response
from fastapi import BackgroundTasks, File, Response
from fastapi.responses import JSONResponse

from superduper import logging
Expand Down Expand Up @@ -158,7 +158,10 @@ def test_log():
os.remove(log_file)
return {'status': 'ok'}

def _process_db_apply(db, info):
def _process_db_apply(db, component):
db.apply(component, force=True)

def _process_apply_info(db, info):
if '_variables' in info:
assert {'_variables', 'identifier'}.issubset(info.keys())
variables = info.pop('_variables')
Expand All @@ -177,25 +180,28 @@ def _process_db_apply(db, info):
db=db,
**variables,
)
db.apply(component, force=True)
return {'status': 'ok'}
return component
component = Document.decode(info, db=db).unpack()
# TODO this shouldn't be necessary to do twice
component.unpack()
db.apply(component, force=True)
return {'status': 'ok'}
return component

@app.add('/db/apply', method='post')
def db_apply(
info: t.Dict, id: str | None = 'test', db: 'Datalayer' = DatalayerDependency()
async def db_apply(
info: t.Dict,
background_tasks: BackgroundTasks,
id: str | None = 'test',
db: 'Datalayer' = DatalayerDependency(),
):
if id:
log_file = f"/tmp/{id}.log"
with redirect_stdout_to_file(log_file):
out = _process_db_apply(db, info)
component = _process_apply_info(db, info)
background_tasks.add_task(_process_db_apply, db, component)
else:
out = _process_db_apply(db, info)
return out
component = _process_apply_info(db, info)
background_tasks.add_task(_process_db_apply, db, component)
return {'status': 'ok'}

import subprocess
import time
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
8 changes: 5 additions & 3 deletions templates/llm_finetuning/build.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -297,15 +297,17 @@
"metadata": {},
"outputs": [],
"source": [
"from superduper import Template, Table, Schema\n",
"from superduper import Template, Table, Schema, Application\n",
"from superduper.components.dataset import RemoteData\n",
"\n",
"llm.trainer.use_lora = \"<var:use_lora>\"\n",
"llm.trainer.num_train_epochs = \"<var:num_train_epochs>\"\n",
"\n",
"app = Application(identifier=\"llm\", components=[llm])\n",
"\n",
"t = Template(\n",
" 'llm-finetune',\n",
" template=llm,\n",
" template=app,\n",
" substitutions={\n",
" TABLE_NAME: 'table_name',\n",
" model_name: 'model_name',\n",
Expand Down Expand Up @@ -400,7 +402,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.11.5"
}
},
"nbformat": 4,
Expand Down
27 changes: 21 additions & 6 deletions templates/llm_finetuning/component.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,19 @@
"method": "dill",
"encodable": "artifact"
},
"913bc7838286b1f1fa6f4f90969bd9acf4dc205f": {
"a026ecb590e04810d1e0f9d4ef7195e74c3614c3": {
"_path": "superduper.components.datatype.Artifact",
"datatype": "?datatype:dill",
"uri": null,
"blob": "&:blob:913bc7838286b1f1fa6f4f90969bd9acf4dc205f"
"blob": "&:blob:a026ecb590e04810d1e0f9d4ef7195e74c3614c3"
},
"dataset:llm_finetuning": {
"_path": "superduper.components.dataset.RemoteData",
"upstream": null,
"plugins": null,
"cache": true,
"status": null,
"getter": "?913bc7838286b1f1fa6f4f90969bd9acf4dc205f"
"getter": "?a026ecb590e04810d1e0f9d4ef7195e74c3614c3"
},
"table:sample_llm_finetuning": {
"_path": "superduper.components.table.Table",
Expand Down Expand Up @@ -236,7 +236,7 @@
"uri": null,
"blob": "&:blob:4a8dc14137b3a79a81256a795b266fe82bda52d9"
},
"llm": {
"model:llm": {
"_path": "superduper_transformers.model.LLM",
"upstream": null,
"plugins": null,
Expand All @@ -253,6 +253,7 @@
"num_workers": 0,
"serve": false,
"trainer": "?trainer:llm-finetune-trainer",
"deploy": false,
"prompt": "{input}",
"prompt_func": null,
"max_batch_size": 4,
Expand All @@ -261,15 +262,29 @@
"model_kwargs": "?4a8dc14137b3a79a81256a795b266fe82bda52d9",
"tokenizer_kwargs": "?4a8dc14137b3a79a81256a795b266fe82bda52d9",
"prompt_template": "{input}"
},
"llm": {
"_path": "superduper.components.application.Application",
"upstream": null,
"plugins": null,
"cache": true,
"status": null,
"components": [
"?model:llm"
],
"namespace": null,
"link": null,
"_literals": [
"template"
]
}
}
},
"template_variables": [
"table_name",
"model_name",
"use_lora",
"num_train_epochs",
"output_prefix"
"num_train_epochs"
],
"types": {
"collection": {
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion templates/simple_rag/build.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.11.5"
}
},
"nbformat": 4,
Expand Down
22 changes: 13 additions & 9 deletions templates/simple_rag/component.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,19 @@
"method": "dill",
"encodable": "artifact"
},
"3cf26512c472a982bfddddd931f59a82c03f74e9": {
"60ca8cae14f69a82da08cb69135b087ad78d5203": {
"_path": "superduper.components.datatype.Artifact",
"datatype": "?datatype:dill",
"uri": null,
"blob": "&:blob:3cf26512c472a982bfddddd931f59a82c03f74e9"
"blob": "&:blob:60ca8cae14f69a82da08cb69135b087ad78d5203"
},
"dataset:superduper-docs": {
"_path": "superduper.components.dataset.RemoteData",
"upstream": null,
"plugins": null,
"cache": true,
"status": null,
"getter": "?3cf26512c472a982bfddddd931f59a82c03f74e9"
"getter": "?60ca8cae14f69a82da08cb69135b087ad78d5203"
},
"table:sample_simple_rag": {
"_path": "superduper.components.table.Table",
Expand All @@ -58,14 +58,14 @@
"method": "dill",
"encodable": "artifact"
},
"adc059378de6bb27a2f6b02b8bf0e92f2c97e1e8": {
"039aed30ddb0b6d251775302b5055579eec0c7f8": {
"_path": "superduper.components.datatype.Artifact",
"datatype": "?datatype:dill",
"uri": null,
"blob": "&:blob:adc059378de6bb27a2f6b02b8bf0e92f2c97e1e8"
"blob": "&:blob:039aed30ddb0b6d251775302b5055579eec0c7f8"
},
"model:chunker": {
"_object": "?adc059378de6bb27a2f6b02b8bf0e92f2c97e1e8",
"_object": "?039aed30ddb0b6d251775302b5055579eec0c7f8",
"upstream": null,
"plugins": null,
"cache": true,
Expand All @@ -81,12 +81,13 @@
"num_workers": 0,
"serve": false,
"trainer": null,
"deploy": false,
"chunk_size": 200
},
"<var:table_name>-select-id-x": {
"var-table-name-select-var-id-field-x": {
"_path": "superduper_<var:databackend>.query.parse_query",
"documents": [],
"query": "<var:table_name>.select(\"id\", \"x\")"
"query": "<var:table_name>.select(\"<var:id_field>\", \"x\")"
},
"listener:chunker": {
"_path": "superduper.components.listener.Listener",
Expand All @@ -98,7 +99,7 @@
"key": "x",
"model": "?model:chunker",
"predict_kwargs": {},
"select": "?<var:table_name>-select-id-x",
"select": "?var-table-name-select-var-id-field-x",
"flatten": true
},
"datatype:sqlvector[1536]": {
Expand All @@ -124,6 +125,7 @@
"num_workers": 0,
"serve": false,
"trainer": null,
"deploy": false,
"model": "text-embedding-ada-002",
"max_batch_size": 8,
"openai_api_key": null,
Expand Down Expand Up @@ -193,6 +195,7 @@
"num_workers": 0,
"serve": false,
"trainer": null,
"deploy": false,
"model": "gpt-4-turbo",
"max_batch_size": 8,
"openai_api_key": null,
Expand All @@ -218,6 +221,7 @@
"num_workers": 0,
"serve": false,
"trainer": null,
"deploy": false,
"prompt_template": "Use the following context snippets, these snippets are not ordered!, Answer the question based on this context.\nThese snippets are samples from our internal data-repositories, and should be used exclusively and as a matter of priority to answer the question\n\n{context}\n\nHere's the question: {query}",
"select": "?outputs-chunker-?(listener:chunker.uuid)-select-like-outputs-chunker-?(listener:chunker.uuid)-var-query-vector-index-vectorindex-n-5",
"key": "<var:output_prefix>chunker__?(listener:chunker.uuid)",
Expand Down
Loading