Skip to content

Commit

Permalink
Modify the apply endpoint in the REST API to an asynchronous interface.
Browse files Browse the repository at this point in the history
  • Loading branch information
jieguangzhou authored and blythed committed Oct 30, 2024
1 parent e790609 commit e6b9d5f
Show file tree
Hide file tree
Showing 14 changed files with 59 additions and 34 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add handling for SQL table names that exceed the length limit, and UUID truncation.
- Add CI workflow to test templates.
- Add deploy flag in model.
- Change the apply endpoint in the REST API to an asynchronous interface.

#### Bug Fixes

Expand Down
5 changes: 1 addition & 4 deletions superduper/base/config_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,7 @@
USER_CONFIG: str = (
str(Path(CONFIG_FILE).expanduser())
if CONFIG_FILE
else (
f'{HOME}/.superduper/config.yaml' if HOME
else None
)
else (f'{HOME}/.superduper/config.yaml' if HOME else None)
)
PREFIX = 'SUPERDUPER_'
ROOT = Path(__file__).parents[2]
Expand Down
28 changes: 17 additions & 11 deletions superduper/rest/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import magic
except ImportError:
magic = None
from fastapi import File, Response
from fastapi import BackgroundTasks, File, Response
from fastapi.responses import JSONResponse

from superduper import logging
Expand Down Expand Up @@ -158,7 +158,10 @@ def test_log():
os.remove(log_file)
return {'status': 'ok'}

def _process_db_apply(db, info):
def _process_db_apply(db, component):
db.apply(component, force=True)

def _process_apply_info(db, info):
if '_variables' in info:
assert {'_variables', 'identifier'}.issubset(info.keys())
variables = info.pop('_variables')
Expand All @@ -177,25 +180,28 @@ def _process_db_apply(db, info):
db=db,
**variables,
)
db.apply(component, force=True)
return {'status': 'ok'}
return component
component = Document.decode(info, db=db).unpack()
# TODO this shouldn't be necessary to do twice
component.unpack()
db.apply(component, force=True)
return {'status': 'ok'}
return component

@app.add('/db/apply', method='post')
def db_apply(
info: t.Dict, id: str | None = 'test', db: 'Datalayer' = DatalayerDependency()
async def db_apply(
info: t.Dict,
background_tasks: BackgroundTasks,
id: str | None = 'test',
db: 'Datalayer' = DatalayerDependency(),
):
if id:
log_file = f"/tmp/{id}.log"
with redirect_stdout_to_file(log_file):
out = _process_db_apply(db, info)
component = _process_apply_info(db, info)
background_tasks.add_task(_process_db_apply, db, component)
else:
out = _process_db_apply(db, info)
return out
component = _process_apply_info(db, info)
background_tasks.add_task(_process_db_apply, db, component)
return {'status': 'ok'}

import subprocess
import time
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
8 changes: 5 additions & 3 deletions templates/llm_finetuning/build.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -297,15 +297,17 @@
"metadata": {},
"outputs": [],
"source": [
"from superduper import Template, Table, Schema\n",
"from superduper import Template, Table, Schema, Application\n",
"from superduper.components.dataset import RemoteData\n",
"\n",
"llm.trainer.use_lora = \"<var:use_lora>\"\n",
"llm.trainer.num_train_epochs = \"<var:num_train_epochs>\"\n",
"\n",
"app = Application(identifier=\"llm\", components=[llm])\n",
"\n",
"t = Template(\n",
" 'llm-finetune',\n",
" template=llm,\n",
" template=app,\n",
" substitutions={\n",
" TABLE_NAME: 'table_name',\n",
" model_name: 'model_name',\n",
Expand Down Expand Up @@ -400,7 +402,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.11.5"
}
},
"nbformat": 4,
Expand Down
27 changes: 21 additions & 6 deletions templates/llm_finetuning/component.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,19 @@
"method": "dill",
"encodable": "artifact"
},
"913bc7838286b1f1fa6f4f90969bd9acf4dc205f": {
"a026ecb590e04810d1e0f9d4ef7195e74c3614c3": {
"_path": "superduper.components.datatype.Artifact",
"datatype": "?datatype:dill",
"uri": null,
"blob": "&:blob:913bc7838286b1f1fa6f4f90969bd9acf4dc205f"
"blob": "&:blob:a026ecb590e04810d1e0f9d4ef7195e74c3614c3"
},
"dataset:llm_finetuning": {
"_path": "superduper.components.dataset.RemoteData",
"upstream": null,
"plugins": null,
"cache": true,
"status": null,
"getter": "?913bc7838286b1f1fa6f4f90969bd9acf4dc205f"
"getter": "?a026ecb590e04810d1e0f9d4ef7195e74c3614c3"
},
"table:sample_llm_finetuning": {
"_path": "superduper.components.table.Table",
Expand Down Expand Up @@ -236,7 +236,7 @@
"uri": null,
"blob": "&:blob:4a8dc14137b3a79a81256a795b266fe82bda52d9"
},
"llm": {
"model:llm": {
"_path": "superduper_transformers.model.LLM",
"upstream": null,
"plugins": null,
Expand All @@ -253,6 +253,7 @@
"num_workers": 0,
"serve": false,
"trainer": "?trainer:llm-finetune-trainer",
"deploy": false,
"prompt": "{input}",
"prompt_func": null,
"max_batch_size": 4,
Expand All @@ -261,15 +262,29 @@
"model_kwargs": "?4a8dc14137b3a79a81256a795b266fe82bda52d9",
"tokenizer_kwargs": "?4a8dc14137b3a79a81256a795b266fe82bda52d9",
"prompt_template": "{input}"
},
"llm": {
"_path": "superduper.components.application.Application",
"upstream": null,
"plugins": null,
"cache": true,
"status": null,
"components": [
"?model:llm"
],
"namespace": null,
"link": null,
"_literals": [
"template"
]
}
}
},
"template_variables": [
"table_name",
"model_name",
"use_lora",
"num_train_epochs",
"output_prefix"
"num_train_epochs"
],
"types": {
"collection": {
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion templates/simple_rag/build.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.11.5"
}
},
"nbformat": 4,
Expand Down
22 changes: 13 additions & 9 deletions templates/simple_rag/component.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,19 @@
"method": "dill",
"encodable": "artifact"
},
"3cf26512c472a982bfddddd931f59a82c03f74e9": {
"60ca8cae14f69a82da08cb69135b087ad78d5203": {
"_path": "superduper.components.datatype.Artifact",
"datatype": "?datatype:dill",
"uri": null,
"blob": "&:blob:3cf26512c472a982bfddddd931f59a82c03f74e9"
"blob": "&:blob:60ca8cae14f69a82da08cb69135b087ad78d5203"
},
"dataset:superduper-docs": {
"_path": "superduper.components.dataset.RemoteData",
"upstream": null,
"plugins": null,
"cache": true,
"status": null,
"getter": "?3cf26512c472a982bfddddd931f59a82c03f74e9"
"getter": "?60ca8cae14f69a82da08cb69135b087ad78d5203"
},
"table:sample_simple_rag": {
"_path": "superduper.components.table.Table",
Expand All @@ -58,14 +58,14 @@
"method": "dill",
"encodable": "artifact"
},
"adc059378de6bb27a2f6b02b8bf0e92f2c97e1e8": {
"039aed30ddb0b6d251775302b5055579eec0c7f8": {
"_path": "superduper.components.datatype.Artifact",
"datatype": "?datatype:dill",
"uri": null,
"blob": "&:blob:adc059378de6bb27a2f6b02b8bf0e92f2c97e1e8"
"blob": "&:blob:039aed30ddb0b6d251775302b5055579eec0c7f8"
},
"model:chunker": {
"_object": "?adc059378de6bb27a2f6b02b8bf0e92f2c97e1e8",
"_object": "?039aed30ddb0b6d251775302b5055579eec0c7f8",
"upstream": null,
"plugins": null,
"cache": true,
Expand All @@ -81,12 +81,13 @@
"num_workers": 0,
"serve": false,
"trainer": null,
"deploy": false,
"chunk_size": 200
},
"<var:table_name>-select-id-x": {
"var-table-name-select-var-id-field-x": {
"_path": "superduper_<var:databackend>.query.parse_query",
"documents": [],
"query": "<var:table_name>.select(\"id\", \"x\")"
"query": "<var:table_name>.select(\"<var:id_field>\", \"x\")"
},
"listener:chunker": {
"_path": "superduper.components.listener.Listener",
Expand All @@ -98,7 +99,7 @@
"key": "x",
"model": "?model:chunker",
"predict_kwargs": {},
"select": "?<var:table_name>-select-id-x",
"select": "?var-table-name-select-var-id-field-x",
"flatten": true
},
"datatype:sqlvector[1536]": {
Expand All @@ -124,6 +125,7 @@
"num_workers": 0,
"serve": false,
"trainer": null,
"deploy": false,
"model": "text-embedding-ada-002",
"max_batch_size": 8,
"openai_api_key": null,
Expand Down Expand Up @@ -193,6 +195,7 @@
"num_workers": 0,
"serve": false,
"trainer": null,
"deploy": false,
"model": "gpt-4-turbo",
"max_batch_size": 8,
"openai_api_key": null,
Expand All @@ -218,6 +221,7 @@
"num_workers": 0,
"serve": false,
"trainer": null,
"deploy": false,
"prompt_template": "Use the following context snippets, these snippets are not ordered!, Answer the question based on this context.\nThese snippets are samples from our internal data-repositories, and should be used exclusively and as a matter of priority to answer the question\n\n{context}\n\nHere's the question: {query}",
"select": "?outputs-chunker-?(listener:chunker.uuid)-select-like-outputs-chunker-?(listener:chunker.uuid)-var-query-vector-index-vectorindex-n-5",
"key": "<var:output_prefix>chunker__?(listener:chunker.uuid)",
Expand Down

0 comments on commit e6b9d5f

Please sign in to comment.