Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/local cluster #2555

Merged
merged 1 commit into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Restore some training test cases
- Simple querying shell
- Fix existing templates
- Add optional data insert to `Table`

#### New Features & Functionality

Expand Down
1 change: 1 addition & 0 deletions plugins/sqlalchemy/plugin_test/config.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
artifact_store: null
data_backend: sqlite://
auto_schema: false
json_native: false
6 changes: 5 additions & 1 deletion plugins/sqlalchemy/superduper_sqlalchemy/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,11 @@ def _init_tables(self):
self._table_mapping = {
'_artifact_relations': self.artifact_table,
}
metadata.create_all(self.conn)

try:
metadata.create_all(self.conn)
except Exception as e:
logging.error(f'Error creating tables: {e}')

def _create_data(self, table_name, datas):
table = self._table_mapping[table_name]
Expand Down
8 changes: 0 additions & 8 deletions superduper/backends/local/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,6 @@ def __delitem__(self, item):

def initialize(self):
"""Initialize the cache."""
for component_data in self.db.show():
type_id = component_data['type_id']
identifier = component_data['identifier']
r = self.db.show(type_id=type_id, identifier=identifier, version=-1)
if r.get('cache', False):
component = self.db.load(type_id=type_id, identifier=identifier)
self.put(component)
self.db.cluster.compute.put(component)

def drop(self):
"""Drop the cache."""
Expand Down
1 change: 1 addition & 0 deletions superduper/base/datalayer.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ def _insert(
)
if auto_schema and self.cfg.auto_schema:
self._auto_create_table(insert.table, insert.documents)
# <--- need to wait here --->

inserted_ids = insert.do_execute(self)

Expand Down
22 changes: 0 additions & 22 deletions superduper/components/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,25 +94,3 @@ def __str__(self):
return f'Dataset(identifier={self.identifier}, select={self.select})'

__repr__ = __str__


class DataInit(Component):
"""A data initialization component.

:param data: The data to initialize.
:param table: The table to insert the data.
"""

data: t.List[t.Dict]
table: str

def on_create(self, db: Datalayer) -> None:
"""Called after the first time this component is created.

Generally used if ``self.version`` is important in this logic.

:param db: the db that creates the component.
"""
super().on_create(db)
self.init()
db[self.table].insert(self.data).execute()
10 changes: 10 additions & 0 deletions superduper/components/table.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import typing as t

from superduper import CFG
from superduper.base.annotations import trigger
from superduper.components.component import Component
from superduper.components.datatype import pickle_serializer
from superduper.components.schema import Schema

if t.TYPE_CHECKING:
Expand All @@ -16,12 +18,16 @@ class Table(Component):

:param schema: The schema of the table
:param primary_id: The primary id of the table
:param data: Data to insert post creation
"""

_artifacts: t.ClassVar[t.Tuple[str]] = (('data', pickle_serializer),)

type_id: t.ClassVar[str] = 'table'

schema: Schema
primary_id: str = DEFAULT_PRIMARY_ID
data: t.List[t.Dict] | None = None

def __post_init__(self, db, artifacts):
super().__post_init__(db, artifacts)
Expand Down Expand Up @@ -55,3 +61,7 @@ def on_create(self, db: 'Datalayer'):
pass
else:
raise e

@trigger('apply', requires='data')
def add_data(self):
self.db[self.identifier].insert(self.data).execute()
18 changes: 4 additions & 14 deletions superduper/components/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@
import os
import typing as t

from superduper import logging
from superduper.base.constant import KEY_BLOBS, KEY_FILES
from superduper.base.datalayer import Datalayer
from superduper.base.document import Document, QueryUpdateDocument
from superduper.base.leaf import Leaf
from superduper.base.variables import _replace_variables
from superduper.components.component import Component, _build_info_from_path
from superduper.components.datatype import pickle_serializer
from superduper.components.table import Table
from superduper.misc.special_dicts import SuperDuperFlatEncode

from .component import ensure_initialized
Expand Down Expand Up @@ -91,14 +90,14 @@ class Template(_BaseTemplate):

:param data: Sample data to test the template.
:param requirements: pip requirements for the template.
:param default_table: Default table to be used with the template.
"""

_artifacts: t.ClassVar[t.Tuple[str]] = (('data', pickle_serializer),)

type_id: t.ClassVar[str] = "template"

data: t.List[t.Dict] | None = None
requirements: t.Optional[t.List[str]] = None
requirements: t.List[str] | None = None
default_table: Table | None = None

def _pre_create(self, db: Datalayer) -> None:
"""Run before the object is created."""
Expand All @@ -107,15 +106,6 @@ def _pre_create(self, db: Datalayer) -> None:
self.files = list(self.template.get(KEY_FILES, {}).keys())
db.artifact_store.save_artifact(self.template)
self.init(db)
if self.data is not None:
if not db.cfg.auto_schema:
logging.warn('Auto schema is disabled. Skipping data insertion.')
return
db[self.default_table].insert(self.data).execute()

@property
def default_table(self):
return f'_sample_{self.identifier}'

def export(
self,
Expand Down
21 changes: 21 additions & 0 deletions superduper/rest/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import magic
from fastapi import File, Response
from fastapi.responses import JSONResponse

from superduper import logging
from superduper.backends.base.query import Query
Expand Down Expand Up @@ -59,6 +60,26 @@ def build_rest_app(app: SuperDuperApp):
:param app: SuperDuperApp
"""

@app.add("/health", method="get")
def health():
return {"status": 200}

@app.add("/handshake/config", method="post")
def handshake(cfg: str):
from superduper import CFG

cfg_dict = json.loads(cfg)
match = CFG.match(cfg_dict)
if match:
return {"status": 200, "msg": "matched"}

diff = CFG.diff(cfg_dict)

return JSONResponse(
status_code=400,
content={"error": f"Config doesn't match based on this diff: {diff}"},
)

@app.add('/db/artifact_store/put', method='put')
def db_artifact_store_put_bytes(
raw: bytes = File(...), db: 'Datalayer' = DatalayerDependency()
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading
Loading