-
import abc
import dataclasses as dc
import typing as t

import requests

from superduperdb.base.artifact import Artifact
from superduperdb.ext.utils import ensure_initialized


@dc.dataclass
class SuperduperModel(abc.ABC):
    preprocess: t.Union[t.Callable, Artifact, None] = None
    postprocess: t.Union[t.Callable, Artifact, None] = None
    lazy_loading: bool = True

    def __post_init__(self):
        if not isinstance(self.preprocess, Artifact):
            self.preprocess = Artifact(self.preprocess)
        if not isinstance(self.postprocess, Artifact):
            self.postprocess = Artifact(self.postprocess)
        if not self.lazy_loading:
            self.init()

    @ensure_initialized
    def predict(self, X, **kwargs):
        if self.preprocess:
            X = self.preprocess.artifact(X)
        y = self._predict(X)
        if self.postprocess:
            y = self.postprocess.artifact(y)
        return y

    @ensure_initialized
    def batch_predict(self, Xs, **kwargs):
        if self.preprocess:
            Xs = self.preprocess.artifact(Xs)
        ys = self._batch_predict(Xs)
        if self.postprocess:
            ys = self.postprocess.artifact(ys)
        return ys

    def db_predict(self, *args, **kwargs):
        # same as _Predict.predict
        # (fetch_data_from_db, convert_outputs_to_datas, save_to_db are placeholders)
        datas = fetch_data_from_db()
        outputs = self.batch_predict(datas, *args, **kwargs)
        save_datas = convert_outputs_to_datas(outputs)
        save_to_db(save_datas)

    def db_train(self, *args, **kwargs):
        # same as _Train.train
        datas = fetch_data_from_db()
        self.train(datas, *args, **kwargs)
        update_model_to_db(self)
        ...

    def init(self):
        pass

    @abc.abstractmethod
    def _predict(self, X, **kwargs):
        pass

    @abc.abstractmethod
    def _batch_predict(self, Xs, **kwargs):
        pass

    def train(self, datasets, *args, **kwargs):
        # same as _Train.train
        raise NotImplementedError


#############################################################
# The following is how to create a new model class

@dc.dataclass
class OpenAI(SuperduperModel):
    model: str = "gpt-4"

    # Lazy loading using init
    def init(self):
        from openai import Client
        self.client = Client()

    def __post_init__(self):
        return super().__post_init__()

    def _predict(self, X, **kwargs):
        return self.client.complete(X, model=self.model)

    def _batch_predict(self, Xs, **kwargs):
        return self.client.complete(Xs, model=self.model)


@dc.dataclass
class ObjectModel(SuperduperModel):
    object: t.Union[Artifact, t.Any, None] = None
    predict_method: t.Optional[str] = None

    def __post_init__(self):
        super().__post_init__()
        if not isinstance(self.object, Artifact):
            self.object = Artifact(self.object)

    def _predict(self, X, **kwargs):
        func = getattr(self.object.artifact, self.predict_method)
        return func(X, **kwargs)

    def _batch_predict(self, Xs, **kwargs):
        func = getattr(self.object.artifact, self.predict_method)
        return func(Xs, **kwargs)


@dc.dataclass
class APIModel(SuperduperModel):
    api: str = "http://localhost:8000"

    def _predict(self, X, *args, **kwargs):
        return requests.post(self.api, json=X).json()

    def _batch_predict(self, Xs, *args, **kwargs):
        return requests.post(self.api, json=Xs).json()


@dc.dataclass
class Ollama(SuperduperModel):
    model_name: str = "ollama"

    def init(self):
        from ollama import Ollama
        self.model = Ollama(self.model_name)

    def _predict(self, X, **kwargs):
        return self.model.predict(X, **kwargs)

    def _batch_predict(self, Xs, **kwargs):
        return self.model.predict(Xs, **kwargs)


@dc.dataclass
class Huggingface(SuperduperModel):
    model_name: str = "gpt2"

    def init(self):
        from transformers import AutoModelForCausalLM
        self.model = AutoModelForCausalLM.from_pretrained(self.model_name)

    def _predict(self, X, **kwargs):
        return self.model.generate(X, **kwargs)

    def _batch_predict(self, Xs, **kwargs):
        return self.model.generate(Xs, **kwargs)

    def train(self, datasets, *args, **kwargs):
        # same as _Train.train
        from transformers import Trainer
        ...
model = OpenAI(preprocess=..., postprocess=...)

The main point is that all new models only need to focus on their own initialization, prediction, and training functions; they do not need to care about the interaction with the db, including preprocess and postprocess. If we need functional enhancements, such as multi-threaded prediction for API models, we can just write an ApiBatchPredictMixin to add them. If we need to handle context in the LLM scenario, we can write a Mixin that processes the context, or directly inherit from the existing LLM class and override how predict handles its input X.
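For example, the multi-threaded API prediction could be a small mixin along these lines. This is only a sketch: the mixin name and the thread-pool approach are illustrative, not part of the proposal.

import concurrent.futures

class ApiBatchPredictMixin:
    # Illustrative sketch: fan the single-item _predict out over a thread pool,
    # so API-backed models get concurrent batch prediction without other changes.
    max_workers: int = 8

    def _batch_predict(self, Xs, **kwargs):
        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as pool:
            return list(pool.map(lambda x: self._predict(x, **kwargs), Xs))

# Everything else (preprocess/postprocess, db interaction) stays in SuperduperModel:
# class ThreadedAPIModel(ApiBatchPredictMixin, APIModel):
#     pass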
-
An additional suggestion (TBD): the artifact conversion operation could also be left to the parent class. That way, at the model level there is no need to convert between an object and its Artifact wrapper; the attribute can just be used directly. Otherwise, every model has to do the conversion itself.
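A minimal sketch of the difference, reusing ObjectModel._predict from the first comment; the second variant assumes (hypothetically) that the parent class has already unwrapped the Artifact before the model-level code runs:

# today: the model-level code reaches through the Artifact wrapper itself
def _predict(self, X, **kwargs):
    func = getattr(self.object.artifact, self.predict_method)
    return func(X, **kwargs)

# with conversion done by the parent class, the wrapper never shows up here
def _predict(self, X, **kwargs):
    func = getattr(self.object, self.predict_method)
    return func(X, **kwargs)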
-
# implemented once by SuperDuperDB: the user-facing signature mirrors sklearn
def fit(self, X, y, ...):
    ...

# so that usage stays as close as possible to plain sklearn:
model = sklearn.svm.SVC()
model.fit(X, y)

# implemented for each type of model by the community
def _fit(self, train_dataset, valid_dataset, train_func: t.Optional[t.Callable] = None):
    X = []
    for r in train_dataset:
        X.append(r)
    self.train_func = train_func
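One way the two layers could connect, shown only as a sketch; helper names such as build_datasets are hypothetical, not existing SuperDuperDB API:

# sketch: the once-implemented fit prepares datasets (from arrays or from the db)
# and delegates the model-specific work to _fit
def fit(self, X=None, y=None, *, db=None, select=None, **kwargs):
    train_dataset, valid_dataset = build_datasets(X, y, db=db, select=select)  # hypothetical helper
    return self._fit(train_dataset, valid_dataset, **kwargs)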
-
@jieguangzhou to provide ideas on how we can simplify the preprocess -> forward -> postprocess abstraction without losing the low-code aspect.
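One possible shape for such a simplification, purely as a sketch; the CallableModel wrapper below is hypothetical and not an existing SuperDuperDB API, it only illustrates a low-code form of the same three-step pipeline:

import dataclasses as dc
import typing as t

@dc.dataclass
class CallableModel:
    # hypothetical low-code wrapper: three plain callables, no subclassing needed
    preprocess: t.Optional[t.Callable] = None
    forward: t.Optional[t.Callable] = None
    postprocess: t.Optional[t.Callable] = None

    def predict(self, X):
        if self.preprocess:
            X = self.preprocess(X)
        y = self.forward(X)
        if self.postprocess:
            y = self.postprocess(y)
        return y

model = CallableModel(
    preprocess=str.lower,
    forward=lambda text: {"text": text[::-1]},   # stand-in for a real model call
    postprocess=lambda d: d["text"],
)
print(model.predict("Hello"))  # -> "olleh"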