Description
Can be inspired by https://docs.cortex.dev/deployments/realtime-api/parallelism#server-side-batching.
Template implementation:
import threading as td
import time


class PythonPredictor:
    def __init__(self, config):
        self.model = None  # initialize the model here
        self.waiter = td.Event()
        self.waiter.set()

        self.batch_max_size = config["batch_max_size"]
        self.batch_interval = config["batch_interval"]  # measured in seconds
        self.barrier = td.Barrier(self.batch_max_size + 1)

        self.samples = {}
        self.predictions = {}
        td.Thread(target=self._batch_engine).start()

    def _batch_engine(self):
        while True:
            if len(self.predictions) > 0:
                time.sleep(0.001)
                continue

            try:
                self.barrier.wait(self.batch_interval)
            except td.BrokenBarrierError:
                pass

            self.waiter.clear()
            self.predictions = {}
            self.batch_inference()
            self.samples = {}
            self.barrier.reset()
            self.waiter.set()

    def batch_inference(self):
        """Run the batch inference here."""
        # batch process self.samples
        # store results in self.predictions
        # make sure to write the results to self.predictions using the keys from self.samples

    def enqueue_sample(self, sample):
        """Enqueue sample for batch inference. This is a blocking method."""
        thread_id = td.get_ident()
        self.waiter.wait()
        self.samples[thread_id] = sample
        try:
            self.barrier.wait()
        except td.BrokenBarrierError:
            pass

    def get_prediction(self):
        """Return the prediction. This is a blocking method."""
        thread_id = td.get_ident()
        while thread_id not in self.predictions:
            time.sleep(0.001)
        prediction = self.predictions[thread_id]
        del self.predictions[thread_id]
        return prediction

    def predict(self, payload):
        self.enqueue_sample(payload)
        prediction = self.get_prediction()
        return prediction
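For illustration, here is a minimal, hypothetical sketch of how the template could be exercised: an EchoPredictor subclass whose batch_inference simply echoes each sample back, driven by a few client threads calling predict concurrently. The EchoPredictor class and the config values below are assumptions for demonstration, not part of the proposal.

import threading as td

# Hypothetical demo subclass: echoes every enqueued sample back as its "prediction".
class EchoPredictor(PythonPredictor):
    def batch_inference(self):
        # Write one prediction per enqueued sample, keyed by the same
        # thread id that enqueue_sample() used as the key.
        for thread_id, sample in self.samples.items():
            self.predictions[thread_id] = f"echo: {sample}"

# Assumed config values for the demo; real values depend on the deployment.
predictor = EchoPredictor({"batch_max_size": 4, "batch_interval": 0.1})

results = []

def client(i):
    # Each client thread blocks in predict() until its batch has been processed.
    results.append(predictor.predict(f"sample-{i}"))

threads = [td.Thread(target=client, args=(i,)) for i in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()

print(results)  # one echoed prediction per client, produced in a single batch
# Note: the template's _batch_engine thread runs forever, so this demo
# process will not exit on its own.

If fewer than batch_max_size requests arrive within batch_interval seconds, the barrier times out and the engine processes whatever samples have been enqueued so far, so latency stays bounded even under light load.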
Motivation
Useful for those users who really need server-side batching for the Python Predictor.
Has been requested by @manneshiva.