-
Notifications
You must be signed in to change notification settings - Fork 188
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Included generate, generate_stream, infer_stream endpoints. * Implemented rest infer_stream. * Included adaptive batching hooks for predict_stream. * Included grpc stream proto. * Implemented ModelInferStream as stream-stream method. * Included lazy fixtures as dependency. * Included tests for infer_stream endpoint and ModelInferStream. * Introduced gzip_enabled flag. * Included grpc stream error handling. --------- Co-authored-by: Adrian Gonzalez-Martin <agm@seldon.io>
- Loading branch information
1 parent
aad4a5a
commit 54cd47e
Showing
38 changed files
with
1,624 additions
and
174 deletions.
There are no files selected for viewing
22 changes: 22 additions & 0 deletions
22
benchmarking/testserver/models/text-model/model-settings.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
{ | ||
"name": "text-model", | ||
|
||
"implementation": "text_model.TextModel", | ||
|
||
"versions": ["text-model/v1.2.3"], | ||
"platform": "mlserver", | ||
"inputs": [ | ||
{ | ||
"datatype": "BYTES", | ||
"name": "prompt", | ||
"shape": [1] | ||
} | ||
], | ||
"outputs": [ | ||
{ | ||
"datatype": "BYTES", | ||
"name": "output", | ||
"shape": [1] | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
{ | ||
"debug": false, | ||
"parallel_workers": 0, | ||
"gzip_enabled": false, | ||
"metrics_endpoint": null | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import asyncio | ||
from typing import AsyncIterator | ||
from mlserver import MLModel | ||
from mlserver.types import InferenceRequest, InferenceResponse | ||
from mlserver.codecs import StringCodec | ||
|
||
|
||
class TextModel(MLModel):
    """Test-server model that echoes its text prompt back to the caller.

    ``predict`` returns the prompt in a single response, while
    ``predict_stream`` returns it piece by piece, one word per response.
    """

    async def predict(self, payload: InferenceRequest) -> InferenceResponse:
        """Return the first decoded string of the first input as ``output``."""
        prompt = StringCodec.decode_input(payload.inputs[0])[0]
        echoed = StringCodec.encode_output(
            name="output",
            payload=[prompt],
            use_bytes=True,
        )
        return InferenceResponse(
            model_name=self._settings.name,
            outputs=[echoed],
        )

    async def predict_stream(
        self, payloads: AsyncIterator[InferenceRequest]
    ) -> AsyncIterator[InferenceResponse]:
        """Yield the prompt of the first request one word at a time.

        The incoming stream is read to exhaustion before anything is
        yielded; only its first request is used. Every word after the first
        is emitted with a leading space, so concatenating the yielded chunks
        reproduces the original prompt. A 0.5 second pause precedes each
        yielded response.

        Raises:
            IndexError: if ``payloads`` yields no request at all.
        """
        # Drain the whole input stream first, then keep only the head.
        received = []
        async for request in payloads:
            received.append(request)
        first_request = received[0]

        prompt = StringCodec.decode_input(first_request.inputs[0])[0]

        # str.split(" ") always returns at least one item, so the unpacking
        # below cannot fail; the separator is re-attached to later words.
        head, *rest = prompt.split(" ")
        chunks = [head] + [" " + token for token in rest]

        for chunk in chunks:
            await asyncio.sleep(0.5)
            encoded = StringCodec.encode_output(
                name="output",
                payload=[chunk],
                use_bytes=True,
            )
            yield InferenceResponse(
                model_name=self._settings.name,
                outputs=[encoded],
            )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Oops, something went wrong.