-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* fix: multithread * fix: disable threads * fix: benchmark * fix: move bench to scripts folder * fix: tune threads used by pytorch * fix: docstr * fix: optimize onnx * fix: onnx optimization * fix: remove providers argument from onnx executor * fix: clear codes * fix: more ort optimization * fix: minor revision * fix: add onnx optim * fix: revision * fix: bump onnxruntime-gpu version * fix: temp float16 support * fix: add onnx quantize * fix: polish codes * fix: cast embedding fp16 to fp32 * fix: clean codes * fix: revert quantization * fix: clean setup
- Loading branch information
Showing
5 changed files
with
286 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
import random | ||
import time | ||
from typing import Optional | ||
import threading | ||
import click | ||
import numpy as np | ||
from docarray import Document, DocumentArray | ||
|
||
|
||
# Silence all library warning output for cleaner benchmark logs:
# replace warnings.warn with a no-op before anything else emits warnings.
import warnings


def warn(*_args, **_kwargs):
    """No-op stand-in for :func:`warnings.warn`; swallows every warning."""
    return None


warnings.warn = warn
|
||
|
||
class BenchmarkClient(threading.Thread):
    """A client thread that repeatedly encodes one fixed batch against a
    clip-as-service server and records the mean per-request latency."""

    def __init__(
        self,
        server: str,
        batch_size: int = 1,
        modality: str = 'text',
        num_iter: Optional[int] = 100,
        image_sample: str = None,
        **kwargs,
    ):
        """
        @param server: the clip-as-service server URI
        @param batch_size: number of batch sample
        @param modality: payload type, either 'text' or 'image'
        @param num_iter: number of repeat run per experiment
        @param image_sample: uri of the test image
        """
        # The first two iterations are discarded as warm-up, so at least
        # three are required for the mean to be defined.
        assert num_iter > 2, 'num_iter must be greater than 2'
        super().__init__()
        self.server = server
        self.batch_size = batch_size
        self.modality = modality
        self.image_sample = image_sample
        self.num_iter = num_iter
        # Mean seconds per encode() call (warm-up excluded); read by the
        # launcher after join().
        self.avg_time = 0

    def run(self):
        try:
            from clip_client import Client
        except ImportError:
            # fix: original message had a doubled quote and no space
            # between the two concatenated sentences.
            raise ImportError(
                'clip_client module is not available. It is required for '
                'benchmarking. Please use "pip install clip-client" to install it.'
            )

        if self.modality == 'text':
            from clip_server.model.simple_tokenizer import SimpleTokenizer

            # Build a batch of random 78-token texts drawn from the
            # tokenizer vocabulary.
            tokenizer = SimpleTokenizer()
            vocab = list(tokenizer.encoder.keys())
            batch = DocumentArray(
                [
                    Document(text=' '.join(random.choices(vocab, k=78)))
                    for _ in range(self.batch_size)
                ]
            )
        elif self.modality == 'image':
            # Read the sample image once (with a context manager, so the
            # handle is closed) and reuse the same bytes for every document;
            # the original re-opened the file per document and leaked handles.
            with open(self.image_sample, 'rb') as fp:
                image_bytes = fp.read()
            batch = DocumentArray(
                [Document(blob=image_bytes) for _ in range(self.batch_size)]
            )
        else:
            raise ValueError(f'The modality "{self.modality}" is unsupported')

        client = Client(self.server)

        time_costs = []
        for _ in range(self.num_iter):
            start = time.perf_counter()
            client.encode(batch)
            time_costs.append(time.perf_counter() - start)
        # Drop the first two iterations (connection setup / model warm-up).
        self.avg_time = np.mean(time_costs[2:])
|
||
|
||
@click.command(name='clip-as-service benchmark')
@click.argument('server')
@click.option(
    '--batch_sizes',
    multiple=True,
    type=int,
    default=[1, 8, 16, 32, 64],
    help='number of batch',
)
@click.option(
    '--num_iter', default=10, help='number of repeat run per experiment (must > 2)'
)
@click.option(
    "--concurrent_clients",
    multiple=True,
    type=int,
    default=[1, 4, 16, 32, 64],
    help='number of concurrent clients per experiment',
)
@click.option("--image_sample", help='path to the image sample file')
def main(server, batch_sizes, num_iter, concurrent_clients, image_sample):
    """Sweep the (batch size x client count) grid and print speed stats."""
    # wait until the server is ready
    # Payload type follows from whether an image sample was supplied.
    modality = 'image' if image_sample is not None else 'text'
    for batch_size in batch_sizes:
        for num_client in concurrent_clients:
            # One thread per simulated client, all targeting the same server.
            workers = [
                BenchmarkClient(
                    server,
                    batch_size=batch_size,
                    num_iter=num_iter,
                    modality=modality,
                    image_sample=image_sample,
                )
                for _ in range(num_client)
            ]

            for worker in workers:
                worker.start()

            # Join every client, converting its mean latency into a
            # per-client throughput (documents per second).
            speeds = []
            for worker in workers:
                worker.join()
                speeds.append(batch_size / worker.avg_time)

            max_speed = max(speeds)
            min_speed = min(speeds)
            avg_speed = np.mean(speeds)

            print(
                '(concurrent client=%d, batch_size=%d) avg speed: %.3f\tmax speed: %.3f\tmin speed: %.3f'
                % (num_client, batch_size, avg_speed, max_speed, min_speed),
                flush=True,
            )
|
||
|
||
# Script entry point: invoke the click-decorated benchmark CLI.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.