From da1fbbe7851cd4e0c82ad98e72318554d3328605 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Fri, 16 Dec 2022 17:29:30 +1100
Subject: [PATCH 01/70] add APIs in marqo

---
 src/marqo/errors.py                      |  4 ++++
 src/marqo/s2_inference/errors.py         |  3 +++
 src/marqo/s2_inference/s2_inference.py   | 16 ++++++++++++++-
 src/marqo/tensor_search/api.py           | 26 ++++++++++++++++++++++++
 src/marqo/tensor_search/tensor_search.py | 20 ++++++++++++++++++
 5 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/src/marqo/errors.py b/src/marqo/errors.py
index d4aa7d81c..cfa19aa4f 100644
--- a/src/marqo/errors.py
+++ b/src/marqo/errors.py
@@ -176,6 +176,10 @@ class IndexMaxFieldsError(__InvalidRequestError):
     code = "index_max_fields_error"
     status_code = HTTPStatus.BAD_REQUEST

+class ModelNotLoadedError(__InvalidRequestError):
+    code = "model_not_loaded"
+    status_code = HTTPStatus.NOT_FOUND
+

 # ---MARQO INTERNAL ERROR---

diff --git a/src/marqo/s2_inference/errors.py b/src/marqo/s2_inference/errors.py
index 318c0623d..106b08c62 100644
--- a/src/marqo/s2_inference/errors.py
+++ b/src/marqo/s2_inference/errors.py
@@ -42,3 +42,6 @@ class RerankerImageError(S2InferenceError):

 class RerankerNameError(S2InferenceError):
     pass
+
+class ModelNotLoadedError(S2InferenceError):
+    pass
\ No newline at end of file
diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py
index 07ff4df8e..da53b4221 100644
--- a/src/marqo/s2_inference/s2_inference.py
+++ b/src/marqo/s2_inference/s2_inference.py
@@ -2,12 +2,13 @@
 The functions defined here would have endpoints, later on.
 """
 import numpy as np
-from marqo.s2_inference.errors import VectoriseError, InvalidModelPropertiesError, ModelLoadError, UnknownModelError
+from marqo.s2_inference.errors import VectoriseError, InvalidModelPropertiesError, ModelLoadError, UnknownModelError, ModelNotLoadedError
 from PIL import UnidentifiedImageError
 from marqo.s2_inference.model_registry import load_model_properties
 from marqo.s2_inference.configs import get_default_device, get_default_normalization, get_default_seq_length
 from marqo.s2_inference.types import *
 from marqo.s2_inference.logger import get_logger
+import torch

 logger = get_logger(__name__)

@@ -291,6 +292,19 @@ def _load_model(model_name: str, model_properties: dict, device: str = get_defau

     return model

+def get_available_models():
+    return available_models
+
+def eject_model(model_name:str,device:str):
+    model_cache_key = _create_model_cache_key(model_name, device)
+    if model_cache_key in available_models:
+        del available_models[model_cache_key]
+        if device.startswith("cuda"):
+            torch.cuda.empty_cache()
+        return {"message":f"eject SUCCESS, eject model_name={model_name} from device={device}"}
+    else:
+        raise ModelNotLoadedError(f"The model_name={model_name} device={device} is not loaded")
+
 # def normalize(inputs):

 #     is_valid = False
diff --git a/src/marqo/tensor_search/api.py b/src/marqo/tensor_search/api.py
index 728175ee2..8f907f013 100644
--- a/src/marqo/tensor_search/api.py
+++ b/src/marqo/tensor_search/api.py
@@ -212,6 +212,16 @@ def check_health(marqo_config: config.Config = Depends(generate_config)):
 def get_indexes(marqo_config: config.Config = Depends(generate_config)):
     return tensor_search.get_indexes(config=marqo_config)

+@app.get("/models")
+def get_loaded_models():
+    return tensor_search.get_loaded_models()
+@app.delete("/models")
+def eject_model(model_name:str, model_device:str):
+    return tensor_search.eject_model(model_name = model_name, device = model_device)
+@app.get("/device/cuda")
+def get_cuda_info():
+    return tensor_search.get_cuda_info()
+

 # try these curl commands:
 # ADD DOCS:
@@ -282,3 +292,19 @@ def get_indexes(marqo_config: config.Config = Depends(generate_config)):
 curl -XDELETE http://localhost:8882/indexes/my-irst-ix
 """
+
+# check cuda info
+"""
+curl -XGET http://localhost:8882/device/cuda
+"""
+
+# check the loaded models
+"""
+curl -XGET http://localhost:8882/models
+"""
+
+# eject a model
+"""
+curl -X DELETE 'http://localhost:8882/models?model_name=ViT-L/14&model_device=cuda'
+curl -X DELETE 'http://localhost:8882/models?model_name=hf/all_datasets_v4_MiniLM-L6&model_device=cuda'
+curl -X DELETE 'http://localhost:8882/models?model_name=hf/all_datasets_v4_MiniLM-L6&model_device=cpu'
+"""
\ No newline at end of file
diff --git a/src/marqo/tensor_search/tensor_search.py b/src/marqo/tensor_search/tensor_search.py
index 15ca8b376..54a29c8be 100644
--- a/src/marqo/tensor_search/tensor_search.py
+++ b/src/marqo/tensor_search/tensor_search.py
@@ -54,6 +54,7 @@
 from marqo.s2_inference.clip_utils import _is_image
 from marqo.s2_inference.reranking import rerank
 from marqo.s2_inference import s2_inference
+import torch.cuda

 # We depend on _httprequests.py for now, but this may be replaced in the future, as
 # _httprequests.py is designed for the client
@@ -1238,3 +1239,22 @@ def _get_model_properties(index_info):
                 f"Please provide model_properties if the model is a custom model and is not supported by default")

     return model_properties
+
+def get_loaded_models() -> dict:
+    available_models = s2_inference.get_available_models()
+    message = {
+        'models' : [
+            {"model_name": ix[0], "device": ix[1]} for ix in available_models
+        ]
+    }
+    return message
+def eject_model(model_name: str, device: str) -> dict:
+    try:
+        result = s2_inference.eject_model(model_name, device)
+    except s2_inference_errors.ModelNotLoadedError as e:
+        raise errors.ModelNotLoadedError(message=str(e))
+    return result
+def get_cuda_info() -> dict:
+    return {"device": "cuda",
+            "memory_usage": f"{round(torch.cuda.memory_allocated() / 1024**3, 1)} GiB",
+            "total_device_memory": f"{round(torch.cuda.get_device_properties(0).total_memory/ 1024**3, 1)} GiB"}
\ No newline at end of file
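For context, a minimal client-side sketch of the endpoints this first patch introduces. This is an illustration only (not part of the patch series); it assumes a local Marqo instance listening on http://localhost:8882 and uses the third-party requests package:

import requests

BASE = "http://localhost:8882"

# List the models currently held in the inference cache.
print(requests.get(f"{BASE}/models").json())
# e.g. {"models": [{"model_name": "ViT-L/14", "device": "cuda"}, ...]}

# Inspect CUDA memory usage on the host.
print(requests.get(f"{BASE}/device/cuda").json())

# Evict a model from a specific device; per the new ModelNotLoadedError,
# a 404 comes back if that model/device pair is not loaded.
resp = requests.delete(f"{BASE}/models",
                       params={"model_name": "ViT-L/14", "model_device": "cuda"})
print(resp.status_code, resp.json())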
From 03ca4dd5a0330431957bcd42467ca27616ad1727 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Fri, 16 Dec 2022 17:58:19 +1100
Subject: [PATCH 02/70] test new model cache key

---
 src/marqo/tensor_search/tensor_search.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/marqo/tensor_search/tensor_search.py b/src/marqo/tensor_search/tensor_search.py
index 54a29c8be..23507119f 100644
--- a/src/marqo/tensor_search/tensor_search.py
+++ b/src/marqo/tensor_search/tensor_search.py
@@ -1244,7 +1244,7 @@ def get_loaded_models() -> dict:
     available_models = s2_inference.get_available_models()
     message = {
         'models' : [
-            {"model_name": ix[0], "device": ix[1]} for ix in available_models
+            {"model_name": ix} for ix in available_models
         ]
     }
     return message

From e7e7b0939cad473f99cefd908c8dd800ed8e3fad Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Tue, 20 Dec 2022 22:07:09 +1100
Subject: [PATCH 03/70] cleaning

---
 src/marqo/errors.py                      |  5 +++--
 src/marqo/s2_inference/errors.py         |  3 ++-
 src/marqo/s2_inference/s2_inference.py   |  2 ++
 src/marqo/tensor_search/api.py           | 13 ++++++++++---
 src/marqo/tensor_search/tensor_search.py |  8 ++++----
 5 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/src/marqo/errors.py b/src/marqo/errors.py
index cfa19aa4f..6f047c451 100644
--- a/src/marqo/errors.py
+++ b/src/marqo/errors.py
@@ -176,8 +176,9 @@ class IndexMaxFieldsError(__InvalidRequestError):
     code = "index_max_fields_error"
     status_code = HTTPStatus.BAD_REQUEST

-class ModelNotLoadedError(__InvalidRequestError):
-    code = "model_not_loaded"
+
+class ModelNotInCache(__InvalidRequestError):
+    code = "model_not_in_cache"
     status_code = HTTPStatus.NOT_FOUND

 # ---MARQO INTERNAL ERROR---

diff --git a/src/marqo/s2_inference/errors.py b/src/marqo/s2_inference/errors.py
index 106b08c62..3a7a54073 100644
--- a/src/marqo/s2_inference/errors.py
+++ b/src/marqo/s2_inference/errors.py
@@ -43,5 +43,6 @@ class RerankerImageError(S2InferenceError):

 class RerankerNameError(S2InferenceError):
     pass
-
-class ModelNotLoadedError(S2InferenceError):
+
+class ModelNotInCache(S2InferenceError):
     pass
\ No newline at end of file
diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py
index da53b4221..4708cca3a 100644
--- a/src/marqo/s2_inference/s2_inference.py
+++ b/src/marqo/s2_inference/s2_inference.py
@@ -292,9 +292,11 @@ def _load_model(model_name: str, model_properties: dict, device: str = get_defau

     return model

+
 def get_available_models():
     return available_models

+
 def eject_model(model_name:str,device:str):
     model_cache_key = _create_model_cache_key(model_name, device)
     if model_cache_key in available_models:
diff --git a/src/marqo/tensor_search/api.py b/src/marqo/tensor_search/api.py
index 8f907f013..2e935f11e 100644
--- a/src/marqo/tensor_search/api.py
+++ b/src/marqo/tensor_search/api.py
@@ -212,15 +212,22 @@ def check_health(marqo_config: config.Config = Depends(generate_config)):
 def get_indexes(marqo_config: config.Config = Depends(generate_config)):
     return tensor_search.get_indexes(config=marqo_config)

+
 @app.get("/models")
 def get_loaded_models():
     return tensor_search.get_loaded_models()
+
+
 @app.delete("/models")
 def eject_model(model_name:str, model_device:str):
     return tensor_search.eject_model(model_name = model_name, device = model_device)
+
+
-@app.get("/device/cuda")
-def get_cuda_info():
-    return tensor_search.get_cuda_info()
+@app.get("/device/cuda/{device}")
+def get_cuda_info(device: int = 0):
+    return tensor_search.get_cuda_info(device)
+

 # try these curl commands:
diff --git a/src/marqo/tensor_search/tensor_search.py b/src/marqo/tensor_search/tensor_search.py
index 23507119f..a589aefe7 100644
--- a/src/marqo/tensor_search/tensor_search.py
+++ b/src/marqo/tensor_search/tensor_search.py
@@ -1251,10 +1251,10 @@ def get_loaded_models() -> dict:
 def eject_model(model_name: str, device: str) -> dict:
     try:
         result = s2_inference.eject_model(model_name, device)
-    except s2_inference_errors.ModelNotLoadedError as e:
-        raise errors.ModelNotLoadedError(message=str(e))
+    except s2_inference_errors.ModelNotInCache as e:
+        raise errors.ModelNotInCache(message=str(e))
     return result
-def get_cuda_info() -> dict:
+def get_cuda_info(device: int = 0) -> dict:
     return {"device": "cuda",
             "memory_usage": f"{round(torch.cuda.memory_allocated() / 1024**3, 1)} GiB",
-            "total_device_memory": f"{round(torch.cuda.get_device_properties(0).total_memory/ 1024**3, 1)} GiB"}
\ No newline at end of file
+            "total_device_memory": f"{round(torch.cuda.get_device_properties(device).total_memory/ 1024**3, 1)} GiB"}
\ No newline at end of file

From 8ad198bae80cddcce1a45763c1ba31c7c0f13020 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Tue, 20 Dec 2022 22:08:16 +1100
Subject: [PATCH 04/70] Add todo

---
 src/marqo/tensor_search/api.py | 1 +
 1 file changed, 1 insertion(+)
diff --git a/src/marqo/tensor_search/api.py b/src/marqo/tensor_search/api.py
index 2e935f11e..358333426 100644
--- a/src/marqo/tensor_search/api.py
+++ b/src/marqo/tensor_search/api.py
@@ -224,6 +224,7 @@ def eject_model(model_name:str, model_device:str):

 @app.get("/device/cuda/{device}")
+# TODO: Add device detection and raise error
 def get_cuda_info(device: int = 0):
     return tensor_search.get_cuda_info(device)

From d8a4fddead7f2f4c08e5a7f420cda41c1ef37edd Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 12:08:45 +1100
Subject: [PATCH 05/70] add multi-gpu support

---
 src/marqo/tensor_search/api.py           | 7 +++----
 src/marqo/tensor_search/tensor_search.py | 7 ++++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/marqo/tensor_search/api.py b/src/marqo/tensor_search/api.py
index 358333426..5715cd133 100644
--- a/src/marqo/tensor_search/api.py
+++ b/src/marqo/tensor_search/api.py
@@ -223,10 +223,9 @@ def eject_model(model_name:str, model_device:str):
     return tensor_search.eject_model(model_name = model_name, device = model_device)

-@app.get("/device/cuda/{device}")
-# TODO: Add device detection and raise error
-def get_cuda_info(device: int = 0):
-    return tensor_search.get_cuda_info(device)
+@app.get("/device/cuda")
+def get_cuda_info():
+    return tensor_search.get_cuda_info()

diff --git a/src/marqo/tensor_search/tensor_search.py b/src/marqo/tensor_search/tensor_search.py
index a589aefe7..d5f669e1c 100644
--- a/src/marqo/tensor_search/tensor_search.py
+++ b/src/marqo/tensor_search/tensor_search.py
@@ -1255,6 +1255,7 @@ def eject_model(model_name: str, device: str) -> dict:
         raise errors.ModelNotInCache(message=str(e))
     return result
 def get_cuda_info(device: int = 0) -> dict:
-    return {"device": "cuda",
-            "memory_usage": f"{round(torch.cuda.memory_allocated() / 1024**3, 1)} GiB",
-            "total_device_memory": f"{round(torch.cuda.get_device_properties(device).total_memory/ 1024**3, 1)} GiB"}
\ No newline at end of file
+    return {"cuda_usage_info":[{"device_id" : id, "device_name" : torch.cuda.get_device_name(id),
+                                "memory_used":f"{round(torch.cuda.memory_allocated(id) / 1024**3, 1)} GiB",
+                                "total_memory": f"{round(torch.cuda.get_device_properties(id).total_memory/ 1024**3, 1)} GiB"}
+                               for id in range(torch.cuda.device_count())]}
\ No newline at end of file

From db30bcbbc3c27d80311c16dcb5213f4865699026 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 12:30:52 +1100
Subject: [PATCH 06/70] add multi-gpu support

---
 src/marqo/tensor_search/tensor_search.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/marqo/tensor_search/tensor_search.py b/src/marqo/tensor_search/tensor_search.py
index d5f669e1c..bb767487f 100644
--- a/src/marqo/tensor_search/tensor_search.py
+++ b/src/marqo/tensor_search/tensor_search.py
@@ -1254,7 +1254,7 @@ def eject_model(model_name: str, device: str) -> dict:
         raise errors.ModelNotInCache(message=str(e))
     return result
-def get_cuda_info(device: int = 0) -> dict:
+def get_cuda_info() -> dict:
     return {"cuda_usage_info":[{"device_id" : id, "device_name" : torch.cuda.get_device_name(id),
                                 "memory_used":f"{round(torch.cuda.memory_allocated(id) / 1024**3, 1)} GiB",
                                 "total_memory": f"{round(torch.cuda.get_device_properties(id).total_memory/ 1024**3, 1)} GiB"}
                                for id in range(torch.cuda.device_count())]}
\ No newline at end of file

From f0a801b482f297f8baf8e08658886e19a4ef7c0a Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 12:31:08 +1100
Subject: [PATCH 07/70] space adding

---
 src/marqo/tensor_search/tensor_search.py | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/src/marqo/tensor_search/tensor_search.py b/src/marqo/tensor_search/tensor_search.py
index bb767487f..ad6a627b1 100644
--- a/src/marqo/tensor_search/tensor_search.py
+++ b/src/marqo/tensor_search/tensor_search.py
@@ -1248,12 +1248,16 @@ def get_loaded_models() -> dict:
         ]
     }
     return message
+
+
 def eject_model(model_name: str, device: str) -> dict:
     try:
         result = s2_inference.eject_model(model_name, device)
     except s2_inference_errors.ModelNotInCache as e:
         raise errors.ModelNotInCache(message=str(e))
     return result
+
+
 def get_cuda_info() -> dict:
     return {"cuda_usage_info":[{"device_id" : id, "device_name" : torch.cuda.get_device_name(id),
                                 "memory_used":f"{round(torch.cuda.memory_allocated(id) / 1024**3, 1)} GiB",

From e0f14a25a78dde0b772c6595001242a8f2579225 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 14:22:45 +1100
Subject: [PATCH 08/70] adding cpu usage, RAM usage api

---
 src/marqo/tensor_search/api.py           |  6 ++++++
 src/marqo/tensor_search/tensor_search.py | 10 +++++++++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/marqo/tensor_search/api.py b/src/marqo/tensor_search/api.py
index 5715cd133..32e449afa 100644
--- a/src/marqo/tensor_search/api.py
+++ b/src/marqo/tensor_search/api.py
@@ -223,6 +223,12 @@ def eject_model(model_name:str, model_device:str):
     return tensor_search.eject_model(model_name = model_name, device = model_device)

+@app.get("/device/cpu")
+def get_cpu_info():
+    return tensor_search.get_cpu_info()
+
+
+
 @app.get("/device/cuda")
 def get_cuda_info():
     return tensor_search.get_cuda_info()

diff --git a/src/marqo/tensor_search/tensor_search.py b/src/marqo/tensor_search/tensor_search.py
index ad6a627b1..0d96e53d5 100644
--- a/src/marqo/tensor_search/tensor_search.py
+++ b/src/marqo/tensor_search/tensor_search.py
@@ -55,7 +55,7 @@
 from marqo.s2_inference.reranking import rerank
 from marqo.s2_inference import s2_inference
 import torch.cuda
-
+import psutil
 # We depend on _httprequests.py for now, but this may be replaced in the future, as
 # _httprequests.py is designed for the client
 from marqo._httprequests import HttpRequests
@@ -1258,6 +1258,14 @@ def eject_model(model_name: str, device: str) -> dict:
     return result

+def get_cpu_info() -> dict:
+    return {
+        "CPU usage (last 5 seconds)" :f"{psutil.cpu_percent(4)} %",
+        "RAM memory % used": f"{psutil.virtual_memory()[2]} %",
+        "RAM Used (GB)" : f"{psutil.virtual_memory()[3]/1000000000}",
+    }
+
+
 def get_cuda_info() -> dict:
     return {"cuda_usage_info":[{"device_id" : id, "device_name" : torch.cuda.get_device_name(id),
                                 "memory_used":f"{round(torch.cuda.memory_allocated(id) / 1024**3, 1)} GiB",
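A side note on the psutil readings used by get_cpu_info above: the patch indexes into psutil.virtual_memory() by position. For reference, a standalone sketch (illustrative only, assuming psutil is installed) showing the named-attribute equivalents of those numeric indexes:

import psutil

vm = psutil.virtual_memory()           # a named tuple: (total, available, percent, used, ...)
print(psutil.cpu_percent(interval=4))  # CPU utilisation sampled over 4 seconds, as in cpu_percent(4)
print(vm.percent)                      # the same value as psutil.virtual_memory()[2]
print(vm.used / 1_000_000_000)         # the same value as psutil.virtual_memory()[3], converted to GB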
From 186180a64a563cd57b908ef685cfd354b19ea6cb Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 14:24:49 +1100
Subject: [PATCH 09/70] adding cpu usage, RAM usage api

---
 src/marqo/tensor_search/api.py           | 5 +++++
 src/marqo/tensor_search/tensor_search.py | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/marqo/tensor_search/api.py b/src/marqo/tensor_search/api.py
index 32e449afa..9b1d99b51 100644
--- a/src/marqo/tensor_search/api.py
+++ b/src/marqo/tensor_search/api.py
@@ -305,6 +305,11 @@ def get_cuda_info():
 curl -XDELETE http://localhost:8882/indexes/my-irst-ix
 """

+# check cpu info
+"""
+curl -XGET http://localhost:8882/device/cpu
+"""
+
 # check cuda info
 """
 curl -XGET http://localhost:8882/device/cuda

diff --git a/src/marqo/tensor_search/tensor_search.py b/src/marqo/tensor_search/tensor_search.py
index 0d96e53d5..4a48bd916 100644
--- a/src/marqo/tensor_search/tensor_search.py
+++ b/src/marqo/tensor_search/tensor_search.py
@@ -1260,9 +1260,9 @@ def eject_model(model_name: str, device: str) -> dict:

 def get_cpu_info() -> dict:
     return {
-        "CPU usage (last 5 seconds)" :f"{psutil.cpu_percent(4)} %",
+        "CPU usage (last 5 seconds)": f"{psutil.cpu_percent(4)} %",
         "RAM memory % used": f"{psutil.virtual_memory()[2]} %",
-        "RAM Used (GB)" : f"{psutil.virtual_memory()[3]/1000000000}",
+        "RAM Used (GB)": f"{psutil.virtual_memory()[3]/1000000000}",
     }

From 2c77fa70b3ac8194d65689f209394f20526f8b53 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 14:28:00 +1100
Subject: [PATCH 10/70] adding cpu usage, RAM usage api

---
 src/marqo/s2_inference/s2_inference.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py
index 4708cca3a..47a0fb992 100644
--- a/src/marqo/s2_inference/s2_inference.py
+++ b/src/marqo/s2_inference/s2_inference.py
@@ -2,7 +2,7 @@
 The functions defined here would have endpoints, later on.
 """
 import numpy as np
-from marqo.s2_inference.errors import VectoriseError, InvalidModelPropertiesError, ModelLoadError, UnknownModelError, ModelNotLoadedError
+from marqo.s2_inference.errors import VectoriseError, InvalidModelPropertiesError, ModelLoadError, UnknownModelError, ModelNotInCache
 from PIL import UnidentifiedImageError
 from marqo.s2_inference.model_registry import load_model_properties
 from marqo.s2_inference.configs import get_default_device, get_default_normalization, get_default_seq_length
@@ -305,7 +305,7 @@ def eject_model(model_name:str,device:str):
         return {"message":f"eject SUCCESS, eject model_name={model_name} from device={device}"}
     else:
-        raise ModelNotLoadedError(f"The model_name={model_name} device={device} is not loaded")
+        raise ModelNotInCache(f"The model_name={model_name} device={device} is not loaded")
+ "RAM memory % used": f"{psutil.virtual_memory()[2]} %", # The number 2 is just a index number to get the expected results + "RAM Used (GB)": f"{round(psutil.virtual_memory()[3]/1000000000,1)}", # The number 3 is just a index number to get the expected results } From a95d030a5e64b8c86dca47c5f84c490c51e3c697 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 21 Dec 2022 14:39:53 +1100 Subject: [PATCH 12/70] revert back model cache key --- src/marqo/s2_inference/s2_inference.py | 10 +--------- src/marqo/tensor_search/tensor_search.py | 2 +- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index 47a0fb992..1f174c730 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -61,15 +61,7 @@ def _create_model_cache_key(model_name: str, device: str, model_properties: dict Returns: str: _description_ """ - if model_properties is None: - model_properties = dict() - - model_cache_key = (model_name - + model_properties.get('name', '') - + str(model_properties.get('dimensions', '')) - + model_properties.get('type', '') - + str(model_properties.get('tokens', '')) - + device) + model_cache_key = (model_name, device) return model_cache_key diff --git a/src/marqo/tensor_search/tensor_search.py b/src/marqo/tensor_search/tensor_search.py index 9830dee64..d9760c5ef 100644 --- a/src/marqo/tensor_search/tensor_search.py +++ b/src/marqo/tensor_search/tensor_search.py @@ -1244,7 +1244,7 @@ def get_loaded_models() -> dict: available_models = s2_inference.get_available_models() message = { 'models' : [ - {"model_name": ix} for ix in available_models + {"model_name": ix[0], "model_device": ix[1]} for ix in available_models ] } return message From ba97a44c324219d07e336a7e8b8c295c7599d07a Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 21 Dec 2022 14:48:50 +1100 Subject: [PATCH 13/70] revert back model cache key --- src/marqo/s2_inference/s2_inference.py | 2 +- src/marqo/tensor_search/api.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index 1f174c730..75f121d66 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -297,7 +297,7 @@ def eject_model(model_name:str,device:str): torch.cuda.empty_cache() return {"message":f"eject SUCCESS, eject model_name={model_name} from device={device}"} else: - raise ModelNotInCache(f"The model_name={model_name} device={device} is not loaded") + raise ModelNotInCache(f"The model_name={model_name} device={device} is not cached") # def normalize(inputs): diff --git a/src/marqo/tensor_search/api.py b/src/marqo/tensor_search/api.py index 9b1d99b51..0ace593de 100644 --- a/src/marqo/tensor_search/api.py +++ b/src/marqo/tensor_search/api.py @@ -323,6 +323,7 @@ def get_cuda_info(): # eject a model """ curl -X DELETE 'http://localhost:8882/models?model_name=ViT-L/14&model_device=cuda' +curl -X DELETE 'http://localhost:8882/models?model_name=ViT-L/14&model_device=cpu' curl -X DELETE 'http://localhost:8882/models?model_name=hf/all_datasets_v4_MiniLM-L6&model_device=cuda' curl -X DELETE 'http://localhost:8882/models?model_name=hf/all_datasets_v4_MiniLM-L6&model_device=cpu' """ \ No newline at end of file From 2457920130db90adb67c0400c3a82fd096b85f42 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 21 Dec 2022 15:56:31 +1100 Subject: [PATCH 14/70] add test_eject_model test --- .../test_model_cache_management.py | 73 
From 2457920130db90adb67c0400c3a82fd096b85f42 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 15:56:31 +1100
Subject: [PATCH 14/70] add test_eject_model test

---
 .../test_model_cache_management.py | 73 +++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 tests/tensor_search/test_model_cache_management.py

diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
new file mode 100644
index 000000000..09a54fb18
--- /dev/null
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -0,0 +1,73 @@
+import json
+import pprint
+import time
+from marqo.errors import IndexNotFoundError, MarqoError
+from marqo.tensor_search import tensor_search, constants, index_meta_cache
+import unittest
+import copy
+from tests.marqo_test import MarqoTestCase
+from marqo.s2_inference.s2_inference import _validate_model_properties,\
+    _create_model_cache_key, _update_available_models, available_models
+from marqo.tensor_search.tensor_search import eject_model
+from marqo.errors import ModelNotInCache
+
+
+
+def load_model(model_name: str, device: str) -> None:
+    validated_model_properties = _validate_model_properties(model_name, None)
+    model_cache_key = _create_model_cache_key(model_name, device, validated_model_properties)
+    _update_available_models(model_cache_key, model_name, validated_model_properties, device, True)
+
+
+class TestModelCacheManagement(MarqoTestCase):
+
+    def setUp(self) -> None:
+        # We pre-define 3 dummy models for testing purposes
+        self.MODEL_1 = "ViT/L-14"
+        self.MODEL_2 = "open_clip/ViT-L-14/laion400m_e31"
+        self.MODEL_3 = "hf/all-MiniLM-L6-v2"
+        self.MODEL_LIST = [self.MODEL_1, self.MODEL_2, self.MODEL_3]
+
+
+        # load several models into cache for setting up
+        for model_name in self.MODEL_LIST:
+            load_model(model_name, "cuda")
+            load_model(model_name, "cpu")
+        # We will load 6 models (3 in cuda, 3 in cpu) as initial setup
+
+    def test_eject_model(self):
+
+        for model_name in self.MODEL_LIST:
+            eject_model(model_name, "cpu")
+            if (model_name, "cpu") in available_models:
+                raise AssertionError
+
+
+            eject_model(model_name,"cuda")
+            if (model_name, "cuda") in available_models:
+                raise AssertionError
+
+        my_test_model_1 = "test-model-1"
+        my_test_model_2 = "test-model-2"
+
+        try:
+            eject_model(my_test_model_1, "cuda")
+        except ModelNotInCache:
+            pass
+
+        try:
+            eject_model(my_test_model_1, "cpu")
+        except ModelNotInCache:
+            pass
+
+        try:
+            eject_model(my_test_model_2, "cuda")
+        except ModelNotInCache:
+            pass
+
+        try:
+            eject_model(my_test_model_2, "cpu")
+        except ModelNotInCache:
+            pass
+

From 856b5f93863e7caf9fc621fe863da8e8a13dd0bf Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 16:06:11 +1100
Subject: [PATCH 15/70] add test_eject_model test

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 13e34c3a4..f13ad586a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,3 +23,4 @@ onnxruntime==1.13.1
 pandas==1.5.1
 optimum==1.4.1
 opencv-python-headless==4.6.0.66
+psutil==5.9.4
From 3df97f0e9ad651cf6ac0e98e51cbc6a1c89febc9 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 16:09:12 +1100
Subject: [PATCH 16/70] add test_eject_model test

---
 tests/tensor_search/test_model_cache_management.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index 09a54fb18..7c654603f 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -23,7 +23,7 @@ class TestModelCacheManagement(MarqoTestCase):

     def setUp(self) -> None:
         # We pre-define 3 dummy models for testing purposes
-        self.MODEL_1 = "ViT/L-14"
+        self.MODEL_1 = "ViT-L/14"
         self.MODEL_2 = "open_clip/ViT-L-14/laion400m_e31"
         self.MODEL_3 = "hf/all-MiniLM-L6-v2"
         self.MODEL_LIST = [self.MODEL_1, self.MODEL_2, self.MODEL_3]

From e3d3bec6b279c92541ab621e3dab051f301745d1 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 16:15:06 +1100
Subject: [PATCH 17/70] add test_eject_model test

---
 tests/tensor_search/test_model_cache_management.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index 7c654603f..b9e896a55 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -33,9 +33,11 @@ def setUp(self) -> None:
         for model_name in self.MODEL_LIST:
             load_model(model_name, "cuda")
             load_model(model_name, "cpu")
+        print(available_models)
         # We will load 6 models (3 in cuda, 3 in cpu) as initial setup

     def test_eject_model(self):
+        print(available_models)

         for model_name in self.MODEL_LIST:
             eject_model(model_name, "cpu")

From b6122aa8c33e3046cbe25548ed84acbff11dfb25 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 16:17:46 +1100
Subject: [PATCH 18/70] add test_eject_model test

---
 tests/tensor_search/test_model_cache_management.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index b9e896a55..f698b0abb 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -67,9 +67,6 @@ def test_eject_model(self):
         except ModelNotInCache:
             pass

-        try:
-            eject_model(my_test_model_2, "cpu")
-        except ModelNotInCache:
-            pass
+        eject_model(my_test_model_2, "cpu")
From 1132cc5e77d9d404c085e811dc57bb1bfb7e36d5 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 16:23:59 +1100
Subject: [PATCH 19/70] add test_eject_model test

---
 src/marqo/tensor_search/tensor_search.py           | 14 ++++++++++----
 tests/tensor_search/test_model_cache_management.py |  6 +++++-
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/marqo/tensor_search/tensor_search.py b/src/marqo/tensor_search/tensor_search.py
index d9760c5ef..bf55363f6 100644
--- a/src/marqo/tensor_search/tensor_search.py
+++ b/src/marqo/tensor_search/tensor_search.py
@@ -1267,7 +1267,13 @@ def get_cpu_info() -> dict:

 def get_cuda_info() -> dict:
-    return {"cuda_usage_info":[{"device_id" : id, "device_name" : torch.cuda.get_device_name(id),
-                                "memory_used":f"{round(torch.cuda.memory_allocated(id) / 1024**3, 1)} GiB",
-                                "total_memory": f"{round(torch.cuda.get_device_properties(id).total_memory/ 1024**3, 1)} GiB"}
-                               for id in range(torch.cuda.device_count())]}
\ No newline at end of file
+    if torch.cuda.is_available():
+        return {"cuda_usage_info":[{"device_id" : id, "device_name" : torch.cuda.get_device_name(id),
+                                    "memory_used":f"{round(torch.cuda.memory_allocated(id) / 1024**3, 1)} GiB",
+                                    "total_memory": f"{round(torch.cuda.get_device_properties(id).total_memory/ 1024**3, 1)} GiB"}
+                                   for id in range(torch.cuda.device_count())]}
+
+    else:
+        raise errors.HardwareCompatabilityError(message=str(
+            "ERROR: cuda is not supported on your machine!!"
+        ))
\ No newline at end of file
diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index f698b0abb..a5e5a3509 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -67,6 +67,10 @@ def test_eject_model(self):
         except ModelNotInCache:
             pass

-        eject_model(my_test_model_2, "cpu")
+        try:
+            eject_model(my_test_model_2, "cpu")
+        except ModelNotInCache:
+            pass
+

From fd69a4260047fc74f57cf4f5fa401c18cc2ae124 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 16:29:12 +1100
Subject: [PATCH 20/70] add test_eject_model test

---
 .../test_model_cache_management.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index a5e5a3509..4f72b060e 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -8,8 +8,8 @@
 from tests.marqo_test import MarqoTestCase
 from marqo.s2_inference.s2_inference import _validate_model_properties,\
     _create_model_cache_key, _update_available_models, available_models
-from marqo.tensor_search.tensor_search import eject_model
-from marqo.errors import ModelNotInCache
+from marqo.tensor_search.tensor_search import eject_model, get_cuda_info
+from marqo.errors import ModelNotInCache, HardwareCompatabilityError
@@ -41,8 +41,8 @@ def test_eject_model(self):

         for model_name in self.MODEL_LIST:
             eject_model(model_name, "cpu")
-            if (model_name, "cpu") in available_models:
-                raise AssertionError
+            if (model_name, "cpu") not in available_models:
+                raise AssertionError(f"Model= {model_name} device = cpu is not deleted from cache")

             eject_model(model_name,"cuda")
             if (model_name, "cuda") in available_models:
                 raise AssertionError
@@ -72,5 +72,14 @@ def test_eject_model(self):
         except ModelNotInCache:
             pass

+    def test_cuda_info(self):
+        try:
+            get_cuda_info()
+        except HardwareCompatabilityError:
+            pass
+
+
+
+
From fe2f5faef68b43b982ea75d00f59979344651b93 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 16:30:24 +1100
Subject: [PATCH 21/70] add test_eject_model test

---
 tests/tensor_search/test_model_cache_management.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index 4f72b060e..99baa5766 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -41,7 +41,7 @@ def test_eject_model(self):

         for model_name in self.MODEL_LIST:
             eject_model(model_name, "cpu")
-            if (model_name, "cpu") not in available_models:
+            if (model_name, "cpu") in available_models:
                 raise AssertionError(f"Model= {model_name} device = cpu is not deleted from cache")

From 4b515c272075a81914929fc796ed48f38e370ee7 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 16:35:45 +1100
Subject: [PATCH 22/70] add test_eject_model test

---
 tests/tensor_search/test_model_cache_management.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index 99baa5766..3e0edf8c3 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -8,7 +8,7 @@
 from tests.marqo_test import MarqoTestCase
 from marqo.s2_inference.s2_inference import _validate_model_properties,\
     _create_model_cache_key, _update_available_models, available_models
-from marqo.tensor_search.tensor_search import eject_model, get_cuda_info
+from marqo.tensor_search.tensor_search import eject_model, get_cuda_info, get_loaded_models
 from marqo.errors import ModelNotInCache, HardwareCompatabilityError
@@ -78,6 +78,16 @@ def test_cuda_info(self):
         except HardwareCompatabilityError:
             pass

+    def test_loaded_models(self) -> dict:
+
+        loaded_models = get_loaded_models()["models"]
+        loaded_models_list = [list(i) for i in loaded_models]
+        assert loaded_models_list==available_models
+
+
+
+
+

From 7aa1a06266bb98f2477bd5d6bd40ba1c3c55a213 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 16:40:16 +1100
Subject: [PATCH 23/70] add test_eject_model test

---
 tests/tensor_search/test_model_cache_management.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index 3e0edf8c3..ed4fe1e83 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -81,8 +81,8 @@ def test_cuda_info(self):
     def test_loaded_models(self) -> dict:

         loaded_models = get_loaded_models()["models"]
-        loaded_models_list = [list(i) for i in loaded_models]
-        assert loaded_models_list==available_models
+        loaded_models_list = [tuple(i) for i in loaded_models]
+        assert loaded_models_list==available_models.keys()

From 53a6a132502a8513476c1a33ae55bccc80039fd8 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 16:40:57 +1100
Subject: [PATCH 24/70] add test_eject_model test

---
 tests/tensor_search/test_model_cache_management.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index ed4fe1e83..05e59d7fc 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -81,7 +81,7 @@ def test_cuda_info(self):

         loaded_models = get_loaded_models()["models"]
-        loaded_models_list = [tuple(i) for i in loaded_models]
+        loaded_models_list = [(key, loaded_models[key]) for key in loaded_models]
         assert loaded_models_list==available_models.keys()

From 0d5140bd8fdde0cbf83fdb9158670e919d0c06ee Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 16:44:12 +1100
Subject: [PATCH 25/70] add test_eject_model test

---
 tests/tensor_search/test_model_cache_management.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index 05e59d7fc..68161fe0a 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -81,7 +81,7 @@ def test_cuda_info(self):

         loaded_models = get_loaded_models()["models"]
-        loaded_models_list = [(key, loaded_models[key]) for key in loaded_models]
+        loaded_models_list = [tuple(dic) for dic in loaded_models]
         assert loaded_models_list==available_models.keys()
From 05719e7937e7c8faa154477eada5ab411d92af2d Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 16:46:11 +1100
Subject: [PATCH 26/70] add test_eject_model test

---
 tests/tensor_search/test_model_cache_management.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index 68161fe0a..1c8bb3323 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -81,7 +81,7 @@ def test_cuda_info(self):

         loaded_models = get_loaded_models()["models"]
-        loaded_models_list = [tuple(dic) for dic in loaded_models]
+        loaded_models_list = [tuple(dic.values()) for dic in loaded_models]
         assert loaded_models_list==available_models.keys()

From 1b058af4428952f933990f97fe7ac1df6f10f2cd Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 16:48:21 +1100
Subject: [PATCH 27/70] add test_eject_model test

---
 tests/tensor_search/test_model_cache_management.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index 1c8bb3323..bbdbafaa9 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -82,7 +82,7 @@ def test_loaded_models(self) -> dict:

         loaded_models = get_loaded_models()["models"]
         loaded_models_list = [tuple(dic.values()) for dic in loaded_models]
-        assert loaded_models_list==available_models.keys()
+        assert loaded_models_list==list(available_models.keys())
From 74f92388b108c577543c8cc196dd978ab7127584 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 16:59:39 +1100
Subject: [PATCH 28/70] add test_eject_model test

---
 .../test_model_cache_management.py | 34 ++++++++++++-------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index bbdbafaa9..5f04a48c2 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -1,15 +1,10 @@
-import json
-import pprint
-import time
-from marqo.errors import IndexNotFoundError, MarqoError
-from marqo.tensor_search import tensor_search, constants, index_meta_cache
-import unittest
-import copy
+import torch.cuda
 from tests.marqo_test import MarqoTestCase
 from marqo.s2_inference.s2_inference import _validate_model_properties,\
     _create_model_cache_key, _update_available_models, available_models
 from marqo.tensor_search.tensor_search import eject_model, get_cuda_info, get_loaded_models
 from marqo.errors import ModelNotInCache, HardwareCompatabilityError
+import psutil
@@ -33,18 +28,17 @@ def setUp(self) -> None:
         for model_name in self.MODEL_LIST:
             load_model(model_name, "cuda")
             load_model(model_name, "cpu")
-        print(available_models)
-        # We will load 6 models (3 in cuda, 3 in cpu) as initial setup
+        # We loaded 6 models (3 in cuda, 3 in cpu) as initial setup

-    def test_eject_model(self):
-        print(available_models)
+        assert len(available_models) == 6

+    def test_eject_model(self):
+        # check if we can eject the models
         for model_name in self.MODEL_LIST:
             eject_model(model_name, "cpu")
             if (model_name, "cpu") in available_models:
                 raise AssertionError(f"Model= {model_name} device = cpu is not deleted from cache")

-
             eject_model(model_name,"cuda")
             if (model_name, "cuda") in available_models:
                 raise AssertionError
@@ -78,12 +72,26 @@ def test_cuda_info(self):
         except HardwareCompatabilityError:
             pass

-    def test_loaded_models(self) -> dict:
+    def test_loaded_models(self):

         loaded_models = get_loaded_models()["models"]
         loaded_models_list = [tuple(dic.values()) for dic in loaded_models]
         assert loaded_models_list==list(available_models.keys())

+    def edge_case(self):
+        test_iterations = 50
+        for i in range(test_iterations):
+            eject_model(self.MODEL_1, "cuda")
+            load_model(self.MODEL_1, "cuda")
+
+        for id in range(torch.cuda.device_count()):
+            # cuda usage
+            assert torch.cuda.memory_allocated(id) < torch.cuda.get_device_properties(id).total_memory
+            # cpu usage
+            assert psutil.cpu_percent(1) < 100.0
+            # memory usage
+            assert psutil.virtual_memory()[2]< 100.0
+

From e624322ae67fe59d868959fe8458cf3315667c98 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 17:05:36 +1100
Subject: [PATCH 29/70] add unit test

---
 tests/tensor_search/test_model_cache_management.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index 5f04a48c2..f85b72b35 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -66,18 +66,21 @@ def test_eject_model(self):
         except ModelNotInCache:
             pass

+
     def test_cuda_info(self):
         try:
             get_cuda_info()
         except HardwareCompatabilityError:
             pass

+
     def test_loaded_models(self):

         loaded_models = get_loaded_models()["models"]
         loaded_models_list = [tuple(dic.values()) for dic in loaded_models]
         assert loaded_models_list==list(available_models.keys())

+
     def edge_case(self):
From 847653a8026671ac6537952c5662df8b7e0a72be Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 17:06:07 +1100
Subject: [PATCH 30/70] add unit test

---
 tests/tensor_search/test_model_cache_management.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index f85b72b35..c182eeca3 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -32,6 +32,7 @@ def setUp(self) -> None:

         assert len(available_models) == 6

+
     def test_eject_model(self):
         # check if we can eject the models

From f144d3e9f2d2d3163a7eb4220a357469e4890781 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 17:06:44 +1100
Subject: [PATCH 31/70] add unit test

---
 tests/tensor_search/test_model_cache_management.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index c182eeca3..e281a9b56 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -92,7 +92,7 @@ def edge_case(self):
             # cuda usage
             assert torch.cuda.memory_allocated(id) < torch.cuda.get_device_properties(id).total_memory
             # cpu usage
-            assert psutil.cpu_percent(1) < 100.0
+            assert psutil.cpu_percent(1) > 100.0
             # memory usage
             assert psutil.virtual_memory()[2]< 100.0

From d07d56c80b06267a639241199a2bf721c83ed9d2 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 17:08:23 +1100
Subject: [PATCH 32/70] add unit test

---
 tests/tensor_search/test_model_cache_management.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index e281a9b56..28852134c 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -82,7 +82,7 @@ def test_loaded_models(self):
         assert loaded_models_list==list(available_models.keys())

-    def edge_case(self):
+    def test_edge_case(self):
         test_iterations = 50
         for i in range(test_iterations):
             eject_model(self.MODEL_1, "cuda")
@@ -92,7 +92,7 @@ def edge_case(self):
             # cpu usage
-            assert psutil.cpu_percent(1) > 100.0
+            assert psutil.cpu_percent(1) < 100.0
             # memory usage
             assert psutil.virtual_memory()[2]< 100.0

From 046761e85ae36f002a7177cc5b78517b94a6565d Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 17:13:20 +1100
Subject: [PATCH 33/70] add unit test

---
 .../test_model_cache_management.py | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index 28852134c..a9344c8a4 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -82,19 +82,19 @@ def test_loaded_models(self):
         assert loaded_models_list==list(available_models.keys())

-    def test_edge_case(self):
-        test_iterations = 50
-        for i in range(test_iterations):
-            eject_model(self.MODEL_1, "cuda")
-            load_model(self.MODEL_1, "cuda")
-
-        for id in range(torch.cuda.device_count()):
-            # cuda usage
-            assert torch.cuda.memory_allocated(id) < torch.cuda.get_device_properties(id).total_memory
-            # cpu usage
-            assert psutil.cpu_percent(1) < 100.0
-            # memory usage
-            assert psutil.virtual_memory()[2]< 100.0
+    # def test_edge_case(self):
+    #     test_iterations = 50
+    #     for i in range(test_iterations):
+    #         eject_model(self.MODEL_1, "cuda")
+    #         load_model(self.MODEL_1, "cuda")
+    #
+    #     for id in range(torch.cuda.device_count()):
+    #         # cuda usage
+    #         assert torch.cuda.memory_allocated(id) < torch.cuda.get_device_properties(id).total_memory
+    #         # cpu usage
+    #         assert psutil.cpu_percent(1) < 100.0
+    #         # memory usage
+    #         assert psutil.virtual_memory()[2]< 100.0

From 4a8e1e4ed4de2186ad764643067e50540d6de9c5 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 17:19:04 +1100
Subject: [PATCH 34/70] add unit test

---
 .../test_model_cache_management.py | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index a9344c8a4..28852134c 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -82,19 +82,19 @@ def test_loaded_models(self):
         assert loaded_models_list==list(available_models.keys())

-    # def test_edge_case(self):
-    #     test_iterations = 50
-    #     for i in range(test_iterations):
-    #         eject_model(self.MODEL_1, "cuda")
-    #         load_model(self.MODEL_1, "cuda")
-    #
-    #     for id in range(torch.cuda.device_count()):
-    #         # cuda usage
-    #         assert torch.cuda.memory_allocated(id) < torch.cuda.get_device_properties(id).total_memory
-    #         # cpu usage
-    #         assert psutil.cpu_percent(1) < 100.0
-    #         # memory usage
-    #         assert psutil.virtual_memory()[2]< 100.0
+    def test_edge_case(self):
+        test_iterations = 50
+        for i in range(test_iterations):
+            eject_model(self.MODEL_1, "cuda")
+            load_model(self.MODEL_1, "cuda")
+
+        for id in range(torch.cuda.device_count()):
+            # cuda usage
+            assert torch.cuda.memory_allocated(id) < torch.cuda.get_device_properties(id).total_memory
+            # cpu usage
+            assert psutil.cpu_percent(1) < 100.0
+            # memory usage
+            assert psutil.virtual_memory()[2]< 100.0
From 18e83e4f57861e25bbc904dbc3fb9f00887ee31d Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 17:22:34 +1100
Subject: [PATCH 35/70] add unit test

---
 tests/tensor_search/test_model_cache_management.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index 28852134c..1016d5984 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -83,7 +83,8 @@ def test_loaded_models(self):

     def test_edge_case(self):
-        test_iterations = 50
+        test_iterations = 10
+
         for i in range(test_iterations):
             eject_model(self.MODEL_1, "cuda")
             load_model(self.MODEL_1, "cuda")
@@ -97,6 +98,19 @@ def test_edge_case(self):
             assert psutil.virtual_memory()[2]< 100.0

+        for i in range(test_iterations):
+            eject_model(self.MODEL_1, "cpu")
+            load_model(self.MODEL_1, "cpu")
+
+        for id in range(torch.cuda.device_count()):
+            # cuda usage
+            assert torch.cuda.memory_allocated(id) < torch.cuda.get_device_properties(id).total_memory
+            # cpu usage
+            assert psutil.cpu_percent(1) < 100.0
+            # memory usage
+            assert psutil.virtual_memory()[2]< 100.0
+
+
From c5c014a18544e3dfc5492b1a45efa285cff74296 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Dec 2022 17:35:29 +1100
Subject: [PATCH 36/70] test cuda only when cuda is available

---
 .../test_model_cache_management.py | 86 ++++++++++++-------
 1 file changed, 56 insertions(+), 30 deletions(-)

diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index 1016d5984..4da6348b8 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -22,49 +22,66 @@ def setUp(self) -> None:
         self.MODEL_2 = "open_clip/ViT-L-14/laion400m_e31"
         self.MODEL_3 = "hf/all-MiniLM-L6-v2"
         self.MODEL_LIST = [self.MODEL_1, self.MODEL_2, self.MODEL_3]
+        self.CUDA_FLAG = torch.cuda.is_available()

         # load several models into cache for setting up
         for model_name in self.MODEL_LIST:
-            load_model(model_name, "cuda")
             load_model(model_name, "cpu")

-        # We loaded 6 models (3 in cuda, 3 in cpu) as initial setup
-        assert len(available_models) == 6
+        if self.CUDA_FLAG:
+            for model_name in self.MODEL_LIST:
+                load_model(model_name, "cuda")
+
+        # We loaded 6 models (3 in cuda, 3 in cpu) as initial setup
+        if self.CUDA_FLAG:
+            assert len(available_models) == 6
+        else:
+            assert len(available_models) == 3
+

-    def test_eject_model(self):
-        # check if we can eject the models
+    def test_eject_model_cpu(self):
         for model_name in self.MODEL_LIST:
             eject_model(model_name, "cpu")
             if (model_name, "cpu") in available_models:
                 raise AssertionError

-            eject_model(model_name,"cuda")
-            if (model_name, "cuda") in available_models:
-                raise AssertionError
+
         my_test_model_1 = "test-model-1"
         my_test_model_2 = "test-model-2"

         try:
             eject_model(my_test_model_1, "cpu")
         except ModelNotInCache:
             pass

         try:
             eject_model(my_test_model_2, "cpu")
         except ModelNotInCache:
             pass

+
+    def test_eject_model_cuda(self):
+        if self.CUDA_FLAG:
+            # check if we can eject the models
+            for model_name in self.MODEL_LIST:
+                eject_model(model_name,"cuda")
+                if (model_name, "cuda") in available_models:
+                    raise AssertionError
+            my_test_model_1 = "test-model-1"
+            my_test_model_2 = "test-model-2"
+
+            try:
+                eject_model(my_test_model_1, "cuda")
+            except ModelNotInCache:
+                pass
+
+            try:
+                eject_model(my_test_model_2, "cuda")
+            except ModelNotInCache:
+                pass
+        else:
+            pass
@@ -82,22 +99,30 @@ def test_loaded_models(self):
         assert loaded_models_list==list(available_models.keys())

-    def test_edge_case(self):
-        test_iterations = 10
+    def test_edge_case_cuda(self):
+        if self.CUDA_FLAG:
+            test_iterations = 10
+            # Note this is a time consuming test.

-        for i in range(test_iterations):
-            eject_model(self.MODEL_1, "cuda")
-            load_model(self.MODEL_1, "cuda")
+            for i in range(test_iterations):
+                eject_model(self.MODEL_1, "cuda")
+                load_model(self.MODEL_1, "cuda")

-        for id in range(torch.cuda.device_count()):
-            # cuda usage
-            assert torch.cuda.memory_allocated(id) < torch.cuda.get_device_properties(id).total_memory
-            # cpu usage
-            assert psutil.cpu_percent(1) < 100.0
-            # memory usage
-            assert psutil.virtual_memory()[2]< 100.0
+            for id in range(torch.cuda.device_count()):
+                # cuda usage
+                assert torch.cuda.memory_allocated(id) < torch.cuda.get_device_properties(id).total_memory
+                # cpu usage
+                assert psutil.cpu_percent(1) < 100.0
+                # memory usage
+                assert psutil.virtual_memory()[2]< 100.0
+        else:
+            pass

+    def test_edge_case_cpu(self):
+        test_iterations = 10
+        # Note this is a time consuming test.
+
+        for i in range(test_iterations):
             eject_model(self.MODEL_1, "cpu")
             load_model(self.MODEL_1, "cpu")

         for id in range(torch.cuda.device_count()):
             # cuda usage
             assert torch.cuda.memory_allocated(id) < torch.cuda.get_device_properties(id).total_memory
             # cpu usage
             assert psutil.cpu_percent(1) < 100.0
             # memory usage
             assert psutil.virtual_memory()[2]< 100.0

From 2c040488c2aa8ad79e1d3d319349b056851e8f0f Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Thu, 22 Dec 2022 09:56:06 +1100
Subject: [PATCH 37/70] format update

---
 src/marqo/s2_inference/s2_inference.py   | 4 ++--
 src/marqo/tensor_search/tensor_search.py | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py
index 75f121d66..640a87f37 100644
--- a/src/marqo/s2_inference/s2_inference.py
+++ b/src/marqo/s2_inference/s2_inference.py
@@ -295,9 +295,9 @@ def eject_model(model_name:str,device:str):
         del available_models[model_cache_key]
         if device.startswith("cuda"):
             torch.cuda.empty_cache()
-        return {"message":f"eject SUCCESS, eject model_name={model_name} from device={device}"}
+        return {"result": "success", "message": f"successfully eject model_name `{model_name}` from device `{device}`"}
     else:
-        raise ModelNotInCache(f"The model_name={model_name} device={device} is not cached")
+        raise ModelNotInCache(f"The model_name `{model_name}` device `{device}` is not cached")

diff --git a/src/marqo/tensor_search/tensor_search.py b/src/marqo/tensor_search/tensor_search.py
index bf55363f6..373af9469 100644
--- a/src/marqo/tensor_search/tensor_search.py
+++ b/src/marqo/tensor_search/tensor_search.py
@@ -1260,9 +1260,9 @@ def eject_model(model_name: str, device: str) -> dict:

 def get_cpu_info() -> dict:
     return {
-        "CPU usage": f"{psutil.cpu_percent(1)} %", # The number 1 is a time interval for CPU usage calculation.
-        "RAM memory % used": f"{psutil.virtual_memory()[2]} %", # The number 2 is just an index number to get the expected results
-        "RAM Used (GB)": f"{round(psutil.virtual_memory()[3]/1000000000,1)}", # The number 3 is just an index number to get the expected results
+        "cpu_usage_percent": f"{psutil.cpu_percent(1)} %", # The number 1 is a time interval for CPU usage calculation.
+ "memory_used_percent": f"{psutil.virtual_memory()[2]} %", # The number 2 is just a index number to get the expected results + "memory_used_gb": f"{round(psutil.virtual_memory()[3]/1000000000,1)}", # The number 3 is just a index number to get the expected results } From fe969e3807a3c0f7e58503f9deb9240541cd7842 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 22 Dec 2022 09:58:49 +1100 Subject: [PATCH 38/70] format update --- src/marqo/tensor_search/tensor_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/marqo/tensor_search/tensor_search.py b/src/marqo/tensor_search/tensor_search.py index 373af9469..04aa0cfe3 100644 --- a/src/marqo/tensor_search/tensor_search.py +++ b/src/marqo/tensor_search/tensor_search.py @@ -1268,7 +1268,7 @@ def get_cpu_info() -> dict: def get_cuda_info() -> dict: if torch.cuda.is_available(): - return {"cuda_usage_info":[{"device_id" : id, "device_name" : torch.cuda.get_device_name(id), + return {"cuda_devices:": [{"device_id" : id, "device_name" : torch.cuda.get_device_name(id), "memory_used":f"{round(torch.cuda.memory_allocated(id) / 1024**3, 1)} GiB", "total_memory": f"{round(torch.cuda.get_device_properties(id).total_memory/ 1024**3, 1)} GiB"} for id in range(torch.cuda.device_count())]} From d37c1fe4396f3d2e8d8c06814f807ddcb85ce68b Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 22 Dec 2022 10:01:36 +1100 Subject: [PATCH 39/70] format update --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index fdcf62e10..2ff26b94b 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,8 @@ "fastapi", "uvicorn[standard]", "fastapi_utils", - "opencv-python-headless" + "opencv-python-headless", + "psutil" ], name="marqo-engine", version="0.1.10", From 2f8f087f3c74dfbc2caaccbe6e4ea5d99122f26e Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 22 Dec 2022 10:18:17 +1100 Subject: [PATCH 40/70] test_edge_case_cpu fix (remove cuda memory test) --- tests/tensor_search/test_model_cache_management.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py index 4da6348b8..21d2972c1 100644 --- a/tests/tensor_search/test_model_cache_management.py +++ b/tests/tensor_search/test_model_cache_management.py @@ -126,10 +126,6 @@ def test_edge_case_cpu(self): for i in range(test_iterations): eject_model(self.MODEL_1, "cpu") load_model(self.MODEL_1, "cpu") - - for id in range(torch.cuda.device_count()): - # cuda usage - assert torch.cuda.memory_allocated(id) < torch.cuda.get_device_properties(id).total_memory # cpu usage assert psutil.cpu_percent(1) < 100.0 # memory usage From aa3f5cb42b5ad540d9e22bfa4aa5a549bf9e3d98 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 23 Dec 2022 13:38:24 +1100 Subject: [PATCH 41/70] add separators for readable information in model cache key --- src/marqo/s2_inference/s2_inference.py | 11 +++++++++-- src/marqo/tensor_search/tensor_search.py | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index 640a87f37..43c59c4bf 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -61,7 +61,14 @@ def _create_model_cache_key(model_name: str, device: str, model_properties: dict Returns: str: _description_ """ - model_cache_key = (model_name, device) + # Changing the format of model cache key will also need to change eject_model api + + model_cache_key = 
(model_name + "||" + + + model_properties.get('name', '') + "||" + + + str(model_properties.get('dimensions', '')) + "||" + + + model_properties.get('type', '') + "||" + + + str(model_properties.get('tokens', '')) + "||" + + + device) return model_cache_key @@ -289,7 +296,7 @@ def get_available_models(): return available_models -def eject_model(model_name:str,device:str): +def eject_model(model_name:str, device:str): model_cache_key = _create_model_cache_key(model_name, device) if model_cache_key in available_models: del available_models[model_cache_key] diff --git a/src/marqo/tensor_search/tensor_search.py b/src/marqo/tensor_search/tensor_search.py index 04aa0cfe3..9231c8399 100644 --- a/src/marqo/tensor_search/tensor_search.py +++ b/src/marqo/tensor_search/tensor_search.py @@ -1244,7 +1244,7 @@ def get_loaded_models() -> dict: available_models = s2_inference.get_available_models() message = { 'models' : [ - {"model_name": ix[0], "model_device": ix[1]} for ix in available_models + {"model_name": ix.split("||")[0], "model_device": ix.split("||")[-1]} for ix in available_models ] } return message From a51487692af05dfe1707886647b712ae0eb595b7 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 23 Dec 2022 13:49:48 +1100 Subject: [PATCH 42/70] add separators for readable information in model cache key --- src/marqo/s2_inference/s2_inference.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index 43c59c4bf..3eed8d24e 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -64,11 +64,11 @@ def _create_model_cache_key(model_name: str, device: str, model_properties: dict # Changing the format of model cache key will also need to change eject_model api model_cache_key = (model_name + "||" + - + model_properties.get('name', '') + "||" + - + str(model_properties.get('dimensions', '')) + "||" + - + model_properties.get('type', '') + "||" + - + str(model_properties.get('tokens', '')) + "||" + - + device) + model_properties.get('name', '') + "||" + + str(model_properties.get('dimensions', '')) + "||" + + model_properties.get('type', '') + "||" + + str(model_properties.get('tokens', '')) + "||" + + device) return model_cache_key From a7d399502f095c6fd917cce11d60165a4f3a848b Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 23 Dec 2022 13:53:48 +1100 Subject: [PATCH 43/70] add separators for readable information in model cache key --- src/marqo/s2_inference/s2_inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index 3eed8d24e..6b5a3d88e 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -297,7 +297,7 @@ def get_available_models(): def eject_model(model_name:str, device:str): - model_cache_key = _create_model_cache_key(model_name, device) + model_cache_key = _create_model_cache_key(model_name, device, _validate_model_properties(model_name)) if model_cache_key in available_models: del available_models[model_cache_key] if device.startswith("cuda"): From 61ad7b5109684e182f733356a5f528ca35b2dd53 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 23 Dec 2022 13:57:37 +1100 Subject: [PATCH 44/70] add separators for readable information in model cache key --- tests/tensor_search/test_model_cache_management.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git 
a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py index 21d2972c1..b60a51dad 100644 --- a/tests/tensor_search/test_model_cache_management.py +++ b/tests/tensor_search/test_model_cache_management.py @@ -95,8 +95,9 @@ def test_cuda_info(self): def test_loaded_models(self): loaded_models = get_loaded_models()["models"] - loaded_models_list = [tuple(dic.values()) for dic in loaded_models] - assert loaded_models_list==list(available_models.keys()) + loaded_models_keys = [_create_model_cache_key(dic["model_name"], dic["device"], + _validate_model_properties(dic["model_name"])) for dic in loaded_models] + assert loaded_models_keys==list(available_models.keys()) def test_edge_case_cuda(self): From 6d15f9898e37e354b07ae2b4e7d59da5451e2492 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 23 Dec 2022 14:00:56 +1100 Subject: [PATCH 45/70] add separators for readable information in model cache key --- src/marqo/s2_inference/s2_inference.py | 4 ++-- tests/tensor_search/test_model_cache_management.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index 6b5a3d88e..dc76ddebd 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -91,7 +91,7 @@ def _update_available_models(model_cache_key: str, model_name: str, validated_mo f"and the model has valid access permission. ") -def _validate_model_properties(model_name: str, model_properties: dict) -> dict: +def _validate_model_properties(model_name: str, model_properties: dict = None) -> dict: """validate model_properties, if not given then return model_registry properties """ if model_properties is not None: @@ -297,7 +297,7 @@ def get_available_models(): def eject_model(model_name:str, device:str): - model_cache_key = _create_model_cache_key(model_name, device, _validate_model_properties(model_name)) + model_cache_key = _create_model_cache_key(model_name, device, _validate_model_properties(model_name, None)) if model_cache_key in available_models: del available_models[model_cache_key] if device.startswith("cuda"): diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py index b60a51dad..0766f1820 100644 --- a/tests/tensor_search/test_model_cache_management.py +++ b/tests/tensor_search/test_model_cache_management.py @@ -96,7 +96,7 @@ def test_loaded_models(self): loaded_models = get_loaded_models()["models"] loaded_models_keys = [_create_model_cache_key(dic["model_name"], dic["device"], - _validate_model_properties(dic["model_name"])) for dic in loaded_models] + _validate_model_properties(dic["model_name"], None)) for dic in loaded_models] assert loaded_models_keys==list(available_models.keys()) From 1cc424016fb55346e8296b8dffbc8ee64f3e1807 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 23 Dec 2022 14:11:45 +1100 Subject: [PATCH 46/70] adding test --- src/marqo/s2_inference/s2_inference.py | 7 +++++-- tests/tensor_search/test_model_cache_management.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index dc76ddebd..47ee61813 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -297,14 +297,17 @@ def get_available_models(): def eject_model(model_name:str, device:str): - model_cache_key = _create_model_cache_key(model_name, device, 
_validate_model_properties(model_name, None)) + try: + model_cache_key = _create_model_cache_key(model_name, device, _validate_model_properties(model_name, None)) + except UnknownModelError: + raise ModelNotInCache(f"The model_name \`{model_name}\` device \`{device}\` is not cached or found") if model_cache_key in available_models: del available_models[model_cache_key] if device.startswith("cuda"): torch.cuda.empty_cache() return {"result": "success", "message": f"successfully eject model_name \`{model_name}\` from device \`{device}\`"} else: - raise ModelNotInCache(f"The model_name \`{model_name}\` device \`{device}\` is not cached") + raise ModelNotInCache(f"The model_name \`{model_name}\` device \`{device}\` is not cached or found") # def normalize(inputs): diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py index 0766f1820..bec7379f7 100644 --- a/tests/tensor_search/test_model_cache_management.py +++ b/tests/tensor_search/test_model_cache_management.py @@ -95,7 +95,7 @@ def test_cuda_info(self): def test_loaded_models(self): loaded_models = get_loaded_models()["models"] - loaded_models_keys = [_create_model_cache_key(dic["model_name"], dic["device"], + loaded_models_keys = [_create_model_cache_key(dic["model_name"], dic["model_device"], _validate_model_properties(dic["model_name"], None)) for dic in loaded_models] assert loaded_models_keys==list(available_models.keys()) From 43d438a86b26470871002e12aa64e18668b77081 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 23 Dec 2022 14:33:45 +1100 Subject: [PATCH 47/70] adding test --- tests/tensor_search/test_model_cache_management.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py index bec7379f7..9a89cdde4 100644 --- a/tests/tensor_search/test_model_cache_management.py +++ b/tests/tensor_search/test_model_cache_management.py @@ -36,9 +36,9 @@ def setUp(self) -> None: # We loaded 6 models (3 in cuda, 3 in cpu) as initial setup if self.CUDA_FLAG: - assert len(available_models) == 6 + assert len(available_models) >= 6 else: - assert len(available_models) == 3 + assert len(available_models) >= 3 def test_eject_model_cpu(self): From a6b164a0cf10baea77baddeb724f2eba008c80e4 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 23 Dec 2022 15:11:19 +1100 Subject: [PATCH 48/70] adding test --- src/marqo/s2_inference/s2_inference.py | 15 +++++++++++---- src/marqo/tensor_search/tensor_search.py | 2 +- tests/s2_inference/test_utils.py | 10 +++++----- .../tensor_search/test_model_cache_management.py | 4 +++- 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index 47ee61813..be37d6ebd 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -91,7 +91,7 @@ def _update_available_models(model_cache_key: str, model_name: str, validated_mo f"and the model has valid access permission. 
") -def _validate_model_properties(model_name: str, model_properties: dict = None) -> dict: +def _validate_model_properties(model_name: str, model_properties: dict) -> dict: """validate model_properties, if not given then return model_registry properties """ if model_properties is not None: @@ -297,10 +297,17 @@ def get_available_models(): def eject_model(model_name:str, device:str): - try: - model_cache_key = _create_model_cache_key(model_name, device, _validate_model_properties(model_name, None)) - except UnknownModelError: + + model_cache_keys = available_models.keys() + + model_cache_key = None + + for key in model_cache_keys: + if key.startswith(model_name) and key.endswith(device): + model_cache_key = key + if model_cache_key is None: raise ModelNotInCache(f"The model_name \`{model_name}\` device \`{device}\` is not cached or found") + if model_cache_key in available_models: del available_models[model_cache_key] if device.startswith("cuda"): diff --git a/src/marqo/tensor_search/tensor_search.py b/src/marqo/tensor_search/tensor_search.py index 9231c8399..f8b7857d5 100644 --- a/src/marqo/tensor_search/tensor_search.py +++ b/src/marqo/tensor_search/tensor_search.py @@ -1244,7 +1244,7 @@ def get_loaded_models() -> dict: available_models = s2_inference.get_available_models() message = { 'models' : [ - {"model_name": ix.split("||")[0], "model_device": ix.split("||")[-1]} for ix in available_models + {"model_name": ix.split("||")[0], "model_device": ix.split("||")[-1]} for ix in available_models.keys() ] } return message diff --git a/tests/s2_inference/test_utils.py b/tests/s2_inference/test_utils.py index 657d3f745..1f15840e4 100644 --- a/tests/s2_inference/test_utils.py +++ b/tests/s2_inference/test_utils.py @@ -39,11 +39,11 @@ def test_create_model_cache_key(self): assert ( _create_model_cache_key(name, device, model_properties) == ( - name - + model_properties.get('name', '') - + str(model_properties.get('dimensions', '')) - + model_properties.get('type', '') - + str(model_properties.get('tokens', '')) + name + "||" + + model_properties.get('name', '') + "||" + + str(model_properties.get('dimensions', '')) + "||" + + model_properties.get('type', '') + "||" + + str(model_properties.get('tokens', '')) + "||" + device) ) diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py index 9a89cdde4..c463295d9 100644 --- a/tests/tensor_search/test_model_cache_management.py +++ b/tests/tensor_search/test_model_cache_management.py @@ -1,7 +1,7 @@ import torch.cuda from tests.marqo_test import MarqoTestCase from marqo.s2_inference.s2_inference import _validate_model_properties,\ - _create_model_cache_key, _update_available_models, available_models + _create_model_cache_key, _update_available_models, available_models, clear_loaded_models from marqo.tensor_search.tensor_search import eject_model, get_cuda_info, get_loaded_models from marqo.errors import ModelNotInCache, HardwareCompatabilityError import psutil @@ -40,6 +40,8 @@ def setUp(self) -> None: else: assert len(available_models) >= 3 + def tearDown(self) -> None: + clear_loaded_models() def test_eject_model_cpu(self): for model_name in self.MODEL_LIST: From 8072324a2353beb6965373beb23688f3824ef57f Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 23 Dec 2022 15:15:12 +1100 Subject: [PATCH 49/70] adding test --- src/marqo/s2_inference/s2_inference.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/marqo/s2_inference/s2_inference.py 
b/src/marqo/s2_inference/s2_inference.py index be37d6ebd..bd2badfff 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -63,6 +63,9 @@ def _create_model_cache_key(model_name: str, device: str, model_properties: dict """ # Changing the format of model cache key will also need to change eject_model api + if model_properties is None: + model_properties = dict() + model_cache_key = (model_name + "||" + model_properties.get('name', '') + "||" + str(model_properties.get('dimensions', '')) + "||" + @@ -91,7 +94,7 @@ def _update_available_models(model_cache_key: str, model_name: str, validated_mo f"and the model has valid access permission. ") -def _validate_model_properties(model_name: str, model_properties: dict) -> dict: +def _validate_model_properties(model_name: str, model_properties: dict = None) -> dict: """validate model_properties, if not given then return model_registry properties """ if model_properties is not None: From 315d5fc6d6a800c46bc7901946bbd7ca77af60fd Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 23 Dec 2022 15:23:11 +1100 Subject: [PATCH 50/70] adding test --- src/marqo/s2_inference/s2_inference.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index bd2badfff..be7f0bfe2 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -305,9 +305,13 @@ def eject_model(model_name:str, device:str): model_cache_key = None + # we can't handle the situation where there are two models with the same name and device + # but different properties. for key in model_cache_keys: if key.startswith(model_name) and key.endswith(device): model_cache_key = key + break + if model_cache_key is None: raise ModelNotInCache(f"The model_name \`{model_name}\` device \`{device}\` is not cached or found") From e132d70757817e85ff6b85e7d25c2fcf3399b760 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 23 Dec 2022 16:02:20 +1100 Subject: [PATCH 51/70] adding test --- src/marqo/tensor_search/tensor_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/marqo/tensor_search/tensor_search.py b/src/marqo/tensor_search/tensor_search.py index f8b7857d5..44adcdd25 100644 --- a/src/marqo/tensor_search/tensor_search.py +++ b/src/marqo/tensor_search/tensor_search.py @@ -1268,7 +1268,7 @@ def get_cpu_info() -> dict: def get_cuda_info() -> dict: if torch.cuda.is_available(): - return {"cuda_devices:": [{"device_id" : id, "device_name" : torch.cuda.get_device_name(id), + return {"cuda_devices": [{"device_id" : id, "device_name" : torch.cuda.get_device_name(id), "memory_used":f"{round(torch.cuda.memory_allocated(id) / 1024**3, 1)} GiB", "total_memory": f"{round(torch.cuda.get_device_properties(id).total_memory/ 1024**3, 1)} GiB"} for id in range(torch.cuda.device_count())]} From 7cb6c4215b1c814b0a7928072566997b5ebc8e98 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 29 Dec 2022 14:04:35 +1100 Subject: [PATCH 52/70] update id to _device_id --- src/marqo/tensor_search/tensor_search.py | 8 ++++---- tests/tensor_search/test_model_cache_management.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/marqo/tensor_search/tensor_search.py b/src/marqo/tensor_search/tensor_search.py index 44adcdd25..bfc446d61 100644 --- a/src/marqo/tensor_search/tensor_search.py +++ b/src/marqo/tensor_search/tensor_search.py @@ -1268,10 +1268,10 @@ def get_cpu_info() -> dict: def get_cuda_info() -> dict: if 
torch.cuda.is_available():
-        return {"cuda_devices": [{"device_id" : id, "device_name" : torch.cuda.get_device_name(id),
-                             "memory_used":f"{round(torch.cuda.memory_allocated(id) / 1024**3, 1)} GiB",
-                             "total_memory": f"{round(torch.cuda.get_device_properties(id).total_memory/ 1024**3, 1)} GiB"}
-                            for id in range(torch.cuda.device_count())]}
+        return {"cuda_devices": [{"device_id" : _device_id, "device_name" : torch.cuda.get_device_name(_device_id),
+                             "memory_used":f"{round(torch.cuda.memory_allocated(_device_id) / 1024**3, 1)} GiB",
+                             "total_memory": f"{round(torch.cuda.get_device_properties(_device_id).total_memory/ 1024**3, 1)} GiB"}
+                            for _device_id in range(torch.cuda.device_count())]}

     else:
         raise errors.HardwareCompatabilityError(message=str(
diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py
index c463295d9..9d0bd1ddf 100644
--- a/tests/tensor_search/test_model_cache_management.py
+++ b/tests/tensor_search/test_model_cache_management.py
@@ -111,9 +111,9 @@ def test_edge_case_cuda(self):
                 eject_model(self.MODEL_1, "cuda")
                 load_model(self.MODEL_1, "cuda")

-            for id in range(torch.cuda.device_count()):
+            for _device_id in range(torch.cuda.device_count()):
                 # cuda usage
-                assert torch.cuda.memory_allocated(id) < torch.cuda.get_device_properties(id).total_memory
+                assert torch.cuda.memory_allocated(_device_id) < torch.cuda.get_device_properties(_device_id).total_memory
                 # cpu usage
                 assert psutil.cpu_percent(1) < 100.0
                 # memory usage

From b11d30c869e3f5ee7cc6dd7fd14853fd39e3421b Mon Sep 17 00:00:00 2001
From: Li Wan <49334982+wanliAlex@users.noreply.github.com>
Date: Thu, 29 Dec 2022 17:50:14 +1100
Subject: [PATCH 53/70] [Onnx clip] Adding the clip_onnx to our available
 models for faster inference (#245)

* onnx32/openai/ViT-L/14
* onnx32/openai/ViT-L/14
* onnx32/openai/ViT-L/14
* onnx32/openai/ViT-L/14
* add a timer
* add a timer
* add a timer
* add a timer
* add a timer
* add a timer
* add a timer
* add a timer
* add a timer
* add a timer
* add a timer
* add a timer
* add a timer
* add a timer
* add a timer
* cleaning
* add test for onnx_clip
* make sure onnx-16 model still uses float32 for textual inference for best accuracy.
* make sure onnx-16 model still uses float32 for textual inference for best accuracy.
* we merge the hf models.
* update id to _device_id --- src/marqo/s2_inference/model_registry.py | 23 +++ src/marqo/s2_inference/onnx_clip_utils.py | 177 ++++++++++++++++++++++ src/marqo/s2_inference/s2_inference.py | 5 +- tests/s2_inference/test_encoding.py | 29 +++- 4 files changed, 231 insertions(+), 3 deletions(-) create mode 100644 src/marqo/s2_inference/onnx_clip_utils.py diff --git a/src/marqo/s2_inference/model_registry.py b/src/marqo/s2_inference/model_registry.py index 1cb6ee799..4aa136b6c 100644 --- a/src/marqo/s2_inference/model_registry.py +++ b/src/marqo/s2_inference/model_registry.py @@ -4,6 +4,7 @@ from marqo.s2_inference.random_utils import Random from marqo.s2_inference.clip_utils import CLIP, OPEN_CLIP from marqo.s2_inference.types import Any, Dict, List, Optional, Union, FloatTensor +from marqo.s2_inference.onnx_clip_utils import CLIP_ONNX # we need to keep track of the embed dim and model load functions/classes # we can use this as a registry @@ -511,6 +512,25 @@ def _get_sbert_test_properties() -> Dict: } return TEST_MODEL_PROPERTIES +def _get_onnx_clip_properties() -> Dict: + ONNX_CLIP_MODEL_PROPERTIES = { + "onnx32/openai/ViT-L/14": + { + "name":"onnx32/openai/ViT-L/14", + "dimensions" : 768, + "type":"clip_onnx", + "note":"the onnx float32 version of openai ViT-L/14" + }, + "onnx16/openai/ViT-L/14": + { + "name": "onnx16/openai/ViT-L/14", + "dimensions": 768, + "type": "clip_onnx", + "note": "the onnx float16 version of openai ViT-L/14" + }, + } + return ONNX_CLIP_MODEL_PROPERTIES + def _get_random_properties() -> Dict: RANDOM_MODEL_PROPERTIES = { "random": @@ -547,6 +567,7 @@ def _get_model_load_mappings() -> Dict: 'sbert':SBERT, 'test':TEST, 'sbert_onnx':SBERT_ONNX, + 'clip_onnx': CLIP_ONNX, 'random':Random, 'hf':HF_MODEL} @@ -562,6 +583,7 @@ def load_model_properties() -> Dict: random_model_properties = _get_random_properties() hf_model_properties = _get_hf_properties() open_clip_model_properties = _get_open_clip_properties() + onnx_clip_model_properties = _get_onnx_clip_properties() # combine the above dicts model_properties = dict(clip_model_properties.items()) @@ -571,6 +593,7 @@ def load_model_properties() -> Dict: model_properties.update(random_model_properties) model_properties.update(hf_model_properties) model_properties.update(open_clip_model_properties) + model_properties.update(onnx_clip_model_properties) all_properties = dict() all_properties['models'] = model_properties diff --git a/src/marqo/s2_inference/onnx_clip_utils.py b/src/marqo/s2_inference/onnx_clip_utils.py new file mode 100644 index 000000000..c33adec15 --- /dev/null +++ b/src/marqo/s2_inference/onnx_clip_utils.py @@ -0,0 +1,177 @@ +# from torch import FloatTensor +# from typing import Any, Dict, List, Optional, Union +import onnx +import os +import validators +import requests +import numpy as np +import clip +import torch +from PIL import Image +import open_clip +from huggingface_hub import hf_hub_download +from marqo.s2_inference.types import * +from marqo.s2_inference.logger import get_logger +import onnxruntime as ort + +# Loading shared functions from clip_utils.py. This part should be decoupled from models in the future +from marqo.s2_inference.clip_utils import get_allowed_image_types, format_and_load_CLIP_image, format_and_load_CLIP_images, load_image_from_path,_is_image + +logger = get_logger(__name__) + +_HF_MODEL_DOWNLOAD = { + + #Please check the link https://huggingface.co/Marqo for available models. 
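# Each entry below maps a Marqo model name to its Hugging Face Hub location:
# "repo_id" is the Hub repository, "visual_file" and "textual_file" are the
# exported ONNX graphs for the image and text towers, and "token" is an access
# token slot for private repos (download_model does not pass it yet).
#
# A rough usage sketch (assuming onnxruntime, clip and huggingface_hub are
# installed; the repo and file names are the ones registered below):
#     path = hf_hub_download(repo_id="Marqo/onnx-openai-ViT-L-14",
#                            filename="onnx32-openai-ViT-L-14-textual.onnx")
#     session = ort.InferenceSession(path, providers=["CPUExecutionProvider"])
#     tokens = clip.tokenize(["hello"], truncate=True).numpy().astype(np.int32)
#     embedding = session.run(None, {session.get_inputs()[0].name: tokens})[0]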
+ + + "onnx32/openai/ViT-L/14": + { + "repo_id": "Marqo/onnx-openai-ViT-L-14", + "visual_file": "onnx32-openai-ViT-L-14-visual.onnx", + "textual_file": "onnx32-openai-ViT-L-14-textual.onnx", + "token": None + }, + + "onnx16/openai/ViT-L/14": + { + "repo_id": "Marqo/onnx-openai-ViT-L-14", + "visual_file": "onnx16-openai-ViT-L-14-visual.onnx", + "textual_file": "onnx16-openai-ViT-L-14-textual.onnx", + "token": None + + } +} + + +class CLIP_ONNX(object): + """ + Load a clip model and convert it to onnx version for faster inference + """ + + def __init__(self, model_name = "onnx32/openai/ViT-L/14", device = "cpu", embedding_dim: int = None, truncate: bool = True, + load=True, **kwargs): + self.model_name = model_name + self.onnx_type, self.source, self.clip_model = self.model_name.split("/", 2) + self.device = device + self.truncate = truncate + self.provider = ['CUDAExecutionProvider', "CPUExecutionProvider"] if self.device.startswith("cuda") else ["CPUExecutionProvider"] + self.visual_session = None + self.textual_session = None + self.model_info = _HF_MODEL_DOWNLOAD[self.model_name] + + if self.onnx_type == "onnx16": + self.visual_type = np.float16 + elif self.onnx_type == "onnx32": + self.visual_type = np.float32 + + def load(self): + self.load_clip() + self.load_onnx() + + @staticmethod + def normalize(outputs): + return outputs.norm(dim=-1, keepdim=True) + + def _convert_output(self, output): + if self.device == 'cpu': + return output.numpy() + elif self.device.startswith('cuda'): + return output.cpu().numpy() + + def load_clip(self): + if self.source == "openai": + clip_model, self.clip_preprocess = clip.load(self.clip_model, device="cpu", jit=False) + self.tokenizer = clip.tokenize + del clip_model + elif self.source =="open_clip": + clip_name, pre_trained = self.clip_model.split("/", 2) + clip_model, _, self.clip_preprocess = open_clip.create_model_and_transforms(clip_name, pre_trained, device="cpu") + self.tokenizer = open_clip.get_tokenizer(clip_name) + del clip_model + + def encode_text(self, sentence, normalize=True): + text = clip.tokenize(sentence, truncate=self.truncate).cpu() + text_onnx = text.detach().cpu().numpy().astype(np.int32) + + onnx_input_text = {self.textual_session.get_inputs()[0].name: text_onnx} + # The onnx output has the shape [1,1,768], we need to squeeze the dimension + outputs = torch.squeeze(torch.tensor(np.array(self.textual_session.run(None, onnx_input_text)))).to(torch.float32) + + if normalize: + print("we are normalizing") + _shape_before = outputs.shape + print(torch.linalg.norm(outputs)) + outputs /= self.normalize(outputs) + print(torch.linalg.norm(outputs)) + assert outputs.shape == _shape_before + return self._convert_output(outputs) + + def encode_image(self, images, normalize=True): + if isinstance(images, list): + image_input = format_and_load_CLIP_images(images) + else: + image_input = [format_and_load_CLIP_image(images)] + + image_input_processed = torch.stack([self.clip_preprocess(_img) for _img in image_input]) + images_onnx = image_input_processed.detach().cpu().numpy().astype(self.visual_type) + + onnx_input_image = {self.visual_session.get_inputs()[0].name: images_onnx} + # The onnx output has the shape [1,1,768], we need to squeeze the dimension + outputs = torch.squeeze(torch.tensor(np.array(self.visual_session.run(None, onnx_input_image)))).to(torch.float32) + + if normalize: + _shape_before = outputs.shape + outputs /= self.normalize(outputs) + assert outputs.shape == _shape_before + + return self._convert_output(outputs) + + def 
encode(self, inputs: Union[str, ImageType, List[Union[str, ImageType]]], + default: str = 'text', normalize=True, **kwargs) -> FloatTensor: + + if self.clip_preprocess is None or self.tokenizer is None: + self.load_clip() + if self.visual_session is None or self.textual_session is None: + self.load_onnx() + + infer = kwargs.pop('infer', True) + + if infer and _is_image(inputs): + is_image = True + else: + is_image = False + if default == 'text': + is_image = False + elif default == 'image': + is_image = True + else: + raise ValueError(f"expected default='image' or default='text' but received {default}") + + if is_image: + logger.debug('image') + return self.encode_image(inputs, normalize=True) + else: + logger.debug('text') + return self.encode_text(inputs, normalize=True) + + def load_onnx(self): + self.visual_file = self.download_model(self.model_info["repo_id"], self.model_info["visual_file"]) + self.textual_file = self.download_model(self.model_info["repo_id"], self.model_info["textual_file"]) + self.visual_session = ort.InferenceSession(self.visual_file, providers=self.provider) + self.textual_session = ort.InferenceSession(self.textual_file, providers=self.provider) + + @staticmethod + def download_model(repo_id:str, filename:str, cache_folder:str = None) -> str: + file_path = hf_hub_download(repo_id=repo_id, filename=filename, + cache_dir=cache_folder) + return file_path + + + + + + + + + + diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index be7f0bfe2..2b27e267c 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -8,6 +8,7 @@ from marqo.s2_inference.configs import get_default_device, get_default_normalization, get_default_seq_length from marqo.s2_inference.types import * from marqo.s2_inference.logger import get_logger +from timeit import default_timer as timer import torch logger = get_logger(__name__) @@ -42,11 +43,13 @@ def vectorise(model_name: str, content: Union[str, List[str]], model_properties: _update_available_models(model_cache_key, model_name, validated_model_properties, device, normalize_embeddings) + try: vectorised = available_models[model_cache_key].encode(content, normalize=normalize_embeddings, **kwargs) except UnidentifiedImageError as e: raise VectoriseError from e + return _convert_vectorized_output(vectorised) @@ -94,7 +97,7 @@ def _update_available_models(model_cache_key: str, model_name: str, validated_mo f"and the model has valid access permission. 
") -def _validate_model_properties(model_name: str, model_properties: dict = None) -> dict: +def _validate_model_properties(model_name: str, model_properties: dict) -> dict: """validate model_properties, if not given then return model_registry properties """ if model_properties is not None: diff --git a/tests/s2_inference/test_encoding.py b/tests/s2_inference/test_encoding.py index 1cb2bba27..514877425 100644 --- a/tests/s2_inference/test_encoding.py +++ b/tests/s2_inference/test_encoding.py @@ -106,7 +106,8 @@ def test_compare_onnx_sbert_text_models(self): assert abs(model_onnx.encode(sentence) - model_sbert.encode(sentence)).sum() < eps def test_model_outputs(self): - names = ['open_clip/ViT-B-32/laion400m_e32', "all-MiniLM-L6-v1", + names = ["onnx32/openai/ViT-L/14", "onnx16/openai/ViT-L/14", + 'open_clip/ViT-B-32/laion400m_e32', "all-MiniLM-L6-v1", "all_datasets_v4_MiniLM-L6", "hf/all-MiniLM-L6-v1", "hf/all_datasets_v4_MiniLM-L6", "onnx/all-MiniLM-L6-v1", "onnx/all_datasets_v4_MiniLM-L6"] sentences = ['hello', 'this is a test sentence. so is this.', ['hello', 'this is a test sentence. so is this.']] @@ -121,7 +122,8 @@ def test_model_outputs(self): assert _check_output_type(_convert_vectorized_output(output)) def test_model_normalization(self): - names = ['open_clip/ViT-B-32/laion400m_e32', 'RN50', "ViT-B/16", "all-MiniLM-L6-v1", + names = ["onnx32/openai/ViT-L/14", "onnx16/openai/ViT-L/14", + 'open_clip/ViT-B-32/laion400m_e32', 'RN50', "ViT-B/16", "all-MiniLM-L6-v1", "all_datasets_v4_MiniLM-L6", "hf/all-MiniLM-L6-v1", "hf/all_datasets_v4_MiniLM-L6", "onnx/all-MiniLM-L6-v1", "onnx/all_datasets_v4_MiniLM-L6"] sentences = ['hello', 'this is a test sentence. so is this.', ['hello', 'this is a test sentence. so is this.']] @@ -203,3 +205,26 @@ def test_open_clip_embedding_size(self): output_dimension = len(output_v[0]) assert registered_dimension == output_dimension + + def test_onnx_clip_vectorise(self): + + names = ["onnx32/openai/ViT-L/14", "onnx16/openai/ViT-L/14"] + + sentences = ['hello', 'this is a test sentence. so is this.', + ['hello', 'this is a test sentence. 
so is this.']] + device = 'cpu' + eps = 1e-9 + + for name in names: + model_properties = get_model_properties_from_registry(name) + model = _load_model(model_properties['name'], model_properties=model_properties, device=device) + + for sentence in sentences: + output_v = vectorise(name, sentence, model_properties, device, normalize_embeddings=True) + + assert _check_output_type(output_v) + + output_m = model.encode(sentence, normalize=True) + + assert abs(torch.FloatTensor(output_m) - torch.FloatTensor(output_v)).sum() < eps + From 9dea569fb133939d5b1987d0b005d16446a438a5 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 29 Dec 2022 21:46:30 +1100 Subject: [PATCH 54/70] mainline merge --- src/marqo/s2_inference/s2_inference.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index b72201160..1e2b14eae 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -8,9 +8,7 @@ from marqo.s2_inference.configs import get_default_device, get_default_normalization, get_default_seq_length from marqo.s2_inference.types import * from marqo.s2_inference.logger import get_logger -from timeit import default_timer as timer import torch -from timeit import default_timer as timer logger = get_logger(__name__) From 1cad359625e550c809fb123f4d949e00610084cf Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 29 Dec 2022 23:31:13 +1100 Subject: [PATCH 55/70] mainline merge --- .../test_model_cache_management.py | 70 +++++++++++++++++-- 1 file changed, 65 insertions(+), 5 deletions(-) diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py index 9d0bd1ddf..a7edb1c3a 100644 --- a/tests/tensor_search/test_model_cache_management.py +++ b/tests/tensor_search/test_model_cache_management.py @@ -2,7 +2,7 @@ from tests.marqo_test import MarqoTestCase from marqo.s2_inference.s2_inference import _validate_model_properties,\ _create_model_cache_key, _update_available_models, available_models, clear_loaded_models -from marqo.tensor_search.tensor_search import eject_model, get_cuda_info, get_loaded_models +from marqo.tensor_search.tensor_search import eject_model, get_cuda_info, get_loaded_models, get_cpu_info from marqo.errors import ModelNotInCache, HardwareCompatabilityError import psutil @@ -43,6 +43,8 @@ def setUp(self) -> None: def tearDown(self) -> None: clear_loaded_models() + + def test_eject_model_cpu(self): for model_name in self.MODEL_LIST: eject_model(model_name, "cpu") @@ -55,11 +57,13 @@ def test_eject_model_cpu(self): try: eject_model(my_test_model_1, "cpu") + raise AssertionError except ModelNotInCache: pass try: eject_model(my_test_model_2, "cpu") + raise AssertionError except ModelNotInCache: pass @@ -88,10 +92,28 @@ def test_eject_model_cuda(self): def test_cuda_info(self): - try: - get_cuda_info() - except HardwareCompatabilityError: - pass + if self.CUDA_FLAG is True: + res = get_cuda_info() + if "cuda_devices" not in res: + raise AssertionError + else: + try: + get_cuda_info() + except HardwareCompatabilityError: + pass + + + def test_get_cpu_info(self) -> None: + res = get_cpu_info() + + if "cpu_usage_percent" not in res: + raise AssertionError + + if "memory_used_percent" not in res: + raise AssertionError + + if "memory_used_gb" not in res: + raise AssertionError def test_loaded_models(self): @@ -135,6 +157,44 @@ def test_edge_case_cpu(self): assert psutil.virtual_memory()[2]< 100.0 + def 
test_overall_eject_and_load_model(self): + clear_loaded_models() + if len(available_models) != 0: + raise AssertionError + + for model_name in self.MODEL_LIST: + validated_model_properties = _validate_model_properties(model_name, None) + model_cache_key = _create_model_cache_key(model_name, "cpu", validated_model_properties) + _update_available_models(model_cache_key, model_name, validated_model_properties, "cpu", True) + + if model_cache_key not in available_models: + raise AssertionError + + res = get_loaded_models()["models"] + assert res[model_name] == "cpu" + + eject_model(model_name, "cpu") + + if model_cache_key in available_models: + raise AssertionError + + if self.CUDA_FLAG is True: + for model_name in self.MODEL_LIST: + validated_model_properties = _validate_model_properties(model_name, None) + model_cache_key = _create_model_cache_key(model_name, "cuda", validated_model_properties) + _update_available_models(model_cache_key, model_name, validated_model_properties, "cuda", True) + + if model_cache_key not in available_models: + raise AssertionError + + res = get_loaded_models()["models"] + assert res[model_name] == "cuda" + + eject_model(model_name, "cuda") + + if model_cache_key in available_models: + raise AssertionError + From aa00d189af0edc4f9e20ab6b54f07b4b1968445b Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 30 Dec 2022 08:56:09 +1100 Subject: [PATCH 56/70] mainline merge --- tests/tensor_search/test_model_cache_management.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py index a7edb1c3a..441075c7d 100644 --- a/tests/tensor_search/test_model_cache_management.py +++ b/tests/tensor_search/test_model_cache_management.py @@ -18,8 +18,7 @@ class TestModelCacheManagement(MarqoTestCase): def setUp(self) -> None: # We pre-define 3 dummy models for testing purpose - self.MODEL_1 = "ViT-L/14" - self.MODEL_2 = "open_clip/ViT-L-14/laion400m_e31" + self.MODEL_1 = "ViT-B/32" self.MODEL_3 = "hf/all-MiniLM-L6-v2" self.MODEL_LIST = [self.MODEL_1, self.MODEL_2, self.MODEL_3] self.CUDA_FLAG = torch.cuda.is_available() From 7afbb8896d2dbff71d6008736188e4f49275aebb Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 30 Dec 2022 09:06:32 +1100 Subject: [PATCH 57/70] reduce a model for testing stability --- tests/tensor_search/test_model_cache_management.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py index 441075c7d..417a9d9f2 100644 --- a/tests/tensor_search/test_model_cache_management.py +++ b/tests/tensor_search/test_model_cache_management.py @@ -19,8 +19,8 @@ class TestModelCacheManagement(MarqoTestCase): def setUp(self) -> None: # We pre-define 3 dummy models for testing purpose self.MODEL_1 = "ViT-B/32" - self.MODEL_3 = "hf/all-MiniLM-L6-v2" - self.MODEL_LIST = [self.MODEL_1, self.MODEL_2, self.MODEL_3] + self.MODEL_2 = "hf/all-MiniLM-L6-v2" + self.MODEL_LIST = [self.MODEL_1, self.MODEL_2] self.CUDA_FLAG = torch.cuda.is_available() From 9aa15a952b72f08d9035411485b565a56faee423 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 30 Dec 2022 09:07:40 +1100 Subject: [PATCH 58/70] reduce a model for testing stability --- tests/tensor_search/test_model_cache_management.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py 
index 417a9d9f2..d2bb907f8 100644 --- a/tests/tensor_search/test_model_cache_management.py +++ b/tests/tensor_search/test_model_cache_management.py @@ -35,9 +35,9 @@ def setUp(self) -> None: # We loaded 6 models (3 in cuda, 3 in cpu) as initial setup if self.CUDA_FLAG: - assert len(available_models) >= 6 + assert len(available_models) >= 4 else: - assert len(available_models) >= 3 + assert len(available_models) >= 2 def tearDown(self) -> None: clear_loaded_models() From bb530f2ff13d3a94d18e75f4ef4463349b96e3bb Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 30 Dec 2022 09:13:56 +1100 Subject: [PATCH 59/70] update --- src/marqo/errors.py | 2 +- src/marqo/s2_inference/errors.py | 2 +- src/marqo/s2_inference/s2_inference.py | 6 +++--- src/marqo/tensor_search/tensor_search.py | 4 ++-- tests/tensor_search/test_model_cache_management.py | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/marqo/errors.py b/src/marqo/errors.py index 6f047c451..90167bd60 100644 --- a/src/marqo/errors.py +++ b/src/marqo/errors.py @@ -177,7 +177,7 @@ class IndexMaxFieldsError(__InvalidRequestError): status_code = HTTPStatus.BAD_REQUEST -class ModelNotInCache(__InvalidRequestError): +class ModelNotInCacheError(__InvalidRequestError): code = "model_not_in_cache" status_code = HTTPStatus.NOT_FOUND diff --git a/src/marqo/s2_inference/errors.py b/src/marqo/s2_inference/errors.py index 3a7a54073..296132e2e 100644 --- a/src/marqo/s2_inference/errors.py +++ b/src/marqo/s2_inference/errors.py @@ -44,5 +44,5 @@ class RerankerNameError(S2InferenceError): pass -class ModelNotInCache(S2InferenceError): +class ModelNotInCacheError(S2InferenceError): pass \ No newline at end of file diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index 1e2b14eae..252f255e6 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -2,7 +2,7 @@ The functions defined here would have endpoints, later on. 
""" import numpy as np -from marqo.s2_inference.errors import VectoriseError, InvalidModelPropertiesError, ModelLoadError, UnknownModelError +from marqo.s2_inference.errors import VectoriseError, InvalidModelPropertiesError, ModelLoadError, UnknownModelError, ModelNotInCacheError from PIL import UnidentifiedImageError from marqo.s2_inference.model_registry import load_model_properties from marqo.s2_inference.configs import get_default_device, get_default_normalization, get_default_seq_length @@ -313,7 +313,7 @@ def eject_model(model_name:str, device:str): break if model_cache_key is None: - raise ModelNotInCache(f"The model_name \`{model_name}\` device \`{device}\` is not cached or found") + raise ModelNotInCacheError(f"The model_name \`{model_name}\` device \`{device}\` is not cached or found") if model_cache_key in available_models: del available_models[model_cache_key] @@ -321,7 +321,7 @@ def eject_model(model_name:str, device:str): torch.cuda.empty_cache() return {"result": "success", "message": f"successfully eject model_name \`{model_name}\` from device \`{device}\`"} else: - raise ModelNotInCache(f"The model_name \`{model_name}\` device \`{device}\` is not cached or found") + raise ModelNotInCacheError(f"The model_name \`{model_name}\` device \`{device}\` is not cached or found") # def normalize(inputs): diff --git a/src/marqo/tensor_search/tensor_search.py b/src/marqo/tensor_search/tensor_search.py index fb4aa17ec..7aed2ed2a 100644 --- a/src/marqo/tensor_search/tensor_search.py +++ b/src/marqo/tensor_search/tensor_search.py @@ -1345,8 +1345,8 @@ def get_loaded_models() -> dict: def eject_model(model_name: str, device: str) -> dict: try: result = s2_inference.eject_model(model_name, device) - except s2_inference_errors.ModelNotInCache as e: - raise errors.ModelNotInCache(message=str(e)) + except s2_inference_errors.ModelNotInCacheError as e: + raise errors.ModelNotInCacheError(message=str(e)) return result diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py index d2bb907f8..2ab7af70d 100644 --- a/tests/tensor_search/test_model_cache_management.py +++ b/tests/tensor_search/test_model_cache_management.py @@ -3,7 +3,7 @@ from marqo.s2_inference.s2_inference import _validate_model_properties,\ _create_model_cache_key, _update_available_models, available_models, clear_loaded_models from marqo.tensor_search.tensor_search import eject_model, get_cuda_info, get_loaded_models, get_cpu_info -from marqo.errors import ModelNotInCache, HardwareCompatabilityError +from marqo.errors import ModelNotInCacheError, HardwareCompatabilityError import psutil From eab2a75757962ef254acf0ff6bf4e575690e5ed3 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 30 Dec 2022 09:17:34 +1100 Subject: [PATCH 60/70] update --- tests/tensor_search/test_model_cache_management.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py index 2ab7af70d..ff8d09d92 100644 --- a/tests/tensor_search/test_model_cache_management.py +++ b/tests/tensor_search/test_model_cache_management.py @@ -57,13 +57,13 @@ def test_eject_model_cpu(self): try: eject_model(my_test_model_1, "cpu") raise AssertionError - except ModelNotInCache: + except ModelNotInCacheError: pass try: eject_model(my_test_model_2, "cpu") raise AssertionError - except ModelNotInCache: + except ModelNotInCacheError: pass @@ -79,12 +79,12 @@ def test_eject_model_cuda(self): try: 
eject_model(my_test_model_1, "cuda") - except ModelNotInCache: + except ModelNotInCacheError: pass try: eject_model(my_test_model_2, "cuda") - except ModelNotInCache: + except ModelNotInCacheError: pass else: pass @@ -169,7 +169,9 @@ def test_overall_eject_and_load_model(self): if model_cache_key not in available_models: raise AssertionError - res = get_loaded_models()["models"] + # the res is a list of dict with {"model_name", "device"} + # since we only have one model, we only test index 0. + res = get_loaded_models()["models"][0] assert res[model_name] == "cpu" eject_model(model_name, "cpu") @@ -186,7 +188,7 @@ def test_overall_eject_and_load_model(self): if model_cache_key not in available_models: raise AssertionError - res = get_loaded_models()["models"] + res = get_loaded_models()["models"][0] assert res[model_name] == "cuda" eject_model(model_name, "cuda") From a4f0a423e9222ad8a594c141ad93bc7ee74b938e Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 30 Dec 2022 09:23:26 +1100 Subject: [PATCH 61/70] update --- tests/tensor_search/test_model_cache_management.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py index ff8d09d92..8acdf8749 100644 --- a/tests/tensor_search/test_model_cache_management.py +++ b/tests/tensor_search/test_model_cache_management.py @@ -169,10 +169,11 @@ def test_overall_eject_and_load_model(self): if model_cache_key not in available_models: raise AssertionError - # the res is a list of dict with {"model_name", "device"} + # the res is a list of dict with {"model_name" : model_name, "model_device" : device} # since we only have one model, we only test index 0. res = get_loaded_models()["models"][0] - assert res[model_name] == "cpu" + assert res["model_name"] == model_name + assert res["model_device"] == "cpu" eject_model(model_name, "cpu") @@ -189,7 +190,8 @@ def test_overall_eject_and_load_model(self): raise AssertionError res = get_loaded_models()["models"][0] - assert res[model_name] == "cuda" + assert res["model_name"] == model_name + assert res["model_device"] == "cuda" eject_model(model_name, "cuda") From 670fceea40a37a849f3fd34db1fdf4ce4b7a8a00 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 5 Jan 2023 16:45:58 +1100 Subject: [PATCH 62/70] add test for generic model --- .../test_model_cache_management.py | 66 ++++++++++++++++++- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py index 8acdf8749..95fdf0a5b 100644 --- a/tests/tensor_search/test_model_cache_management.py +++ b/tests/tensor_search/test_model_cache_management.py @@ -8,8 +8,8 @@ -def load_model(model_name: str, device: str) -> None: - validated_model_properties = _validate_model_properties(model_name, None) +def load_model(model_name: str, model_properteis: dict = None, device: str = "cpu") -> None: + validated_model_properties = _validate_model_properties(model_name, model_properteis) model_cache_key = _create_model_cache_key(model_name, device, validated_model_properties) _update_available_models(model_cache_key, model_name, validated_model_properties, device, True) @@ -156,6 +156,68 @@ def test_edge_case_cpu(self): assert psutil.virtual_memory()[2]< 100.0 + def test_generic_model(self): + clear_loaded_models() + assert len(available_models()) == 0 + generic_model_1 = { + "model_name" : "sentence-transformers/multi-qa-MiniLM-L6-cos-v1", + 
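            # These properties mirror the registry schema used throughout this
            # series: "name" is the checkpoint to load, "dimensions" the embedding
            # width, "tokens" the max sequence length, and "type" selects the
            # sbert loader.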
"model_properties":{ + "name" : "sentence-transformers/multi-qa-MiniLM-L6-cos-v1", + "dimension" : 384, + "tokens": 128, + "type" : "sbert", + } + } + + generic_model_2 = { + "model_name" : "sentence-transformers/multi-qa-distilbert-cos-v1", + "model_properties":{ + "name" : "sentence-transformers/multi-qa-distilbert-cos-v1", + "dimension" : 768, + "tokens": 512, + "type" : "sbert", + } + } + + generic_model_3 = { + "model_name" : "sentence-transformers/paraphrase-MiniLM-L3-v2", + "model_properties":{ + "name" : "sentence-transformers/paraphrase-MiniLM-L3-v2 ", + "dimension" : 384, + "tokens": 128, + "type" : "sbert", + } + } + + generic_model_list = [generic_model_1, generic_model_2, generic_model_3] + + for generic_model in generic_model_list: + load_model(generic_model["model_name"], generic_model["model_properties"], device="cpu") + + assert len(available_models) == 3 + + for generic_model in generic_model_list: + eject_model(generic_model["model_name"], device="cpu") + assert len(available_models) == 0 + + + if self.CUDA_FLAG == True: + for generic_model in generic_model_list: + load_model(generic_model["model_name"], generic_model["model_properties"], device="cuda") + + assert len(available_models) == 3 + + for generic_model in generic_model_list: + eject_model(generic_model["model_name"], device="cuda") + assert len(available_models) == 0 + + + + + + + + def test_overall_eject_and_load_model(self): clear_loaded_models() if len(available_models) != 0: From 2f8c6f049eb1cc7de02eddc29816922f8fd5b248 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 5 Jan 2023 16:49:52 +1100 Subject: [PATCH 63/70] add test for generic model --- tests/tensor_search/test_model_cache_management.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py index 95fdf0a5b..703931a27 100644 --- a/tests/tensor_search/test_model_cache_management.py +++ b/tests/tensor_search/test_model_cache_management.py @@ -8,7 +8,7 @@ -def load_model(model_name: str, model_properteis: dict = None, device: str = "cpu") -> None: +def load_model(model_name: str, device: str, model_properteis: dict = None) -> None: validated_model_properties = _validate_model_properties(model_name, model_properteis) model_cache_key = _create_model_cache_key(model_name, device, validated_model_properties) _update_available_models(model_cache_key, model_name, validated_model_properties, device, True) From 4026435d1e79565cd210d6a6c762b115a5c622df Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 5 Jan 2023 16:51:56 +1100 Subject: [PATCH 64/70] add test for generic model --- tests/tensor_search/test_model_cache_management.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py index 703931a27..cce43f9c6 100644 --- a/tests/tensor_search/test_model_cache_management.py +++ b/tests/tensor_search/test_model_cache_management.py @@ -192,7 +192,7 @@ def test_generic_model(self): generic_model_list = [generic_model_1, generic_model_2, generic_model_3] for generic_model in generic_model_list: - load_model(generic_model["model_name"], generic_model["model_properties"], device="cpu") + load_model(generic_model["model_name"], model_properteis= generic_model["model_properties"], device="cpu") assert len(available_models) == 3 @@ -203,7 +203,7 @@ def test_generic_model(self): if self.CUDA_FLAG == True: for generic_model in 
generic_model_list: - load_model(generic_model["model_name"], generic_model["model_properties"], device="cuda") + load_model(generic_model["model_name"], model_properteis = generic_model["model_properties"], device="cuda") assert len(available_models) == 3 From 45c58914080100a32774bfd6f500915c28e2dd56 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 5 Jan 2023 16:56:17 +1100 Subject: [PATCH 65/70] add test for generic model --- tests/tensor_search/test_model_cache_management.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py index cce43f9c6..2b6d7bf90 100644 --- a/tests/tensor_search/test_model_cache_management.py +++ b/tests/tensor_search/test_model_cache_management.py @@ -158,7 +158,7 @@ def test_edge_case_cpu(self): def test_generic_model(self): clear_loaded_models() - assert len(available_models()) == 0 + assert len(available_models) == 0 generic_model_1 = { "model_name" : "sentence-transformers/multi-qa-MiniLM-L6-cos-v1", "model_properties":{ From a782e52d255976e5996a7a5c61f66cbf98654799 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 5 Jan 2023 16:57:37 +1100 Subject: [PATCH 66/70] add test for generic model --- tests/tensor_search/test_model_cache_management.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py index 2b6d7bf90..2fcc0d621 100644 --- a/tests/tensor_search/test_model_cache_management.py +++ b/tests/tensor_search/test_model_cache_management.py @@ -211,13 +211,6 @@ def test_generic_model(self): eject_model(generic_model["model_name"], device="cuda") assert len(available_models) == 0 - - - - - - - def test_overall_eject_and_load_model(self): clear_loaded_models() if len(available_models) != 0: From 621df431c60b67cfbf72a1cd63257b9e50dde384 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 5 Jan 2023 16:57:58 +1100 Subject: [PATCH 67/70] add test for generic model --- tests/tensor_search/test_model_cache_management.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py index 2fcc0d621..4c67e7fa8 100644 --- a/tests/tensor_search/test_model_cache_management.py +++ b/tests/tensor_search/test_model_cache_management.py @@ -163,7 +163,7 @@ def test_generic_model(self): "model_name" : "sentence-transformers/multi-qa-MiniLM-L6-cos-v1", "model_properties":{ "name" : "sentence-transformers/multi-qa-MiniLM-L6-cos-v1", - "dimension" : 384, + "dimensions" : 384, "tokens": 128, "type" : "sbert", } @@ -173,7 +173,7 @@ def test_generic_model(self): "model_name" : "sentence-transformers/multi-qa-distilbert-cos-v1", "model_properties":{ "name" : "sentence-transformers/multi-qa-distilbert-cos-v1", - "dimension" : 768, + "dimensions" : 768, "tokens": 512, "type" : "sbert", } @@ -183,7 +183,7 @@ def test_generic_model(self): "model_name" : "sentence-transformers/paraphrase-MiniLM-L3-v2", "model_properties":{ "name" : "sentence-transformers/paraphrase-MiniLM-L3-v2 ", - "dimension" : 384, + "dimensions" : 384, "tokens": 128, "type" : "sbert", } From 0b10d26b600c47cba1dd6ea10dcfda28fe447a43 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 5 Jan 2023 17:01:44 +1100 Subject: [PATCH 68/70] add test for generic model --- tests/tensor_search/test_model_cache_management.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py index 4c67e7fa8..af1a2f560 100644 --- a/tests/tensor_search/test_model_cache_management.py +++ b/tests/tensor_search/test_model_cache_management.py @@ -182,7 +182,7 @@ def test_generic_model(self): generic_model_3 = { "model_name" : "sentence-transformers/paraphrase-MiniLM-L3-v2", "model_properties":{ - "name" : "sentence-transformers/paraphrase-MiniLM-L3-v2 ", + "name" : "sentence-transformers/paraphrase-MiniLM-L3-v2", "dimensions" : 384, "tokens": 128, "type" : "sbert", From 0cf173a31a8e660ba08d7559d8815b8acfcd1797 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 5 Jan 2023 17:17:51 +1100 Subject: [PATCH 69/70] revision --- src/marqo/tensor_search/api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/marqo/tensor_search/api.py b/src/marqo/tensor_search/api.py index 0ace593de..f108fe311 100644 --- a/src/marqo/tensor_search/api.py +++ b/src/marqo/tensor_search/api.py @@ -228,7 +228,6 @@ def get_cpu_info(): return tensor_search.get_cpu_info() - @app.get("/device/cuda") def get_cuda_info(): return tensor_search.get_cuda_info() From ba6fb8d2a7578a04bb86359fc41fa5dac1efecda Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 5 Jan 2023 17:19:15 +1100 Subject: [PATCH 70/70] revision --- tests/tensor_search/test_model_cache_management.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tensor_search/test_model_cache_management.py b/tests/tensor_search/test_model_cache_management.py index af1a2f560..c78171e74 100644 --- a/tests/tensor_search/test_model_cache_management.py +++ b/tests/tensor_search/test_model_cache_management.py @@ -43,7 +43,6 @@ def tearDown(self) -> None: clear_loaded_models() - def test_eject_model_cpu(self): for model_name in self.MODEL_LIST: eject_model(model_name, "cpu") @@ -211,6 +210,7 @@ def test_generic_model(self): eject_model(generic_model["model_name"], device="cuda") assert len(available_models) == 0 + def test_overall_eject_and_load_model(self): clear_loaded_models() if len(available_models) != 0:
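            # clear_loaded_models() empties the module-level available_models
            # cache, so a non-empty dict at this point means eviction failed.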