
Model Cache Management Features #239

Merged · 72 commits · Jan 6, 2023
da1fbbe
add APIs in marqo
wanliAlex Dec 16, 2022
03ca4dd
test new model cache key
wanliAlex Dec 16, 2022
e7e7b09
cleaning
wanliAlex Dec 20, 2022
8ad198b
Add todo
wanliAlex Dec 20, 2022
d8a4fdd
add multi-gpu support
wanliAlex Dec 21, 2022
db30bcb
add multi-gpu support
wanliAlex Dec 21, 2022
f0a801b
space adding
wanliAlex Dec 21, 2022
e0f14a2
adding cpu usage, RAM usage api
wanliAlex Dec 21, 2022
186180a
adding cpu usage, RAM usage api
wanliAlex Dec 21, 2022
2c77fa7
adding cpu usage, RAM usage api
wanliAlex Dec 21, 2022
d88b590
adding cpu usage, RAM usage api
wanliAlex Dec 21, 2022
a95d030
revert back model cache key
wanliAlex Dec 21, 2022
ba97a44
revert back model cache key
wanliAlex Dec 21, 2022
2457920
add test_eject_model test
wanliAlex Dec 21, 2022
856b5f9
add test_eject_model test
wanliAlex Dec 21, 2022
3df97f0
add test_eject_model test
wanliAlex Dec 21, 2022
e3d3bec
add test_eject_model test
wanliAlex Dec 21, 2022
b6122aa
add test_eject_model test
wanliAlex Dec 21, 2022
1132cc5
add test_eject_model test
wanliAlex Dec 21, 2022
fd69a42
add test_eject_model test
wanliAlex Dec 21, 2022
fe2f5fa
add test_eject_model test
wanliAlex Dec 21, 2022
4b515c2
add test_eject_model test
wanliAlex Dec 21, 2022
7aa1a06
add test_eject_model test
wanliAlex Dec 21, 2022
53a6a13
add test_eject_model test
wanliAlex Dec 21, 2022
0d5140b
add test_eject_model test
wanliAlex Dec 21, 2022
05719e7
add test_eject_model test
wanliAlex Dec 21, 2022
1b058af
add test_eject_model test
wanliAlex Dec 21, 2022
74f9238
add test_eject_model test
wanliAlex Dec 21, 2022
e624322
add unit test
wanliAlex Dec 21, 2022
847653a
add unit test
wanliAlex Dec 21, 2022
f144d3e
add unit test
wanliAlex Dec 21, 2022
d07d56c
add unit test
wanliAlex Dec 21, 2022
046761e
add unit test
wanliAlex Dec 21, 2022
4a8e1e4
add unit test
wanliAlex Dec 21, 2022
18e83e4
add unit test
wanliAlex Dec 21, 2022
c5c014a
test cuda only when cuda is available
wanliAlex Dec 21, 2022
2c04048
format update
wanliAlex Dec 21, 2022
fe969e3
format update
wanliAlex Dec 21, 2022
d37c1fe
format update
wanliAlex Dec 21, 2022
2f8f087
test_edge_case_cpu fix (remove cuda memory test)
wanliAlex Dec 21, 2022
aa3f5cb
add separators for readable information in model cache key
wanliAlex Dec 23, 2022
a514876
add separators for readable information in model cache key
wanliAlex Dec 23, 2022
a7d3995
add separators for readable information in model cache key
wanliAlex Dec 23, 2022
61ad7b5
add separators for readable information in model cache key
wanliAlex Dec 23, 2022
6d15f98
add separators for readable information in model cache key
wanliAlex Dec 23, 2022
1cc4240
adding test
wanliAlex Dec 23, 2022
43d438a
adding test
wanliAlex Dec 23, 2022
a6b164a
adding test
wanliAlex Dec 23, 2022
8072324
adding test
wanliAlex Dec 23, 2022
315d5fc
adding test
wanliAlex Dec 23, 2022
e132d70
adding test
wanliAlex Dec 23, 2022
7cb6c42
update id to _device_id
wanliAlex Dec 29, 2022
5e00196
update id to _device_id
wanliAlex Dec 29, 2022
b11d30c
[Onnx clip]Adding the clip_onnx to our avaible models for faster infe…
wanliAlex Dec 29, 2022
e2dab10
mainline merge
wanliAlex Dec 29, 2022
9dea569
mainline merge
wanliAlex Dec 29, 2022
1cad359
mainline merge
wanliAlex Dec 29, 2022
aa00d18
mainline merge
wanliAlex Dec 29, 2022
7afbb88
reduce a model for testing stability
wanliAlex Dec 29, 2022
9aa15a9
reduce a model for testing stability
wanliAlex Dec 29, 2022
bb530f2
update
wanliAlex Dec 29, 2022
eab2a75
update
wanliAlex Dec 29, 2022
a4f0a42
update
wanliAlex Dec 29, 2022
670fcee
add test for generic model
wanliAlex Jan 5, 2023
2f8c6f0
add test for generic model
wanliAlex Jan 5, 2023
4026435
add test for generic model
wanliAlex Jan 5, 2023
45c5891
add test for generic model
wanliAlex Jan 5, 2023
a782e52
add test for generic model
wanliAlex Jan 5, 2023
621df43
add test for generic model
wanliAlex Jan 5, 2023
0b10d26
add test for generic model
wanliAlex Jan 5, 2023
0cf173a
revision
wanliAlex Jan 5, 2023
ba6fb8d
revision
wanliAlex Jan 5, 2023
4 changes: 4 additions & 0 deletions src/marqo/errors.py
@@ -176,6 +176,10 @@ class IndexMaxFieldsError(__InvalidRequestError):
    code = "index_max_fields_error"
    status_code = HTTPStatus.BAD_REQUEST

class ModelNotLoadedError(__InvalidRequestError):
    code = "model_not_loaded"
    status_code = HTTPStatus.NOT_FOUND

# ---MARQO INTERNAL ERROR---


3 changes: 3 additions & 0 deletions src/marqo/s2_inference/errors.py
@@ -42,3 +42,6 @@ class RerankerImageError(S2InferenceError):

class RerankerNameError(S2InferenceError):
    pass

class ModelNotLoadedError(S2InferenceError):
    pass
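The two `ModelNotLoadedError` classes above live at different layers. A minimal, self-contained sketch of how the inference-layer error is assumed to be translated into the API-layer error that carries HTTP 404 (class names here are stand-ins, not the real marqo wiring):

```python
# Hedged sketch: the s2_inference-layer error is caught at the API layer and
# re-raised as the request error carrying a 404 status code.
from http import HTTPStatus

class S2InferenceModelNotLoadedError(Exception):
    """Stand-in for marqo.s2_inference.errors.ModelNotLoadedError."""

class ApiModelNotLoadedError(Exception):
    """Stand-in for marqo.errors.ModelNotLoadedError."""
    code = "model_not_loaded"
    status_code = HTTPStatus.NOT_FOUND

def eject_model_stub(loaded: bool) -> dict:
    try:
        if not loaded:
            raise S2InferenceModelNotLoadedError("model is not loaded")
        return {"message": "eject SUCCESS"}
    except S2InferenceModelNotLoadedError as e:
        # Re-raise at the API layer so FastAPI's error handling sees a 404.
        raise ApiModelNotLoadedError(str(e))

try:
    eject_model_stub(loaded=False)
except ApiModelNotLoadedError as e:
    print(int(e.status_code))  # 404
```

Keeping the inference layer ignorant of HTTP concerns and translating at the boundary is the usual reason for the duplicated class names.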
16 changes: 15 additions & 1 deletion src/marqo/s2_inference/s2_inference.py
@@ -2,12 +2,13 @@
The functions defined here would have endpoints, later on.
"""
import numpy as np
from marqo.s2_inference.errors import VectoriseError, InvalidModelPropertiesError, ModelLoadError, UnknownModelError
from marqo.s2_inference.errors import VectoriseError, InvalidModelPropertiesError, ModelLoadError, UnknownModelError, ModelNotLoadedError
from PIL import UnidentifiedImageError
from marqo.s2_inference.model_registry import load_model_properties
from marqo.s2_inference.configs import get_default_device, get_default_normalization, get_default_seq_length
from marqo.s2_inference.types import *
from marqo.s2_inference.logger import get_logger
import torch

logger = get_logger(__name__)

@@ -291,6 +292,19 @@ def _load_model(model_name: str, model_properties: dict, device: str = get_defau

return model

def get_available_models():
    return available_models

def eject_model(model_name: str, device: str):
    model_cache_key = _create_model_cache_key(model_name, device)
    if model_cache_key in available_models:
        del available_models[model_cache_key]
        if device.startswith("cuda"):
            torch.cuda.empty_cache()
        return {"message": f"eject SUCCESS, ejected model_name={model_name} from device={device}"}
    else:
        raise ModelNotLoadedError(f"The model_name={model_name} device={device} is not loaded")

# def normalize(inputs):

# is_valid = False
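The eject flow can be exercised with a minimal, self-contained sketch. The `"||"` separator in the cache key is an assumption (the PR only says separators were added for readability); the real `_create_model_cache_key` and the CUDA cache clearing are not reproduced here:

```python
# Minimal sketch of the model-cache eject flow, assuming a "model_name||device"
# cache-key format (an assumption, not the verified marqo format).
available_models = {}

def _cache_key(model_name: str, device: str) -> str:
    return f"{model_name}||{device}"

def load_stub(model_name: str, device: str) -> None:
    # In marqo this entry would hold the actual loaded model object.
    available_models[_cache_key(model_name, device)] = object()

def eject_model(model_name: str, device: str) -> dict:
    key = _cache_key(model_name, device)
    if key not in available_models:
        raise KeyError(f"model_name={model_name} device={device} is not loaded")
    del available_models[key]
    return {"message": f"eject SUCCESS, ejected model_name={model_name} from device={device}"}

load_stub("ViT-L/14", "cpu")
result = eject_model("ViT-L/14", "cpu")
print(result["message"])
```

Ejecting a key that is absent raises, which mirrors why the real implementation raises `ModelNotLoadedError` rather than silently succeeding.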
26 changes: 26 additions & 0 deletions src/marqo/tensor_search/api.py
@@ -212,6 +212,16 @@ def check_health(marqo_config: config.Config = Depends(generate_config)):
def get_indexes(marqo_config: config.Config = Depends(generate_config)):
    return tensor_search.get_indexes(config=marqo_config)

@app.get("/models")
def get_loaded_models():
    return tensor_search.get_loaded_models()

@app.delete("/models")
def eject_model(model_name: str, model_device: str):
    return tensor_search.eject_model(model_name=model_name, device=model_device)

@app.get("/device/cuda")
def get_cuda_info():
    return tensor_search.get_cuda_info()

# try these curl commands:

# ADD DOCS:
@@ -282,3 +292,19 @@ def get_indexes(marqo_config: config.Config = Depends(generate_config)):
curl -XDELETE http://localhost:8882/indexes/my-first-ix
"""

# check cuda info
"""
curl -XGET http://localhost:8882/device/cuda
"""

# check the loaded models
"""
curl -XGET http://localhost:8882/models
"""

# eject a model
"""
curl -X DELETE 'http://localhost:8882/models?model_name=ViT-L/14&model_device=cuda'
curl -X DELETE 'http://localhost:8882/models?model_name=hf/all_datasets_v4_MiniLM-L6&model_device=cuda'
curl -X DELETE 'http://localhost:8882/models?model_name=hf/all_datasets_v4_MiniLM-L6&model_device=cpu'
"""
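The curl examples above wrap the whole URL in quotes because model names such as `ViT-L/14` contain characters that are unsafe in a raw query string. When building the eject URL programmatically, it is safer to let `urlencode` do the escaping (a sketch; the host and port match the curl examples):

```python
# Build the DELETE /models URL with properly percent-encoded query parameters,
# so the "/" in "ViT-L/14" cannot be mistaken for a path separator.
from urllib.parse import urlencode

base = "http://localhost:8882/models"
params = urlencode({"model_name": "ViT-L/14", "model_device": "cuda"})
url = f"{base}?{params}"
print(url)  # http://localhost:8882/models?model_name=ViT-L%2F14&model_device=cuda
```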
20 changes: 20 additions & 0 deletions src/marqo/tensor_search/tensor_search.py
@@ -54,6 +54,7 @@
from marqo.s2_inference.clip_utils import _is_image
from marqo.s2_inference.reranking import rerank
from marqo.s2_inference import s2_inference
import torch.cuda

# We depend on _httprequests.py for now, but this may be replaced in the future, as
# _httprequests.py is designed for the client
@@ -1238,3 +1239,22 @@ def _get_model_properties(index_info):
f"Please provide model_properties if the model is a custom model and is not supported by default")

return model_properties

def get_loaded_models() -> dict:
    available_models = s2_inference.get_available_models()
    message = {
        'models': [
            {"model_name": ix} for ix in available_models
        ]
    }
    return message

def eject_model(model_name: str, device: str) -> dict:
    try:
        result = s2_inference.eject_model(model_name, device)
    except s2_inference_errors.ModelNotLoadedError as e:
        raise errors.ModelNotLoadedError(message=str(e))
    return result

def get_cuda_info() -> dict:
    return {"device": "cuda",
            "memory_usage": f"{round(torch.cuda.memory_allocated() / 1024**3, 1)} GiB",
            "total_device_memory": f"{round(torch.cuda.get_device_properties(0).total_memory / 1024**3, 1)} GiB"}