Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preload / Prewarm custom models via URL on startup #475

Merged
merged 11 commits into from
May 16, 2023
2 changes: 1 addition & 1 deletion src/marqo/s2_inference/s2_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def _update_available_models(model_cache_key: str, model_name: str, validated_mo
f"and Marqo has access to the weights file.")
else:
most_recently_used_time = datetime.datetime.now()
logger.debug(f'renew {model_name} on device {device} with new time={most_recently_used_time}.')
logger.debug(f'renewed {model_name} on device {device} with new most recently time={most_recently_used_time}.')
try:
available_models[model_cache_key][AvailableModelsKey.most_recently_used_time] = most_recently_used_time
except KeyError:
Expand Down
48 changes: 43 additions & 5 deletions src/marqo/tensor_search/on_start_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from marqo import errors
from marqo.tensor_search.throttling.redis_throttle import throttle
from marqo.connections import redis_driver
from marqo.s2_inference.s2_inference import vectorise


def on_start(marqo_os_url: str):
Expand Down Expand Up @@ -95,6 +96,7 @@ def id_to_device(id):
device_names.append( {'id':device_id, 'name':id_to_device(device_id)})
self.logger.info(f"found devices {device_names}")


class ModelsForCacheing:
"""warms the in-memory model cache by preloading good defaults
"""
Expand All @@ -109,10 +111,12 @@ def __init__(self):
try:
self.models = json.loads(warmed_models)
except json.JSONDecodeError as e:
# TODO: Change error message to match new format
raise errors.EnvVarError(
f"Could not parse environment variable `{EnvVars.MARQO_MODELS_TO_PRELOAD}`. "
f"Please ensure that this a JSON-encoded array of strings. For example:\n"
f"Please ensure that this a JSON-encoded array of strings or dicts. For example:\n"
f"""export {EnvVars.MARQO_MODELS_TO_PRELOAD}='["ViT-L/14", "onnx/all_datasets_v4_MiniLM-L6"]'"""
f"""To add a custom model, it must be a dict with keys `model` and `model_properties` as defined in `https://marqo.pages.dev/0.0.20/Models-Reference/bring_your_own_model/`"""
) from e
else:
self.models = warmed_models
Expand All @@ -123,32 +127,66 @@ def __init__(self):
self.logger.info(f"pre-loading {self.models} onto devices={self.default_devices}")

def run(self):
    """Preload each configured model on each default device and log timings.

    For every (model, device) pair, one warm-up vectorise call forces the
    model to be downloaded/loaded into the in-memory cache, then N further
    calls are timed and the average latency per pair is logged.
    """
    test_string = 'this is a test string'
    N = 10  # number of timed vectorise calls used to compute the average
    messages = []
    for model in self.models:
        for device in self.default_devices:
            self.logger.debug(f"Beginning loading for model: {model} on device: {device}")

            # warm it up: the first call triggers the (possibly slow) model load
            _ = _preload_model(model=model, content=test_string, device=device)

            t = 0
            for n in range(N):
                t0 = time.time()
                _ = _preload_model(model=model, content=test_string, device=device)
                t1 = time.time()
                t += (t1 - t0)
            # average seconds per vectorise call for this model/device pair
            message = f"{(t)/float((N))} for {model} and {device}"
            messages.append(message)
            self.logger.debug(f"{model} {device} vectorise run {N} times.")
            self.logger.info(f"{model} {device} run successfully!")

    for message in messages:
        self.logger.info(message)
    self.logger.info("completed loading models")


def _preload_model(model, content, device):
    """Call vectorise once for `model`, loading it into the cache if needed.

    Args:
        model: either a model name (str) present in the model registry, or a
            dict with keys `model` and `model_properties` describing a custom
            model (e.g. one whose weights are fetched from a URL).
        content: the content to vectorise for the warm-up call.
        device: the device to load the model onto (e.g. "cpu", "cuda").

    Raises:
        errors.EnvVarError: if `model` is a dict missing the `model` or
            `model_properties` key, or is neither a str nor a dict.
    """
    if isinstance(model, str):
        # For models IN REGISTRY: identified by name alone.
        _ = vectorise(
            model_name=model,
            content=content,
            device=device
        )
    elif isinstance(model, dict):
        # For models from URL: need an explicit name and properties.
        # TODO: include validation from on start script (model name, properties, etc.)
        # e.g. _check_model_name(index_settings)
        try:
            _ = vectorise(
                model_name=model["model"],
                model_properties=model["model_properties"],
                content=content,
                device=device
            )
        except KeyError as e:
            raise errors.EnvVarError(
                f"Your custom model {model} is missing either `model` or `model_properties`. "
                f"""To add a custom model, it must be a dict with keys `model` and `model_properties` as defined in `https://marqo.pages.dev/0.0.20/Advanced-Usage/configuration/#configuring-preloaded-models`"""
            ) from e
    else:
        # Surface misconfiguration explicitly rather than skipping silently,
        # consistent with how a malformed JSON env var is handled in __init__.
        raise errors.EnvVarError(
            f"Invalid model value {model} of type {type(model)} in `{EnvVars.MARQO_MODELS_TO_PRELOAD}`. "
            f"Each entry must be a registry model name (str) or a custom model dict."
        )


class InitializeRedis:

def __init__(self, host: str, port: int):
Expand Down
2 changes: 1 addition & 1 deletion src/marqo/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.0.19"
__version__ = "0.0.20"


def get_version() -> str:
Expand Down
110 changes: 107 additions & 3 deletions tests/tensor_search/test_on_start_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

class TestOnStartScript(MarqoTestCase):

def test_preload_models(self):
def test_preload_registry_models(self):
environ_expected_models = [
({enums.EnvVars.MARQO_MODELS_TO_PRELOAD: []}, []),
({enums.EnvVars.MARQO_MODELS_TO_PRELOAD: ""}, []),
Expand All @@ -28,11 +28,11 @@ def test_preload_models(self):
for mock_environ, expected in environ_expected_models:
mock_vectorise = mock.MagicMock()
@mock.patch("os.environ", mock_environ)
@mock.patch("marqo.s2_inference.s2_inference.vectorise", mock_vectorise)
@mock.patch("marqo.tensor_search.on_start_script.vectorise", mock_vectorise)
def run():
model_caching_script = on_start_script.ModelsForCacheing()
model_caching_script.run()
loaded_models = {args[0] for args, kwargs in mock_vectorise.call_args_list}
loaded_models = {kwargs["model_name"] for args, kwargs in mock_vectorise.call_args_list}
assert loaded_models == set(expected)
return True
assert run()
Expand All @@ -47,7 +47,111 @@ def run():
print(str(e))
return True
assert run()

def test_preload_url_models(self):
    """Custom models given via the env var (raw objects or JSON) are preloaded."""
    clip_model_object = {
        "model": "generic-clip-test-model-2",
        "model_properties": {
            "name": "ViT-B/32",
            "dimensions": 512,
            "type": "clip",
            "url": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt"
        }
    }

    # expected entries must be an immutable datatype (they are collected in a set)
    clip_model_expected = (
        "generic-clip-test-model-2",
        "ViT-B/32",
        512,
        "clip",
        "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt"
    )

    open_clip_model_object = {
        "model": "random-open-clip-1",
        "model_properties": {
            "name": "ViT-B-32-quickgelu",
            "dimensions": 512,
            "type": "open_clip",
            "url": "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_avg-8a00ab3c.pt"
        }
    }

    open_clip_model_expected = (
        "random-open-clip-1",
        "ViT-B-32-quickgelu",
        512,
        "open_clip",
        "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_avg-8a00ab3c.pt"
    )

    def vectorise_call_key(kwargs):
        # Flatten one vectorise(**kwargs) call into a hashable summary tuple.
        props = kwargs["model_properties"]
        return (
            kwargs["model_name"],
            props["name"],
            props["dimensions"],
            props["type"],
            props["url"]
        )

    # Covers clip and open_clip, supplied both as raw objects and as a JSON string.
    cases = [
        ({enums.EnvVars.MARQO_MODELS_TO_PRELOAD: [clip_model_object, open_clip_model_object]}, [clip_model_expected, open_clip_model_expected]),
        ({enums.EnvVars.MARQO_MODELS_TO_PRELOAD: json.dumps([clip_model_object, open_clip_model_object])}, [clip_model_expected, open_clip_model_expected])
    ]
    for environ_patch, expected in cases:
        mock_vectorise = mock.MagicMock()
        @mock.patch("os.environ", environ_patch)
        @mock.patch("marqo.tensor_search.on_start_script.vectorise", mock_vectorise)
        def check():
            model_caching_script = on_start_script.ModelsForCacheing()
            model_caching_script.run()
            observed = {vectorise_call_key(kwargs) for args, kwargs in mock_vectorise.call_args_list}
            assert observed == set(expected)
            return True
        assert check()

def test_preload_url_missing_model(self):
    """A custom model object missing the `model` key raises EnvVarError on preload."""
    open_clip_model_object = {
        "model_properties": {
            "name": "ViT-B-32-quickgelu",
            "dimensions": 512,
            "type": "open_clip",
            "url": "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_avg-8a00ab3c.pt"
        }
    }
    mock_vectorise = mock.MagicMock()
    @mock.patch("marqo.tensor_search.on_start_script.vectorise", mock_vectorise)
    @mock.patch("os.environ", {enums.EnvVars.MARQO_MODELS_TO_PRELOAD: [open_clip_model_object]})
    def run():
        model_caching_script = on_start_script.ModelsForCacheing()
        # The missing `model` key should surface as KeyError -> EnvVarError
        # when run() attempts to call vectorise.
        with self.assertRaises(errors.EnvVarError):
            model_caching_script.run()
        return True
    assert run()

def test_preload_url_missing_model_properties(self):
    """A custom model object missing `model_properties` raises EnvVarError on preload."""
    open_clip_model_object = {
        "model": "random-open-clip-1"
    }
    mock_vectorise = mock.MagicMock()
    @mock.patch("marqo.tensor_search.on_start_script.vectorise", mock_vectorise)
    @mock.patch("os.environ", {enums.EnvVars.MARQO_MODELS_TO_PRELOAD: [open_clip_model_object]})
    def run():
        model_caching_script = on_start_script.ModelsForCacheing()
        # The missing `model_properties` key should surface as
        # KeyError -> EnvVarError when run() attempts to call vectorise.
        with self.assertRaises(errors.EnvVarError):
            model_caching_script.run()
        return True
    assert run()

# TODO: test bad/no names/URLS in end-to-end tests, as this logic is done in vectorise call

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we have one more test to ensure the prewarmed model will not need to be downloaded/loaded again?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we have one more test to ensure the prewarmed model will not need to be downloaded/loaded again?

For both search and add_document.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this we can save for the API end-to-end test



Expand Down