From f39d2c4c39907d8afdcc9f17792a0ceaaf3031d2 Mon Sep 17 00:00:00 2001 From: Sftobias <154964601+Sftobias@users.noreply.github.com> Date: Mon, 3 Mar 2025 10:30:02 +0100 Subject: [PATCH] feat: add LLM tool (#89) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: list vllm as a tool * feat: tool yml * feat: vllm job * feat: vllm args * fix: remove quota and images constraint * feat: add huggingface token and model configurations * fix: vllm options typo * fix: tool name and main task * format yaml * refactor nomad job * feat: remove docker image/tags from configuration * feat: update param names * feat: refactor LLM deployment * feat: tokens * feat: deepseek * ci: auto-format from pre-commit.com hooks * fix: clean changes * feat: remove finegrained vllm params * fix: fix tools info retrieval * ci: auto-format from pre-commit.com hooks * feat: move models to conf file * ci: auto-format from pre-commit.com hooks * feat: restore Huggingface token * ci: auto-format from pre-commit.com hooks * fix: fix HF token * feat: rename to `ai4os-llm` * feat: add tests * feat: add more models * fix: HF typo * fix: add parameters for openwebui standalone * feat: automatically create admin for Open WebUI * fix: fix UI password check * feat: reorganize ai4os-llm user.yaml * feat: use https in endpoints list * fix: userconf checks * refactor: rename conf group from `vllm` to `llm` * fix: add needs_HF_token variable to vllm.yaml * feat: reorder models * feat: add force_pull * fix: cvat error * tests: fix test configuration * feat: add tool deployment restrictions --------- Co-authored-by: Ignacio Heredia Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Marta Obregón --- ai4papi/conf.py | 5 + ai4papi/nomad/common.py | 3 +- ai4papi/routers/v1/catalog/tools.py | 19 +- ai4papi/routers/v1/deployments/tools.py | 118 +++++++++++- etc/tools/ai4os-llm/nomad.hcl | 244 ++++++++++++++++++++++++ 
etc/tools/ai4os-llm/user.yaml | 54 ++++++ etc/vllm.yaml | 40 ++++ tests/deployments/tools.py | 208 ++++++++------------ 8 files changed, 556 insertions(+), 135 deletions(-) create mode 100644 etc/tools/ai4os-llm/nomad.hcl create mode 100644 etc/tools/ai4os-llm/user.yaml create mode 100644 etc/vllm.yaml diff --git a/ai4papi/conf.py b/ai4papi/conf.py index b942c783..821a9078 100644 --- a/ai4papi/conf.py +++ b/ai4papi/conf.py @@ -100,6 +100,7 @@ def load_yaml_conf(fpath): tools_nomad2id = { "fl": "ai4os-federated-server", "cvat": "ai4os-cvat", + "llm": "ai4os-llm", "ai4life": "ai4os-ai4life-loader", } for tool in TOOLS.keys(): @@ -110,6 +111,10 @@ def load_yaml_conf(fpath): with open(paths["conf"] / "oscar.yaml", "r") as f: OSCAR_TMPL = Template(f.read()) +# vLLM conf +with open(paths["conf"] / "vllm.yaml", "r") as f: + VLLM = yaml.safe_load(f) + # Try-me endpoints nmd = load_nomad_job(paths["conf"] / "try_me" / "nomad.hcl") TRY_ME = { diff --git a/ai4papi/nomad/common.py b/ai4papi/nomad/common.py index 6fbd6d6b..0ab6f40b 100644 --- a/ai4papi/nomad/common.py +++ b/ai4papi/nomad/common.py @@ -115,6 +115,7 @@ def get_deployment( info["docker_image"] = usertask["Config"]["image"] command = usertask["Config"].get("command", "") args = usertask["Config"].get("args", []) + args[:] = [str(a) for a in args] info["docker_command"] = f"{command} {' '.join(args)}".strip() # Add endpoints @@ -136,7 +137,7 @@ def get_deployment( if label == "deepaas": label = "api" - info["endpoints"][label] = f"http://{url}" + info["endpoints"][label] = f"https://{url}" # Add '/ui' to deepaas endpoint # If in the future we support other APIs, this will have to be removed. 
diff --git a/ai4papi/routers/v1/catalog/tools.py b/ai4papi/routers/v1/catalog/tools.py index af1d9ab4..aabaee4a 100644 --- a/ai4papi/routers/v1/catalog/tools.py +++ b/ai4papi/routers/v1/catalog/tools.py @@ -36,22 +36,22 @@ def get_config( metadata = self.get_metadata(item_name) # Modify the resources limits for a given user or VO - if conf.get("hardware", None): + if "hardware" in conf.keys(): conf["hardware"] = quotas.limit_resources( item_name=item_name, vo=vo, ) - # Parse docker registry - registry = metadata["links"]["docker_image"] - repo, image = registry.split("/")[-2:] - if repo not in ["deephdc", "ai4oshub"]: - repo = "ai4oshub" - # Fill with correct Docker image and tags if item_name in ["ai4os-federated-server", "ai4os-ai4life-loader"]: + # Parse docker registry + registry = metadata["links"]["docker_image"] + repo, image = registry.split("/")[-2:] + if repo not in ["deephdc", "ai4oshub"]: + repo = "ai4oshub" conf["general"]["docker_image"]["value"] = f"{repo}/{image}" + # Retrieve Docker tags tags = retrieve_docker_tags(image=image, repo=repo) conf["general"]["docker_tag"]["options"] = tags conf["general"]["docker_tag"]["value"] = tags[0] @@ -67,6 +67,11 @@ def get_config( if models: conf["hardware"]["gpu_type"]["options"] += models + if item_name == "ai4os-llm": + models = list(papiconf.VLLM["models"].keys()) + conf["llm"]["model_id"]["options"] = models + conf["llm"]["model_id"]["value"] = models[0] + return conf diff --git a/ai4papi/routers/v1/deployments/tools.py b/ai4papi/routers/v1/deployments/tools.py index 423d167b..5018e9c4 100644 --- a/ai4papi/routers/v1/deployments/tools.py +++ b/ai4papi/routers/v1/deployments/tools.py @@ -1,4 +1,5 @@ from copy import deepcopy +import json import re import secrets import types @@ -195,9 +196,21 @@ def create_deployment( detail="This ID does not correspond to an available tool.", ) + # Check if you are allowed to deploy the tool + restrictions = {"ai4os-llm": ["vo.imagine-ai.eu"]} + if vo in 
restrictions.get(tool_name, []): + raise HTTPException( + status_code=403, + detail="Your VO doesn't allow to deploy this tool.", + ) + # Load tool configuration nomad_conf = deepcopy(papiconf.TOOLS[tool_name]["nomad"]) user_conf = deepcopy(papiconf.TOOLS[tool_name]["user"]["values"]) + # TODO: given that some parts of the configuration are dynamically generated + # (eg. model_id in ai4life/vllm) we should read "user_conf" from the catalog + # We have to apply conversion to only keep the values + # Same goes for modules # Update values conf in case we received a submitted conf if conf is not None: @@ -210,8 +223,8 @@ def create_deployment( user_conf = utils.validate_conf(user_conf) # Check if the provided configuration is within the job quotas - # Skip this check with CVAT because it does not have a "hardware" section in the conf - if tool_name not in ["ai4os-cvat"]: + # We only do this for tools that have a "hardware" section in the conf + if "hardware" in user_conf.keys(): quotas.check_jobwise( conf=user_conf, vo=vo, @@ -367,7 +380,106 @@ def create_deployment( # Convert template to Nomad conf nomad_conf = nomad.load_job_conf(nomad_conf) - # Deploy a CVAT tool + # Deploy a OpenWebUI+vllm tool + elif tool_name == "ai4os-llm": + vllm_args = [] + + if user_conf["llm"]["type"] == "open-webui": + # Check if user has provided OpenAPI key/url + if not ( + user_conf["llm"]["openai_api_key"] + and user_conf["llm"]["openai_api_url"] + ): + raise HTTPException( + status_code=400, + detail="You need to define an OpenAI key and url to deploy Open WebUI as standalone.", + ) + api_token = user_conf["llm"]["openai_api_key"] + api_endpoint = user_conf["llm"]["openai_api_url"] + + if user_conf["llm"]["type"] in ["open-webui", "both"]: + # Check if user has provided a password + if not user_conf["llm"]["ui_password"]: + raise HTTPException( + status_code=400, + detail="A password is required to deploy this tool.", + ) + + if user_conf["llm"]["type"] in ["vllm", "both"]: + # Create a 
OpenAPI key secret for the vLLM deployment + api_token = secrets.token_hex() + _ = ai4secrets.create_secret( + vo=vo, + secret_path=f"deployments/{job_uuid}/llm/vllm", + secret_data={"token": api_token}, + authorization=SimpleNamespace( + credentials=authorization.credentials, + ), + ) + api_endpoint = ( + f"https://vllm-{job_uuid}" + ".${meta.domain}" + f"-{base_domain}/v1" + ) + + # Configure VLLM args + model_id = user_conf["llm"]["model_id"] + vllm_args += ["--model", model_id] + vllm_args += papiconf.VLLM["models"][model_id]["args"] + + # Check if HF token is needed + if ( + papiconf.VLLM["models"][model_id]["needs_HF_token"] + and not user_conf["llm"]["HF_token"] + ): + raise HTTPException( + status_code=400, + detail="This model requires a valid Huggingface token for deployment.", + ) + + # Replace the Nomad job template + nomad_conf = nomad_conf.safe_substitute( + { + "JOB_UUID": job_uuid, + "NAMESPACE": papiconf.MAIN_CONF["nomad"]["namespaces"][vo], + "PRIORITY": priority, + "OWNER": auth_info["id"], + "OWNER_NAME": auth_info["name"], + "OWNER_EMAIL": auth_info["email"], + "TITLE": user_conf["general"]["title"][:45], + "DESCRIPTION": user_conf["general"]["desc"][:1000], + "BASE_DOMAIN": base_domain, + "HOSTNAME": job_uuid, + "VLLM_ARGS": json.dumps(vllm_args), + "API_TOKEN": api_token, + "API_ENDPOINT": api_endpoint, + "HUGGINGFACE_TOKEN": user_conf["llm"]["HF_token"], + "OPEN_WEBUI_PASSWORD": user_conf["llm"]["ui_password"], + } + ) + + # Convert template to Nomad conf + nomad_conf = nomad.load_job_conf(nomad_conf) + + # Define what to exclude + if user_conf["llm"]["type"] == "vllm": + exclude_tasks = ["open-webui", "create-admin"] + exclude_services = ["ui"] + elif user_conf["llm"]["type"] == "open-webui": + exclude_tasks = ["vllm"] + exclude_services = ["vllm"] + else: + exclude_tasks, exclude_services = [], [] + + tasks = nomad_conf["TaskGroups"][0]["Tasks"] + tasks[:] = [t for t in tasks if t["Name"] not in exclude_tasks] + + services = 
nomad_conf["TaskGroups"][0]["Services"] + services[:] = [s for s in services if s["PortLabel"] not in exclude_services] + + # Rename first task as main task + t = tasks[0] + t["Name"] = "main" + + # Deploy AI4Life tool elif tool_name == "ai4os-ai4life-loader": # Replace the Nomad job template nomad_conf = nomad_conf.safe_substitute( diff --git a/etc/tools/ai4os-llm/nomad.hcl b/etc/tools/ai4os-llm/nomad.hcl new file mode 100644 index 00000000..6ecb5c8b --- /dev/null +++ b/etc/tools/ai4os-llm/nomad.hcl @@ -0,0 +1,244 @@ +/* +Convention: +----------- +* ${UPPERCASE} are replaced by the user +* ${lowercase} are replaced by Nomad at launch time +* remaining is default, same for everybody + +When replacing user values we use safe_substitute() so that we don't get an error for not +replacing Nomad values +*/ + +job "tool-llm-${JOB_UUID}" { + namespace = "${NAMESPACE}" + type = "service" + region = "global" + id = "${JOB_UUID}" + priority = "${PRIORITY}" + + meta { + owner = "${OWNER}" # user-id from OIDC + owner_name = "${OWNER_NAME}" + owner_email = "${OWNER_EMAIL}" + title = "${TITLE}" + description = "${DESCRIPTION}" + } + + # Only use nodes that have successfully passed the ai4-nomad_tests (ie. meta.status=ready) + constraint { + attribute = "${meta.status}" + operator = "regexp" + value = "ready" + } + + # Only launch in compute nodes (to avoid clashing with system jobs, eg. Traefik) + constraint { + attribute = "${meta.compute}" + operator = "=" + value = "true" + } + + # Only deploy in nodes serving that namespace (we use metadata instead of node-pools + # because Nomad does not allow a node to belong to several node pools) + constraint { + attribute = "${meta.namespace}" + operator = "regexp" + value = "${NAMESPACE}" + } + + # Try to deploy iMagine jobs on nodes that are iMagine-exclusive + # In this way, we leave AI4EOSC nodes for AI4EOSC users and for iMagine users only + # when iMagine nodes are fully booked. 
+ affinity { + attribute = "${meta.namespace}" + operator = "regexp" + value = "ai4eosc" + weight = -100 # anti-affinity for ai4eosc clients + } + + # CPU-only jobs should deploy *preferably* on CPU clients (affinity) to avoid + # overloading GPU clients with CPU-only jobs. + affinity { + attribute = "${meta.tags}" + operator = "regexp" + value = "cpu" + weight = 100 + } + + # Avoid rescheduling the job on **other** nodes during a network cut + # Command not working due to https://github.com/hashicorp/nomad/issues/16515 + reschedule { + attempts = 0 + unlimited = false + } + + group "usergroup" { + + # Avoid rescheduling the job when the node fails: + # * if the node is lost for good, you would need to manually redeploy, + # * if the node is unavailable due to a network cut, you will recover the job (and + # your saved data) once the network comes back. + prevent_reschedule_on_lost = true + + network { + + port "ui" { + to = 8080 + } + port "vllm" { + to = 8000 + } + } + + service { + name = "${JOB_UUID}-ui" + port = "ui" + tags = [ + "traefik.enable=true", + "traefik.http.routers.${JOB_UUID}-ui.tls=true", + "traefik.http.routers.${JOB_UUID}-ui.rule=Host(`ui-${HOSTNAME}.${meta.domain}-${BASE_DOMAIN}`, `www.ui-${HOSTNAME}.${meta.domain}-${BASE_DOMAIN}`)", + ] + } + + service { + name = "${JOB_UUID}-vllm" + port = "vllm" + tags = [ + "traefik.enable=true", + "traefik.http.routers.${JOB_UUID}-vllm.tls=true", + "traefik.http.routers.${JOB_UUID}-vllm.rule=Host(`vllm-${HOSTNAME}.${meta.domain}-${BASE_DOMAIN}`, `www.vllm-${HOSTNAME}.${meta.domain}-${BASE_DOMAIN}`)", + ] + } + + ephemeral_disk { + size = 4096 + } + + task "vllm" { + + driver = "docker" + + config { + force_pull = true + image = "vllm/vllm-openai:latest" + ports = ["vllm"] + args = ${VLLM_ARGS} + } + + env { + HUGGING_FACE_HUB_TOKEN = "${HUGGINGFACE_TOKEN}" + VLLM_API_KEY = "${API_TOKEN}" + } + + resources { + cores = 8 + memory = 16000 + + device "gpu" { + count = 1 + + # Add a constraint for a particular 
GPU model + constraint { + attribute = "${device.model}" + operator = "=" + value = "Tesla T4" + } + + } + } + + } + + task "open-webui" { + + driver = "docker" + + config { + force_pull = true + image = "ghcr.io/open-webui/open-webui:main" + ports = ["ui"] + } + + env { + OPENAI_API_KEY = "${API_TOKEN}" + OPENAI_API_BASE_URL = "${API_ENDPOINT}" + WEBUI_AUTH = true + } + + resources { # UI needs a fair amount of resources because it's also doing RAG + cores = 4 + memory = 8000 + } + } + + + task "create-admin" { + # Open WebUI does not allow to create admin from configuration, so we have + # to make an HTTP call to create it, in order not to leave the UI vulnerable + + lifecycle { + hook = "poststart" + sidecar = false + } + + driver = "docker" + + config { + force_pull = true + image = "python:slim-bullseye" + command = "bash" + args = ["local/create_admin.sh"] + } + + env { + OPEN_WEBUI_URL = "https://ui-${HOSTNAME}.${meta.domain}-${BASE_DOMAIN}" + NAME = "${OWNER_NAME}" + EMAIL = "${OWNER_EMAIL}" + PASSWORD = "${OPEN_WEBUI_PASSWORD}" + } + + template { + data = <<-EOF + #!/bin/bash + + pip install requests + + python -c """ + import os + import time + + import requests + + + # Define the URL + base_url = os.getenv('OPEN_WEBUI_URL') + + # Define the JSON data + data = { + 'name': os.getenv('NAME'), + 'email': os.getenv('EMAIL'), + 'password': os.getenv('PASSWORD'), + 'profile_image_url': '/user.png' + } + + # Make the POST request (we repeat it because Open WebUI can take some time to warm) + while True: + r = requests.post(f'{base_url}/api/v1/auths/signup', json=data) + if not r.ok: + print(f'Error: status code {r.status_code}') + time.sleep(1) + else: + break + + print(f'Status Code: {r.status_code}') + print(f'Response Content: {r.text}') + """ + EOF + destination = "local/create_admin.sh" + } + + + } + + } +} diff --git a/etc/tools/ai4os-llm/user.yaml b/etc/tools/ai4os-llm/user.yaml new file mode 100644 index 00000000..dd70700f --- /dev/null +++ 
b/etc/tools/ai4os-llm/user.yaml @@ -0,0 +1,54 @@ +--- +# User customizable configuration to make a deployment in Nomad. +# Additional non-customizable values (eg. ports) are hardcoded in `job.nomad`. + +# All conf parameters follow the same structure: +# varname: +# name: name of the parameter to be displayed to end user (mandatory) +# value: (default) value of the parameter (mandatory) +# options: restricted set of values that the parameter can take (optional) +# description: some comments on the parameter to be displayed to the end user (optional) + +general: + title: + name: Deployment title + value: '' + description: Provide short title for this deployment (less than 45 characters). Useful when you have lots of different active deployments. + + desc: + name: Deployment description + value: '' + description: Provide some additional extended information about this deployment. + +llm: + type: + name: Deployment type + value: 'both' + description: Sub-components to deploy. + options: ['both', 'vllm', 'open-webui'] + + model_id: + name: LLM modelname + value: '' + description: Large Language Model to use (retrieve from Huggingface). + options: [] + + ui_password: + name: Open WebUI password + value: '' + description: Admin password for Open WebUI + + HF_token: + name: Huggingface token + value: '' + description: Needed for the deployment of some gated models. + + openai_api_key: + name: OpenAI API key + value: '' + description: Needed when deploying Open WebUI as standalone. + + openai_api_url: + name: OpenAI API base url + value: '' + description: Needed when deploying Open WebUI as standalone. diff --git a/etc/vllm.yaml b/etc/vllm.yaml new file mode 100644 index 00000000..a5a66315 --- /dev/null +++ b/etc/vllm.yaml @@ -0,0 +1,40 @@ +--- +# In non-quantized models, we have to specify "--dtype float16" because the default +# bfloat16 is only supported in GPUs with compute capability +8.0 (NVIDIA T4 has 7.5). 
+ +# Models are sorted in rough performance order + +models: + + Qwen/Qwen2.5-7B-Instruct-AWQ: + needs_HF_token: False + args: ['--quantization', 'awq'] + + Qwen/Qwen2.5-Coder-7B-Instruct-AWQ: + needs_HF_token: False + args: ['--quantization', 'awq'] + + Qwen/Qwen2.5-Math-1.5B-Instruct: + needs_HF_token: False + args: ['--dtype', 'float16'] + + deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B: + needs_HF_token: False + args: ['--dtype', 'float16'] + + meta-llama/Llama-3.2-3B: + needs_HF_token: True + args: ['--dtype', 'float16'] + + meta-llama/Llama-3.2-3B-Instruct: + needs_HF_token: True + args: ['--dtype', 'float16'] + + # TODO: Add this small vision model in the future. It does not work with the current Transformers library. + # ERROR 02-12 04:48:33 engine.py:389] ValueError: The checkpoint you are trying to load has model type `qwen2_5_vl` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date. + + # Qwen/Qwen2.5-VL-3B-Instruct: + # needs_HF_token: False + # args: [ + # "--dtype", "float16", + # ] diff --git a/tests/deployments/tools.py b/tests/deployments/tools.py index 3d11113f..c2345e3f 100644 --- a/tests/deployments/tools.py +++ b/tests/deployments/tools.py @@ -18,89 +18,12 @@ that ENV variable.' 
) -print(" Testing FL server") - -# Create tool -rcreate = tools.create_deployment( - vo="vo.ai4eosc.eu", - tool_name="ai4os-federated-server", - conf={}, - authorization=SimpleNamespace(credentials=token), -) -assert isinstance(rcreate, dict) -assert "job_ID" in rcreate.keys() - -time.sleep(0.2) # Nomad takes some time to allocate deployment - -# Retrieve that tool -rdep = tools.get_deployment( - vo="vo.ai4eosc.eu", - deployment_uuid=rcreate["job_ID"], - authorization=SimpleNamespace(credentials=token), -) -assert isinstance(rdep, dict) -assert "job_ID" in rdep.keys() -assert rdep["job_ID"] == rcreate["job_ID"] -assert rdep["status"] != "error" - -# Retrieve all tools -rdeps = tools.get_deployments( - vos=["vo.ai4eosc.eu"], - authorization=SimpleNamespace(credentials=token), -) -assert isinstance(rdeps, list) -assert any([d["job_ID"] == rcreate["job_ID"] for d in rdeps]) -assert all([d["job_ID"] != "error" for d in rdeps]) - -# Check that we cannot retrieve that tool from modules -# This should break! 
-# modules.get_deployment( -# vo='vo.ai4eosc.eu', -# deployment_uuid=rcreate['job_ID'], -# authorization=SimpleNamespace( -# credentials=token -# ), -# ) - -# Check that we cannot retrieve that tool from modules list -rdeps2 = modules.get_deployments( - vos=["vo.ai4eosc.eu"], - authorization=SimpleNamespace(credentials=token), -) -assert isinstance(rdeps2, list) -assert not any([d["job_ID"] == rcreate["job_ID"] for d in rdeps2]) - -# Delete tool -rdel = tools.delete_deployment( - vo="vo.ai4eosc.eu", - deployment_uuid=rcreate["job_ID"], - authorization=SimpleNamespace(credentials=token), -) -assert isinstance(rdel, dict) -assert "status" in rdel.keys() - -time.sleep(3) # Nomad takes some time to delete - -# Check tool no longer exists -rdeps3 = tools.get_deployments( - vos=["vo.ai4eosc.eu"], - authorization=SimpleNamespace(credentials=token), -) -assert not any([d["job_ID"] == rcreate["job_ID"] for d in rdeps3]) - -############################################################ -# Additionally test simply the creation of the other tools # -############################################################ - -print(" Testing CVAT") - -# Create tool -rcreate = tools.create_deployment( - vo="vo.ai4eosc.eu", - tool_name="ai4os-cvat", - conf={ + +# Only use mandatory config parameters, otherwise use defaults +tools_config = { + "ai4os-federated-server": {}, + "ai4os-cvat": { "general": { - "title": "CVAT test", "cvat_username": "mock_user", "cvat_password": "mock_password", }, @@ -112,51 +35,88 @@ "rclone_password": "mock_password", }, }, - authorization=SimpleNamespace(credentials=token), -) -assert isinstance(rcreate, dict) -assert "job_ID" in rcreate.keys() -assert rdep["status"] != "error" - -time.sleep(0.2) # Nomad takes some time to allocate deployment - -# Delete tool -rdel = tools.delete_deployment( - vo="vo.ai4eosc.eu", - deployment_uuid=rcreate["job_ID"], - authorization=SimpleNamespace(credentials=token), -) -assert isinstance(rdel, dict) -assert "status" in 
rdel.keys() - - -print(" Testing AI4Life loader") - -# Create tool -rcreate = tools.create_deployment( - vo="vo.ai4eosc.eu", - tool_name="ai4os-ai4life-loader", - conf={ + "ai4os-ai4life-loader": { "general": { - "title": "AI4Life test", "model_id": "happy-elephant", }, }, - authorization=SimpleNamespace(credentials=token), -) -assert isinstance(rcreate, dict) -assert "job_ID" in rcreate.keys() -assert rdep["status"] != "error" - -time.sleep(0.2) # Nomad takes some time to allocate deployment - -# Delete tool -rdel = tools.delete_deployment( - vo="vo.ai4eosc.eu", - deployment_uuid=rcreate["job_ID"], - authorization=SimpleNamespace(credentials=token), -) -assert isinstance(rdel, dict) -assert "status" in rdel.keys() + "ai4os-llm": { + "llm": { + "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "ui_password": "papi-test", + }, + }, +} + +for tname, tconfig in tools_config.items(): + print(f" Testing tool: {tname}") + + # Create tool + rcreate = tools.create_deployment( + vo="vo.ai4eosc.eu", + tool_name=tname, + conf=tconfig, + authorization=SimpleNamespace(credentials=token), + ) + assert isinstance(rcreate, dict) + assert "job_ID" in rcreate.keys() + + time.sleep(0.2) # Nomad takes some time to allocate deployment + + # Retrieve that tool + rdep = tools.get_deployment( + vo="vo.ai4eosc.eu", + deployment_uuid=rcreate["job_ID"], + authorization=SimpleNamespace(credentials=token), + ) + assert isinstance(rdep, dict) + assert "job_ID" in rdep.keys() + assert rdep["job_ID"] == rcreate["job_ID"] + assert rdep["status"] != "error" + + # Retrieve all tools + rdeps = tools.get_deployments( + vos=["vo.ai4eosc.eu"], + authorization=SimpleNamespace(credentials=token), + ) + assert isinstance(rdeps, list) + assert any([d["job_ID"] == rcreate["job_ID"] for d in rdeps]) + assert all([d["job_ID"] != "error" for d in rdeps]) + + # Check that we cannot retrieve that tool from modules + # This should break! 
+ # modules.get_deployment( + # vo='vo.ai4eosc.eu', + # deployment_uuid=rcreate['job_ID'], + # authorization=SimpleNamespace( + # credentials=token + # ), + # ) + + # Check that we cannot retrieve that tool from modules list + rdeps2 = modules.get_deployments( + vos=["vo.ai4eosc.eu"], + authorization=SimpleNamespace(credentials=token), + ) + assert isinstance(rdeps2, list) + assert not any([d["job_ID"] == rcreate["job_ID"] for d in rdeps2]) + + # Delete tool + rdel = tools.delete_deployment( + vo="vo.ai4eosc.eu", + deployment_uuid=rcreate["job_ID"], + authorization=SimpleNamespace(credentials=token), + ) + assert isinstance(rdel, dict) + assert "status" in rdel.keys() + + time.sleep(3) # Nomad takes some time to delete + + # Check tool no longer exists + rdeps3 = tools.get_deployments( + vos=["vo.ai4eosc.eu"], + authorization=SimpleNamespace(credentials=token), + ) + assert not any([d["job_ID"] == rcreate["job_ID"] for d in rdeps3]) print("Deployments (tools) tests passed!")