Skip to content

Commit

Permalink
feat: add LLM tool (#89)
Browse files Browse the repository at this point in the history
* feat: list vllm as a tool

* feat: tool yml

* feat: vllm job

* feat: vllm args

* fix: remove quota and images constraint

* feat: add huggingface token and model configurations

* fix: vllm options typo

* fix: tool name and main task

* format yaml

* refactor nomad job

* feat: remove docker image/tags from configuration

* feat: update param names

* feat: refactor LLM deployment

* feat: tokens

* feat: deepseek

* ci: auto-format from pre-commit.com hooks

* fix: clean changes

* feat: remove finegrained vllm params

* fix: fix tools info retrieval

* ci: auto-format from pre-commit.com hooks

* feat: move models to conf file

* ci: auto-format from pre-commit.com hooks

* feat: restore Huggingface token

* ci: auto-format from pre-commit.com hooks

* fix: fix HF token

* feat: rename to `ai4os-llm`

* feat: add tests

* feat: add more models

* fix: HF typo

* fix: add parameters for openwebui standalone

* feat: automatically create admin for Open WebUI

* fix: fix UI password check

* feat: reorganize ai4os-llm user.yaml

* feat: use https in endpoints list

* fix: userconf checks

* refactor: rename conf group from `vllm` to `llm`

* fix: add needs_HF_token variable to vllm.yaml

* feat: reorder models

* feat: add force_pull

* fix: cvat error

* tests: fix test configuration

* feat: add tool deployment restrictions

---------

Co-authored-by: Ignacio Heredia <iheredia@ifca.unican.es>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Marta Obregón <obregonm@ifca.unican.es>
  • Loading branch information
4 people authored Mar 3, 2025
1 parent 0accdb5 commit f39d2c4
Show file tree
Hide file tree
Showing 8 changed files with 556 additions and 135 deletions.
5 changes: 5 additions & 0 deletions ai4papi/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ def load_yaml_conf(fpath):
# Map short tool aliases to their canonical Nomad tool identifiers.
tools_nomad2id = dict(
    fl="ai4os-federated-server",
    cvat="ai4os-cvat",
    llm="ai4os-llm",
    ai4life="ai4os-ai4life-loader",
)
for tool in TOOLS.keys():
Expand All @@ -110,6 +111,10 @@ def load_yaml_conf(fpath):
# OSCAR service template, loaded once at import time
oscar_conf_path = paths["conf"] / "oscar.yaml"
with open(oscar_conf_path, "r") as oscar_file:
    OSCAR_TMPL = Template(oscar_file.read())

# vLLM configuration (model catalog used by the ai4os-llm tool)
vllm_conf_path = paths["conf"] / "vllm.yaml"
with open(vllm_conf_path, "r") as vllm_file:
    VLLM = yaml.safe_load(vllm_file)

# Try-me endpoints
nmd = load_nomad_job(paths["conf"] / "try_me" / "nomad.hcl")
TRY_ME = {
Expand Down
3 changes: 2 additions & 1 deletion ai4papi/nomad/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def get_deployment(
info["docker_image"] = usertask["Config"]["image"]
command = usertask["Config"].get("command", "")
args = usertask["Config"].get("args", [])
args[:] = [str(a) for a in args]
info["docker_command"] = f"{command} {' '.join(args)}".strip()

# Add endpoints
Expand All @@ -136,7 +137,7 @@ def get_deployment(
if label == "deepaas":
label = "api"

info["endpoints"][label] = f"http://{url}"
info["endpoints"][label] = f"https://{url}"

# Add '/ui' to deepaas endpoint
# If in the future we support other APIs, this will have to be removed.
Expand Down
19 changes: 12 additions & 7 deletions ai4papi/routers/v1/catalog/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,22 +36,22 @@ def get_config(
metadata = self.get_metadata(item_name)

# Modify the resources limits for a given user or VO
if conf.get("hardware", None):
if "hardware" in conf.keys():
conf["hardware"] = quotas.limit_resources(
item_name=item_name,
vo=vo,
)

# Parse docker registry
registry = metadata["links"]["docker_image"]
repo, image = registry.split("/")[-2:]
if repo not in ["deephdc", "ai4oshub"]:
repo = "ai4oshub"

# Fill with correct Docker image and tags
if item_name in ["ai4os-federated-server", "ai4os-ai4life-loader"]:
# Parse docker registry
registry = metadata["links"]["docker_image"]
repo, image = registry.split("/")[-2:]
if repo not in ["deephdc", "ai4oshub"]:
repo = "ai4oshub"
conf["general"]["docker_image"]["value"] = f"{repo}/{image}"

# Retrieve Docker tags
tags = retrieve_docker_tags(image=image, repo=repo)
conf["general"]["docker_tag"]["options"] = tags
conf["general"]["docker_tag"]["value"] = tags[0]
Expand All @@ -67,6 +67,11 @@ def get_config(
if models:
conf["hardware"]["gpu_type"]["options"] += models

if item_name == "ai4os-llm":
models = list(papiconf.VLLM["models"].keys())
conf["llm"]["model_id"]["options"] = models
conf["llm"]["model_id"]["value"] = models[0]

return conf


Expand Down
118 changes: 115 additions & 3 deletions ai4papi/routers/v1/deployments/tools.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from copy import deepcopy
import json
import re
import secrets
import types
Expand Down Expand Up @@ -195,9 +196,21 @@ def create_deployment(
detail="This ID does not correspond to an available tool.",
)

    # Check if you are allowed to deploy the tool
restrictions = {"ai4os-llm": ["vo.imagine-ai.eu"]}
if vo in restrictions.get(tool_name, []):
raise HTTPException(
status_code=403,
detail="Your VO doesn't allow to deploy this tool.",
)

# Load tool configuration
nomad_conf = deepcopy(papiconf.TOOLS[tool_name]["nomad"])
user_conf = deepcopy(papiconf.TOOLS[tool_name]["user"]["values"])
# TODO: given that some parts of the configuration are dynamically generated
# (eg. model_id in ai4life/vllm) we should read "user_conf" from the catalog
# We have to apply conversion to only keep the values
# Same goes for modules

# Update values conf in case we received a submitted conf
if conf is not None:
Expand All @@ -210,8 +223,8 @@ def create_deployment(
user_conf = utils.validate_conf(user_conf)

# Check if the provided configuration is within the job quotas
# Skip this check with CVAT because it does not have a "hardware" section in the conf
if tool_name not in ["ai4os-cvat"]:
# We only do this for tools that have a "hardware" section in the conf
if "hardware" in user_conf.keys():
quotas.check_jobwise(
conf=user_conf,
vo=vo,
Expand Down Expand Up @@ -367,7 +380,106 @@ def create_deployment(
# Convert template to Nomad conf
nomad_conf = nomad.load_job_conf(nomad_conf)

# Deploy a CVAT tool
    # Deploy an OpenWebUI+vLLM tool
elif tool_name == "ai4os-llm":
vllm_args = []

if user_conf["llm"]["type"] == "open-webui":
            # Check if user has provided an OpenAI API key/url
if not (
user_conf["llm"]["openai_api_key"]
and user_conf["llm"]["openai_api_url"]
):
raise HTTPException(
status_code=400,
detail="You need to define an OpenAI key and url to deploy Open WebUI as standalone.",
)
api_token = user_conf["llm"]["openai_api_key"]
api_endpoint = user_conf["llm"]["openai_api_url"]

if user_conf["llm"]["type"] in ["openwebui", "both"]:
# Check if user has provided a password
if not user_conf["llm"]["ui_password"]:
raise HTTPException(
status_code=400,
detail="A password is required to deploy this tool.",
)

if user_conf["llm"]["type"] in ["vllm", "both"]:
            # Create an OpenAI API key secret for the vLLM deployment
api_token = secrets.token_hex()
_ = ai4secrets.create_secret(
vo=vo,
secret_path=f"deployments/{job_uuid}/llm/vllm",
secret_data={"token": api_token},
authorization=SimpleNamespace(
credentials=authorization.credentials,
),
)
api_endpoint = (
f"https://vllm-{job_uuid}" + ".${meta.domain}" + f"-{base_domain}/v1"
)

# Configure VLLM args
model_id = user_conf["llm"]["model_id"]
vllm_args += ["--model", model_id]
vllm_args += papiconf.VLLM["models"][model_id]["args"]

# Check if HF token is needed
if (
papiconf.VLLM["models"][model_id]["needs_HF_token"]
and not user_conf["llm"]["HF_token"]
):
raise HTTPException(
status_code=400,
detail="This model requires a valid Huggingface token for deployment.",
)

# Replace the Nomad job template
nomad_conf = nomad_conf.safe_substitute(
{
"JOB_UUID": job_uuid,
"NAMESPACE": papiconf.MAIN_CONF["nomad"]["namespaces"][vo],
"PRIORITY": priority,
"OWNER": auth_info["id"],
"OWNER_NAME": auth_info["name"],
"OWNER_EMAIL": auth_info["email"],
"TITLE": user_conf["general"]["title"][:45],
"DESCRIPTION": user_conf["general"]["desc"][:1000],
"BASE_DOMAIN": base_domain,
"HOSTNAME": job_uuid,
"VLLM_ARGS": json.dumps(vllm_args),
"API_TOKEN": api_token,
"API_ENDPOINT": api_endpoint,
"HUGGINGFACE_TOKEN": user_conf["llm"]["HF_token"],
"OPEN_WEBUI_PASSWORD": user_conf["llm"]["ui_password"],
}
)

# Convert template to Nomad conf
nomad_conf = nomad.load_job_conf(nomad_conf)

# Define what to exclude
if user_conf["llm"]["type"] == "vllm":
exclude_tasks = ["open-webui", "create-admin"]
exclude_services = ["ui"]
elif user_conf["llm"]["type"] == "open-webui":
exclude_tasks = ["vllm"]
exclude_services = ["vllm"]
else:
exclude_tasks, exclude_services = [], []

tasks = nomad_conf["TaskGroups"][0]["Tasks"]
tasks[:] = [t for t in tasks if t["Name"] not in exclude_tasks]

services = nomad_conf["TaskGroups"][0]["Services"]
services[:] = [s for s in services if s["PortLabel"] not in exclude_services]

# Rename first task as main task
t = tasks[0]
t["Name"] = "main"

# Deploy AI4Life tool
elif tool_name == "ai4os-ai4life-loader":
# Replace the Nomad job template
nomad_conf = nomad_conf.safe_substitute(
Expand Down
Loading

0 comments on commit f39d2c4

Please sign in to comment.