Skip to content

Commit

Permalink
feat: add LLM tool (#89)
Browse files Browse the repository at this point in the history
* feat: list vllm as a tool

* feat: tool yml

* feat: vllm job

* feat: vllm args

* fix: remove quota and images constraint

* feat: add huggingface token and model configurations

* fix: vllm options typo

* fix: tool name and main task

* format yaml

* refactor nomad job

* feat: remove docker image/tags from configuration

* feat: update param names

* feat: refactor LLM deployment

* feat: tokens

* feat: deepseek

* ci: auto-format from pre-commit.com hooks

* fix: clean changes

* feat: remove finegrained vllm params

* fix: fix tools info retrieval

* ci: auto-format from pre-commit.com hooks

* feat: move models to conf file

* ci: auto-format from pre-commit.com hooks

* feat: restore Huggingface token

* ci: auto-format from pre-commit.com hooks

* fix: fix HF token

* feat: rename to `ai4os-llm`

* feat: add tests

* feat: add more models

* fix: HF typo

* fix: add parameters for openwebui standalone

* feat: automatically create admin for Open WebUI

* fix: fix UI password check

* feat: reorganize ai4os-llm user.yaml

* feat: use https in endpoints list

* fix: userconf checks

* refactor: rename conf group from `vllm` to `llm`

* fix: add needs_HF_token variable to vllm.yaml

* feat: reorder models

* feat: add force_pull

* fix: cvat error

* tests: fix test configuration

* feat: add tool deployment restrictions

---------

Co-authored-by: Ignacio Heredia <iheredia@ifca.unican.es>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Marta Obregón <obregonm@ifca.unican.es>
  • Loading branch information
4 people authored Mar 3, 2025
1 parent 0accdb5 commit f39d2c4
Show file tree
Hide file tree
Showing 8 changed files with 556 additions and 135 deletions.
5 changes: 5 additions & 0 deletions ai4papi/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ def load_yaml_conf(fpath):
# Map short tool aliases to their canonical Nomad tool identifiers.
tools_nomad2id = dict(
    fl="ai4os-federated-server",
    cvat="ai4os-cvat",
    llm="ai4os-llm",
    ai4life="ai4os-ai4life-loader",
)
for tool in TOOLS.keys():
Expand All @@ -110,6 +111,10 @@ def load_yaml_conf(fpath):
# OSCAR service template, loaded once at import time
oscar_conf_path = paths["conf"] / "oscar.yaml"
with open(oscar_conf_path, "r") as oscar_file:
    OSCAR_TMPL = Template(oscar_file.read())

# vLLM configuration (model catalog used by the ai4os-llm tool)
vllm_conf_path = paths["conf"] / "vllm.yaml"
with open(vllm_conf_path, "r") as vllm_file:
    VLLM = yaml.safe_load(vllm_file)

# Try-me endpoints
nmd = load_nomad_job(paths["conf"] / "try_me" / "nomad.hcl")
TRY_ME = {
Expand Down
3 changes: 2 additions & 1 deletion ai4papi/nomad/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def get_deployment(
info["docker_image"] = usertask["Config"]["image"]
command = usertask["Config"].get("command", "")
args = usertask["Config"].get("args", [])
args[:] = [str(a) for a in args]
info["docker_command"] = f"{command} {' '.join(args)}".strip()

# Add endpoints
Expand All @@ -136,7 +137,7 @@ def get_deployment(
if label == "deepaas":
label = "api"

info["endpoints"][label] = f"http://{url}"
info["endpoints"][label] = f"https://{url}"

# Add '/ui' to deepaas endpoint
# If in the future we support other APIs, this will have to be removed.
Expand Down
19 changes: 12 additions & 7 deletions ai4papi/routers/v1/catalog/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,22 +36,22 @@ def get_config(
metadata = self.get_metadata(item_name)

# Modify the resources limits for a given user or VO
if conf.get("hardware", None):
if "hardware" in conf.keys():
conf["hardware"] = quotas.limit_resources(
item_name=item_name,
vo=vo,
)

# Parse docker registry
registry = metadata["links"]["docker_image"]
repo, image = registry.split("/")[-2:]
if repo not in ["deephdc", "ai4oshub"]:
repo = "ai4oshub"

# Fill with correct Docker image and tags
if item_name in ["ai4os-federated-server", "ai4os-ai4life-loader"]:
# Parse docker registry
registry = metadata["links"]["docker_image"]
repo, image = registry.split("/")[-2:]
if repo not in ["deephdc", "ai4oshub"]:
repo = "ai4oshub"
conf["general"]["docker_image"]["value"] = f"{repo}/{image}"

# Retrieve Docker tags
tags = retrieve_docker_tags(image=image, repo=repo)
conf["general"]["docker_tag"]["options"] = tags
conf["general"]["docker_tag"]["value"] = tags[0]
Expand All @@ -67,6 +67,11 @@ def get_config(
if models:
conf["hardware"]["gpu_type"]["options"] += models

if item_name == "ai4os-llm":
models = list(papiconf.VLLM["models"].keys())
conf["llm"]["model_id"]["options"] = models
conf["llm"]["model_id"]["value"] = models[0]

return conf


Expand Down
118 changes: 115 additions & 3 deletions ai4papi/routers/v1/deployments/tools.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from copy import deepcopy
import json
import re
import secrets
import types
Expand Down Expand Up @@ -195,9 +196,21 @@ def create_deployment(
detail="This ID does not correspond to an available tool.",
)

    # Check if you are allowed to deploy the tool
restrictions = {"ai4os-llm": ["vo.imagine-ai.eu"]}
if vo in restrictions.get(tool_name, []):
raise HTTPException(
status_code=403,
detail="Your VO doesn't allow to deploy this tool.",
)

# Load tool configuration
nomad_conf = deepcopy(papiconf.TOOLS[tool_name]["nomad"])
user_conf = deepcopy(papiconf.TOOLS[tool_name]["user"]["values"])
# TODO: given that some parts of the configuration are dynamically generated
# (eg. model_id in ai4life/vllm) we should read "user_conf" from the catalog
# We have to apply conversion to only keep the values
# Same goes for modules

# Update values conf in case we received a submitted conf
if conf is not None:
Expand All @@ -210,8 +223,8 @@ def create_deployment(
user_conf = utils.validate_conf(user_conf)

# Check if the provided configuration is within the job quotas
# Skip this check with CVAT because it does not have a "hardware" section in the conf
if tool_name not in ["ai4os-cvat"]:
# We only do this for tools that have a "hardware" section in the conf
if "hardware" in user_conf.keys():
quotas.check_jobwise(
conf=user_conf,
vo=vo,
Expand Down Expand Up @@ -367,7 +380,106 @@ def create_deployment(
# Convert template to Nomad conf
nomad_conf = nomad.load_job_conf(nomad_conf)

# Deploy a CVAT tool
    # Deploy an OpenWebUI+vLLM tool
elif tool_name == "ai4os-llm":
vllm_args = []

if user_conf["llm"]["type"] == "open-webui":
            # Check if user has provided an OpenAI API key/url
if not (
user_conf["llm"]["openai_api_key"]
and user_conf["llm"]["openai_api_url"]
):
raise HTTPException(
status_code=400,
detail="You need to define an OpenAI key and url to deploy Open WebUI as standalone.",
)
api_token = user_conf["llm"]["openai_api_key"]
api_endpoint = user_conf["llm"]["openai_api_url"]

if user_conf["llm"]["type"] in ["openwebui", "both"]:
# Check if user has provided a password
if not user_conf["llm"]["ui_password"]:
raise HTTPException(
status_code=400,
detail="A password is required to deploy this tool.",
)

if user_conf["llm"]["type"] in ["vllm", "both"]:
            # Create an OpenAI API key secret for the vLLM deployment
api_token = secrets.token_hex()
_ = ai4secrets.create_secret(
vo=vo,
secret_path=f"deployments/{job_uuid}/llm/vllm",
secret_data={"token": api_token},
authorization=SimpleNamespace(
credentials=authorization.credentials,
),
)
api_endpoint = (
f"https://vllm-{job_uuid}" + ".${meta.domain}" + f"-{base_domain}/v1"
)

# Configure VLLM args
model_id = user_conf["llm"]["model_id"]
vllm_args += ["--model", model_id]
vllm_args += papiconf.VLLM["models"][model_id]["args"]

# Check if HF token is needed
if (
papiconf.VLLM["models"][model_id]["needs_HF_token"]
and not user_conf["llm"]["HF_token"]
):
raise HTTPException(
status_code=400,
detail="This model requires a valid Huggingface token for deployment.",
)

# Replace the Nomad job template
nomad_conf = nomad_conf.safe_substitute(
{
"JOB_UUID": job_uuid,
"NAMESPACE": papiconf.MAIN_CONF["nomad"]["namespaces"][vo],
"PRIORITY": priority,
"OWNER": auth_info["id"],
"OWNER_NAME": auth_info["name"],
"OWNER_EMAIL": auth_info["email"],
"TITLE": user_conf["general"]["title"][:45],
"DESCRIPTION": user_conf["general"]["desc"][:1000],
"BASE_DOMAIN": base_domain,
"HOSTNAME": job_uuid,
"VLLM_ARGS": json.dumps(vllm_args),
"API_TOKEN": api_token,
"API_ENDPOINT": api_endpoint,
"HUGGINGFACE_TOKEN": user_conf["llm"]["HF_token"],
"OPEN_WEBUI_PASSWORD": user_conf["llm"]["ui_password"],
}
)

# Convert template to Nomad conf
nomad_conf = nomad.load_job_conf(nomad_conf)

# Define what to exclude
if user_conf["llm"]["type"] == "vllm":
exclude_tasks = ["open-webui", "create-admin"]
exclude_services = ["ui"]
elif user_conf["llm"]["type"] == "open-webui":
exclude_tasks = ["vllm"]
exclude_services = ["vllm"]
else:
exclude_tasks, exclude_services = [], []

tasks = nomad_conf["TaskGroups"][0]["Tasks"]
tasks[:] = [t for t in tasks if t["Name"] not in exclude_tasks]

services = nomad_conf["TaskGroups"][0]["Services"]
services[:] = [s for s in services if s["PortLabel"] not in exclude_services]

# Rename first task as main task
t = tasks[0]
t["Name"] = "main"

# Deploy AI4Life tool
elif tool_name == "ai4os-ai4life-loader":
# Replace the Nomad job template
nomad_conf = nomad_conf.safe_substitute(
Expand Down
Loading

0 comments on commit f39d2c4

Please sign in to comment.