Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: module migration to ai4os-hub #51

Merged
merged 8 commits into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ai4papi/module_patches.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def patch_nextcloud_mount(
"DEEP-OC-speech-to-text-tf",
]
modules = [f'deephdc/{m.lower()}' for m in modules]
# TODO: this will need to be updated to ai4os-hub

if docker_image in modules:
task['Env']['RCLONE_CONTIMEOUT'] = '1s'
Expand Down
71 changes: 34 additions & 37 deletions ai4papi/routers/v1/catalog/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

Both modules and tools share similar workflows so they will inherit from a common
Catalog class. We only finetune methods when needed (e.g. /config).
Both modules and tools are referred to in the common code as "items".

Implementation notes:
=====================
Expand Down Expand Up @@ -38,16 +39,23 @@ def __init__(self) -> None:


@cached(cache=TTLCache(maxsize=1024, ttl=6*60*60))
def get_list(
def get_items(
self,
):
"""
Retrieve a list of *all* items.

Retrieve a dict of *all* items.
```
{'module 1': {
'url': ***,
'branch': ***,
},
...
}
```
This is implemented in a separate function as many functions from this router
are using this function, so we need to avoid infinite recursions.
"""
return []
return {}


@cached(cache=TTLCache(maxsize=1024, ttl=6*60*60))
Expand Down Expand Up @@ -75,8 +83,9 @@ def get_filtered_list(

"""
# Retrieve all modules
modules = self.get_list()

modules = list(self.get_items().keys())
# (!): without list(...) FastAPI throws weird error
# ValueError: [ValueError('dictionary update sequence element #0 has length 1; 2 is required'), TypeError('vars() argument must have __dict__ attribute')]
if any([tags, tags_any, not_tags, not_tags_any]): # apply filtering

# Move to tag dict for easier manipulation (wildcard substitution)
Expand Down Expand Up @@ -194,7 +203,7 @@ def get_tags(
Retrieve a list of all the existing tags.
"""
tags = []
for m in self.get_list():
for m in self.get_items().keys():
meta = self.get_metadata(m)
tags += meta['keywords']
tags = sorted(set(tags))
Expand All @@ -209,37 +218,20 @@ def get_metadata(
"""
Get the item's full metadata.
"""

# Check the module is in the modules list
items = self.get_list()
if item_name not in items:
# Check if item is in the items list
items = self.get_items()
if item_name not in items.keys():
raise HTTPException(
status_code=400,
detail="Item {item_name} not in catalog: {items}",
detail=f"Item {item_name} not in catalog: {list(items.keys())}",
)

# Read the index of modules from Github
gitmodules_url = "https://raw.githubusercontent.com/deephdc/deep-oc/master/.gitmodules"
r = requests.get(gitmodules_url)

cfg = configparser.ConfigParser()
cfg.read_string(r.text)

# Convert ConfigParser to cleaner dict
# and retrieve default branch (if no branch use master)
modules_conf = {
re.search(r'submodule "(.*)"', s).group(1).lower():
# 'submodule "DEEP-OC-..."' --> 'deep-oc-...'
dict(cfg.items(s))
for s in cfg.sections()
}
branch = modules_conf[item_name].get("branch", "master")

# Retrieve metadata from that branch
# Retrieve metadata from default branch
# Use try/except to avoid that a single module formatting error could take down
# all the Dashboard
metadata_url = f"https://raw.githubusercontent.com/deephdc/{item_name}/{branch}/metadata.json"

branch = items[item_name].get("branch", "master")
url = items[item_name]['url'].replace('github.com', 'raw.githubusercontent.com')
metadata_url = f"{url}/{branch}/metadata.json"
try:
r = requests.get(metadata_url)
metadata = json.loads(r.text)
Expand All @@ -256,8 +248,8 @@ def get_metadata(
"license": "",
"date_creation": "",
"sources": {
"dockerfile_repo": f"https://github.com/deephdc/{item_name}",
"docker_registry_repo": f"deephdc/{item_name}",
"dockerfile_repo": f"https://github.com/ai4oshub/{item_name}",
"docker_registry_repo": f"ai4os-hub/{item_name}",
"code": "",
}
}
Expand All @@ -269,14 +261,19 @@ def get_metadata(

def get_config(
self,
):
):
"""
Returns the default configuration (dict) for creating a deployment
for a specific item. It is prefilled with the appropriate
docker image and the available docker tags.
"""
return {}


def retrieve_docker_tags(
image: str,
repo: str = 'deephdc',
):
repo: str = 'ai4oshub',
):
"""
Retrieve tags from Dockerhub image
"""
Expand Down
59 changes: 30 additions & 29 deletions ai4papi/routers/v1/catalog/modules.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,30 @@
import configparser
from copy import deepcopy
import re
import types

from cachetools import cached, TTLCache
from fastapi import APIRouter
from fastapi import APIRouter, HTTPException
import requests

from ai4papi import quotas, nomad
import ai4papi.conf as papiconf
from .common import Catalog, retrieve_docker_tags



@cached(cache=TTLCache(maxsize=1024, ttl=6*60*60))
def get_list(self):
"""
Retrieve a list of *all* modules.

This is implemented in a separate function as many functions from this router
are using this function, so we need to avoid infinite recursions.
"""

gitmodules_url = "https://raw.githubusercontent.com/deephdc/deep-oc/master/.gitmodules"
def get_items(self):
gitmodules_url = "https://raw.githubusercontent.com/ai4os-hub/modules-catalog/master/.gitmodules"
r = requests.get(gitmodules_url)

cfg = configparser.ConfigParser()
cfg.read_string(r.text)

# Convert 'submodule "DEEP-OC-..."' --> 'deep-oc-...'
modules = [
re.search(r'submodule "(.*)"', s).group(1).lower() for s in cfg.sections()
]
modules = {}
for section in cfg.sections():
items = dict(cfg.items(section))
key = items.pop('path').lower()
items['url'] = items['url'].replace('.git', '') # remove `.git`, if present
modules[key] = items

return modules

Expand All @@ -41,22 +34,31 @@ def get_config(
item_name: str,
vo: str,
):
"""
Returns the default configuration (dict) for creating a deployment
for a specific module. It is prefilled with the appropriate
docker image and the available docker tags.
"""
#TODO: We are not checking if module exists in the marketplace because
# we are treating each route as independent. In the future, this can
# be done as an API call to the other route.

# Check if module exists
modules = self.get_items()
if item_name not in modules.keys():
raise HTTPException(
status_code=400,
detail=f"{item_name} is not an available module.",
)

# Retrieve module configuration
conf = deepcopy(papiconf.MODULES['user']['full'])

# Retrieve module metadata
metadata = self.get_metadata(item_name)

# Parse docker registry
registry = metadata['sources']['docker_registry_repo']
repo, image = registry.split('/')[:2]
if repo not in ['deephdc', 'ai4oshub']:
repo = 'ai4oshub'

# Fill with correct Docker image
conf["general"]["docker_image"]["value"] = f"deephdc/{item_name}"
conf["general"]["docker_image"]["value"] = f"{repo}/{image}"

# Add available Docker tags
tags = retrieve_docker_tags(item_name)
tags = retrieve_docker_tags(image=image, repo=repo)
conf["general"]["docker_tag"]["options"] = tags
conf["general"]["docker_tag"]["value"] = tags[0]

Expand All @@ -80,9 +82,8 @@ def get_config(
return conf



Modules = Catalog()
Modules.get_list = types.MethodType(get_list, Modules)
Modules.get_items = types.MethodType(get_items, Modules)
Modules.get_config = types.MethodType(get_config, Modules)


Expand Down
86 changes: 23 additions & 63 deletions ai4papi/routers/v1/catalog/tools.py
Original file line number Diff line number Diff line change
@@ -1,86 +1,36 @@
from copy import deepcopy
import json
import types

from cachetools import cached, TTLCache
from fastapi import APIRouter, HTTPException
import requests

from ai4papi import quotas
import ai4papi.conf as papiconf
from .common import Catalog, retrieve_docker_tags



@cached(cache=TTLCache(maxsize=1024, ttl=6*60*60))
def get_list(self):
"""
Retrieve a list of *all* modules.

This is implemented in a separate function as many functions from this router
are using this function, so we need to avoid infinite recursions.
"""

return list(papiconf.TOOLS.keys())


@cached(cache=TTLCache(maxsize=1024, ttl=6*60*60))
def get_metadata(
self,
item_name: str,
):
"""
Get the module's full metadata.
"""
# Get default branch
def get_items(self):
# Set default branch manually (because we are not yet reading this from submodules)
tools_branches= {
'deep-oc-federated-server': 'main',
}
branch = tools_branches[item_name]

# Retrieve metadata from that branch
# Use try/except to avoid that a single module formatting error could take down
# all the Dashboard
metadata_url = f"https://raw.githubusercontent.com/deephdc/{item_name}/{branch}/metadata.json"

try:
r = requests.get(metadata_url)
metadata = json.loads(r.text)

except Exception:
metadata = {
"title": item_name,
"summary": "",
"description": [
"The metadata of this module could not be retrieved probably due to a ",
"JSON formatting error from the module maintainer."
],
"keywords": [],
"license": "",
"date_creation": "",
"sources": {
"dockerfile_repo": f"https://github.com/deephdc/{item_name}",
"docker_registry_repo": f"deephdc/{item_name}",
"code": "",
}
tools = {}
for k in papiconf.TOOLS.keys():
tools[k] = {
'url': f'https://github.com/deephdc/{k}', #TODO: this will need to be updated
'branch': tools_branches[k],
}

# Format "description" field nicely for the Dashboards Markdown parser
metadata["description"] = "\n".join(metadata["description"])

return metadata
return tools


def get_config(
self,
item_name: str,
vo: str,
):
"""
Returns the default configuration (dict) for creating a deployment
for a specific module. It is prefilled with the appropriate
docker image and the available docker tags.
"""
# Retrieve tool configuration
try:
conf = deepcopy(papiconf.TOOLS[item_name]['user']['full'])
Expand All @@ -90,8 +40,20 @@ def get_config(
detail=f"{item_name} is not an available tool.",
)

# Retrieve tool metadata
metadata = self.get_metadata(item_name)

# Parse docker registry
registry = metadata['sources']['docker_registry_repo']
repo, image = registry.split('/')[:2]
if repo not in ['deephdc', 'ai4oshub']:
repo = 'ai4oshub'

# Fill with correct Docker image
conf["general"]["docker_image"]["value"] = f"{repo}/{image}"

# Add available Docker tags
tags = retrieve_docker_tags(item_name)
tags = retrieve_docker_tags(image=image, repo=repo)
conf["general"]["docker_tag"]["options"] = tags
conf["general"]["docker_tag"]["value"] = tags[0]

Expand All @@ -100,15 +62,13 @@ def get_config(
item_name=item_name,
vo=vo,
)

return conf

return conf


Tools = Catalog()
Tools.get_list = types.MethodType(get_list, Tools)
Tools.get_items = types.MethodType(get_items, Tools)
Tools.get_config = types.MethodType(get_config, Tools)
Tools.get_metadata = types.MethodType(get_metadata, Tools)


router = APIRouter(
Expand Down
4 changes: 2 additions & 2 deletions tests/catalog/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@


# List modules
modules_list = Modules.get_list()
modules_list = list(Modules.get_items().keys())

assert isinstance(modules_list, list)
assert 'deep-oc-image-classification-tf' in modules_list
assert 'dogs-breed-detector' in modules_list
assert 'deep-oc-federated-server' not in modules_list

# List filtered modules
Expand Down
Loading
Loading