diff --git a/Makefile b/Makefile
index ffdd9c45..81003e70 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 SHELL=/bin/bash -o pipefail
 
-ONNXRUNTIME_VERSION := 1.13.1-?
-TOOLCHAIN_VERSION := 0.9.1-?
+ONNXRUNTIME_VERSION := 1.15.1-?
+TOOLCHAIN_VERSION := 0.10.0-?
 LIBHAL_VERSION := 0.11.0-?
 
 .PHONY: check-docker-tag toolchain lint test unit_tests examples regression-test-all \
@@ -15,8 +15,11 @@ ifndef DOCKER_TAG
 endif
 
 toolchain:
+	apt-get update
 	apt-get install -y --allow-downgrades libonnxruntime=$(ONNXRUNTIME_VERSION)
-	apt-get install -y --allow-downgrades furiosa-libcompiler=$(TOOLCHAIN_VERSION) furiosa-libnux=$(TOOLCHAIN_VERSION)
+	apt-get install -y --allow-downgrades furiosa-compiler=$(TOOLCHAIN_VERSION)
+	# TODO: remove me when possible
+	apt-get install -y --allow-downgrades furiosa-libcompiler=$(TOOLCHAIN_VERSION)
 	apt-get install -y --allow-downgrades furiosa-libhal-warboy=$(LIBHAL_VERSION)
 
 lint:
@@ -31,7 +34,7 @@ unit_tests:
 	pytest ./tests/unit/ -s
 
 examples:
-	for f in $$(ls docs/examples/*.py | grep -v "ssd_resnet34"); do echo"";echo "[TEST] $$f ..."; python3 $$f || exit 1; done
+	for f in $$(find docs/examples/ -name "*.py"); do printf "\n[TEST] $$f ...\n"; python3 $$f || exit 1; done
 
 regression-test-all:
 	pytest ./tests/bench/
diff --git a/ci-constraints.txt b/ci-constraints.txt
new file mode 100644
index 00000000..45e95128
--- /dev/null
+++ b/ci-constraints.txt
@@ -0,0 +1,5 @@
+# This pip constraints file is for the reproducibility of model accuracies
+opencv-python-headless==4.8.0.76
+torch==2.0.1
+torchvision==0.15.2
+numpy==1.25.2
diff --git a/docker/Dockerfile b/docker/Dockerfile
index f7d3df28..7a0ad2df 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -19,6 +19,8 @@ RUN pip3 install --upgrade pip wheel setuptools Cython pytest pycocotools \
 
 RUN echo "deb [arch=amd64] https://internal-archive.furiosa.dev/ubuntu focal restricted" \
     > /etc/apt/sources.list.d/furiosa.list && \
+    echo "deb [arch=amd64] https://internal-archive.furiosa.dev/ubuntu focal-rc restricted" \
+    >> /etc/apt/sources.list.d/furiosa.list && \
     echo "deb [arch=amd64] https://internal-archive.furiosa.dev/ubuntu focal-nightly restricted" \
     >> /etc/apt/sources.list.d/furiosa.list
 
@@ -31,4 +33,4 @@ RUN --mount=type=secret,id=furiosa.conf,dst=/etc/apt/auth.conf.d/furiosa.conf,re
     apt-get update && \
     make toolchain
 RUN --mount=type=secret,id=.netrc,dst=/root/.netrc,required \
-    pip install --extra-index-url https://internal-pypi.furiosa.dev/simple .[test]
+    pip install --pre --extra-index-url https://internal-pypi.furiosa.dev/simple --constraint ci-constraints.txt .[test]
diff --git a/docs/examples/efficientnet_b0.py b/docs/examples/efficientnet_b0.py
index 84da1b3f..89c28934 100644
--- a/docs/examples/efficientnet_b0.py
+++ b/docs/examples/efficientnet_b0.py
@@ -1,10 +1,10 @@
 from furiosa.models.vision import EfficientNetB0
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
 image = "tests/assets/cat.jpg"
 
-effnetb0 = EfficientNetB0.load()
-with session.create(effnetb0.enf) as sess:
+effnetb0 = EfficientNetB0()
+with create_runner(effnetb0.model_source()) as runner:
     inputs, _ = effnetb0.preprocess(image)
-    outputs = sess.run(inputs).numpy()
+    outputs = runner.run(inputs)
     effnetb0.postprocess(outputs)
diff --git a/docs/examples/efficientnet_v2_s.py b/docs/examples/efficientnet_v2_s.py
index c2923bb8..d281c540 100644
--- a/docs/examples/efficientnet_v2_s.py
+++ b/docs/examples/efficientnet_v2_s.py
@@ -1,10 +1,10 @@
 from furiosa.models.vision import EfficientNetV2s
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
 image = "tests/assets/cat.jpg"
 
-effnetv2s = EfficientNetV2s.load()
-with session.create(effnetv2s.enf) as sess:
+effnetv2s = EfficientNetV2s()
+with create_runner(effnetv2s.model_source()) as runner:
     inputs, _ = effnetv2s.preprocess(image)
-    outputs = sess.run(inputs).numpy()
+    outputs = runner.run(inputs)
     effnetv2s.postprocess(outputs)
diff --git a/docs/examples/loading_model.py b/docs/examples/loading_model.py
index 11b367b2..cfe2bee7 100644
--- a/docs/examples/loading_model.py
+++ b/docs/examples/loading_model.py
@@ -1,4 +1,4 @@
 from furiosa.models.types import Model
 from furiosa.models.vision import ResNet50
 
-model: Model = ResNet50.load()
+model: Model = ResNet50()
diff --git a/docs/examples/resnet50.py b/docs/examples/resnet50.py
index 2ca165db..4add2c19 100644
--- a/docs/examples/resnet50.py
+++ b/docs/examples/resnet50.py
@@ -1,10 +1,10 @@
 from furiosa.models.vision import ResNet50
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
 image = "tests/assets/cat.jpg"
 
-resnet50 = ResNet50.load()
-with session.create(resnet50.enf) as sess:
+resnet50 = ResNet50()
+with create_runner(resnet50.model_source()) as runner:
     inputs, _ = resnet50.preprocess(image)
-    outputs = sess.run(inputs).numpy()
+    outputs = runner.run(inputs)
     resnet50.postprocess(outputs)
diff --git a/docs/examples/ssd_mobilenet.py b/docs/examples/ssd_mobilenet.py
index c425f198..d0287bcc 100644
--- a/docs/examples/ssd_mobilenet.py
+++ b/docs/examples/ssd_mobilenet.py
@@ -1,10 +1,10 @@
 from furiosa.models.vision import SSDMobileNet
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
 image = ["tests/assets/cat.jpg"]
 
-mobilenet = SSDMobileNet.load()
-with session.create(mobilenet.enf) as sess:
+mobilenet = SSDMobileNet()
+with create_runner(mobilenet.model_source()) as runner:
     inputs, contexts = mobilenet.preprocess(image)
-    outputs = sess.run(inputs).numpy()
+    outputs = runner.run(inputs)
     mobilenet.postprocess(outputs, contexts[0])
diff --git a/docs/examples/ssd_mobilenet_native.py b/docs/examples/ssd_mobilenet_native.py
index 0554a31c..4f071214 100644
--- a/docs/examples/ssd_mobilenet_native.py
+++ b/docs/examples/ssd_mobilenet_native.py
@@ -1,10 +1,10 @@
 from furiosa.models.vision import SSDMobileNet
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
 image = ["tests/assets/cat.jpg"]
 
-mobilenet = SSDMobileNet.load(use_native=True)
-with session.create(mobilenet.enf) as sess:
+mobilenet = SSDMobileNet(postprocessor_type="Rust")
+with create_runner(mobilenet.model_source()) as runner:
     inputs, contexts = mobilenet.preprocess(image)
-    outputs = sess.run(inputs).numpy()
+    outputs = runner.run(inputs)
     mobilenet.postprocess(outputs, contexts[0])
diff --git a/docs/examples/ssd_mobilenet_onnx.py b/docs/examples/ssd_mobilenet_onnx.py
index 17dcca20..b1244fc1 100644
--- a/docs/examples/ssd_mobilenet_onnx.py
+++ b/docs/examples/ssd_mobilenet_onnx.py
@@ -1,22 +1,20 @@
-import yaml
-
 from furiosa.models.vision import SSDMobileNet
 from furiosa.quantizer import quantize
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
 compiler_config = {"lower_tabulated_dequantize": True}
 
 image = ["tests/assets/cat.jpg"]
 
-mobilenet = SSDMobileNet.load()
-onnx_model: bytes = mobilenet.source
-calib_range: dict = yaml.full_load(mobilenet.calib_yaml)
+mobilenet = SSDMobileNet()
+onnx_model: bytes = mobilenet.origin
+calib_range: dict = mobilenet.tensor_name_to_range
 
 # See https://furiosa-ai.github.io/docs/latest/en/api/python/furiosa.quantizer.html#furiosa.quantizer.quantize
 # for more details
-dfg = quantize(onnx_model, calib_range, with_quantize=False)
+quantized_onnx = quantize(onnx_model, calib_range)
 
-with session.create(dfg, compiler_config=compiler_config) as sess:
-    inputs, contexts = mobilenet.preprocess(image)
-    outputs = sess.run(inputs).numpy()
+with create_runner(quantized_onnx, compiler_config=compiler_config) as runner:
+    inputs, contexts = mobilenet.preprocess(image, with_scaling=True)
+    outputs = runner.run(inputs)
     mobilenet.postprocess(outputs, contexts[0])
diff --git a/docs/examples/ssd_resnet34.py b/docs/examples/ssd_resnet34.py
index cd88f9a4..d47b0575 100644
--- a/docs/examples/ssd_resnet34.py
+++ b/docs/examples/ssd_resnet34.py
@@ -1,9 +1,9 @@
 from furiosa.models.vision import SSDResNet34
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
-resnet34 = SSDResNet34.load()
+resnet34 = SSDResNet34(postprocessor_type="Python")
 
-with session.create(resnet34.enf) as sess:
+with create_runner(resnet34.model_source()) as runner:
     image, contexts = resnet34.preprocess(["tests/assets/cat.jpg"])
-    output = sess.run(image).numpy()
+    output = runner.run(image)
     resnet34.postprocess(output, contexts=contexts)
diff --git a/docs/examples/ssd_resnet34_native.py b/docs/examples/ssd_resnet34_native.py
index 1fe380b8..1caf42fd 100644
--- a/docs/examples/ssd_resnet34_native.py
+++ b/docs/examples/ssd_resnet34_native.py
@@ -1,9 +1,9 @@
 from furiosa.models.vision import SSDResNet34
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
-resnet34 = SSDResNet34.load(use_native=True)
+resnet34 = SSDResNet34(postprocessor_type="Rust")
 
-with session.create(resnet34.enf) as sess:
+with create_runner(resnet34.model_source()) as runner:
     image, contexts = resnet34.preprocess(["tests/assets/cat.jpg"])
-    output = sess.run(image).numpy()
+    output = runner.run(image)
     resnet34.postprocessor(output, contexts=contexts[0])
diff --git a/docs/examples/yolov5l.py b/docs/examples/yolov5l.py
index 414cece4..7e6f5947 100644
--- a/docs/examples/yolov5l.py
+++ b/docs/examples/yolov5l.py
@@ -2,12 +2,12 @@
 import numpy as np
 
 from furiosa.models.vision import YOLOv5l
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
-yolov5l = YOLOv5l.load()
+yolov5l = YOLOv5l()
 
-with session.create(yolov5l.enf) as sess:
+with create_runner(yolov5l.model_source()) as runner:
     image = cv2.imread("tests/assets/yolov5-test.jpg")
     inputs, contexts = yolov5l.preprocess([image])
-    output = sess.run(np.expand_dims(inputs[0], axis=0)).numpy()
+    output = runner.run(np.expand_dims(inputs[0], axis=0))
     yolov5l.postprocess(output, contexts=contexts)
diff --git a/docs/examples/yolov5m.py b/docs/examples/yolov5m.py
index 7553ad62..a50afdfe 100644
--- a/docs/examples/yolov5m.py
+++ b/docs/examples/yolov5m.py
@@ -2,12 +2,12 @@
 import numpy as np
 
 from furiosa.models.vision import YOLOv5m
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
-yolov5m = YOLOv5m.load()
+yolov5m = YOLOv5m()
 
-with session.create(yolov5m.enf) as sess:
+with create_runner(yolov5m.model_source()) as runner:
     image = cv2.imread("tests/assets/yolov5-test.jpg")
     inputs, contexts = yolov5m.preprocess([image])
-    output = sess.run(np.expand_dims(inputs[0], axis=0)).numpy()
+    output = runner.run(np.expand_dims(inputs[0], axis=0))
     yolov5m.postprocess(output, contexts=contexts)
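The docs/examples changes above all follow one mechanical recipe, so a condensed before/after sketch may help reviewers. This is commentary between file diffs, not part of the patch, and it assumes the 0.10-series furiosa-sdk packages this branch targets:

    # 0.9 style (removed by this patch)
    #   model = ResNet50.load()
    #   with session.create(model.enf) as sess:
    #       outputs = sess.run(inputs).numpy()

    # 0.10 style (introduced by this patch)
    from furiosa.models.vision import ResNet50
    from furiosa.runtime.sync import create_runner

    resnet50 = ResNet50()  # plain constructor replaces the .load() classmethod
    with create_runner(resnet50.model_source()) as runner:  # model_source() replaces .enf
        inputs, _ = resnet50.preprocess("tests/assets/cat.jpg")
        outputs = runner.run(inputs)  # returns arrays directly; the old .numpy() call is gone
        resnet50.postprocess(outputs)

Native postprocessing, previously selected with load(use_native=True), is now requested per model via the keyword-only argument postprocessor_type="Rust", as the ssd_mobilenet_native.py and ssd_resnet34_native.py examples above show.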
diff --git a/furiosa/models/__init__.py b/furiosa/models/__init__.py
index ab54a242..15c7e384 100644
--- a/furiosa/models/__init__.py
+++ b/furiosa/models/__init__.py
@@ -1,5 +1,5 @@
 """Furiosa Models"""
 from . import errors, vision
 
-__version__ = "0.9.0.dev0"
+__version__ = "0.10.0.dev0"
 __all__ = ["errors", "vision"]
diff --git a/furiosa/models/_utils.py b/furiosa/models/_utils.py
new file mode 100644
index 00000000..9ebff37f
--- /dev/null
+++ b/furiosa/models/_utils.py
@@ -0,0 +1,161 @@
+import logging
+import os
+from pathlib import Path
+from typing import TYPE_CHECKING, Collection, Optional, Tuple, Union
+
+if TYPE_CHECKING:
+    from .types import Platform
+
+import requests
+import yaml
+
+from . import errors
+
+EXT_CALIB_YAML = "calib_range.yaml"
+EXT_ONNX = "onnx"
+DATA_DIRECTORY_BASE = Path(__file__).parent / "data"
+CACHE_DIRECTORY_BASE = Path(
+    os.getenv(
+        "FURIOSA_MODELS_CACHE_HOME",
+        os.path.join(os.getenv("XDG_CACHE_HOME", Path.home() / ".cache"), "furiosa/models"),
+    )
+)
+DVC_PUBLIC_HTTP_ENDPOINT = (
+    "https://furiosa-public-artifacts.s3-accelerate.amazonaws.com/furiosa-artifacts/"
+)
+
+module_logger = logging.getLogger(__name__)
+
+
+def get_version_info() -> Optional[str]:
+    from furiosa.tools.compiler.api import VersionInfo
+
+    version_info = VersionInfo()
+    return f"{version_info.version}_{version_info.git_hash}"
+
+
+def find_dvc_cache_directory(path: Path) -> Optional[Path]:
+    if path is None or path == path.parent:
+        return None
+    if (path / ".dvc").is_dir():
+        return path / ".dvc" / "cache"
+    return find_dvc_cache_directory(path.parent)
+
+
+def parse_dvc_file(file_path: Path) -> Tuple[str, str, int]:
+    info_dict = yaml.safe_load(open(f"{file_path}.dvc").read())["outs"][0]
+    md5sum = info_dict["md5"]
+    return md5sum[:2], md5sum[2:], info_dict["size"]
+
+
+def get_from_url(path: str, uri: Path, is_legacy_path: bool = False) -> bytes:
+    url = f"{DVC_PUBLIC_HTTP_ENDPOINT}{path}"
+    module_logger.debug(f"Fetching from remote: {url}")
+    with requests.get(url) as resp:
+        if resp.status_code != 200:
+            if not is_legacy_path:
+                # Newer DVC stores data under /files/md5
+                return get_from_url(f"files/md5/{path}", uri, True)
+            raise errors.NotFoundInDVCRemote(uri, path)
+        data = resp.content
+        caching_path = CACHE_DIRECTORY_BASE / get_version_info() / (uri.name)
+        module_logger.debug(f"caching to {caching_path}")
+        caching_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(caching_path, mode="wb") as f:
+            f.write(data)
+        return data
+
+
+class ArtifactResolver:
+    def __init__(self, uri: Union[str, Path]):
+        self.uri = Path(uri)
+        # Note: DVC_REPO locates the local DVC directory, not a remote git repository
+        self.dvc_cache_path = os.environ.get("DVC_REPO", find_dvc_cache_directory(Path.cwd()))
+        if self.dvc_cache_path is not None:
+            self.dvc_cache_path = Path(self.dvc_cache_path)
+            if self.dvc_cache_path.is_symlink():
+                self.dvc_cache_path = self.dvc_cache_path.readlink()
+            module_logger.debug(f"Found DVC cache directory: {self.dvc_cache_path}")
+
+    def _read(self, directory: str, filename: str) -> bytes:
+        # Try to find a locally cached file
+        local_cache_path = CACHE_DIRECTORY_BASE / get_version_info() / (self.uri.name)
+        if local_cache_path.exists():
+            module_logger.debug(f"Local cache exists: {local_cache_path}")
+            with open(local_cache_path, mode="rb") as f:
+                return f.read()
+
+        # Try to find the real file alongside the DVC file (no DVC)
+        if Path(self.uri).exists():
+            module_logger.debug(f"Local file exists: {self.uri}")
+            with open(self.uri, mode="rb") as f:
+                return f.read()
+
+        module_logger.debug(f"{self.uri} does not exist, resolving via DVC")
+        if self.dvc_cache_path is not None:
+            cached: Path = self.dvc_cache_path / directory / filename
+            if cached.exists():
+                module_logger.debug(f"DVC cache hit: {cached}")
+                with open(cached, mode="rb") as f:
+                    return f.read()
+            else:
+                module_logger.debug(f"DVC cache directory exists, but does not contain {self.uri}")
+
+        # Fetch from remote
+        return get_from_url(f"{directory}/{filename}", self.uri)
+
+    def read(self) -> bytes:
+        directory, filename, size = parse_dvc_file(self.uri)
+        data = self._read(directory, filename)
+        assert len(data) == size
+        return data
+
+
+def resolve_artifact(src_name: str, full_path: Path) -> bytes:
+    try:
+        return ArtifactResolver(full_path).read()
+    except Exception as e:
+        raise errors.ArtifactNotFound(f"{src_name}:{full_path}") from e
+
+
+def resolve_source(src_name: str, extension: str) -> bytes:
+    full_path = next((DATA_DIRECTORY_BASE / src_name).glob(f'*.{extension}.dvc'))
+    # Remove `.dvc` suffix
+    full_path = full_path.with_suffix('')
+    return resolve_artifact(src_name, full_path)
+
+
+def resolve_model_source(src_name: str, num_pe: int = 2) -> bytes:
+    version_info = get_version_info()
+    if version_info is None:
+        raise errors.VersionInfoNotFound()
+    generated_path_base = DATA_DIRECTORY_BASE / f"generated/{version_info}"
+    if not generated_path_base.exists():
+        module_logger.warning("ENF does not exist. Trying to generate from source...")
+        try:
+            import onnx
+            import yaml
+
+            from furiosa.quantizer import ModelEditor, TensorType, get_pure_input_names, quantize
+        except ImportError:
+            raise errors.ExtraPackageRequired()
+        module_logger.warning(f"Returning quantized ONNX for {src_name}")
+        onnx_model = onnx.load_from_string(resolve_source(src_name, EXT_ONNX))
+        calib_range = yaml.full_load(resolve_source(src_name, EXT_CALIB_YAML))
+        editor = ModelEditor(onnx_model)
+        for input_name in get_pure_input_names(onnx_model):
+            editor.convert_input_type(input_name, TensorType.UINT8)
+        return quantize(onnx_model, calib_range)
+    file_name = f'{src_name}_warboy_{num_pe}pe.enf'
+    return resolve_artifact(src_name, generated_path_base / file_name)
+
+
+def validate_postprocessor_type(
+    postprocessor_type: "Platform", postprocessor_map: Collection["Platform"]
+):
+    if postprocessor_type not in postprocessor_map:
+        raise ValueError(
+            f"Unsupported postprocessor type: {postprocessor_type}, "
+            f"available choices: {', '.join(postprocessor_map)}"
+        )
diff --git a/furiosa/models/client/api.py b/furiosa/models/client/api.py
index 2313a2e3..6236ae57 100644
--- a/furiosa/models/client/api.py
+++ b/furiosa/models/client/api.py
@@ -4,8 +4,7 @@
 from tqdm import tqdm
 
 from .. import vision
-from ..types import Model
-from ..utils import get_field_default
+from ..types import Model, PythonPostProcessor
 
 
 def normalize(text: str) -> str:
@@ -30,7 +29,7 @@ def prettified_task_type(model: Type[Model]):
     Returns:
         Prettified string for model's task type
     """
-    task_type = get_field_default(model, "task_type").name
+    task_type = model.task_type.name
     return " ".join(map(lambda x: x.capitalize(), task_type.split("_")))
 
 
@@ -46,10 +45,11 @@ def get_model_list(filter_func: Optional[Callable[..., bool]] = None) -> List[Li
     filter_func = filter_func or (lambda _: True)
     model_list = []
     for model_name in vision.__all__:
-        model = getattr(vision, model_name)
-        if not filter_func(model):
+        model_cls = getattr(vision, model_name)
+        if not filter_func(model_cls):
             continue
-        postproc_map = get_field_default(model, "postprocessor_map")
+        model = model_cls()
+        postproc_map = model.postprocessor_map
         if not postproc_map:
             raise ValueError(f"No postprocessor map found for {model_name.capitalize()}")
         postprocs = ', '.join(map(lambda x: x.name.capitalize(), postproc_map.keys()))
@@ -102,38 +102,35 @@ def decorate_result(
 
 
 def run_inferences(model_cls: Type[Model], input_paths: Sequence[str], postprocess: Optional[str]):
-    from furiosa.runtime import session
+    from furiosa.runtime.sync import create_runner
 
     warning = """WARN: the benchmark results may depend on the number of input samples,
 sizes of the images, and a machine where this benchmark is running."""
-    postprocess = postprocess and postprocess.lower()
-    use_native = any(
-        map(
-            lambda x: x.is_native_platform(),
-            get_field_default(model_cls, "postprocessor_map").keys(),
-        )
+    if postprocess:
+        model = model_cls(postprocessor_type=postprocess)
+    else:
+        model = model_cls()
+    # FIXME: For native postprocess implementations, only YOLO can handle multiple contexts
+    single_context = not isinstance(model.postprocessor, PythonPostProcessor) and not isinstance(
+        model, (vision.YOLOv5m, vision.YOLOv5l)
     )
-    model = model_cls.load(use_native=use_native)
     queries = len(input_paths)
     print(f"Running {queries} input samples ...")
     print(decorate_with_bar(warning))
-    sess, queue = session.create_async(model.enf)
-    model_inputs, model_outputs = [], []
-    initial_time = perf_counter()
-    for input_path in tqdm(input_paths, desc="Preprocess"):
-        model_inputs.append(model.preprocess(input_path))
-    after_preprocess = perf_counter()
-    for idx, (model_input, ctx) in enumerate(model_inputs):
-        sess.submit(model_input, context=idx)
-    for _ in tqdm(range(queries), desc="Inference"):
-        model_outputs.append(queue.recv())
-    after_npu = perf_counter()
-    for ctx, model_output in tqdm(model_outputs, desc="Postprocess"):
-        contexts = model_inputs[ctx][1]
-        contexts = contexts[0] if contexts is not None and use_native else contexts
-        model.postprocess(model_output.numpy(), contexts)
-    all_done = perf_counter()
-    sess.close()
+    with create_runner(model.model_source()) as runner:
+        model_inputs, model_outputs = [], []
+        initial_time = perf_counter()
+        for input_path in tqdm(input_paths, desc="Preprocess"):
+            model_inputs.append(model.preprocess(input_path))
+        after_preprocess = perf_counter()
+        for model_input in tqdm(model_inputs, desc="Inference"):
+            model_outputs.append([runner.run(model_input[0]), model_input[1]])
+        after_npu = perf_counter()
+        for contexted_model_output in tqdm(model_outputs, desc="Postprocess"):
+            model_output, context = contexted_model_output
+            context = context[0] if context is not None and single_context else context
+            model.postprocess(model_output, context)
+        all_done = perf_counter()
 
     print(
         decorate_result(all_done - initial_time, queries, "Preprocess -> Inference -> Postprocess")
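A note for reviewers on the new furiosa/models/_utils.py above: artifact lookup tries, in order, the per-compiler-version cache under FURIOSA_MODELS_CACHE_HOME, a real file sitting next to its .dvc pointer, the local DVC cache, and finally the public S3 endpoint (retrying under files/md5/ for newer DVC layouts). A hedged sketch of how a caller can steer that lookup — the paths are hypothetical, and FURIOSA_MODELS_CACHE_HOME must be set before furiosa.models is imported, since CACHE_DIRECTORY_BASE is computed at import time:

    import os

    # Relocate the download cache (default: $XDG_CACHE_HOME/furiosa/models)
    os.environ["FURIOSA_MODELS_CACHE_HOME"] = "/tmp/furiosa-models-cache"  # hypothetical path
    # Point at a DVC cache directly instead of auto-discovering .dvc/cache upward from cwd
    os.environ["DVC_REPO"] = "/path/to/repo/.dvc/cache"  # hypothetical path

    from furiosa.models.vision import SSDMobileNet

    mobilenet = SSDMobileNet()
    onnx_bytes = mobilenet.origin            # *.onnx resolved through the chain above
    ranges = mobilenet.tensor_name_to_range  # parsed calib_range.yaml

The origin and tensor_name_to_range accessors are defined later in this patch, in types.py.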
diff --git a/furiosa/models/client/main.py b/furiosa/models/client/main.py
index 3b55c42f..d86560af 100644
--- a/furiosa/models/client/main.py
+++ b/furiosa/models/client/main.py
@@ -2,7 +2,7 @@
 import logging
 from pathlib import Path
 import sys
-from typing import Callable, List, Optional
+from typing import Callable, List, Optional, Type
 
 from tabulate import tabulate
 import yaml
@@ -105,12 +105,11 @@ def get_model_or_exit(model_name: str) -> Model:
     return model
 
 
-def describe_model(model_cls: Model) -> str:
-    # TODO: Make dry load (to avoid resolving heavy artifacts)
-    model = model_cls.load()
-    include = {'name', 'format', 'family', 'version', 'metadata'}
+def describe_model(model_cls: Type[Model]) -> str:
+    model = model_cls()
+    include = {"name", "format", "family", "version", "metadata", "tags"}
     output = []
-    output.append(yaml.dump(model.dict(include=include)))
+    output.append(yaml.dump(model.model_dump(include=include, exclude_none=True), sort_keys=False))
     output.append(f"task type: {api.prettified_task_type(model)}\n")
     available_postprocs = ', '.join(
         map(lambda x: x.name.capitalize(), model.postprocessor_map.keys())
diff --git a/furiosa/models/data/enf_generator.py b/furiosa/models/data/enf_generator.py
index a09a98cf..26882856 100644
--- a/furiosa/models/data/enf_generator.py
+++ b/furiosa/models/data/enf_generator.py
@@ -8,10 +8,9 @@
 import onnx
 import yaml
 
-import furiosa.quantizer
+from furiosa.quantizer import ModelEditor, TensorType, get_pure_input_names, quantize
 from furiosa.tools.compiler.api import VersionInfo, compile
 
-QUANTIZER_CONFIG = {"with_quantize": False}
 COMPILER_CONFIG = {"lower_tabulated_dequantize": True}
 
 base_path = Path(__file__).parent
@@ -48,12 +47,15 @@ def quantize_and_compile_model(arg: Tuple[int, Path, int]):
     print(f" [{index}] {model_short_name} starts from {onnx_path}", flush=True)
 
     # Load ONNX model
-    onnx_model = onnx.load(onnx_path).SerializeToString()
+    onnx_model = onnx.load(onnx_path)
 
     # Quantize
     with open(calib_range_path) as f:
         calib_ranges = yaml.full_load(f)
-    dfg = furiosa.quantizer.quantize(onnx_model, calib_ranges, **QUANTIZER_CONFIG)
+    editor = ModelEditor(onnx_model)
+    for input_name in get_pure_input_names(onnx_model):
+        editor.convert_input_type(input_name, TensorType.UINT8)
+    quantized_onnx = quantize(onnx_model, calib_ranges)
     print(f" [{index}] {model_short_name} quantized", flush=True)
 
     compiler_config = dict(COMPILER_CONFIG)
@@ -72,7 +74,7 @@ def quantize_and_compile_model(arg: Tuple[int, Path, int]):
 
     # Compile and write to file
     target_npu = "warboy" if num_pe == 1 else "warboy-2pe"
-    enf = compile(bytes(dfg), target_npu=target_npu)
+    enf = compile(bytes(quantized_onnx), target_npu=target_npu)
     with open(enf_path, 'wb') as f:
         f.write(enf)
     print(f" [{index}] {model_short_name} compiled to {enf_path}", flush=True)
diff --git a/furiosa/models/data/generated/0.10.0_f8f05c8ea/efficientnet_b0_warboy_1pe.enf.dvc b/furiosa/models/data/generated/0.10.0_f8f05c8ea/efficientnet_b0_warboy_1pe.enf.dvc
new file mode 100644
index 00000000..c9d15674
--- /dev/null
+++ b/furiosa/models/data/generated/0.10.0_f8f05c8ea/efficientnet_b0_warboy_1pe.enf.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: f0489ac1e755ba1d6dc33b07cf38da25
+  size: 17798664
+  hash: md5
+  path: efficientnet_b0_warboy_1pe.enf
diff --git a/furiosa/models/data/generated/0.10.0_f8f05c8ea/efficientnet_b0_warboy_2pe.enf.dvc b/furiosa/models/data/generated/0.10.0_f8f05c8ea/efficientnet_b0_warboy_2pe.enf.dvc
new file mode 100644
index 00000000..a1b018a6
--- /dev/null
+++ b/furiosa/models/data/generated/0.10.0_f8f05c8ea/efficientnet_b0_warboy_2pe.enf.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: 8e630b1f91b7908bd9e9f83843a1bf2d
+  size: 22193001
+  hash: md5
+  path: efficientnet_b0_warboy_2pe.enf
diff --git a/furiosa/models/data/generated/0.10.0_f8f05c8ea/efficientnet_v2_s_warboy_1pe.enf.dvc b/furiosa/models/data/generated/0.10.0_f8f05c8ea/efficientnet_v2_s_warboy_1pe.enf.dvc
new file mode 100644
index 00000000..99b9d003
--- /dev/null
+++ b/furiosa/models/data/generated/0.10.0_f8f05c8ea/efficientnet_v2_s_warboy_1pe.enf.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: 621c7fca75cce85a2a13619c349f30b0
+  size: 44861541
+  hash: md5
+  path: efficientnet_v2_s_warboy_1pe.enf
diff --git a/furiosa/models/data/generated/0.10.0_f8f05c8ea/efficientnet_v2_s_warboy_2pe.enf.dvc b/furiosa/models/data/generated/0.10.0_f8f05c8ea/efficientnet_v2_s_warboy_2pe.enf.dvc
new file mode 100644
index 00000000..16b9c2fa
--- /dev/null
+++ b/furiosa/models/data/generated/0.10.0_f8f05c8ea/efficientnet_v2_s_warboy_2pe.enf.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: d084dfd1b5138294713c31257a0d264d
+  size: 67662577
+  hash: md5
+  path: efficientnet_v2_s_warboy_2pe.enf
diff --git a/furiosa/models/data/generated/0.10.0_f8f05c8ea/mlcommons_resnet50_v1.5_warboy_1pe.enf.dvc b/furiosa/models/data/generated/0.10.0_f8f05c8ea/mlcommons_resnet50_v1.5_warboy_1pe.enf.dvc
new file mode 100644
index 00000000..0a65f2a1
--- /dev/null
+++ b/furiosa/models/data/generated/0.10.0_f8f05c8ea/mlcommons_resnet50_v1.5_warboy_1pe.enf.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: a17838cd3dbd863a4ea396aa758899a2
+  size: 39525369
+  hash: md5
+  path: mlcommons_resnet50_v1.5_warboy_1pe.enf
diff --git a/furiosa/models/data/generated/0.10.0_f8f05c8ea/mlcommons_resnet50_v1.5_warboy_2pe.enf.dvc b/furiosa/models/data/generated/0.10.0_f8f05c8ea/mlcommons_resnet50_v1.5_warboy_2pe.enf.dvc
new file mode 100644
index 00000000..85cd5921
--- /dev/null
+++ b/furiosa/models/data/generated/0.10.0_f8f05c8ea/mlcommons_resnet50_v1.5_warboy_2pe.enf.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: 031d2759cfb9f551390c2ae34139acf5
+  size: 57063777
+  hash: md5
+  path: mlcommons_resnet50_v1.5_warboy_2pe.enf
diff --git a/furiosa/models/data/generated/0.10.0_f8f05c8ea/mlcommons_ssd_mobilenet_v1_warboy_1pe.enf.dvc b/furiosa/models/data/generated/0.10.0_f8f05c8ea/mlcommons_ssd_mobilenet_v1_warboy_1pe.enf.dvc
new file mode 100644
index 00000000..cc0adf11
--- /dev/null
+++ b/furiosa/models/data/generated/0.10.0_f8f05c8ea/mlcommons_ssd_mobilenet_v1_warboy_1pe.enf.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: 71256730cb32eba5454109d5da82a09f
+  size: 16474290
+  hash: md5
+  path: mlcommons_ssd_mobilenet_v1_warboy_1pe.enf
diff --git a/furiosa/models/data/generated/0.10.0_f8f05c8ea/mlcommons_ssd_mobilenet_v1_warboy_2pe.enf.dvc b/furiosa/models/data/generated/0.10.0_f8f05c8ea/mlcommons_ssd_mobilenet_v1_warboy_2pe.enf.dvc
new file mode 100644
index 00000000..5fc26163
--- /dev/null
+++ b/furiosa/models/data/generated/0.10.0_f8f05c8ea/mlcommons_ssd_mobilenet_v1_warboy_2pe.enf.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: 7a55ebcf34cfa3a2ae047dc8272a0a54
+  size: 17356401
+  hash: md5
+  path: mlcommons_ssd_mobilenet_v1_warboy_2pe.enf
diff --git a/furiosa/models/data/generated/0.10.0_f8f05c8ea/mlcommons_ssd_resnet34_warboy_1pe.enf.dvc b/furiosa/models/data/generated/0.10.0_f8f05c8ea/mlcommons_ssd_resnet34_warboy_1pe.enf.dvc
new file mode 100644
index 00000000..0e2c262f
--- /dev/null
+++ b/furiosa/models/data/generated/0.10.0_f8f05c8ea/mlcommons_ssd_resnet34_warboy_1pe.enf.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: c41d85d99620f796f2159b6b3f6d6bdb
+  size: 24005454
+  hash: md5
+  path: mlcommons_ssd_resnet34_warboy_1pe.enf
diff --git a/furiosa/models/data/generated/0.10.0_f8f05c8ea/mlcommons_ssd_resnet34_warboy_2pe.enf.dvc b/furiosa/models/data/generated/0.10.0_f8f05c8ea/mlcommons_ssd_resnet34_warboy_2pe.enf.dvc
new file mode 100644
index 00000000..b2d49e29
--- /dev/null
+++ b/furiosa/models/data/generated/0.10.0_f8f05c8ea/mlcommons_ssd_resnet34_warboy_2pe.enf.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: fd61992e1b93bdd99f07e30f19aa0a53
+  size: 24830635
+  hash: md5
+  path: mlcommons_ssd_resnet34_warboy_2pe.enf
diff --git a/furiosa/models/data/generated/0.10.0_f8f05c8ea/yolov5l_warboy_1pe.enf.dvc b/furiosa/models/data/generated/0.10.0_f8f05c8ea/yolov5l_warboy_1pe.enf.dvc
new file mode 100644
index 00000000..627fc784
--- /dev/null
+++ b/furiosa/models/data/generated/0.10.0_f8f05c8ea/yolov5l_warboy_1pe.enf.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: bf32798392e40b6fee7324fa09907036
+  size: 54142786
+  hash: md5
+  path: yolov5l_warboy_1pe.enf
diff --git a/furiosa/models/data/generated/0.10.0_f8f05c8ea/yolov5l_warboy_2pe.enf.dvc b/furiosa/models/data/generated/0.10.0_f8f05c8ea/yolov5l_warboy_2pe.enf.dvc
new file mode 100644
index 00000000..97230eb0
--- /dev/null
+++ b/furiosa/models/data/generated/0.10.0_f8f05c8ea/yolov5l_warboy_2pe.enf.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: 2e918e3f928f650562d1ea3974517f08
+  size: 68725643
+  hash: md5
+  path: yolov5l_warboy_2pe.enf
diff --git a/furiosa/models/data/generated/0.10.0_f8f05c8ea/yolov5m_warboy_1pe.enf.dvc b/furiosa/models/data/generated/0.10.0_f8f05c8ea/yolov5m_warboy_1pe.enf.dvc
new file mode 100644
index 00000000..a6e9d781
--- /dev/null
+++ b/furiosa/models/data/generated/0.10.0_f8f05c8ea/yolov5m_warboy_1pe.enf.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: 3332050bec042b3cf174b48dfdf06a4a
+  size: 35308825
+  hash: md5
+  path: yolov5m_warboy_1pe.enf
diff --git a/furiosa/models/data/generated/0.10.0_f8f05c8ea/yolov5m_warboy_2pe.enf.dvc b/furiosa/models/data/generated/0.10.0_f8f05c8ea/yolov5m_warboy_2pe.enf.dvc
new file mode 100644
index 00000000..efef2657
--- /dev/null
+++ b/furiosa/models/data/generated/0.10.0_f8f05c8ea/yolov5m_warboy_2pe.enf.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: f5196fb0d0bc2182d7e79e13f7256c25
+  size: 49493758
+  hash: md5
+  path: yolov5m_warboy_2pe.enf
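The fourteen .dvc pointer files above are what the new resolver consumes: each records the md5 and size of an ENF pre-compiled for compiler version 0.10.0_f8f05c8ea. For illustration only (mirroring parse_dvc_file and get_from_url from _utils.py earlier in this patch), the first pointer maps to its cache and remote locations like this:

    import yaml

    out = yaml.safe_load(open("efficientnet_b0_warboy_1pe.enf.dvc"))["outs"][0]
    directory, filename = out["md5"][:2], out["md5"][2:]
    # -> "f0" / "489ac1e755ba1d6dc33b07cf38da25": looked up as f0/489ac1... in the local
    #    DVC cache, or fetched from the furiosa-public-artifacts S3 endpoint (with the
    #    files/md5/ prefix fallback for newer DVC layouts)
    assert (directory, out["size"]) == ("f0", 17798664)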
Try: `pip install " + "furiosa-models[full]`" ) diff --git a/furiosa/models/types.py b/furiosa/models/types.py index 4031d63b..7eb4e450 100644 --- a/furiosa/models/types.py +++ b/furiosa/models/types.py @@ -1,68 +1,71 @@ from abc import ABC, abstractmethod import datetime -from enum import Enum, IntEnum -from typing import Any, Dict, List, Optional, Sequence, Tuple, Type +from enum import Enum +from functools import cached_property +from typing import Any, Dict, List, Optional, Sequence, Tuple import numpy.typing as npt -from pydantic import BaseConfig, BaseModel, Extra, Field +from pydantic import BaseModel, ConfigDict, Field, computed_field, field_serializer from typing_extensions import TypeAlias +import yaml -from .utils import EXT_CALIB_YAML, EXT_ENF, EXT_ONNX, resolve_file +from ._utils import EXT_CALIB_YAML, EXT_ONNX, resolve_model_source, resolve_source # Context type alias Context: TypeAlias = Any -class PreProcessor(ABC): - @abstractmethod - def __call__(self, inputs: Any) -> Tuple[Sequence[npt.ArrayLike], Sequence[Context]]: - ... +class Platform(str, Enum): + """Implemented platform""" + PYTHON = "PYTHON" + C = "C" + CPP = "CPP" + RUST = "RUST" -class Platform(IntEnum): - """Implemented platform""" + @classmethod + def _missing_(cls, value): + for member in cls: + if member.value == value.upper(): + return member - PYTHON = 0 - C = 1 - CPP = 2 - RUST = 3 - def is_native_platform(self): - return self != self.PYTHON +class ModelTaskType(str, Enum): + """Model's task type""" + OBJECT_DETECTION = "OBJECT_DETECTION" + IMAGE_CLASSIFICATION = "IMAGE_CLASSIFICATION" -class PostProcessor(ABC): - def __init__(self, *args, **kwargs): - ... - @abstractmethod - def __call__(self, model_outputs: Sequence[npt.ArrayLike], contexts: Sequence[Context]) -> Any: - ... +class Format(str, Enum): + """Model binary format to represent the binary specified.""" + ONNX = "ONNX" + TFLite = "TFLITE" -class ModelTaskType(IntEnum): - """Model's task type""" - OBJECT_DETECTION = 0 - IMAGE_CLASSIFICATION = 1 +class PreProcessor(ABC): + @abstractmethod + def __call__(self, inputs: Any) -> Tuple[Sequence[npt.ArrayLike], Sequence[Context]]: + ... -class Config(BaseConfig): - # Extra fields not permitted - extra: Extra = Extra.forbid +class PostProcessor(ABC): + @abstractmethod + def __call__(self, model_outputs: Sequence[npt.ArrayLike], contexts: Sequence[Context]) -> Any: + ... 
-class Format(str, Enum): - """Model binary format to represent the binary specified.""" +class RustPostProcessor(PostProcessor): + platform = Platform.RUST - ONNX = "onnx" - TFLite = "tflite" +class PythonPostProcessor(PostProcessor): + platform = Platform.PYTHON -class Publication(BaseModel): - """Model publication information.""" - __config__ = Config +class Publication(BaseModel, extra='forbid'): + """Model publication information.""" authors: Optional[List[str]] = None title: Optional[str] = None @@ -71,141 +74,107 @@ class Publication(BaseModel): url: Optional[str] = None -class Metadata(BaseModel): +class Metadata(BaseModel, extra='forbid'): """Model metadata to understand a model.""" - __config__ = Config - description: Optional[str] = None publication: Optional[Publication] = None -class Tags(BaseModel): - class Config: - extra = Extra.allow +class Tags(BaseModel, extra='forbid'): + """Model tags to understand a model.""" content_type: Optional[str] = None -class ModelTensor(BaseModel): - name: str - datatype: str - shape: List[int] - tags: Optional[Tags] = None - - class Model(ABC, BaseModel): """Represent the artifacts and metadata of a neural network model Attributes: name: a name of this model - format: the binary format type of model source; e.g., ONNX, tflite - source: a source binary in ONNX or tflite. It can be used for compiling this model - with a custom compiler configuration. - enf: the executable binary for furiosa runtime and NPU - calib_yaml: the calibration ranges in yaml format for quantization - version: model version - inputs: data type and shape of input tensors - outputs: data type and shape of output tensors - compiler_config: a pre-defined compiler option + task_type: the task type of this model + format: the binary format type of model origin; e.g., ONNX, tflite + family: the model family + version: the model version + metadata: the model metadata + tags: the model tags + origin: an origin f32 binary in ONNX or tflite. It can be used for compiling this model + with or without quantization and proper compiler configuration + tensor_name_to_range: the calibration ranges of each tensor in origin + preprocessor: a preprocessor to preprocess input tensors + postprocessor: a postprocessor to postprocess output tensors + + Methods: + preprocess: preprocess input tensors + postprocess: postprocess output tensors + model_source(num_pe=[1|2]): the executable binary for furiosa runtime and NPU. It can be + directly fed to `furiosa.runtime.create_runner`. 
If model binary is not compiled yet, + it will be quantized & compiled automatically if possible + resolve_all: resolve all non-cached properties(origin, tensor_name_to_range, model_sources) """ - class Config(BaseConfig): - extra: Extra = Extra.forbid - # To allow Session, Processor type - arbitrary_types_allowed = True - use_enum_values = True - # To make aliases for lazy-loaded fields - fields = { - "source_": "source", - "enf_": "enf", - "enf_1pe_": "enf_1pe", - "calib_yaml_": "calib_yaml", - } + model_config = ConfigDict(arbitrary_types_allowed=True) name: str + task_type: ModelTaskType format: Format - - # These fields are aliases for lazy-loaded fields - source_: Optional[bytes] = Field(None, repr=False) - enf_1pe_: Optional[bytes] = Field(None, repr=False) - enf_: Optional[bytes] = Field(None, repr=False) - calib_yaml_: Optional[str] = Field(None, repr=False) - family: Optional[str] = None version: Optional[str] = None - metadata: Optional[Metadata] = None + tags: Optional[Tags] = None - inputs: Optional[List[ModelTensor]] = [] - outputs: Optional[List[ModelTensor]] = [] - - postprocessor_map: Optional[Dict[Platform, Type[PostProcessor]]] = None - - preprocessor: Optional[PreProcessor] = None - postprocessor: Optional[PostProcessor] = None - - @staticmethod - @abstractmethod - def get_artifact_name() -> str: - ... + _artifact_name: str - @classmethod - @abstractmethod - def load(cls, use_native: Optional[bool] = None) -> 'Model': - ... + preprocessor: PreProcessor = Field(..., repr=False, exclude=True) + postprocessor: PostProcessor = Field(..., repr=False, exclude=True) def preprocess(self, *args, **kwargs) -> Tuple[Sequence[npt.ArrayLike], Sequence[Context]]: - assert self.preprocessor return self.preprocessor(*args, **kwargs) def postprocess(self, *args, **kwargs): - assert self.postprocessor return self.postprocessor(*args, **kwargs) - @property - def source(self) -> bytes: - source = self.__dict__.get('source_') - if source is None: - source = resolve_file(self.get_artifact_name(), EXT_ONNX) - self.__dict__['source_'] = source - return source - - @property - def enf(self) -> bytes: - enf = self.__dict__.get('enf_') - if enf is None: - enf = resolve_file(self.get_artifact_name(), EXT_ENF) - self.__dict__['enf_'] = enf - return enf - - @property - def enf_1pe(self) -> bytes: - enf = self.__dict__.get('enf_1pe_') - if enf is None: - enf = resolve_file(self.get_artifact_name(), EXT_ENF, num_pe=1) - self.__dict__['enf_1pe_'] = enf - return enf - - @property - def calib_yaml(self) -> bytes: - calib_yaml = self.__dict__.get('calib_yaml_') - if calib_yaml is None: - calib_yaml = resolve_file(self.get_artifact_name(), EXT_CALIB_YAML) - self.__dict__['calib_yaml_'] = calib_yaml - return calib_yaml + @computed_field(repr=False) + @cached_property + def origin(self) -> bytes: + return resolve_source(self._artifact_name, EXT_ONNX) + + @computed_field(repr=False) + @cached_property + def tensor_name_to_range(self) -> Dict[str, List[float]]: + calib_yaml = resolve_source(self._artifact_name, EXT_CALIB_YAML) + return yaml.full_load(calib_yaml) + + def model_source(self, num_pe: int = 2) -> bytes: + if num_pe not in (1, 2): + raise ValueError(f"Invalid num_pe: {num_pe}") + + # TODO: Add in-memory cached value(like cached_property), currently uses disk-cached value + return resolve_model_source(self._artifact_name, num_pe=num_pe) def resolve_all(self): - _ = self.source, self.enf, self.enf_1pe, self.calib_yaml + _ = self.origin, self.tensor_name_to_range + for num_pe in (1, 2): + _ = 
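types.py above is the pivot of the release: pydantic v1 Config/alias machinery becomes v2 ConfigDict, computed_field, and field_serializer, while the lazy source/enf/calib_yaml properties become origin, tensor_name_to_range, and the model_source(num_pe=...) method. A short sketch of the resulting surface, hedged on the signatures shown above:

    from furiosa.models.vision import SSDResNet34

    model = SSDResNet34(postprocessor_type="Python")
    enf_2pe = model.model_source()          # default num_pe=2 ("warboy-2pe")
    enf_1pe = model.model_source(num_pe=1)  # single-PE binary; other values raise ValueError
    model.resolve_all()                     # prefetch origin, calib ranges, and both ENFs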
diff --git a/furiosa/models/utils.py b/furiosa/models/utils.py
deleted file mode 100644
index 2eb2bca4..00000000
--- a/furiosa/models/utils.py
+++ /dev/null
@@ -1,172 +0,0 @@
-from dataclasses import dataclass
-import logging
-import os
-from pathlib import Path
-from typing import Any, Optional, Tuple, Type, Union
-
-import aiofiles
-import aiohttp
-from pydantic import BaseModel
-import yaml
-
-from furiosa.common.native import DEFAULT_ENCODING, find_native_libs
-from furiosa.common.thread import synchronous
-
-from . import errors
-
-EXT_CALIB_YAML = "calib_range.yaml"
-EXT_ENF = "enf"
-EXT_ONNX = "onnx"
-DATA_DIRECTORY_BASE = Path(__file__).parent / "data"
-CACHE_DIRECTORY_BASE = Path(
-    os.getenv(
-        "FURIOSA_MODELS_CACHE_HOME",
-        os.path.join(os.getenv("XDG_CACHE_HOME", Path.home() / ".cache"), "furiosa/models"),
-    )
-)
-DVC_PUBLIC_HTTP_ENDPOINT = (
-    "https://furiosa-public-artifacts.s3-accelerate.amazonaws.com/furiosa-artifacts"
-)
-
-module_logger = logging.getLogger(__name__)
-
-
-@dataclass
-class CompilerVersion:
-    version: str
-    revision: str
-
-
-def get_field_default(model: Type[BaseModel], field: str) -> Any:
-    """Returns field's default value from BaseModel cls
-
-    Args:
-        model: A pydantic BaseModel cls
-
-    Returns:
-        Pydantic class' default field value
-    """
-    return model.__fields__[field].default
-
-
-def get_nux_version() -> Optional[CompilerVersion]:
-    # TODO - hacky version. Eventually,
-    # it should find a compiler version being used by runtime.
-    libnux = find_native_libs("nux")
-    if libnux is None:
-        return None
-    return CompilerVersion(
-        libnux.version().decode(DEFAULT_ENCODING),
-        libnux.git_short_hash().decode(DEFAULT_ENCODING),
-    )
-
-
-def get_version_info() -> Optional[str]:
-    version_info = get_nux_version()
-    if not version_info:
-        return None
-    return f"{version_info.version}_{version_info.revision}"
-
-
-def removesuffix(base: str, suffix: str) -> str:
-    # Copied from https://github.com/python/cpython/blob/6dab8c95/Tools/scripts/deepfreeze.py#L105-L108
-    if base.endswith(suffix):
-        return base[: len(base) - len(suffix)]
-    return base
-
-
-class ArtifactResolver:
-    def __init__(self, uri: Union[str, Path]):
-        self.uri = Path(uri)
-        # Note: DVC_REPO is to locate local DVC directory not remote git repository
-        self.dvc_cache_path = os.environ.get("DVC_REPO", self.find_dvc_cache_directory(Path.cwd()))
-        if self.dvc_cache_path is not None:
-            self.dvc_cache_path = Path(self.dvc_cache_path)
-            if self.dvc_cache_path.is_symlink():
-                self.dvc_cache_path = self.dvc_cache_path.readlink()
-            module_logger.debug(f"Found DVC cache directory: {self.dvc_cache_path}")
-
-    @classmethod
-    def find_dvc_cache_directory(cls, path: Path) -> Optional[Path]:
-        if path is None or path == path.parent:
-            return None
-        if (path / ".dvc").is_dir():
-            return path / ".dvc" / "cache"
-        return cls.find_dvc_cache_directory(path.parent)
-
-    @staticmethod
-    def parse_dvc_file(file_path: Path) -> Tuple[str, str, int]:
-        info_dict = yaml.safe_load(open(f"{file_path}.dvc").read())["outs"][0]
-        md5sum = info_dict["md5"]
-        return md5sum[:2], md5sum[2:], info_dict["size"]
-
-    @staticmethod
-    def get_url(
-        directory: str, filename: str, http_endpoint: str = DVC_PUBLIC_HTTP_ENDPOINT
-    ) -> str:
-        return f"{http_endpoint}/{directory}/{filename}"
-
-    async def _read(self, directory: str, filename: str) -> bytes:
-        # Try to find local cached file
-        local_cache_path = CACHE_DIRECTORY_BASE / get_version_info() / (self.uri.name)
-        if local_cache_path.exists():
-            module_logger.debug(f"Local cache exists: {local_cache_path}")
-            async with aiofiles.open(local_cache_path, mode="rb") as f:
-                return await f.read()
-
-        # Try to find real file along with DVC file (no DVC)
-        if Path(self.uri).exists():
-            module_logger.debug(f"Local file exists: {self.uri}")
-            async with aiofiles.open(self.uri, mode="rb") as f:
-                return await f.read()
-
-        module_logger.debug(f"{self.uri} not exists, resolving DVC")
-        if self.dvc_cache_path is not None:
-            cached: Path = self.dvc_cache_path / directory / filename
-            if cached.exists():
-                module_logger.debug(f"DVC cache hit: {cached}")
-                async with aiofiles.open(cached, mode="rb") as f:
-                    return await f.read()
-            else:
-                module_logger.debug(f"DVC cache directory exists, but not having {self.uri}")
-
-        # Fetching from remote
-        async with aiohttp.ClientSession() as session:
-            url = self.get_url(directory, filename)
-            module_logger.debug(f"Fetching from remote: {url}")
-            async with session.get(url) as resp:
-                if resp.status != 200:
-                    raise errors.NotFoundInDVCRemote(self.uri, f"{directory}{filename}")
-                data = await resp.read()
-                caching_path = CACHE_DIRECTORY_BASE / get_version_info() / (self.uri.name)
-                module_logger.debug(f"caching to {caching_path}")
-                caching_path.parent.mkdir(parents=True, exist_ok=True)
-                async with aiofiles.open(caching_path, mode="wb") as f:
-                    await f.write(data)
-                return data
-
-    async def read(self) -> bytes:
-        directory, filename, size = self.parse_dvc_file(self.uri)
-        data = await self._read(directory, filename)
-        assert len(data) == size
-        return data
-
-
-def resolve_file(src_name: str, extension: str, num_pe: int = 2) -> bytes:
-    # First check whether it is generated file or not
-    if extension == EXT_ENF:
-        version_info = get_version_info()
-        if version_info is None:
-            raise errors.VersionInfoNotFound()
-        generated_path_base = f"generated/{version_info}"
-        file_name = f'{src_name}_warboy_{num_pe}pe.{extension}'
-        full_path = DATA_DIRECTORY_BASE / f'{generated_path_base}/{file_name}'
-    else:
-        full_path = next((DATA_DIRECTORY_BASE / src_name).glob(f'*.{extension}.dvc'))
-        # Remove `.dvc` suffix
-        full_path = full_path.with_suffix('')
-
-    try:
-        return synchronous(ArtifactResolver(full_path).read)()
-    except Exception as e:
-        raise errors.ArtifactNotFound(f"{src_name}:{full_path}") from e
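The deleted utils.py above was the async ancestor (aiohttp/aiofiles plus a synchronous(...) trampoline) of the new _utils.py: one resolve_file entry point handled both ONNX and ENF artifacts. Roughly, and using internal helpers whose stability is not guaranteed:

    # old (removed): one entry point, async under the hood
    #   enf = resolve_file("mlcommons_resnet50_v1.5", "enf", num_pe=2)

    # new: source artifacts and compiled model sources are resolved separately
    from furiosa.models._utils import EXT_ONNX, resolve_model_source, resolve_source

    onnx_bytes = resolve_source("mlcommons_resnet50_v1.5", EXT_ONNX)
    enf_bytes = resolve_model_source("mlcommons_resnet50_v1.5", num_pe=2)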
@@ -109,17 +115,14 @@ def __call__(self, model_outputs: Sequence[npt.ArrayLike], contexts: Any = None) class EfficientNetB0(ImageClassificationModel): """EfficientNet B0 model""" - postprocessor_map: Dict[Platform, Type[PostProcessor]] = { + postprocessor_map: ClassVar[Dict[Platform, Type[PostProcessor]]] = { Platform.PYTHON: EfficientNetB0PostProcessor, } - @staticmethod - def get_artifact_name(): - return "efficientnet_b0" - - @classmethod - def load(cls, use_native: bool = False): - return cls( + def __init__(self, *, postprocessor_type: Union[str, Platform] = Platform.PYTHON): + postprocessor_type = Platform(postprocessor_type) + validate_postprocessor_type(postprocessor_type, self.postprocessor_map.keys()) + super().__init__( name="EfficientNetB0", format=Format.ONNX, family="EfficientNet", @@ -129,5 +132,7 @@ def load(cls, use_native: bool = False): publication=Publication(url="https://arxiv.org/abs/1905.11946"), ), preprocessor=EfficientNetB0PreProcessor(), - postprocessor=EfficientNetB0PostProcessor(), + postprocessor=self.postprocessor_map[postprocessor_type](), ) + + self._artifact_name = "efficientnet_b0" diff --git a/furiosa/models/vision/efficientnet_v2_s/__init__.py b/furiosa/models/vision/efficientnet_v2_s/__init__.py index aecd4312..7a069d04 100644 --- a/furiosa/models/vision/efficientnet_v2_s/__init__.py +++ b/furiosa/models/vision/efficientnet_v2_s/__init__.py @@ -1,10 +1,11 @@ from pathlib import Path -from typing import Any, Dict, List, Sequence, Tuple, Type, Union +from typing import Any, ClassVar, Dict, List, Sequence, Tuple, Type, Union from PIL import Image, ImageOps import numpy as np import numpy.typing as npt +from ..._utils import validate_postprocessor_type from ...types import ( Format, ImageClassificationModel, @@ -13,8 +14,8 @@ PostProcessor, PreProcessor, Publication, + PythonPostProcessor, ) -from ...utils import get_field_default from ..common.datasets import imagenet1k IMAGENET_DEFAULT_MEAN = np.array((0.485, 0.456, 0.406), dtype=np.float32)[:, np.newaxis, np.newaxis] @@ -64,12 +65,15 @@ def normalize(image: Image.Image) -> np.ndarray: class EfficientNetV2sPreProcessor(PreProcessor): @staticmethod def __call__( - image: Union[str, Path, npt.ArrayLike], with_quantize: bool = False + image: Union[str, Path, npt.ArrayLike], with_scaling: bool = False ) -> Tuple[np.ndarray, None]: """Read and preprocess an image located at image_path. Args: image: A path of an image. + with_scaling: Whether to apply model-specific techniques that involve scaling the + model's input and converting its data type to float32. Refer to the code to gain a + precise understanding of the techniques used. Defaults to False. Returns: The first element of the tuple is a numpy array that meets the input requirements of the @@ -86,16 +90,17 @@ def __call__( image = center_crop(image, INPUT_SIZE) image = np.ascontiguousarray(image) + assert image.dtype == np.uint8 data = np.transpose(image, (2, 0, 1)) - if with_quantize: + if with_scaling: data = data.astype(np.float32) / 255 data = normalize(data) return np.expand_dims(data, axis=0), None -class EfficientNetV2sPostProcessor(PostProcessor): +class EfficientNetV2sPostProcessor(PythonPostProcessor): def __call__(self, model_outputs: Sequence[npt.ArrayLike], contexts: Any = None) -> str: """Convert the outputs of a model to a label string, such as car and cat. 
@@ -114,18 +119,14 @@ def __call__(self, model_outputs: Sequence[npt.ArrayLike], contexts: Any = None) class EfficientNetV2s(ImageClassificationModel): """EfficientNetV2-s model""" - postprocessor_map: Dict[Platform, Type[PostProcessor]] = { + postprocessor_map: ClassVar[Dict[Platform, Type[PostProcessor]]] = { Platform.PYTHON: EfficientNetV2sPostProcessor, } - @staticmethod - def get_artifact_name(): - return "efficientnet_v2_s" - - @classmethod - def load(cls, use_native: bool = False): - postprocessor = get_field_default(cls, "postprocessor_map")[Platform.PYTHON]() - return cls( + def __init__(self, *, postprocessor_type: Union[str, Platform] = Platform.PYTHON): + postprocessor_type = Platform(postprocessor_type) + validate_postprocessor_type(postprocessor_type, self.postprocessor_map.keys()) + super().__init__( name="EfficientNetV2s", format=Format.ONNX, family="EfficientNetV2", @@ -135,5 +136,7 @@ def load(cls, use_native: bool = False): publication=Publication(url="https://arxiv.org/abs/2104.00298"), ), preprocessor=EfficientNetV2sPreProcessor(), - postprocessor=postprocessor, + postprocessor=self.postprocessor_map[postprocessor_type](), ) + + self._artifact_name = "efficientnet_v2_s" diff --git a/furiosa/models/vision/preprocess.py b/furiosa/models/vision/preprocess.py index 95c959fc..d559ea22 100644 --- a/furiosa/models/vision/preprocess.py +++ b/furiosa/models/vision/preprocess.py @@ -1,5 +1,9 @@ +import os +from typing import Union + import cv2 import numpy as np +from numpy import ndarray def center_crop(image: np.ndarray, cropped_height: int, cropped_width: int) -> np.ndarray: @@ -32,3 +36,16 @@ def resize_with_aspect_ratio( new_width = int(new_width * width / height) image = cv2.resize(image, (new_width, new_height), interpolation=interpolation) return image + + +def read_image_opencv_if_needed(image: Union[str, os.PathLike, ndarray]): + if isinstance(image, ndarray): + return image + elif isinstance(image, os.PathLike): + path = image.__fspath__() # imread only accepts str (opencv/opencv#15731) + else: + path = image + image = cv2.imread(path) + if image is None: + raise FileNotFoundError(path) + return image diff --git a/furiosa/models/vision/resnet50/__init__.py b/furiosa/models/vision/resnet50/__init__.py index e0bbb528..651bbc9f 100644 --- a/furiosa/models/vision/resnet50/__init__.py +++ b/furiosa/models/vision/resnet50/__init__.py @@ -1,10 +1,11 @@ import logging -from typing import Any, Dict, List, Sequence, Tuple, Type, Union +from typing import Any, ClassVar, Dict, List, Sequence, Tuple, Type, Union import cv2 import numpy as np import numpy.typing as npt +from ..._utils import validate_postprocessor_type from ...types import ( Format, ImageClassificationModel, @@ -13,9 +14,10 @@ PostProcessor, PreProcessor, Publication, + PythonPostProcessor, ) from ..common.datasets import imagenet1k -from ..preprocess import center_crop, resize_with_aspect_ratio +from ..preprocess import center_crop, read_image_opencv_if_needed, resize_with_aspect_ratio CLASSES: List[str] = imagenet1k.ImageNet1k_CLASSES @@ -25,13 +27,16 @@ class ResNet50PreProcessor(PreProcessor): @staticmethod def __call__( - image: Union[str, npt.ArrayLike], with_quantize: bool = False + image: Union[str, npt.ArrayLike], with_scaling: bool = False ) -> Tuple[np.ndarray, None]: """Convert an input image to a model input tensor Args: image: A path of an image or an image loaded as a numpy array in BGR order. 
+            with_scaling: Whether to apply model-specific techniques that involve scaling the
+                model's input and converting its data type to float32. Refer to the code to gain a
+                precise understanding of the techniques used. Defaults to False.
 
         Returns:
             The first element of the tuple is a numpy array that meets the input requirements of the
@@ -41,16 +46,14 @@ def __call__(
         """
         # https://github.com/mlcommons/inference/blob/af7f5a0b856402b9f461002cfcad116736a8f8af/vision/classification_and_detection/python/main.py#L37-L39
         # https://github.com/mlcommons/inference/blob/af7f5a0b856402b9f461002cfcad116736a8f8af/vision/classification_and_detection/python/dataset.py#L168-L184
-        if type(image) == str:
-            image = cv2.imread(image)
-            if image is None:
-                raise FileNotFoundError(image)
+        image = read_image_opencv_if_needed(image)
+        assert image.dtype == np.uint8
         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
         image = resize_with_aspect_ratio(
             image, 224, 224, percent=87.5, interpolation=cv2.INTER_AREA
         )
         image = center_crop(image, 224, 224)
-        if with_quantize:
+        if with_scaling:
             image = np.asarray(image, dtype=np.float32)
             # https://github.com/mlcommons/inference/blob/af7f5a0b856402b9f461002cfcad116736a8f8af/vision/classification_and_detection/python/dataset.py#L178
             image -= np.array([123.68, 116.78, 103.94], dtype=np.float32)
@@ -58,7 +61,7 @@ def __call__(
         return image[np.newaxis, ...], None
 
 
-class ResNet50PostProcessor(PostProcessor):
+class ResNet50PostProcessor(PythonPostProcessor):
     """Convert the outputs of a model to a label string, such as car and cat.
 
     Args:
@@ -77,17 +80,14 @@ def __call__(self, model_outputs: Sequence[npt.ArrayLike], contexts: Any = None)
 class ResNet50(ImageClassificationModel):
     """MLCommons ResNet50 model"""
 
-    postprocessor_map: Dict[Platform, Type[PostProcessor]] = {
+    postprocessor_map: ClassVar[Dict[Platform, Type[PostProcessor]]] = {
         Platform.PYTHON: ResNet50PostProcessor,
     }
 
-    @staticmethod
-    def get_artifact_name():
-        return "mlcommons_resnet50_v1.5"
-
-    @classmethod
-    def load(cls, use_native: bool = False):
-        return cls(
+    def __init__(self, *, postprocessor_type: Union[str, Platform] = Platform.PYTHON):
+        postprocessor_type = Platform(postprocessor_type)
+        validate_postprocessor_type(postprocessor_type, self.postprocessor_map.keys())
+        super().__init__(
             name="ResNet50",
             format=Format.ONNX,
             family="ResNet",
@@ -97,5 +97,7 @@ def load(cls, use_native: bool = False):
                 publication=Publication(url="https://arxiv.org/abs/1512.03385.pdf"),
             ),
             preprocessor=ResNet50PreProcessor(),
-            postprocessor=ResNet50PostProcessor(),
+            postprocessor=self.postprocessor_map[postprocessor_type](),
         )
+
+        self._artifact_name = "mlcommons_resnet50_v1.5"
diff --git a/furiosa/models/vision/ssd_mobilenet/__init__.py b/furiosa/models/vision/ssd_mobilenet/__init__.py
index b8281b71..69db8d8a 100644
--- a/furiosa/models/vision/ssd_mobilenet/__init__.py
+++ b/furiosa/models/vision/ssd_mobilenet/__init__.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Any, Dict, List, Sequence, Tuple, Type, Union
+from typing import Any, ClassVar, Dict, List, Sequence, Tuple, Type, Union
 
 import cv2
 import numpy
@@ -8,6 +8,7 @@
 
 from . import anchor_generator  # type: ignore[import]
 from .. import native
+from ..._utils import validate_postprocessor_type
 from ...types import (
     Format,
     Metadata,
@@ -16,10 +17,12 @@
     PostProcessor,
     PreProcessor,
     Publication,
+    PythonPostProcessor,
+    RustPostProcessor,
 )
-from ...utils import get_field_default
 from ..common.datasets import coco
 from ..postprocess import LtrbBoundingBox, ObjectDetectionResult, calibration_ltrbbox, sigmoid
+from ..preprocess import read_image_opencv_if_needed
 
 NUM_OUTPUTS: int = 12
 CLASSES = coco.MobileNetSSD_CLASSES
@@ -156,13 +159,16 @@ class SSDMobileNetPreProcessor(PreProcessor):
     @staticmethod
     def __call__(
         images: Sequence[Union[str, np.ndarray]],
-        with_quantize: bool = False,
+        with_scaling: bool = False,
     ) -> Tuple[npt.ArrayLike, List[Dict[str, Any]]]:
         """Preprocess input images to a batch of input tensors.
 
         Args:
             images: A list of paths of image files (e.g., JPEG, PNG) or a stacked image loaded
                 as a numpy array in BGR order or gray order.
+            with_scaling: Whether to apply model-specific techniques that involve scaling the
+                model's input and converting its data type to float32. Refer to the code to gain a
+                precise understanding of the techniques used. Defaults to False.
 
         Returns:
             The first element is 3-channel images of 300x300 in NCHW format,
@@ -179,12 +185,10 @@ def __call__(
         if isinstance(images, str):
             images = [images]
         for image in images:
-            if type(image) == str:
-                image = cv2.imread(image)
-                if image is None:
-                    raise FileNotFoundError(image)
+            image = read_image_opencv_if_needed(image)
+            assert image.dtype == np.uint8
 
-            if with_quantize:
+            if with_scaling:
                 image = np.array(image, dtype=np.float32)
             if len(image.shape) < 3 or image.shape[2] != 3:
                 image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
@@ -193,7 +197,7 @@ def __call__(
             width = image.shape[1]
             height = image.shape[0]
             image = cv2.resize(image, (300, 300), interpolation=cv2.INTER_LINEAR)
-            if with_quantize:
+            if with_scaling:
                 image -= 127
                 image /= 127
             image = image.transpose([2, 0, 1])
@@ -202,7 +206,7 @@
 
         return np.stack(batch_image, axis=0), batch_preproc_param
 
 
-class SSDMobileNetPythonPostProcessor(PostProcessor):
+class SSDMobileNetPythonPostProcessor(PythonPostProcessor):
     @staticmethod
     def __call__(
         model_outputs: Sequence[numpy.ndarray],
@@ -279,7 +283,7 @@ def __call__(
         return batch_results
 
 
-class SSDMobileNetNativePostProcessor(PostProcessor):
+class SSDMobileNetNativePostProcessor(RustPostProcessor):
     def __init__(self):
         self._native = native.ssd_mobilenet.RustPostProcessor()
 
@@ -313,21 +317,15 @@ def __call__(self, model_outputs: Sequence[numpy.ndarray], contexts: Dict[str, A
 class SSDMobileNet(ObjectDetectionModel):
     """MLCommons MobileNet v1 model"""
 
-    postprocessor_map: Dict[Platform, Type[PostProcessor]] = {
+    postprocessor_map: ClassVar[Dict[Platform, Type[PostProcessor]]] = {
         Platform.PYTHON: SSDMobileNetPythonPostProcessor,
         Platform.RUST: SSDMobileNetNativePostProcessor,
     }
 
-    @staticmethod
-    def get_artifact_name():
-        return "mlcommons_ssd_mobilenet_v1"
-
-    @classmethod
-    def load(cls, use_native: bool = True):
-        postproc_type = Platform.RUST if use_native else Platform.PYTHON
-        logger.debug(f"Using {postproc_type.name} postprocessor")
-        postprocessor = get_field_default(cls, "postprocessor_map")[postproc_type]()
-        return cls(
+    def __init__(self, *, postprocessor_type: Union[str, Platform] = Platform.RUST):
+        postprocessor_type = Platform(postprocessor_type)
+        validate_postprocessor_type(postprocessor_type, self.postprocessor_map.keys())
+        super().__init__(
             name="MLCommonsSSDMobileNet",
             format=Format.ONNX,
family="MobileNetV1", @@ -337,5 +335,7 @@ def load(cls, use_native: bool = True): publication=Publication(url="https://arxiv.org/abs/1704.04861.pdf"), ), preprocessor=SSDMobileNetPreProcessor(), - postprocessor=postprocessor, + postprocessor=self.postprocessor_map[postprocessor_type](), ) + + self._artifact_name = "mlcommons_ssd_mobilenet_v1" diff --git a/furiosa/models/vision/ssd_resnet34/__init__.py b/furiosa/models/vision/ssd_resnet34/__init__.py index db5558ad..5fe60cce 100644 --- a/furiosa/models/vision/ssd_resnet34/__init__.py +++ b/furiosa/models/vision/ssd_resnet34/__init__.py @@ -1,7 +1,7 @@ import itertools import logging from math import sqrt -from typing import Any, Dict, List, Sequence, Tuple, Type, Union +from typing import Any, ClassVar, Dict, List, Sequence, Tuple, Type, Union import cv2 import numpy @@ -11,6 +11,7 @@ import torch.nn.functional as F from .. import native +from ..._utils import validate_postprocessor_type from ...types import ( Format, Metadata, @@ -19,10 +20,12 @@ PostProcessor, PreProcessor, Publication, + PythonPostProcessor, + RustPostProcessor, ) -from ...utils import get_field_default from ..common.datasets import coco from ..postprocess import LtrbBoundingBox, ObjectDetectionResult, calibration_ltrbbox +from ..preprocess import read_image_opencv_if_needed NUM_OUTPUTS: int = 12 CLASSES = coco.MobileNetSSD_Large_CLASSES @@ -262,13 +265,16 @@ class SSDResNet34PreProcessor(PreProcessor): @staticmethod def __call__( images: Sequence[Union[str, np.ndarray]], - with_quantize: bool = False, + with_scaling: bool = False, ) -> Tuple[npt.ArrayLike, List[Dict[str, Any]]]: """Preprocess input images to a batch of input tensors Args: images: A list of paths of image files (e.g., JPEG, PNG) or a stacked image loaded as a numpy array in BGR order or gray order. + with_scaling: Whether to apply model-specific techniques that involve scaling the + model's input and converting its data type to float32. Refer to the code to gain a + precise understanding of the techniques used. Defaults to False. 
 
         Returns:
             The first element is a list of 3-channel images of 1200x1200
@@ -286,12 +292,10 @@ def __call__(
         if isinstance(images, str):
             images = [images]
         for image in images:
-            if type(image) == str:
-                image = cv2.imread(image)
-                if image is None:
-                    raise FileNotFoundError(image)
+            image = read_image_opencv_if_needed(image)
+            assert image.dtype == np.uint8
 
-            if with_quantize:
+            if with_scaling:
                 image = np.array(image, dtype=np.float32)
             if len(image.shape) < 3 or image.shape[2] != 3:
                 image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
@@ -300,7 +304,7 @@ def __call__(
             width = image.shape[1]
             height = image.shape[0]
             image = cv2.resize(image, (1200, 1200), interpolation=cv2.INTER_LINEAR)
-            if with_quantize:
+            if with_scaling:
                 mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
                 std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
                 image = image / 255.0 - mean
@@ -314,7 +318,7 @@ def __call__(
 
         return np.stack(batch_image, axis=0), batch_preproc_param
 
 
-class SSDResNet34PythonPostProcessor(PostProcessor):
+class SSDResNet34PythonPostProcessor(PythonPostProcessor):
     @staticmethod
     def __call__(
         model_outputs: Sequence[np.ndarray],
@@ -389,7 +393,7 @@ def __call__(
         return batch_results
 
 
-class SSDResNet34NativePostProcessor(PostProcessor):
+class SSDResNet34NativePostProcessor(RustPostProcessor):
     def __init__(self):
         self._native = native.ssd_resnet34.RustPostProcessor()
 
@@ -422,21 +426,15 @@ def __call__(self, model_outputs: Sequence[numpy.ndarray], contexts: Sequence[Di
 class SSDResNet34(ObjectDetectionModel):
     """MLCommons SSD ResNet34 model"""
 
-    postprocessor_map: Dict[Platform, Type[PostProcessor]] = {
+    postprocessor_map: ClassVar[Dict[Platform, Type[PostProcessor]]] = {
         Platform.PYTHON: SSDResNet34PythonPostProcessor,
         Platform.RUST: SSDResNet34NativePostProcessor,
     }
 
-    @staticmethod
-    def get_artifact_name():
-        return "mlcommons_ssd_resnet34"
-
-    @classmethod
-    def load(cls, use_native: bool = True):
-        postproc_type = Platform.RUST if use_native else Platform.PYTHON
-        logger.debug(f"Using {postproc_type.name} postprocessor")
-        postprocessor = get_field_default(cls, "postprocessor_map")[postproc_type]()
-        return cls(
+    def __init__(self, *, postprocessor_type: Union[str, Platform] = Platform.RUST):
+        postprocessor_type = Platform(postprocessor_type)
+        validate_postprocessor_type(postprocessor_type, self.postprocessor_map.keys())
+        super().__init__(
             name="SSDResNet34",
             format=Format.ONNX,
             family="ResNet",
@@ -449,5 +447,7 @@ def load(cls, use_native: bool = True):
                 ),
             ),
             preprocessor=SSDResNet34PreProcessor(),
-            postprocessor=postprocessor,
+            postprocessor=self.postprocessor_map[postprocessor_type](),
         )
+
+        self._artifact_name = "mlcommons_ssd_resnet34"
diff --git a/furiosa/models/vision/yolov5/core.py b/furiosa/models/vision/yolov5/core.py
index 4e3714a8..7248afe1 100644
--- a/furiosa/models/vision/yolov5/core.py
+++ b/furiosa/models/vision/yolov5/core.py
@@ -1,13 +1,21 @@
 from abc import ABC
-from typing import Any, Dict, List, Sequence, Tuple, Type, Union
+from typing import Any, ClassVar, Dict, List, Sequence, Tuple, Type, Union
 
 import cv2
 import numpy as np
 import numpy.typing as npt
 
 from .. import native
-from ...types import ObjectDetectionModel, Platform, PostProcessor, PreProcessor
+from ...types import (
+    Format,
+    ObjectDetectionModel,
+    Platform,
+    PostProcessor,
+    PreProcessor,
+    RustPostProcessor,
+)
 from ...vision.postprocess import LtrbBoundingBox, ObjectDetectionResult
+from ..preprocess import read_image_opencv_if_needed
 
 _INPUT_SIZE = (640, 640)
 _STRIDES = [8, 16, 32]
@@ -97,14 +105,15 @@ def _reshape_output(feat: np.ndarray, anchor_per_layer_count: int, num_classes:
 class YOLOv5PreProcessor(PreProcessor):
     @staticmethod
     def __call__(
-        images: Sequence[Union[str, np.ndarray]], with_quantize: bool = False
+        images: Sequence[Union[str, np.ndarray]], with_scaling: bool = False
     ) -> Tuple[np.ndarray, List[Dict[str, Any]]]:
         """Preprocess input images to a batch of input tensors
 
         Args:
-            images: Color images have (NCHW: Batch, Channel, Height, Width) dimensions.
-            with_quantize: Whether to put quantize operator in front of the model or not.
-
+            images: Color images have (NHWC: Batch, Height, Width, Channel) dimensions.
+            with_scaling: Whether to apply model-specific techniques that involve scaling the
+                model's input and converting its data type to float32. Refer to the code to gain a
+                precise understanding of the techniques used. Defaults to False.
         Returns:
             a pre-processed image, scales and padded sizes(width,height) per images.
                 The first element is a stacked numpy array containing a batch of images.
@@ -124,28 +133,26 @@ def __call__(
         batched_proc_params = []
         if isinstance(images, str):
             images = [images]
-        for img in images:
-            if type(img) == str:
-                img = cv2.imread(img)
-                if img is None:
-                    raise FileNotFoundError(img)
-            if with_quantize:
-                img = img.astype(np.float32)
-            img, (sx, sy), (padw, padh) = _resize(img, _INPUT_SIZE)
-            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-
-            if with_quantize:
-                img /= 255.0
-            img = img.transpose([2, 0, 1])
+        for image in images:
+            image = read_image_opencv_if_needed(image)
+            assert image.dtype == np.uint8
+            if with_scaling:
+                image = image.astype(np.float32)
+            image, (sx, sy), (padw, padh) = _resize(image, _INPUT_SIZE)
+            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+            if with_scaling:
+                image /= 255.0
+            image = image.transpose([2, 0, 1])  # NHWC -> NCHW
             assert sx == sy, "yolov5 must be the same rescale for width and height"
             scale = sx
-            batched_image.append(img)
+            batched_image.append(image)
             batched_proc_params.append({"scale": scale, "pad": (padw, padh)})
 
         return np.stack(batched_image, axis=0), batched_proc_params
 
 
-class YOLOv5PostProcessor(PostProcessor):
+class YOLOv5PostProcessor(RustPostProcessor):
     def __init__(self, anchors: npt.ArrayLike, class_names: Sequence[str]):
         """
         native (RustProcessor): A native postprocessor. It has several information to decode: (xyxy,
@@ -224,6 +231,16 @@ def __call__(
 
 
 class YOLOv5Base(ObjectDetectionModel, ABC):
-    postprocessor_map: Dict[Platform, Type[PostProcessor]] = {
-        Platform.PYTHON: YOLOv5PostProcessor,
+    postprocessor_map: ClassVar[Dict[Platform, Type[PostProcessor]]] = {
+        Platform.RUST: YOLOv5PostProcessor,
     }
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(
+            family="YOLO",
+            version="v5",
+            format=Format.ONNX,
+            preprocessor=YOLOv5PreProcessor(),
+            *args,
+            **kwargs,
+        )
diff --git a/furiosa/models/vision/yolov5/large.py b/furiosa/models/vision/yolov5/large.py
index 3f7988a1..25f47ddf 100644
--- a/furiosa/models/vision/yolov5/large.py
+++ b/furiosa/models/vision/yolov5/large.py
@@ -4,13 +4,14 @@
 CLASSES (List[str]): a list of class names
 """
 import pathlib
-from typing import List
+from typing import List, Union
 
 import numpy as np
 import yaml
 
-from ...types import Format, Metadata, Publication
-from .core import YOLOv5Base, YOLOv5PostProcessor, YOLOv5PreProcessor
+from ..._utils import validate_postprocessor_type
+from ...types import Metadata, Platform, Publication
+from .core import YOLOv5Base
 
 with open(pathlib.Path(__file__).parent / "datasets/yolov5l/cfg.yaml", "r") as f:
     configuration = yaml.safe_load(f)
@@ -23,25 +24,18 @@
 class YOLOv5l(YOLOv5Base):
     """YOLOv5 Large model"""
 
-    classes = CLASSES
+    classes: List[str] = CLASSES
 
-    @staticmethod
-    def get_artifact_name():
-        return "yolov5l"
-
-    @classmethod
-    def load(cls, use_native: bool = False):
-        if use_native:
-            raise NotImplementedError("No native implementation for YOLOv5")
-        return cls(
+    def __init__(self, *, postprocessor_type: Union[str, Platform] = Platform.RUST):
+        postprocessor_type = Platform(postprocessor_type)
+        validate_postprocessor_type(postprocessor_type, self.postprocessor_map.keys())
+        super().__init__(
             name="YOLOv5Large",
-            format=Format.ONNX,
-            family="YOLOv5",
-            version="v5",
             metadata=Metadata(
                 description="YOLOv5 large model",
                 publication=Publication(url="https://github.com/ultralytics/yolov5"),
             ),
-            preprocessor=YOLOv5PreProcessor(),
-            postprocessor=YOLOv5PostProcessor(_ANCHORS, CLASSES),
+            postprocessor=self.postprocessor_map[postprocessor_type](_ANCHORS, CLASSES),
         )
+
+        self._artifact_name = "yolov5l"
diff --git a/furiosa/models/vision/yolov5/medium.py b/furiosa/models/vision/yolov5/medium.py
index a0a20774..3e735d55 100644
--- a/furiosa/models/vision/yolov5/medium.py
+++ b/furiosa/models/vision/yolov5/medium.py
@@ -4,13 +4,14 @@
 CLASSES (List[str]): a list of class names
 """
 import pathlib
-from typing import List
+from typing import List, Union
 
 import numpy as np
 import yaml
 
-from ...types import Format, Metadata, Publication
-from .core import YOLOv5Base, YOLOv5PostProcessor, YOLOv5PreProcessor
+from ..._utils import validate_postprocessor_type
+from ...types import Metadata, Platform, Publication
+from .core import YOLOv5Base
 
 with open(pathlib.Path(__file__).parent / "datasets/yolov5m/cfg.yaml", "r") as f:
     configuration = yaml.safe_load(f)
@@ -23,25 +24,18 @@
 class YOLOv5m(YOLOv5Base):
     """YOLOv5 Medium model"""
 
-    classes = CLASSES
+    classes: List[str] = CLASSES
 
-    @staticmethod
-    def get_artifact_name():
-        return "yolov5m"
-
-    @classmethod
-    def load(cls, use_native: bool = False):
-        if use_native:
-            raise NotImplementedError("No native implementation for YOLOv5")
-        return cls(
+    def __init__(self, *, postprocessor_type: Union[str, Platform] = Platform.RUST):
+        postprocessor_type = Platform(postprocessor_type)
+        validate_postprocessor_type(postprocessor_type, self.postprocessor_map.keys())
+        super().__init__(
             name="YOLOv5Medium",
-            format=Format.ONNX,
-            family="YOLOv5",
-            version="v5",
             metadata=Metadata(
                 description="YOLOv5 medium model",
                 publication=Publication(url="https://github.com/ultralytics/yolov5"),
             ),
-            preprocessor=YOLOv5PreProcessor(),
-            postprocessor=YOLOv5PostProcessor(_ANCHORS, CLASSES),
+            postprocessor=self.postprocessor_map[postprocessor_type](_ANCHORS, CLASSES),
         )
+
+        self._artifact_name = "yolov5m"
diff --git a/pyproject.toml b/pyproject.toml
index d9a9c0f0..5bc8b159 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,13 +10,9 @@ license-files = { paths = ["LICENSE"] }
 classifiers = [
     "Development Status :: 3 - Alpha",
     "Environment :: Console",
-    "Environment :: Web Environment",
     "Intended Audience :: Developers",
     "Intended Audience :: System Administrators",
     "License :: OSI Approved :: Apache Software License",
-    "Programming Language :: Rust",
-    "Programming Language :: Python :: Implementation :: CPython",
-    "Programming Language :: Python :: Implementation :: PyPy",
     "Programming Language :: Python :: 3",
     "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
@@ -28,32 +24,33 @@ description = "Furiosa Models"
 requires-python = "~=3.8"
 dynamic = ["version"]
 dependencies = [
-    "furiosa-common == 0.9.*",
-    "furiosa-runtime == 0.9.*",
+    "furiosa-common == 0.10.*",
+    "furiosa-tools == 0.10.*",
+    "furiosa-runtime == 0.10.*",
     "furiosa-native-postprocess == 0.9.0_dev0",
-    "aiofiles ~= 22.1.0",
-    "aiohttp ~= 3.8.3",
-    "opencv-python-headless ~= 4.6",
-    "torch ~= 1.13",
-    "torchvision ~= 0.14",
-    "pydantic ~= 1.10",
-    "tabulate ~= 0.9.0",
-    "PyYAML ~= 6.0",
+    "opencv-python-headless",
+    "torch",
+    "torchvision",
+    "pydantic ~= 2.0",
+    "tabulate",
+    "PyYAML",
-    "numpy ~= 1.21",
+    "numpy",
 ]
 
 [project.optional-dependencies]
 test = [
+    "onnx",
+    "furiosa-quantizer == 0.10.*",
+    "pytest",
     "pytest-benchmark == 4.0.0",
     "pytest-asyncio ~= 0.17.2",
     "pycocotools ~= 2.0.4",
 ]
-dvc = [
-    "dvc[s3]",
-]
+dvc = ["dvc[s3]"]
+full = ["dvc[s3]", "onnx", "furiosa-quantizer == 0.10.*"]
 
 [project.scripts]
 furiosa-models = "furiosa.models.client.main:main"
diff --git a/tekton/furiosa-models-ci/pr-trigger.yaml b/tekton/furiosa-models-ci/pr-trigger.yaml
index 2d2b3ca2..0264ebba 100644
--- a/tekton/furiosa-models-ci/pr-trigger.yaml
+++ b/tekton/furiosa-models-ci/pr-trigger.yaml
@@ -57,6 +57,19 @@ spec:
             value: "$(tt.params.triggerContext)"
           - name: headRef
             value: "$(tt.params.headRef)"
+        taskRunSpecs:
+          - pipelineTaskName: unittests
+            taskPodTemplate:
+              tolerations:
+                - key: "npu"
+                  operator: "Exists"
+                  effect: "NoSchedule"
+          - pipelineTaskName: test-examples
+            taskPodTemplate:
+              tolerations:
+                - key: "npu"
+                  operator: "Exists"
+                  effect: "NoSchedule"
         resources:
           - name: image
             resourceSpec:
diff --git a/tekton/furiosa-models-ci/test.yaml b/tekton/furiosa-models-ci/test.yaml
index cffc27c0..9bddce20 100644
--- a/tekton/furiosa-models-ci/test.yaml
+++ b/tekton/furiosa-models-ci/test.yaml
@@ -35,17 +35,19 @@ spec:
       #!/usr/bin/env bash
       set -e
 
-      TOOLCHAIN_VERSION=$(apt-cache policy furiosa-libcompiler | grep Installed | awk '{print $2}')
-      apt-get update && apt-get install -y furiosa-libhal-sim=$TOOLCHAIN_VERSION
-      NPU_GLOBAL_CONFIG_PATH=warboy-b0-2pe make unit_tests
+      # FIXME: Remove me when TLS problem is solved (https://github.com/furiosa-ai/furiosa-sdk-private/issues/719)
+      export LD_PRELOAD=$(find $(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')/torch/lib/ -name "libgomp*"):$LD_PRELOAD
+      make unit_tests
     resources:
       requests:
        memory: 16Gi
        cpu: 8
+       alpha.furiosa.ai/npu: 1
      limits:
        memory: 16Gi
        cpu: 8
+       alpha.furiosa.ai/npu: 1
 ---
 apiVersion: tekton.dev/v1beta1
 kind: Task
@@ -84,17 +86,16 @@ spec:
       #!/usr/bin/env bash
       set -e
 
-      TOOLCHAIN_VERSION=$(apt-cache policy furiosa-libcompiler | grep Installed | awk '{print $2}')
-      apt-get update && apt-get install -y furiosa-libhal-sim=$TOOLCHAIN_VERSION
-
-      pip install 'furiosa-quantizer==0.9.*'
-
-      NPU_GLOBAL_CONFIG_PATH=warboy-b0-2pe make examples
+      # FIXME: Remove me when TLS problem is solved (https://github.com/furiosa-ai/furiosa-sdk-private/issues/719)
+      export LD_PRELOAD=$(find $(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')/torch/lib/ -name "libgomp*"):$LD_PRELOAD
+      make examples
     resources:
      requests:
        memory: 16Gi
        cpu: 8
+       alpha.furiosa.ai/npu: 1
      limits:
        memory: 16Gi
        cpu: 8
+       alpha.furiosa.ai/npu: 1
diff --git a/tekton/furiosa-models-regression-test/regression-test.yaml b/tekton/furiosa-models-regression-test/regression-test.yaml
index 7bf074dd..3200f78e 100644
--- a/tekton/furiosa-models-regression-test/regression-test.yaml
+++ b/tekton/furiosa-models-regression-test/regression-test.yaml
@@ -58,6 +58,9 @@ spec:
       set -ex
 
       git config --global --add safe.directory /workspace/source
+      # FIXME: Remove me when TLS problem is solved (https://github.com/furiosa-ai/furiosa-sdk-private/issues/719)
+      export LD_PRELOAD=$(find $(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')/torch/lib/ -name "libgomp*"):$LD_PRELOAD
+
       echo -n "## Pip freeze result for " > pip_freeze.txt
       echo $(params.modelName) >> pip_freeze.txt
       echo "
 pip freeze result
 " >> pip_freeze.txt
diff --git a/tests/bench/test_efficientnet_b0.py b/tests/bench/test_efficientnet_b0.py
index 9ec04bc5..1faef857 100644
--- a/tests/bench/test_efficientnet_b0.py
+++ b/tests/bench/test_efficientnet_b0.py
@@ -6,7 +6,7 @@
 
 from furiosa.models.vision import EfficientNetB0
 from furiosa.models.vision.common.datasets import imagenet1k
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
 EXPECTED_ACCURACY = 72.44
 CLASSES: List[str] = imagenet1k.ImageNet1k_CLASSES
@@ -18,7 +18,7 @@ def test_efficientnetb0_accuracy(benchmark):
         os.environ.get('IMAGENET_VAL_LABELS', 'tests/data/imagenet/aux/val.txt')
     )
 
-    model = EfficientNetB0.load()
+    model = EfficientNetB0()
 
     image_paths = list(imagenet_val_images.glob("*.[Jj][Pp][Ee][Gg]"))
     with open(imagenet_val_labels, encoding="ascii") as file:
@@ -38,7 +38,7 @@ def read_image():
     def workload(image, answer):
         global correct_predictions, incorrect_predictions
         image, _ = model.preprocess(image)
-        output = sess.run(image).numpy()
+        output = runner.run(image)
         output = model.postprocess(output)
 
         if output == answer:
@@ -46,9 +46,8 @@ def workload(image, answer):
         else:
             incorrect_predictions += 1
 
-    sess = session.create(model.enf)
-    benchmark.pedantic(workload, setup=read_image, rounds=num_images)
-    sess.close()
+    with create_runner(model.model_source()) as runner:
+        benchmark.pedantic(workload, setup=read_image, rounds=num_images)
 
     total_predictions = correct_predictions + incorrect_predictions
     accuracy = 100.0 * correct_predictions / total_predictions
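For context, `benchmark.pedantic` in these accuracy tests comes from pytest-benchmark: when a `setup` callable is supplied, it runs before every round, and the `(args, kwargs)` pair it returns is forwarded to the workload. A condensed sketch of the loop structure, with a hypothetical one-sample dataset:

    # Sketch of the pytest-benchmark pattern used by these accuracy tests.
    def test_accuracy_pattern(benchmark):
        samples = iter([("tests/assets/cat.jpg", "cat")])  # hypothetical labelled data

        def read_image():
            image, answer = next(samples)
            return (image, answer), {}  # (args, kwargs) forwarded to the workload

        def workload(image, answer):
            pass  # preprocess, runner.run(...), postprocess, compare with answer

        benchmark.pedantic(workload, setup=read_image, rounds=1)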
diff --git a/tests/bench/test_efficientnet_v2_s.py b/tests/bench/test_efficientnet_v2_s.py
index 6e99cac8..e4cbd6d5 100644
--- a/tests/bench/test_efficientnet_v2_s.py
+++ b/tests/bench/test_efficientnet_v2_s.py
@@ -6,7 +6,7 @@
 
 from furiosa.models.vision import EfficientNetV2s
 from furiosa.models.vision.common.datasets import imagenet1k
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
 EXPECTED_ACCURACY = 83.532
 CLASSES: List[str] = imagenet1k.ImageNet1k_CLASSES
@@ -18,7 +18,7 @@ def test_efficientnetv2s_accuracy(benchmark):
         os.environ.get('IMAGENET_VAL_LABELS', 'tests/data/imagenet/aux/val.txt')
     )
 
-    model = EfficientNetV2s.load()
+    model = EfficientNetV2s()
 
     image_paths = list(imagenet_val_images.glob("*.[Jj][Pp][Ee][Gg]"))
     with open(imagenet_val_labels, encoding="ascii") as file:
@@ -38,7 +38,7 @@ def read_image():
     def workload(image, answer):
         global correct_predictions, incorrect_predictions
         image, _ = model.preprocess(image)
-        output = sess.run(image).numpy()
+        output = runner.run(image)
         output = model.postprocess(output)
 
         if output == answer:
@@ -46,9 +46,8 @@ def workload(image, answer):
         else:
             incorrect_predictions += 1
 
-    sess = session.create(model.enf)
-    benchmark.pedantic(workload, setup=read_image, rounds=num_images)
-    sess.close()
+    with create_runner(model.model_source()) as runner:
+        benchmark.pedantic(workload, setup=read_image, rounds=num_images)
 
     total_predictions = correct_predictions + incorrect_predictions
     accuracy = 100.0 * correct_predictions / total_predictions
diff --git a/tests/bench/test_resnet50.py b/tests/bench/test_resnet50.py
index 381ad629..4c8d4841 100644
--- a/tests/bench/test_resnet50.py
+++ b/tests/bench/test_resnet50.py
@@ -7,7 +7,7 @@
 
 from furiosa.models.vision import ResNet50
 from furiosa.models.vision.common.datasets import imagenet1k
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
 EXPECTED_ACCURACY = 75.618
 CLASSES: List[str] = imagenet1k.ImageNet1k_CLASSES
@@ -19,7 +19,7 @@ def test_mlcommons_resnet50_accuracy(benchmark):
         os.environ.get('IMAGENET_VAL_LABELS', 'tests/data/imagenet/aux/val.txt')
     )
 
-    model = ResNet50.load(use_native=False)
+    model = ResNet50(postprocessor_type="Python")
 
     image_paths = list(imagenet_val_images.glob("*.[Jj][Pp][Ee][Gg]"))
     with open(imagenet_val_labels, encoding="ascii") as file:
@@ -40,16 +40,15 @@ def read_image():
     def workload(image, answer):
         global correct_predictions, incorrect_predictions
         image, _ = model.preprocess(image)
-        output = model.postprocess(sess.run(image).numpy())
+        output = model.postprocess(runner.run(image))
 
         if output == answer:
             correct_predictions += 1
         else:
             incorrect_predictions += 1
 
-    sess = session.create(model.enf)
-    benchmark.pedantic(workload, setup=read_image, rounds=num_images)
-    sess.close()
+    with create_runner(model.model_source()) as runner:
+        benchmark.pedantic(workload, setup=read_image, rounds=num_images)
 
     total_predictions = correct_predictions + incorrect_predictions
     accuracy = 100.0 * correct_predictions / total_predictions
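As the constructors earlier in this patch show, `postprocessor_type` is normalized with `Platform(postprocessor_type)`, so a `Platform` member and its string value are interchangeable, and `validate_postprocessor_type` rejects platforms missing from a model's `postprocessor_map`. A sketch, assuming the string spellings used throughout these tests match the `Platform` enum values:

    from furiosa.models.types import Platform
    from furiosa.models.vision import ResNet50, SSDMobileNet

    # Equivalent spellings; the constructor coerces strings via Platform(...).
    resnet = ResNet50(postprocessor_type="Python")
    mobilenet = SSDMobileNet(postprocessor_type=Platform.RUST)

    # ResNet50's postprocessor_map only contains Platform.PYTHON, so requesting a
    # Rust postprocessor fails validation (the exact exception type is not shown
    # in this diff):
    # ResNet50(postprocessor_type="Rust")  # would raise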
diff --git a/tests/bench/test_ssd_mobilenet.py b/tests/bench/test_ssd_mobilenet.py
index 4b311576..eb4319e2 100644
--- a/tests/bench/test_ssd_mobilenet.py
+++ b/tests/bench/test_ssd_mobilenet.py
@@ -9,7 +9,7 @@
 
 from furiosa.models.types import Model
 from furiosa.models.vision import SSDMobileNet
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
 EXPECTED_ACCURACY = 0.2319698092633901
 EXPECTED_ACCURACY_NATIVE_RUST_PP = 0.23178397430922199
@@ -26,7 +26,7 @@ def load_coco_from_env_variable():
 
 
 def test_mlcommons_ssd_mobilenet_accuracy(benchmark):
-    model: Model = SSDMobileNet.load(use_native=False)
+    model: Model = SSDMobileNet(postprocessor_type="Python")
 
     image_directory, coco = load_coco_from_env_variable()
     image_src_iter = iter(tqdm.tqdm(coco.dataset["images"]))
@@ -43,7 +43,7 @@ def read_image():
 
     def workload(image_id, image):
         image, contexts = model.preprocess([image])
-        outputs = sess.run(image).numpy()
+        outputs = runner.run(image)
         batch_result = model.postprocess(outputs, contexts, confidence_threshold=0.3)
         result = np.squeeze(batch_result, axis=0)  # squeeze the batch axis
 
@@ -61,9 +61,8 @@ def workload(image_id, image):
             }
             detections.append(detection)
 
-    sess = session.create(model.enf)
-    benchmark.pedantic(workload, setup=read_image, rounds=num_images)
-    sess.close()
+    with create_runner(model.model_source()) as runner:
+        benchmark.pedantic(workload, setup=read_image, rounds=num_images)
 
     coco_detections = coco.loadRes(detections)
     coco_eval = COCOeval(coco, coco_detections, iouType="bbox")
@@ -76,7 +75,7 @@ def workload(image_id, image):
 
 
 def test_mlcommons_ssd_mobilenet_with_native_rust_pp_accuracy(benchmark):
-    model = SSDMobileNet.load(use_native=True)
+    model = SSDMobileNet(postprocessor_type="Rust")
 
     image_directory, coco = load_coco_from_env_variable()
     image_src_iter = iter(tqdm.tqdm(coco.dataset["images"]))
@@ -93,7 +92,7 @@ def read_image():
 
     def workload(image_id, image):
         image, contexts = model.preprocess([image])
-        outputs = sess.run(image).numpy()
+        outputs = runner.run(image)
         result = model.postprocess(outputs, contexts[0])
 
         for res in result:
@@ -110,9 +109,8 @@ def workload(image_id, image):
             }
             detections.append(detection)
 
-    sess = session.create(model.enf)
-    benchmark.pedantic(workload, setup=read_image, rounds=num_images)
-    sess.close()
+    with create_runner(model.model_source()) as runner:
+        benchmark.pedantic(workload, setup=read_image, rounds=num_images)
 
     coco_detections = coco.loadRes(detections)
     coco_eval = COCOeval(coco, coco_detections, iouType="bbox")
diff --git a/tests/bench/test_ssd_resnet34.py b/tests/bench/test_ssd_resnet34.py
index 55484a8b..ff56a014 100644
--- a/tests/bench/test_ssd_resnet34.py
+++ b/tests/bench/test_ssd_resnet34.py
@@ -10,7 +10,7 @@
 
 from furiosa.models.types import Model
 from furiosa.models.vision import SSDResNet34
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
 EXPECTED_ACCURACY = 0.2132147932
 EXPECTED_ACCURACY_RUST = 0.2201333639
@@ -27,7 +27,7 @@ def load_coco_from_env_variable():
 
 
 def test_mlcommons_ssd_resnet34_accuracy(benchmark):
-    model: Model = SSDResNet34.load()
+    model: Model = SSDResNet34(postprocessor_type="Python")
 
     image_directory, coco = load_coco_from_env_variable()
     instances_val2017 = Path(
@@ -52,7 +52,7 @@ def read_image():
 
     def workload(image_id, image):
         image, contexts = model.preprocess([image])
-        outputs = sess.run(image).numpy()
+        outputs = runner.run(image)
         batch_result = model.postprocess(outputs, contexts, confidence_threshold=0.05)
         result = np.squeeze(batch_result, axis=0)  # squeeze the batch axis
 
@@ -70,9 +70,8 @@ def workload(image_id, image):
             }
             detections.append(detection)
 
-    sess = session.create(model.enf)
-    benchmark.pedantic(workload, setup=read_image, rounds=num_images)
-    sess.close()
+    with create_runner(model.model_source()) as runner:
+        benchmark.pedantic(workload, setup=read_image, rounds=num_images)
 
     coco_detections = coco.loadRes(detections)
     coco_eval = COCOeval(coco, coco_detections, iouType="bbox")
@@ -84,7 +83,7 @@ def workload(image_id, image):
 
 
 def test_mlcommons_ssd_resnet34_with_native_rust_pp_accuracy(benchmark):
-    model = SSDResNet34.load(use_native=True)
+    model = SSDResNet34(postprocessor_type="Rust")
 
     image_directory, coco = load_coco_from_env_variable()
     instances_val2017 = Path(
@@ -109,7 +108,7 @@ def read_image():
 
     def workload(image_id, image):
         image, contexts = model.preprocess([image])
-        outputs = sess.run(image).numpy()
+        outputs = runner.run(image)
         result = model.postprocess(outputs, contexts[0])
 
         for res in result:
@@ -126,9 +125,8 @@ def workload(image_id, image):
             }
             detections.append(detection)
 
-    sess = session.create(model.enf)
-    benchmark.pedantic(workload, setup=read_image, rounds=num_images)
-    sess.close()
+    with create_runner(model.model_source()) as runner:
+        benchmark.pedantic(workload, setup=read_image, rounds=num_images)
 
     coco_detections = coco.loadRes(detections)
     coco_eval = COCOeval(coco, coco_detections, iouType="bbox")
diff --git a/tests/bench/test_yolov5l.py b/tests/bench/test_yolov5l.py
index aa3a6400..73ef9736 100644
--- a/tests/bench/test_yolov5l.py
+++ b/tests/bench/test_yolov5l.py
@@ -6,7 +6,7 @@
 from tqdm import tqdm
 
 from furiosa.models.vision import YOLOv5l
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
 from .test_acc_util import bdd100k
 
@@ -23,7 +23,7 @@ def load_db_from_env_variable() -> Tuple[Path, bdd100k.Yolov5Dataset]:
 
 
 def test_yolov5l_accuracy(benchmark):
-    model: YOLOv5l = YOLOv5l.load()
+    model: YOLOv5l = YOLOv5l()
 
     image_directory, yolov5db = load_db_from_env_variable()
 
@@ -43,7 +43,7 @@ def workload(im, boxes_target, classes_target):
         batch_pre_img, batch_preproc_param = model.preprocess(
             batch_im,
         )  # single-batch
-        batch_feat = sess.run(np.expand_dims(batch_pre_img[0], axis=0)).numpy()
+        batch_feat = runner.run(np.expand_dims(batch_pre_img[0], axis=0))
         detected_boxes = model.postprocess(
             batch_feat, batch_preproc_param, conf_thres=0.001, iou_thres=0.6
         )
@@ -56,9 +56,8 @@ def workload(im, boxes_target, classes_target):
             classes_target=classes_target,
         )
 
-    sess = session.create(model.enf)
-    benchmark.pedantic(workload, setup=read_image, rounds=num_images)
-    sess.close()
+    with create_runner(model.model_source()) as runner:
+        benchmark.pedantic(workload, setup=read_image, rounds=num_images)
 
     result = metric.compute()
     print("YOLOv5Large mAP:", result['map'])
diff --git a/tests/bench/test_yolov5m.py b/tests/bench/test_yolov5m.py
index 5b35c41c..4f763fcc 100644
--- a/tests/bench/test_yolov5m.py
+++ b/tests/bench/test_yolov5m.py
@@ -6,7 +6,7 @@
 from tqdm import tqdm
 
 from furiosa.models.vision import YOLOv5m
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
 from .test_acc_util import bdd100k
 
@@ -23,7 +23,7 @@ def load_db_from_env_variable() -> Tuple[Path, bdd100k.Yolov5Dataset]:
 
 
 def test_yolov5m_accuracy(benchmark):
-    model: YOLOv5m = YOLOv5m.load()
+    model: YOLOv5m = YOLOv5m()
 
     image_directory, yolov5db = load_db_from_env_variable()
 
@@ -43,7 +43,7 @@ def workload(im, boxes_target, classes_target):
         batch_pre_img, batch_preproc_param = model.preprocess(
             batch_im,
         )  # single-batch
-        batch_feat = sess.run(np.expand_dims(batch_pre_img[0], axis=0)).numpy()
+        batch_feat = runner.run(np.expand_dims(batch_pre_img[0], axis=0))
         detected_boxes = model.postprocess(
             batch_feat, batch_preproc_param, conf_thres=0.001, iou_thres=0.6
         )
@@ -58,9 +58,8 @@ def workload(im, boxes_target, classes_target):
             classes_target=classes_target,
         )
 
-    sess = session.create(model.enf)
-    benchmark.pedantic(workload, setup=read_image, rounds=num_images)
-    sess.close()
+    with create_runner(model.model_source()) as runner:
+        benchmark.pedantic(workload, setup=read_image, rounds=num_images)
 
     result = metric.compute()
     print("YOLOv5Medium mAP:", result['map'])
diff --git a/tests/unit/test_artifacts.py b/tests/unit/test_artifacts.py
index 93cdfb20..f0f1c5d0 100644
--- a/tests/unit/test_artifacts.py
+++ b/tests/unit/test_artifacts.py
@@ -2,7 +2,7 @@
 
 import yaml
 
-from furiosa.models.utils import DATA_DIRECTORY_BASE
+from furiosa.models._utils import DATA_DIRECTORY_BASE
 from furiosa.models.vision import (
     EfficientNetB0,
     EfficientNetV2s,
@@ -16,55 +16,55 @@
 
 def sanity_check_for_dvc_file(model, dvc_file_path: str):
     assert model
-    assert model.enf
-    assert model.source
-    assert yaml.safe_load(open(dvc_file_path).read())["outs"][0]["size"] == len(model.source)
+    assert model.model_source()
+    assert model.origin
+    assert yaml.safe_load(open(dvc_file_path).read())["outs"][0]["size"] == len(model.origin)
 
 
 def test_mlcommons_resnet50():
     sanity_check_for_dvc_file(
-        ResNet50.load(use_native=False),
+        ResNet50(),
         next((DATA_DIRECTORY_BASE / "mlcommons_resnet50_v1.5").glob("*.onnx.dvc")),
     )
 
 
 def test_ssd_mobilenet():
     sanity_check_for_dvc_file(
-        SSDMobileNet.load(use_native=False),
+        SSDMobileNet(),
         next((DATA_DIRECTORY_BASE / "mlcommons_ssd_mobilenet_v1").glob("*.onnx.dvc")),
     )
 
 
 def test_ssd_resnet34():
     sanity_check_for_dvc_file(
-        SSDResNet34.load(),
+        SSDResNet34(),
         next((DATA_DIRECTORY_BASE / "mlcommons_ssd_resnet34").glob("*.onnx.dvc")),
     )
 
 
 def test_yolov5_large():
     sanity_check_for_dvc_file(
-        YOLOv5l.load(),
+        YOLOv5l(),
         next((DATA_DIRECTORY_BASE / "yolov5l").glob("*.onnx.dvc")),
     )
 
 
 def test_yolov5_medium():
     sanity_check_for_dvc_file(
-        YOLOv5m.load(),
+        YOLOv5m(),
         next((DATA_DIRECTORY_BASE / "yolov5m").glob("*.onnx.dvc")),
     )
 
 
 def test_efficientnet_b0():
     sanity_check_for_dvc_file(
-        EfficientNetB0.load(),
+        EfficientNetB0(),
         next((DATA_DIRECTORY_BASE / "efficientnet_b0").glob("*.onnx.dvc")),
     )
 
 
 def test_efficientnet_v2_s():
     sanity_check_for_dvc_file(
-        EfficientNetV2s.load(),
+        EfficientNetV2s(),
         next((DATA_DIRECTORY_BASE / "efficientnet_v2_s").glob("*.onnx.dvc")),
     )
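The renamed accessors in this sanity check line up as follows: `model.origin` is the raw ONNX blob previously exposed as `model.source`, while `model.model_source()` replaces the `model.enf` attribute consumed by the runtime. A sketch of the check, with a hypothetical `.dvc` path:

    import yaml

    from furiosa.models.vision import ResNet50

    model = ResNet50()
    assert model.model_source()          # compiled source accepted by create_runner
    onnx_bytes: bytes = model.origin     # raw ONNX, formerly `model.source`
    meta = yaml.safe_load(open("model.onnx.dvc"))  # hypothetical DVC metadata file
    assert meta["outs"][0]["size"] == len(onnx_bytes)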
diff --git a/tests/unit/test_batched_yolov5l.py b/tests/unit/test_batched_yolov5l.py
index 5a997cfc..5cbb6656 100644
--- a/tests/unit/test_batched_yolov5l.py
+++ b/tests/unit/test_batched_yolov5l.py
@@ -5,7 +5,7 @@
 
 from furiosa.models.vision import YOLOv5l
 from furiosa.models.vision.postprocess import collate
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
 TEST_IMAGE_PATH = str(Path(__file__).parent / "../assets/yolov5-test.jpg")
 
@@ -15,15 +15,15 @@
 
 
 def test_yolov5_large_batched():
-    m = YOLOv5l.load()
+    m = YOLOv5l()
     assert len(m.classes) == NUM_CLASSES, "expected CLASS is 10"
 
     batch_im = [cv2.imread(TEST_IMAGE_PATH), cv2.imread(TEST_IMAGE_PATH)]
-    with session.create(m.enf) as sess:
+    with create_runner(m.model_source()) as runner:
         batch_pre_img, batch_preproc_param = m.preprocess(batch_im)
         batch_feat = []
         for pre_image in batch_pre_img:
-            batch_feat.append(sess.run(np.expand_dims(pre_image, axis=0)).numpy())
+            batch_feat.append(runner.run(np.expand_dims(pre_image, axis=0)))
         batch_feat = collate(batch_feat)
         detected_boxes = m.postprocess(batch_feat, batch_preproc_param)
 
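These batched tests keep the NPU input single-image while still exercising multi-image postprocessing: each preprocessed image is run separately, and `collate` merges the per-image output lists into one batch before `postprocess`. A compressed sketch of that flow:

    import cv2
    import numpy as np

    from furiosa.models.vision import YOLOv5l
    from furiosa.models.vision.postprocess import collate
    from furiosa.runtime.sync import create_runner

    model = YOLOv5l()
    images = [cv2.imread("tests/assets/yolov5-test.jpg")] * 2
    with create_runner(model.model_source()) as runner:
        batch, proc_params = model.preprocess(images)
        # one runner call per image, then merge the per-image outputs
        feats = collate([runner.run(np.expand_dims(img, axis=0)) for img in batch])
    boxes = model.postprocess(feats, proc_params)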
diff --git a/tests/unit/test_batched_yolov5m.py b/tests/unit/test_batched_yolov5m.py
index 9678524b..363ee766 100644
--- a/tests/unit/test_batched_yolov5m.py
+++ b/tests/unit/test_batched_yolov5m.py
@@ -5,7 +5,7 @@
 
 from furiosa.models.vision import YOLOv5m
 from furiosa.models.vision.postprocess import collate
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
 TEST_IMAGE_PATH = str(Path(__file__).parent / "../assets/yolov5-test.jpg")
 
@@ -15,15 +15,15 @@
 
 
 def test_yolov5_medium_batched():
-    m = YOLOv5m.load()
+    m = YOLOv5m()
     assert len(m.classes) == NUM_CLASSES, "expected CLASS is 10"
 
     batch_im = [cv2.imread(TEST_IMAGE_PATH), cv2.imread(TEST_IMAGE_PATH)]
-    with session.create(m.enf) as sess:
+    with create_runner(m.model_source()) as runner:
         batch_pre_img, batch_preproc_param = m.preprocess(batch_im)
         batch_feat = []
         for pre_image in batch_pre_img:
-            batch_feat.append(sess.run(np.expand_dims(pre_image, axis=0)).numpy())
+            batch_feat.append(runner.run(np.expand_dims(pre_image, axis=0)))
         batch_feat = collate(batch_feat)
         detected_boxes = m.postprocess(batch_feat, batch_preproc_param)
 
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
deleted file mode 100644
index 2c788f47..00000000
--- a/tests/unit/test_utils.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from furiosa.models.utils import get_nux_version
-
-
-def test_compiler_version():
-    version = get_nux_version()
-    assert version
-    assert len(version.version) > 0
-    assert len(version.revision) > 0