furiosa-ai · furiosamg · Aug 18, 2023 · Aug 10, 2023 · Aug 10, 2023 · Aug 10, 2023
diff --git a/Makefile b/Makefile
@@ -1,7 +1,7 @@
 SHELL=/bin/bash -o pipefail
 
-ONNXRUNTIME_VERSION := 1.13.1-?
-TOOLCHAIN_VERSION := 0.9.1-?
+ONNXRUNTIME_VERSION := 1.15.1-?
+TOOLCHAIN_VERSION := 0.10.0-?
 LIBHAL_VERSION := 0.11.0-?
 
 .PHONY: check-docker-tag toolchain lint test unit_tests examples regression-test-all \
@@ -15,8 +15,11 @@ ifndef DOCKER_TAG
 endif
 
 toolchain:
+	apt-get update
 	apt-get install -y --allow-downgrades libonnxruntime=$(ONNXRUNTIME_VERSION)
-	apt-get install -y --allow-downgrades furiosa-libcompiler=$(TOOLCHAIN_VERSION) furiosa-libnux=$(TOOLCHAIN_VERSION)
+	apt-get install -y --allow-downgrades furiosa-compiler=$(TOOLCHAIN_VERSION)
+	# TODO: remove the furiosa-libcompiler install below when possible
+	apt-get install -y --allow-downgrades furiosa-libcompiler=$(TOOLCHAIN_VERSION)
 	apt-get install -y --allow-downgrades furiosa-libhal-warboy=$(LIBHAL_VERSION)
 
 lint:
@@ -31,7 +34,7 @@ unit_tests:
 	pytest ./tests/unit/ -s
 
 examples:
-	for f in $$(ls docs/examples/*.py | grep -v "ssd_resnet34"); do echo"";echo "[TEST] $$f ..."; python3 $$f || exit 1; done
+	for f in $$(find docs/examples/ -name '*.py'); do printf "\n[TEST] %s ...\n" "$$f"; python3 "$$f" || exit 1; done
 
 regression-test-all:
 	pytest ./tests/bench/

diff --git a/ci-constraints.txt b/ci-constraints.txt
@@ -0,0 +1,5 @@
+# This pip constraints file is for the reproducibility of model accuracies
+opencv-python-headless==4.8.0.76
+torch==2.0.1
+torchvision==0.15.2
+numpy==1.25.2
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -19,6 +19,8 @@ RUN pip3 install --upgrade pip wheel setuptools Cython pytest pycocotools \
 
 RUN echo "deb [arch=amd64] https://internal-archive.furiosa.dev/ubuntu focal restricted" \
     > /etc/apt/sources.list.d/furiosa.list && \
+    echo "deb [arch=amd64] https://internal-archive.furiosa.dev/ubuntu focal-rc restricted" \
+    >> /etc/apt/sources.list.d/furiosa.list && \
     echo "deb [arch=amd64] https://internal-archive.furiosa.dev/ubuntu focal-nightly restricted" \
     >> /etc/apt/sources.list.d/furiosa.list
 
@@ -31,4 +33,4 @@ RUN --mount=type=secret,id=furiosa.conf,dst=/etc/apt/auth.conf.d/furiosa.conf,re
     apt-get update && \
     make toolchain
 RUN --mount=type=secret,id=.netrc,dst=/root/.netrc,required \
-    pip install --extra-index-url https://internal-pypi.furiosa.dev/simple .[test]
+    pip install --pre --extra-index-url https://internal-pypi.furiosa.dev/simple --constraint ci-constraints.txt .[test]
diff --git a/docs/examples/efficientnet_b0.py b/docs/examples/efficientnet_b0.py
@@ -1,10 +1,10 @@
 from furiosa.models.vision import EfficientNetB0
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
 image = "tests/assets/cat.jpg"
 
-effnetb0 = EfficientNetB0.load()
-with session.create(effnetb0.enf) as sess:
+effnetb0 = EfficientNetB0()
+with create_runner(effnetb0.model_source()) as runner:
     inputs, _ = effnetb0.preprocess(image)
-    outputs = sess.run(inputs).numpy()
+    outputs = runner.run(inputs)
     effnetb0.postprocess(outputs)
diff --git a/docs/examples/efficientnet_v2_s.py b/docs/examples/efficientnet_v2_s.py
@@ -1,10 +1,10 @@
 from furiosa.models.vision import EfficientNetV2s
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
 image = "tests/assets/cat.jpg"
 
-effnetv2s = EfficientNetV2s.load()
-with session.create(effnetv2s.enf) as sess:
+effnetv2s = EfficientNetV2s()
+with create_runner(effnetv2s.model_source()) as runner:
     inputs, _ = effnetv2s.preprocess(image)
-    outputs = sess.run(inputs).numpy()
+    outputs = runner.run(inputs)
     effnetv2s.postprocess(outputs)
diff --git a/docs/examples/loading_model.py b/docs/examples/loading_model.py
@@ -1,4 +1,4 @@
 from furiosa.models.types import Model
 from furiosa.models.vision import ResNet50
 
-model: Model = ResNet50.load()
+model: Model = ResNet50()
diff --git a/docs/examples/resnet50.py b/docs/examples/resnet50.py
@@ -1,10 +1,10 @@
 from furiosa.models.vision import ResNet50
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
 image = "tests/assets/cat.jpg"
 
-resnet50 = ResNet50.load()
-with session.create(resnet50.enf) as sess:
+resnet50 = ResNet50()
+with create_runner(resnet50.model_source()) as runner:
     inputs, _ = resnet50.preprocess(image)
-    outputs = sess.run(inputs).numpy()
+    outputs = runner.run(inputs)
     resnet50.postprocess(outputs)
diff --git a/docs/examples/ssd_mobilenet.py b/docs/examples/ssd_mobilenet.py
@@ -1,10 +1,10 @@
 from furiosa.models.vision import SSDMobileNet
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
 image = ["tests/assets/cat.jpg"]
 
-mobilenet = SSDMobileNet.load()
-with session.create(mobilenet.enf) as sess:
+mobilenet = SSDMobileNet()
+with create_runner(mobilenet.model_source()) as runner:
     inputs, contexts = mobilenet.preprocess(image)
-    outputs = sess.run(inputs).numpy()
+    outputs = runner.run(inputs)
     mobilenet.postprocess(outputs, contexts[0])
diff --git a/docs/examples/ssd_mobilenet_native.py b/docs/examples/ssd_mobilenet_native.py
@@ -1,10 +1,10 @@
 from furiosa.models.vision import SSDMobileNet
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
 image = ["tests/assets/cat.jpg"]
 
-mobilenet = SSDMobileNet.load(use_native=True)
-with session.create(mobilenet.enf) as sess:
+mobilenet = SSDMobileNet(postprocessor_type="Rust")
+with create_runner(mobilenet.model_source()) as runner:
     inputs, contexts = mobilenet.preprocess(image)
-    outputs = sess.run(inputs).numpy()
+    outputs = runner.run(inputs)
     mobilenet.postprocess(outputs, contexts[0])
diff --git a/docs/examples/ssd_mobilenet_onnx.py b/docs/examples/ssd_mobilenet_onnx.py
@@ -1,22 +1,20 @@
-import yaml
-
 from furiosa.models.vision import SSDMobileNet
 from furiosa.quantizer import quantize
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
 compiler_config = {"lower_tabulated_dequantize": True}
 
 image = ["tests/assets/cat.jpg"]
 
-mobilenet = SSDMobileNet.load()
-onnx_model: bytes = mobilenet.source
-calib_range: dict = yaml.full_load(mobilenet.calib_yaml)
+mobilenet = SSDMobileNet()
+onnx_model: bytes = mobilenet.origin
+calib_range: dict = mobilenet.tensor_name_to_range
 
 # See https://furiosa-ai.github.io/docs/latest/en/api/python/furiosa.quantizer.html#furiosa.quantizer.quantize
 # for more details
-dfg = quantize(onnx_model, calib_range, with_quantize=False)
+quantized_onnx = quantize(onnx_model, calib_range)
 
-with session.create(dfg, compiler_config=compiler_config) as sess:
-    inputs, contexts = mobilenet.preprocess(image)
-    outputs = sess.run(inputs).numpy()
+with create_runner(quantized_onnx, compiler_config=compiler_config) as runner:
+    inputs, contexts = mobilenet.preprocess(image, skip_quantize=False)
+    outputs = runner.run(inputs)
     mobilenet.postprocess(outputs, contexts[0])
diff --git a/docs/examples/ssd_resnet34.py b/docs/examples/ssd_resnet34.py
@@ -1,9 +1,9 @@
 from furiosa.models.vision import SSDResNet34
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
-resnet34 = SSDResNet34.load()
+resnet34 = SSDResNet34(postprocessor_type="Python")
 
-with session.create(resnet34.enf) as sess:
+with create_runner(resnet34.model_source()) as runner:
     image, contexts = resnet34.preprocess(["tests/assets/cat.jpg"])
-    output = sess.run(image).numpy()
+    output = runner.run(image)
     resnet34.postprocess(output, contexts=contexts)
diff --git a/docs/examples/ssd_resnet34_native.py b/docs/examples/ssd_resnet34_native.py
@@ -1,9 +1,9 @@
 from furiosa.models.vision import SSDResNet34
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
-resnet34 = SSDResNet34.load(use_native=True)
+resnet34 = SSDResNet34(postprocessor_type="Rust")
 
-with session.create(resnet34.enf) as sess:
+with create_runner(resnet34.model_source()) as runner:
     image, contexts = resnet34.preprocess(["tests/assets/cat.jpg"])
-    output = sess.run(image).numpy()
+    output = runner.run(image)
     resnet34.postprocessor(output, contexts=contexts[0])
diff --git a/docs/examples/yolov5l.py b/docs/examples/yolov5l.py
@@ -2,12 +2,12 @@
 import numpy as np
 
 from furiosa.models.vision import YOLOv5l
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
-yolov5l = YOLOv5l.load()
+yolov5l = YOLOv5l()
 
-with session.create(yolov5l.enf) as sess:
+with create_runner(yolov5l.model_source()) as runner:
     image = cv2.imread("tests/assets/yolov5-test.jpg")
     inputs, contexts = yolov5l.preprocess([image])
-    output = sess.run(np.expand_dims(inputs[0], axis=0)).numpy()
+    output = runner.run(np.expand_dims(inputs[0], axis=0))
     yolov5l.postprocess(output, contexts=contexts)
diff --git a/docs/examples/yolov5m.py b/docs/examples/yolov5m.py
@@ -2,12 +2,12 @@
 import numpy as np
 
 from furiosa.models.vision import YOLOv5m
-from furiosa.runtime import session
+from furiosa.runtime.sync import create_runner
 
-yolov5m = YOLOv5m.load()
+yolov5m = YOLOv5m()
 
-with session.create(yolov5m.enf) as sess:
+with create_runner(yolov5m.model_source()) as runner:
     image = cv2.imread("tests/assets/yolov5-test.jpg")
     inputs, contexts = yolov5m.preprocess([image])
-    output = sess.run(np.expand_dims(inputs[0], axis=0)).numpy()
+    output = runner.run(np.expand_dims(inputs[0], axis=0))
     yolov5m.postprocess(output, contexts=contexts)
diff --git a/furiosa/models/__init__.py b/furiosa/models/__init__.py
@@ -1,5 +1,5 @@
 """Furiosa Models"""
 from . import errors, vision
 
-__version__ = "0.9.0.dev0"
+__version__ = "0.10.0.dev0"
 __all__ = ["errors", "vision"]
diff --git a/furiosa/models/_utils.py b/furiosa/models/_utils.py
@@ -0,0 +1,161 @@
+import logging
+import os
+from pathlib import Path
+from typing import TYPE_CHECKING, Collection, Optional, Tuple, Union
+
+if TYPE_CHECKING:
+    from .types import Platform
+
+import requests
+import yaml
+
+from . import errors
+
+EXT_CALIB_YAML = "calib_range.yaml"
+EXT_ONNX = "onnx"
+DATA_DIRECTORY_BASE = Path(__file__).parent / "data"
+CACHE_DIRECTORY_BASE = Path(
+    os.getenv(
+        "FURIOSA_MODELS_CACHE_HOME",
+        os.path.join(os.getenv("XDG_CACHE_HOME", Path.home() / ".cache"), "furiosa/models"),
+    )
+)
+DVC_PUBLIC_HTTP_ENDPOINT = (
+    "https://furiosa-public-artifacts.s3-accelerate.amazonaws.com/furiosa-artifacts/"
+)
+
+module_logger = logging.getLogger(__name__)
+
+
+def get_version_info() -> Optional[str]:
+    """Return ``"<version>_<git_hash>"`` built from the compiler's ``VersionInfo``."""
+    from furiosa.tools.compiler.api import VersionInfo
+    version_info = VersionInfo()
+    return f"{version_info.version}_{version_info.git_hash}"
+
+
+def find_dvc_cache_directory(path: Path) -> Optional[Path]:
+    """Return ``.dvc/cache`` from the nearest dir at/above ``path`` (topmost excluded), or None."""
+    for candidate in () if path is None else (path, *path.parents):
+        if candidate != candidate.parent and (candidate / ".dvc").is_dir():
+            return candidate / ".dvc" / "cache"
+    return None
+
+
+def parse_dvc_file(file_path: Path) -> Tuple[str, str, int]:
+    """Return ``(md5[:2], md5[2:], size)`` from the first ``outs`` entry of ``<file_path>.dvc``."""
+    info_dict = yaml.safe_load(Path(f"{file_path}.dvc").read_text())["outs"][0]
+    return info_dict["md5"][:2], info_dict["md5"][2:], info_dict["size"]
+
+
+def get_from_url(path: str, uri: Path, is_legacy_path: bool = False) -> bytes:
+    url = f"{DVC_PUBLIC_HTTP_ENDPOINT}{path}"
+    module_logger.debug(f"Fetching from remote: {url}")
+    with requests.get(url) as resp:
+        if resp.status_code != 200:
+            if not is_legacy_path:
+                # Newer DVC stores objects under files/md5/: retry once with that prefix
+                return get_from_url(f"files/md5/{path}", uri, True)
+            raise errors.NotFoundInDVCRemote(uri, path)  # the prefixed retry failed too
+        data = resp.content
+        caching_path = CACHE_DIRECTORY_BASE / get_version_info() / (uri.name)
+        module_logger.debug(f"caching to {caching_path}")
+        caching_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(caching_path, mode="wb") as f:
+            f.write(data)
+        return data
+
+
+class ArtifactResolver:
+    """Resolve a DVC-tracked artifact: local cache, plain file, DVC cache, then remote."""
+    def __init__(self, uri: Union[str, Path]):
+        self.uri = Path(uri)
+        # Note: DVC_REPO is to locate local DVC directory not remote git repository
+        self.dvc_cache_path = os.environ.get("DVC_REPO", find_dvc_cache_directory(Path.cwd()))
+        if self.dvc_cache_path is not None:
+            self.dvc_cache_path = Path(self.dvc_cache_path)
+            if self.dvc_cache_path.is_symlink():
+                self.dvc_cache_path = self.dvc_cache_path.resolve()  # Path.readlink() needs 3.9+
+            module_logger.debug(f"Found DVC cache directory: {self.dvc_cache_path}")
+
+    def _read(self, directory: str, filename: str) -> bytes:
+        # Try to find local cached file
+        local_cache_path = CACHE_DIRECTORY_BASE / get_version_info() / (self.uri.name)
+        if local_cache_path.exists():
+            module_logger.debug(f"Local cache exists: {local_cache_path}")
+            return local_cache_path.read_bytes()
+
+        # Try to find real file along with DVC file (no DVC)
+        if Path(self.uri).exists():
+            module_logger.debug(f"Local file exists: {self.uri}")
+            return self.uri.read_bytes()
+
+        module_logger.debug(f"{self.uri} not exists, resolving DVC")
+        if self.dvc_cache_path is not None:
+            cached: Path = self.dvc_cache_path / directory / filename
+            if cached.exists():
+                module_logger.debug(f"DVC cache hit: {cached}")
+                return cached.read_bytes()
+            else:
+                module_logger.debug(f"DVC cache directory exists, but not having {self.uri}")
+
+        # Fetching from remote
+        return get_from_url(f"{directory}/{filename}", self.uri)
+
+    def read(self) -> bytes:
+        """Return the artifact bytes, checked against the size recorded in its ``.dvc`` file."""
+        directory, filename, size = parse_dvc_file(self.uri)
+        data = self._read(directory, filename)
+        if len(data) != size:  # explicit check: a bare ``assert`` vanishes under ``python -O``
+            raise ValueError(f"{self.uri}: expected {size} bytes, got {len(data)}")
+        return data
+
+
+def resolve_artifact(src_name: str, full_path: Path) -> bytes:
+    try:
+        return ArtifactResolver(full_path).read()
+    except Exception as e:  # any failure is re-raised as ArtifactNotFound, chained to its cause
+        raise errors.ArtifactNotFound(f"{src_name}:{full_path}") from e
+
+
+def resolve_source(src_name: str, extension: str) -> bytes:
+    dvc_file = next((DATA_DIRECTORY_BASE / src_name).glob(f'*.{extension}.dvc'), None)
+    if dvc_file is None:  # a bare ``next()`` would leak StopIteration to the caller
+        raise errors.ArtifactNotFound(f"{src_name}:*.{extension}.dvc")
+    return resolve_artifact(src_name, dvc_file.with_suffix(''))  # strip the `.dvc` suffix
+
+
+def resolve_model_source(src_name: str, num_pe: int = 2) -> bytes:
+    """Return the prebuilt ENF for this compiler version, else a freshly quantized ONNX model."""
+    version_info = get_version_info()
+    if version_info is None:
+        raise errors.VersionInfoNotFound()
+    generated_path_base = DATA_DIRECTORY_BASE / f"generated/{version_info}"
+    if not generated_path_base.exists():
+        module_logger.warning("ENF does not exist. Trying to generate from source..")
+        try:
+            import onnx
+            import yaml
+            from furiosa.quantizer import ModelEditor, TensorType, get_pure_input_names, quantize
+        except ImportError:
+            raise errors.ExtraPackageRequired()
+        module_logger.warning(f"Returning quantized ONNX for {src_name}")
+        onnx_model = onnx.load_from_string(resolve_source(src_name, EXT_ONNX))
+        # NOTE(review): this YAML may be downloaded; consider yaml.safe_load if plain types suffice
+        calib_range = yaml.full_load(resolve_source(src_name, EXT_CALIB_YAML))
+        editor = ModelEditor(onnx_model)  # presumably edits onnx_model in place — TODO confirm
+        for input_name in get_pure_input_names(onnx_model):
+            editor.convert_input_type(input_name, TensorType.UINT8)
+        return quantize(onnx_model, calib_range)
+    file_name = f'{src_name}_warboy_{num_pe}pe.enf'
+    return resolve_artifact(src_name, generated_path_base / file_name)
+
+
+def validate_postprocessor_type(
+    postprocessor_type: Union[str, "Platform"], postprocessor_map: Collection["Platform"]
+):
+    """Raise ``ValueError`` unless ``postprocessor_type`` is contained in ``postprocessor_map``."""
+    if postprocessor_type not in postprocessor_map:
+        prefix = f"Not supported postprocessor type: {postprocessor_type}, Available choices: "
+        choices = ', '.join(postprocessor_map)
+        raise ValueError(prefix + choices)