diff --git a/README.md b/README.md index 795fa86..8a00151 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,8 @@ ## 简介 -**PyAXEngine** 基于 cffi 模块实现了 Axera NPU Runtime 的 Python API,并同时支持开发板和M.2算力卡形态,方便开源社区开发者使用 Python 脚本快速构建 NPU 推理脚本 +**PyAXEngine** 基于 cffi 模块实现了 Axera NPU Runtime 的 Python API,其 Python API 与 ONNXRuntime 高度兼(相)容(似),并同时支持开发板和M.2算力卡形态,方便开源社区开发者使用 +Python 脚本快速构建 NPU 推理脚本 支持芯片 @@ -16,6 +17,7 @@ - python >= 3.8 - cffi >= 1.0.0 - ml-dtypes >= 0.1.0 +- numpy >= 1.22.0 ## 快速上手 @@ -23,54 +25,86 @@ ### 获取 wheel 包并安装 -- [下载链接](https://github.com/AXERA-TECH/pyaxengine/releases/download/0.0.1rc3/axengine-0.0.1-py3-none-any.whl) +- [下载链接](https://github.com/AXERA-TECH/pyaxengine/releases/latest) - 将 `axengine-x.x.x-py3-none-any.whl` 拷贝到开发板上,执行 `pip install axengine-x.x.x-py3-none-any.whl` 安装 ### 简单示例 将 [classification.py](https://github.com/AXERA-TECH/pyaxengine/blob/main/examples/classification.py) 拷贝到开发板上并执行。 -``` -root@ax650:~/samples# python3 classification.py -[INFO] Chip type: ChipType.AX650 -[INFO] Engine version: 2.7.2a +```bash +root@ax650:~/samples# python3 classification.py -m /opt/data/npu/models/mobilenetv2.axmodel -i /opt/data/npu/images/cat.jpg +[INFO] Available providers: ['AXCLRTExecutionProvider', 'AxEngineExecutionProvider'] +[INFO] Using provider: AxEngineExecutionProvider +[INFO] Chip type: ChipType.MC50 [INFO] VNPU type: VNPUType.DISABLED +[INFO] Engine version: 2.10.1s [INFO] Model type: 0 (single core) [INFO] Compiler version: 1.2-patch2 7e6b2b5f -Top 5 Predictions: -Class Index: 282, Score: 9.77352523803711 -Class Index: 278, Score: 8.981077194213867 -Class Index: 277, Score: 8.452778816223145 -Class Index: 281, Score: 8.320704460144043 -Class Index: 287, Score: 7.924479961395264 - -# 默认将自动检测计算设备,但也可以强制要求跑在AX650 M.2算力卡上,假设设备号是1,(设备号必须大于等于0,具体查看axcl-smi) -root@ax650:~/samples# python3 classification.py -b axcl -d 1 + ------------------------------------------------------ + Top 5 Predictions: + Class Index: 282, Score: 9.774 + Class Index: 278, Score: 8.981 + Class Index: 277, Score: 8.453 + Class Index: 281, Score: 8.321 + Class Index: 287, Score: 7.924 + ------------------------------------------------------ + min = 1.004 ms max = 22.512 ms avg = 1.252 ms + ------------------------------------------------------ +``` + +示例也演示了如何选择计算设备:这意味着既可以在 **AX650/AX630C** 等开发板上运行,也可以在 AX650 M.2 算力卡上运行。 + +切换计算设备的方式是通过 `-p` 参数指定,如 `-p AxEngineExecutionProvider` 表示使用开发板上的 NPU 进行推理,而 `-p AXCLRTExecutionProvider` 表示使用 M.2 算力卡进行推理。 +注意:在使用 M.2 算力卡进行推理时,需要将算力卡插入宿主机上,并且已经安装驱动,详见: [axcl](https://axcl-docs.readthedocs.io/zh-cn/latest/)。 + +```bash +root@ax650:~/samples# python3 classification.py -m /opt/data/npu/models/mobilenetv2.axmodel -i /opt/data/npu/images/cat.jpg -p AXCLRTExecutionProvider +[INFO] Available providers: ['AXCLRTExecutionProvider', 'AxEngineExecutionProvider'] +[INFO] Using provider: AXCLRTExecutionProvider [INFO] SOC Name: AX650N -[INFO] Runtime version: 1.0.0 [INFO] VNPU type: VNPUType.DISABLED [INFO] Compiler version: 1.2-patch2 7e6b2b5f -grp_id: 0 -input size: 1 - name: input - shape: 1 x 224 x 224 x 3 -output size: 1 - name: output - shape: 1 x 1000 -[INFO] cost time in host to device: 0.617ms, inference: 1.087ms, device to host: 0.266ms -Top 5 Predictions: -Class Index: 282, Score: 9.77352523803711 -Class Index: 278, Score: 8.981077194213867 -Class Index: 277, Score: 8.452778816223145 -Class Index: 281, Score: 8.320704460144043 -Class Index: 287, Score: 7.924479961395264 + ------------------------------------------------------ + Top 5 Predictions: + Class Index: 282, Score: 9.774 + Class Index: 278, Score: 8.981 + Class Index: 277, Score: 8.453 + Class Index: 281, Score: 8.321 + Class Index: 287, Score: 7.924 + ------------------------------------------------------ + min = 1.673 ms max = 12.400 ms avg = 1.805 ms + ------------------------------------------------------ +root@ax650:~/samples# python3 classification.py -m /opt/data/npu/models/mobilenetv2.axmodel -i /opt/data/npu/images/cat.jpg -p AxEngineExecutionProvider +[INFO] Available providers: ['AXCLRTExecutionProvider', 'AxEngineExecutionProvider'] +[INFO] Using provider: AxEngineExecutionProvider +[INFO] Chip type: ChipType.MC50 +[INFO] VNPU type: VNPUType.DISABLED +[INFO] Engine version: 2.10.1s +[INFO] Model type: 0 (single core) +[INFO] Compiler version: 1.2-patch2 7e6b2b5f + ------------------------------------------------------ + Top 5 Predictions: + Class Index: 282, Score: 9.774 + Class Index: 278, Score: 8.981 + Class Index: 277, Score: 8.453 + Class Index: 281, Score: 8.321 + Class Index: 287, Score: 7.924 + ------------------------------------------------------ + min = 1.004 ms max = 22.512 ms avg = 1.252 ms + ------------------------------------------------------ ``` +## 社区贡献者 + +[zylo117](https://github.com/zylo117): 提供了基于 cffi 的 AXCL Runtime Python API 实现 + ## 关联项目 - [ax-samples](https://github.com/AXERA-TECH/ax-samples) - [ax-llm](https://github.com/AXERA-TECH/ax-llm) - [pulsar2](https://pulsar2-docs.readthedocs.io/zh-cn/latest/) +- [axcl](https://axcl-docs.readthedocs.io/zh-cn/latest/) ## 技术讨论 diff --git a/axengine/__init__.py b/axengine/__init__.py index ceb84f8..bacaa74 100644 --- a/axengine/__init__.py +++ b/axengine/__init__.py @@ -5,6 +5,18 @@ # written consent of Axera Semiconductor Co., Ltd. # -from .session import InferenceSession -from .ax_session import InferenceSession as AXInferenceSession -from .axcl_session import InferenceSession as AXCLInferenceSession +# thanks to community contributors list below: +# zylo117: https://github.com/zylo117, first implementation of the axclrt backend + +from ._providers import axengine_provider_name, axclrt_provider_name +from ._providers import get_all_providers, get_available_providers + +# check if axclrt is installed, or is a supported chip(e.g. AX650, AX620E etc.) +_available_providers = get_available_providers() +if not _available_providers: + raise ImportError( + f"No providers found. Please make sure you have installed one of the following: {get_all_providers()}") +print("[INFO] Available providers: ", _available_providers) + +from ._node import NodeArg +from ._session import SessionOptions, InferenceSession diff --git a/axengine/_ax_chip.py b/axengine/_ax_chip.py deleted file mode 100644 index 9ad93ca..0000000 --- a/axengine/_ax_chip.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved. -# -# This source file is the property of Axera Semiconductor Co., Ltd. and -# may not be copied or distributed in any isomorphic form without the prior -# written consent of Axera Semiconductor Co., Ltd. -# - -from . import _types -from ._ax_capi import E as _lib - -__all__: ["T"] - - -def function_exists(lib, func_name): - try: - getattr(lib, func_name) - return True - except AttributeError: - return False - - -def check_chip_type(clib): - if not function_exists(clib, "AX_ENGINE_SetAffinity"): - return _types.ChipType.M57H - elif not function_exists(clib, "AX_ENGINE_GetTotalOps"): - return _types.ChipType.MC50 - else: - return _types.ChipType.MC20E - - -T = check_chip_type(_lib) diff --git a/axengine/_axclrt.py b/axengine/_axclrt.py new file mode 100644 index 0000000..f0f558e --- /dev/null +++ b/axengine/_axclrt.py @@ -0,0 +1,371 @@ +# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved. +# +# This source file is the property of Axera Semiconductor Co., Ltd. and +# may not be copied or distributed in any isomorphic form without the prior +# written consent of Axera Semiconductor Co., Ltd. +# +# first implementation of AXCLRTSession contributed by zylo117 + +import atexit +import os +import time +from typing import Any, Sequence + +import ml_dtypes as mldt +import numpy as np + +from ._axclrt_capi import axclrt_cffi, axclrt_lib +from ._axclrt_types import VNPUType, ModelType +from ._base_session import Session, SessionOptions +from ._node import NodeArg + +__all__: ["AXCLRTSession"] + +_is_axclrt_initialized = False +_is_axclrt_engine_initialized = False + + +def _transform_dtype(dtype): + if dtype == axclrt_cffi.cast("axclrtEngineDataType", axclrt_lib.AXCL_DATA_TYPE_UINT8): + return np.dtype(np.uint8) + elif dtype == axclrt_cffi.cast("axclrtEngineDataType", axclrt_lib.AXCL_DATA_TYPE_INT8): + return np.dtype(np.int8) + elif dtype == axclrt_cffi.cast("axclrtEngineDataType", axclrt_lib.AXCL_DATA_TYPE_UINT16): + return np.dtype(np.uint16) + elif dtype == axclrt_cffi.cast("axclrtEngineDataType", axclrt_lib.AXCL_DATA_TYPE_INT16): + return np.dtype(np.int16) + elif dtype == axclrt_cffi.cast("axclrtEngineDataType", axclrt_lib.AXCL_DATA_TYPE_UINT32): + return np.dtype(np.uint32) + elif dtype == axclrt_cffi.cast("axclrtEngineDataType", axclrt_lib.AXCL_DATA_TYPE_INT32): + return np.dtype(np.int32) + elif dtype == axclrt_cffi.cast("axclrtEngineDataType", axclrt_lib.AXCL_DATA_TYPE_FP32): + return np.dtype(np.float32) + elif dtype == axclrt_cffi.cast("axclrtEngineDataType", axclrt_lib.AXCL_DATA_TYPE_BF16): + return np.dtype(mldt.bfloat16) + else: + raise ValueError(f"Unsupported data type '{dtype}'.") + +def _initialize_axclrt(): + global _is_axclrt_initialized + ret = axclrt_lib.axclInit([]) + if ret != 0: + raise RuntimeError(f"Failed to initialize axcl runtime. {ret}.") + _is_axclrt_initialized = True + + +def _finalize_axclrt(): + global _is_axclrt_initialized, _is_axclrt_engine_initialized + if _is_axclrt_engine_initialized: + axclrt_lib.axclrtEngineFinalize() + _is_axclrt_engine_initialized = False + if _is_axclrt_initialized: + axclrt_lib.axclFinalize() + _is_axclrt_initialized = False + + +_initialize_axclrt() +atexit.register(_finalize_axclrt) + + +def _get_vnpu_type() -> VNPUType: + vnpu_type = axclrt_cffi.new("axclrtEngineVNpuKind *") + ret = axclrt_lib.axclrtEngineGetVNpuKind(vnpu_type) + if ret != 0: + raise RuntimeError("Failed to get VNPU attribute.") + return VNPUType(vnpu_type[0]) + + +def _get_version(): + major, minor, patch = axclrt_cffi.new('int32_t *'), axclrt_cffi.new('int32_t *'), axclrt_cffi.new( + 'int32_t *') + axclrt_lib.axclrtGetVersion(major, minor, patch) + return f'{major[0]}.{minor[0]}.{patch[0]}' + + +class AXCLRTSession(Session): + def __init__( + self, + path_or_bytes: str | bytes | os.PathLike, + sess_options: SessionOptions | None = None, + provider_options: dict[Any, Any] | None = None, + **kwargs, + ) -> None: + super().__init__() + + self._device_index = 0 + + if provider_options is not None and "device_id" in provider_options[0]: + self._device_index = provider_options[0].get("device_id", 0) + + lst = axclrt_cffi.new("axclrtDeviceList *") + ret = axclrt_lib.axclrtGetDeviceList(lst) + if ret != 0 or lst.num == 0: + raise RuntimeError(f"Get AXCL device failed 0x{ret:08x}, find total {lst.num} device.") + + if self._device_index >= lst.num: + raise RuntimeError(f"Device index {self._device_index} is out of range, total {lst.num} device.") + + self._device_id = lst.devices[self._device_index] + ret = axclrt_lib.axclrtSetDevice(self._device_id) + if ret != 0 or lst.num == 0: + raise RuntimeError(f"Set AXCL device failed 0x{ret:08x}.") + + global _is_axclrt_engine_initialized + vnpu_type = axclrt_cffi.cast( + "axclrtEngineVNpuKind", VNPUType.DISABLED.value + ) + # try to initialize NPU as disabled + ret = axclrt_lib.axclrtEngineInit(vnpu_type) + # if failed, try to get vnpu type + if 0 != ret: + vnpu = axclrt_cffi.new("axclrtEngineVNpuKind *") + ret = axclrt_lib.axclrtEngineGetVNpuKind(vnpu) + # if failed, that means the NPU is not available + if ret != 0: + raise RuntimeError(f"axclrtEngineInit as {vnpu.value} failed 0x{ret:08x}.") + # if success, that means the NPU is already initialized as vnpu.value + # so the initialization is failed. + # this means the other users maybe uninitialized the NPU suddenly + # and the app would be terminated unexpectedly at that moment. + # but we can't do anything to fix this issue, just print a warning message. + # it because the api looks like onnxruntime, so there no window avoid this. + # such as the life. + else: + print(f"[WARNING] Failed to initialize NPU as {vnpu_type}, NPU is already initialized as {vnpu.value}.") + # initialize NPU successfully, mark the flag to ensure the engine will be finalized + else: + _is_axclrt_engine_initialized = True + + self.soc_name = axclrt_cffi.string(axclrt_lib.axclrtGetSocName()).decode() + print(f"[INFO] SOC Name: {self.soc_name}") + + # model handle, context, info, io + self._model_id = axclrt_cffi.new("uint64_t *") + self._context_id = axclrt_cffi.new("uint64_t *") + + # get vnpu type + self._vnpu_type = _get_vnpu_type() + print(f"[INFO] VNPU type: {self._vnpu_type}") + + # load model + ret = self._load(path_or_bytes) + if 0 != ret: + raise RuntimeError("Failed to load model.") + print(f"[INFO] Compiler version: {self._get_model_tool_version()}") + + # get model info + self._info = self._get_info() + self._shape_count = self._get_shape_count() + self._inputs = self._get_inputs() + self._outputs = self._get_outputs() + + # prepare io + self._io = self._prepare_io() + + def __del__(self): + self._unload() + + def _load(self, path_or_bytes): + # model buffer, almost copied from onnx runtime + if isinstance(path_or_bytes, (str, os.PathLike)): + _model_path = axclrt_cffi.new("char[]", path_or_bytes.encode('utf-8')) + ret = axclrt_lib.axclrtEngineLoadFromFile(_model_path, self._model_id) + if ret != 0: + raise RuntimeError("axclrtEngineLoadFromFile failed.") + elif isinstance(path_or_bytes, bytes): + _model_buffer = axclrt_cffi.new("char[]", path_or_bytes) + _model_buffer_size = len(path_or_bytes) + + dev_mem_ptr = axclrt_cffi.new('void **', axclrt_cffi.NULL) + ret = axclrt_lib.axclrtMalloc(dev_mem_ptr, _model_buffer_size, axclrt_lib.AXCL_MEM_MALLOC_NORMAL_ONLY) + if ret != 0: + raise RuntimeError("axclrtMalloc failed.") + + ret = axclrt_lib.axclrtMemcpy(dev_mem_ptr[0], _model_buffer, _model_buffer_size, axclrt_lib.AXCL_MEMCPY_HOST_TO_DEVICE) + if ret != 0: + axclrt_lib.axclrtFree(dev_mem_ptr[0]) + raise RuntimeError("axclrtMemcpy failed.") + + ret = axclrt_lib.axclrtEngineLoadFromMem(dev_mem_ptr[0], _model_buffer_size, self._model_id) + axclrt_lib.axclrtFree(dev_mem_ptr[0]) + if ret != 0: + raise RuntimeError("axclrtEngineLoadFromMem failed.") + else: + raise TypeError(f"Unable to load model from type '{type(path_or_bytes)}'") + + ret = axclrt_lib.axclrtEngineCreateContext(self._model_id[0], self._context_id) + if ret != 0: + raise RuntimeError("axclrtEngineCreateContext failed") + return ret + + def _unload(self): + if self._io is not None: + dev_size = axclrt_cffi.new("uint64_t *") + dev_prt = axclrt_cffi.new("void **") + for i in range(axclrt_lib.axclrtEngineGetNumInputs(self._info[0])): + axclrt_lib.axclrtEngineGetInputBufferByIndex(self._io, i, dev_prt, dev_size) + axclrt_lib.axclrtFree(dev_prt[0]) + for i in range(axclrt_lib.axclrtEngineGetNumOutputs(self._info[0])): + axclrt_lib.axclrtEngineGetOutputBufferByIndex(self._io, i, dev_prt, dev_size) + axclrt_lib.axclrtFree(dev_prt[0]) + axclrt_lib.axclrtEngineDestroyIO(self._io) + self._io = None + if self._model_id[0] is not None: + axclrt_lib.axclrtEngineUnload(self._model_id[0]) + self._model_id[0] = 0 + + def _get_model_tool_version(self): + model_tool_version = axclrt_lib.axclrtEngineGetModelCompilerVersion(self._model_id[0]) + return axclrt_cffi.string(model_tool_version).decode() + + def _get_info(self): + io_info = axclrt_cffi.new("axclrtEngineIOInfo *") + ret = axclrt_lib.axclrtEngineGetIOInfo(self._model_id[0], io_info) + if ret != 0: + raise RuntimeError("axclrtEngineGetIOInfo failed.") + return io_info + + def _get_shape_count(self): + count = axclrt_cffi.new("int32_t *") + ret = axclrt_lib.axclrtEngineGetShapeGroupsCount(self._info[0], count) + if ret != 0: + axclrt_lib.axclrtEngineUnload(self._model_id[0]) + raise RuntimeError("axclrtEngineGetShapeGroupsCount failed.") + return count[0] + + def _get_inputs(self): + inputs = [] + for group in range(self._shape_count): + one_group_io = [] + for index in range(axclrt_lib.axclrtEngineGetNumInputs(self._info[0])): + name = axclrt_lib.axclrtEngineGetInputNameByIndex(self._info[0], index) + + cffi_dtype = axclrt_cffi.new("axclrtEngineDataType *") + ret = axclrt_lib.axclrtEngineGetInputDataType(self._info[0], index, cffi_dtype) + if ret != 0: + raise RuntimeError("axclrtEngineGetInputDataType failed.") + dtype = _transform_dtype(cffi_dtype[0]) + + cffi_dims = axclrt_cffi.new("axclrtEngineIODims *") + ret = axclrt_lib.axclrtEngineGetInputDims(self._info[0], group, index, cffi_dims) + if ret != 0: + raise RuntimeError("axclrtEngineGetInputDims failed.") + shape = [cffi_dims.dims[i] for i in range(cffi_dims.dimCount)] + + meta = NodeArg(name, dtype, shape) + one_group_io.append(meta) + inputs.append(one_group_io) + return inputs + + def _get_outputs(self): + outputs = [] + for group in range(self._shape_count): + one_group_io = [] + for index in range(axclrt_lib.axclrtEngineGetNumOutputs(self._info[0])): + name = axclrt_lib.axclrtEngineGetOutputNameByIndex(self._info[0], index) + + cffi_dtype = axclrt_cffi.new("axclrtEngineDataType *") + ret = axclrt_lib.axclrtEngineGetOutputDataType(self._info[0], index, cffi_dtype) + if ret != 0: + raise RuntimeError("axclrtEngineGetOutputDataType failed.") + dtype = _transform_dtype(cffi_dtype[0]) + + cffi_dims = axclrt_cffi.new("axclrtEngineIODims *") + ret = axclrt_lib.axclrtEngineGetOutputDims(self._info[0], group, index, cffi_dims) + if ret != 0: + raise RuntimeError("axclrtEngineGetOutputDims failed.") + shape = [cffi_dims.dims[i] for i in range(cffi_dims.dimCount)] + + meta = NodeArg(name, dtype, shape) + one_group_io.append(meta) + outputs.append(one_group_io) + return outputs + + def _prepare_io(self): + _io = axclrt_cffi.new("axclrtEngineIO *") + ret = axclrt_lib.axclrtEngineCreateIO(self._info[0], _io) + if ret != 0: + raise RuntimeError(f"axclrtEngineCreateIO failed 0x{ret:08x}.") + for i in range(axclrt_lib.axclrtEngineGetNumInputs(self._info[0])): + max_size = 0 + for group in range(self._shape_count): + size = axclrt_lib.axclrtEngineGetInputSizeByIndex(self._info[0], group, i) + max_size = max(max_size, size) + dev_ptr = axclrt_cffi.new("void **") + ret = axclrt_lib.axclrtMalloc(dev_ptr, max_size, axclrt_lib.AXCL_MEM_MALLOC_NORMAL_ONLY) + if 0 != ret or dev_ptr[0] == axclrt_cffi.NULL: + raise RuntimeError(f"axclrtMalloc failed 0x{ret:08x} for input {i}.") + ret = axclrt_lib.axclrtEngineSetInputBufferByIndex(_io[0], i, dev_ptr[0], max_size) + if 0 != ret: + raise RuntimeError(f"axclrtEngineSetInputBufferByIndex failed 0x{ret:08x} for input {i}.") + for i in range(axclrt_lib.axclrtEngineGetNumOutputs(self._info[0])): + max_size = 0 + for group in range(self._shape_count): + size = axclrt_lib.axclrtEngineGetOutputSizeByIndex(self._info[0], group, i) + max_size = max(max_size, size) + dev_ptr = axclrt_cffi.new("void **") + ret = axclrt_lib.axclrtMalloc(dev_ptr, max_size, axclrt_lib.AXCL_MEM_MALLOC_NORMAL_ONLY) + if 0 != ret or dev_ptr[0] == axclrt_cffi.NULL: + raise RuntimeError(f"axclrtMalloc failed 0x{ret:08x} for output {i}.") + ret = axclrt_lib.axclrtEngineSetOutputBufferByIndex(_io[0], i, dev_ptr[0], max_size) + if 0 != ret: + raise RuntimeError(f"axclrtEngineSetOutputBufferByIndex failed 0x{ret:08x} for output {i}.") + return _io[0] + + def run( + self, + output_names: list[str], + input_feed: dict[str, np.ndarray], + run_options=None + ): + self._validate_input(input_feed) + self._validate_output(output_names) + + if None is output_names: + output_names = [o.name for o in self.get_outputs()] + + # fill model io + dev_prt = axclrt_cffi.new("void **") + dev_size = axclrt_cffi.new("uint64_t *") + for key, npy in input_feed.items(): + for i, one in enumerate(self.get_inputs()): + if one.name == key: + assert ( + list(one.shape) == list(npy.shape) and one.dtype == npy.dtype + ), f"model inputs({key}) expect shape {one.shape} and dtype {one.dtype}, howerver gets input with shape {npy.shape} and dtype {npy.dtype}" + + if not ( + not npy.flags.c_contiguous + and npy.flags.f_contiguous + and npy.flags.contiguous + ): + npy = np.ascontiguousarray(npy) + npy_ptr = axclrt_cffi.cast("void *", npy.ctypes.data) + ret = axclrt_lib.axclrtEngineGetInputBufferByIndex(self._io, i, dev_prt, dev_size) + if 0 != ret: + raise RuntimeError(f"axclrtEngineGetInputBufferByIndex failed for input {i}.") + ret = axclrt_lib.axclrtMemcpy(dev_prt[0], npy_ptr, npy.nbytes, axclrt_lib.AXCL_MEMCPY_HOST_TO_DEVICE) + if 0 != ret: + raise RuntimeError(f"axclrtMemcpy failed for input {i}.") + + # execute model + ret = axclrt_lib.axclrtEngineExecute(self._model_id[0], self._context_id[0], 0, self._io) + + # get output + outputs = [] + if 0 == ret: + for i in range(len(self.get_outputs())): + ret = axclrt_lib.axclrtEngineGetOutputBufferByIndex(self._io, i, dev_prt, dev_size) + if 0 != ret: + raise RuntimeError(f"axclrtEngineGetOutputBufferByIndex failed for output {i}.") + npy = np.zeros(self.get_outputs()[i].shape, dtype=self.get_outputs()[i].dtype) + npy_ptr = axclrt_cffi.cast("void *", npy.ctypes.data) + ret = axclrt_lib.axclrtMemcpy(npy_ptr, dev_prt[0], npy.nbytes, axclrt_lib.AXCL_MEMCPY_DEVICE_TO_HOST) + if 0 != ret: + raise RuntimeError(f"axclrtMemcpy failed for output {i}.") + name = self.get_outputs()[i].name + if name in output_names: + outputs.append(npy) + return outputs + else: + raise RuntimeError(f"axclrtEngineExecute failed 0x{ret:08x}") diff --git a/axengine/_axcl_capi.py b/axengine/_axclrt_capi.py similarity index 71% rename from axengine/_axcl_capi.py rename to axengine/_axclrt_capi.py index f8bac80..1719a94 100644 --- a/axengine/_axcl_capi.py +++ b/axengine/_axclrt_capi.py @@ -4,19 +4,17 @@ # may not be copied or distributed in any isomorphic form without the prior # written consent of Axera Semiconductor Co., Ltd. # -# modified by zylo117 import ctypes.util -import platform from cffi import FFI -__all__: ["R", "O"] +__all__: ["axclrt_cffi", "axclrt_lib"] -O = FFI() +axclrt_cffi = FFI() # axcl_base.h -O.cdef( +axclrt_cffi.cdef( """ #define AXCL_MAX_DEVICE_COUNT 256 typedef int32_t axclError; @@ -25,7 +23,7 @@ ) # axcl_rt_type.h -O.cdef( +axclrt_cffi.cdef( """ typedef struct axclrtDeviceList { uint32_t num; @@ -50,7 +48,7 @@ ) # axcl_rt_engine_type.h -O.cdef( +axclrt_cffi.cdef( """ #define AXCLRT_ENGINE_MAX_DIM_CNT 32 typedef void* axclrtEngineIOInfo; @@ -63,6 +61,32 @@ AXCL_VNPU_LITTLE_BIG = 3, } axclrtEngineVNpuKind; + typedef enum axclrtEngineDataType { + AXCL_DATA_TYPE_NONE = 0, + AXCL_DATA_TYPE_INT4 = 1, + AXCL_DATA_TYPE_UINT4 = 2, + AXCL_DATA_TYPE_INT8 = 3, + AXCL_DATA_TYPE_UINT8 = 4, + AXCL_DATA_TYPE_INT16 = 5, + AXCL_DATA_TYPE_UINT16 = 6, + AXCL_DATA_TYPE_INT32 = 7, + AXCL_DATA_TYPE_UINT32 = 8, + AXCL_DATA_TYPE_INT64 = 9, + AXCL_DATA_TYPE_UINT64 = 10, + AXCL_DATA_TYPE_FP4 = 11, + AXCL_DATA_TYPE_FP8 = 12, + AXCL_DATA_TYPE_FP16 = 13, + AXCL_DATA_TYPE_BF16 = 14, + AXCL_DATA_TYPE_FP32 = 15, + AXCL_DATA_TYPE_FP64 = 16, + } axclrtEngineDataType; + + typedef enum axclrtEngineDataLayout { + AXCL_DATA_LAYOUT_NONE = 0, + AXCL_DATA_LAYOUT_NHWC = 0, + AXCL_DATA_LAYOUT_NCHW = 1, + } axclrtEngineDataLayout; + typedef struct axclrtEngineIODims { int32_t dimCount; int32_t dims[AXCLRT_ENGINE_MAX_DIM_CNT]; @@ -70,67 +94,8 @@ """ ) -# ax_model_runner_axcl.cpp -O.cdef( - """ - typedef enum - { - AX_ENGINE_ABST_DEFAULT = 0, - AX_ENGINE_ABST_CACHED = 1, - } AX_ENGINE_ALLOC_BUFFER_STRATEGY_T; - - typedef struct - { - int nIndex; - int nSize; - void *pBuf; - void *pVirAddr; - - const char *Name; - - axclrtEngineIODims dims; - } AXCL_IO_BUF_T; - - typedef struct - { - uint32_t nInputSize; - uint32_t nOutputSize; - AXCL_IO_BUF_T *pInputs; - AXCL_IO_BUF_T *pOutputs; - } AXCL_IO_DATA_T; -""" -) - -# ax_model_runner.hpp -O.cdef( - """ - typedef struct - { - const char * sName; - unsigned int nIdx; - unsigned int vShape[AXCLRT_ENGINE_MAX_DIM_CNT]; - unsigned int vShapeSize; - int nSize; - unsigned long long phyAddr; - void *pVirAddr; - } ax_runner_tensor_t; -""" -) - -# stdlib.h/string.h -O.cdef( - """ - void free (void *__ptr); - void *malloc(size_t size); - void *memset (void *__s, int __c, size_t __n); - void *memcpy (void * __dest, const void * __src, size_t __n); -""" -) - - - # axcl.h -O.cdef( +axclrt_cffi.cdef( """ axclError axclInit(const char *config); axclError axclFinalize(); @@ -138,7 +103,7 @@ ) # axcl_rt.h -O.cdef( +axclrt_cffi.cdef( """ axclError axclrtGetVersion(int32_t *major, int32_t *minor, int32_t *patch); const char *axclrtGetSocName(); @@ -146,7 +111,7 @@ ) # axcl_rt_device.h -O.cdef( +axclrt_cffi.cdef( """ axclError axclrtGetDeviceList(axclrtDeviceList *deviceList); axclError axclrtSetDevice(int32_t deviceId); @@ -155,7 +120,7 @@ ) # axcl_rt_context.h -O.cdef( +axclrt_cffi.cdef( """ axclError axclrtCreateContext(axclrtContext *context, int32_t deviceId); axclError axclrtDestroyContext(axclrtContext context); @@ -166,34 +131,54 @@ ) # axcl_rt_engine.h -O.cdef( +axclrt_cffi.cdef( """ axclError axclrtEngineInit(axclrtEngineVNpuKind npuKind); - axclError axclrtEngineLoadFromMem(const void *model, uint64_t modelSize, uint64_t *modelId); - axclError axclrtEngineCreateContext(uint64_t modelId, uint64_t *contextId); axclError axclrtEngineGetVNpuKind(axclrtEngineVNpuKind *npuKind); + axclError axclrtEngineFinalize(); + + axclError axclrtEngineLoadFromFile(const char *modelPath, uint64_t *modelId); + axclError axclrtEngineLoadFromMem(const void *model, uint64_t modelSize, uint64_t *modelId); const char* axclrtEngineGetModelCompilerVersion(uint64_t modelId); + axclError axclrtEngineUnload(uint64_t modelId); + axclError axclrtEngineGetIOInfo(uint64_t modelId, axclrtEngineIOInfo *ioInfo); axclError axclrtEngineGetShapeGroupsCount(axclrtEngineIOInfo ioInfo, int32_t *count); - axclError axclrtEngineCreateIO(axclrtEngineIOInfo ioInfo, axclrtEngineIO *io); + uint32_t axclrtEngineGetNumInputs(axclrtEngineIOInfo ioInfo); uint32_t axclrtEngineGetNumOutputs(axclrtEngineIOInfo ioInfo); + uint64_t axclrtEngineGetInputSizeByIndex(axclrtEngineIOInfo ioInfo, uint32_t group, uint32_t index); - axclError axclrtEngineGetInputDims(axclrtEngineIOInfo ioInfo, uint32_t group, uint32_t index, axclrtEngineIODims *dims); - const char *axclrtEngineGetInputNameByIndex(axclrtEngineIOInfo ioInfo, uint32_t index); - axclError axclrtEngineSetInputBufferByIndex(axclrtEngineIO io, uint32_t index, const void *dataBuffer, uint64_t size); uint64_t axclrtEngineGetOutputSizeByIndex(axclrtEngineIOInfo ioInfo, uint32_t group, uint32_t index); + + axclError axclrtEngineGetInputDims(axclrtEngineIOInfo ioInfo, uint32_t group, uint32_t index, axclrtEngineIODims *dims); axclError axclrtEngineGetOutputDims(axclrtEngineIOInfo ioInfo, uint32_t group, uint32_t index, axclrtEngineIODims *dims); + + const char *axclrtEngineGetInputNameByIndex(axclrtEngineIOInfo ioInfo, uint32_t index); const char *axclrtEngineGetOutputNameByIndex(axclrtEngineIOInfo ioInfo, uint32_t index); + + int32_t axclrtEngineGetInputDataType(axclrtEngineIOInfo ioInfo, uint32_t index, axclrtEngineDataType *type); + int32_t axclrtEngineGetOutputDataType(axclrtEngineIOInfo ioInfo, uint32_t index, axclrtEngineDataType *type); + + int32_t axclrtEngineGetInputDataLayout(axclrtEngineIOInfo ioInfo, uint32_t index, axclrtEngineDataLayout *layout); + int32_t axclrtEngineGetOutputDataLayout(axclrtEngineIOInfo ioInfo, uint32_t index, axclrtEngineDataLayout *layout); + + axclError axclrtEngineCreateIO(axclrtEngineIOInfo ioInfo, axclrtEngineIO *io); + axclError axclrtEngineDestroyIO(axclrtEngineIO io); + + axclError axclrtEngineSetInputBufferByIndex(axclrtEngineIO io, uint32_t index, const void *dataBuffer, uint64_t size); axclError axclrtEngineSetOutputBufferByIndex(axclrtEngineIO io, uint32_t index, const void *dataBuffer, uint64_t size); + axclError axclrtEngineGetInputBufferByIndex(axclrtEngineIO io, uint32_t index, void **dataBuffer, uint64_t *size); + axclError axclrtEngineGetOutputBufferByIndex(axclrtEngineIO io, uint32_t index, void **dataBuffer, uint64_t *size); + + axclError axclrtEngineCreateContext(uint64_t modelId, uint64_t *contextId); + axclError axclrtEngineExecute(uint64_t modelId, uint64_t contextId, uint32_t group, axclrtEngineIO io); - axclError axclrtEngineDestroyIO(axclrtEngineIO io); - axclError axclrtEngineUnload(uint64_t modelId); """ ) # axcl_rt_memory.h -O.cdef( +axclrt_cffi.cdef( """ axclError axclrtMalloc(void **devPtr, size_t size, axclrtMemMallocPolicy policy); axclError axclrtMallocCached(void **devPtr, size_t size, axclrtMemMallocPolicy policy); @@ -209,5 +194,5 @@ rt_path is not None ), f"Failed to find library {rt_name}. Please ensure it is installed and in the library path." -R = O.dlopen(rt_path) -assert R is not None, f"Failed to load library {rt_path}. Please ensure it is installed and in the library path." +axclrt_lib = axclrt_cffi.dlopen(rt_path) +assert axclrt_lib is not None, f"Failed to load library {rt_path}. Please ensure it is installed and in the library path." diff --git a/axengine/_axclrt_types.py b/axengine/_axclrt_types.py new file mode 100644 index 0000000..7240312 --- /dev/null +++ b/axengine/_axclrt_types.py @@ -0,0 +1,21 @@ +# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved. +# +# This source file is the property of Axera Semiconductor Co., Ltd. and +# may not be copied or distributed in any isomorphic form without the prior +# written consent of Axera Semiconductor Co., Ltd. +# + +from enum import Enum + + +class VNPUType(Enum): + DISABLED = 0 + ENABLED = 1 + BIG_LITTLE = 2 + LITTLE_BIG = 3 + + +class ModelType(Enum): + SINGLE = 0 + DUAL = 1 + TRIPLE = 2 diff --git a/axengine/ax_session.py b/axengine/_axe.py similarity index 54% rename from axengine/ax_session.py rename to axengine/_axe.py index 12e65d6..62ec764 100644 --- a/axengine/ax_session.py +++ b/axengine/_axe.py @@ -4,61 +4,142 @@ # may not be copied or distributed in any isomorphic form without the prior # written consent of Axera Semiconductor Co., Ltd. # -from ._session import BaseInferenceSession -from ._types import VNPUType, ModelType, ChipType -from ._types import _transform_dtype -from ._node import NodeArg +import atexit import os +from typing import Any, Sequence + +import ml_dtypes as mldt import numpy as np -__all__: ["InferenceSession"] +from ._axe_capi import sys_lib, engine_cffi, engine_lib +from ._axe_types import VNPUType, ModelType, ChipType +from ._base_session import Session, SessionOptions +from ._node import NodeArg +__all__: ["AXEngineSession"] + +_is_sys_initialized = False +_is_engine_initialized = False + + +def _transform_dtype(dtype): + if dtype == engine_cffi.cast("AX_ENGINE_DATA_TYPE_T", engine_lib.AX_ENGINE_DT_UINT8): + return np.dtype(np.uint8) + elif dtype == engine_cffi.cast("AX_ENGINE_DATA_TYPE_T", engine_lib.AX_ENGINE_DT_SINT8): + return np.dtype(np.int8) + elif dtype == engine_cffi.cast("AX_ENGINE_DATA_TYPE_T", engine_lib.AX_ENGINE_DT_UINT16): + return np.dtype(np.uint16) + elif dtype == engine_cffi.cast("AX_ENGINE_DATA_TYPE_T", engine_lib.AX_ENGINE_DT_SINT16): + return np.dtype(np.int16) + elif dtype == engine_cffi.cast("AX_ENGINE_DATA_TYPE_T", engine_lib.AX_ENGINE_DT_UINT32): + return np.dtype(np.uint32) + elif dtype == engine_cffi.cast("AX_ENGINE_DATA_TYPE_T", engine_lib.AX_ENGINE_DT_SINT32): + return np.dtype(np.int32) + elif dtype == engine_cffi.cast("AX_ENGINE_DATA_TYPE_T", engine_lib.AX_ENGINE_DT_FLOAT32): + return np.dtype(np.float32) + elif dtype == engine_cffi.cast("AX_ENGINE_DATA_TYPE_T", engine_lib.AX_ENGINE_DT_BFLOAT16): + return np.dtype(mldt.bfloat16) + else: + raise ValueError(f"Unsupported data type '{dtype}'.") + + +def _check_cffi_func_exists(lib, func_name): + try: + getattr(lib, func_name) + return True + except AttributeError: + return False + + +def _get_chip_type(): + if not _check_cffi_func_exists(engine_lib, "AX_ENGINE_SetAffinity"): + return ChipType.M57H + elif not _check_cffi_func_exists(engine_lib, "AX_ENGINE_GetTotalOps"): + return ChipType.MC50 + else: + return ChipType.MC20E + + +def _get_version(): + engine_version = engine_lib.AX_ENGINE_GetVersion() + return engine_cffi.string(engine_version).decode("utf-8") + + +def _get_vnpu_type() -> VNPUType: + vnpu_type = engine_cffi.new("AX_ENGINE_NPU_ATTR_T *") + ret = engine_lib.AX_ENGINE_GetVNPUAttr(vnpu_type) + if 0 != ret: + raise RuntimeError("Failed to get VNPU attribute.") + return VNPUType(vnpu_type.eHardMode) + + +def _initialize_engine(): + global _is_sys_initialized, _is_engine_initialized + + ret = sys_lib.AX_SYS_Init() + if ret != 0: + raise RuntimeError("Failed to initialize ax sys.") + _is_sys_initialized = True + + # disabled mode by default + vnpu_type = engine_cffi.new("AX_ENGINE_NPU_ATTR_T *") + ret = engine_lib.AX_ENGINE_GetVNPUAttr(vnpu_type) + if 0 != ret: + # this means the NPU was not initialized + vnpu_type.eHardMode = engine_cffi.cast( + "AX_ENGINE_NPU_MODE_T", VNPUType.DISABLED.value + ) + ret = engine_lib.AX_ENGINE_Init(vnpu_type) + if ret != 0: + raise RuntimeError("Failed to initialize ax sys engine.") + _is_engine_initialized = True -class InferenceSession(BaseInferenceSession): - def __init__( - self, - path_or_bytes: str | bytes | os.PathLike, - ) -> None: - from . import _ax_capi as _capi - from . import _ax_chip as _chip + print(f"[INFO] Chip type: {_get_chip_type()}") + print(f"[INFO] VNPU type: {_get_vnpu_type()}") + print(f"[INFO] Engine version: {_get_version()}") - super(BaseInferenceSession).__init__() - # load shared library - self._sys_lib = _capi.S - self._sys_ffi = _capi.M - self._engine_lib = _capi.E - self._engine_ffi = _capi.N +def _finalize_engine(): + global _is_sys_initialized, _is_engine_initialized - # chip type - self._chip_type = _chip.T - print(f"[INFO] Chip type: {self._chip_type}") + if _is_engine_initialized: + engine_lib.AX_ENGINE_Deinit() + if _is_sys_initialized: + sys_lib.AX_SYS_Deinit() - # handle, context, info, io - self._handle = self._engine_ffi.new("uint64_t **") - self._context = self._engine_ffi.new("uint64_t **") - self._io = self._engine_ffi.new("AX_ENGINE_IO_T *") - # init ax sys & engine - ret = self._init() - if 0 != ret: - raise RuntimeError("Failed to initialize engine.") - print(f"[INFO] Engine version: {self._get_version()}") +_initialize_engine() +atexit.register(_finalize_engine) + - # get vnpu type - self._vnpu_type = self._get_vnpu_type() - print(f"[INFO] VNPU type: {self._vnpu_type}") +class AXEngineSession(Session): + def __init__( + self, + path_or_bytes: str | bytes | os.PathLike, + sess_options: SessionOptions | None = None, + provider_options: dict[Any, Any] | None = None, + **kwargs, + ) -> None: + super().__init__() + + self._chip_type = _get_chip_type() + self._vnpu_type = _get_vnpu_type() + + # handle, context, info, io + self._handle = engine_cffi.new("uint64_t **") + self._context = engine_cffi.new("uint64_t **") + self._io = engine_cffi.new("AX_ENGINE_IO_T *") # model buffer, almost copied from onnx runtime if isinstance(path_or_bytes, (str, os.PathLike)): self._model_name = os.path.splitext(os.path.basename(path_or_bytes))[0] with open(path_or_bytes, "rb") as f: data = f.read() - self._model_buffer = self._engine_ffi.new("char[]", data) + self._model_buffer = engine_cffi.new("char[]", data) self._model_buffer_size = len(data) elif isinstance(path_or_bytes, bytes): - self._model_buffer = self._engine_ffi.new("char[]", path_or_bytes) + self._model_buffer = engine_cffi.new("char[]", path_or_bytes) self._model_buffer_size = len(path_or_bytes) else: raise TypeError(f"Unable to load model from type '{type(path_or_bytes)}'") @@ -91,8 +172,8 @@ def __init__( f"Model type '{self._model_type}' is not allowed when vnpu is inited as {self._vnpu_type}." ) if ( - self._vnpu_type is VNPUType.BIG_LITTLE - or self._vnpu_type is VNPUType.LITTLE_BIG + self._vnpu_type is VNPUType.BIG_LITTLE + or self._vnpu_type is VNPUType.LITTLE_BIG ): if self._model_type is ModelType.TRIPLE: raise ValueError( @@ -129,13 +210,13 @@ def __init__( # fill model io self._align = 128 - self._cmm_token = self._engine_ffi.new("AX_S8[]", b"PyEngine") + self._cmm_token = engine_cffi.new("AX_S8[]", b"PyEngine") self._io[0].nInputSize = len(self.get_inputs()) self._io[0].nOutputSize = len(self.get_outputs()) - self._io[0].pInputs = self._engine_ffi.new( + self._io[0].pInputs = engine_cffi.new( "AX_ENGINE_IO_BUFFER_T[{}]".format(self._io[0].nInputSize) ) - self._io[0].pOutputs = self._engine_ffi.new( + self._io[0].pOutputs = engine_cffi.new( "AX_ENGINE_IO_BUFFER_T[{}]".format(self._io[0].nOutputSize) ) for i in range(len(self.get_inputs())): @@ -143,9 +224,9 @@ def __init__( for j in range(self._shape_count): max_buf = max(max_buf, self._info[j][0].pInputs[i].nSize) self._io[0].pInputs[i].nSize = max_buf - phy = self._engine_ffi.new("AX_U64*") - vir = self._engine_ffi.new("AX_VOID**") - ret = self._sys_lib.AX_SYS_MemAllocCached( + phy = engine_cffi.new("AX_U64*") + vir = engine_cffi.new("AX_VOID**") + ret = sys_lib.AX_SYS_MemAllocCached( phy, vir, self._io[0].pInputs[i].nSize, self._align, self._cmm_token ) if 0 != ret: @@ -157,9 +238,9 @@ def __init__( for j in range(self._shape_count): max_buf = max(max_buf, self._info[j][0].pOutputs[i].nSize) self._io[0].pOutputs[i].nSize = max_buf - phy = self._engine_ffi.new("AX_U64*") - vir = self._engine_ffi.new("AX_VOID**") - ret = self._sys_lib.AX_SYS_MemAllocCached( + phy = engine_cffi.new("AX_U64*") + vir = engine_cffi.new("AX_VOID**") + ret = sys_lib.AX_SYS_MemAllocCached( phy, vir, self._io[0].pOutputs[i].nSize, self._align, self._cmm_token ) if 0 != ret: @@ -167,42 +248,12 @@ def __init__( self._io[0].pOutputs[i].phyAddr = phy[0] self._io[0].pOutputs[i].pVirAddr = vir[0] - def _init(self, vnpu=VNPUType.DISABLED): # vnpu type, the default is disabled - ret = self._sys_lib.AX_SYS_Init() - if 0 != ret: - raise RuntimeError("Failed to initialize system.") - - # get vnpu type first, check if npu was initialized - vnpu_type = self._engine_ffi.new("AX_ENGINE_NPU_ATTR_T *") - ret = self._engine_lib.AX_ENGINE_GetVNPUAttr(vnpu_type) - if 0 != ret: - # this means the NPU was not initialized - vnpu_type.eHardMode = self._engine_ffi.cast( - "AX_ENGINE_NPU_MODE_T", vnpu.value - ) - - return self._engine_lib.AX_ENGINE_Init(vnpu_type) - - def _final(self): - if self._handle[0] is not None: - self._unload() - self._engine_lib.AX_ENGINE_Deinit() - return self._sys_lib.AX_SYS_Deinit() - - def _get_version(self): - engine_version = self._engine_lib.AX_ENGINE_GetVersion() - return self._engine_ffi.string(engine_version).decode("utf-8") - - def _get_vnpu_type(self) -> VNPUType: - vnpu_type = self._engine_ffi.new("AX_ENGINE_NPU_ATTR_T *") - ret = self._engine_lib.AX_ENGINE_GetVNPUAttr(vnpu_type) - if 0 != ret: - raise RuntimeError("Failed to get VNPU attribute.") - return VNPUType(vnpu_type.eHardMode) + def __del__(self): + self._unload() def _get_model_type(self) -> ModelType: - model_type = self._engine_ffi.new("AX_ENGINE_MODEL_TYPE_T *") - ret = self._engine_lib.AX_ENGINE_GetModelType( + model_type = engine_cffi.new("AX_ENGINE_MODEL_TYPE_T *") + ret = engine_lib.AX_ENGINE_GetModelType( self._model_buffer, self._model_buffer_size, model_type ) if 0 != ret: @@ -210,23 +261,23 @@ def _get_model_type(self) -> ModelType: return ModelType(model_type[0]) def _get_model_tool_version(self): - model_tool_version = self._engine_lib.AX_ENGINE_GetModelToolsVersion( + model_tool_version = engine_lib.AX_ENGINE_GetModelToolsVersion( self._handle[0] ) - return self._engine_ffi.string(model_tool_version).decode("utf-8") + return engine_cffi.string(model_tool_version).decode("utf-8") def _load(self): - extra = self._engine_ffi.new("AX_ENGINE_HANDLE_EXTRA_T *") - extra_name = self._engine_ffi.new("char[]", self._model_name.encode("utf-8")) + extra = engine_cffi.new("AX_ENGINE_HANDLE_EXTRA_T *") + extra_name = engine_cffi.new("char[]", self._model_name.encode("utf-8")) extra.pName = extra_name # for onnx runtime do not support one model multiple context running in multi-thread as far as I know, so # the engine handle and context will create only once - ret = self._engine_lib.AX_ENGINE_CreateHandleV2( + ret = engine_lib.AX_ENGINE_CreateHandleV2( self._handle, self._model_buffer, self._model_buffer_size, extra ) if 0 == ret: - ret = self._engine_lib.AX_ENGINE_CreateContextV2( + ret = engine_lib.AX_ENGINE_CreateContextV2( self._handle[0], self._context ) return ret @@ -234,15 +285,15 @@ def _load(self): def _get_info(self): total_info = [] if 1 == self._shape_count: - info = self._engine_ffi.new("AX_ENGINE_IO_INFO_T **") - ret = self._engine_lib.AX_ENGINE_GetIOInfo(self._handle[0], info) + info = engine_cffi.new("AX_ENGINE_IO_INFO_T **") + ret = engine_lib.AX_ENGINE_GetIOInfo(self._handle[0], info) if 0 != ret: raise RuntimeError("Failed to get model shape.") total_info.append(info) else: for i in range(self._shape_count): - info = self._engine_ffi.new("AX_ENGINE_IO_INFO_T **") - ret = self._engine_lib.AX_ENGINE_GetGroupIOInfo( + info = engine_cffi.new("AX_ENGINE_IO_INFO_T **") + ret = engine_lib.AX_ENGINE_GetGroupIOInfo( self._handle[0], i, info ) if 0 != ret: @@ -251,53 +302,44 @@ def _get_info(self): return total_info def _get_shape_count(self): - count = self._engine_ffi.new("AX_U32 *") - ret = self._engine_lib.AX_ENGINE_GetGroupIOInfoCount(self._handle[0], count) + count = engine_cffi.new("AX_U32 *") + ret = engine_lib.AX_ENGINE_GetGroupIOInfoCount(self._handle[0], count) if 0 != ret: raise RuntimeError("Failed to get model shape group.") return count[0] def _unload(self): - return self._engine_lib.AX_ENGINE_DestroyHandle(self._handle[0]) + if self._handle[0] is not None: + engine_lib.AX_ENGINE_DestroyHandle(self._handle[0]) + self._handle[0] = engine_cffi.NULL - def _get_inputs(self): - inputs = [] + def _get_io(self, io_type: str): + io_info = [] for group in range(self._shape_count): - one_group_input = [] - for index in range(self._info[group][0].nInputSize): - current_input = self._info[group][0].pInputs[index] - name = self._engine_ffi.string(current_input.pName).decode("utf-8") - shape = [] - for i in range(current_input.nShapeSize): - shape.append(current_input.pShape[i]) - dtype = _transform_dtype( - self._engine_ffi, self._engine_lib, current_input.eDataType - ) + one_group_io = [] + for index in range(getattr(self._info[group][0], f'n{io_type}Size')): + current_io = getattr(self._info[group][0], f'p{io_type}s')[index] + name = engine_cffi.string(current_io.pName).decode("utf-8") + shape = [current_io.pShape[i] for i in range(current_io.nShapeSize)] + dtype = _transform_dtype(current_io.eDataType) meta = NodeArg(name, dtype, shape) - one_group_input.append(meta) - inputs.append(one_group_input) - return inputs + one_group_io.append(meta) + io_info.append(one_group_io) + return io_info - def _get_outputs(self): - outputs = [] - for group in range(self._shape_count): - one_group_output = [] - for index in range(self._info[group][0].nOutputSize): - current_output = self._info[group][0].pOutputs[index] - name = self._engine_ffi.string(current_output.pName).decode("utf-8") - shape = [] - for i in range(current_output.nShapeSize): - shape.append(current_output.pShape[i]) - dtype = _transform_dtype( - self._engine_ffi, self._engine_lib, current_output.eDataType - ) - meta = NodeArg(name, dtype, shape) - one_group_output.append(meta) - outputs.append(one_group_output) - return outputs + def _get_inputs(self): + return self._get_io('Input') - def run(self, output_names, input_feed, run_options=None): - self._validate_input(list(input_feed.keys())) + def _get_outputs(self): + return self._get_io('Output') + + def run( + self, + output_names: list[str], + input_feed: dict[str, np.ndarray], + run_options=None + ): + self._validate_input(input_feed) self._validate_output(output_names) if None is output_names: @@ -308,20 +350,21 @@ def run(self, output_names, input_feed, run_options=None): for i, one in enumerate(self.get_inputs()): if one.name == key: assert ( - list(one.shape) == list(npy.shape) and one.dtype == npy.dtype - ), f"model inputs({key}) expect shape {one.shape} and dtype {one.dtype}, howerver gets input with shape {npy.shape} and dtype {npy.dtype}" + list(one.shape) == list(npy.shape) and one.dtype == npy.dtype + ), f"model inputs({key}) expect shape {one.shape} and dtype {one.dtype}, however gets input with shape {npy.shape} and dtype {npy.dtype}" if not ( - not npy.flags.c_contiguous - and npy.flags.f_contiguous - and npy.flags.contiguous + not npy.flags.c_contiguous + and npy.flags.f_contiguous + and npy.flags.contiguous ): npy = np.ascontiguousarray(npy) - npy_ptr = self._engine_ffi.cast("void *", npy.ctypes.data) - self._engine_ffi.memmove( + npy_ptr = engine_cffi.cast("void *", npy.ctypes.data) + + engine_cffi.memmove( self._io[0].pInputs[i].pVirAddr, npy_ptr, npy.nbytes ) - self._sys_lib.AX_SYS_MflushCache( + sys_lib.AX_SYS_MflushCache( self._io[0].pInputs[i].phyAddr, self._io[0].pInputs[i].pVirAddr, self._io[0].pInputs[i].nSize, @@ -329,7 +372,7 @@ def run(self, output_names, input_feed, run_options=None): break # execute model - ret = self._engine_lib.AX_ENGINE_RunSyncV2( + ret = engine_lib.AX_ENGINE_RunSyncV2( self._handle[0], self._context[0], self._io ) @@ -337,13 +380,13 @@ def run(self, output_names, input_feed, run_options=None): outputs = [] if 0 == ret: for i in range(len(self.get_outputs())): - self._sys_lib.AX_SYS_MinvalidateCache( + sys_lib.AX_SYS_MinvalidateCache( self._io[0].pOutputs[i].phyAddr, self._io[0].pOutputs[i].pVirAddr, self._io[0].pOutputs[i].nSize, ) npy = np.frombuffer( - self._engine_ffi.buffer( + engine_cffi.buffer( self._io[0].pOutputs[i].pVirAddr, self._io[0].pOutputs[i].nSize ), dtype=self.get_outputs()[i].dtype, diff --git a/axengine/_ax_capi.py b/axengine/_axe_capi.py similarity index 94% rename from axengine/_ax_capi.py rename to axengine/_axe_capi.py index dd40bc1..2d9ecec 100644 --- a/axengine/_ax_capi.py +++ b/axengine/_axe_capi.py @@ -10,12 +10,12 @@ from cffi import FFI -__all__: ["S", "M", "E", "N"] +__all__: ["sys_lib", "sys_cffi", "engine_lib", "engine_cffi"] -M = FFI() +sys_cffi = FFI() # ax_base_type.h -M.cdef( +sys_cffi.cdef( """ typedef int AX_S32; typedef unsigned int AX_U32; @@ -26,7 +26,7 @@ ) # ax_sys_api.h -M.cdef( +sys_cffi.cdef( """ AX_S32 AX_SYS_Init(AX_VOID); AX_S32 AX_SYS_Deinit(AX_VOID); @@ -43,13 +43,13 @@ sys_path is not None ), f"Failed to find library {sys_name}. Please ensure it is installed and in the library path." -S = M.dlopen(sys_path) -assert S is not None, f"Failed to load library {sys_path}. Please ensure it is installed and in the library path." +sys_lib = sys_cffi.dlopen(sys_path) +assert sys_lib is not None, f"Failed to load library {sys_path}. Please ensure it is installed and in the library path." -N = FFI() +engine_cffi = FFI() # ax_base_type.h -N.cdef( +engine_cffi.cdef( """ typedef unsigned long long int AX_U64; typedef unsigned int AX_U32; @@ -67,14 +67,14 @@ ) # ax_engine_type.h, base type -N.cdef( +engine_cffi.cdef( """ typedef AX_U32 AX_ENGINE_NPU_SET_T; """ ) # ax_engine_type.h, enum -N.cdef( +engine_cffi.cdef( """ typedef enum _AX_ENGINE_TENSOR_LAYOUT_E { @@ -128,7 +128,7 @@ ) # ax_engine_type.h, architecturally agnostic struct -N.cdef( +engine_cffi.cdef( """ typedef enum { AX_ENGINE_VIRTUAL_NPU_DISABLE = 0, @@ -173,7 +173,7 @@ # ax_engine_type.h, struct if arch == "64bit": - N.cdef( + engine_cffi.cdef( """ typedef struct _AX_ENGINE_IO_META_T { @@ -224,7 +224,7 @@ """ ) else: - N.cdef( + engine_cffi.cdef( """ typedef struct _AX_ENGINE_IO_META_T { @@ -276,7 +276,7 @@ ) # ax_engine_api.h -N.cdef( +engine_cffi.cdef( """ const AX_CHAR* AX_ENGINE_GetVersion(AX_VOID); @@ -319,5 +319,5 @@ engine_path is not None ), f"Failed to find library {engine_name}. Please ensure it is installed and in the library path." -E = N.dlopen(engine_path) -assert E is not None, f"Failed to load library {engine_path}. Please ensure it is installed and in the library path." +engine_lib = engine_cffi.dlopen(engine_path) +assert engine_lib is not None, f"Failed to load library {engine_path}. Please ensure it is installed and in the library path." diff --git a/axengine/_axe_types.py b/axengine/_axe_types.py new file mode 100644 index 0000000..86fb476 --- /dev/null +++ b/axengine/_axe_types.py @@ -0,0 +1,29 @@ +# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved. +# +# This source file is the property of Axera Semiconductor Co., Ltd. and +# may not be copied or distributed in any isomorphic form without the prior +# written consent of Axera Semiconductor Co., Ltd. +# + +from enum import Enum + + +class VNPUType(Enum): + DISABLED = 0 + ENABLED = 1 + BIG_LITTLE = 2 + LITTLE_BIG = 3 + + +class ModelType(Enum): + HALF = 0 # for MC20E, which means chip is AX630C(x), or AX620Q(x) + FULL = 1 # for MC20E + SINGLE = 0 # for MC50, which means chip is AX650A or AX650N, and M57H + DUAL = 1 # for MC50 + TRIPLE = 2 # for MC50 + + +class ChipType(Enum): + MC20E = 0 + MC50 = 1 + M57H = 2 diff --git a/axengine/_base_session.py b/axengine/_base_session.py new file mode 100644 index 0000000..86d25c0 --- /dev/null +++ b/axengine/_base_session.py @@ -0,0 +1,59 @@ +# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved. +# +# This source file is the property of Axera Semiconductor Co., Ltd. and +# may not be copied or distributed in any isomorphic form without the prior +# written consent of Axera Semiconductor Co., Ltd. +# + +from abc import ABC, abstractmethod + +import numpy as np + +from ._node import NodeArg + + +class SessionOptions: + pass + + +class Session(ABC): + def __init__(self) -> None: + self._shape_count = 0 + self._inputs = [] + self._outputs = [] + + def _validate_input(self, feed_input_names: dict[str, np.ndarray]): + missing_input_names = [] + for i in self.get_inputs(): + if i.name not in feed_input_names: + missing_input_names.append(i.name) + if missing_input_names: + raise ValueError( + f"Required inputs ({missing_input_names}) are missing from input feed ({feed_input_names}).") + + def _validate_output(self, output_names: list[str]): + if output_names is not None: + for name in output_names: + if name not in [o.name for o in self.get_outputs()]: + raise ValueError(f"Output name '{name}' is not in model outputs name list.") + + def get_inputs(self, shape_group: int = 0) -> list[NodeArg]: + if shape_group > self._shape_count: + raise ValueError(f"Shape group '{shape_group}' is out of range, total {self._shape_count}.") + selected_info = self._inputs[shape_group] + return selected_info + + def get_outputs(self, shape_group: int = 0) -> list[NodeArg]: + if shape_group > self._shape_count: + raise ValueError(f"Shape group '{shape_group}' is out of range, total {self._shape_count}.") + selected_info = self._outputs[shape_group] + return selected_info + + @abstractmethod + def run( + self, + output_names: list[str] | None, + input_feed: dict[str, np.ndarray], + run_options=None + ) -> list[np.ndarray]: + pass diff --git a/axengine/_providers.py b/axengine/_providers.py new file mode 100644 index 0000000..dfab02e --- /dev/null +++ b/axengine/_providers.py @@ -0,0 +1,31 @@ +# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved. +# +# This source file is the property of Axera Semiconductor Co., Ltd. and +# may not be copied or distributed in any isomorphic form without the prior +# written consent of Axera Semiconductor Co., Ltd. +# + +import ctypes.util as cutil + +providers = [] +axengine_provider_name = 'AxEngineExecutionProvider' +axclrt_provider_name = 'AXCLRTExecutionProvider' + +_axengine_lib_name = 'ax_engine' +_axclrt_lib_name = 'axcl_rt' + +# check if axcl_rt is installed, so if available, it's the default provider +if cutil.find_library(_axclrt_lib_name) is not None: + providers.append(axclrt_provider_name) + +# check if ax_engine is installed +if cutil.find_library(_axengine_lib_name) is not None: + providers.append(axengine_provider_name) + + +def get_all_providers(): + return [axengine_provider_name, axclrt_provider_name] + + +def get_available_providers(): + return providers diff --git a/axengine/_session.py b/axengine/_session.py index 7edcca3..1f321b4 100644 --- a/axengine/_session.py +++ b/axengine/_session.py @@ -1,75 +1,117 @@ -from ._node import NodeArg -from ._types import VNPUType - -import numpy as np +# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved. +# +# This source file is the property of Axera Semiconductor Co., Ltd. and +# may not be copied or distributed in any isomorphic form without the prior +# written consent of Axera Semiconductor Co., Ltd. +# -class BaseInferenceSession: - def __init__(self, *args, **kwargs) -> None: - self._shape_count = 0 - self._inputs = [] - self._outputs = [] +import os +from typing import Any, Sequence - def __del__(self): - self._final() +import numpy as np +from ._base_session import SessionOptions +from ._node import NodeArg +from ._providers import axclrt_provider_name, axengine_provider_name +from ._providers import get_available_providers + + +class InferenceSession: + def __init__( + self, + path_or_bytes: str | bytes | os.PathLike, + sess_options: SessionOptions | None = None, + providers: Sequence[str | tuple[str, dict[Any, Any]]] | None = None, + provider_options: Sequence[dict[Any, Any]] | None = None, **kwargs, + ) -> None: + self._sess = None + self._sess_options = sess_options + self._provider = None + self._provider_options = None + self._available_providers = get_available_providers() + + # the providers should be available at least one, checked in __init__.py + if providers is None: + # using first available provider as default + _provider_name = self._available_providers[0] + self._provider = _provider_name + else: + # if only one provider is specified + if isinstance(providers, str): + if providers not in self._available_providers: + raise ValueError(f"Selected provider: '{providers}' is not available.") + self._provider = providers + # if multiple providers are specified, using the first one as default + elif isinstance(providers, list): + _unavailable_provider = [] + for p in providers: + assert isinstance(p, str) or isinstance(p, tuple), \ + f"Invalid provider type: {type(p)}. Must be str or tuple." + if isinstance(p, str): + if p not in self._available_providers: + _unavailable_provider.append(p) + elif self._provider is None: + self._provider = p + if isinstance(p, tuple): + assert len(p) == 2, f"Invalid provider type: {p}. Must be tuple with 2 elements." + assert isinstance(p[0], str), f"Invalid provider type: {type(p[0])}. Must be str." + assert isinstance(p[1], dict), f"Invalid provider type: {type(p[1])}. Must be dict." + if p[0] not in self._available_providers: + _unavailable_provider.append(p[0]) + elif self._provider is None: + self._provider = p[0] + # FIXME: check provider options + self._provider_options = p[1] + if _unavailable_provider: + if self._provider is None: + raise ValueError(f"Selected provider(s): {_unavailable_provider} is(are) not available.") + else: + print(f"[WARNING] Selected provider(s): {_unavailable_provider} is(are) not available.") + + # FIXME: can we remove this check? + if self._provider is None: + raise ValueError(f"No available provider found in {providers}.") + print(f"[INFO] Using provider: {self._provider}") + + if self._provider == axclrt_provider_name: + from ._axclrt import AXCLRTSession + self._sess = AXCLRTSession(path_or_bytes, sess_options, provider_options, **kwargs) + if self._provider == axengine_provider_name: + from ._axe import AXEngineSession + self._sess = AXEngineSession(path_or_bytes, sess_options, provider_options, **kwargs) + if self._sess is None: + raise RuntimeError(f"Create session failed with provider: {self._provider}") + + # add to support 'with' statement def __enter__(self): return self def __exit__(self, exc_type, exc_value, traceback): - self._final() - - def _init(self, *args, **kwargs): - return - - def _final(self): - return - - def _get_version(self) -> str: - return '' - - def _get_vnpu_type(self) -> VNPUType: - return VNPUType(0) - - def _get_model_tool_version(self) -> str: - return '' - - def _load(self) -> 0: - return 0 - - def _get_shape_count(self) -> int: - return 0 - - def _unload(self): - return - - def get_inputs(self, shape_group=0) -> list[NodeArg]: - if shape_group > self._shape_count: - raise ValueError(f"Shape group '{shape_group}' is out of range, total {self._shape_count}.") - selected_info = self._inputs[shape_group] - return selected_info - - def get_outputs(self, shape_group=0) -> list[NodeArg]: - if shape_group > self._shape_count: - raise ValueError(f"Shape group '{shape_group}' is out of range, total {self._shape_count}.") - selected_info = self._outputs[shape_group] - return selected_info - - # copy from onnxruntime - def _validate_input(self, feed_input_names): - missing_input_names = [] - for i in self.get_inputs(): - if i.name not in feed_input_names: - missing_input_names.append(i.name) - if missing_input_names: - raise ValueError( - f"Required inputs ({missing_input_names}) are missing from input feed ({feed_input_names})." - ) - - def _validate_output(self, output_names): - if output_names is not None: - for name in output_names: - if name not in [o.name for o in self.get_outputs()]: - raise ValueError(f"Output name '{name}' is not registered.") - - def run(self, output_names, input_feed, run_options=None) -> list[np.ndarray]: - return [] + # not suppress exceptions + return False + + def get_session_options(self): + """ + Return the session options. See :class:`axengine.SessionOptions`. + """ + return self._sess_options + + def get_providers(self): + """ + Return list of registered execution providers. + """ + return self._provider + + def get_inputs(self, shape_group: int = 0) -> list[NodeArg]: + return self._sess.get_inputs(shape_group) + + def get_outputs(self, shape_group: int = 0) -> list[NodeArg]: + return self._sess.get_outputs(shape_group) + + def run( + self, + output_names: list[str] | None, + input_feed: dict[str, np.ndarray], + run_options=None + ) -> list[np.ndarray]: + return self._sess.run(output_names, input_feed, run_options) diff --git a/axengine/_types.py b/axengine/_types.py deleted file mode 100644 index d054a92..0000000 --- a/axengine/_types.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved. -# -# This source file is the property of Axera Semiconductor Co., Ltd. and -# may not be copied or distributed in any isomorphic form without the prior -# written consent of Axera Semiconductor Co., Ltd. -# - -from enum import Enum -import ml_dtypes as mldt -import numpy as np - - -class VNPUType(Enum): - DISABLED = 0 - ENABLED = 1 - BIG_LITTLE = 2 - LITTLE_BIG = 3 - - -class ModelType(Enum): - HALF = 0 # for MC20E, which means chip is AX630C(x), or AX620Q(x) - FULL = 1 # for MC20E - SINGLE = 0 # for MC50, which means chip is AX650A or AX650N, and M57H - DUAL = 1 # for MC50 - TRIPLE = 2 # for MC50 - - -class ChipType(Enum): - MC20E = 0 - MC50 = 1 - M57H = 2 - - -def get_data_type(engine_type): - if engine_type == ChipType.MC20E: - return ModelType.HALF - elif engine_type == ChipType.MC50: - return ModelType.SINGLE - elif engine_type == ChipType.M57H: - return ModelType.SINGLE - else: - raise ValueError("Invalid engine type: %s" % engine_type) - - -def _transform_dtype(ffi, lib, dtype): - if dtype == ffi.cast("AX_ENGINE_DATA_TYPE_T", lib.AX_ENGINE_DT_UINT8): - return np.dtype(np.uint8) - elif dtype == ffi.cast("AX_ENGINE_DATA_TYPE_T", lib.AX_ENGINE_DT_SINT8): - return np.dtype(np.int8) - elif dtype == ffi.cast("AX_ENGINE_DATA_TYPE_T", lib.AX_ENGINE_DT_UINT16): - return np.dtype(np.uint16) - elif dtype == ffi.cast("AX_ENGINE_DATA_TYPE_T", lib.AX_ENGINE_DT_SINT16): - return np.dtype(np.int16) - elif dtype == ffi.cast("AX_ENGINE_DATA_TYPE_T", lib.AX_ENGINE_DT_UINT32): - return np.dtype(np.uint32) - elif dtype == ffi.cast("AX_ENGINE_DATA_TYPE_T", lib.AX_ENGINE_DT_SINT32): - return np.dtype(np.int32) - elif dtype == ffi.cast("AX_ENGINE_DATA_TYPE_T", lib.AX_ENGINE_DT_FLOAT32): - return np.dtype(np.float32) - elif dtype == ffi.cast("AX_ENGINE_DATA_TYPE_T", lib.AX_ENGINE_DT_BFLOAT16): - return np.dtype(mldt.bfloat16) - else: - raise ValueError(f"Unsupported data type '{dtype}'.") diff --git a/axengine/axcl_session.py b/axengine/axcl_session.py deleted file mode 100644 index ade4d96..0000000 --- a/axengine/axcl_session.py +++ /dev/null @@ -1,405 +0,0 @@ -# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved. -# -# This source file is the property of Axera Semiconductor Co., Ltd. and -# may not be copied or distributed in any isomorphic form without the prior -# written consent of Axera Semiconductor Co., Ltd. -# -# created by zylo117 -from ._session import BaseInferenceSession -from ._types import VNPUType -from ._node import NodeArg - -import os -import numpy as np -import time - -__all__: ["InferenceSession"] - - -class InferenceSession(BaseInferenceSession): - def __init__( - self, - path_or_bytes: str | bytes | os.PathLike, - device_id: int = 0 - ) -> None: - from . import _axcl_capi as _capi - - super(BaseInferenceSession).__init__() - - self.device_id = device_id - - # load shared library - self._rt_lib = _capi.R - self._rt_ffi = _capi.O - - self.soc_name = self._rt_ffi.string(self._rt_lib.axclrtGetSocName()).decode() - print(f"[INFO] SOC Name: {self.soc_name}") - - # init axcl - self.axcl_device_id = -1 # axcl_device_id != device_id, device_id is just the index of the list of axcl_device_ids - ret = self._init(device_id) - if 0 != ret: - raise RuntimeError("Failed to initialize axclrt.") - print(f"[INFO] Runtime version: {self._get_version()}") - - self._thread_context = self._rt_ffi.new("axclrtContext *") - ret = self._rt_lib.axclrtGetCurrentContext(self._thread_context) - if ret != 0: - raise RuntimeError("axclrtGetCurrentContext failed") - - # model handle, context, info, io - self._handle = self._rt_ffi.new("uint64_t *") - self._context = self._rt_ffi.new("uint64_t *") - self.io_info = self._rt_ffi.new("axclrtEngineIOInfo *") - self.group_count = self._rt_ffi.new("int32_t *") - - # get vnpu type - self._vnpu_type = self._get_vnpu_type() - print(f"[INFO] VNPU type: {self._vnpu_type}") - - # model buffer, almost copied from onnx runtime - if isinstance(path_or_bytes, (str, os.PathLike)): - self._model_name = os.path.splitext(os.path.basename(path_or_bytes))[0] - with open(path_or_bytes, "rb") as f: - data = f.read() - self._model_buffer = self._rt_ffi.new("char[]", data) - self._model_buffer_size = len(data) - elif isinstance(path_or_bytes, bytes): - self._model_buffer = self._rt_ffi.new("char[]", path_or_bytes) - self._model_buffer_size = len(path_or_bytes) - else: - raise TypeError(f"Unable to load model from type '{type(path_or_bytes)}'") - - # load model - ret = self._load() - if 0 != ret: - raise RuntimeError("Failed to load model.") - print(f"[INFO] Compiler version: {self._get_model_tool_version()}") - - # get shape group count - self._shape_count = self._get_shape_count() - self.ios = [self._rt_ffi.new("axclrtEngineIO *") for _ in range(self._shape_count)] - self.io_datas = [self._rt_ffi.new("AXCL_IO_DATA_T *") for _ in range(self._shape_count)] - self.io_buf_in = None - self.io_buf_out = None - - self.mgroup_input_tensors = [[] for _ in range(self._shape_count)] - self.mgroup_output_tensors = [[] for _ in range(self._shape_count)] - self._inputs = [] - self._outputs = [] - - self._sub_init() - - self._auto_sync_before_inference = True - self._auto_sync_after_inference = True - - def _sub_init(self): - for grp_id in range(self._shape_count): - input_node_args = [] - output_node_args = [] - - print(f'grp_id: {grp_id}') - - io = self.ios[grp_id] - ret = self._rt_lib.axclrtEngineCreateIO(self.io_info[0], io) - if 0 != ret: - self._rt_lib.axclrtEngineUnload(self._handle[0]) - raise RuntimeError(f"Create io failed 0x{ret:08x}") - - io_data = self.io_datas[grp_id] - ret = self._prepare_io(grp_id, self.io_info[0], io[0], io_data, - (self._rt_lib.AX_ENGINE_ABST_DEFAULT, - self._rt_lib.AX_ENGINE_ABST_DEFAULT)) - if ret != 0: - self._free_io(io_data) - self._rt_lib.axclrtEngineDestroyIO(io[0]) - self._rt_lib.axclrtEngineUnload(self._handle[0]) - raise RuntimeError("prepare_io failed.") - - print(f'input size: {io_data.nInputSize}') - for i in range(io_data.nInputSize): - tensor = self._rt_ffi.new("ax_runner_tensor_t *") - tensor.nIdx = i - tensor.sName = io_data.pInputs[i].Name - tensor.nSize = io_data.pInputs[i].nSize - for j in range(io_data.pInputs[i].dims.dimCount): - tensor.vShape[j] = io_data.pInputs[i].dims.dims[j] - tensor.vShapeSize = io_data.pInputs[i].dims.dimCount - tensor.phyAddr = self._rt_ffi.cast('unsigned long long', io_data.pInputs[i].pBuf) - tensor.pVirAddr = io_data.pInputs[i].pVirAddr - self.mgroup_input_tensors[grp_id].append(tensor) - print(f'\tname: {self._rt_ffi.string(io_data.pInputs[i].Name).decode()}') - print(f'\t\tshape: {" x ".join([str(io_data.pInputs[i].dims.dims[j]) for j in range(io_data.pInputs[i].dims.dimCount)])}') - input_node_args.append( - NodeArg(self._rt_ffi.string(io_data.pInputs[i].Name).decode(), 'uint8', - [io_data.pInputs[i].dims.dims[j] for j in range(io_data.pInputs[i].dims.dimCount)])) - - print(f'output size: {io_data.nOutputSize}') - for i in range(io_data.nOutputSize): - tensor = self._rt_ffi.new("ax_runner_tensor_t *") - tensor.nIdx = i - tensor.sName = io_data.pOutputs[i].Name - tensor.nSize = io_data.pOutputs[i].nSize - for j in range(io_data.pOutputs[i].dims.dimCount): - tensor.vShape[j] = io_data.pOutputs[i].dims.dims[j] - tensor.vShapeSize = io_data.pOutputs[i].dims.dimCount - tensor.phyAddr = self._rt_ffi.cast('unsigned long long', io_data.pOutputs[i].pBuf) - tensor.pVirAddr = io_data.pOutputs[i].pVirAddr - self.mgroup_output_tensors[grp_id].append(tensor) - print(f'\tname: {self._rt_ffi.string(io_data.pOutputs[i].Name).decode()}') - print(f'\t\tshape: {" x ".join([str(io_data.pOutputs[i].dims.dims[j]) for j in range(io_data.pOutputs[i].dims.dimCount)])}') - output_node_args.append( - NodeArg(self._rt_ffi.string(io_data.pOutputs[i].Name).decode(), 'float32', - [io_data.pOutputs[i].dims.dims[j] for j in range(io_data.pOutputs[i].dims.dimCount)])) - - self._inputs.append(input_node_args) - self._outputs.append(output_node_args) - - def _prepare_io(self, grp_id, io_info, io, io_data, strategy): - self._rt_lib.memset(io_data, 0, self._rt_ffi.sizeof('AXCL_IO_DATA_T')) - - inputNum = self._rt_lib.axclrtEngineGetNumInputs(io_info) - outputNum = self._rt_lib.axclrtEngineGetNumOutputs(io_info) - io_data.nInputSize = inputNum - io_data.nOutputSize = outputNum - self.io_buf_in = self._rt_ffi.new('AXCL_IO_BUF_T[]', inputNum) - self.io_buf_out = self._rt_ffi.new('AXCL_IO_BUF_T[]', outputNum) - io_data.pInputs = self.io_buf_in - io_data.pOutputs = self.io_buf_out - - # alloc inputs - for i in range(inputNum): - bufSize = self._rt_lib.axclrtEngineGetInputSizeByIndex(io_info, grp_id, i) - devPtr = self._rt_ffi.new('void **', self._rt_ffi.NULL) - ret = 0 - if strategy[0] == self._rt_lib.AX_ENGINE_ABST_DEFAULT: - ret = self._rt_lib.axclrtMalloc(devPtr, bufSize, self._rt_lib.AXCL_MEM_MALLOC_HUGE_FIRST) - else: - ret = self._rt_lib.axclrtMallocCached(devPtr, bufSize, self._rt_lib.AXCL_MEM_MALLOC_HUGE_FIRST) - - if ret != 0: - self._free_io_index(io_data.pInputs, i) - raise RuntimeError(f"Malloc input(index: {i}, size: {bufSize}) failed! 0x{ret:08x}") - - tmp = self._rt_ffi.new('char[]', bufSize) - self._rt_lib.axclrtMemcpy(devPtr[0], tmp, bufSize, self._rt_lib.AXCL_MEMCPY_HOST_TO_DEVICE) - - dims = self._rt_ffi.new('axclrtEngineIODims *') - ret = self._rt_lib.axclrtEngineGetInputDims(io_info, grp_id, i, dims) - if ret != 0: - self._free_io_index(io_data.pInputs, i) - raise RuntimeError(f"Get input dims(index: {i}) failed! 0x{ret:08x}") - - io_data.pInputs[i].nIndex = i - io_data.pInputs[i].nSize = bufSize - io_data.pInputs[i].pBuf = devPtr[0] - io_data.pInputs[i].dims = dims[0] - io_data.pInputs[i].Name = self._rt_lib.axclrtEngineGetInputNameByIndex(io_info, i) - io_data.pInputs[i].pVirAddr = self._rt_lib.malloc(bufSize) - self._rt_lib.memset(io_data.pInputs[i].pVirAddr, 0, bufSize) - ret = self._rt_lib.axclrtEngineSetInputBufferByIndex(io, i, devPtr[0], bufSize) - if ret != 0: - self._free_io_index(io_data.pInputs, i) - raise RuntimeError(f"Set input buffer(index: {i}, size: {bufSize}) failed! 0x{ret:08x}") - - # alloc outputs - for i in range(outputNum): - bufSize = self._rt_lib.axclrtEngineGetOutputSizeByIndex(io_info, grp_id, i) - devPtr = self._rt_ffi.new('void **', self._rt_ffi.NULL) - ret = 0 - if strategy[0] == self._rt_lib.AX_ENGINE_ABST_DEFAULT: - ret = self._rt_lib.axclrtMalloc(devPtr, bufSize, self._rt_lib.AXCL_MEM_MALLOC_HUGE_FIRST) - else: - ret = self._rt_lib.axclrtMallocCached(devPtr, bufSize, self._rt_lib.AXCL_MEM_MALLOC_HUGE_FIRST) - - if ret != 0: - self._free_io_index(io_data.pOutputs, i) - raise RuntimeError(f"Malloc output(index: {i}, size: {bufSize}) failed! 0x{ret:08x}") - - tmp = self._rt_ffi.new('char[]', bufSize) - self._rt_lib.axclrtMemcpy(devPtr[0], tmp, bufSize, self._rt_lib.AXCL_MEMCPY_HOST_TO_DEVICE) - - dims = self._rt_ffi.new('axclrtEngineIODims *') - ret = self._rt_lib.axclrtEngineGetOutputDims(io_info, grp_id, i, dims) - if ret != 0: - self._free_io_index(io_data.pOutputs, i) - raise RuntimeError(f"Get output dims(index: {i}) failed! 0x{ret:08x}") - - io_data.pOutputs[i].nIndex = i - io_data.pOutputs[i].nSize = bufSize - io_data.pOutputs[i].pBuf = devPtr[0] - io_data.pOutputs[i].dims = dims[0] - io_data.pOutputs[i].Name = self._rt_lib.axclrtEngineGetOutputNameByIndex(io_info, i) - io_data.pOutputs[i].pVirAddr = self._rt_lib.malloc(bufSize) - self._rt_lib.memset(io_data.pOutputs[i].pVirAddr, 0, bufSize) - ret = self._rt_lib.axclrtEngineSetOutputBufferByIndex(io, i, devPtr[0], bufSize) - if ret != 0: - self._free_io_index(io_data.pOutputs, i) - raise RuntimeError(f"Set output buffer(index: {i}, size: {bufSize}) failed! 0x{ret:08x}") - return 0 - - def _free_io_index(self, pBuf, index): - for i in range(index): - self._rt_lib.axclrtFree(pBuf[i].pBuf) - - def _free_io(self, io_data): - for j in range(io_data.nInputSize): - self._rt_lib.axclrtFree(io_data.pInputs[j].pBuf) - self._rt_lib.free(io_data.pInputs[j].pVirAddr) - for j in range(io_data.nOutputSize): - self._rt_lib.axclrtFree(io_data.pOutputs[j].pBuf) - self._rt_lib.free(io_data.pOutputs[j].pVirAddr) - - # 不知道如何在ffi中直接调用 - # delete[] io_data->pInputs; - # delete[] io_data->pOutputs; - - def _init(self, device_id=0, vnpu=VNPUType.DISABLED): # vnpu type, the default is disabled - ret = self._rt_lib.axclInit([]) - if ret != 0: - raise RuntimeError(f"Failed to initialize runtime. {ret}.") - - lst = self._rt_ffi.new("axclrtDeviceList *") - ret = self._rt_lib.axclrtGetDeviceList(lst) - if ret != 0 or lst.num == 0: - raise RuntimeError(f"Get AXCL device failed 0x{ret:08x}, find total {lst.num} device.") - - self.axcl_device_id = lst.devices[device_id] - ret = self._rt_lib.axclrtSetDevice(self.axcl_device_id) - if ret != 0 or lst.num == 0: - raise RuntimeError(f"Set AXCL device failed 0x{ret:08x}.") - - ret = self._rt_lib.axclrtEngineInit(vnpu.value) - if ret != 0 or lst.num == 0: - raise RuntimeError(f"axclrtEngineInit failed 0x{ret:08x}.") - - return 0 - - def _final(self): - if self._handle[0] is not None: - self._unload() - self._rt_lib.axclrtResetDevice(self.axcl_device_id) - self._rt_lib.axclFinalize() - return - - def _get_version(self): - major, minor, patch = self._rt_ffi.new('int32_t *'), self._rt_ffi.new('int32_t *'), self._rt_ffi.new('int32_t *') - self._rt_lib.axclrtGetVersion(major, minor, patch) - return f'{major[0]}.{minor[0]}.{patch[0]}' - - def _get_vnpu_type(self) -> VNPUType: - vnpu_type = self._rt_ffi.new("axclrtEngineVNpuKind *") - ret = self._rt_lib.axclrtEngineGetVNpuKind(vnpu_type) - if ret != 0: - raise RuntimeError("Failed to get VNPU attribute.") - return VNPUType(vnpu_type[0]) - - def _get_model_tool_version(self): - model_tool_version = self._rt_lib.axclrtEngineGetModelCompilerVersion(self._handle[0]) - return self._rt_ffi.string(model_tool_version).decode() - - def _load(self): - devMem = self._rt_ffi.new('void **', self._rt_ffi.NULL) - self._rt_lib.axclrtMalloc(devMem, self._model_buffer_size, self._rt_lib.AXCL_MEM_MALLOC_NORMAL_ONLY) - self._rt_lib.axclrtMemcpy(devMem[0], self._model_buffer, self._model_buffer_size, self._rt_lib.AXCL_MEMCPY_HOST_TO_DEVICE) - - ret = self._rt_lib.axclrtEngineLoadFromMem(devMem[0], self._model_buffer_size, self._handle) - if ret != 0: - raise RuntimeError("axclrtEngineLoadFromMem failed") - - self._rt_lib.axclrtFree(devMem[0]) - - ret = self._rt_lib.axclrtEngineCreateContext(self._handle[0], self._context) - if ret != 0: - raise RuntimeError("axclrtEngineCreateContext failed") - - ret = self._rt_lib.axclrtEngineGetIOInfo(self._handle[0], self.io_info) - if ret != 0: - raise RuntimeError("axclrtEngineGetIOInfo failed") - - return self.group_count[0] - - def _get_shape_count(self): - ret = self._rt_lib.axclrtEngineGetShapeGroupsCount(self.io_info[0], self.group_count) - if ret != 0: - self._rt_lib.axclrtEngineUnload(self._handle[0]) - raise RuntimeError("axclrtEngineGetShapeGroupsCount failed") - - return self.group_count[0] - - def _unload(self): - for grp_id in range(len(self.mgroup_input_tensors)): - self._free_io(self.io_datas[grp_id]) - self._rt_lib.axclrtEngineDestroyIO(self.ios[grp_id][0]) - - self._rt_lib.axclrtEngineUnload(self._handle[0]) - self._handle[0] = 0 - - return - - def run(self, output_names, input_feed, run_options=None): - self._validate_input(list(input_feed.keys())) - self._validate_output(output_names) - - ret = self._rt_lib.axclrtSetCurrentContext(self._thread_context[0]) - if ret != 0: - raise RuntimeError("axclrtSetCurrentContext failed") - - if None is output_names: - output_names = [o.name for o in self.get_outputs()] - - grp_id = 0 - - # fill model io - for key, npy in input_feed.items(): - for i, one in enumerate(self.get_inputs()): - if one.name == key: - assert ( - list(one.shape) == list(npy.shape) and one.dtype == npy.dtype - ), f"model inputs({key}) expect shape {one.shape} and dtype {one.dtype}, howerver gets input with shape {npy.shape} and dtype {npy.dtype}" - - if not ( - not npy.flags.c_contiguous - and npy.flags.f_contiguous - and npy.flags.contiguous - ): - npy = np.ascontiguousarray(npy) - npy_ptr = self._rt_ffi.cast("void *", npy.ctypes.data) - self._rt_lib.memcpy(self.mgroup_input_tensors[grp_id][i].pVirAddr, npy_ptr, npy.nbytes) - break - - # execute model - t1 = time.time() - if self._auto_sync_before_inference: - for input_tensor in self.mgroup_input_tensors[grp_id]: - self._rt_lib.axclrtMemcpy(self._rt_ffi.cast('void *', input_tensor.phyAddr), input_tensor.pVirAddr, - input_tensor.nSize, self._rt_lib.AXCL_MEMCPY_HOST_TO_DEVICE) - t2 = time.time() - cost_host_to_device = t2 - t1 - - t1 = time.time() - ret = self._rt_lib.axclrtEngineExecute(self._handle[0], self._context[0], grp_id, self.ios[grp_id][0]) - if ret != 0: - raise RuntimeError(f"axclrtEngineExecute failed 0x{ret:08x}") - t2 = time.time() - cost_inference = t2 - t1 - - t1 = time.time() - if self._auto_sync_after_inference: - for output_tensor in self.mgroup_output_tensors[grp_id]: - self._rt_lib.axclrtMemcpy(output_tensor.pVirAddr, self._rt_ffi.cast('void *', output_tensor.phyAddr), - output_tensor.nSize, self._rt_lib.AXCL_MEMCPY_DEVICE_TO_HOST) - t2 = time.time() - cost_device_to_host = t2 - t1 - - # flush output - outputs = [np.frombuffer(self._rt_ffi.buffer(output_tensor.pVirAddr, output_tensor.nSize), - dtype=self.get_outputs()[0].dtype).reshape(self.get_outputs()[i].shape) - for i, output_tensor in enumerate(self.mgroup_output_tensors[grp_id]) - if self.get_outputs()[i].name in output_names] - - print(f'[INFO] cost time in host to device: {cost_host_to_device * 1000:.3f}ms, ' - f'inference: {cost_inference * 1000:.3f}ms, ' - f'device to host: {cost_device_to_host * 1000:.3f}ms') - - return outputs diff --git a/axengine/session.py b/axengine/session.py deleted file mode 100644 index 9cc4040..0000000 --- a/axengine/session.py +++ /dev/null @@ -1,23 +0,0 @@ -import ctypes.util -import os - -from .ax_session import InferenceSession as AXInferenceSession -from .axcl_session import InferenceSession as AXCLInferenceSession - - -def InferenceSession(path_or_bytes: str | bytes | os.PathLike, device_id: int = -1, *args, **kwargs): - is_axcl = False - if device_id >= 0: - if ctypes.util.find_library('axcl_rt') is not None: - is_axcl = True - elif ctypes.util.find_library('ax_engine') is not None: - is_axcl = False - else: - print("axcl_rt not found, please install axcl_host driver") - - if is_axcl: - print(f"Using axclrt backend, device_id: {device_id}") - return AXCLInferenceSession(path_or_bytes, device_id) - else: - print("Using ax backend with onboard npu") - return AXInferenceSession(path_or_bytes) diff --git a/examples/classification.py b/examples/classification.py index dbffe3f..889e84b 100644 --- a/examples/classification.py +++ b/examples/classification.py @@ -5,22 +5,39 @@ # written consent of Axera Semiconductor Co., Ltd. # -import axengine as axe +import argparse +import os +import re +import sys +import time + import numpy as np from PIL import Image +import axengine as axe +from axengine import axclrt_provider_name, axengine_provider_name + + +def load_model(model_path: str | os.PathLike, selected_provider: str, selected_device_id: int = 0): + if selected_provider == 'AUTO': + # Use AUTO to let the pyengine choose the first available provider + return axe.InferenceSession(model_path) -def load_model(model_path, backend='auto', device_id=-1): - if backend == 'auto': - session = axe.InferenceSession(model_path, device_id) - elif backend == 'ax': - session = axe.AXInferenceSession(model_path) - elif backend == 'axcl': - session = axe.AXCLInferenceSession(model_path, device_id) - return session + providers = [] + if selected_provider == axclrt_provider_name: + provider_options = {"device_id": selected_device_id} + providers.append((axclrt_provider_name, provider_options)) + if selected_provider == axengine_provider_name: + providers.append(axengine_provider_name) + return axe.InferenceSession(model_path, providers=providers) -def preprocess_image(image_path, target_size=(256, 256), crop_size=(224, 224)): + +def preprocess_image( + image_path: str | os.PathLike, + middle_step_size: (int, int) = (256, 256), + final_step_size: (int, int) = (224, 224) +): # Load the image img = Image.open(image_path).convert("RGB") @@ -40,12 +57,12 @@ def preprocess_image(image_path, target_size=(256, 256), crop_size=(224, 224)): img = img.crop((crop_x, crop_y, crop_x + crop_area, crop_y + crop_area)) # Resize the image to 256x256 - img = img.resize(target_size) + img = img.resize(middle_step_size) # Crop the center 224x224 - crop_x = (target_size[0] - crop_size[0]) // 2 - crop_y = (target_size[1] - crop_size[1]) // 2 - img = img.crop((crop_x, crop_y, crop_x + crop_size[0], crop_y + crop_size[1])) + crop_x = (middle_step_size[0] - final_step_size[0]) // 2 + crop_y = (middle_step_size[1] - final_step_size[1]) // 2 + img = img.crop((crop_x, crop_y, crop_x + final_step_size[0], crop_y + final_step_size[1])) # Convert to numpy array and change dtype to int img_array = np.array(img).astype("uint8") @@ -55,50 +72,128 @@ def preprocess_image(image_path, target_size=(256, 256), crop_size=(224, 224)): return img_array -def get_top_k_predictions(output, k=5): +def get_top_k_predictions(output: list[np.ndarray], k: int = 5): # Get top k predictions top_k_indices = np.argsort(output[0].flatten())[-k:][::-1] top_k_scores = output[0].flatten()[top_k_indices] return top_k_indices, top_k_scores -def main(model_path, image_path, target_size, crop_size, k, backend='auto', device_id=-1): +def main(model_path, image_path, middle_step_size, final_step_size, k, repeat_times, selected_provider, + selected_device_id): # Load the model - session = load_model(model_path, backend, device_id) + session = load_model(model_path, selected_provider, selected_device_id) # Preprocess the image - input_tensor = preprocess_image(image_path, target_size, crop_size) + input_tensor = preprocess_image(image_path, middle_step_size, final_step_size) # Get input name and run inference input_name = session.get_inputs()[0].name - import time - for i in range(10): + time_costs = [] + output = None + for i in range(repeat_times): t1 = time.time() + input_tensor = input_tensor.copy() output = session.run(None, {input_name: input_tensor}) t2 = time.time() - print(t2 - t1) + time_costs.append((t2 - t1) * 1000) # Get top k predictions top_k_indices, top_k_scores = get_top_k_predictions(output, k) # Print the results - print(f"Top {k} Predictions:") + print(" ------------------------------------------------------") + print(f" Top {k} Predictions:") for i in range(k): - print(f"Class Index: {top_k_indices[i]}, Score: {top_k_scores[i]}") + print(f" Class Index: {top_k_indices[i]:>3}, Score: {top_k_scores[i]:.3f}") + + print(" ------------------------------------------------------") + print( + f" min = {min(time_costs):.3f} ms max = {max(time_costs):.3f} ms avg = {sum(time_costs) / len(time_costs):.3f} ms" + ) + print(" ------------------------------------------------------") + + +def parse_size(size_str): + pattern = r'^\s*\d+\s*,\s*\d+\s*$' + if not re.match(pattern, size_str): + raise argparse.ArgumentTypeError(R'params should looks like: "height,width", such as: "256,256"') + + height, width = map(int, size_str.split(',')) + return height, width + + +class ExampleParser(argparse.ArgumentParser): + def error(self, message): + self.print_usage(sys.stderr) + print(f"\nError: {message}") + print("\nExample usage:") + print(" python3 classification.py -m -i ") + print(" python3 classification.py -m /opt/data/npu/models/mobilenetv2.axmodel -i /opt/data/npu/images/cat.jpg") + print( + f" python3 classification.py -m /opt/data/npu/models/mobilenetv2.axmodel -i /opt/data/npu/images/cat.jpg -p {axengine_provider_name}") + print( + f" python3 classification.py -m /opt/data/npu/models/mobilenetv2.axmodel -i /opt/data/npu/images/cat.jpg -p {axclrt_provider_name}") + sys.exit(1) if __name__ == "__main__": - import argparse - ap = argparse.ArgumentParser() - ap.add_argument('-b', '--backend', type=str, help='auto/ax/axcl', default='auto') - ap.add_argument('-d', '--device_id', type=int, help='axcl device no, -1: onboard npu, >0: axcl devices', default=0) + ap = ExampleParser() + ap.add_argument('-m', '--model-path', type=str, help='model path', required=True) + ap.add_argument('-i', '--image-path', type=str, help='image path', required=True) + ap.add_argument( + '-s', + '--resize-size', + type=parse_size, + help=R'imagenet resize size: "height,width", such as: "256,256"', + default='256,256', + ) + ap.add_argument( + '-c', + '--crop-size', + type=parse_size, + help=R'imagenet crop size: "height,width", such as: "224,224"', + default='224,224', + ) + ap.add_argument( + '-k', + '--top-k', + type=int, + help='top k predictions', + default=5 + ) + ap.add_argument('-r', '--repeat', type=int, help='repeat times', default=100) + ap.add_argument( + '-p', + '--provider', + type=str, + choices=["AUTO", f"{axclrt_provider_name}", f"{axengine_provider_name}"], + help=f'"AUTO", "{axclrt_provider_name}", "{axengine_provider_name}"', + default='AUTO' + ) + ap.add_argument( + '-d', + '--device-id', + type=int, + help=R'axclrt device index, depends on how many cards inserted', + default=0 + ) args = ap.parse_args() - assert args.backend in ['auto', 'ax', 'axcl'], "backend must be auto/ax/axcl" - assert args.device_id >= -1, "device_id must be greater than -1" - - MODEL_PATH = "../mobilenetv2.axmodel" - IMAGE_PATH = "../cat.jpg" - TARGET_SIZE = (256, 256) # Resize to 256x256 - CROP_SIZE = (224, 224) # Crop to 224x224 - K = 5 # Top K predictions - main(MODEL_PATH, IMAGE_PATH, TARGET_SIZE, CROP_SIZE, K, args.backend, args.device_id) + + model_file = args.model_path + image_file = args.image_path + + # check if the model and image exist + assert os.path.exists(model_file), f"model file path {model_file} does not exist" + assert os.path.exists(image_file), f"image file path {image_file} does not exist" + + resize_size = args.resize_size + crop_size = args.crop_size + top_k = args.top_k + + repeat = args.repeat + + provider = args.provider + device_id = args.device_id + + main(model_file, image_file, resize_size, crop_size, top_k, repeat, provider, device_id) diff --git a/setup.py b/setup.py index 4892b02..80c4bb3 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,10 @@ +# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved. +# +# This source file is the property of Axera Semiconductor Co., Ltd. and +# may not be copied or distributed in any isomorphic form without the prior +# written consent of Axera Semiconductor Co., Ltd. +# + from setuptools import setup setup( @@ -17,6 +24,6 @@ ], packages=["axengine"], ext_modules=[], - install_requires=["cffi>=1.0.0", "ml-dtypes>=0.1.0"], - setup_requires=["cffi>=1.0.0", "ml-dtypes>=0.1.0"], + install_requires=["cffi>=1.0.0", "ml-dtypes>=0.1.0", "numpy>=1.22"], + setup_requires=["cffi>=1.0.0", "ml-dtypes>=0.1.0", "numpy>=1.22"], )