Add AXCL support on top of axengine (AXERA-TECH#13)
* rebase 57a6028

* Adapt the examples to both backends

* Support older Python versions

* assert backend in ['ax', 'axcl']

* update readme

* Automatically detect whether the axclrt library is available and whether the device number (default 0) is >= 0; if both hold, use axcl, otherwise ax (see the sketch after this list)

* Fall back to ax when device_id >= 0 but no AXCL device is present

* Automatically detect the platform

* Adjust imports

* Adjust imports

* update readme

* add check for inputs on axcl (AXERA-TECH#14)
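
The backend auto-detection described in the bullets above boils down to a simple check; below is a minimal sketch (not the commit's actual code), with the function name and fallback details as assumptions:

```python
# A minimal sketch of the backend auto-detection described above; the function
# name and exact checks are assumptions, not the code introduced by the commit.
import ctypes.util

def choose_backend(device_id: int = 0) -> str:
    # AXCL requires a non-negative device id and the axcl_rt runtime library.
    if device_id >= 0 and ctypes.util.find_library("axcl_rt") is not None:
        # The commit additionally verifies that an AXCL device is actually
        # present and falls back to "ax" when none is found.
        return "axcl"
    return "ax"
```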
zylo117 authored Dec 10, 2024
1 parent 36aefe3 commit 9371cff
Showing 11 changed files with 1,096 additions and 400 deletions.
23 changes: 22 additions & 1 deletion README.md
@@ -4,7 +4,7 @@

## Introduction

**PyAXEngine** implements a Python API for the Axera NPU Runtime on top of the cffi module, making it easy for open-source community developers to quickly build NPU inference scripts in Python
**PyAXEngine** implements a Python API for the Axera NPU Runtime on top of the cffi module, supports both the development-board and M.2 compute-card form factors, and makes it easy for open-source community developers to quickly build NPU inference scripts in Python

Supported chips

@@ -43,6 +43,27 @@ Class Index: 278, Score: 8.981077194213867
Class Index: 277, Score: 8.452778816223145
Class Index: 281, Score: 8.320704460144043
Class Index: 287, Score: 7.924479961395264
# The compute device is auto-detected by default, but you can also force execution on an AX650 M.2 compute card, here with device number 1
root@ax650:~/samples# python3 classification.py -b axcl -d 1
[INFO] SOC Name: AX650N
[INFO] Runtime version: 1.0.0
[INFO] VNPU type: VNPUType.DISABLED
[INFO] Compiler version: 1.2-patch2 7e6b2b5f
grp_id: 0
input size: 1
name: input
shape: 1 x 224 x 224 x 3
output size: 1
name: output
shape: 1 x 1000
[INFO] cost time in host to device: 0.617ms, inference: 1.087ms, device to host: 0.266ms
Top 5 Predictions:
Class Index: 282, Score: 9.77352523803711
Class Index: 278, Score: 8.981077194213867
Class Index: 277, Score: 8.452778816223145
Class Index: 281, Score: 8.320704460144043
Class Index: 287, Score: 7.924479961395264
```

## Related projects
2 changes: 2 additions & 0 deletions axengine/__init__.py
@@ -6,3 +6,5 @@
#

from .session import InferenceSession
from .ax_session import InferenceSession as AXInferenceSession
from .axcl_session import InferenceSession as AXCLInferenceSession
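
For context, here is a hypothetical usage sketch of the exports above; the constructor arguments (model path, `device_id`) and the automatic backend selection are assumptions based on the commit message, not confirmed signatures:

```python
# Hypothetical usage of the exports above; the constructor arguments and the
# "device_id" keyword are assumptions for illustration only.
import numpy as np
import axengine as axe

# InferenceSession is expected to pick the ax/axcl backend automatically.
sess = axe.InferenceSession("model.axmodel")
# ...or a backend-specific class can be used directly, e.g. on an M.2 card:
# sess = axe.AXCLInferenceSession("model.axmodel", device_id=1)

dummy = np.zeros((1, 224, 224, 3), dtype=np.uint8)
outputs = sess.run(None, {sess.get_inputs()[0].name: dummy})
print(outputs[0].shape)
```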
File renamed without changes.
2 changes: 1 addition & 1 deletion axengine/_chip.py → axengine/_ax_chip.py
@@ -6,7 +6,7 @@
#

from . import _types
from ._capi import E as _lib
from ._ax_capi import E as _lib

__all__ = ["T"]

200 changes: 200 additions & 0 deletions axengine/_axcl_capi.py
@@ -0,0 +1,200 @@
# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved.
#
# This source file is the property of Axera Semiconductor Co., Ltd. and
# may not be copied or distributed in any isomorphic form without the prior
# written consent of Axera Semiconductor Co., Ltd.
#
# modified by zylo117

import ctypes.util
import platform

from cffi import FFI

__all__ = ["R", "O"]

O = FFI()

# axcl_base.h
O.cdef(
"""
#define AXCL_MAX_DEVICE_COUNT 256
typedef int32_t axclError;
"""
)

# axcl_rt_type.h
O.cdef(
"""
typedef struct axclrtDeviceList {
uint32_t num;
int32_t devices[AXCL_MAX_DEVICE_COUNT];
} axclrtDeviceList;
typedef enum axclrtMemMallocPolicy {
AXCL_MEM_MALLOC_HUGE_FIRST,
AXCL_MEM_MALLOC_HUGE_ONLY,
AXCL_MEM_MALLOC_NORMAL_ONLY
} axclrtMemMallocPolicy;
typedef enum axclrtMemcpyKind {
AXCL_MEMCPY_HOST_TO_HOST,
AXCL_MEMCPY_HOST_TO_DEVICE, //!< host vir -> device phy
AXCL_MEMCPY_DEVICE_TO_HOST, //!< host vir <- device phy
AXCL_MEMCPY_DEVICE_TO_DEVICE,
AXCL_MEMCPY_HOST_PHY_TO_DEVICE, //!< host phy -> device phy
AXCL_MEMCPY_DEVICE_TO_HOST_PHY, //!< host phy <- device phy
} axclrtMemcpyKind;
"""
)

# axcl_rt_engine_type.h
O.cdef(
"""
#define AXCLRT_ENGINE_MAX_DIM_CNT 32
typedef void* axclrtEngineIOInfo;
typedef void* axclrtEngineIO;
typedef enum axclrtEngineVNpuKind {
AXCL_VNPU_DISABLE = 0,
AXCL_VNPU_ENABLE = 1,
AXCL_VNPU_BIG_LITTLE = 2,
AXCL_VNPU_LITTLE_BIG = 3,
} axclrtEngineVNpuKind;
typedef struct axclrtEngineIODims {
int32_t dimCount;
int32_t dims[AXCLRT_ENGINE_MAX_DIM_CNT];
} axclrtEngineIODims;
"""
)

# ax_model_runner_axcl.cpp
O.cdef(
"""
typedef enum
{
AX_ENGINE_ABST_DEFAULT = 0,
AX_ENGINE_ABST_CACHED = 1,
} AX_ENGINE_ALLOC_BUFFER_STRATEGY_T;
typedef struct
{
int nIndex;
int nSize;
void *pBuf;
void *pVirAddr;
const char *Name;
axclrtEngineIODims dims;
} AXCL_IO_BUF_T;
typedef struct
{
uint32_t nInputSize;
uint32_t nOutputSize;
AXCL_IO_BUF_T *pInputs;
AXCL_IO_BUF_T *pOutputs;
} AXCL_IO_DATA_T;
"""
)

# ax_model_runner.hpp
O.cdef(
"""
typedef struct
{
const char * sName;
unsigned int nIdx;
unsigned int vShape[AXCLRT_ENGINE_MAX_DIM_CNT];
unsigned int vShapeSize;
int nSize;
unsigned long long phyAddr;
void *pVirAddr;
} ax_runner_tensor_t;
"""
)

# stdlib.h/string.h
O.cdef(
"""
void free (void *__ptr);
void *malloc(size_t size);
void *memset (void *__s, int __c, size_t __n);
void *memcpy (void * __dest, const void * __src, size_t __n);
"""
)

# axcl.h
O.cdef(
"""
axclError axclInit(const char *config);
axclError axclFinalize();
"""
)

# axcl_rt.h
O.cdef(
"""
axclError axclrtGetVersion(int32_t *major, int32_t *minor, int32_t *patch);
const char *axclrtGetSocName();
"""
)

# axcl_rt_device.h
O.cdef(
"""
axclError axclrtGetDeviceList(axclrtDeviceList *deviceList);
axclError axclrtSetDevice(int32_t deviceId);
"""
)

# axcl_rt_engine.h
O.cdef(
"""
axclError axclrtEngineInit(axclrtEngineVNpuKind npuKind);
axclError axclrtEngineLoadFromMem(const void *model, uint64_t modelSize, uint64_t *modelId);
axclError axclrtEngineCreateContext(uint64_t modelId, uint64_t *contextId);
axclError axclrtEngineGetVNpuKind(axclrtEngineVNpuKind *npuKind);
const char* axclrtEngineGetModelCompilerVersion(uint64_t modelId);
axclError axclrtEngineGetIOInfo(uint64_t modelId, axclrtEngineIOInfo *ioInfo);
axclError axclrtEngineGetShapeGroupsCount(axclrtEngineIOInfo ioInfo, int32_t *count);
axclError axclrtEngineCreateIO(axclrtEngineIOInfo ioInfo, axclrtEngineIO *io);
uint32_t axclrtEngineGetNumInputs(axclrtEngineIOInfo ioInfo);
uint32_t axclrtEngineGetNumOutputs(axclrtEngineIOInfo ioInfo);
uint64_t axclrtEngineGetInputSizeByIndex(axclrtEngineIOInfo ioInfo, uint32_t group, uint32_t index);
axclError axclrtEngineGetInputDims(axclrtEngineIOInfo ioInfo, uint32_t group, uint32_t index, axclrtEngineIODims *dims);
const char *axclrtEngineGetInputNameByIndex(axclrtEngineIOInfo ioInfo, uint32_t index);
axclError axclrtEngineSetInputBufferByIndex(axclrtEngineIO io, uint32_t index, const void *dataBuffer, uint64_t size);
uint64_t axclrtEngineGetOutputSizeByIndex(axclrtEngineIOInfo ioInfo, uint32_t group, uint32_t index);
axclError axclrtEngineGetOutputDims(axclrtEngineIOInfo ioInfo, uint32_t group, uint32_t index, axclrtEngineIODims *dims);
const char *axclrtEngineGetOutputNameByIndex(axclrtEngineIOInfo ioInfo, uint32_t index);
axclError axclrtEngineSetOutputBufferByIndex(axclrtEngineIO io, uint32_t index, const void *dataBuffer, uint64_t size);
axclError axclrtEngineExecute(uint64_t modelId, uint64_t contextId, uint32_t group, axclrtEngineIO io);
axclError axclrtEngineDestroyIO(axclrtEngineIO io);
axclError axclrtEngineUnload(uint64_t modelId);
"""
)

# axcl_rt_memory.h
O.cdef(
"""
axclError axclrtMalloc(void **devPtr, size_t size, axclrtMemMallocPolicy policy);
axclError axclrtMallocCached(void **devPtr, size_t size, axclrtMemMallocPolicy policy);
axclError axclrtMemcpy(void *dstPtr, const void *srcPtr, size_t count, axclrtMemcpyKind kind);
axclError axclrtFree(void *devPtr);
axclError axclrtMemFlush(void *devPtr, size_t size);
"""
)

rt_name = "axcl_rt"
rt_path = ctypes.util.find_library(rt_name)
assert (
rt_path is not None
), f"Failed to find library {rt_name}. Please ensure it is installed and in the library path."

R = O.dlopen(rt_path)
assert R is not None, f"Failed to load library {rt_path}. Please ensure it is installed and in the library path."
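
To show how these declarations are meant to be consumed, here is a minimal sketch (not part of this file) that exercises a few of the bound functions through cffi; the NULL config passed to `axclInit` and the call order are assumptions:

```python
# A minimal sketch exercising the cffi bindings declared above; the NULL config
# argument and the call order are assumptions, not documented in this file.
from axengine._axcl_capi import O, R

assert R.axclInit(O.NULL) == 0, "axclInit failed"

major, minor, patch = O.new("int32_t *"), O.new("int32_t *"), O.new("int32_t *")
R.axclrtGetVersion(major, minor, patch)
print(f"axcl_rt version: {major[0]}.{minor[0]}.{patch[0]}")

print("SOC:", O.string(R.axclrtGetSocName()).decode())

devices = O.new("axclrtDeviceList *")
R.axclrtGetDeviceList(devices)
print("AXCL devices found:", devices.num)

R.axclFinalize()
```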
75 changes: 75 additions & 0 deletions axengine/_session.py
@@ -0,0 +1,75 @@
from ._node import NodeArg
from ._types import VNPUType

import numpy as np

class BaseInferenceSession:
    def __init__(self, *args, **kwargs) -> None:
        self._shape_count = 0
        self._inputs = []
        self._outputs = []

    def __del__(self):
        self._final()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._final()

    def _init(self, *args, **kwargs):
        return

    def _final(self):
        return

    def _get_version(self) -> str:
        return ''

    def _get_vnpu_type(self) -> VNPUType:
        return VNPUType(0)

    def _get_model_tool_version(self) -> str:
        return ''

    def _load(self) -> int:
        return 0

    def _get_shape_count(self) -> int:
        return 0

    def _unload(self):
        return

    def get_inputs(self, shape_group=0) -> list[NodeArg]:
        if shape_group > self._shape_count:
            raise ValueError(f"Shape group '{shape_group}' is out of range, total {self._shape_count}.")
        selected_info = self._inputs[shape_group]
        return selected_info

    def get_outputs(self, shape_group=0) -> list[NodeArg]:
        if shape_group > self._shape_count:
            raise ValueError(f"Shape group '{shape_group}' is out of range, total {self._shape_count}.")
        selected_info = self._outputs[shape_group]
        return selected_info

    # copy from onnxruntime
    def _validate_input(self, feed_input_names):
        missing_input_names = []
        for i in self.get_inputs():
            if i.name not in feed_input_names:
                missing_input_names.append(i.name)
        if missing_input_names:
            raise ValueError(
                f"Required inputs ({missing_input_names}) are missing from input feed ({feed_input_names})."
            )

    def _validate_output(self, output_names):
        if output_names is not None:
            for name in output_names:
                if name not in [o.name for o in self.get_outputs()]:
                    raise ValueError(f"Output name '{name}' is not registered.")

    def run(self, output_names, input_feed, run_options=None) -> list[np.ndarray]:
        return []
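
Below is a minimal sketch (not part of the commit) of how a concrete backend session could build on this base class on a device with the runtime installed; `_FakeArg` stands in for `NodeArg` and the placeholder shapes are assumptions:

```python
# A minimal sketch of a backend session built on BaseInferenceSession;
# _FakeArg stands in for NodeArg and the shapes are placeholders.
import numpy as np
from axengine._session import BaseInferenceSession

class _FakeArg:
    def __init__(self, name):
        self.name = name

class DummySession(BaseInferenceSession):
    def __init__(self):
        super().__init__()
        # One shape group with a single input and a single output.
        self._inputs = [[_FakeArg("input")]]
        self._outputs = [[_FakeArg("output")]]

    def run(self, output_names, input_feed, run_options=None):
        self._validate_input(list(input_feed.keys()))  # reuse base-class checks
        self._validate_output(output_names)
        # A real backend would copy input_feed to the device, execute the
        # model, and read back the results; zeros stand in for real output.
        return [np.zeros((1, 1000), dtype=np.float32)]

sess = DummySession()
out = sess.run(None, {"input": np.zeros((1, 224, 224, 3), dtype=np.uint8)})
print(out[0].shape)
```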
