Skip to content
This repository has been archived by the owner on Nov 5, 2024. It is now read-only.

Commit

Permalink
[wip] replace pyxrt which does unexplainable things...
Browse files Browse the repository at this point in the history
  • Loading branch information
makslevental committed Jun 11, 2024
1 parent 75c2b2f commit 401459c
Show file tree
Hide file tree
Showing 4 changed files with 243 additions and 3 deletions.
31 changes: 28 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -187,15 +187,40 @@ set_target_properties(

add_dependencies(xaie pyxrt)

# ##############################################################################
# own xrt bindings
# ##############################################################################

# Build the `_xrt` Python extension module from xaiepy/xrt.cpp with pybind11.
pybind11_add_module(_xrt xaiepy/xrt.cpp)
# XRT headers are looked up in XRT_INCLUDE_DIR and in the `gen` directory
# under XRT_BINARY_DIR.
target_include_directories(_xrt PRIVATE ${XRT_INCLUDE_DIR}
${XRT_BINARY_DIR}/gen)
target_link_directories(_xrt PRIVATE ${XRT_LIB_DIR})
# The module links against xrt_coreutil and uuid.
target_link_libraries(_xrt PRIVATE xrt_coreutil uuid)
set_target_properties(
_xrt
PROPERTIES
# pyxrt and xrt in general do a ridiculous dance with drivers
# https://github.com/Xilinx/XRT/blob/edcae12640ce96ec597c4c0cc1b2a850cfcc5c8b/src/runtime_src/core/common/module_loader.cpp#L201-L205
# NOTE(review): with these two properties no build-tree RPATH is embedded;
# presumably the XRT libraries are then located at runtime through
# LD_LIBRARY_PATH (examples/harness.py mentions it) - confirm.
SKIP_BUILD_RPATH ON
BUILD_WITH_INSTALL_RPATH ON)
# Make building `xaie` also build `_xrt`.
add_dependencies(xaie _xrt)

# ##############################################################################
# finish
# ##############################################################################

set_target_properties(_bootgen _xclbinutil bootgen-lib cdo_driver xaie
xclbinutil-lib PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(
_bootgen
_xclbinutil
_xrt
bootgen-lib
cdo_driver
xaie
xclbinutil-lib
PROPERTIES POSITION_INDEPENDENT_CODE ON)

set_target_properties(
_bootgen _xclbinutil pyxrt xaie xclbinutil-lib
_bootgen _xclbinutil pyxrt xaie xclbinutil-lib _xrt
PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_DIR}
ARCHIVE_OUTPUT_DIRECTORY ${OUTPUT_DIR}
RUNTIME_OUTPUT_DIRECTORY ${OUTPUT_DIR})
51 changes: 51 additions & 0 deletions examples/harness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from pathlib import Path

import numpy as np
from filelock import FileLock
from xaiepy.xrt import XCLBin, list_kernels

# don't forget LD_LIBRARY_PATH=/opt/xilinx/xrt/lib:/usr/lib/x86_64-linux-gnu


# Problem size: A is (M, K), B is (K,) and the result C is (M,).
M = K = N = 64

TEST = "basic_matrix_multiplication_matrix_vector"
WORKDIR = Path(__file__).parent.absolute() / TEST / "module_dummy1_amdaie_xclbin_fb"
NPU_INSTS_FP = f"{WORKDIR}/module_dummy1_amdaie_xclbin_fb.npu.txt"
XCLBIN_PATH = f"{WORKDIR}/module_dummy1_amdaie_xclbin_fb.xclbin"
KERNEL_NAME = "dummy2"
NUM_ARGS = 3

# The instruction file holds one hexadecimal word per line. It is read exactly
# once; blank lines (e.g. a trailing newline at the end of the file) are
# skipped instead of making int() raise.
with open(NPU_INSTS_FP, "r") as f:
    npu_insts = [int(line, 16) for line in f if line.strip()]

list_kernels(XCLBIN_PATH)

# The file lock serialises this script against other processes that take the
# same lock file.
with FileLock("/tmp/npu.lock"):
    xclbin = XCLBin(XCLBIN_PATH, KERNEL_NAME)
    # One buffer per kernel argument, in order: A, B, C.
    views = xclbin.mmap_buffers([(M, K), (K,), (M,)], np.float32)

    xclbin.load_npu_instructions(npu_insts)

    A = np.ones((M, K), dtype=np.float32)
    B = 2 * np.ones((K,), dtype=np.float32)
    C = np.zeros((M,), dtype=np.float32)

    # Wrap the returned views as numpy arrays and fill them in place;
    # casting="no" makes copyto refuse any dtype conversion.
    wraps = list(map(np.asarray, views))
    np.copyto(wraps[0], A, casting="no")
    np.copyto(wraps[1], B, casting="no")
    np.copyto(wraps[2], C, casting="no")

    xclbin.sync_buffers_to_device()
    xclbin.run()
    print("Running kernel")
    xclbin.wait(30)
    xclbin.sync_buffers_from_device()

    print(wraps)
    assert np.allclose(A @ B, wraps[2])
163 changes: 163 additions & 0 deletions xaiepy/xrt.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
//===- xrt.cpp --------------------------------------------------*- C++ -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Copyright (C) 2023, Advanced Micro Devices, Inc.
//
//===----------------------------------------------------------------------===//

#include "xrt/xrt_bo.h"
#include "xrt/xrt_device.h"
#include "xrt/xrt_kernel.h"

#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/pytypes.h>
#include <pybind11/stl.h>

#include <algorithm>
#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iterator>
#include <memory>
#include <numeric>
#include <optional>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

namespace py = pybind11;
using namespace py::literals;

// group_id 0 is for npu instructions
// group_id 1 is for number of npu instructions
// host side buffers/args follow starting from position 2
// see aiecc.main.emit_design_kernel_json
constexpr size_t HOST_BUFFERS_START_IDX = 2;

/// Bundles the XRT objects needed to run a single kernel from an xclbin:
/// the xclbin itself, the device, a hardware context, the kernel, the
/// instruction buffer, the host data buffers and the most recent run.
///
/// Expected call order: construct, `mmapBuffers`, `loadNPUInstructions`,
/// `syncBuffersToDevice`, `run`, `wait`, `syncBuffersFromDevice`.
///
/// The memoryviews handed out by `mmapBuffers` point into buffers stored in
/// this object and hold no reference to it.
/// NOTE(review): they presumably must not be used after this object has been
/// destroyed - confirm against the XRT buffer-mapping lifetime rules.
class PyXCLBin {
public:
  /// Loads the xclbin at `xclBinPath`, registers it with the device selected
  /// by `deviceIndex`, creates a hardware context for it and looks up the
  /// kernel called `kernelName`.
  PyXCLBin(const std::string &xclBinPath, const std::string &kernelName,
           int deviceIndex)
      : xclBin(std::make_unique<xrt::xclbin>(xclBinPath)),
        device(std::make_unique<xrt::device>(deviceIndex)) {
    device->register_xclbin(*xclBin);
    context = std::make_unique<xrt::hw_context>(*device, xclBin->get_uuid());
    kernel = std::make_unique<xrt::kernel>(*context, kernelName);
  }

  /// Copies `insts` into a new buffer bound to kernel argument 0 and syncs
  /// it to the device. Any previously loaded instructions are replaced, but
  /// only once the new buffer has been written and synced successfully.
  void loadNPUInstructions(const std::vector<uint32_t> &insts) {
    auto instructionBuffer =
        std::make_unique<xrt::bo>(*device, insts.size() * sizeof(uint32_t),
                                  XCL_BO_FLAGS_CACHEABLE, kernel->group_id(0));
    instructionBuffer->write(insts.data());
    instructionBuffer->sync(XCL_BO_SYNC_BO_TO_DEVICE);
    npuInstructions = std::move(instructionBuffer);
  }

  /// Allocates one host-only buffer per entry of `shapes`, zero-fills it and
  /// returns a writable, row-major memoryview of `ElementT` over each one.
  ///
  /// Buffers are appended to `buffers`; the kernel argument used for a
  /// buffer is `HOST_BUFFERS_START_IDX` plus its position in `buffers`,
  /// which is the same mapping `run` uses, so repeated calls stay
  /// consistent.
  ///
  /// \throws std::invalid_argument if a shape is empty or has a negative
  ///         dimension. All shapes are validated before anything is
  ///         allocated.
  template <typename ElementT>
  std::vector<py::memoryview>
  mmapBuffers(const std::vector<std::vector<int>> &shapes) {
    for (const std::vector<int> &shape : shapes) {
      if (shape.empty())
        throw std::invalid_argument(
            "buffer shape must have at least one dimension");
      for (const int dim : shape)
        if (dim < 0)
          throw std::invalid_argument(
              "buffer dimensions must be non-negative");
    }

    buffers.reserve(buffers.size() + shapes.size());
    std::vector<py::memoryview> views;
    views.reserve(shapes.size());

    for (const std::vector<int> &shape : shapes) {
      const std::vector<py::ssize_t> dims(shape.begin(), shape.end());

      // Element count in size_t so large shapes do not overflow int.
      size_t nElements = 1;
      for (const py::ssize_t dim : dims)
        nElements *= static_cast<size_t>(dim);

      const int argIndex =
          static_cast<int>(HOST_BUFFERS_START_IDX + buffers.size());
      std::unique_ptr<xrt::bo> hostBuffer = std::make_unique<xrt::bo>(
          *device, nElements * sizeof(ElementT), XRT_BO_FLAGS_HOST_ONLY,
          kernel->group_id(argIndex));

      ElementT *data = hostBuffer->map<ElementT *>();
      std::fill_n(data, nElements, static_cast<ElementT>(0));

      // Row-major strides in bytes: the last dimension is contiguous.
      std::vector<py::ssize_t> strides(dims.size());
      py::ssize_t stride = static_cast<py::ssize_t>(sizeof(ElementT));
      for (size_t d = dims.size(); d-- > 0;) {
        strides[d] = stride;
        stride *= dims[d];
      }

      buffers.push_back(std::move(hostBuffer));
      views.push_back(py::memoryview::from_buffer(data, dims, strides));
    }
    return views;
  }

  /// Returns `address()` of the buffer at position `idx` in `buffers`.
  /// \throws std::out_of_range if `idx` is not a valid buffer index.
  uint64_t getBufferHostAddress(size_t idx) {
    return buffers.at(idx)->address();
  }

  /// Syncs every host data buffer to the device.
  void syncBuffersToDevice() {
    for (auto &buf : this->buffers)
      buf->sync(XCL_BO_SYNC_BO_TO_DEVICE);
  }

  /// Syncs every host data buffer back from the device.
  void syncBuffersFromDevice() {
    for (auto &buf : this->buffers)
      buf->sync(XCL_BO_SYNC_BO_FROM_DEVICE);
  }

  /// Creates a new run of the kernel, binds the instruction buffer, its
  /// size and all host data buffers as arguments, and starts it without
  /// waiting for completion.
  /// \throws std::runtime_error if no instructions have been loaded yet.
  void run() {
    if (!npuInstructions)
      throw std::runtime_error(
          "NPU instructions must be loaded before calling run");
    run_ = std::make_unique<xrt::run>(*kernel);
    run_->set_arg(0, *npuInstructions);
    // NOTE(review): this passes the instruction buffer size in bytes, while
    // the comment above HOST_BUFFERS_START_IDX describes argument 1 as the
    // number of instructions - confirm which one the kernel expects.
    run_->set_arg(1, npuInstructions->size());
    for (size_t i = 0; i < buffers.size(); ++i)
      run_->set_arg(static_cast<int>(HOST_BUFFERS_START_IDX + i), *buffers[i]);
    run_->start();
  }

  /// Blocks until the run started by `run` has completed.
  ///
  /// `timeout` is interpreted as a number of seconds; when it is absent or
  /// not positive the wait is unbounded.
  /// \throws std::runtime_error if `run` has not been called, or if the
  ///         timeout expires before the run completes.
  void wait(const std::optional<int> timeout) {
    if (!run_)
      throw std::runtime_error("run must be called before wait");
    if (!timeout || *timeout <= 0) {
      run_->wait2();
      return;
    }
    if (run_->wait2(std::chrono::seconds(*timeout)) == std::cv_status::timeout)
      throw std::runtime_error("timed out waiting for kernel run to complete");
  }

  std::unique_ptr<xrt::xclbin> xclBin;
  std::unique_ptr<xrt::device> device;
  std::unique_ptr<xrt::hw_context> context;
  std::unique_ptr<xrt::kernel> kernel;
  // Null until loadNPUInstructions has succeeded.
  std::unique_ptr<xrt::bo> npuInstructions;

  // Host data buffers; position i is bound to kernel argument
  // HOST_BUFFERS_START_IDX + i.
  std::vector<std::unique_ptr<xrt::bo>> buffers;

  // Null until run has been called.
  std::unique_ptr<xrt::run> run_;
};

PYBIND11_MODULE(_xrt, m) {
  // Selects the mmapBuffers instantiation from a numpy scalar type object.
  // The comparison is by identity against attributes of the `numpy` module,
  // so only the type objects themselves (e.g. `numpy.float32`) are matched.
  auto mmapByNumpyType = [](PyXCLBin &self,
                            const std::vector<std::vector<int>> &shapes,
                            const py::object &npFormat) {
    auto numpy = py::module_::import("numpy");
    auto isNumpyType = [&](const char *typeName) {
      return npFormat.is(numpy.attr(typeName));
    };

    if (isNumpyType("int16"))
      return self.mmapBuffers<int16_t>(shapes);
    if (isNumpyType("int32"))
      return self.mmapBuffers<int32_t>(shapes);
    if (isNumpyType("float32"))
      return self.mmapBuffers<float>(shapes);
    if (isNumpyType("int64"))
      return self.mmapBuffers<int64_t>(shapes);
    if (isNumpyType("float64"))
      return self.mmapBuffers<double>(shapes);

    const std::string formatRepr = py::repr(npFormat).cast<std::string>();
    throw std::runtime_error("unsupported np format: " + formatRepr);
  };

  // Python-facing wrapper class around PyXCLBin.
  py::class_<PyXCLBin> xclBinClass(m, "XCLBin", py::module_local());
  xclBinClass.def(py::init<const std::string &, const std::string &, int>(),
                  "xclbin_path"_a, "kernel_name"_a, "device_index"_a = 0);
  xclBinClass.def("load_npu_instructions", &PyXCLBin::loadNPUInstructions,
                  "insts"_a);
  xclBinClass.def("sync_buffers_to_device", &PyXCLBin::syncBuffersToDevice);
  xclBinClass.def("sync_buffers_from_device",
                  &PyXCLBin::syncBuffersFromDevice);
  xclBinClass.def("run", &PyXCLBin::run);
  xclBinClass.def("wait", &PyXCLBin::wait, "timeout"_a = py::none());
  xclBinClass.def("mmap_buffers", mmapByNumpyType, "shapes"_a, "np_format"_a);
  xclBinClass.def("_get_buffer_host_address",
                  &PyXCLBin::getBufferHostAddress);

  // Prints the name of every kernel found in the xclbin file at `fp`.
  m.def("list_kernels", [](std::string fp) {
    const xrt::xclbin binary(fp);
    for (const auto &kernelInfo : binary.get_kernels())
      py::print(kernelInfo.get_name());
  });
}
1 change: 1 addition & 0 deletions xaiepy/xrt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from ._xrt import *

0 comments on commit 401459c

Please sign in to comment.