|
| 1 | +// SPDX-FileCopyrightText: Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved. |
| 2 | +// SPDX-License-Identifier: Apache-2.0 |
| 3 | + |
| 4 | +#include "cxxopts.hpp" |
| 5 | +#include "golden_reference_reader.h" |
| 6 | +#include "test_utils.h" |
| 7 | +#include "xrt/xrt_bo.h" |
| 8 | +#include "xrt/xrt_device.h" |
| 9 | +#include "xrt/xrt_kernel.h" |
| 10 | + |
| 11 | +#include <cstdint> |
| 12 | +#include <cstdlib> |
| 13 | +#include <fstream> |
| 14 | +#include <iostream> |
| 15 | +#include <sstream> |
| 16 | +#include <string> |
| 17 | +#include <vector> |
| 18 | + |
| 19 | +int main(int argc, const char *argv[]) |
| 20 | +{ |
| 21 | + // Program arguments parsing |
| 22 | + cxxopts::Options options("Leaky ReLU Test"); |
| 23 | + cxxopts::ParseResult vm; |
| 24 | + |
| 25 | + options.add_options()("help,h", |
| 26 | + "produce help message")("xclbin,x", "the input xclbin path", cxxopts::value<std::string>())( |
| 27 | + "kernel,k", "the kernel name in the XCLBIN (for instance PP_PRE_FD)", cxxopts::value<std::string>())( |
| 28 | + "verbosity,v", "the verbosity of the output", cxxopts::value<int>()->default_value("0"))( |
| 29 | + "instr,i", |
| 30 | + "path of file containing userspace instructions to be sent to the LX6", |
| 31 | + cxxopts::value<std::string>())( |
| 32 | + "length,l", "the length of the transfer in std::bfloat16_t", cxxopts::value<int>()->default_value("4096"))( |
| 33 | + "ref", |
| 34 | + "path to golden reference file", |
| 35 | + cxxopts::value<std::string>()->default_value("golden_leaky_relu/golden_reference.bin"))( |
| 36 | + "alpha,a", "alpha parameter for Leaky ReLU", cxxopts::value<float>()->default_value("0.01")); |
| 37 | + |
| 38 | + try { |
| 39 | + vm = options.parse(argc, argv); |
| 40 | + |
| 41 | + if (vm.count("help")) { |
| 42 | + std::cout << options.help() << std::endl; |
| 43 | + return 1; |
| 44 | + } |
| 45 | + |
| 46 | + // Check required options |
| 47 | + if (!vm.count("xclbin") || !vm.count("kernel") || !vm.count("instr") || !vm.count("ref")) { |
| 48 | + std::cerr << "Error: Required options missing\n\n"; |
| 49 | + std::cerr << "Usage:\n" << options.help() << std::endl; |
| 50 | + return 1; |
| 51 | + } |
| 52 | + } catch (const cxxopts::exceptions::parsing &e) { |
| 53 | + std::cerr << e.what() << "\n\n"; |
| 54 | + std::cerr << "Usage:\n" << options.help() << std::endl; |
| 55 | + return 1; |
| 56 | + } |
| 57 | + |
| 58 | + std::vector<uint32_t> instr_v = test_utils::load_instr_binary(vm["instr"].as<std::string>()); |
| 59 | + |
| 60 | + std::string ref_path = vm["ref"].as<std::string>(); |
| 61 | + GoldenReference ref = GoldenReference::fromFile(ref_path); |
| 62 | + |
| 63 | + int verbosity = vm["verbosity"].as<int>(); |
| 64 | + if (verbosity >= 1) |
| 65 | + std::cout << "Sequence instr count: " << instr_v.size() << std::endl; |
| 66 | + |
| 67 | + int N = vm["length"].as<int>(); |
| 68 | + if ((N % 1024)) { |
| 69 | + std::cerr << "Length must be a multiple of 1024." << std::endl; |
| 70 | + return 1; |
| 71 | + } |
| 72 | + |
| 73 | + float alpha = vm["alpha"].as<float>(); |
| 74 | + std::bfloat16_t alpha_bf16 = static_cast<std::bfloat16_t>(alpha); |
| 75 | + |
| 76 | + // Start the XRT test code |
| 77 | + // Get a device handle |
| 78 | + unsigned int device_index = 0; |
| 79 | + auto device = xrt::device(device_index); |
| 80 | + |
| 81 | + // Load the xclbin |
| 82 | + if (verbosity >= 1) |
| 83 | + std::cout << "Loading xclbin: " << vm["xclbin"].as<std::string>() << std::endl; |
| 84 | + auto xclbin = xrt::xclbin(vm["xclbin"].as<std::string>()); |
| 85 | + |
| 86 | + if (verbosity >= 1) |
| 87 | + std::cout << "Kernel opcode: " << vm["kernel"].as<std::string>() << std::endl; |
| 88 | + std::string Node = vm["kernel"].as<std::string>(); |
| 89 | + |
| 90 | + // Get the kernel from the xclbin |
| 91 | + auto xkernels = xclbin.get_kernels(); |
| 92 | + auto xkernel = *std::find_if(xkernels.begin(), xkernels.end(), [Node](xrt::xclbin::kernel &k) { |
| 93 | + auto name = k.get_name(); |
| 94 | + std::cout << "Name: " << name << std::endl; |
| 95 | + return name.rfind(Node, 0) == 0; |
| 96 | + }); |
| 97 | + auto kernelName = xkernel.get_name(); |
| 98 | + |
| 99 | + if (verbosity >= 1) |
| 100 | + std::cout << "Registering xclbin: " << vm["xclbin"].as<std::string>() << "\n"; |
| 101 | + |
| 102 | + device.register_xclbin(xclbin); |
| 103 | + |
| 104 | + // get a hardware context |
| 105 | + if (verbosity >= 1) |
| 106 | + std::cout << "Getting hardware context." << std::endl; |
| 107 | + xrt::hw_context context(device, xclbin.get_uuid()); |
| 108 | + |
| 109 | + // get a kernel handle |
| 110 | + if (verbosity >= 1) |
| 111 | + std::cout << "Getting handle to kernel:" << kernelName << std::endl; |
| 112 | + auto kernel = xrt::kernel(context, kernelName); |
| 113 | + |
| 114 | + auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1)); |
| 115 | + auto bo_inA = xrt::bo(device, N * sizeof(std::bfloat16_t), XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); |
| 116 | + auto bo_out = xrt::bo(device, N * sizeof(std::bfloat16_t), XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); |
| 117 | + auto bo_alpha = xrt::bo(device, sizeof(std::bfloat16_t), XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(5)); |
| 118 | + |
| 119 | + if (verbosity >= 1) |
| 120 | + std::cout << "Writing data into buffer objects." << std::endl; |
| 121 | + |
| 122 | + std::bfloat16_t *bufInA = bo_inA.map<std::bfloat16_t *>(); |
| 123 | + memcpy(bufInA, ref.get<std::bfloat16_t>("A")->data(), N * sizeof(std::bfloat16_t)); |
| 124 | + |
| 125 | + std::bfloat16_t *bufAlpha = bo_alpha.map<std::bfloat16_t *>(); |
| 126 | + *bufAlpha = alpha_bf16; |
| 127 | + |
| 128 | + void *bufInstr = bo_instr.map<void *>(); |
| 129 | + memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(int)); |
| 130 | + |
| 131 | + bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); |
| 132 | + bo_inA.sync(XCL_BO_SYNC_BO_TO_DEVICE); |
| 133 | + bo_alpha.sync(XCL_BO_SYNC_BO_TO_DEVICE); |
| 134 | + |
| 135 | + if (verbosity >= 1) |
| 136 | + std::cout << "Running Kernel." << std::endl; |
| 137 | + unsigned int opcode = 3; |
| 138 | + // Setup run to configure |
| 139 | + auto cfg_run = kernel(opcode, bo_instr, instr_v.size(), bo_inA, bo_out, bo_alpha); |
| 140 | + cfg_run.wait(); |
| 141 | + auto start = std::chrono::high_resolution_clock::now(); |
| 142 | + // Test run |
| 143 | + auto run = kernel(opcode, bo_instr, instr_v.size(), bo_inA, bo_out, bo_alpha); |
| 144 | + ert_cmd_state r = run.wait(); |
| 145 | + auto stop = std::chrono::high_resolution_clock::now(); |
| 146 | + if (r != ERT_CMD_STATE_COMPLETED) { |
| 147 | + std::cout << "Kernel did not complete. Returned status: " << r << std::endl; |
| 148 | + return 1; |
| 149 | + } |
| 150 | + const float npu_time = std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count(); |
| 151 | + |
| 152 | + bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE); |
| 153 | + std::cout << std::endl; |
| 154 | + std::cout << "Latency (us): " << npu_time << std::endl; |
| 155 | + std::cout << std::endl; |
| 156 | + |
| 157 | + double total_bytes = 2.0 * N * sizeof(std::bfloat16_t); // input and output |
| 158 | + double bandwidth_GBps = (total_bytes / (1024 * 1024 * 1024)) / (npu_time * 1e-6); |
| 159 | + std::cout << "Effective Bandwidth: " << bandwidth_GBps << " GB/s" << std::endl; |
| 160 | + |
| 161 | + std::bfloat16_t *bufOut = bo_out.map<std::bfloat16_t *>(); |
| 162 | + |
| 163 | + int errors = 0; |
| 164 | + auto ref_B = ref.get<std::bfloat16_t>("B"); |
| 165 | + |
| 166 | + for (int i = 0; i < N; i++) { |
| 167 | + std::bfloat16_t ref_val = (*ref_B)[i]; |
| 168 | + // if (i < 10){ |
| 169 | + // std::cout << "Index " << i << ": Computed=" << *(bufOut + i) << ", Reference=" << ref_val << std::endl; |
| 170 | + // } |
| 171 | + if (!test_utils::nearly_equal(*(bufOut + i), ref_val, 0.01, 1e-6)) { |
| 172 | + errors++; |
| 173 | + // Print the first 100 mismatches |
| 174 | + if (errors <= 100) { |
| 175 | + std::cout << "Mismatch at index " << i << ": " << "Expected: " << ref_val << ", " |
| 176 | + << "Got: " << *(bufOut + i) << std::endl; |
| 177 | + } |
| 178 | + } |
| 179 | + } |
| 180 | + |
| 181 | + if (!errors) { |
| 182 | + std::cout << std::endl << "PASS!" << std::endl << std::endl; |
| 183 | + return 0; |
| 184 | + } else { |
| 185 | + std::cout << std::endl << errors << " mismatches." << std::endl << std::endl; |
| 186 | + std::cout << std::endl << "fail." << std::endl << std::endl; |
| 187 | + return 1; |
| 188 | + } |
| 189 | +} |
0 commit comments