From 64003d5f8a0ab09e48e56b3e7e246fb3875ade20 Mon Sep 17 00:00:00 2001 From: xiaokamikami Date: Tue, 27 Aug 2024 18:50:52 +0800 Subject: [PATCH 01/17] fpga: Add pcie XDMA framework Co-Author: xushuoxiang --- Makefile | 6 +++ src/test/csrc/fpga/xdma.cpp | 63 +++++++++++++++++++++++++++ src/test/csrc/fpga/xdma.h | 71 +++++++++++++++++++++++++++++++ src/test/csrc/fpga/xdma_mpool.cpp | 0 4 files changed, 140 insertions(+) create mode 100644 src/test/csrc/fpga/xdma.cpp create mode 100644 src/test/csrc/fpga/xdma.h create mode 100644 src/test/csrc/fpga/xdma_mpool.cpp diff --git a/Makefile b/Makefile index fe1a0edf6..04bd1054d 100644 --- a/Makefile +++ b/Makefile @@ -78,6 +78,12 @@ SIM_VSRC = $(shell find $(VSRC_DIR) -name "*.v" -or -name "*.sv") # DiffTest support DIFFTEST_CSRC_DIR = $(abspath ./src/test/csrc/difftest) +# FPGA-Difftest support +FPGA ?= 0 +ifeq ($(FPGA),1) +DIFFTEST_CSRC_DIR += $(abspath ./src/test/csrc/fpga) +endif + DIFFTEST_CXXFILES = $(shell find $(DIFFTEST_CSRC_DIR) -name "*.cpp") ifeq ($(NO_DIFF), 1) SIM_CXXFLAGS += -DCONFIG_NO_DIFFTEST diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp new file mode 100644 index 000000000..f55f58ab5 --- /dev/null +++ b/src/test/csrc/fpga/xdma.cpp @@ -0,0 +1,63 @@ +/*************************************************************************************** +* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* +* DiffTest is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. +***************************************************************************************/ +#include "xdma.h" + +FpgaXdma::FpgaXdma() { + signal(SIGINT, handle_sigint); + fd_c2h = open("/dev/xdma0_c2h_0", O_RDWR); + set_dma_fd_block(); +} + +void FpgaXdma::handle_sigint(int sig) { + printf("Unlink sem success, exit success!\n"); + exit(1); +} + +void FpgaXdma::set_dma_fd_block() { + int flags = fcntl(fd, F_GETFL, 0); + if (flags == -1) { + perror("fcntl get error"); + return; + } + // Clear the O NONBLOCK flag and set it to blocking mode + flags &= ~O_NONBLOCK; + if (fcntl(fd, F_SETFL, flags) == -1) { + perror("fcntl set error"); + return; + } +} + +void FpgaXdma::thread_read_xdma() { + while (running) { + char *memory = memory_pool.get_free_chunk(); + read(fd_c2h, memory, recv_size); + memory_pool.set_busy_chunk(); + } +} + +void FpgaXdma::write_difftest_thread() { + while (running) { + const char *memory = memory_pool.get_busy_chunk(); + memcpy(&diffteststate, memory, sizeof(diffteststate)); + + stream_receiver_cout ++; + memory_pool.set_free_chunk(); + +// Notify difftest to run the next beat + + + } +} diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h new file mode 100644 index 000000000..223dda98d --- /dev/null +++ b/src/test/csrc/fpga/xdma.h @@ -0,0 +1,71 @@ +/*************************************************************************************** +* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* +* DiffTest is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. +***************************************************************************************/ +#ifndef __XDMA_H__ +#define __XDMA_H__ + +#include "common.h" +#include +#include +#include +#include +#include +#include + +#include "diffstate.h" + +#define MAX_DATA_LEN 1024 * 8 - 1 +#define HEAD_DATA_LEN 7 +#define BUFSIZE 1024 * 8 * 8 +#define SHMSZ 27 +#define WAIT_RECV_SLEEP 5 + +typedef struct FpgaPackgeHead { + struct DiffTestState difftestinfo; + unsigned int sequence : 16; + unsigned int message_size : 16; + unsigned long data[HEAD_DATA_LEN]; +} FpgaPackgeHead; + +class FpgaXdma { +public: + struct FpgaPackgeHead *shmadd_recv; + + int shmid_recv; + int ret_recv; + key_t key_recv; + + int fd_c2h; + int fd_interrupt; + + struct FpgaPackgeHead recv_buffer; + unsigned long buffer[8]; + unsigned int recv_size = sizeof(FpgaPackgeHead); + unsigned long old_exec_instr = 0; + + FpgaXdma(); + ~FpgaXdma() {}; + + void set_dma_fd_block(); + void handle_sigint(int sig); + void read_xdma_thread(); + void write_difftest_thread(); + +protected: + std::mutex test_mtx; + std::condition_variable test_cv; +}; + +#endif diff --git a/src/test/csrc/fpga/xdma_mpool.cpp b/src/test/csrc/fpga/xdma_mpool.cpp new file mode 100644 index 000000000..e69de29bb From 5a9d0270d197ad9e41185363935ee28fac6d7b72 Mon Sep 17 00:00:00 2001 From: xiaokamikami Date: Wed, 28 Aug 2024 10:54:16 +0800 Subject: [PATCH 02/17] fpga: add mpool --- src/test/csrc/fpga/mpool.cpp | 72 +++++++++++++++++++++++++++ src/test/csrc/fpga/mpool.h | 83 +++++++++++++++++++++++++++++++ src/test/csrc/fpga/xdma_mpool.cpp | 0 3 files changed, 155 insertions(+) create mode 100644 src/test/csrc/fpga/mpool.cpp create mode 100644 src/test/csrc/fpga/mpool.h delete mode 100644 src/test/csrc/fpga/xdma_mpool.cpp diff --git a/src/test/csrc/fpga/mpool.cpp b/src/test/csrc/fpga/mpool.cpp new file mode 100644 index 000000000..0e6d2122c --- /dev/null +++ b/src/test/csrc/fpga/mpool.cpp @@ -0,0 +1,72 @@ +/*************************************************************************************** +* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* +* DiffTest is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. +***************************************************************************************/ +#include "mpool.h" + +void MemoryPool::init_memory_pool() { + memory_pool.reserve(NUM_BLOCKS); + for (size_t i = 0; i < NUM_BLOCKS; ++i) { + memory_pool.emplace_back(); + block_mutexes[i].unlock(); + } +} + +void MemoryPool::cleanup_memory_pool() { + cv_empty.notify_all(); + cv_filled.notify_all(); + memory_pool.clear(); +} + +void MemoryPool::unlock_thread() { + cv_empty.notify_all(); + cv_filled.notify_all(); +} + +char *MemoryPool::get_free_chunk() { + page_head = (write_index++) & REM_NUM_BLOCKS; + { + std::unique_lock lock(block_mutexes[page_head]); + cv_empty.wait(lock, [this] { return empty_blocks > 0; }); + } + + --empty_blocks; + block_mutexes[page_head].lock(); + return memory_pool[page_head].data.get(); +} + +void MemoryPool::set_busy_chunk() { + memory_pool[page_head].is_free = false; + block_mutexes[page_head].unlock(); + cv_filled.notify_one(); + ++filled_blocks; +} + +const char *MemoryPool::get_busy_chunk() { + page_end = (read_index++) & REM_NUM_BLOCKS; + { + std::unique_lock lock(block_mutexes[page_end]); + cv_filled.wait(lock, [this] { return filled_blocks > 0; }); + } + --filled_blocks; + block_mutexes[page_end].lock(); + return memory_pool[page_end].data.get(); +} + +void MemoryPool::set_free_chunk() { + memory_pool[page_end].is_free = true; + block_mutexes[page_end].unlock(); + cv_empty.notify_one(); + ++empty_blocks; +} diff --git a/src/test/csrc/fpga/mpool.h b/src/test/csrc/fpga/mpool.h new file mode 100644 index 000000000..7c3242588 --- /dev/null +++ b/src/test/csrc/fpga/mpool.h @@ -0,0 +1,83 @@ +/*************************************************************************************** +* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* +* DiffTest is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. +***************************************************************************************/ +#include +#include +#include +#include +#include +#include + +#define MEMPOOL_SIZE 4096 * 1024 // 4M page +#define MEMBLOCK_SIZE 4096 // 4K packge +#define NUM_BLOCKS (MEMPOOL_SIZE / MEMBLOCK_SIZE) +#define REM_NUM_BLOCKS (NUM_BLOCKS - 1) + +extern bool running; +class MemoryPool { +public: + // Constructor to allocate aligned memory blocks + MemoryPool() { + init_memory_pool(); + } + + ~MemoryPool() { + cleanup_memory_pool(); + } + // Disable copy constructors and copy assignment operators + MemoryPool(const MemoryPool &) = delete; + MemoryPool &operator=(const MemoryPool &) = delete; + + void init_memory_pool(); + + // Cleaning up memory pools + void cleanup_memory_pool(); + // Releasing locks manually + void unlock_thread(); + + // Detect a free block and lock the memory that returns the free block + char *get_free_chunk(); + // Set block data valid and locked + void set_busy_chunk(); + + // Gets the latest block of memory + const char *get_busy_chunk(); + // Invalidate and lock the block + void set_free_chunk(); + +private: + struct MemoryBlock { + std::unique_ptr> data; + bool is_free; + + MemoryBlock() : is_free(true) { + void *ptr = nullptr; + if (posix_memalign(&ptr, MEMBLOCK_SIZE, MEMBLOCK_SIZE * 2) != 0) { + throw std::runtime_error("Failed to allocate aligned memory"); + } + data = std::unique_ptr>(static_cast(ptr), [](char *p) { free(p); }); + } + }; + std::vector memory_pool; // Mempool + std::vector block_mutexes{NUM_BLOCKS}; // Partition lock array + std::atomic empty_blocks = NUM_BLOCKS; // Free block count + std::atomic filled_blocks; // Filled blocks count + std::atomic write_index; + std::atomic read_index; + std::condition_variable cv_empty; // Free block condition variable + std::condition_variable cv_filled; // Filled block condition variable + size_t page_head = 0; + size_t page_end = 0; +}; diff --git a/src/test/csrc/fpga/xdma_mpool.cpp b/src/test/csrc/fpga/xdma_mpool.cpp deleted file mode 100644 index e69de29bb..000000000 From 4b118ef50be7f61e4184de6c22b5eeb761bff974 Mon Sep 17 00:00:00 2001 From: xiaokamikami Date: Wed, 28 Aug 2024 15:53:21 +0800 Subject: [PATCH 03/17] difftest: Move mempool to common code --- src/test/csrc/{fpga => common}/mpool.cpp | 0 src/test/csrc/{fpga => common}/mpool.h | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename src/test/csrc/{fpga => common}/mpool.cpp (100%) rename src/test/csrc/{fpga => common}/mpool.h (100%) diff --git a/src/test/csrc/fpga/mpool.cpp b/src/test/csrc/common/mpool.cpp similarity index 100% rename from src/test/csrc/fpga/mpool.cpp rename to src/test/csrc/common/mpool.cpp diff --git a/src/test/csrc/fpga/mpool.h b/src/test/csrc/common/mpool.h similarity index 100% rename from src/test/csrc/fpga/mpool.h rename to src/test/csrc/common/mpool.h From 740232ddec41436623240f70b577c8e79a0f5dad Mon Sep 17 00:00:00 2001 From: xiaokamikami Date: Thu, 29 Aug 2024 11:18:48 +0800 Subject: [PATCH 04/17] fpga: Add function of difftest through dma interface --- src/test/csrc/common/mpool.h | 5 ++ src/test/csrc/difftest/difftest.cpp | 2 + src/test/csrc/fpga/fpga_main.cpp | 103 ++++++++++++++++++++++++++++ src/test/csrc/fpga/xdma.cpp | 42 ++++++++---- src/test/csrc/fpga/xdma.h | 31 +++++---- 5 files changed, 154 insertions(+), 29 deletions(-) create mode 100644 src/test/csrc/fpga/fpga_main.cpp diff --git a/src/test/csrc/common/mpool.h b/src/test/csrc/common/mpool.h index 7c3242588..2aafdea48 100644 --- a/src/test/csrc/common/mpool.h +++ b/src/test/csrc/common/mpool.h @@ -13,6 +13,9 @@ * * See the Mulan PSL v2 for more details. ***************************************************************************************/ +#ifndef __MPOOL_H__ +#define __MPOOL_H__ + #include #include #include @@ -81,3 +84,5 @@ class MemoryPool { size_t page_head = 0; size_t page_end = 0; }; + +#endif diff --git a/src/test/csrc/difftest/difftest.cpp b/src/test/csrc/difftest/difftest.cpp index 095c1fdb5..936c8e991 100644 --- a/src/test/csrc/difftest/difftest.cpp +++ b/src/test/csrc/difftest/difftest.cpp @@ -92,7 +92,9 @@ void difftest_set_dut() { } } int difftest_step() { +#ifndef WITH_FPGA difftest_set_dut(); +#endif for (int i = 0; i < NUM_CORES; i++) { int ret = difftest[i]->step(); if (ret) { diff --git a/src/test/csrc/fpga/fpga_main.cpp b/src/test/csrc/fpga/fpga_main.cpp new file mode 100644 index 000000000..864590915 --- /dev/null +++ b/src/test/csrc/fpga/fpga_main.cpp @@ -0,0 +1,103 @@ +/*************************************************************************************** +* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* +* DiffTest is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. +***************************************************************************************/ + +#include "difftest.h" +#include "diffstate.h" +#include "mpool.h" +#include "xdma.h" + +#define XDMA_C2H_DEVICE "/dev/xdma0_c2h_0" + +enum { + SIMV_RUN, + SIMV_DONE, + SIMV_FAIL, +} simv_state; + +static uint8_t simv_result = SIMV_RUN; +static uint64_t max_instrs = 0; + +struct core_end_info_t { + bool core_trap[NUM_CORES]; + double core_cpi[NUM_CORES]; + uint8_t core_trap_num; +}; +static core_end_info_t core_end_info; + +void simv_init(); +void simv_step(); +void cpu_endtime_check(); +void set_dut_from_xdma(); + +FpgaXdma *xdma_device = NULL; + +int main(int argc, char *argv[]) { + + simv_init(); + + while (simv_result == SIMV_RUN) { + // get xdma data + set_dut_from_xdma(); + + // run difftest + simv_step(); + cpu_endtime_check(); + } +} + +void set_dut_from_xdma() { + { + std::unique_lock lock(xdma_device->diff_mtx); + xdma_device->diff_filled_cv.wait(lock, [] { return xdma_device->diff_packge_filled; }); + for (int i = 0; i < NUM_CORES; i++) { + + difftest[i]->dut = &xdma_device->difftest_pack[i]; + } + xdma_device->diff_packge_filled = false; + xdma_device->diff_empile_cv.notify_one(); + } +} + +void simv_init() { + xdma_device = new FpgaXdma(XDMA_C2H_DEVICE); + difftest_init(); + max_instrs = 40000000; +} + +void simv_step() { + if (difftest_step()) + simv_result = SIMV_FAIL; +} + +void cpu_endtime_check() { + if (max_instrs != 0) { // 0 for no limit + for (int i = 0; i < NUM_CORES; i++) { + if (core_end_info.core_trap[i]) + continue; + auto trap = difftest[i]->get_trap_event(); + if (max_instrs < trap->instrCnt) { + core_end_info.core_trap[i] = true; + core_end_info.core_trap_num++; + eprintf(ANSI_COLOR_GREEN "EXCEEDED CORE-%d MAX INSTR: %ld\n" ANSI_COLOR_RESET, i, max_instrs); + difftest[i]->display_stats(); + core_end_info.core_cpi[i] = (double)trap->cycleCnt / (double)trap->instrCnt; + if (core_end_info.core_trap_num == NUM_CORES) { + simv_result = SIMV_DONE; + } + } + } + } +} diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index f55f58ab5..589bb32c7 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -13,11 +13,15 @@ * * See the Mulan PSL v2 for more details. ***************************************************************************************/ +#include +#include + #include "xdma.h" +#include "mpool.h" -FpgaXdma::FpgaXdma() { +FpgaXdma::FpgaXdma(const char *device_name) { signal(SIGINT, handle_sigint); - fd_c2h = open("/dev/xdma0_c2h_0", O_RDWR); + fd_c2h = open(device_name, O_RDWR); set_dma_fd_block(); } @@ -27,37 +31,47 @@ void FpgaXdma::handle_sigint(int sig) { } void FpgaXdma::set_dma_fd_block() { - int flags = fcntl(fd, F_GETFL, 0); + int flags = fcntl(fd_c2h, F_GETFL, 0); if (flags == -1) { perror("fcntl get error"); return; } // Clear the O NONBLOCK flag and set it to blocking mode flags &= ~O_NONBLOCK; - if (fcntl(fd, F_SETFL, flags) == -1) { + if (fcntl(fd_c2h, F_SETFL, flags) == -1) { perror("fcntl set error"); return; } } -void FpgaXdma::thread_read_xdma() { +void FpgaXdma::read_xdma_thread() { while (running) { - char *memory = memory_pool.get_free_chunk(); + char *memory = xdma_mempool.get_free_chunk(); read(fd_c2h, memory, recv_size); - memory_pool.set_busy_chunk(); + xdma_mempool.set_busy_chunk(); } } void FpgaXdma::write_difftest_thread() { while (running) { - const char *memory = memory_pool.get_busy_chunk(); - memcpy(&diffteststate, memory, sizeof(diffteststate)); - - stream_receiver_cout ++; - memory_pool.set_free_chunk(); + const char *memory = xdma_mempool.get_busy_chunk(); + static uint8_t valid_core = 0; + uint8_t core_id = 0; -// Notify difftest to run the next beat - + memcpy(&core_id, memory + sizeof(DiffTestState), sizeof(uint8_t)); + assert(core_id > NUM_CORES); + { + std::unique_lock lock(diff_mtx); + diff_empile_cv.wait(lock, [this] { return !diff_packge_filled; }); + memcpy(&difftest_pack[core_id], memory, sizeof(DiffTestState)); + } + valid_core ++; + xdma_mempool.set_free_chunk(); + if (core_id == NUM_CORES) { + diff_packge_filled = true; + // Notify difftest to run the next check + diff_filled_cv.notify_one(); + } } } diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index 223dda98d..41112ad6f 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -16,7 +16,6 @@ #ifndef __XDMA_H__ #define __XDMA_H__ -#include "common.h" #include #include #include @@ -24,25 +23,25 @@ #include #include +#include "common.h" #include "diffstate.h" +#include "mpool.h" -#define MAX_DATA_LEN 1024 * 8 - 1 +#define WITH_FPGA #define HEAD_DATA_LEN 7 #define BUFSIZE 1024 * 8 * 8 -#define SHMSZ 27 #define WAIT_RECV_SLEEP 5 typedef struct FpgaPackgeHead { - struct DiffTestState difftestinfo; - unsigned int sequence : 16; - unsigned int message_size : 16; - unsigned long data[HEAD_DATA_LEN]; + DiffTestState difftestinfo; + uint8_t corid; } FpgaPackgeHead; class FpgaXdma { public: struct FpgaPackgeHead *shmadd_recv; - + MemoryPool xdma_mempool; + DiffTestState difftest_pack[NUM_CORES] = {}; int shmid_recv; int ret_recv; key_t key_recv; @@ -50,22 +49,24 @@ class FpgaXdma { int fd_c2h; int fd_interrupt; - struct FpgaPackgeHead recv_buffer; - unsigned long buffer[8]; unsigned int recv_size = sizeof(FpgaPackgeHead); unsigned long old_exec_instr = 0; - FpgaXdma(); + std::condition_variable diff_filled_cv; + std::condition_variable diff_empile_cv; + std::mutex diff_mtx; + bool diff_packge_filled = false; + FpgaXdma(const char *device_name); ~FpgaXdma() {}; void set_dma_fd_block(); - void handle_sigint(int sig); + + // thread api void read_xdma_thread(); void write_difftest_thread(); -protected: - std::mutex test_mtx; - std::condition_variable test_cv; +private: + static void handle_sigint(int sig); }; #endif From 120d708726888bcb2d5bb35f1674573e0eb8c02b Mon Sep 17 00:00:00 2001 From: xiaokamikami Date: Thu, 29 Aug 2024 15:24:00 +0800 Subject: [PATCH 05/17] fpga: add independent compilation and usage support under fpga --- Makefile | 1 + fpga.mk | 19 +++++++++++++++++ src/test/csrc/common/mpool.h | 1 - src/test/csrc/fpga/fpga_main.cpp | 26 +++++++++++++++++++++--- src/test/csrc/fpga/xdma.cpp | 35 +++++++++++++++++++++++++++----- src/test/csrc/fpga/xdma.h | 22 ++++++++++++-------- 6 files changed, 86 insertions(+), 18 deletions(-) create mode 100644 fpga.mk diff --git a/Makefile b/Makefile index 04bd1054d..f4905d173 100644 --- a/Makefile +++ b/Makefile @@ -231,6 +231,7 @@ include verilator.mk include vcs.mk include palladium.mk include libso.mk +include fpga.mk clean: vcs-clean pldm-clean rm -rf $(BUILD_DIR) diff --git a/fpga.mk b/fpga.mk new file mode 100644 index 000000000..e28792301 --- /dev/null +++ b/fpga.mk @@ -0,0 +1,19 @@ + +FPGA = FPGA_HOST +FPGA_TARGET = $(abspath $(BUILD_DIR)/simv) +FPGA_BUILD_DIR = $(abspath $(BUILD_DIR)/simv-compile) +FPGA_RUN_DIR = $(abspath $(BUILD_DIR)/$(notdir $(RUN_BIN))) + +FPGA_CSRC_DIR = $(abspath ./src/test/csrc/fpga) +FPGA_CONFIG_DIR = $(abspath ./config) + +FPGA_CXXFILES = $(SIM_CXXFILES) $(shell find $(FPGA_CSRC_DIR) -name "*.cpp") +FPGA_CXXFLAGS = $(subst \\\",\", $(SIM_CXXFLAGS)) -I$(FPGA_CSRC_DIR) -DNUM_CORES=$(NUM_CORES) +FPGA_LDFLAGS = $(SIM_LDFLAGS) -lpthread -ldl + +fpga-build: fpga-clean fpga-host + +fpga-host: + $(CXX) $(FPGA_CXXFLAGS) $(FPGA_CXXFILES) $^ -o $@ $(FPGA_LDFLAGS) +fpga-clean: + rm -f fpga-host diff --git a/src/test/csrc/common/mpool.h b/src/test/csrc/common/mpool.h index 2aafdea48..0925e3043 100644 --- a/src/test/csrc/common/mpool.h +++ b/src/test/csrc/common/mpool.h @@ -28,7 +28,6 @@ #define NUM_BLOCKS (MEMPOOL_SIZE / MEMBLOCK_SIZE) #define REM_NUM_BLOCKS (NUM_BLOCKS - 1) -extern bool running; class MemoryPool { public: // Constructor to allocate aligned memory blocks diff --git a/src/test/csrc/fpga/fpga_main.cpp b/src/test/csrc/fpga/fpga_main.cpp index 864590915..3f51d21a5 100644 --- a/src/test/csrc/fpga/fpga_main.cpp +++ b/src/test/csrc/fpga/fpga_main.cpp @@ -14,9 +14,10 @@ * See the Mulan PSL v2 for more details. ***************************************************************************************/ -#include "difftest.h" #include "diffstate.h" +#include "difftest.h" #include "mpool.h" +#include "refproxy.h" #include "xdma.h" #define XDMA_C2H_DEVICE "/dev/xdma0_c2h_0" @@ -41,11 +42,13 @@ void simv_init(); void simv_step(); void cpu_endtime_check(); void set_dut_from_xdma(); +void set_diff_ref_so(char *s); +void args_parsingniton(int argc, char *argv[]); -FpgaXdma *xdma_device = NULL; +FpgaXdma *xdma_device = NULL; int main(int argc, char *argv[]) { - + args_parsingniton(argc, argv); simv_init(); while (simv_result == SIMV_RUN) { @@ -56,6 +59,15 @@ int main(int argc, char *argv[]) { simv_step(); cpu_endtime_check(); } + free(xdma_device); +} + +void set_diff_ref_so(char *s) { + extern const char *difftest_ref_so; + printf("diff-test ref so:%s\n", s); + char *buf = (char *)malloc(256); + strcpy(buf, s); + difftest_ref_so = buf; } void set_dut_from_xdma() { @@ -101,3 +113,11 @@ void cpu_endtime_check() { } } } + +void args_parsingniton(int argc, char *argv[]) { + for (int i = 1; i < argc; ++i) { + if (strcmp(argv[i], "--diff") == 0) { + set_diff_ref_so(argv[++i]); + } + } +} diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index 589bb32c7..f586f834a 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -13,15 +13,19 @@ * * See the Mulan PSL v2 for more details. ***************************************************************************************/ -#include -#include - #include "xdma.h" #include "mpool.h" +#include +#include FpgaXdma::FpgaXdma(const char *device_name) { signal(SIGINT, handle_sigint); fd_c2h = open(device_name, O_RDWR); + if (fd_c2h == -1) { + printf("xdma device not find %s\n", device_name); + exit(1); + } + printf("xdma device %s\n", device_name); set_dma_fd_block(); } @@ -34,6 +38,7 @@ void FpgaXdma::set_dma_fd_block() { int flags = fcntl(fd_c2h, F_GETFL, 0); if (flags == -1) { perror("fcntl get error"); + exit(1); return; } // Clear the O NONBLOCK flag and set it to blocking mode @@ -44,6 +49,25 @@ void FpgaXdma::set_dma_fd_block() { } } +void FpgaXdma::start_transmit_thread() { + if (running == true) + return; + receive_thread = std::thread(&FpgaXdma::read_xdma_thread, this); + process_thread = std::thread(&FpgaXdma::write_difftest_thread, this); + running = true; +} + +void FpgaXdma::stop_thansmit_thread() { + if (running == false) + return; + xdma_mempool.unlock_thread(); + if (receive_thread.joinable()) + receive_thread.join(); + if (process_thread.joinable()) + process_thread.join(); + running = false; +} + void FpgaXdma::read_xdma_thread() { while (running) { char *memory = xdma_mempool.get_free_chunk(); @@ -65,11 +89,12 @@ void FpgaXdma::write_difftest_thread() { diff_empile_cv.wait(lock, [this] { return !diff_packge_filled; }); memcpy(&difftest_pack[core_id], memory, sizeof(DiffTestState)); } - valid_core ++; + valid_core++; xdma_mempool.set_free_chunk(); - if (core_id == NUM_CORES) { + if (valid_core == NUM_CORES) { diff_packge_filled = true; + valid_core = 0; // Notify difftest to run the next check diff_filled_cv.notify_one(); } diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index 41112ad6f..c5d5d2c15 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -16,22 +16,18 @@ #ifndef __XDMA_H__ #define __XDMA_H__ +#include "common.h" +#include "diffstate.h" +#include "mpool.h" #include #include #include #include #include +#include #include -#include "common.h" -#include "diffstate.h" -#include "mpool.h" - #define WITH_FPGA -#define HEAD_DATA_LEN 7 -#define BUFSIZE 1024 * 8 * 8 -#define WAIT_RECV_SLEEP 5 - typedef struct FpgaPackgeHead { DiffTestState difftestinfo; uint8_t corid; @@ -48,6 +44,7 @@ class FpgaXdma { int fd_c2h; int fd_interrupt; + bool running = false; unsigned int recv_size = sizeof(FpgaPackgeHead); unsigned long old_exec_instr = 0; @@ -57,15 +54,22 @@ class FpgaXdma { std::mutex diff_mtx; bool diff_packge_filled = false; FpgaXdma(const char *device_name); - ~FpgaXdma() {}; + ~FpgaXdma() { + stop_thansmit_thread(); + }; void set_dma_fd_block(); // thread api + void start_transmit_thread(); + void stop_thansmit_thread(); void read_xdma_thread(); void write_difftest_thread(); private: + std::thread receive_thread; + std::thread process_thread; + static void handle_sigint(int sig); }; From 10f3427ef4ba08e5622c1122bf84401c5dcad611 Mon Sep 17 00:00:00 2001 From: xiaokamikami Date: Wed, 18 Sep 2024 18:13:54 +0800 Subject: [PATCH 06/17] fpga: modify the xdma initi process --- fpga.mk | 3 ++ src/test/csrc/fpga/fpga_main.cpp | 4 +-- src/test/csrc/fpga/xdma.cpp | 55 +++++++++++++++++--------------- src/test/csrc/fpga/xdma.h | 13 +++++--- 4 files changed, 42 insertions(+), 33 deletions(-) diff --git a/fpga.mk b/fpga.mk index e28792301..11e08ceb9 100644 --- a/fpga.mk +++ b/fpga.mk @@ -11,6 +11,9 @@ FPGA_CXXFILES = $(SIM_CXXFILES) $(shell find $(FPGA_CSRC_DIR) -name "*.cpp") FPGA_CXXFLAGS = $(subst \\\",\", $(SIM_CXXFLAGS)) -I$(FPGA_CSRC_DIR) -DNUM_CORES=$(NUM_CORES) FPGA_LDFLAGS = $(SIM_LDFLAGS) -lpthread -ldl +DMA_CHANNELS?=1 +FPGA_LDFLAGS += -DCONFIG_DMA_CHANNELS=$(DMA_CHANNELS) + fpga-build: fpga-clean fpga-host fpga-host: diff --git a/src/test/csrc/fpga/fpga_main.cpp b/src/test/csrc/fpga/fpga_main.cpp index 3f51d21a5..274325c15 100644 --- a/src/test/csrc/fpga/fpga_main.cpp +++ b/src/test/csrc/fpga/fpga_main.cpp @@ -20,8 +20,6 @@ #include "refproxy.h" #include "xdma.h" -#define XDMA_C2H_DEVICE "/dev/xdma0_c2h_0" - enum { SIMV_RUN, SIMV_DONE, @@ -84,7 +82,7 @@ void set_dut_from_xdma() { } void simv_init() { - xdma_device = new FpgaXdma(XDMA_C2H_DEVICE); + xdma_device = new FpgaXdma; difftest_init(); max_instrs = 40000000; } diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index f586f834a..985934f58 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -18,15 +18,31 @@ #include #include -FpgaXdma::FpgaXdma(const char *device_name) { +#define XDMA_C2H_DEVICE "/dev/xdma0_c2h_" +#define XDMA_H2C_DEVICE "/dev/xdma0_h2c_0" +static const int dma_channel = CONFIG_DMA_CHANNELS; + +FpgaXdma::FpgaXdma() { signal(SIGINT, handle_sigint); - fd_c2h = open(device_name, O_RDWR); - if (fd_c2h == -1) { - printf("xdma device not find %s\n", device_name); - exit(1); + for (int channel = 0; i < dma_channel; channel ++) { + char c2h_device[64]; + sprintf(c2h_device,"%s%d",DEVICE_C2H_NAME,i); + xdma_c2h_fd[i] = open(c2h_device, O_RDONLY ); + if (xdma_c2h_fd[i] == -1) { + std::cout << c2h_device << std::endl; + perror("Failed to open XDMA device"); + exit(-1); + } + std::cout << "XDMA link " << c2h_device << std::endl; + } + + xdma_h2c_fd[i] = open(h2c_device, O_WRONLY); + if (xdma_h2c_fd[i] == -1) { + std::cout << h2c_device << std::endl; + perror("Failed to open XDMA device"); + exit(-1); } - printf("xdma device %s\n", device_name); - set_dma_fd_block(); + std::cout << "XDMA link " << h2c_device << std::endl; } void FpgaXdma::handle_sigint(int sig) { @@ -34,26 +50,15 @@ void FpgaXdma::handle_sigint(int sig) { exit(1); } -void FpgaXdma::set_dma_fd_block() { - int flags = fcntl(fd_c2h, F_GETFL, 0); - if (flags == -1) { - perror("fcntl get error"); - exit(1); - return; - } - // Clear the O NONBLOCK flag and set it to blocking mode - flags &= ~O_NONBLOCK; - if (fcntl(fd_c2h, F_SETFL, flags) == -1) { - perror("fcntl set error"); - return; - } -} - void FpgaXdma::start_transmit_thread() { if (running == true) return; - receive_thread = std::thread(&FpgaXdma::read_xdma_thread, this); - process_thread = std::thread(&FpgaXdma::write_difftest_thread, this); + + for(int i = 0; i < dma_channel;i ++) { + printf("start channel %d \n", i); + receive_thread[i] = std::thread(&FpgaXdma::read_xdma_thread, this, i); + } + process_thread[i] = std::thread(&FpgaXdma::write_difftest_thread, this, i); running = true; } @@ -68,7 +73,7 @@ void FpgaXdma::stop_thansmit_thread() { running = false; } -void FpgaXdma::read_xdma_thread() { +void FpgaXdma::read_xdma_thread(int channel) { while (running) { char *memory = xdma_mempool.get_free_chunk(); read(fd_c2h, memory, recv_size); diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index c5d5d2c15..d5572bea1 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -36,13 +36,16 @@ typedef struct FpgaPackgeHead { class FpgaXdma { public: struct FpgaPackgeHead *shmadd_recv; - MemoryPool xdma_mempool; + + MemoryPool xdma_mempool[DMA_CHANNS]; DiffTestState difftest_pack[NUM_CORES] = {}; int shmid_recv; int ret_recv; key_t key_recv; - int fd_c2h; + int xdma_c2h_fd[DMA_CHANNS]; + int xdma_h2c_fd; + int fd_interrupt; bool running = false; @@ -53,7 +56,7 @@ class FpgaXdma { std::condition_variable diff_empile_cv; std::mutex diff_mtx; bool diff_packge_filled = false; - FpgaXdma(const char *device_name); + FpgaXdma(); ~FpgaXdma() { stop_thansmit_thread(); }; @@ -67,8 +70,8 @@ class FpgaXdma { void write_difftest_thread(); private: - std::thread receive_thread; - std::thread process_thread; + std::thread receive_thread[DMA_CHANNS]; + std::thread process_thread[DMA_CHANNS]; static void handle_sigint(int sig); }; From 308b05648b1a4dc595d765cf7b823e8592fb5530 Mon Sep 17 00:00:00 2001 From: xiaokamikami Date: Fri, 20 Sep 2024 15:34:51 +0800 Subject: [PATCH 07/17] difftest: Fixed an issue where the block structure was not memory safe --- src/test/csrc/common/mpool.h | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/test/csrc/common/mpool.h b/src/test/csrc/common/mpool.h index 0925e3043..0484d6348 100644 --- a/src/test/csrc/common/mpool.h +++ b/src/test/csrc/common/mpool.h @@ -22,12 +22,37 @@ #include #include #include +#include #define MEMPOOL_SIZE 4096 * 1024 // 4M page #define MEMBLOCK_SIZE 4096 // 4K packge #define NUM_BLOCKS (MEMPOOL_SIZE / MEMBLOCK_SIZE) #define REM_NUM_BLOCKS (NUM_BLOCKS - 1) +struct MemoryBlock { + std::unique_ptr> data; + std::atomic is_free; + + MemoryBlock() : is_free(true) { + void* ptr = nullptr; + if (posix_memalign(&ptr, 4096, 4096) != 0) { + throw std::runtime_error("Failed to allocate aligned memory"); + } + memset(ptr, 0, 4096); + data = std::unique_ptr>( + static_cast(ptr), + [](char* p) { free(p); } + ); + } + // Disable copy operations + MemoryBlock(const MemoryBlock&) = delete; + MemoryBlock& operator=(const MemoryBlock&) = delete; + + // Enable move operations + MemoryBlock(MemoryBlock&&) = default; + MemoryBlock& operator=(MemoryBlock&&) = default; +}; + class MemoryPool { public: // Constructor to allocate aligned memory blocks @@ -74,7 +99,7 @@ class MemoryPool { }; std::vector memory_pool; // Mempool std::vector block_mutexes{NUM_BLOCKS}; // Partition lock array - std::atomic empty_blocks = NUM_BLOCKS; // Free block count + std::atomic empty_blocks {NUM_BLOCKS}; // Free block count std::atomic filled_blocks; // Filled blocks count std::atomic write_index; std::atomic read_index; @@ -84,4 +109,5 @@ class MemoryPool { size_t page_end = 0; }; + #endif From 818fa62ad4f605ebedd5cfd91e70408b28f77ad3 Mon Sep 17 00:00:00 2001 From: Kami Date: Mon, 23 Sep 2024 16:49:03 +0800 Subject: [PATCH 08/17] fpga: The memory pool with sliding window was added to realize multi-channel out-of-order reception of data packets --- src/test/csrc/common/mpool.cpp | 94 ++++++++++++++++++++++++++++++++++ src/test/csrc/common/mpool.h | 59 +++++++++++++++++++++ 2 files changed, 153 insertions(+) diff --git a/src/test/csrc/common/mpool.cpp b/src/test/csrc/common/mpool.cpp index 0e6d2122c..05e5caa30 100644 --- a/src/test/csrc/common/mpool.cpp +++ b/src/test/csrc/common/mpool.cpp @@ -70,3 +70,97 @@ void MemoryPool::set_free_chunk() { cv_empty.notify_one(); ++empty_blocks; } + +// Cleaning up memory pools +void MemoryIdxPool::cleanupMemoryPool() { + cv_empty.notify_all(); + cv_filled.notify_all(); +} + +// Write a specified free block of a free window +bool MemoryIdxPool::write_free_chunk(uint8_t idx, const char *data) { + size_t page_w_idx; + { + std::lock_guard lock(offset_mutexes); + + page_w_idx = idx + group_w_offset; + // Processing of winding data at the boundary + if (memory_pool[page_w_idx].is_free.load() == false) { + size_t this_group = group_w_idx.load(); + size_t offset = ((this_group & REM_MAX_GROUPING_IDX) * MAX_IDX); + page_w_idx = idx + offset; + write_next_count ++; + // Lookup failed + if (memory_pool[page_w_idx].is_free.load() == false) { + printf("This block has been written, and there is a duplicate packge idx %d\n",idx); + return false; + } + } else { + write_count ++; + // Proceed to the next group + if (write_count == MAX_IDX) { + memory_pool[page_w_idx].is_free.store(false); + memcpy(memory_pool[page_w_idx].data.get(), data, 4096); + + size_t next_w_idx = wait_next_free_group(); + group_w_offset = (next_w_idx & REM_MAX_GROUPING_IDX) * MAX_IDX; + write_count = write_next_count; + write_next_count = 0; + return true; + } + } + memory_pool[page_w_idx].is_free.store(false); + } + memcpy(memory_pool[page_w_idx].data.get(), data, 4096); + + return true; +} + +bool MemoryIdxPool::read_busy_chunk(char *data) { + size_t page_r_idx = read_count + group_r_offset; + size_t this_r_idx = ++read_count; + + if (this_r_idx == MAX_IDX) { + read_count = 0; + size_t next_r_idx = wait_next_full_group(); + group_r_offset = ((next_r_idx & REM_MAX_GROUPING_IDX) * MAX_IDX); + } + if (memory_pool[page_r_idx].is_free.load() == true) { + printf("An attempt was made to read the block of free %d\n", page_r_idx); + return false; + } + + memcpy(data, memory_pool[page_r_idx].data.get(), 4096); + memory_pool[page_r_idx].is_free.store(true); + + return true; +} + +size_t MemoryIdxPool::wait_next_free_group() { + empty_blocks.fetch_sub(1); + size_t free_num = empty_blocks.load(); + cv_filled.notify_all(); + //Reserve at least two free blocks + if (free_num <= 2) { + std::unique_lock lock(window_mutexes); + cv_empty.wait(lock, [this] { return empty_blocks.load() > 1;}); + } + return group_w_idx.fetch_add(1); +} + +size_t MemoryIdxPool::wait_next_full_group() { + empty_blocks.fetch_add(1); + size_t free_num = empty_blocks.load(); + cv_empty.notify_all(); + + if (free_num >= MAX_GROUP_READ) { + std::unique_lock lock(window_mutexes); + cv_filled.wait(lock, [this] { return empty_blocks.load() < MAX_GROUP_READ;}); + } + return group_r_idx.fetch_add(1); +} + +bool MemoryIdxPool::check_group() { + bool result = (group_w_idx.load() > group_r_idx.load()) ? true : false; + return result; +} \ No newline at end of file diff --git a/src/test/csrc/common/mpool.h b/src/test/csrc/common/mpool.h index 0484d6348..ed6e10f0c 100644 --- a/src/test/csrc/common/mpool.h +++ b/src/test/csrc/common/mpool.h @@ -110,4 +110,63 @@ class MemoryPool { }; +static const size_t MAX_IDX = 256; +static const size_t MAX_GROUPING_IDX = NUM_BLOCKS / MAX_IDX; +static const size_t MAX_GROUP_READ = MAX_GROUPING_IDX - 2; //窗口需要预留两个空闲空间 +static const size_t REM_MAX_IDX = (MAX_IDX - 1); +static const size_t REM_MAX_GROUPING_IDX = (MAX_GROUPING_IDX - 1); + +// Split the memory pool into sliding Windows based on the index width +// Support multi-thread out-of-order write sequential read +class MemoryIdxPool { +public: + MemoryIdxPool() { + initMemoryPool(); + } + + ~MemoryIdxPool() { + cleanupMemoryPool(); + } + // Disable copy constructors and copy assignment operators + MemoryIdxPool(const MemoryIdxPool&) = delete; + MemoryIdxPool& operator=(const MemoryIdxPool&) = delete; + + void initMemoryPool() {} + + // Cleaning up memory pools + void cleanupMemoryPool(); + + // Write a specified free block of a free window + bool write_free_chunk(uint8_t idx, const char *data); + + // Get the head memory + bool read_busy_chunk(char *data); + + // Wait for the data to be free + size_t wait_next_free_group(); + + // Wait for the data to be readable + size_t wait_next_full_group(); + + // Check if there is a window to read + bool check_group(); + +private: + MemoryBlock memory_pool[NUM_BLOCKS]; // Mempool + std::mutex window_mutexes; // window sliding protection + std::mutex offset_mutexes; // w/r offset protection + std::condition_variable cv_empty; // Free block condition variable + std::condition_variable cv_filled; // Filled block condition variable + + size_t group_r_offset = 0; // The offset used by the current consumer + size_t group_w_offset = 0; // The offset used by the current producer + size_t read_count = 0; + size_t write_count = 0; + size_t write_next_count = 0; + + std::atomic empty_blocks{MAX_GROUP_READ}; + std::atomic group_w_idx{1}; + std::atomic group_r_idx{1}; +}; + #endif From e07e5e2f2c7bf18450298bb6868157d493fb9396 Mon Sep 17 00:00:00 2001 From: Kami Date: Mon, 23 Sep 2024 16:49:16 +0800 Subject: [PATCH 09/17] fpga: fix mpool format --- src/test/csrc/common/mpool.cpp | 18 ++++++++-------- src/test/csrc/common/mpool.h | 38 +++++++++++++++------------------- 2 files changed, 26 insertions(+), 30 deletions(-) diff --git a/src/test/csrc/common/mpool.cpp b/src/test/csrc/common/mpool.cpp index 05e5caa30..fcd08a5be 100644 --- a/src/test/csrc/common/mpool.cpp +++ b/src/test/csrc/common/mpool.cpp @@ -81,7 +81,7 @@ void MemoryIdxPool::cleanupMemoryPool() { bool MemoryIdxPool::write_free_chunk(uint8_t idx, const char *data) { size_t page_w_idx; { - std::lock_guard lock(offset_mutexes); + std::lock_guard lock(offset_mutexes); page_w_idx = idx + group_w_offset; // Processing of winding data at the boundary @@ -89,14 +89,14 @@ bool MemoryIdxPool::write_free_chunk(uint8_t idx, const char *data) { size_t this_group = group_w_idx.load(); size_t offset = ((this_group & REM_MAX_GROUPING_IDX) * MAX_IDX); page_w_idx = idx + offset; - write_next_count ++; + write_next_count++; // Lookup failed if (memory_pool[page_w_idx].is_free.load() == false) { - printf("This block has been written, and there is a duplicate packge idx %d\n",idx); + printf("This block has been written, and there is a duplicate packge idx %d\n", idx); return false; } } else { - write_count ++; + write_count++; // Proceed to the next group if (write_count == MAX_IDX) { memory_pool[page_w_idx].is_free.store(false); @@ -106,10 +106,10 @@ bool MemoryIdxPool::write_free_chunk(uint8_t idx, const char *data) { group_w_offset = (next_w_idx & REM_MAX_GROUPING_IDX) * MAX_IDX; write_count = write_next_count; write_next_count = 0; - return true; + return true; } } - memory_pool[page_w_idx].is_free.store(false); + memory_pool[page_w_idx].is_free.store(false); } memcpy(memory_pool[page_w_idx].data.get(), data, 4096); @@ -143,7 +143,7 @@ size_t MemoryIdxPool::wait_next_free_group() { //Reserve at least two free blocks if (free_num <= 2) { std::unique_lock lock(window_mutexes); - cv_empty.wait(lock, [this] { return empty_blocks.load() > 1;}); + cv_empty.wait(lock, [this] { return empty_blocks.load() > 1; }); } return group_w_idx.fetch_add(1); } @@ -155,7 +155,7 @@ size_t MemoryIdxPool::wait_next_full_group() { if (free_num >= MAX_GROUP_READ) { std::unique_lock lock(window_mutexes); - cv_filled.wait(lock, [this] { return empty_blocks.load() < MAX_GROUP_READ;}); + cv_filled.wait(lock, [this] { return empty_blocks.load() < MAX_GROUP_READ; }); } return group_r_idx.fetch_add(1); } @@ -163,4 +163,4 @@ size_t MemoryIdxPool::wait_next_full_group() { bool MemoryIdxPool::check_group() { bool result = (group_w_idx.load() > group_r_idx.load()) ? true : false; return result; -} \ No newline at end of file +} diff --git a/src/test/csrc/common/mpool.h b/src/test/csrc/common/mpool.h index ed6e10f0c..2b26bb241 100644 --- a/src/test/csrc/common/mpool.h +++ b/src/test/csrc/common/mpool.h @@ -18,11 +18,11 @@ #include #include +#include #include #include #include #include -#include #define MEMPOOL_SIZE 4096 * 1024 // 4M page #define MEMBLOCK_SIZE 4096 // 4K packge @@ -30,27 +30,24 @@ #define REM_NUM_BLOCKS (NUM_BLOCKS - 1) struct MemoryBlock { - std::unique_ptr> data; + std::unique_ptr> data; std::atomic is_free; MemoryBlock() : is_free(true) { - void* ptr = nullptr; + void *ptr = nullptr; if (posix_memalign(&ptr, 4096, 4096) != 0) { throw std::runtime_error("Failed to allocate aligned memory"); } memset(ptr, 0, 4096); - data = std::unique_ptr>( - static_cast(ptr), - [](char* p) { free(p); } - ); + data = std::unique_ptr>(static_cast(ptr), [](char *p) { free(p); }); } // Disable copy operations - MemoryBlock(const MemoryBlock&) = delete; - MemoryBlock& operator=(const MemoryBlock&) = delete; + MemoryBlock(const MemoryBlock &) = delete; + MemoryBlock &operator=(const MemoryBlock &) = delete; // Enable move operations - MemoryBlock(MemoryBlock&&) = default; - MemoryBlock& operator=(MemoryBlock&&) = default; + MemoryBlock(MemoryBlock &&) = default; + MemoryBlock &operator=(MemoryBlock &&) = default; }; class MemoryPool { @@ -99,7 +96,7 @@ class MemoryPool { }; std::vector memory_pool; // Mempool std::vector block_mutexes{NUM_BLOCKS}; // Partition lock array - std::atomic empty_blocks {NUM_BLOCKS}; // Free block count + std::atomic empty_blocks{NUM_BLOCKS}; // Free block count std::atomic filled_blocks; // Filled blocks count std::atomic write_index; std::atomic read_index; @@ -109,7 +106,6 @@ class MemoryPool { size_t page_end = 0; }; - static const size_t MAX_IDX = 256; static const size_t MAX_GROUPING_IDX = NUM_BLOCKS / MAX_IDX; static const size_t MAX_GROUP_READ = MAX_GROUPING_IDX - 2; //窗口需要预留两个空闲空间 @@ -128,8 +124,8 @@ class MemoryIdxPool { cleanupMemoryPool(); } // Disable copy constructors and copy assignment operators - MemoryIdxPool(const MemoryIdxPool&) = delete; - MemoryIdxPool& operator=(const MemoryIdxPool&) = delete; + MemoryIdxPool(const MemoryIdxPool &) = delete; + MemoryIdxPool &operator=(const MemoryIdxPool &) = delete; void initMemoryPool() {} @@ -152,16 +148,16 @@ class MemoryIdxPool { bool check_group(); private: - MemoryBlock memory_pool[NUM_BLOCKS]; // Mempool - std::mutex window_mutexes; // window sliding protection - std::mutex offset_mutexes; // w/r offset protection - std::condition_variable cv_empty; // Free block condition variable - std::condition_variable cv_filled; // Filled block condition variable + MemoryBlock memory_pool[NUM_BLOCKS]; // Mempool + std::mutex window_mutexes; // window sliding protection + std::mutex offset_mutexes; // w/r offset protection + std::condition_variable cv_empty; // Free block condition variable + std::condition_variable cv_filled; // Filled block condition variable size_t group_r_offset = 0; // The offset used by the current consumer size_t group_w_offset = 0; // The offset used by the current producer size_t read_count = 0; - size_t write_count = 0; + size_t write_count = 0; size_t write_next_count = 0; std::atomic empty_blocks{MAX_GROUP_READ}; From b3a828c42dcc81d79cc73de332d484dc698e6bbc Mon Sep 17 00:00:00 2001 From: Kami Date: Mon, 23 Sep 2024 17:56:53 +0800 Subject: [PATCH 10/17] fpga: modify xdma to be multi-channel configurable and use a sliding window --- src/test/csrc/common/mpool.cpp | 2 +- src/test/csrc/common/mpool.h | 7 ---- src/test/csrc/fpga/xdma.cpp | 70 +++++++++++++++++----------------- src/test/csrc/fpga/xdma.h | 12 +++--- 4 files changed, 43 insertions(+), 48 deletions(-) diff --git a/src/test/csrc/common/mpool.cpp b/src/test/csrc/common/mpool.cpp index fcd08a5be..4e83e63ae 100644 --- a/src/test/csrc/common/mpool.cpp +++ b/src/test/csrc/common/mpool.cpp @@ -126,7 +126,7 @@ bool MemoryIdxPool::read_busy_chunk(char *data) { group_r_offset = ((next_r_idx & REM_MAX_GROUPING_IDX) * MAX_IDX); } if (memory_pool[page_r_idx].is_free.load() == true) { - printf("An attempt was made to read the block of free %d\n", page_r_idx); + printf("An attempt was made to read the block of free %zu\n", page_r_idx); return false; } diff --git a/src/test/csrc/common/mpool.h b/src/test/csrc/common/mpool.h index 2b26bb241..b78bf4ad1 100644 --- a/src/test/csrc/common/mpool.h +++ b/src/test/csrc/common/mpool.h @@ -41,13 +41,6 @@ struct MemoryBlock { memset(ptr, 0, 4096); data = std::unique_ptr>(static_cast(ptr), [](char *p) { free(p); }); } - // Disable copy operations - MemoryBlock(const MemoryBlock &) = delete; - MemoryBlock &operator=(const MemoryBlock &) = delete; - - // Enable move operations - MemoryBlock(MemoryBlock &&) = default; - MemoryBlock &operator=(MemoryBlock &&) = default; }; class MemoryPool { diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index 985934f58..6a90f16e6 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -16,6 +16,7 @@ #include "xdma.h" #include "mpool.h" #include +#include #include #define XDMA_C2H_DEVICE "/dev/xdma0_c2h_" @@ -24,10 +25,10 @@ static const int dma_channel = CONFIG_DMA_CHANNELS; FpgaXdma::FpgaXdma() { signal(SIGINT, handle_sigint); - for (int channel = 0; i < dma_channel; channel ++) { + for (int i = 0; i < dma_channel; i++) { char c2h_device[64]; - sprintf(c2h_device,"%s%d",DEVICE_C2H_NAME,i); - xdma_c2h_fd[i] = open(c2h_device, O_RDONLY ); + sprintf(c2h_device, "%s%d", XDMA_C2H_DEVICE, i); + xdma_c2h_fd[i] = open(c2h_device, O_RDONLY); if (xdma_c2h_fd[i] == -1) { std::cout << c2h_device << std::endl; perror("Failed to open XDMA device"); @@ -36,13 +37,13 @@ FpgaXdma::FpgaXdma() { std::cout << "XDMA link " << c2h_device << std::endl; } - xdma_h2c_fd[i] = open(h2c_device, O_WRONLY); - if (xdma_h2c_fd[i] == -1) { - std::cout << h2c_device << std::endl; + xdma_h2c_fd = open(XDMA_H2C_DEVICE, O_WRONLY); + if (xdma_h2c_fd == -1) { + std::cout << XDMA_H2C_DEVICE << std::endl; perror("Failed to open XDMA device"); exit(-1); } - std::cout << "XDMA link " << h2c_device << std::endl; + std::cout << "XDMA link " << XDMA_H2C_DEVICE << std::endl; } void FpgaXdma::handle_sigint(int sig) { @@ -54,54 +55,55 @@ void FpgaXdma::start_transmit_thread() { if (running == true) return; - for(int i = 0; i < dma_channel;i ++) { + for (int i = 0; i < dma_channel; i++) { printf("start channel %d \n", i); receive_thread[i] = std::thread(&FpgaXdma::read_xdma_thread, this, i); } - process_thread[i] = std::thread(&FpgaXdma::write_difftest_thread, this, i); + process_thread = std::thread(&FpgaXdma::write_difftest_thread, this); running = true; } void FpgaXdma::stop_thansmit_thread() { if (running == false) return; - xdma_mempool.unlock_thread(); - if (receive_thread.joinable()) - receive_thread.join(); + running = false; + + for (int i = 0; i < CONFIG_DMA_CHANNELS; i++) { + if (receive_thread[i].joinable()) + receive_thread[i].join(); + close(xdma_c2h_fd[i]); + } + if (process_thread.joinable()) process_thread.join(); - running = false; + + close(xdma_h2c_fd); + xdma_mempool.cleanupMemoryPool(); } void FpgaXdma::read_xdma_thread(int channel) { + FpgaPackgeHead packge; + bool result = true; while (running) { - char *memory = xdma_mempool.get_free_chunk(); - read(fd_c2h, memory, recv_size); - xdma_mempool.set_busy_chunk(); + size_t size = read(xdma_c2h_fd[channel], &packge, sizeof(FpgaPackgeHead)); + uint8_t idx = packge.packge_idx; + if (xdma_mempool.write_free_chunk(idx, (char *)&packge) == false) { + printf("It should not be the case that no available block can be found\n"); + assert(0); + } } } void FpgaXdma::write_difftest_thread() { + FpgaPackgeHead packge; + bool result = true; while (running) { - const char *memory = xdma_mempool.get_busy_chunk(); - static uint8_t valid_core = 0; - uint8_t core_id = 0; - - memcpy(&core_id, memory + sizeof(DiffTestState), sizeof(uint8_t)); - assert(core_id > NUM_CORES); - { - std::unique_lock lock(diff_mtx); - diff_empile_cv.wait(lock, [this] { return !diff_packge_filled; }); - memcpy(&difftest_pack[core_id], memory, sizeof(DiffTestState)); + if (xdma_mempool.read_busy_chunk((char *)&packge) == false) { + printf("Failed to read data from the XDMA memory pool\n"); + assert(0); } - valid_core++; - xdma_mempool.set_free_chunk(); + // packge unpack - if (valid_core == NUM_CORES) { - diff_packge_filled = true; - valid_core = 0; - // Notify difftest to run the next check - diff_filled_cv.notify_one(); - } + // difftest run } } diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index d5572bea1..cb4307c60 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -30,20 +30,20 @@ #define WITH_FPGA typedef struct FpgaPackgeHead { DiffTestState difftestinfo; - uint8_t corid; + uint8_t packge_idx; } FpgaPackgeHead; class FpgaXdma { public: struct FpgaPackgeHead *shmadd_recv; - MemoryPool xdma_mempool[DMA_CHANNS]; + MemoryIdxPool xdma_mempool; DiffTestState difftest_pack[NUM_CORES] = {}; int shmid_recv; int ret_recv; key_t key_recv; - int xdma_c2h_fd[DMA_CHANNS]; + int xdma_c2h_fd[CONFIG_DMA_CHANNELS]; int xdma_h2c_fd; int fd_interrupt; @@ -66,12 +66,12 @@ class FpgaXdma { // thread api void start_transmit_thread(); void stop_thansmit_thread(); - void read_xdma_thread(); + void read_xdma_thread(int channel); void write_difftest_thread(); private: - std::thread receive_thread[DMA_CHANNS]; - std::thread process_thread[DMA_CHANNS]; + std::thread receive_thread[CONFIG_DMA_CHANNELS]; + std::thread process_thread; static void handle_sigint(int sig); }; From 2f9ee343a634340718d25354acedcb3c36d52b39 Mon Sep 17 00:00:00 2001 From: Kami Date: Tue, 24 Sep 2024 17:26:40 +0800 Subject: [PATCH 11/17] fpga: Improve the operation logic of fpga diff --- src/test/csrc/fpga/fpga_main.cpp | 50 ++++++++++++++++++-------------- src/test/csrc/fpga/xdma.cpp | 3 +- src/test/csrc/fpga/xdma.h | 20 +++++-------- 3 files changed, 38 insertions(+), 35 deletions(-) diff --git a/src/test/csrc/fpga/fpga_main.cpp b/src/test/csrc/fpga/fpga_main.cpp index 274325c15..2f4ed5a11 100644 --- a/src/test/csrc/fpga/fpga_main.cpp +++ b/src/test/csrc/fpga/fpga_main.cpp @@ -15,6 +15,7 @@ ***************************************************************************************/ #include "diffstate.h" +#include "difftest-dpic.h" #include "difftest.h" #include "mpool.h" #include "refproxy.h" @@ -26,6 +27,7 @@ enum { SIMV_FAIL, } simv_state; +static char work_load[256] = "/dev/zero"; static uint8_t simv_result = SIMV_RUN; static uint64_t max_instrs = 0; @@ -39,7 +41,6 @@ static core_end_info_t core_end_info; void simv_init(); void simv_step(); void cpu_endtime_check(); -void set_dut_from_xdma(); void set_diff_ref_so(char *s); void args_parsingniton(int argc, char *argv[]); @@ -50,12 +51,13 @@ int main(int argc, char *argv[]) { simv_init(); while (simv_result == SIMV_RUN) { - // get xdma data - set_dut_from_xdma(); - - // run difftest - simv_step(); - cpu_endtime_check(); + // wait get xdma data + if (xdma_device->diff_packge_count.load(std::memory_order_seq_cst) > 0) { + // run difftest + simv_step(); + cpu_endtime_check(); + xdma_device->diff_packge_count.fetch_sub(1, std::memory_order_relaxed); + } } free(xdma_device); } @@ -68,28 +70,30 @@ void set_diff_ref_so(char *s) { difftest_ref_so = buf; } -void set_dut_from_xdma() { - { - std::unique_lock lock(xdma_device->diff_mtx); - xdma_device->diff_filled_cv.wait(lock, [] { return xdma_device->diff_packge_filled; }); - for (int i = 0; i < NUM_CORES; i++) { - - difftest[i]->dut = &xdma_device->difftest_pack[i]; - } - xdma_device->diff_packge_filled = false; - xdma_device->diff_empile_cv.notify_one(); - } -} - void simv_init() { xdma_device = new FpgaXdma; difftest_init(); - max_instrs = 40000000; } void simv_step() { if (difftest_step()) simv_result = SIMV_FAIL; + if (difftest_state() != -1) { + int trapCode = difftest_state(); + for (int i = 0; i < NUM_CORES; i++) { + printf("Core %d: ", i); + uint64_t pc = difftest[i]->get_trap_event()->pc; + switch (trapCode) { + case 0: eprintf(ANSI_COLOR_GREEN "HIT GOOD TRAP at pc = 0x%" PRIx64 "\n" ANSI_COLOR_RESET, pc); break; + default: eprintf(ANSI_COLOR_RED "Unknown trap code: %d\n" ANSI_COLOR_RESET, trapCode); + } + difftest[i]->display_stats(); + } + if (trapCode == 0) + simv_result = SIMV_DONE; + else + simv_result = SIMV_FAIL; + } } void cpu_endtime_check() { @@ -116,6 +120,10 @@ void args_parsingniton(int argc, char *argv[]) { for (int i = 1; i < argc; ++i) { if (strcmp(argv[i], "--diff") == 0) { set_diff_ref_so(argv[++i]); + } else if (strcmp(argv[i], "-i") == 0) { + memcpy(work_load, argv[++i], sizeof(argv[++i])); + } else if (strcmp(argv[i], "--max-instrs") == 0) { + max_instrs = std::stoul(argv[++i], nullptr, 16); } } } diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index 6a90f16e6..6854bcfc4 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -103,7 +103,8 @@ void FpgaXdma::write_difftest_thread() { assert(0); } // packge unpack - + v_difftest_Batch(packge.difftest_batch_info.io_data, packge.difftest_batch_info.io_info); // difftest run + diff_packge_count.fetch_add(1, std::memory_order_relaxed); } } diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index cb4307c60..ae9eeeeda 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -18,7 +18,9 @@ #include "common.h" #include "diffstate.h" +#include "difftest-dpic.h" #include "mpool.h" +#include #include #include #include @@ -28,8 +30,9 @@ #include #define WITH_FPGA + typedef struct FpgaPackgeHead { - DiffTestState difftestinfo; + BatchInfo difftest_batch_info; uint8_t packge_idx; } FpgaPackgeHead; @@ -38,31 +41,22 @@ class FpgaXdma { struct FpgaPackgeHead *shmadd_recv; MemoryIdxPool xdma_mempool; - DiffTestState difftest_pack[NUM_CORES] = {}; - int shmid_recv; - int ret_recv; - key_t key_recv; int xdma_c2h_fd[CONFIG_DMA_CHANNELS]; int xdma_h2c_fd; - int fd_interrupt; bool running = false; - unsigned int recv_size = sizeof(FpgaPackgeHead); - unsigned long old_exec_instr = 0; - std::condition_variable diff_filled_cv; std::condition_variable diff_empile_cv; - std::mutex diff_mtx; - bool diff_packge_filled = false; + + std::atomic diff_packge_count{0}; + FpgaXdma(); ~FpgaXdma() { stop_thansmit_thread(); }; - void set_dma_fd_block(); - // thread api void start_transmit_thread(); void stop_thansmit_thread(); From 5988694501a0a3a4b31f1be406e9983c1b59906f Mon Sep 17 00:00:00 2001 From: Kami Date: Wed, 25 Sep 2024 11:35:26 +0800 Subject: [PATCH 12/17] fpga: Remove redundant mempool-MemoryBlock definitions --- src/test/csrc/common/mpool.h | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/test/csrc/common/mpool.h b/src/test/csrc/common/mpool.h index b78bf4ad1..69d031e70 100644 --- a/src/test/csrc/common/mpool.h +++ b/src/test/csrc/common/mpool.h @@ -41,6 +41,21 @@ struct MemoryBlock { memset(ptr, 0, 4096); data = std::unique_ptr>(static_cast(ptr), [](char *p) { free(p); }); } + // Move constructors + MemoryBlock(MemoryBlock &&other) noexcept : data(std::move(other.data)), is_free(other.is_free.load()) {} + + // Move assignment operator + MemoryBlock &operator=(MemoryBlock &&other) noexcept { + if (this != &other) { + data = std::move(other.data); + is_free.store(other.is_free.load()); + } + return *this; + } + + // Disable the copy constructor and copy assignment operator + MemoryBlock(const MemoryBlock &) = delete; + MemoryBlock &operator=(const MemoryBlock &) = delete; }; class MemoryPool { @@ -75,18 +90,6 @@ class MemoryPool { void set_free_chunk(); private: - struct MemoryBlock { - std::unique_ptr> data; - bool is_free; - - MemoryBlock() : is_free(true) { - void *ptr = nullptr; - if (posix_memalign(&ptr, MEMBLOCK_SIZE, MEMBLOCK_SIZE * 2) != 0) { - throw std::runtime_error("Failed to allocate aligned memory"); - } - data = std::unique_ptr>(static_cast(ptr), [](char *p) { free(p); }); - } - }; std::vector memory_pool; // Mempool std::vector block_mutexes{NUM_BLOCKS}; // Partition lock array std::atomic empty_blocks{NUM_BLOCKS}; // Free block count @@ -101,7 +104,7 @@ class MemoryPool { static const size_t MAX_IDX = 256; static const size_t MAX_GROUPING_IDX = NUM_BLOCKS / MAX_IDX; -static const size_t MAX_GROUP_READ = MAX_GROUPING_IDX - 2; //窗口需要预留两个空闲空间 +static const size_t MAX_GROUP_READ = MAX_GROUPING_IDX - 2; //The window needs to reserve two free Spaces static const size_t REM_MAX_IDX = (MAX_IDX - 1); static const size_t REM_MAX_GROUPING_IDX = (MAX_GROUPING_IDX - 1); From 36f2638887805cc22dadb1b636be4b4cc62597cc Mon Sep 17 00:00:00 2001 From: Kami Date: Wed, 25 Sep 2024 15:12:10 +0800 Subject: [PATCH 13/17] fpga: The adaptation pack processes the new batch --- src/test/csrc/fpga/xdma.cpp | 7 +++---- src/test/csrc/fpga/xdma.h | 13 ++++--------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index 6854bcfc4..eb8bb4581 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -21,11 +21,10 @@ #define XDMA_C2H_DEVICE "/dev/xdma0_c2h_" #define XDMA_H2C_DEVICE "/dev/xdma0_h2c_0" -static const int dma_channel = CONFIG_DMA_CHANNELS; FpgaXdma::FpgaXdma() { signal(SIGINT, handle_sigint); - for (int i = 0; i < dma_channel; i++) { + for (int i = 0; i < CONFIG_DMA_CHANNELS; i++) { char c2h_device[64]; sprintf(c2h_device, "%s%d", XDMA_C2H_DEVICE, i); xdma_c2h_fd[i] = open(c2h_device, O_RDONLY); @@ -55,7 +54,7 @@ void FpgaXdma::start_transmit_thread() { if (running == true) return; - for (int i = 0; i < dma_channel; i++) { + for (int i = 0; i < CONFIG_DMA_CHANNELS; i++) { printf("start channel %d \n", i); receive_thread[i] = std::thread(&FpgaXdma::read_xdma_thread, this, i); } @@ -103,7 +102,7 @@ void FpgaXdma::write_difftest_thread() { assert(0); } // packge unpack - v_difftest_Batch(packge.difftest_batch_info.io_data, packge.difftest_batch_info.io_info); + v_difftest_Batch((uint8_t *)packge.diff_batch_pack); // difftest run diff_packge_count.fetch_add(1, std::memory_order_relaxed); } diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index ae9eeeeda..9319f823a 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -32,24 +32,16 @@ #define WITH_FPGA typedef struct FpgaPackgeHead { - BatchInfo difftest_batch_info; uint8_t packge_idx; + char diff_batch_pack[CONFIG_DIFFTEST_BATCH_BYTELEN]; } FpgaPackgeHead; class FpgaXdma { public: - struct FpgaPackgeHead *shmadd_recv; - MemoryIdxPool xdma_mempool; - int xdma_c2h_fd[CONFIG_DMA_CHANNELS]; - int xdma_h2c_fd; - bool running = false; - std::condition_variable diff_filled_cv; - std::condition_variable diff_empile_cv; - std::atomic diff_packge_count{0}; FpgaXdma(); @@ -67,6 +59,9 @@ class FpgaXdma { std::thread receive_thread[CONFIG_DMA_CHANNELS]; std::thread process_thread; + int xdma_c2h_fd[CONFIG_DMA_CHANNELS]; + int xdma_h2c_fd; + static void handle_sigint(int sig); }; From bf239e8d3080180d62ac918e376d18c5087f69b7 Mon Sep 17 00:00:00 2001 From: Kami Date: Wed, 25 Sep 2024 18:42:25 +0800 Subject: [PATCH 14/17] CI: add fpga-diff compile ci --- .github/workflows/main.yml | 41 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ad92367af..bd81da333 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -450,3 +450,44 @@ jobs: make difftest_verilog PROFILE=../build/generated-src/difftest_profile.json NUMCORES=1 CONFIG=ZEL MFC=1 make simv VCS=verilator WITH_CHISELDB=0 WITH_CONSTANTIN=0 IOTRACE_ZSTD=1 ./build/simv +workload=../ready-to-run/microbench.bin +e=0 +diff=../ready-to-run/riscv64-nemu-interpreter-so +iotrace-name=../iotrace + + # test-difftest-fpga: + # runs-on: ubuntu-22.04 + + # needs: test-difftest-main + + # steps: + # - uses: actions/checkout@v4 + + # - name: Prepare environment + # run: | + # cd $GITHUB_WORKSPACE/.. + # git config --global url."https://github.com/".insteadOf git@github.com: + # git config --global url."https://".insteadOf git:// + # git clone https://github.com/OpenXiangShan/xs-env + # cd xs-env + # sudo -s ./setup-tools.sh + # source ./setup.sh + + # - name: Prepare NutShell + # run: | + # cd $GITHUB_WORKSPACE/../xs-env + # rm -r NutShell + # git clone -b dev-difftest --single-branch https://github.com/OSCPU/NutShell.git + # cd NutShell && git submodule update --init + # rm -r difftest + # cp -r $GITHUB_WORKSPACE . + + # - name: Enable -Werror for EMU Build + # run: | + # echo "CXX_NO_WARNING=1" >> $GITHUB_ENV + + # - name: FPGA-difftest Build + # run: | + # cd $GITHUB_WORKSPACE/../xs-env + # source ./env.sh + # cd $GITHUB_WORKSPACE/../xs-env/NutShell + # source ./env.sh + # make clean + # make sim-verilog MILL_ARGS="--difftest-config ENBF" -j2 + # make fpga-build DMA_CHANNELS=2 WITH_CHISELDB=0 WITH_CONSTANTIN=0 From 5a211182bc674ce4d4333e23d6af85f3bbfecbc7 Mon Sep 17 00:00:00 2001 From: Kami Date: Thu, 26 Sep 2024 11:22:58 +0800 Subject: [PATCH 15/17] fpga: svdpi.h is not referenced when fpga is used --- src/main/scala/DPIC.scala | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/main/scala/DPIC.scala b/src/main/scala/DPIC.scala index 1ae6a1dd7..db4848fa1 100644 --- a/src/main/scala/DPIC.scala +++ b/src/main/scala/DPIC.scala @@ -297,6 +297,7 @@ private class DummyDPICBatchWrapper( object DPIC { val interfaces = ListBuffer.empty[(String, String, String)] + var defMacros = new StringBuilder() def apply(control: GatewaySinkControl, io: Valid[DifftestBundle], config: GatewayConfig): Unit = { val module = Module(new DummyDPICWrapper(chiselTypeOf(io), config)) @@ -314,6 +315,12 @@ object DPIC { module.control := control module.io := io val dpic = module.dpic + if (!config.isFPGA) + defMacros ++= + s""" + |#ifdef CONFIG_DIFFTEST_BATCH + |#include "svdpi.h" + |#endif // CONFIG_DIFFTEST_BATCH""".stripMargin interfaces += ((dpic.dpicFuncName, dpic.dpicFuncProto, dpic.dpicFunc)) } @@ -328,12 +335,10 @@ object DPIC { interfaceCpp += "" interfaceCpp += "#include " interfaceCpp += "#include \"diffstate.h\"" - interfaceCpp += "#ifdef CONFIG_DIFFTEST_BATCH" - interfaceCpp += "#include \"svdpi.h\"" - interfaceCpp += "#endif // CONFIG_DIFFTEST_BATCH" interfaceCpp += "#ifdef CONFIG_DIFFTEST_PERFCNT" interfaceCpp += "#include \"perf.h\"" interfaceCpp += "#endif // CONFIG_DIFFTEST_PERFCNT" + interfaceCpp += defMacros.toString() interfaceCpp += "" interfaceCpp += """ From b4b1fc0cca6a8fe7992e4c6752a7e6f42d42c692 Mon Sep 17 00:00:00 2001 From: Kami Date: Thu, 26 Sep 2024 17:02:42 +0800 Subject: [PATCH 16/17] fpga: Burn workload to fpga ddr at boot time --- src/test/csrc/fpga/fpga_main.cpp | 2 +- src/test/csrc/fpga/xdma.cpp | 49 +++++++++++++++++++++++++++++++- src/test/csrc/fpga/xdma.h | 18 +++++++++++- 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/src/test/csrc/fpga/fpga_main.cpp b/src/test/csrc/fpga/fpga_main.cpp index 2f4ed5a11..689dfbf4b 100644 --- a/src/test/csrc/fpga/fpga_main.cpp +++ b/src/test/csrc/fpga/fpga_main.cpp @@ -71,7 +71,7 @@ void set_diff_ref_so(char *s) { } void simv_init() { - xdma_device = new FpgaXdma; + xdma_device = new FpgaXdma(work_load); difftest_init(); } diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index eb8bb4581..c7e578f7d 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -16,14 +16,20 @@ #include "xdma.h" #include "mpool.h" #include +#include #include #include +#include +#define XDMA_USER "/dev/xdma0_user" +#define XDMA_BYPASS "/dev/xdma0_bypass" #define XDMA_C2H_DEVICE "/dev/xdma0_c2h_" #define XDMA_H2C_DEVICE "/dev/xdma0_h2c_0" -FpgaXdma::FpgaXdma() { +FpgaXdma::FpgaXdma(const char *workload) { signal(SIGINT, handle_sigint); + ddr_load_workload(workload); + for (int i = 0; i < CONFIG_DMA_CHANNELS; i++) { char c2h_device[64]; sprintf(c2h_device, "%s%d", XDMA_C2H_DEVICE, i); @@ -50,6 +56,47 @@ void FpgaXdma::handle_sigint(int sig) { exit(1); } +// write xdma_bypass memory or xdma_user +int FpgaXdma::device_write(bool is_bypass, const char *workload, uint64_t addr, uint64_t value) { + uint64_t pg_size = sysconf(_SC_PAGE_SIZE); + uint64_t size = !is_bypass ? 0x1000 : 0x10000; + uint64_t aligned_size = (size + 0xffful) & ~0xffful; + uint64_t base = addr & ~0xffful; + uint32_t offset = addr & 0xfffu; + std::ifstream workload_fd; + int fd = -1; + + if (base % pg_size != 0) { + printf("base must be a multiple of system page size\n"); + return -1; + } + + if (is_bypass) + fd = open(XDMA_BYPASS, O_RDWR | O_SYNC); + else + fd = open(XDMA_USER, O_RDWR | O_SYNC); + if (fd < 0) { + printf("failed to open %s\n", is_bypass ? XDMA_BYPASS : XDMA_USER); + return -1; + } + + void *m_ptr = mmap(nullptr, aligned_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, base); + if (m_ptr == MAP_FAILED) { + close(fd); + printf("failed to mmap\n"); + return -1; + } + + if (is_bypass) { + workload_fd.read(((char *)m_ptr) + offset, size); + } else { + ((volatile uint32_t *)m_ptr)[offset >> 2] = value; + } + + munmap(m_ptr, aligned_size); + close(fd); +} + void FpgaXdma::start_transmit_thread() { if (running == true) return; diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index 9319f823a..86fef90e4 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -44,11 +44,27 @@ class FpgaXdma { std::atomic diff_packge_count{0}; - FpgaXdma(); + FpgaXdma(const char *workload); ~FpgaXdma() { stop_thansmit_thread(); }; + int core_reset() { + device_write(false, nullptr, 0x100000, 0x1); + device_write(false, nullptr, 0x10000, 0x8); + } + + int core_restart() { + device_write(false, nullptr, 0x100000, 0); + } + + int ddr_load_workload(const char *workload) { + core_reset(); + device_write(true, workload, 0, 0); + core_restart(); + } + + int device_write(bool is_bypass, const char *workload, uint64_t addr, uint64_t value); // thread api void start_transmit_thread(); void stop_thansmit_thread(); From 9b1bf1fb20fc116027724ed20580cf6cba2851a9 Mon Sep 17 00:00:00 2001 From: Kami Date: Fri, 27 Sep 2024 15:34:30 +0800 Subject: [PATCH 17/17] fpga: Load memory for the ref module --- src/test/csrc/common/ram.h | 3 +++ src/test/csrc/fpga/fpga_main.cpp | 4 ++++ src/test/csrc/fpga/xdma.cpp | 17 +++++++++++------ src/test/csrc/fpga/xdma.h | 9 +++++---- 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/test/csrc/common/ram.h b/src/test/csrc/common/ram.h index 78d242e85..2a791a98b 100644 --- a/src/test/csrc/common/ram.h +++ b/src/test/csrc/common/ram.h @@ -110,6 +110,9 @@ class SimMemory { uint64_t get_size() { return memory_size; } + uint64_t get_load_img_size() { + return get_img_size(); + } bool in_range_u8(uint64_t address) { return address < memory_size; } diff --git a/src/test/csrc/fpga/fpga_main.cpp b/src/test/csrc/fpga/fpga_main.cpp index 689dfbf4b..f35589d3b 100644 --- a/src/test/csrc/fpga/fpga_main.cpp +++ b/src/test/csrc/fpga/fpga_main.cpp @@ -18,6 +18,7 @@ #include "difftest-dpic.h" #include "difftest.h" #include "mpool.h" +#include "ram.h" #include "refproxy.h" #include "xdma.h" @@ -60,6 +61,8 @@ int main(int argc, char *argv[]) { } } free(xdma_device); + printf("difftest releases the fpga device and exits\n"); + exit(0); } void set_diff_ref_so(char *s) { @@ -72,6 +75,7 @@ void set_diff_ref_so(char *s) { void simv_init() { xdma_device = new FpgaXdma(work_load); + init_ram(work_load, DEFAULT_EMU_RAM_SIZE); difftest_init(); } diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index c7e578f7d..2ad8840c0 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -15,6 +15,7 @@ ***************************************************************************************/ #include "xdma.h" #include "mpool.h" +#include "ram.h" #include #include #include @@ -57,18 +58,17 @@ void FpgaXdma::handle_sigint(int sig) { } // write xdma_bypass memory or xdma_user -int FpgaXdma::device_write(bool is_bypass, const char *workload, uint64_t addr, uint64_t value) { +void FpgaXdma::device_write(bool is_bypass, const char *workload, uint64_t addr, uint64_t value) { uint64_t pg_size = sysconf(_SC_PAGE_SIZE); uint64_t size = !is_bypass ? 0x1000 : 0x10000; uint64_t aligned_size = (size + 0xffful) & ~0xffful; uint64_t base = addr & ~0xffful; uint32_t offset = addr & 0xfffu; - std::ifstream workload_fd; int fd = -1; if (base % pg_size != 0) { printf("base must be a multiple of system page size\n"); - return -1; + exit(-1); } if (is_bypass) @@ -77,18 +77,23 @@ int FpgaXdma::device_write(bool is_bypass, const char *workload, uint64_t addr, fd = open(XDMA_USER, O_RDWR | O_SYNC); if (fd < 0) { printf("failed to open %s\n", is_bypass ? XDMA_BYPASS : XDMA_USER); - return -1; + exit(-1); } void *m_ptr = mmap(nullptr, aligned_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, base); if (m_ptr == MAP_FAILED) { close(fd); printf("failed to mmap\n"); - return -1; + exit(-1); } if (is_bypass) { - workload_fd.read(((char *)m_ptr) + offset, size); + if (simMemory->get_load_img_size() > aligned_size) { + printf("The loaded workload size exceeds the xdma bypass size"); + exit(-1); + } + memcpy(static_cast(m_ptr) + offset, static_cast(simMemory->as_ptr()), + simMemory->get_load_img_size()); } else { ((volatile uint32_t *)m_ptr)[offset >> 2] = value; } diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index 86fef90e4..ceca1e8e2 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -49,22 +49,23 @@ class FpgaXdma { stop_thansmit_thread(); }; - int core_reset() { + void core_reset() { device_write(false, nullptr, 0x100000, 0x1); device_write(false, nullptr, 0x10000, 0x8); } - int core_restart() { + void core_restart() { device_write(false, nullptr, 0x100000, 0); } - int ddr_load_workload(const char *workload) { + void ddr_load_workload(const char *workload) { core_reset(); device_write(true, workload, 0, 0); core_restart(); } - int device_write(bool is_bypass, const char *workload, uint64_t addr, uint64_t value); + void device_write(bool is_bypass, const char *workload, uint64_t addr, uint64_t value); + // thread api void start_transmit_thread(); void stop_thansmit_thread();