From 325e6475d458c1f8c96e028ac31375c4d89d5307 Mon Sep 17 00:00:00 2001 From: Jaco Hofmann Date: Wed, 7 Aug 2019 11:26:03 +0200 Subject: [PATCH 01/10] Adds CPP version of memcheck --- runtime/examples/memcheck/CMakeLists.txt | 2 +- runtime/examples/memcheck/memcheck.c | 126 ----------------------- runtime/examples/memcheck/memcheck.cpp | 94 +++++++++++++++++ 3 files changed, 95 insertions(+), 127 deletions(-) delete mode 100644 runtime/examples/memcheck/memcheck.c create mode 100644 runtime/examples/memcheck/memcheck.cpp diff --git a/runtime/examples/memcheck/CMakeLists.txt b/runtime/examples/memcheck/CMakeLists.txt index d055a2cf..ea278bd1 100644 --- a/runtime/examples/memcheck/CMakeLists.txt +++ b/runtime/examples/memcheck/CMakeLists.txt @@ -9,7 +9,7 @@ find_package(TapascoPlatform REQUIRED) find_package(Tapasco REQUIRED) endif(NOT TARGET tapasco) -add_executable(memcheck memcheck.c) +add_executable(memcheck memcheck.cpp) set_tapasco_defaults(memcheck) target_link_libraries(memcheck PRIVATE tapasco tlkm platform tapasco-common) diff --git a/runtime/examples/memcheck/memcheck.c b/runtime/examples/memcheck/memcheck.c deleted file mode 100644 index 9db4f0a7..00000000 --- a/runtime/examples/memcheck/memcheck.c +++ /dev/null @@ -1,126 +0,0 @@ -// -// Copyright (C) 2014 Jens Korinth, TU Darmstadt -// -// This file is part of Tapasco (TPC). -// -// Tapasco is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// Tapasco is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with Tapasco. If not, see . -// -//! @file memcheck-mt-ff.cc -//! @brief Initializes the first TPC device and iterates over a number -//! of integer arrays of increasing size, allocating each array -//! on the device, copying to and from and then checking the -//! results. Basic regression test for platform implementations. -//! Single-threaded variant. -//! @author J. Korinth, TU Darmstadt (jk@esa.cs.tu-darmstadt.de) -//! -#include -#include -#include -#include -#include - -static tapasco_ctx_t *ctx; -static tapasco_devctx_t *dev; - -static void check(int const result) { - if (!result) { - fprintf(stderr, "fatal error: %s\n", strerror(errno)); - tapasco_destroy_device(ctx, dev); - tapasco_deinit(ctx); - exit(errno); - } -} - -static void check_fpga(tapasco_res_t const result) { - if (result != TAPASCO_SUCCESS) { - fprintf(stderr, "fpga fatal error: %s\n", tapasco_strerror(result)); - tapasco_destroy_device(ctx, dev); - tapasco_deinit(ctx); - exit(result); - } -} - -void init_array(int *arr, size_t sz) { - for (size_t i = 0; i < sz; ++i) - arr[i] = i; -} - -int compare_arrays(int const *arr, int const *rarr, size_t const sz) { - int errs = 0; - for (size_t i = 0; i < sz; ++i) { - if (rarr[i] != arr[i]) { - fprintf(stderr, "wrong data: arr[%zd] = %d != %d = rarr[%zd]\n", i, - arr[i], rarr[i], i); - ++errs; - } - } - return errs; -} - -int main(int argc, char **argv) { - int errs = 0; - size_t arr_szs[] = {1, 2, 8, 10, 16, 1024, 2048, 4096, 8192, 16384}; - - // initialize threadpool - check_fpga(tapasco_init(&ctx)); - check_fpga(tapasco_create_device(ctx, 0, &dev, 0)); - - for (int s = 0; s < sizeof(arr_szs) / sizeof(*arr_szs) && errs == 0; ++s) { - printf("Checking array size %zd (%zd byte) ...\n", arr_szs[s], - arr_szs[s] * sizeof(int)); - // allocate and fill array - int *arr = (int *)malloc(arr_szs[s] * sizeof(int)); - check(arr != NULL); - init_array(arr, arr_szs[s]); - // allocate array for read data - int *rarr = (int *)malloc(arr_szs[s] * sizeof(int)); - - // get fpga handle - tapasco_handle_t h; - tapasco_device_alloc(dev, &h, arr_szs[s] * sizeof(int), 0); - printf("handle = 0x%08lx\n", (unsigned long)h); - - // copy data to and back - printf("sizeof(arr) %zd, sizeof(rarr) %zd\n", sizeof(arr), sizeof(rarr)); - check_fpga( - tapasco_device_copy_to(dev, arr, h, arr_szs[s] * sizeof(int), 0)); - check_fpga( - tapasco_device_copy_from(dev, h, rarr, arr_szs[s] * sizeof(int), 0)); - - tapasco_device_free(dev, h, arr_szs[s] * sizeof(int), 0); - - int merr = compare_arrays(arr, rarr, arr_szs[s]); - errs = +merr; - - if (!merr) - printf("Array size %zd (%zd byte) ok!\n", arr_szs[s], - arr_szs[s] * sizeof(int)); - else - fprintf(stderr, "FAILURE: array size %zd (%zd byte) not ok.\n", - arr_szs[s], arr_szs[s] * sizeof(int)); - - free(arr); - free(rarr); - } - - if (!errs) - printf("\nSUCCESS\n"); - else - fprintf(stderr, "\nFAILURE\n"); - - // release device - tapasco_destroy_device(ctx, dev); - tapasco_deinit(ctx); - return errs; -} diff --git a/runtime/examples/memcheck/memcheck.cpp b/runtime/examples/memcheck/memcheck.cpp new file mode 100644 index 00000000..664ab8d3 --- /dev/null +++ b/runtime/examples/memcheck/memcheck.cpp @@ -0,0 +1,94 @@ +// +// Copyright (C) 2014 Jens Korinth, TU Darmstadt +// +// This file is part of Tapasco (TPC). +// +// Tapasco is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Tapasco is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with Tapasco. If not, see . +// +//! @file memcheck-mt-ff.cc +//! @brief Initializes the first TPC device and iterates over a number +//! of integer arrays of increasing size, allocating each array +//! on the device, copying to and from and then checking the +//! results. Basic regression test for platform implementations. +//! Single-threaded variant. +//! @author J. Korinth, TU Darmstadt (jk@esa.cs.tu-darmstadt.de) +//! +#include +#include +#include + +using namespace tapasco; + +std::vector init_array(size_t sz) { + std::vector vec; + for (size_t i = 0; i < sz; ++i) { + vec.push_back(i); + } + return vec; +} + +int compare_arrays(const std::vector &arr, const std::vector &rarr, + size_t const sz) { + int errs = 0; + for (size_t i = 0; i < sz; ++i) { + if (rarr[i] != arr[i]) { + std::cout << "wrong data: arr[" << i << "] = " << arr[i] + << " != " << rarr[i] << " = rarr[" << i << "]" << std::endl; + ++errs; + } + } + return errs; +} + +int main(int argc, char **argv) { + int errs = 0; + int max_pow = 20; + + Tapasco tapasco; + + for (int s = 0; s < max_pow && errs == 0; ++s) { + size_t len = 1 << s; + std::cout << "Checking array size " << len << "B" << std::endl; + auto arr = init_array(len / sizeof(int)); + + std::vector rarr(len / 4, 42); + + // get fpga handle + tapasco_handle_t h; + tapasco.alloc(h, len, (tapasco_device_alloc_flag_t)0); + std::cout << "handle = 0x" << std::hex << (unsigned long)h << std::dec + << std::endl; + + // copy data to and back + tapasco.copy_to(arr.data(), h, len, (tapasco_device_copy_flag_t)0); + tapasco.copy_from(h, rarr.data(), len, (tapasco_device_copy_flag_t)0); + + tapasco.free(h, len, (tapasco_device_alloc_flag_t)0); + + int merr = compare_arrays(arr, rarr, len); + errs = +merr; + + if (!merr) + std::cout << "Array size " << len << "B ok!" << std::endl; + else + std::cout << "FAILURE: array size " << len << "B not ok." << std::endl; + } + + if (!errs) + std::cout << "SUCCESS" << std::endl; + else + std::cout << "FAILURE" << std::endl; + + return errs; +} From 153cbea797266fd50a6b0e4e2acbd2ce4c93ad88 Mon Sep 17 00:00:00 2001 From: Jaco Hofmann Date: Wed, 7 Aug 2019 12:44:50 +0200 Subject: [PATCH 02/10] Fix memcheck and increase test range --- runtime/examples/memcheck/memcheck.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/runtime/examples/memcheck/memcheck.cpp b/runtime/examples/memcheck/memcheck.cpp index 664ab8d3..467d56d7 100644 --- a/runtime/examples/memcheck/memcheck.cpp +++ b/runtime/examples/memcheck/memcheck.cpp @@ -25,8 +25,10 @@ //! @author J. Korinth, TU Darmstadt (jk@esa.cs.tu-darmstadt.de) //! #include -#include #include +#include + +#include using namespace tapasco; @@ -53,22 +55,21 @@ int compare_arrays(const std::vector &arr, const std::vector &rarr, int main(int argc, char **argv) { int errs = 0; - int max_pow = 20; + int max_pow = 28; Tapasco tapasco; for (int s = 0; s < max_pow && errs == 0; ++s) { size_t len = 1 << s; std::cout << "Checking array size " << len << "B" << std::endl; - auto arr = init_array(len / sizeof(int)); + size_t elements = std::max((size_t)1, len / sizeof(int)); + auto arr = init_array(elements); - std::vector rarr(len / 4, 42); + std::vector rarr(elements, 42); // get fpga handle tapasco_handle_t h; tapasco.alloc(h, len, (tapasco_device_alloc_flag_t)0); - std::cout << "handle = 0x" << std::hex << (unsigned long)h << std::dec - << std::endl; // copy data to and back tapasco.copy_to(arr.data(), h, len, (tapasco_device_copy_flag_t)0); @@ -76,7 +77,7 @@ int main(int argc, char **argv) { tapasco.free(h, len, (tapasco_device_alloc_flag_t)0); - int merr = compare_arrays(arr, rarr, len); + int merr = compare_arrays(arr, rarr, elements); errs = +merr; if (!merr) From 96ef914505bd1679f562bfb410771d7a54598b45 Mon Sep 17 00:00:00 2001 From: Jaco Hofmann Date: Wed, 7 Aug 2019 13:20:07 +0200 Subject: [PATCH 03/10] Remove deprecated copyright header --- runtime/examples/memcheck/memcheck.cpp | 28 +------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/runtime/examples/memcheck/memcheck.cpp b/runtime/examples/memcheck/memcheck.cpp index 467d56d7..9c852ba1 100644 --- a/runtime/examples/memcheck/memcheck.cpp +++ b/runtime/examples/memcheck/memcheck.cpp @@ -1,32 +1,6 @@ -// -// Copyright (C) 2014 Jens Korinth, TU Darmstadt -// -// This file is part of Tapasco (TPC). -// -// Tapasco is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// Tapasco is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with Tapasco. If not, see . -// -//! @file memcheck-mt-ff.cc -//! @brief Initializes the first TPC device and iterates over a number -//! of integer arrays of increasing size, allocating each array -//! on the device, copying to and from and then checking the -//! results. Basic regression test for platform implementations. -//! Single-threaded variant. -//! @author J. Korinth, TU Darmstadt (jk@esa.cs.tu-darmstadt.de) -//! +#include #include #include -#include #include From 9d6144dfb6de7fa490fe6ef026cd142832bf810f Mon Sep 17 00:00:00 2001 From: Jaco Hofmann Date: Wed, 7 Aug 2019 13:20:48 +0200 Subject: [PATCH 04/10] Adds simple bandwidth checking tool --- runtime/examples/CMakeLists.txt | 6 +- runtime/examples/bandwidth/CMakeLists.txt | 20 ++++++ runtime/examples/bandwidth/bandwidth.cpp | 76 +++++++++++++++++++++++ 3 files changed, 97 insertions(+), 5 deletions(-) create mode 100644 runtime/examples/bandwidth/CMakeLists.txt create mode 100644 runtime/examples/bandwidth/bandwidth.cpp diff --git a/runtime/examples/CMakeLists.txt b/runtime/examples/CMakeLists.txt index 50e9a3b3..50ff7c81 100644 --- a/runtime/examples/CMakeLists.txt +++ b/runtime/examples/CMakeLists.txt @@ -1,8 +1,3 @@ -# -# Copyright (C) 2014 Jens Korinth, TU Darmstadt -# -# This file is part of Tapasco (TPC). -# # Tapasco is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or @@ -26,3 +21,4 @@ add_subdirectory(arrayupdate) add_subdirectory(memcheck) add_subdirectory(tapasco-benchmark) add_subdirectory(tapasco-debug) +add_subdirectory(bandwidth) \ No newline at end of file diff --git a/runtime/examples/bandwidth/CMakeLists.txt b/runtime/examples/bandwidth/CMakeLists.txt new file mode 100644 index 00000000..e8c0c4c4 --- /dev/null +++ b/runtime/examples/bandwidth/CMakeLists.txt @@ -0,0 +1,20 @@ +cmake_minimum_required(VERSION 3.5.1 FATAL_ERROR) +include($ENV{TAPASCO_HOME_RUNTIME}/cmake/Tapasco.cmake NO_POLICY_SCOPE) +project (bandwidth) + +if(NOT TARGET tapasco) +find_package(TapascoTLKM REQUIRED) +find_package(TapascoCommon REQUIRED) +find_package(TapascoPlatform REQUIRED) +find_package(Tapasco REQUIRED) +endif(NOT TARGET tapasco) + +add_executable(bandwidth bandwidth.cpp) +set_tapasco_defaults(bandwidth) +target_link_libraries(bandwidth PRIVATE tapasco tlkm platform tapasco-common) + +install(TARGETS bandwidth + ARCHIVE DESTINATION share/Tapasco/bin/ + LIBRARY DESTINATION share/Tapasco/bin/ + RUNTIME DESTINATION share/Tapasco/bin/) + diff --git a/runtime/examples/bandwidth/bandwidth.cpp b/runtime/examples/bandwidth/bandwidth.cpp new file mode 100644 index 00000000..8eefd490 --- /dev/null +++ b/runtime/examples/bandwidth/bandwidth.cpp @@ -0,0 +1,76 @@ +#include +#include +#include +#include + +#include + +using namespace tapasco; + +int main(int argc, char **argv) { + size_t max_pow = 30; + size_t data_to_transfer = 256*1024*1024L; + + Tapasco tapasco; + + for (size_t s = 12; s < max_pow; ++s) { + size_t len = 1 << s; + size_t elements = std::max((size_t)1, len / sizeof(int)); + + std::vector arr_to(elements, 42); + std::vector arr_from(elements, 42); + + // get fpga handle + tapasco_handle_t handle_to; + tapasco.alloc(handle_to, len, (tapasco_device_alloc_flag_t)0); + + tapasco_handle_t handle_from; + tapasco.alloc(handle_from, len, (tapasco_device_alloc_flag_t)0); + + size_t copied = 0; + + std::cout << "Write C " << len << "B @ "; + auto start = std::chrono::system_clock::now(); + while(copied < data_to_transfer) { + tapasco.copy_to(arr_to.data(), handle_to, len, (tapasco_device_copy_flag_t)0); + copied += len; + } + auto end = std::chrono::system_clock::now(); + + std::chrono::duration elapsed_seconds = end-start; + + std::cout << (data_to_transfer / elapsed_seconds.count()) / (1024.0 * 1024.0) << "MBps" << std::endl; + + copied = 0; + std::cout << "Read C " << len<< "B @ "; + start = std::chrono::system_clock::now(); + while(copied < data_to_transfer) { + tapasco.copy_from(handle_from, arr_from.data(), len, (tapasco_device_copy_flag_t)0); + copied += len; + } + end = std::chrono::system_clock::now(); + + elapsed_seconds = end-start; + + std::cout << (data_to_transfer / elapsed_seconds.count()) / (1024.0 * 1024.0) << "MBps" << std::endl; + + copied = 0; + std::cout << "ReadWrite C " << len << "B @ "; + while(copied < data_to_transfer) { + tapasco.copy_to(arr_to.data(), handle_to, len, (tapasco_device_copy_flag_t)0); + tapasco.copy_from(handle_from, arr_from.data(), len, (tapasco_device_copy_flag_t)0); + copied += len*2; + } + end = std::chrono::system_clock::now(); + + elapsed_seconds = end-start; + + std::cout << ((data_to_transfer*2) / elapsed_seconds.count()) / (1024.0 * 1024.0) << "MBps" << std::endl; + + + tapasco.free(handle_to, len, (tapasco_device_alloc_flag_t)0); + tapasco.free(handle_from, len, (tapasco_device_alloc_flag_t)0); + } + + return 0; +} From b0a1c5aa68b7b0b0548b61e7e7697003b646e5f8 Mon Sep 17 00:00:00 2001 From: Jaco Hofmann Date: Mon, 4 Nov 2019 11:01:25 +0100 Subject: [PATCH 05/10] Adds C++ version of arrayinit --- runtime/examples/arrayinit/CMakeLists.txt | 6 +- .../examples/arrayinit/arrayinit-example.cpp | 75 +++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 runtime/examples/arrayinit/arrayinit-example.cpp diff --git a/runtime/examples/arrayinit/CMakeLists.txt b/runtime/examples/arrayinit/CMakeLists.txt index 08d8a4ec..5d0dfab9 100644 --- a/runtime/examples/arrayinit/CMakeLists.txt +++ b/runtime/examples/arrayinit/CMakeLists.txt @@ -13,7 +13,11 @@ add_executable(arrayinit arrayinit-example.c) set_tapasco_defaults(arrayinit) target_link_libraries(arrayinit tapasco pthread platform tlkm) -install(TARGETS arrayinit +add_executable(arrayinit-cpp arrayinit-example.cpp) +set_tapasco_defaults(arrayinit-cpp) +target_link_libraries(arrayinit-cpp tapasco pthread platform tlkm) + +install(TARGETS arrayinit-cpp ARCHIVE DESTINATION share/Tapasco/bin/ LIBRARY DESTINATION share/Tapasco/bin/ RUNTIME DESTINATION share/Tapasco/bin/) diff --git a/runtime/examples/arrayinit/arrayinit-example.cpp b/runtime/examples/arrayinit/arrayinit-example.cpp new file mode 100644 index 00000000..dadb6882 --- /dev/null +++ b/runtime/examples/arrayinit/arrayinit-example.cpp @@ -0,0 +1,75 @@ +// Tapasco is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Tapasco is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with Tapasco. If not, see . +// +#include +#include +#include + +#define SZ 256 +#define RUNS 25 + +typedef int32_t element_type; +constexpr int ARRAYINIT_ID = 11; + +static uint64_t check_array(std::array &arr) { + unsigned int errs = 0; + for (int i = 0; i < (int)arr.size(); ++i) { + if (arr[i] != i) { + std::cerr << "ERROR: Value at " << i << " is " << arr[i] << std::endl; + ++errs; + } + } + return errs; +} + +int main(int argc, char **argv) { + // initialize TaPaSCo + tapasco::Tapasco tapasco; + + uint64_t errs = 0; + + // check arrayinit instance count + uint64_t instances = tapasco_device_kernel_pe_count(tapasco.device(), ARRAYINIT_ID); + std::cout << "Got " << instances << " arrayinit instances."; + if(!instances) { + std::cout << "Need at least one arrayinit instance to run."; + exit(1); + } + + for (int run = 0; run < RUNS; ++run) { + // Generate array for arrayinit output + std::array result; + result.fill(-1); + // Wrap the array to be TaPaSCo compatible + auto result_buffer_pointer = tapasco::makeWrappedPointer(result.data(), result.size() * sizeof(element_type)); + // Data will be copied back from the device only, no data will be moved to the device + auto result_buffer_out = tapasco::makeOutOnly(result_buffer_pointer); + + // Launch the job + // Arrayinit takes only one parameter: The location of the array. It will always initialize 256 Int`s. + auto job = tapasco.launch(ARRAYINIT_ID, result_buffer_out); + + // Wait for job completion. Will block execution until the job is done. + job(); + + errs += check_array(result); + std::cout << "RUN " << run << " " << (errs == 0 ? "OK" : "NOT OK"); + } + + if (!errs) + std::cout << "Arrayinit finished without errors." << std::endl; + else + std::cerr << "Arrayinit finished wit errors." << std::endl; + + return errs; +} From 8107a2d7a57fbb3d8bb2844374c7eb3510ea18a2 Mon Sep 17 00:00:00 2001 From: Jaco Hofmann Date: Mon, 4 Nov 2019 11:32:19 +0100 Subject: [PATCH 06/10] Adds arraysum and arrayupdate --- runtime/examples/arrayinit/CMakeLists.txt | 2 +- .../examples/arrayinit/arrayinit-example.cpp | 86 ++++++++++-------- runtime/examples/arraysum/CMakeLists.txt | 6 +- .../examples/arraysum/arraysum-example.cpp | 91 +++++++++++++++++++ runtime/examples/arrayupdate/CMakeLists.txt | 6 +- .../arrayupdate/arrayupdate-example.cpp | 86 ++++++++++++++++++ 6 files changed, 234 insertions(+), 43 deletions(-) create mode 100644 runtime/examples/arraysum/arraysum-example.cpp create mode 100644 runtime/examples/arrayupdate/arrayupdate-example.cpp diff --git a/runtime/examples/arrayinit/CMakeLists.txt b/runtime/examples/arrayinit/CMakeLists.txt index 5d0dfab9..bf0e685a 100644 --- a/runtime/examples/arrayinit/CMakeLists.txt +++ b/runtime/examples/arrayinit/CMakeLists.txt @@ -17,7 +17,7 @@ add_executable(arrayinit-cpp arrayinit-example.cpp) set_tapasco_defaults(arrayinit-cpp) target_link_libraries(arrayinit-cpp tapasco pthread platform tlkm) -install(TARGETS arrayinit-cpp +install(TARGETS arrayinit arrayinit-cpp ARCHIVE DESTINATION share/Tapasco/bin/ LIBRARY DESTINATION share/Tapasco/bin/ RUNTIME DESTINATION share/Tapasco/bin/) diff --git a/runtime/examples/arrayinit/arrayinit-example.cpp b/runtime/examples/arrayinit/arrayinit-example.cpp index dadb6882..7ea1a3bf 100644 --- a/runtime/examples/arrayinit/arrayinit-example.cpp +++ b/runtime/examples/arrayinit/arrayinit-example.cpp @@ -11,9 +11,9 @@ // You should have received a copy of the GNU Lesser General Public License // along with Tapasco. If not, see . // -#include -#include #include +#include +#include #define SZ 256 #define RUNS 25 @@ -22,54 +22,60 @@ typedef int32_t element_type; constexpr int ARRAYINIT_ID = 11; static uint64_t check_array(std::array &arr) { - unsigned int errs = 0; - for (int i = 0; i < (int)arr.size(); ++i) { - if (arr[i] != i) { - std::cerr << "ERROR: Value at " << i << " is " << arr[i] << std::endl; - ++errs; - } + unsigned int errs = 0; + for (size_t i = 0; i < arr.size(); ++i) { + if (arr[i] != (element_type)i) { + std::cerr << "ERROR: Value at " << i << " is " << arr[i] << std::endl; + ++errs; } - return errs; + } + return errs; } int main(int argc, char **argv) { - // initialize TaPaSCo - tapasco::Tapasco tapasco; + // initialize TaPaSCo + tapasco::Tapasco tapasco; - uint64_t errs = 0; + uint64_t errs = 0; - // check arrayinit instance count - uint64_t instances = tapasco_device_kernel_pe_count(tapasco.device(), ARRAYINIT_ID); - std::cout << "Got " << instances << " arrayinit instances."; - if(!instances) { - std::cout << "Need at least one arrayinit instance to run."; - exit(1); - } + // check arrayinit instance count + uint64_t instances = + tapasco_device_kernel_pe_count(tapasco.device(), ARRAYINIT_ID); + std::cout << "Got " << instances << " arrayinit instances."; + if (!instances) { + std::cout << "Need at least one arrayinit instance to run."; + exit(1); + } - for (int run = 0; run < RUNS; ++run) { - // Generate array for arrayinit output - std::array result; - result.fill(-1); - // Wrap the array to be TaPaSCo compatible - auto result_buffer_pointer = tapasco::makeWrappedPointer(result.data(), result.size() * sizeof(element_type)); - // Data will be copied back from the device only, no data will be moved to the device - auto result_buffer_out = tapasco::makeOutOnly(result_buffer_pointer); + for (int run = 0; run < RUNS; ++run) { + // Generate array for arrayinit output + std::array result; + result.fill(-1); + // Wrap the array to be TaPaSCo compatible + auto result_buffer_pointer = tapasco::makeWrappedPointer( + result.data(), result.size() * sizeof(element_type)); + // Data will be copied back from the device only, no data will be moved to + // the device + auto result_buffer_out = tapasco::makeOutOnly(result_buffer_pointer); - // Launch the job - // Arrayinit takes only one parameter: The location of the array. It will always initialize 256 Int`s. - auto job = tapasco.launch(ARRAYINIT_ID, result_buffer_out); + // Launch the job + // Arrayinit takes only one parameter: The location of the array. It will + // always initialize 256 Int`s. + auto job = tapasco.launch(ARRAYINIT_ID, result_buffer_out); - // Wait for job completion. Will block execution until the job is done. - job(); + // Wait for job completion. Will block execution until the job is done. + job(); - errs += check_array(result); - std::cout << "RUN " << run << " " << (errs == 0 ? "OK" : "NOT OK"); - } + int iter_errs = check_array(result); + errs += iter_errs; + std::cout << "RUN " << run << " " << (iter_errs == 0 ? "OK" : "NOT OK") + << std::endl; + } - if (!errs) - std::cout << "Arrayinit finished without errors." << std::endl; - else - std::cerr << "Arrayinit finished wit errors." << std::endl; + if (!errs) + std::cout << "Arrayinit finished without errors." << std::endl; + else + std::cerr << "Arrayinit finished wit errors." << std::endl; - return errs; + return errs; } diff --git a/runtime/examples/arraysum/CMakeLists.txt b/runtime/examples/arraysum/CMakeLists.txt index 26bea6d8..a3e253b7 100644 --- a/runtime/examples/arraysum/CMakeLists.txt +++ b/runtime/examples/arraysum/CMakeLists.txt @@ -13,7 +13,11 @@ add_executable(arraysum arraysum-example.c) set_tapasco_defaults(arraysum) target_link_libraries(arraysum tapasco pthread platform tlkm) -install(TARGETS arraysum +add_executable(arraysum-cpp arraysum-example.cpp) +set_tapasco_defaults(arraysum-cpp) +target_link_libraries(arraysum-cpp tapasco pthread platform tlkm) + +install(TARGETS arraysum arraysum-cpp ARCHIVE DESTINATION share/Tapasco/bin/ LIBRARY DESTINATION share/Tapasco/bin/ RUNTIME DESTINATION share/Tapasco/bin/) diff --git a/runtime/examples/arraysum/arraysum-example.cpp b/runtime/examples/arraysum/arraysum-example.cpp new file mode 100644 index 00000000..eddd39a7 --- /dev/null +++ b/runtime/examples/arraysum/arraysum-example.cpp @@ -0,0 +1,91 @@ +// Tapasco is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Tapasco is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with Tapasco. If not, see . +// +#include +#include +#include + +#define SZ 256 +#define RUNS 25 + +typedef int32_t element_type; +constexpr int PE_ID = 10; + +static void init_array(std::array &arr) { + for (size_t i = 0; i < arr.size(); ++i) + arr[i] = (element_type)i; +} + +static int arraysum(std::array &arr) { + int sum = 0; + for (size_t i = 0; i < arr.size(); i++) { + sum += arr[i]; + } + return sum; +} + +int main(int argc, char **argv) { + // initialize TaPaSCo + tapasco::Tapasco tapasco; + + uint64_t errs = 0; + + // check arraysum instance count + uint64_t instances = tapasco_device_kernel_pe_count(tapasco.device(), PE_ID); + std::cout << "Got " << instances << " arraysum instances."; + if (!instances) { + std::cout << "Need at least one arraysum instance to run."; + exit(1); + } + + for (int run = 0; run < RUNS; ++run) { + // Generate array for arraysum output + std::array input; + init_array(input); + + int cpu_sum = arraysum(input); + + // Wrap the array to be TaPaSCo compatible + auto input_buffer_pointer = tapasco::makeWrappedPointer( + input.data(), input.size() * sizeof(element_type)); + // Data will be copied back from the device only, no data will be moved to + // the device + auto input_buffer_in = tapasco::makeInOnly(input_buffer_pointer); + + int fpga_sum = -1; + tapasco::RetVal ret_val(fpga_sum); + + // Launch the job + // Arraysum takes only one parameter: The location of the array. It will + // always summarize 256 Int`s. + auto job = tapasco.launch(PE_ID, ret_val, input_buffer_in); + + // Wait for job completion. Will block execution until the job is done. + job(); + + if (cpu_sum == fpga_sum) { + std::cout << "RUN " << run << "OK" << std::endl; + } else { + std::cerr << "RUN" << run << " FAILED FPGA: " << fpga_sum + << " CPU: " << cpu_sum << std::endl; + ++errs; + } + } + + if (!errs) + std::cout << "Arraysum finished without errors." << std::endl; + else + std::cerr << "Arraysum finished wit errors." << std::endl; + + return errs; +} diff --git a/runtime/examples/arrayupdate/CMakeLists.txt b/runtime/examples/arrayupdate/CMakeLists.txt index 67de79d2..5e5b3fb2 100644 --- a/runtime/examples/arrayupdate/CMakeLists.txt +++ b/runtime/examples/arrayupdate/CMakeLists.txt @@ -13,7 +13,11 @@ add_executable(arrayupdate arrayupdate-example.c) set_tapasco_defaults(arrayupdate) target_link_libraries(arrayupdate tapasco pthread platform tlkm) -install(TARGETS arrayupdate +add_executable(arrayupdate-cpp arrayupdate-example.cpp) +set_tapasco_defaults(arrayupdate-cpp) +target_link_libraries(arrayupdate-cpp tapasco pthread platform tlkm) + +install(TARGETS arrayupdate arrayupdate-cpp ARCHIVE DESTINATION share/Tapasco/bin/ LIBRARY DESTINATION share/Tapasco/bin/ RUNTIME DESTINATION share/Tapasco/bin/) diff --git a/runtime/examples/arrayupdate/arrayupdate-example.cpp b/runtime/examples/arrayupdate/arrayupdate-example.cpp new file mode 100644 index 00000000..14da8955 --- /dev/null +++ b/runtime/examples/arrayupdate/arrayupdate-example.cpp @@ -0,0 +1,86 @@ +// Tapasco is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Tapasco is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with Tapasco. If not, see . +// +#include +#include +#include + +#define SZ 256 +#define RUNS 25 + +typedef int32_t element_type; +constexpr int PE_ID = 9; + +static void init_array(std::array &arr) { + for (size_t i = 0; i < arr.size(); ++i) + arr[i] = (element_type)i; +} + +static int arraycheck(std::array &arr) { + int errs = 0; + for (size_t i = 0; i < arr.size(); i++) { + if (arr[i] != ((element_type)i) + 42) { + std::cerr << "ERROR: Value at " << i << " is " << arr[i] << std::endl; + ++errs; + } + } + return errs; +} + +int main(int argc, char **argv) { + // initialize TaPaSCo + tapasco::Tapasco tapasco; + + uint64_t errs = 0; + + // check arrayupdate instance count + uint64_t instances = tapasco_device_kernel_pe_count(tapasco.device(), PE_ID); + std::cout << "Got " << instances << " arrayupdate instances."; + if (!instances) { + std::cout << "Need at least one arrayupdate instance to run."; + exit(1); + } + + for (int run = 0; run < RUNS; ++run) { + // Generate array for arrayupdate output + std::array input; + init_array(input); + + // Wrap the array to be TaPaSCo compatible + auto input_buffer_pointer = tapasco::makeWrappedPointer( + input.data(), input.size() * sizeof(element_type)); + + // Launch the job + // Arrayupdate takes only one parameter: The location of the array. It will + // always update 256 Int`s. + auto job = tapasco.launch(PE_ID, input_buffer_pointer); + + // Wait for job completion. Will block execution until the job is done. + job(); + + int iter_errs = arraycheck(input); + errs += iter_errs; + if (!iter_errs) { + std::cout << "RUN " << run << "OK" << std::endl; + } else { + std::cerr << "RUN" << run << " FAILED" << std::endl; + } + } + + if (!errs) + std::cout << "Arrayupdate finished without errors." << std::endl; + else + std::cerr << "Arrayupdate finished wit errors." << std::endl; + + return errs; +} From d715c49cf13e0a354876db450018f6fcd45744ea Mon Sep 17 00:00:00 2001 From: Jaco Hofmann Date: Mon, 4 Nov 2019 16:05:17 +0100 Subject: [PATCH 07/10] Removes deprecated benchmarks --- .../benchmark-alloc-free/CMakeLists.txt | 16 -- .../benchmark-alloc-free/alloc-speed.gnuplot | 23 -- .../benchmark-alloc-dealloc.c | 133 ---------- .../examples/benchmark-alloc-free/makeplot.sh | 24 -- runtime/examples/benchmark-alloc-free/timer.h | 73 ------ runtime/examples/benchmark-cd/CMakeLists.txt | 15 -- runtime/examples/benchmark-cd/benchmark-cd.c | 170 ------------- .../examples/benchmark-cd/job-speed.gnuplot | 29 --- runtime/examples/benchmark-cd/makeplot.sh | 24 -- .../examples/benchmark-latency/CMakeLists.txt | 21 -- .../benchmark-latency/benchmark-latency.c | 220 ---------------- .../benchmark-latency/benchmark-latency.cpp | 236 ------------------ runtime/examples/benchmark-mem/CMakeLists.txt | 22 -- .../examples/benchmark-mem/benchmark-mem.c | 198 --------------- .../examples/benchmark-mem/benchmark-mem.cpp | 209 ---------------- runtime/examples/benchmark-mem/makeplot.sh | 24 -- runtime/examples/benchmark-mem/timer.h | 73 ------ .../benchmark-mem/transfer-speed.gnuplot | 23 -- 18 files changed, 1533 deletions(-) delete mode 100644 runtime/examples/benchmark-alloc-free/CMakeLists.txt delete mode 100755 runtime/examples/benchmark-alloc-free/alloc-speed.gnuplot delete mode 100644 runtime/examples/benchmark-alloc-free/benchmark-alloc-dealloc.c delete mode 100755 runtime/examples/benchmark-alloc-free/makeplot.sh delete mode 100644 runtime/examples/benchmark-alloc-free/timer.h delete mode 100644 runtime/examples/benchmark-cd/CMakeLists.txt delete mode 100644 runtime/examples/benchmark-cd/benchmark-cd.c delete mode 100755 runtime/examples/benchmark-cd/job-speed.gnuplot delete mode 100755 runtime/examples/benchmark-cd/makeplot.sh delete mode 100644 runtime/examples/benchmark-latency/CMakeLists.txt delete mode 100644 runtime/examples/benchmark-latency/benchmark-latency.c delete mode 100644 runtime/examples/benchmark-latency/benchmark-latency.cpp delete mode 100644 runtime/examples/benchmark-mem/CMakeLists.txt delete mode 100644 runtime/examples/benchmark-mem/benchmark-mem.c delete mode 100644 runtime/examples/benchmark-mem/benchmark-mem.cpp delete mode 100755 runtime/examples/benchmark-mem/makeplot.sh delete mode 100644 runtime/examples/benchmark-mem/timer.h delete mode 100755 runtime/examples/benchmark-mem/transfer-speed.gnuplot diff --git a/runtime/examples/benchmark-alloc-free/CMakeLists.txt b/runtime/examples/benchmark-alloc-free/CMakeLists.txt deleted file mode 100644 index ce8211d7..00000000 --- a/runtime/examples/benchmark-alloc-free/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -cmake_minimum_required(VERSION 3.5.1 FATAL_ERROR) -include($ENV{TAPASCO_HOME_RUNTIME}/cmake/Tapasco.cmake NO_POLICY_SCOPE) -project (benchmark-alloc-dealloc) - -find_package(TapascoTLKM REQUIRED) -find_package(TapascoCommon REQUIRED) -find_package(TapascoPlatform REQUIRED) -find_package(Tapasco REQUIRED) - -add_executable (benchmark-alloc-dealloc benchmark-alloc-dealloc.c) -set_tapasco_defaults(benchmark-alloc-dealloc) -target_link_libraries (benchmark-alloc-dealloc m rt pthread tapasco platform) - -install (TARGETS benchmark-alloc-dealloc - RUNTIME DESTINATION share/Tapasco/bin/) - diff --git a/runtime/examples/benchmark-alloc-free/alloc-speed.gnuplot b/runtime/examples/benchmark-alloc-free/alloc-speed.gnuplot deleted file mode 100755 index bb3e38f0..00000000 --- a/runtime/examples/benchmark-alloc-free/alloc-speed.gnuplot +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/gnuplot -set terminal pdf enhanced -set output '' - -set style data histogram -set style histogram cluster gap 1 - -set style fill solid border rgb "black" -set auto x -set xtics rotate by 90 right -set xrange [0:*] reverse -set yrange [1:*] -set xlabel 'Allocation Size (KiB)' -set ylabel 'Allocation Speed (alloc+dealloc/s)' -set grid noxtics ytics -show grid - -set datafile separator "," - -set key right top invert -set logscale y - -plot for [i=3:2:-1] "" using i:xtic(1) title col diff --git a/runtime/examples/benchmark-alloc-free/benchmark-alloc-dealloc.c b/runtime/examples/benchmark-alloc-free/benchmark-alloc-dealloc.c deleted file mode 100644 index acc3ea9f..00000000 --- a/runtime/examples/benchmark-alloc-free/benchmark-alloc-dealloc.c +++ /dev/null @@ -1,133 +0,0 @@ -// -// Copyright (C) 2014 Jens Korinth, TU Darmstadt -// -// This file is part of Tapasco (TPC). -// -// Tapasco is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// Tapasco is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with Tapasco. If not, see . -// -//! @file benchmark-mem.c -//! @brief TPC API application that performs a simplistic benchmark on the -//! implementation: It allocates memory as fast as possible in chunk -//! sizes ranging from 2^12 (== 4KiB) to 2^26 (== 64MiB) with one -//! thread per core. -//! The program output can be used for the gnuplot script in this -//! directory to generate a bar plot. -//! @authors J. Korinth, TU Darmstadt (jk@esa.cs.tu-darmstadt.de) -//! -#include "timer.h" -#include -#include -#include -#include -#include -#include -#include -#include - -#define ALLOCATION_COUNT (1000) -#define UPPER_BND (26) -#define LOWER_BND (12) - -typedef unsigned long int ul; -typedef long int l; - -static ul chunk_sz; -static l allocations; -static ul errors; -static l mode; - -static tapasco_ctx_t *ctx; -static tapasco_dev_ctx_t *dev; - -static inline void alloc_dealloc(size_t const sz) { - void *ptr = malloc(sz); - if (!ptr) - __sync_fetch_and_add(&errors, 1); - free(ptr); -} - -static inline void tapasco_alloc_dealloc(size_t const sz) { - tapasco_handle_t h; - tapasco_device_alloc(dev, &h, sz, 0); - if (h <= 0) - __sync_fetch_and_add(&errors, 1); - else - tapasco_device_free(dev, h, 0); -} - -static void *run(void *p) { - size_t const sz = (size_t)p; - while (!errors && __sync_sub_and_fetch(&allocations, 1) > 0) { - if (mode) - tapasco_alloc_dealloc(sz); - else - alloc_dealloc(sz); - } - return NULL; -} - -static void print_header(void) { - printf("Allocation Size (KiB),virt. mem (alloc+dealloc/s),DMA mem " - "(alloc+dealloc/s)\n"); -} - -static void print_line(ul const *times) { - printf("%lu,%3.2f,%3.2f\n", chunk_sz / 1024, - ALLOCATION_COUNT / (times[0] / 1000000.0), - ALLOCATION_COUNT / (times[1] / 1000000.0)); -} - -static void check_tapasco(tapasco_res_t const result) { - if (result != TAPASCO_SUCCESS) { - fprintf(stderr, "tapasco fatal error: %s\n", tapasco_strerror(result)); - exit(result); - } -} - -int main(int argc, char **argv) { - int pw, i; - pthread_t threads[sysconf(_SC_NPROCESSORS_CONF)]; - ul times[2] = {0}; - - // init timer and data - TIMER_INIT(); - - // initialize threadpool - check_tapasco(tapasco_init(&ctx)); - check_tapasco(tapasco_create_device(ctx, 0, &dev, 0)); - - print_header(); - TIMER_START(total) - for (pw = UPPER_BND; pw >= LOWER_BND; --pw) { - chunk_sz = (size_t)(pow(2, pw)); - for (mode = 0; mode < 2; ++mode) { - allocations = ALLOCATION_COUNT; - errors = 0; - TIMER_START(run) - for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); ++i) - pthread_create(&threads[i], NULL, run, (void *)chunk_sz); - for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); ++i) - pthread_join(threads[i], NULL); - TIMER_STOP(run) - // fprintf(stderr, "\nerrors = %lu\n", errors); - times[mode] = errors ? 0 : TIMER_USECS(run); - } - print_line(times); - } - TIMER_STOP(total) - fprintf(stderr, "Total duration: %llu us.\n", TIMER_USECS(total)); - // de-initialize threadpool - tapasco_destroy_device(ctx, dev); - tapasco_deinit(ctx); -} diff --git a/runtime/examples/benchmark-alloc-free/makeplot.sh b/runtime/examples/benchmark-alloc-free/makeplot.sh deleted file mode 100755 index 4ad9a524..00000000 --- a/runtime/examples/benchmark-alloc-free/makeplot.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -# -# Copyright (C) 2014 Jens Korinth, TU Darmstadt -# -# This file is part of Tapasco (TPC). -# -# Tapasco is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Tapasco is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with Tapasco. If not, see . -# -FILENAME=$1 -CSV=$FILENAME.csv -PDF=$FILENAME.pdf - -cat alloc-speed.gnuplot | sed "s//$CSV/g" | sed "s//$PDF/g" | gnuplot diff --git a/runtime/examples/benchmark-alloc-free/timer.h b/runtime/examples/benchmark-alloc-free/timer.h deleted file mode 100644 index cfea2daa..00000000 --- a/runtime/examples/benchmark-alloc-free/timer.h +++ /dev/null @@ -1,73 +0,0 @@ -// -// Copyright (C) 2014 Jens Korinth, TU Darmstadt -// -// This file is part of Tapasco (TPC). -// -// Tapasco is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// Tapasco is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with Tapasco. If not, see . -// -//! @file timer.h -//! @brief C macros for high-precision timing (Linux, Mac OS X). -//! @authors J. Korinth, TU Darmstadt (jk@esa.cs.tu-darmstadt.de) -//! -#ifndef __TIMER_H__ -#define __TIMER_H__ - -#ifdef __APPLE__ -#include -#include - -static mach_timebase_info_data_t _tb; - -#define TIMER_INIT() mach_timebase_info(&_tb); - -#define TIMER_START(name) uint64_t ts_start_##name = mach_absolute_time(); - -#define TIMER_STOP(name) uint64_t ts_stop_##name = mach_absolute_time(); - -#define TIMER_USECS(name) \ - (uint64_t)((double)(ts_stop_##name - ts_start_##name) * (double)_tb.numer / \ - (double)_tb.denom / (double)1e3) - -#else -#include -#include - -static struct timespec _tb; - -#define TIMER_INIT() clock_getres(CLOCK_MONOTONIC, &_tb) - -#define TIMER_START(name) \ - struct timespec tp_start_##name; \ - clock_gettime(CLOCK_MONOTONIC, &tp_start_##name); - -#define TIMER_STOP(name) \ - struct timespec tp_stop_##name; \ - clock_gettime(CLOCK_MONOTONIC, &tp_stop_##name); - -#define TIMER_USECS(name) tp_diff_usecs(&tp_stop_##name, &tp_start_##name) - -static inline unsigned long long tp_diff_usecs(struct timespec *stop, - struct timespec *start) { - if (stop->tv_nsec < start->tv_nsec) { - return (stop->tv_sec - 1 - start->tv_sec) * 1000000ULL + - (1000000000ULL + stop->tv_nsec - start->tv_nsec) / 1000ULL; - } else { - return (stop->tv_sec - start->tv_sec) * 1000000ULL + - (stop->tv_nsec - start->tv_nsec) / 1000ULL; - } -} - -#endif - -#endif /* __TIMER_H__ */ diff --git a/runtime/examples/benchmark-cd/CMakeLists.txt b/runtime/examples/benchmark-cd/CMakeLists.txt deleted file mode 100644 index 2d55361b..00000000 --- a/runtime/examples/benchmark-cd/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -cmake_minimum_required(VERSION 3.5.1 FATAL_ERROR) -include($ENV{TAPASCO_HOME_RUNTIME}/cmake/Tapasco.cmake NO_POLICY_SCOPE) -project (benchmark-cd) - -find_package(TapascoTLKM REQUIRED) -find_package(TapascoCommon REQUIRED) -find_package(TapascoPlatform REQUIRED) -find_package(Tapasco REQUIRED) - -add_executable(benchmark-cd benchmark-cd.c) -set_tapasco_defaults(benchmark-cd) -target_link_libraries(benchmark-cd rt pthread tapasco platform) - -install(TARGETS benchmark-cd - RUNTIME DESTINATION share/Tapasco/bin/) diff --git a/runtime/examples/benchmark-cd/benchmark-cd.c b/runtime/examples/benchmark-cd/benchmark-cd.c deleted file mode 100644 index dfa64dbe..00000000 --- a/runtime/examples/benchmark-cd/benchmark-cd.c +++ /dev/null @@ -1,170 +0,0 @@ -// -// Copyright (C) 2014 Jens Korinth, TU Darmstadt -// -// This file is part of Tapasco (TPC). -// -// Tapasco is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// Tapasco is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with Tapasco. If not, see . -// -#include "../benchmark-mem/timer.h" -#include -#include -#include -#include -#include -#include -#include - -#define MIN_NSECS (10000) -#define MAX_NSECS (1000000) -#define NSTEPS (15) -#define JOBS (1000) - -static long jobs; -static long errors; -static long mode; - -static tapasco_ctx_t *ctx; -static tapasco_dev_ctx_t *dev; - -static inline void check_tapasco(tapasco_res_t const result) { - if (result != TAPASCO_SUCCESS) { - fprintf(stderr, "tapasco fatal error: %s\n", tapasco_strerror(result)); - exit(result); - } -} - -static inline double clock_period(void) { - static double period = 0.0; - if (period == 0.0) { - unsigned long hz; - char buf[1024] = ""; - ssize_t rc; - int fd = open("/sys/class/fclk/fclk0/set_rate", O_RDONLY); - if (fd == -1) { - fprintf(stderr, "WARNING: could not open /sys/class/fclk/fclk0/set_rate, " - "using TAPASCO_FREQ\n"); - assert(getenv("TAPASCO_FREQ") && "must set TAPASCO_FREQ env var!"); - hz = strtoul(getenv("TAPASCO_FREQ"), NULL, 0) * 1000000; - } else { - rc = read(fd, buf, 1023); - assert(rc); - (void)rc; - fprintf(stderr, "fclk/set_rate = %s", buf); - close(fd); - hz = strtoul(buf, NULL, 0); - } - period = 1.0 / (hz / 1000000000.0); - fprintf(stderr, "period = %3.2f ns\n", period); - } - return period; -} - -static inline unsigned long ns_to_cd(unsigned long ns) { - // convert to countdown value: - // t = 2 * period + n * 2 * period - // i.e., 2 cycles init + 2 cycles per loop iteration - return ns / (2 * clock_period()) - 1; -} - -static inline void tapasco_run(long cc) { - tapasco_job_id_t j_id = tapasco_device_acquire_job_id(dev, 14, 0); - tapasco_device_job_set_arg(dev, j_id, 0, sizeof(cc), &cc); - if (tapasco_device_job_launch( - dev, j_id, TAPASCO_DEVICE_JOB_LAUNCH_BLOCKING) != TAPASCO_SUCCESS) - __atomic_fetch_add(&errors, 1, __ATOMIC_SEQ_CST); - tapasco_device_release_job_id(dev, j_id); -} - -static inline void cpu_run(long us) { usleep(us); } - -static inline void *run(void *p) { - long job; - long clk = (long)p; - long cc = ns_to_cd(clk); - long us = clk / 1000; - while ((job = __atomic_fetch_sub(&jobs, 1, __ATOMIC_SEQ_CST)) > 0) { - if (mode == 0) - tapasco_run(cc); - else - cpu_run(us); - } - return NULL; -} - -static inline void print_header(void) { - long const pc = sysconf(_SC_NPROCESSORS_CONF); - printf("Kernel Runtime (us)"); - for (int no_p = 1; no_p <= pc; ++no_p) - printf(",Ideal (%d cores), CPU (%d core), FPGA (%d core)", no_p, no_p, - no_p); - printf("\n\n"); -} - -static inline void print_line(double clk, double *t) { - long const pc = sysconf(_SC_NPROCESSORS_CONF); - printf("%3.2f", clk); - for (int no_p = 1; no_p <= pc; ++no_p) { - double cpu_t = 1.0 / (t[no_p - 1] / clk / JOBS); - double fpga_t = 1.0 / (t[no_p + pc - 1] / clk / JOBS); - printf(", %3.4f, %3.4f, %3.4f", (float)no_p, cpu_t, fpga_t); - } - /*printf("%3.2f, %3.2f, %3.2f", clk, ideal_1, ideal_n); - for (int i = 0; i < sysconf(_SC_NPROCESSORS_CONF) * 2; ++i) { - double const actual = 1.0 / (t[i] / clk / JOBS); - printf(", %3.8f", actual); - }*/ - printf("\n"); -} - -int main(int argc, char **argv) { - long clk_step, clk; - pthread_t threads[sysconf(_SC_NPROCESSORS_CONF)]; - double times[sysconf(_SC_NPROCESSORS_CONF) * 2]; - - // init timer and data - TIMER_INIT(); - - // initialize threadpool - check_tapasco(tapasco_init(&ctx)); - check_tapasco(tapasco_create_device(ctx, 0, &dev, 0)); - assert(tapasco_device_func_instance_count(dev, 14) > 0); - - clk_step = (MAX_NSECS - MIN_NSECS) / NSTEPS; - clk = MIN_NSECS; - print_header(); - TIMER_START(total) - for (int i = 0; i <= NSTEPS; ++i, clk += clk_step) { - for (mode = 0; mode < sysconf(_SC_NPROCESSORS_CONF) * 2; ++mode) { - // for (int nt = 1; nt <= 1/*sysconf(_SC_NPROCESSORS_CONF)*/; ++nt) { - // - jobs = JOBS; - int const nt = mode % sysconf(_SC_NPROCESSORS_CONF) + 1; - errors = 0; - TIMER_START(run) - for (int i = 0; i < nt; ++i) - pthread_create(&threads[i], NULL, run, (void *)clk); - for (int i = 0; i < nt; ++i) - pthread_join(threads[i], NULL); - TIMER_STOP(run) - times[mode] = errors ? 0.0 : TIMER_USECS(run); - //} - } - print_line(clk / 1000.0, times); - } - TIMER_STOP(total) - fprintf(stderr, "Total duration: %llu us.\n", TIMER_USECS(total)); - // de-initialize threadpool - tapasco_destroy_device(ctx, dev); - tapasco_deinit(ctx); -} diff --git a/runtime/examples/benchmark-cd/job-speed.gnuplot b/runtime/examples/benchmark-cd/job-speed.gnuplot deleted file mode 100755 index e34ab24d..00000000 --- a/runtime/examples/benchmark-cd/job-speed.gnuplot +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/gnuplot -set terminal pdf enhanced size 29.7cm,21cm -set output '' - -set style data histogram -set style histogram cluster gap 2 - -set style fill solid border rgb "black" -set auto x -set xtics rotate by 90 right -set xrange [0:*] -set yrange [*:*] -set xlabel 'Approx. Kernel Runtime (us)' -set ylabel 'Speedup (compared to ideal 1 core)' -set grid noxtics ytics -show grid - -set datafile separator "," -set key below - -set style line 2 lc rgb '#e31a1c' -set style line 3 lc rgb '#1f78b4' -set style line 4 lc rgb '#33a02c' -set style line 5 lc rgb '#fb9a99' -set style line 6 lc rgb '#a6cee3' -set style line 7 lc rgb '#b2df8a' - -plot for [i=2:7:1] "" using i:xtic(1) title col ls i - diff --git a/runtime/examples/benchmark-cd/makeplot.sh b/runtime/examples/benchmark-cd/makeplot.sh deleted file mode 100755 index 390756fa..00000000 --- a/runtime/examples/benchmark-cd/makeplot.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -# -# Copyright (C) 2014 Jens Korinth, TU Darmstadt -# -# This file is part of Tapasco (TPC). -# -# Tapasco is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Tapasco is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with Tapasco. If not, see . -# -FILENAME=$1 -CSV=$FILENAME.csv -PDF=$FILENAME.pdf - -cat job-speed.gnuplot | sed "s//$CSV/g" | sed "s//$PDF/g" | gnuplot diff --git a/runtime/examples/benchmark-latency/CMakeLists.txt b/runtime/examples/benchmark-latency/CMakeLists.txt deleted file mode 100644 index 5ddbd39a..00000000 --- a/runtime/examples/benchmark-latency/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -cmake_minimum_required(VERSION 3.5.1 FATAL_ERROR) -include($ENV{TAPASCO_HOME_RUNTIME}/cmake/Tapasco.cmake NO_POLICY_SCOPE) -project (benchmark-latency) - -find_package(TapascoTLKM REQUIRED) -find_package(TapascoCommon REQUIRED) -find_package(TapascoPlatform REQUIRED) -find_package(Tapasco REQUIRED) - -add_executable(benchmark-latency benchmark-latency.c) -set_tapasco_defaults(benchmark-latency) -target_link_libraries(benchmark-latency rt pthread tapasco platform atomic) - -add_executable(benchmark-latency++ benchmark-latency.cpp) -set_tapasco_defaults(benchmark-latency++) -target_link_libraries(benchmark-latency++ m pthread atomic tapasco platform atomic) - -install(TARGETS benchmark-latency - ARCHIVE DESTINATION share/Tapasco/bin/ - LIBRARY DESTINATION share/Tapasco/bin/ - RUNTIME DESTINATION share/Tapasco/bin/) diff --git a/runtime/examples/benchmark-latency/benchmark-latency.c b/runtime/examples/benchmark-latency/benchmark-latency.c deleted file mode 100644 index 0bd73fb1..00000000 --- a/runtime/examples/benchmark-latency/benchmark-latency.c +++ /dev/null @@ -1,220 +0,0 @@ -// -// Copyright (C) 2014 Jens Korinth, TU Darmstadt -// -// This file is part of Tapasco (TPC). -// -// Tapasco is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// Tapasco is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with Tapasco. If not, see . -// -#include "../benchmark-mem/timer.h" -#include -#include -#include -#include -#include -#include -#include -#include - -#define MIN_NSECS (10000) -#define MAX_NSECS (1000000) -#define NSTEPS (15) -#define JOBS (10) - -struct config_t { - unsigned long int min; - unsigned long int max; - unsigned long int time_steps; - unsigned long int iterations; -}; - -static long errors; - -static tapasco_ctx_t *ctx; -static tapasco_dev_ctx_t *dev; - -static inline void check_tapasco(tapasco_res_t const result) { - if (result != TAPASCO_SUCCESS) { - fprintf(stderr, "tapasco fatal error: %s\n", tapasco_strerror(result)); - exit(result); - } -} - -static inline double clock_period(void) { - static double period = 0.0; - if (period == 0.0) { - unsigned long hz; - char buf[1024] = ""; - ssize_t rc; - (void)rc; - int fd = open("/sys/class/fclk/fclk0/set_rate", O_RDONLY); - if (fd == -1) { - fprintf(stderr, "WARNING: could not open /sys/class/fclk/fclk0/set_rate, " - "using TAPASCO_FREQ\n"); - assert(getenv("TAPASCO_FREQ") && "must set TAPASCO_FREQ env var!"); - hz = strtoul(getenv("TAPASCO_FREQ"), NULL, 0) * 1000000; - } else { - rc = read(fd, buf, 1023); - assert(rc); - fprintf(stderr, "fclk/set_rate = %s", buf); - close(fd); - hz = strtoul(buf, NULL, 0); - } - period = 1.0 / (hz / 1000000000.0); - fprintf(stderr, "period = %3.2f ns\n", period); - } - return period; -} - -static inline unsigned long ns_to_cd(unsigned long ns) { - // convert to countdown value: - // t = 2 * period + n * 2 * period - // i.e., 2 cycles init + 2 cycles per loop iteration - return ns / (2 * clock_period()) - 1; -} - -static inline void tapasco_run(uint32_t cc) { - tapasco_job_id_t j_id = tapasco_device_acquire_job_id(dev, 14, 0); - tapasco_device_job_set_arg(dev, j_id, 0, sizeof(cc), &cc); - if (tapasco_device_job_launch( - dev, j_id, TAPASCO_DEVICE_JOB_LAUNCH_BLOCKING) != TAPASCO_SUCCESS) - __atomic_fetch_add(&errors, 1, __ATOMIC_SEQ_CST); - tapasco_device_release_job_id(dev, j_id); -} - -static inline void platform_run(uint32_t cc) { - uint32_t const start = 1; - platform_ctl_addr_t sb = platform_address_get_slot_base(0, 0); - if (platform_write_ctl(sb + 0x20, 4, &cc, PLATFORM_CTL_FLAGS_NONE) != - PLATFORM_SUCCESS) - __atomic_fetch_add(&errors, 1, __ATOMIC_SEQ_CST); - if (platform_write_ctl_and_wait(sb, 4, &start, 0, PLATFORM_CTL_FLAGS_NONE) != - PLATFORM_SUCCESS) - __atomic_fetch_add(&errors, 1, __ATOMIC_SEQ_CST); - // ack interrupt - if (platform_write_ctl(sb + 0xc, 4, &start, PLATFORM_CTL_FLAGS_NONE) != - PLATFORM_SUCCESS) - __atomic_fetch_add(&errors, 1, __ATOMIC_SEQ_CST); -} - -static inline void print_header(void) { - printf("Kernel time (ns), Kernel time (cycles), Average Latency TPC (us), " - "Average Latency Platform (us)\n"); -} - -static inline void print_line(double clk, unsigned long long t1, - unsigned long long t2) { - printf("%3.2f, %lu, %llu, %llu\n", clk, ns_to_cd(clk), t1, t2); -} - -static inline void print_usage(void) { - fprintf(stderr, "Usage: benchmark-latency [ [ " - "[ []]]] with\n" - "\t = minimum kernel runtime in ns " - " (default: 10ns)\n" - "\t = maximum kernel runtime in ns " - " (default: 10000ns)\n" - "\t = number of equidistant sampling points " - " (default:10)\n" - "\t = number of iterations at each sampling " - "point (default:1000)\n\n"); -} - -static inline void check_parse(unsigned long v) { - if (v == 0) { - fprintf(stderr, "ERROR: invalid option string!\n"); - print_usage(); - exit(EXIT_FAILURE); - } -} - -static inline void print_args(struct config_t const *cfg) { - fprintf(stderr, - "Configuration:\n" - "\tminimum kernel time = %lu\n" - "\tmaximum kernel time = %lu\n" - "\tkernel time steps = %lu\n" - "\titerations = %lu\n\n", - cfg->min, cfg->max, cfg->time_steps, cfg->iterations); -} - -static inline void parse_args(int argc, char **argv, struct config_t *cfg) { - // set defaults - cfg->min = 10; - cfg->max = 10000; - cfg->time_steps = 10; - cfg->iterations = 10; - - // try to parse arguments (if some where given) - if (argc > 1) { - cfg->min = strtoul(argv[1], NULL, 0); - check_parse(cfg->min); - } - if (argc > 2) { - cfg->max = strtoul(argv[2], NULL, 0); - check_parse(cfg->max); - } - if (argc > 3) { - cfg->time_steps = strtoul(argv[3], NULL, 0); - check_parse(cfg->time_steps); - } - if (argc > 4) { - cfg->iterations = strtoul(argv[4], NULL, 0); - check_parse(cfg->iterations); - } - print_args(cfg); -} - -int main(int argc, char **argv) { - struct config_t cfg; - parse_args(argc, argv, &cfg); - - unsigned long long int times[cfg.time_steps]; - - // init timer and data - TIMER_INIT(); - - // initialize threadpool - check_tapasco(tapasco_init(&ctx)); - check_tapasco(tapasco_create_device(ctx, 0, &dev, 0)); - assert(tapasco_device_func_instance_count(dev, 14) > 0); - - unsigned long int clk_step = (cfg.max - cfg.min) / cfg.time_steps; - unsigned long int clk = cfg.min; - print_header(); - TIMER_START(total) - for (int i = 0; i < cfg.time_steps; ++i, clk += clk_step) { - TIMER_START(run) - for (int j = 0; j < cfg.iterations; ++j) - tapasco_run(ns_to_cd(clk)); - TIMER_STOP(run) - times[i] = - (TIMER_USECS(run) - (clk * cfg.iterations / 1000)) / cfg.iterations; - - TIMER_START(papi_run) - for (int j = 0; j < cfg.iterations; ++j) - platform_run(ns_to_cd(clk)); - TIMER_STOP(papi_run) - unsigned long long int papi_time = - (TIMER_USECS(papi_run) - (clk * cfg.iterations / 1000)) / - cfg.iterations; - - print_line(clk, times[i], papi_time); - } - TIMER_STOP(total) - fprintf(stderr, "Total duration: %llu us, errors: %ld.\n", TIMER_USECS(total), - errors); - // de-initialize threadpool - tapasco_destroy_device(ctx, dev); - tapasco_deinit(ctx); -} diff --git a/runtime/examples/benchmark-latency/benchmark-latency.cpp b/runtime/examples/benchmark-latency/benchmark-latency.cpp deleted file mode 100644 index a42ddc51..00000000 --- a/runtime/examples/benchmark-latency/benchmark-latency.cpp +++ /dev/null @@ -1,236 +0,0 @@ -// -// Copyright (C) 2014 Jens Korinth, TU Darmstadt -// -// This file is part of Tapasco (TPC). -// -// Tapasco is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// Tapasco is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with Tapasco. If not, see . -// -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MIN_NSECS (10000) -#define MAX_NSECS (1000000) -#define NSTEPS (15) -#define JOBS (10) - -using namespace std; -using namespace tapasco::platform; - -struct config_t { - unsigned long int min; - unsigned long int max; - unsigned long int time_steps; - unsigned long int iterations; -}; - -static long errors; - -static tapasco::Tapasco Tapasco; - -static inline void check_tapasco(tapasco::tapasco_res_t const result) { - if (result != tapasco::TAPASCO_SUCCESS) { - cerr << "Tapasco fatal error: " << tapasco_strerror(result) << endl; - exit(result); - } -} - -static inline uint32_t clock_period(void) { - static double period = 0.0; - if (period == 0.0) { - unsigned long hz; - char buf[1024] = ""; - ifstream ifs("/sys/class/fclk/fclk0/set_rate", ifstream::in); - if (!ifs.good()) { - cerr << "WARNING: could not open /sys/class/fclk/fclk0/set_rate, using " - "TAPASCO_FREQ" - << endl; - assert(getenv("TAPASCO_FREQ") && "must set TAPASCO_FREQ env var!"); - hz = stoi(string(getenv("TAPASCO_FREQ"))) * 1000000; - } else { - ifs.read(buf, sizeof(buf) - 1); - cerr << "fclk/set_rate = " << buf << endl; - hz = stoi(string(buf)); - } - period = 1000000000.0 / hz; - cerr << "period = " << period << " ns" << endl; - } - return nearbyint(period); -} - -static inline unsigned long ns_to_cd(unsigned long ns) { - return ns / clock_period(); -} - -static inline unsigned long cd_to_ns(unsigned long cd) { - return cd * clock_period(); -} - -static inline uint32_t tapasco_run(uint32_t cc) { - uint32_t ret = 0; - if (Tapasco.launch(14, ret, cc) != tapasco::TAPASCO_SUCCESS) - __atomic_fetch_add(&errors, 1, __ATOMIC_SEQ_CST); - return ret; -} - -static inline uint32_t platform_run(uint32_t cc) { - uint32_t start = 1; - platform_ctl_addr_t sb = platform_address_get_slot_base(0, 0); - if (platform_write_ctl(sb + 0x20, 4, &cc, PLATFORM_CTL_FLAGS_NONE) != - PLATFORM_SUCCESS) - __atomic_fetch_add(&errors, 1, __ATOMIC_SEQ_CST); - if (platform_write_ctl_and_wait(sb, 4, &start, 0, PLATFORM_CTL_FLAGS_NONE) != - PLATFORM_SUCCESS) - __atomic_fetch_add(&errors, 1, __ATOMIC_SEQ_CST); - // ack interrupt - if (platform_write_ctl(sb + 0xc, 4, &start, PLATFORM_CTL_FLAGS_NONE) != - PLATFORM_SUCCESS) - __atomic_fetch_add(&errors, 1, __ATOMIC_SEQ_CST); - if (platform_read_ctl(sb + 0x10, 4, &start, PLATFORM_CTL_FLAGS_NONE) != - PLATFORM_SUCCESS) - __atomic_fetch_add(&errors, 1, __ATOMIC_SEQ_CST); - if (start != cc) - cerr << "WARNING: found return value of " << start - << " instead of expected " << cc << endl; - return start; -} - -static inline void print_header(void) { - cout << "Wait time (ns), Stopwatch Latency TPC (ns), Stopwatch Latency " - "Platform (ns), " - << "IRQ Ack. Latency TPC (ns), IRQ Ack. Latency Platform (ns)" << endl; -} - -// static inline void print_line(double clk, unsigned long long t1, unsigned -// long long t2) -template -static inline void print_line(const uint32_t clk, const T1 &t1, const T1 &t2, - const T2 &t3, const T2 &t4) { - cout << fixed << setprecision(1) << clk << ", " << t1 << ", " << t2 << ", " - << t3 << ", " << t4 << endl; -} - -static inline void print_usage(void) { - cerr << "Usage: benchmark-latency [ [ [ " - "[]]]] with" - << endl - << "\t = minimum kernel runtime in ns " - "(default: 10ns)" - << endl - << "\t = maximum kernel runtime in ns " - "(default: 10000ns)" - << endl - << "\t = number of equidistant sampling points " - "(default:10)" - << endl - << "\t = number of iterations at each sampling point " - "(default:1000)" - << endl - << endl; -} - -static inline void print_args(struct config_t const *cfg) { - cerr << "Configuration:" << endl - << "\tminimum kernel time = " << cfg->min << endl - << "\tmaximum kernel time = " << cfg->max << endl - << "\tkernel time steps = " << cfg->time_steps << endl - << "\titerations = " << cfg->iterations << endl - << endl; -} - -static inline void parse_args(int argc, char **argv, struct config_t *cfg) { - // set defaults - cfg->min = 10000; - cfg->max = 1000000; - cfg->time_steps = 100; - cfg->iterations = 1000; - - // try to parse arguments (if some where given) - if (argc > 1) - cfg->min = stoi(string(argv[1])); - if (argc > 2) - cfg->max = stoi(string(argv[2])); - if (argc > 3) - cfg->time_steps = stoi(string(argv[3])); - if (argc > 4) - cfg->iterations = stoi(string(argv[4])); - print_args(cfg); -} - -int main(int argc, char **argv) { - struct config_t cfg; - parse_args(argc, argv, &cfg); - - // initialize threadpool - // check_Tapasco(Tapasco.init()); - uint32_t const n_inst = Tapasco.func_instance_count(14); - cerr << "Found " << n_inst << " of timer kernel." << endl; - if (!n_inst) { - cerr << "ERROR: did not find any timer kernels." << endl; - exit(EXIT_FAILURE); - } - - unsigned long int clk_step = (cfg.max - cfg.min) / cfg.time_steps; - unsigned long int clk = cfg.min; - print_header(); - auto start = chrono::high_resolution_clock::now(); - for (unsigned int i = 0; i < cfg.time_steps; ++i, clk += clk_step) { - const auto rounded_cd = ns_to_cd(clk); - const auto rounded_clk = cd_to_ns(rounded_cd); - uint64_t run_latencies = 0; - uint64_t platform_latencies = 0; - - auto run_start = chrono::high_resolution_clock::now(); - for (unsigned int j = 0; j < cfg.iterations; ++j) - run_latencies += tapasco_run(rounded_cd); - auto run_d = chrono::duration_cast( - chrono::high_resolution_clock::now() - run_start); - auto run_time = - run_d.count() / (double)cfg.iterations - (double)rounded_clk; - - // cerr << "run_latencies = " << run_latencies << endl; - - auto papi_start = chrono::high_resolution_clock::now(); - for (unsigned int j = 0; j < cfg.iterations; ++j) - platform_latencies += platform_run(rounded_cd); - auto papi_d = chrono::duration_cast( - chrono::high_resolution_clock::now() - papi_start); - auto papi_time = - papi_d.count() / (double)cfg.iterations - (double)rounded_clk; - - // cerr << "platform_latencies = " << platform_latencies << endl; - - if (ns_to_cd(rounded_clk) != rounded_cd) - cerr << " FUUUUUUUUUUUUUUUUUUUUUUUUUUU" << endl; - run_latencies *= clock_period(); - platform_latencies *= clock_period(); - run_latencies /= cfg.iterations; - platform_latencies /= cfg.iterations; - print_line(rounded_clk, run_time, papi_time, run_latencies, - platform_latencies); - } - auto total_d = chrono::duration_cast( - chrono::high_resolution_clock::now() - start); - cerr << "Total duration: " << total_d.count() << " us, errors: " << errors - << endl; -} diff --git a/runtime/examples/benchmark-mem/CMakeLists.txt b/runtime/examples/benchmark-mem/CMakeLists.txt deleted file mode 100644 index d853a271..00000000 --- a/runtime/examples/benchmark-mem/CMakeLists.txt +++ /dev/null @@ -1,22 +0,0 @@ -cmake_minimum_required(VERSION 3.5.1 FATAL_ERROR) -include($ENV{TAPASCO_HOME_RUNTIME}/cmake/Tapasco.cmake NO_POLICY_SCOPE) -project (benchmark-mem) - -find_package(TapascoTLKM REQUIRED) -find_package(TapascoCommon REQUIRED) -find_package(TapascoPlatform REQUIRED) -find_package(Tapasco REQUIRED) - -add_executable (benchmark-mem benchmark-mem.c) -set_tapasco_defaults(benchmark-mem) -target_link_libraries (benchmark-mem m rt pthread tapasco platform) - -add_executable (benchmark-mem++ benchmark-mem.cpp) -set_tapasco_defaults(benchmark-mem++) -target_link_libraries (benchmark-mem++ m rt pthread tapasco platform) - -install (TARGETS benchmark-mem benchmark-mem++ - ARCHIVE DESTINATION share/Tapasco/bin/ - LIBRARY DESTINATION share/Tapasco/bin/ - RUNTIME DESTINATION share/Tapasco/bin/) - diff --git a/runtime/examples/benchmark-mem/benchmark-mem.c b/runtime/examples/benchmark-mem/benchmark-mem.c deleted file mode 100644 index 1923a780..00000000 --- a/runtime/examples/benchmark-mem/benchmark-mem.c +++ /dev/null @@ -1,198 +0,0 @@ -// -// Copyright (C) 2014 Jens Korinth, TU Darmstadt -// -// This file is part of Tapasco (TPC). -// -// Tapasco is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// Tapasco is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with Tapasco. If not, see . -// -//! @file benchmark-mem.c -//! @brief TPC API application that performs a simplistic benchmark on the -//! memory system: 1GiB of data is transferred in chunks of sizes -//! ranging from 2^12 (== 4KiB) to 2^26 (== 64MiB) with one thread -//! per processor. Each thread performs alloc-copy-dealloc until all -//! transfers are finished; this is done in three modes read, write -//! and read+write (data is either only copied from, copied to or -//! copied in both directions). -//! The program output can be used for the gnuplot script in this -//! directory to generate a bar plot. -//! @authors J. Korinth, TU Darmstadt (jk@esa.cs.tu-darmstadt.de) -//! -#include "timer.h" -#include -#include -#include -#include -#include -#include -#include -#include - -#define TRANSFER_SZ ((size_t)(1024 * 1024 * 1024)) -#define UPPER_BND (26) -#define LOWER_BND (12) - -typedef unsigned long int ul; -typedef long int l; - -static void *rnddata; -static ul chunk_sz; -static l transfers; -static ul errors; -static l mode; - -static tapasco_ctx_t *ctx; -static tapasco_dev_ctx_t *dev; - -static void fill_with_random(void *d, size_t const sz) { - FILE *f; - // size_t c; - TIMER_START(fill_with_random); - f = fopen("/dev/urandom", "r"); - assert(f); - // c = fread(d, sizeof(char), sz, f); - // assert(c == sz); - fclose(f); - TIMER_STOP(fill_with_random); - fprintf(stderr, "fill_with_random took %llu us.\n", - TIMER_USECS(fill_with_random)); -} - -static inline void baseline_transfer(void *d) { - void *h; - if (!d) { - __sync_fetch_and_add(&errors, 1); - return; - } - h = malloc(chunk_sz); - if (!h) { - __sync_fetch_and_add(&errors, 1); - return; - } - - switch (mode) { - case 0: /* read-only */ - memcpy(h, d, chunk_sz); - break; - case 1: /* write-only */ - memcpy(d, h, chunk_sz); - break; - case 2: /* read-write */ - memcpy(d, h, chunk_sz); - memcpy(h, d, chunk_sz); - break; - } - free(h); -} - -static inline void tapasco_transfer(void *d) { - tapasco_handle_t h; - if (!d) { - __sync_fetch_and_add(&errors, 1); - return; - } - if (tapasco_device_alloc(dev, &h, chunk_sz, 0) != TAPASCO_SUCCESS) { - __sync_fetch_and_add(&errors, 1); - return; - } - - switch (mode - 3) { - case 0: /* read-only */ - tapasco_device_copy_from(dev, h, d, chunk_sz, TAPASCO_DEVICE_COPY_BLOCKING); - break; - case 1: /* write-only */ - tapasco_device_copy_to(dev, d, h, chunk_sz, TAPASCO_DEVICE_COPY_BLOCKING); - break; - case 2: /* read-write */ - tapasco_device_copy_to(dev, d, h, chunk_sz, TAPASCO_DEVICE_COPY_BLOCKING); - tapasco_device_copy_from(dev, h, d, chunk_sz, TAPASCO_DEVICE_COPY_BLOCKING); - break; - } - tapasco_device_free(dev, h, 0); -} - -static void *transfer(void *p) { - void *d = malloc(chunk_sz); - while (__sync_fetch_and_sub(&transfers, 1) > 0) { - if (mode < 3) - baseline_transfer(d); - else - tapasco_transfer(d); - } - free(d); - return NULL; -} - -static void print_header(void) { - printf("Allocation Size (KiB),virt. R (MiB/s),virt. W (MiB/s),virt. R+W " - "(MiB/s),DMA R (MiB/s),DMA W (MiB/s),DMA R+W (MiB/s)\n"); -} - -static void print_line(ul const *times) { - printf("%lu,%3.2f,%3.2f,%3.2f,%3.2f,%3.2f,%3.2f\n", chunk_sz / 1024, - (TRANSFER_SZ / (1024 * 1024)) / (times[0] / 1000000.0), - (TRANSFER_SZ / (1024 * 1024)) / (times[1] / 1000000.0), - (TRANSFER_SZ / (1024 * 1024)) / (times[2] / 1000000.0), - (TRANSFER_SZ / (1024 * 1024)) / (times[3] / 1000000.0), - (TRANSFER_SZ / (1024 * 1024)) / (times[4] / 1000000.0), - (TRANSFER_SZ / (1024 * 1024)) / (times[5] / 1000000.0)); -} - -static void check_tapasco(tapasco_res_t const result) { - if (result != TAPASCO_SUCCESS) { - fprintf(stderr, "tapasco fatal error: %s\n", tapasco_strerror(result)); - exit(result); - } -} - -int main(int argc, char **argv) { - int pw, i; - pthread_t threads[sysconf(_SC_NPROCESSORS_CONF)]; - ul times[6] = {0}; - - // init timer and data - TIMER_INIT(); - rnddata = malloc(pow(2, UPPER_BND)); - fill_with_random(rnddata, pow(2, UPPER_BND)); - - // initialize threadpool - check_tapasco(tapasco_init(&ctx)); - check_tapasco(tapasco_create_device(ctx, 0, &dev, 0)); - - print_header(); - TIMER_START(total) - for (pw = UPPER_BND; pw >= LOWER_BND; --pw) { - chunk_sz = (size_t)(pow(2, pw)); - for (mode = 0; mode <= 5; ++mode) { - transfers = TRANSFER_SZ / chunk_sz; - errors = 0; - TIMER_START(run) - for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); ++i) - pthread_create(&threads[i], NULL, transfer, NULL); - for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); ++i) - pthread_join(threads[i], NULL); - TIMER_STOP(run) - // fprintf(stderr, "\nerrors = %lu\n", errors); - times[mode] = errors ? 0 : TIMER_USECS(run); - if (mode % 3 == 2) - times[mode] /= 2; - } - print_line(times); - } - TIMER_STOP(total) - fprintf(stderr, "Total duration: %llu us.\n", TIMER_USECS(total)); - // de-initialize threadpool - tapasco_destroy_device(ctx, dev); - tapasco_deinit(ctx); - free(rnddata); -} diff --git a/runtime/examples/benchmark-mem/benchmark-mem.cpp b/runtime/examples/benchmark-mem/benchmark-mem.cpp deleted file mode 100644 index 3f2aa86b..00000000 --- a/runtime/examples/benchmark-mem/benchmark-mem.cpp +++ /dev/null @@ -1,209 +0,0 @@ -// -// Copyright (C) 2014 Jens Korinth, TU Darmstadt -// -// This file is part of Tapasco (TPC). -// -// Tapasco is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// Tapasco is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with Tapasco. If not, see . -// -//! @file benchmark-mem.c -//! @brief TPC API application that performs a simplistic benchmark on the -//! memory system: 1GiB of data is transferred in chunks of sizes -//! ranging from 2^12 (== 4KiB) to 2^26 (== 64MiB) with one thread -//! per processor. Each thread performs alloc-copy-dealloc until all -//! transfers are finished; this is done in three modes read, write -//! and read+write (data is either only copied from, copied to or -//! copied in both directions). -//! The program output can be used for the gnuplot script in this -//! directory to generate a bar plot. -//! @authors J. Korinth, TU Darmstadt (jk@esa.cs.tu-darmstadt.de) -//! -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -constexpr size_t TRANSFER_SZ{1024 * 1024 * 1024}; -constexpr size_t RNDDATA_SZ{TRANSFER_SZ / 32}; -constexpr unsigned long UPPER_BND{25}; -constexpr unsigned long LOWER_BND{12}; - -using namespace std; - -typedef unsigned long int ul; -typedef long int l; - -static uint8_t *rnddata; -static ul chunk_sz; -static atomic transfers; -static atomic
    errors; -static l mode; - -static tapasco::Tapasco Tapasco{false}; - -inline tapasco::tapasco_device_copy_flag_t -operator|(tapasco::tapasco_device_copy_flag_t a, - tapasco::tapasco_device_copy_flag_t b) { - return static_cast(static_cast(a) | - static_cast(b)); -} - -static void fill_with_random(char *d, size_t const sz) { - auto start = chrono::steady_clock::now(); - ifstream ifs("/dev/urandom", ifstream::in); - ifs.read(d, sz); - auto dur = chrono::duration_cast( - chrono::steady_clock::now() - start); - cerr << "fill_with_random took " << dur.count() << " us." << endl; -} - -static inline void baseline_transfer(void *d) { - if (!d) { - errors++; - return; - } - auto h = new (nothrow) uint8_t[chunk_sz]; - if (!h) { - errors++; - return; - } - - switch (mode) { - case 0: /* read-only */ - memcpy(h, d, chunk_sz); - break; - case 1: /* write-only */ - memcpy(d, h, chunk_sz); - break; - case 2: /* read-write */ - memcpy(d, h, chunk_sz); - memcpy(h, d, chunk_sz); - break; - } - delete[] h; -} - -static inline void tapasco_transfer(void *d) { - tapasco::tapasco_handle_t h = 0; - if (!d) { - errors++; - return; - } - if (Tapasco.alloc(h, chunk_sz, tapasco::TAPASCO_DEVICE_ALLOC_FLAGS_NONE) != - tapasco::TAPASCO_SUCCESS) { - errors++; - return; - } - - switch (mode - 3) { - case 0: /* read-only */ - if (Tapasco.copy_from(h, d, chunk_sz, - tapasco::TAPASCO_DEVICE_COPY_BLOCKING) != - tapasco::TAPASCO_SUCCESS) - errors++; - break; - case 1: /* write-only */ - if (Tapasco.copy_to(d, h, chunk_sz, - tapasco::TAPASCO_DEVICE_COPY_BLOCKING) != - tapasco::TAPASCO_SUCCESS) - errors++; - break; - case 2: /* read-write */ - if (Tapasco.copy_to(d, h, chunk_sz, - tapasco::TAPASCO_DEVICE_COPY_BLOCKING) == - tapasco::TAPASCO_SUCCESS) { - if (Tapasco.copy_from(h, d, chunk_sz, - tapasco::TAPASCO_DEVICE_COPY_BLOCKING) != - tapasco::TAPASCO_SUCCESS) - errors++; - } else - errors++; - break; - } - if (h) - Tapasco.free(h, tapasco::TAPASCO_DEVICE_ALLOC_FLAGS_NONE); -} - -static void transfer() { - l i{0}; - while ((i = --transfers) > 0) { - const ul off = (i % (RNDDATA_SZ / chunk_sz)) * chunk_sz; - assert(off + chunk_sz <= RNDDATA_SZ); - if (mode < 3) - baseline_transfer(&rnddata[off]); - else - tapasco_transfer(&rnddata[off]); - } -} - -static void print_header(void) { - cout << "Allocation Size (KiB),virt. R (MiB/s),virt. W (MiB/s),virt. R+W " - "(MiB/s),DMA R (MiB/s),DMA W (MiB/s),DMA R+W (MiB/s)" - << endl; -} - -static void print_line(ul const *times) { - cout << chunk_sz / 1024 << ", " - << (TRANSFER_SZ / (1024 * 1024)) / (times[0] / 1000000.0) << ", " - << (TRANSFER_SZ / (1024 * 1024)) / (times[1] / 1000000.0) << ", " - << (TRANSFER_SZ / (1024 * 1024)) / (times[2] / 1000000.0) << ", " - << (TRANSFER_SZ / (1024 * 1024)) / (times[3] / 1000000.0) << ", " - << (TRANSFER_SZ / (1024 * 1024)) / (times[4] / 1000000.0) << ", " - << (TRANSFER_SZ / (1024 * 1024)) / (times[5] / 1000000.0) << endl; -} - -int main(int argc, char **argv) { - int i; - ul times[6] = {0}; - - // init timer and data - rnddata = new uint8_t[RNDDATA_SZ]; - fill_with_random((char *)rnddata, RNDDATA_SZ); - - // initialize threadpool - Tapasco.init(0); - - print_header(); - auto total_start = chrono::steady_clock::now(); - for (auto pw = UPPER_BND; pw >= LOWER_BND; --pw) { - chunk_sz = static_cast(1 << pw); - for (mode = 0; mode <= 5; ++mode) { - vector> fs; - transfers = TRANSFER_SZ / chunk_sz; - errors = 0; - auto run_start = chrono::steady_clock::now(); - for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); ++i) - fs.push_back(async(launch::async, transfer)); - for (auto &f : fs) - f.get(); - auto run_d = chrono::duration_cast( - chrono::steady_clock::now() - run_start); - times[mode] = errors ? 0 : run_d.count(); - if (mode % 3 == 2) - times[mode] /= 2; - } - print_line(times); - } - auto total_d = chrono::duration_cast( - chrono::steady_clock::now() - total_start); - cerr << "Total duration: " << total_d.count() << " us." << endl; - delete[] rnddata; -} diff --git a/runtime/examples/benchmark-mem/makeplot.sh b/runtime/examples/benchmark-mem/makeplot.sh deleted file mode 100755 index 40afdc96..00000000 --- a/runtime/examples/benchmark-mem/makeplot.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -# -# Copyright (C) 2014 Jens Korinth, TU Darmstadt -# -# This file is part of Tapasco (TPC). -# -# Tapasco is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Tapasco is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with Tapasco. If not, see . -# -FILENAME=$1 -CSV=$FILENAME.csv -PDF=$FILENAME.pdf - -cat transfer-speed.gnuplot | sed "s//$CSV/g" | sed "s//$PDF/g" | gnuplot diff --git a/runtime/examples/benchmark-mem/timer.h b/runtime/examples/benchmark-mem/timer.h deleted file mode 100644 index cfea2daa..00000000 --- a/runtime/examples/benchmark-mem/timer.h +++ /dev/null @@ -1,73 +0,0 @@ -// -// Copyright (C) 2014 Jens Korinth, TU Darmstadt -// -// This file is part of Tapasco (TPC). -// -// Tapasco is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// Tapasco is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with Tapasco. If not, see . -// -//! @file timer.h -//! @brief C macros for high-precision timing (Linux, Mac OS X). -//! @authors J. Korinth, TU Darmstadt (jk@esa.cs.tu-darmstadt.de) -//! -#ifndef __TIMER_H__ -#define __TIMER_H__ - -#ifdef __APPLE__ -#include -#include - -static mach_timebase_info_data_t _tb; - -#define TIMER_INIT() mach_timebase_info(&_tb); - -#define TIMER_START(name) uint64_t ts_start_##name = mach_absolute_time(); - -#define TIMER_STOP(name) uint64_t ts_stop_##name = mach_absolute_time(); - -#define TIMER_USECS(name) \ - (uint64_t)((double)(ts_stop_##name - ts_start_##name) * (double)_tb.numer / \ - (double)_tb.denom / (double)1e3) - -#else -#include -#include - -static struct timespec _tb; - -#define TIMER_INIT() clock_getres(CLOCK_MONOTONIC, &_tb) - -#define TIMER_START(name) \ - struct timespec tp_start_##name; \ - clock_gettime(CLOCK_MONOTONIC, &tp_start_##name); - -#define TIMER_STOP(name) \ - struct timespec tp_stop_##name; \ - clock_gettime(CLOCK_MONOTONIC, &tp_stop_##name); - -#define TIMER_USECS(name) tp_diff_usecs(&tp_stop_##name, &tp_start_##name) - -static inline unsigned long long tp_diff_usecs(struct timespec *stop, - struct timespec *start) { - if (stop->tv_nsec < start->tv_nsec) { - return (stop->tv_sec - 1 - start->tv_sec) * 1000000ULL + - (1000000000ULL + stop->tv_nsec - start->tv_nsec) / 1000ULL; - } else { - return (stop->tv_sec - start->tv_sec) * 1000000ULL + - (stop->tv_nsec - start->tv_nsec) / 1000ULL; - } -} - -#endif - -#endif /* __TIMER_H__ */ diff --git a/runtime/examples/benchmark-mem/transfer-speed.gnuplot b/runtime/examples/benchmark-mem/transfer-speed.gnuplot deleted file mode 100755 index e8e79b57..00000000 --- a/runtime/examples/benchmark-mem/transfer-speed.gnuplot +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/gnuplot -set terminal pdf enhanced -set output '' - -set style data histogram -set style histogram cluster gap 1 - -set style fill solid border rgb "black" -set auto x -set xtics rotate by 90 right -set xrange [0:*] reverse -set yrange [1:100000] -set xlabel 'Allocation Size (KiB)' -set ylabel 'Transfer Speed (MiB/s)' -set grid noxtics ytics -show grid -set logscale y - -set datafile separator "," - -set key right top invert - -plot for [i=7:2:-1] "" using i:xtic(1) title col From 05be0bc598babd020dc5d358daf2cf9acfe7a080 Mon Sep 17 00:00:00 2001 From: Jaco Hofmann Date: Mon, 4 Nov 2019 16:05:32 +0100 Subject: [PATCH 08/10] Adds OpenMP version of memcheck --- runtime/examples/memcheck/CMakeLists.txt | 9 +- runtime/examples/memcheck/memcheck-mt.c | 172 ----------------------- runtime/examples/memcheck/memcheck.cpp | 20 ++- 3 files changed, 24 insertions(+), 177 deletions(-) delete mode 100644 runtime/examples/memcheck/memcheck-mt.c diff --git a/runtime/examples/memcheck/CMakeLists.txt b/runtime/examples/memcheck/CMakeLists.txt index ea278bd1..d7bf050d 100644 --- a/runtime/examples/memcheck/CMakeLists.txt +++ b/runtime/examples/memcheck/CMakeLists.txt @@ -13,11 +13,12 @@ add_executable(memcheck memcheck.cpp) set_tapasco_defaults(memcheck) target_link_libraries(memcheck PRIVATE tapasco tlkm platform tapasco-common) -add_executable(memcheck-mt memcheck-mt.c) -set_tapasco_defaults(memcheck-mt) -target_link_libraries(memcheck-mt PRIVATE tapasco tlkm platform tapasco-common) +find_package(OpenMP) +if(OpenMP_CXX_FOUND) + target_link_libraries(memcheck PUBLIC OpenMP::OpenMP_CXX) +endif() -install(TARGETS memcheck memcheck-mt +install(TARGETS memcheck ARCHIVE DESTINATION share/Tapasco/bin/ LIBRARY DESTINATION share/Tapasco/bin/ RUNTIME DESTINATION share/Tapasco/bin/) diff --git a/runtime/examples/memcheck/memcheck-mt.c b/runtime/examples/memcheck/memcheck-mt.c deleted file mode 100644 index cf7326e2..00000000 --- a/runtime/examples/memcheck/memcheck-mt.c +++ /dev/null @@ -1,172 +0,0 @@ -// -// Copyright (C) 2014 Jens Korinth, TU Darmstadt -// -// This file is part of Tapasco (TPC). -// -// Tapasco is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// Tapasco is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with Tapasco. If not, see . -// -//! @file memcheck-mt.cc -//! @brief Initializes the first TPC device and iterates over a number -//! of integer arrays of increasing size, allocating each array -//! on the device, copying to and from and then checking the -//! results. Basic regression test for platform implementations. -//! Multi-threaded Pthreads variant. -//! @author J. Korinth, TU Darmstadt (jk@esa.cs.tu-darmstadt.de) -//! -#include -#include -#include -#include -#include -#include -#include - -#define DEFAULT_RUNS (1000) - -static tapasco_ctx_t *ctx; -static tapasco_devctx_t *dev; -static long int errs; -static long int runs; -static size_t const arr_szs[] = {1, 2, 8, 10, 16, - 1024, 2048, 4096, 8192, 16384}; - -static void check_fpga(tapasco_res_t const result) { - if (result != TAPASCO_SUCCESS) { - fprintf(stderr, "fpga fatal error: %s\n", tapasco_strerror(result)); - exit(result); - } -} - -static void init_array(int *arr, size_t sz) { - for (size_t i = 0; i < sz; ++i) - arr[i] = i; -} - -static int compare_arrays(long int const s, int const *arr, int const *rarr, - size_t const sz, unsigned int const base) { - int errs = 0; - for (size_t i = 0; i < sz; ++i) { - if (rarr[i] != arr[i]) { - unsigned int const addr = base + i * sizeof(int); - fprintf(stderr, - "%ld: wrong data: arr[%zd] = %d != %d " - "= rarr[%zd]\terror at 0x%08x\n", - s, i, arr[i], rarr[i], i, addr); - ++errs; - } - } - return errs; -} - -static void *test_thread(void *p) { - long const sc = sizeof(arr_szs) / sizeof(*arr_szs); - long int s; - while ((s = __atomic_sub_fetch(&runs, 1, __ATOMIC_SEQ_CST)) > 0) { - s = s % sc; - // printf("%ld: Checking array size %zd (%zd byte) ...\n", - // s, arr_szs[s], arr_szs[s] * sizeof(int)); - // allocate and fill array - int *arr = (int *)malloc(arr_szs[s] * sizeof(int)); - assert(arr != NULL); - init_array(arr, arr_szs[s]); - // allocate array for read data - int *rarr = (int *)malloc(arr_szs[s] * sizeof(int)); - assert(rarr != NULL); - - // get tapasco handle - tapasco_handle_t h; - tapasco_device_alloc(dev, &h, arr_szs[s] * sizeof(int), 0); - // printf("%ld: handle = 0x%08lx, size = %zd bytes\n", s, - // (unsigned long)h, arr_szs[s] * sizeof(int)); - assert((unsigned long)h > 0); - - // copy data to and back - int merr = 0; - // printf("%ld: sizeof(arr) %zd, sizeof(rarr) %zd\n", s, sizeof(arr), - // sizeof(rarr)); - tapasco_res_t res = tapasco_device_copy_to( - dev, arr, h, arr_szs[s] * sizeof(int), TAPASCO_DEVICE_COPY_BLOCKING); - if (res == TAPASCO_SUCCESS) { - // printf("%ld: copy to successful, copying from ...\n", s); - res = tapasco_device_copy_from(dev, h, rarr, arr_szs[s] * sizeof(int), - TAPASCO_DEVICE_COPY_BLOCKING); - // printf("%ld: copy from finished\n", s); - if (res == TAPASCO_SUCCESS) { - merr += compare_arrays(s, arr, rarr, arr_szs[s], (unsigned int)h); - } else { - printf("%ld: Copy from device failed.\n", s); - merr += 1; - } - } else { - printf("%ld: Copy to device failed.\n", s); - merr += 1; - } - __atomic_add_fetch(&errs, merr, __ATOMIC_SEQ_CST); - tapasco_device_free(dev, h, arr_szs[s] * sizeof(int), 0); - - if (!merr) - /*printf("%ld: Array size %zd (%zd byte) ok!\n", - s, arr_szs[s], arr_szs[s] * sizeof(int));*/ - (void)0; - else - printf(/*stderr,*/ - "%ld: FAILURE: array size %zd (%zd byte) not ok.\n", s, arr_szs[s], - arr_szs[s] * sizeof(int)); - - free(arr); - free(rarr); - } - return NULL; -} - -int main(int argc, char **argv) { - if (argc < 2) { - fprintf(stderr, "Usage: memcheck-mt []\n"); - exit(EXIT_FAILURE); - } - long unsigned tc = strtoul(argv[1], NULL, 0); - runs = DEFAULT_RUNS; - if (argc > 2) - runs = strtol(argv[2], NULL, 0); - printf("Executing %ld transfers with %ld threads ...\n", runs, tc); - setbuf(stdout, NULL); - - // initialize FPGA - check_fpga(tapasco_init(&ctx)); - check_fpga(tapasco_create_device(ctx, 0, &dev, 0)); - - printf("Starting %lu threads ...\n", tc); - pthread_t *thrds = (pthread_t *)malloc(tc * sizeof(pthread_t)); - assert(thrds); - errs = 0; - - for (long int s = 0; s < tc; ++s) { - pthread_create(&thrds[s], NULL, test_thread, (void *)s); - } - for (long int s = 0; s < tc; ++s) { - pthread_join(thrds[s], NULL); - } - - if (!errs) - printf("\nSUCCESS\n"); - else - fprintf(stderr, "\nFAILURE\n"); - - free(thrds); - // release device - tapasco_destroy_device(ctx, dev); - tapasco_deinit(ctx); - return errs; -} diff --git a/runtime/examples/memcheck/memcheck.cpp b/runtime/examples/memcheck/memcheck.cpp index 9c852ba1..02ffc811 100644 --- a/runtime/examples/memcheck/memcheck.cpp +++ b/runtime/examples/memcheck/memcheck.cpp @@ -2,6 +2,11 @@ #include #include +#ifdef _OPENMP +#include +#include +#endif + #include using namespace tapasco; @@ -33,7 +38,20 @@ int main(int argc, char **argv) { Tapasco tapasco; - for (int s = 0; s < max_pow && errs == 0; ++s) { + int threads = 1; + +#ifdef _OPENMP + if (argc > 1) { + std::stringstream s(argv[1]); + s >> threads; + } + omp_set_num_threads(threads); +#endif + + std::cout << "Using " << threads << " threads." << std::endl; + +#pragma omp parallel for reduction(+ : errs) + for (int s = 0; s < max_pow; ++s) { size_t len = 1 << s; std::cout << "Checking array size " << len << "B" << std::endl; size_t elements = std::max((size_t)1, len / sizeof(int)); From 69af6ba5885faa3f04a9128e01aa5196c4218635 Mon Sep 17 00:00:00 2001 From: Jaco Hofmann Date: Mon, 4 Nov 2019 16:05:43 +0100 Subject: [PATCH 09/10] Removes deprecated test --- runtime/examples/basic_test/CMakeLists.txt | 17 -- runtime/examples/basic_test/basic_test.cpp | 243 --------------------- 2 files changed, 260 deletions(-) delete mode 100644 runtime/examples/basic_test/CMakeLists.txt delete mode 100644 runtime/examples/basic_test/basic_test.cpp diff --git a/runtime/examples/basic_test/CMakeLists.txt b/runtime/examples/basic_test/CMakeLists.txt deleted file mode 100644 index f41fe37b..00000000 --- a/runtime/examples/basic_test/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -cmake_minimum_required(VERSION 3.5.1 FATAL_ERROR) -include($ENV{TAPASCO_HOME_RUNTIME}/cmake/Tapasco.cmake NO_POLICY_SCOPE) -project (basic_test) - -find_package(TapascoTLKM REQUIRED) -find_package(TapascoCommon REQUIRED) -find_package(TapascoPlatform REQUIRED) -find_package(Tapasco REQUIRED) - -add_executable(basic_test basic_test.cpp) -set_tapasco_defaults(basic_test) -target_link_libraries(basic_test rt pthread tapasco platform atomic) - -install(TARGETS basic_test - ARCHIVE DESTINATION share/Tapasco/bin/ - LIBRARY DESTINATION share/Tapasco/bin/ - RUNTIME DESTINATION share/Tapasco/bin/) diff --git a/runtime/examples/basic_test/basic_test.cpp b/runtime/examples/basic_test/basic_test.cpp deleted file mode 100644 index ad9a5395..00000000 --- a/runtime/examples/basic_test/basic_test.cpp +++ /dev/null @@ -1,243 +0,0 @@ -// -// Copyright (C) 2014 Jens Korinth, TU Darmstadt -// -// This file is part of Tapasco (TPC). -// -// Tapasco is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// Tapasco is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with Tapasco. If not, see . -// -/** - * @file basic_test.cpp - * @brief Uses the precompiled basic_test bitstreams to run the absolute - * minimum of functionality tests: Test 1 uses 'arrayinit' to - * ascertain that masters on the device can write to memory; Test 2 - * uses 'arraysum' to ascertain read capability in the same way and - * Test 3 executes 'warraw' which requires both (in-loop deps). - * Overall idea is to provide increase confidence in the basic - * functionality of the installation when debugging. - * - * WORD OF CAUTION: THIS IS A HARDHAT AREA, HACKED IN <20 MIN! - * BEWARE OF HORRIBLE CODE AHEAD... - * @author J. Korinth, TU Darmstadt (jk@esa.cs.tu-darmstadt.de) - **/ -#include -#include -#include -#include -#include -#include -#include -#include - -#define TEST_1 -#define TEST_2 -#define TEST_3 - -using namespace std; - -#define SZ 256 -#define RUNS 100000 -#define T1_KID 11 -#define T2_KID 10 -#define T3_KID 9 - -static tapasco::Tapasco Tapasco; -static atomic runs; -typedef int run_block[SZ]; - -// #define CPU_EXECUTION 1 -/******************************************************************************/ -bool test1_execute(int *arr) { - int run; - while ((run = runs--) >= 0) { - run_block *z = reinterpret_cast(&arr[run * SZ]); -#ifdef CPU_EXECUTION - for (int i = 0; i < SZ; ++i) - (*z)[i] = i; -#else - if (Tapasco.launch_no_return(T1_KID, tapasco::OutOnly{z}) != - tapasco::TAPASCO_SUCCESS) - return false; -#endif - } - return true; -} - -static void test1_prepare(int *arr) { - for (size_t j = 0; j < RUNS; ++j) - for (size_t i = 0; i < SZ; ++i) - arr[j * SZ + i] = -1; -} - -static int test1_check(int *arr) { - int errs = 0; - for (int i = 0; i < SZ; ++i) { - if (arr[i] != i) { - cerr << "wrong data at " << i << ": " << arr[i] << endl; - ++errs; - } - } - return errs; -} - -/******************************************************************************/ -static void test2_prepare(int *arr, size_t sz) { - for (size_t i = 0; i < sz; ++i) - arr[i] = i; -} - -int test2_execute(int *arr) { - int run; - int result = 0; - while ((run = runs--) >= 0) { - const run_block *z = reinterpret_cast(&arr[run * SZ]); -#ifdef CPU_EXECUTION - for (int i = 0; i < SZ; ++i) - result += (*z)[i]; -#else - int tr = 0; - if (Tapasco.launch(T2_KID, tr, z) != tapasco::TAPASCO_SUCCESS) - return -1; - else - result += tr; -#endif - } - return result; -} - -static bool test2_check(int *arr, size_t sz, int res) { - int golden = 0; - for (size_t i = 0; i < sz; ++i) - golden += arr[i]; - return golden == res; -} - -/******************************************************************************/ -static void test3_prepare(int *arr, size_t sz, size_t bsz) { - for (size_t i = 0; i < sz; ++i) - arr[i] = i % bsz; -} - -void test3_execute(int *arr) { - int run; - while ((run = runs--) >= 0) { - run_block *z = reinterpret_cast(&arr[run * SZ]); -#ifdef CPU_EXECUTION - for (int i = 0; i < SZ; ++i) - (*z)[i] += 42; -#else - tapasco::tapasco_res_t res; - if ((res = Tapasco.launch_no_return(T3_KID, z)) != tapasco::TAPASCO_SUCCESS) - throw tapasco::Tapasco::tapasco_error(res); -#endif - } -} - -static unsigned int test3_check(int *arr, size_t sz) { - unsigned int errs = 0; - for (int i = 0; i < (int)sz; ++i) { - if (arr[i] != i + 42) { - fprintf(stderr, "wrong data at %d: %d, should be %d\n", i, arr[i], - i + 42); - ++errs; - } - } - return errs; -} -/******************************************************************************/ -int main(int argc, char **argv) { - int retval = 0; - unsigned int tc = 1; // sysconf(_SC_NPROCESSORS_CONF); -#ifndef CPU_EXECUTION - if (!Tapasco.is_ready()) { - cerr << "TPC init failed." << endl; - sleep(10); - return 1; - } - const uint32_t cnt[] = { - Tapasco.func_instance_count(T1_KID), - Tapasco.func_instance_count(T2_KID), - Tapasco.func_instance_count(T3_KID), - }; - cout << "Instance counts" << endl - << " arrayinit : " << cnt[0] << endl - << " arraysum : " << cnt[1] << endl - << " arrayupdate : " << cnt[2] << endl; - - if (cnt[0] == 0 || cnt[1] == 0 || cnt[2] == 0) { - cerr << "ERROR: missing at least one of the required kernels!" << endl; - return EXIT_FAILURE; - } -#endif - - int *arr{new int[SZ * RUNS]}; - test1_prepare(arr); - - if (argc >= 2) - tc = stoul(argv[1]); - - cout << "Using threadpool with " << tc << " threads." << endl; - runs = RUNS - 1; - - int result{0}; - /****************************************************************************/ -#ifdef TEST_1 - vector> tp; - for (unsigned int i = 0; i < tc; ++i) - tp.push_back(async(launch::async, test1_execute, arr)); - for (auto &f : tp) - retval += f.get() ? 0 : 1; - for (unsigned int i = 0; i < RUNS; ++i) { - int err = test1_check(&arr[i * SZ]); - cout << "Run #" << i << (err ? " NOT OK!" : " ok.") << endl; - retval += err; - } -#endif - - /****************************************************************************/ -#ifdef TEST_2 - vector> tp2; - runs = RUNS - 1; - test2_prepare(arr, SZ * RUNS); - for (unsigned int i = 0; i < tc; ++i) - tp2.push_back(async(launch::async, test2_execute, arr)); - for (auto &f : tp2) - result += f.get(); - cout << "Test 2 " - << (test2_check(arr, SZ * RUNS, result) ? " ok." : " NOT OK!") << endl; - retval += test2_check(arr, SZ * RUNS, result) ? 0 : 1; -#endif - - /****************************************************************************/ -#ifdef TEST_3 - vector> tp3; - runs = RUNS - 1; - result = 0; - test3_prepare(arr, SZ * RUNS, SZ); - for (unsigned int i = 0; i < tc; ++i) - tp3.push_back(async(launch::async, test3_execute, arr)); - for (auto &f : tp3) - f.get(); - for (unsigned int i = 0; i < RUNS; ++i) { - int errs = test3_check(&arr[i * SZ], SZ); - cout << "Run #" << i << (errs ? " NOT OK!" : " ok.") << endl; - retval += errs; - } -#endif - - /****************************************************************************/ - cout << "Finished, errors: " << retval << endl; - delete[] arr; - return retval; -} -/* vim: set foldmarker=@{,@} foldlevel=0 foldmethod=marker : */ From 2ba1f550bea6bceeeec9c3dccd1af651de7982a4 Mon Sep 17 00:00:00 2001 From: Jaco Hofmann Date: Mon, 4 Nov 2019 16:05:59 +0100 Subject: [PATCH 10/10] Fixes tool formatting --- runtime/examples/bandwidth/bandwidth.cpp | 47 ++++++++++++++---------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/runtime/examples/bandwidth/bandwidth.cpp b/runtime/examples/bandwidth/bandwidth.cpp index 8eefd490..2adfcf93 100644 --- a/runtime/examples/bandwidth/bandwidth.cpp +++ b/runtime/examples/bandwidth/bandwidth.cpp @@ -1,7 +1,7 @@ -#include -#include #include #include +#include +#include #include @@ -9,7 +9,7 @@ using namespace tapasco; int main(int argc, char **argv) { size_t max_pow = 30; - size_t data_to_transfer = 256*1024*1024L; + size_t data_to_transfer = 256 * 1024 * 1024L; Tapasco tapasco; @@ -31,42 +31,51 @@ int main(int argc, char **argv) { std::cout << "Write C " << len << "B @ "; auto start = std::chrono::system_clock::now(); - while(copied < data_to_transfer) { - tapasco.copy_to(arr_to.data(), handle_to, len, (tapasco_device_copy_flag_t)0); + while (copied < data_to_transfer) { + tapasco.copy_to(arr_to.data(), handle_to, len, + (tapasco_device_copy_flag_t)0); copied += len; } auto end = std::chrono::system_clock::now(); - std::chrono::duration elapsed_seconds = end-start; + std::chrono::duration elapsed_seconds = end - start; - std::cout << (data_to_transfer / elapsed_seconds.count()) / (1024.0 * 1024.0) << "MBps" << std::endl; + std::cout << (data_to_transfer / elapsed_seconds.count()) / + (1024.0 * 1024.0) + << "MBps" << std::endl; copied = 0; - std::cout << "Read C " << len<< "B @ "; + std::cout << "Read C " << len << "B @ "; start = std::chrono::system_clock::now(); - while(copied < data_to_transfer) { - tapasco.copy_from(handle_from, arr_from.data(), len, (tapasco_device_copy_flag_t)0); + while (copied < data_to_transfer) { + tapasco.copy_from(handle_from, arr_from.data(), len, + (tapasco_device_copy_flag_t)0); copied += len; } end = std::chrono::system_clock::now(); - elapsed_seconds = end-start; + elapsed_seconds = end - start; - std::cout << (data_to_transfer / elapsed_seconds.count()) / (1024.0 * 1024.0) << "MBps" << std::endl; + std::cout << (data_to_transfer / elapsed_seconds.count()) / + (1024.0 * 1024.0) + << "MBps" << std::endl; copied = 0; std::cout << "ReadWrite C " << len << "B @ "; - while(copied < data_to_transfer) { - tapasco.copy_to(arr_to.data(), handle_to, len, (tapasco_device_copy_flag_t)0); - tapasco.copy_from(handle_from, arr_from.data(), len, (tapasco_device_copy_flag_t)0); - copied += len*2; + while (copied < data_to_transfer) { + tapasco.copy_to(arr_to.data(), handle_to, len, + (tapasco_device_copy_flag_t)0); + tapasco.copy_from(handle_from, arr_from.data(), len, + (tapasco_device_copy_flag_t)0); + copied += len * 2; } end = std::chrono::system_clock::now(); - elapsed_seconds = end-start; - - std::cout << ((data_to_transfer*2) / elapsed_seconds.count()) / (1024.0 * 1024.0) << "MBps" << std::endl; + elapsed_seconds = end - start; + std::cout << ((data_to_transfer * 2) / elapsed_seconds.count()) / + (1024.0 * 1024.0) + << "MBps" << std::endl; tapasco.free(handle_to, len, (tapasco_device_alloc_flag_t)0); tapasco.free(handle_from, len, (tapasco_device_alloc_flag_t)0);