Skip to content

Commit

Permalink
Added dlsm::Thread::getaffinity and use MAdviseAllocator.
Browse files Browse the repository at this point in the history
  • Loading branch information
pkarneliuk committed Mar 13, 2024
1 parent 3914e99 commit 94412f8
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 20 deletions.
9 changes: 5 additions & 4 deletions include/impl/Thread.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ namespace dlsm::Thread {

namespace dlsm::Thread {

void name(const std::string& name, std::size_t native_handle = 0);
std::string name(std::size_t native_handle = 0);
void name(const std::string& name, std::size_t handle = 0);
std::string name(std::size_t handle = 0);

static constexpr std::size_t AllCPU = 0xFFFF;
void affinity(std::size_t cpuid = AllCPU, std::size_t native_handle = 0);
static constexpr std::size_t AllCPU = 0;
void affinity(std::size_t cpuid = AllCPU, std::size_t handle = 0);
std::size_t getaffinity(std::size_t handle = 0);

/// Concept satisfied by any type `T` for which calling `pause()` on an
/// object of that type is a well-formed expression.
template <typename T>
concept Yield = requires(T c) { c.pause(); };
Expand Down
32 changes: 22 additions & 10 deletions src/Thread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,42 +6,54 @@
#include <ctime>
#include <system_error>

namespace {
/// Converts a numeric thread handle into a `pthread_t`.
///
/// @param handle Numeric thread handle; the value 0 selects the calling thread.
/// @return `pthread_self()` when `handle` is 0, otherwise `handle` cast to `pthread_t`.
pthread_t tid(std::size_t handle = 0) {
    if (handle != 0) {
        return static_cast<pthread_t>(handle);
    }
    return pthread_self();
}
}  // namespace

namespace dlsm::Thread {

void name(const std::string& name, std::size_t native_handle) {
auto thread = (native_handle == 0) ? pthread_self() : static_cast<pthread_t>(native_handle);
if (const int err = pthread_setname_np(thread, name.c_str())) {
void name(const std::string& name, std::size_t handle) {
if (const int err = pthread_setname_np(tid(handle), name.c_str())) {
throw std::system_error(err, std::generic_category(), "pthread_setname_np()");
}
}

std::string name(std::size_t native_handle) {
auto thread = (native_handle == 0) ? pthread_self() : static_cast<pthread_t>(native_handle);
std::string name(std::size_t handle) {
std::string name(16, '\0');
if (const int err = pthread_getname_np(thread, std::data(name), std::size(name))) {
if (const int err = pthread_getname_np(tid(handle), std::data(name), std::size(name))) {
throw std::system_error(err, std::generic_category(), "pthread_getname_np()");
}
name.resize(std::strlen(name.c_str()));
return name;
}

void affinity(std::size_t cpuid, std::size_t native_handle) {
auto thread = (native_handle == 0) ? pthread_self() : static_cast<pthread_t>(native_handle);
void affinity(std::size_t cpuid, std::size_t handle) {
cpu_set_t cpuset;
CPU_ZERO(&cpuset);
if (cpuid == AllCPU) {
for (std::size_t i = 0; i < sizeof(cpu_set_t) * 8; ++i) {
for (std::size_t i = 0; i < CPU_SETSIZE; ++i) {
CPU_SET(i, &cpuset);
}
} else {
CPU_SET(cpuid, &cpuset);
}

if (const int err = ::pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset)) {
if (const int err = ::pthread_setaffinity_np(tid(handle), sizeof(cpuset), &cpuset)) {
throw std::system_error(err, std::system_category(), "pthread_setaffinity_np()");
}
}

/// Reports the lowest-numbered CPU present in a thread's affinity mask.
///
/// @param handle Numeric thread handle, resolved through `tid()`.
/// @return Index of the first CPU set in the mask. Returns 0 when no CPU
///         below CPU_SETSIZE is set, so a result of 0 does not by itself
///         distinguish "CPU 0 is allowed" from "mask is empty".
/// @throws std::system_error (system_category) if `pthread_getaffinity_np()`
///         reports an error.
std::size_t getaffinity(std::size_t handle) {
    cpu_set_t mask;
    const int rc = ::pthread_getaffinity_np(tid(handle), sizeof(mask), &mask);
    if (rc != 0) {
        throw std::system_error(rc, std::system_category(), "pthread_getaffinity_np()");
    }

    // Advance to the first CPU index whose bit is set in the mask.
    std::size_t cpu = 0;
    while (cpu < CPU_SETSIZE && !CPU_ISSET(cpu, &mask)) {
        ++cpu;
    }
    return (cpu < CPU_SETSIZE) ? cpu : 0;
}

void NanoSleep::pause() noexcept {
const timespec timeout = {0, 1};
::nanosleep(&timeout, nullptr);
Expand Down
18 changes: 12 additions & 6 deletions tests/perf/PerfTransport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <utility>
#include <vector>

#include "impl/Allocator.hpp"
#include "impl/Clock.hpp"
#include "impl/Thread.hpp"
#include "impl/Transport.hpp"
Expand Down Expand Up @@ -36,6 +37,7 @@ void TransportPubSub(benchmark::State& state, Args&&... args) {
// clang-format on

auto runtime = dlsm::Transport<std::remove_pointer_t<decltype(type)>>(ropts);
const auto affinity = dlsm::Thread::getaffinity();
std::mutex state_mutex;
const auto synchronized = [&](auto action) {
std::lock_guard<std::mutex> guard(state_mutex);
Expand All @@ -49,14 +51,18 @@ void TransportPubSub(benchmark::State& state, Args&&... args) {
std::vector<std::jthread> threads{1 + subscribers};
std::barrier sync(std::ssize(threads));

using NsList = std::vector<std::chrono::nanoseconds>;
// using NsList = std::vector<std::chrono::nanoseconds>;
using NsList = std::vector<std::chrono::nanoseconds, dlsm::MAdviseAllocator<std::chrono::nanoseconds>>;
// using NsList = std::vector<std::chrono::nanoseconds, dlsm::MmapAllocator<std::chrono::nanoseconds>>;
std::vector<NsList> timestamps{std::size(threads), NsList(tosend, 0ns)};

for (std::size_t i = 0; auto& t : threads) {
t = std::jthread([&, i]() {
if (affinity) dlsm::Thread::affinity(affinity + i);
auto& ts = timestamps[i];
std::uint64_t count = 0;
if (i == 0) {
dlsm::Thread::name("Pub");
auto pub = runtime.pub(popts);

sync.arrive_and_wait();
Expand Down Expand Up @@ -89,9 +95,10 @@ void TransportPubSub(benchmark::State& state, Args&&... args) {
last_sent = count;
sync.arrive_and_wait();
} else {
std::size_t timeouts = 0;

const auto name = "Sub" + std::to_string(i);
dlsm::Thread::name(name);
auto sub = runtime.sub(sopts);
std::size_t timeouts = 0;

sync.arrive_and_wait();
Event e{};
Expand Down Expand Up @@ -120,9 +127,8 @@ void TransportPubSub(benchmark::State& state, Args&&... args) {
}
}
synchronized([&] {
if (const auto lost = tosend - count)
state.counters["Sub" + std::to_string(i) + "Lost"] = static_cast<double>(lost);
if (timeouts) state.counters["Sub" + std::to_string(i) + "TO"] = static_cast<double>(timeouts);
if (const auto lost = tosend - count) state.counters[name + "Lost"] = static_cast<double>(lost);
if (timeouts) state.counters[name + "TO"] = static_cast<double>(timeouts);
});
sync.arrive_and_wait();
}
Expand Down
30 changes: 30 additions & 0 deletions tests/perf/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ Performance tests focussed on microbenchmarking using [google/benchmark](https:/
# Build in dir './build' perf and run Transport
make -C ./build perf && ./build/tests/perf/perf --benchmark_filter=Transport --benchmark_counters_tabular=true

# Run Transport under perf stat
sudo perf stat ./build/tests/perf/perf --benchmark_filter=Transport --benchmark_counters_tabular=true
sudo perf stat taskset -c 6-11 ./build/tests/perf/perf --benchmark_filter=Transport --benchmark_counters_tabular=true

# Run 5 repetitions with 1 iteration benchmarking
./build/tests/perf/perf --benchmark_filter=* \
--benchmark_repetitions=5 \
Expand All @@ -21,3 +25,29 @@ This script reads binary files with `int64` samples(nanoseconds timestamps), and
# Display Pub1.ns as master and delays of Sub1/Sub2/Sub3/Sub4.ns signals relative to Pub1
./tests/perf/delays.py ./build/tests/perf/TransportPubSub-mem-*
```

## Threads Affinity and CPU Core Isolation
```sh
lstopo-no-graphics --no-io --no-legend --of txt # Display layout of available CPUs in physical packages
numactl --hardware # Display NUMA nodes
sudo grubby --update-kernel=ALL --args="isolcpus=6-11" # Isolate CPU #6 - #11 from OS scheduling
sudo grubby --update-kernel=ALL --remove-args="isolcpus=6-11"
cat /proc/cmdline # Display kernel startup parameters
taskset -cp 0,4,6-8 pid # Setting CPU affinity of a running process (pid)
```

## HugePages Support
```sh
# Set limit of Huge Pages in system
echo 1000 | sudo tee /proc/sys/vm/nr_hugepages
# Watch statistics of available Huge Pages
watch -n 1 grep Huge /proc/meminfo
watch -n 1 numastat -cm
cat /sys/kernel/mm/transparent_hugepage/enabled
always [madvise] never
```


## Links and References
- [Isolating CPUs using tuned-profiles-real-time](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux_for_real_time/8/html/optimizing_rhel_8_for_real_time_for_low_latency_operation/assembly_isolating-cpus-using-tuned-profiles-realtime_optimizing-rhel8-for-real-time-for-low-latency-operation)
- [Configuring HugeTLB Huge Pages](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/performance_tuning_guide/sect-red_hat_enterprise_linux-performance_tuning_guide-memory-configuring-huge-pages)
2 changes: 2 additions & 0 deletions tests/unit/TestThread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ TEST(Thread, Affinity) {
});

enter.wait();
EXPECT_EQ(dlsm::Thread::getaffinity(t.native_handle()), 0);
dlsm::Thread::affinity(1ULL, t.native_handle());
EXPECT_EQ(dlsm::Thread::getaffinity(t.native_handle()), 1ULL);
dlsm::Thread::affinity(dlsm::Thread::AllCPU, t.native_handle());
exit.count_down();
});
Expand Down

0 comments on commit 94412f8

Please sign in to comment.