diff --git a/api/arch/i686.hpp b/api/arch/i686.hpp index 58043b9ae..2b197bbf1 100644 --- a/api/arch/i686.hpp +++ b/api/arch/i686.hpp @@ -21,6 +21,8 @@ #define ARCH_x86 +#include + namespace os { @@ -34,6 +36,7 @@ namespace os { static inline uint64_t cpu_cycles() noexcept; static inline void read_memory_barrier() noexcept; static inline void write_memory_barrier() noexcept; + static inline void cpu_relax() noexcept; }; } @@ -53,4 +56,8 @@ inline void os::Arch::write_memory_barrier() noexcept { __asm volatile("mfence" ::: "memory"); } +inline void os::Arch::cpu_relax() noexcept { + _mm_pause(); +} + #endif diff --git a/api/arch/x86_64.hpp b/api/arch/x86_64.hpp index 14ba3f577..3ab5ad74c 100644 --- a/api/arch/x86_64.hpp +++ b/api/arch/x86_64.hpp @@ -21,6 +21,8 @@ #define ARCH_x86 +#include + namespace os { // Concept / base class Arch @@ -33,6 +35,7 @@ namespace os { static inline uint64_t cpu_cycles() noexcept; static inline void read_memory_barrier() noexcept; static inline void write_memory_barrier() noexcept; + static inline void cpu_relax() noexcept; }; } @@ -52,4 +55,8 @@ inline void os::Arch::write_memory_barrier() noexcept { __asm volatile("mfence" ::: "memory"); } +inline void os::Arch::cpu_relax() noexcept { + _mm_pause(); +} + #endif diff --git a/api/net/buffer_store.hpp b/api/net/buffer_store.hpp index c240b54a1..152a155fb 100644 --- a/api/net/buffer_store.hpp +++ b/api/net/buffer_store.hpp @@ -85,8 +85,7 @@ namespace net std::vector available_; std::vector pools_; #ifdef INCLUDEOS_SMP_ENABLE - // has strict alignment reqs, so put at end - spinlock_t plock = 0; + Spinlock plock; #endif BufferStore(BufferStore&) = delete; BufferStore(BufferStore&&) = delete; @@ -99,7 +98,7 @@ namespace net auto* buff = (uint8_t*) addr; if (LIKELY(this->is_valid(buff))) { #ifdef INCLUDEOS_SMP_ENABLE - scoped_spinlock spinlock(this->plock); + std::lock_guard lock(this->plock); #endif this->available_.push_back(buff); return; diff --git a/api/smp_utils b/api/smp_utils index 65d489b3c..c74afaa34 100644 --- a/api/smp_utils +++ b/api/smp_utils @@ -20,38 +20,85 @@ #ifndef API_SMP_UTILS_HEADER #define API_SMP_UTILS_HEADER +#include #include -/// x86-related locking stuff /// -#if defined(ARCH_x86) -// Intel 3a 8.10.6.7: 128-byte boundary -typedef unsigned int spinlock_t __attribute__((aligned(128))); +/* TTAS[1] spinlock implementation. + * + * Implements C++ named requirements BasicLockable, Lockable and TimedLockable + * for use with e.g. std::lock_guard. + * + * Loosely follows C++ spinlock implementation by Erik Rigtorp[2]. + * + * References: + * 1. https://en.wikipedia.org/wiki/Test_and_test-and-set + * 2. https://rigtorp.se/spinlock/ + * */ +class Spinlock { + public: + void lock() noexcept { + // See https://rigtorp.se/spinlock/ for details on memory ordering + // + // Optimised for uncontended locks as it starts with exchange, which + // requires a write. + while (lock_.exchange(true, std::memory_order_acquire)) { + while (lock_.load(std::memory_order_relaxed)) { + os::Arch::cpu_relax(); + } + } + } + + void unlock() noexcept { + lock_.store(false, std::memory_order_release); + } + + bool is_locked() noexcept { + return lock_.load(std::memory_order_relaxed); + } + + // try_lock() returns true on success, false on failure. Needed for Lockable. + bool try_lock() noexcept { + // Fast path first. Return false if the lock is taken + if (lock_.load(std::memory_order_relaxed)) { + return false; + } + // Try to take the lock, .exchange returns false if the lock was available + return !lock_.exchange(true, std::memory_order_acquire); + } + + // Try to lock for duration. Needed for TimedLockable. + bool try_lock_for(std::chrono::nanoseconds t) noexcept { + using namespace std::chrono; + + // Try to take lock first + if (try_lock()) { + return true; + } + + // Try to take again until deadline has passed + auto deadline = high_resolution_clock::now() + t; + + do { + os::Arch::cpu_relax(); + if (try_lock()) { + return true; + } + } while (deadline > high_resolution_clock::now()); + + return false; + } + + // Try to lock until timestamp. Needed for TimedLockable. + template + bool try_lock_until(typename std::chrono::time_point t) noexcept { + // Convert to duration + auto d = t - T::now(); + return try_lock_for(d); + } + + private: + std::atomic lock_ = {0}; -#ifdef INCLUDEOS_SMP_ENABLE -inline void lock(spinlock_t& lock) { - while (!__sync_bool_compare_and_swap(&lock, 0, 1)) { - while (lock) asm("pause"); - } -} -inline void unlock(spinlock_t& lock) { - __sync_lock_release(&lock, 0); // barrier -} -#else -inline void lock(spinlock_t&) {} -inline void unlock(spinlock_t&) {} -#endif - -struct scoped_spinlock -{ - scoped_spinlock(spinlock_t& ref) noexcept : spinlock(ref) { - //asm("" : : : "memory"); - lock(this->spinlock); - } - ~scoped_spinlock() noexcept { - unlock(spinlock); // barrier - } -private: - spinlock_t& spinlock; }; struct minimal_barrier_t @@ -79,6 +126,4 @@ private: volatile int val = 0; }; -#endif // arch - #endif // hdr diff --git a/api/util/statman.hpp b/api/util/statman.hpp index 15b7c62f7..0ef29ea48 100644 --- a/api/util/statman.hpp +++ b/api/util/statman.hpp @@ -155,7 +155,7 @@ class Statman { private: std::deque m_stats; #ifdef INCLUDEOS_SMP_ENABLE - mutable spinlock_t stlock = 0; + Spinlock stlock; #endif ssize_t find_free_stat() const noexcept; uint32_t& unused_stats(); diff --git a/src/kernel/events.cpp b/src/kernel/events.cpp index e6d15beb5..b0322e7f2 100644 --- a/src/kernel/events.cpp +++ b/src/kernel/events.cpp @@ -20,6 +20,7 @@ #include #include #include +#include //#define DEBUG_SMP static SMP::Array managers; diff --git a/src/net/buffer_store.cpp b/src/net/buffer_store.cpp index 13d46f7ce..13eecba20 100644 --- a/src/net/buffer_store.cpp +++ b/src/net/buffer_store.cpp @@ -61,7 +61,7 @@ namespace net { uint8_t* BufferStore::get_buffer() { #ifdef INCLUDEOS_SMP_ENABLE - scoped_spinlock spinlock(this->plock); + std::lock_guard lock(this->plock); #endif if (UNLIKELY(available_.empty())) { diff --git a/src/platform/x86_pc/apic_revenant.cpp b/src/platform/x86_pc/apic_revenant.cpp index bbc564f2d..0afaf7243 100644 --- a/src/platform/x86_pc/apic_revenant.cpp +++ b/src/platform/x86_pc/apic_revenant.cpp @@ -24,10 +24,10 @@ using namespace x86; static bool revenant_task_doer(smp_system_stuff& system) { // grab hold on task list - lock(system.tlock); + system.tlock.lock(); if (system.tasks.empty()) { - unlock(system.tlock); + system.tlock.unlock(); // try again return false; } @@ -36,7 +36,7 @@ static bool revenant_task_doer(smp_system_stuff& system) std::vector tasks; system.tasks.swap(tasks); - unlock(system.tlock); + system.tlock.unlock(); for (auto& task : tasks) { @@ -47,9 +47,9 @@ static bool revenant_task_doer(smp_system_stuff& system) if (task.done) { // NOTE: specifically pushing to 'smp' here, and not 'system' - lock(PER_CPU(smp_system).flock); + PER_CPU(smp_system).flock.lock(); PER_CPU(smp_system).completed.push_back(std::move(task.done)); - unlock(PER_CPU(smp_system).flock); + PER_CPU(smp_system).flock.unlock(); // signal home PER_CPU(smp_system).work_done = true; } diff --git a/src/platform/x86_pc/apic_revenant.hpp b/src/platform/x86_pc/apic_revenant.hpp index bee479ee4..aa7249ce3 100644 --- a/src/platform/x86_pc/apic_revenant.hpp +++ b/src/platform/x86_pc/apic_revenant.hpp @@ -52,8 +52,8 @@ extern smp_stuff smp_main; struct smp_system_stuff { - spinlock_t tlock = 0; - spinlock_t flock = 0; + Spinlock tlock; + Spinlock flock; std::vector tasks; std::vector completed; bool work_done; diff --git a/src/platform/x86_pc/smp.cpp b/src/platform/x86_pc/smp.cpp index 622a1a2f7..29ba8f99c 100644 --- a/src/platform/x86_pc/smp.cpp +++ b/src/platform/x86_pc/smp.cpp @@ -147,9 +147,9 @@ void init_SMP() smp_main.bitmap.atomic_reset(next); // get jobs from other CPU std::vector done; - lock(smp_system[next].flock); + smp_system[next].flock.lock(); smp_system[next].completed.swap(done); - unlock(smp_system[next].flock); + smp_system[next].flock.unlock(); // execute all tasks for (auto& func : done) func(); @@ -198,9 +198,9 @@ void SMP::init_task() void SMP::add_task(smp_task_func task, smp_done_func done, int cpu) { #ifdef INCLUDEOS_SMP_ENABLE - lock(smp_system[cpu].tlock); + smp_system[cpu].tlock.lock(); smp_system[cpu].tasks.emplace_back(std::move(task), std::move(done)); - unlock(smp_system[cpu].tlock); + smp_system[cpu].tlock.unlock(); #else assert(cpu == 0); task(); done(); @@ -209,9 +209,9 @@ void SMP::add_task(smp_task_func task, smp_done_func done, int cpu) void SMP::add_task(smp_task_func task, int cpu) { #ifdef INCLUDEOS_SMP_ENABLE - lock(smp_system[cpu].tlock); + smp_system[cpu].tlock.lock(); smp_system[cpu].tasks.emplace_back(std::move(task), nullptr); - unlock(smp_system[cpu].tlock); + smp_system[cpu].tlock.unlock(); #else assert(cpu == 0); task(); @@ -222,9 +222,9 @@ void SMP::add_bsp_task(smp_done_func task) #ifdef INCLUDEOS_SMP_ENABLE // queue job auto& system = PER_CPU(smp_system); - lock(system.flock); + system.flock.lock(); system.completed.push_back(std::move(task)); - unlock(system.flock); + system.flock.unlock(); // set this CPU bit smp_main.bitmap.atomic_set(SMP::cpu_id()); // call home @@ -262,13 +262,13 @@ void SMP::unicast(int cpu, uint8_t irq) x86::APIC::get().send_ipi(cpu, IRQ_BASE + irq); } -static spinlock_t __global_lock = 0; +static Spinlock __global_lock; void SMP::global_lock() noexcept { - lock(__global_lock); + __global_lock.lock(); } void SMP::global_unlock() noexcept { - unlock(__global_lock); + __global_lock.unlock(); } diff --git a/src/util/statman.cpp b/src/util/statman.cpp index 6ef236467..06d289146 100644 --- a/src/util/statman.cpp +++ b/src/util/statman.cpp @@ -75,7 +75,7 @@ Statman::Statman() { Stat& Statman::create(const Stat::Stat_type type, const std::string& name) { #ifdef INCLUDEOS_SMP_ENABLE - volatile scoped_spinlock lock(this->stlock); + const std::lock_guard lock(this->stlock); #endif if (name.empty()) throw Stats_exception("Cannot create Stat with no name"); @@ -95,7 +95,7 @@ Stat& Statman::create(const Stat::Stat_type type, const std::string& name) Stat& Statman::get(const Stat* st) { #ifdef INCLUDEOS_SMP_ENABLE - volatile scoped_spinlock lock(this->stlock); + const std::lock_guard lock(this->stlock); #endif for (auto& stat : this->m_stats) { if (&stat == st) { @@ -110,7 +110,7 @@ Stat& Statman::get(const Stat* st) Stat& Statman::get_by_name(const char* name) { #ifdef INCLUDEOS_SMP_ENABLE - volatile scoped_spinlock lock(this->stlock); + const std::lock_guard lock(this->stlock); #endif for (auto& stat : this->m_stats) { @@ -140,7 +140,7 @@ void Statman::free(void* addr) { auto& stat = this->get((Stat*) addr); #ifdef INCLUDEOS_SMP_ENABLE - volatile scoped_spinlock lock(this->stlock); + const std::lock_guard lock(this->stlock); #endif // delete entry new (&stat) Stat(Stat::FLOAT, ""); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bcaf57e41..d212b22a2 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -88,6 +88,7 @@ set(TEST_SOURCES ${TEST}/kernel/unit/unit_liveupdate.cpp ${TEST}/kernel/unit/unit_timers.cpp ${TEST}/kernel/unit/x86_paging.cpp + ${TEST}/kernel/unit/spinlocks.cpp ${TEST}/net/unit/addr_test.cpp ${TEST}/net/unit/bufstore.cpp ${TEST}/net/unit/checksum.cpp @@ -231,14 +232,14 @@ add_custom_target( unittests ALL add_custom_command(TARGET unittests POST_BUILD - COMMAND ctest --output-on-failure -L unit ) + COMMAND ctest --output-on-failure -L unit --timeout 5) add_custom_target( memcheck DEPENDS ${TEST_BINARIES}) add_custom_command(TARGET memcheck POST_BUILD - COMMAND ctest --output-on-failure -L memcheck) + COMMAND ctest --output-on-failure -L memcheck --timeout 5) add_custom_target( clangtidy DEPENDS ${TEST_BINARIES}) diff --git a/test/kernel/integration/spinlocks/CMakeLists.txt b/test/kernel/integration/spinlocks/CMakeLists.txt new file mode 100644 index 000000000..a2b0d3a25 --- /dev/null +++ b/test/kernel/integration/spinlocks/CMakeLists.txt @@ -0,0 +1,16 @@ +cmake_minimum_required(VERSION 3.5) +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +set(ELF_SYMBOLS true) + +project (service) +include(os) + +set(SOURCES + service.cpp +) + +os_add_executable(kernel_spinlocks "spinlock test" ${SOURCES}) +os_add_stdout(kernel_spinlocks default_stdout) +os_add_drivers(kernel_spinlocks boot_logger) + +configure_file(test.py ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/test/kernel/integration/spinlocks/service.cpp b/test/kernel/integration/spinlocks/service.cpp new file mode 100644 index 000000000..aece41467 --- /dev/null +++ b/test/kernel/integration/spinlocks/service.cpp @@ -0,0 +1,79 @@ +#include +#include +#include + +void Service::start() +{ + INFO("service", "Testing spinlocks!"); + + Spinlock s; + + INFO2("Basic lock/unlock tests"); + + if (s.is_locked()) { + os::panic("Lock was locked at init"); + } + + s.lock(); + + if (!s.is_locked()) { + os::panic("Lock was not locked after call to lock()"); + } + + s.unlock(); + + if (s.is_locked()) { + os::panic("Lock was locked after call to unlock()"); + } + + INFO2("Testing try_lock*"); + if (!s.try_lock()) { + os::panic("try_lock() didn't succeed"); + } + + // This will timeout + INFO2("Waiting for try_lock_for to timeout"); + { + auto t0 = std::chrono::high_resolution_clock::now(); + if (s.try_lock_for(std::chrono::milliseconds(250))) { + os::panic("try_lock_for() got lock, expected timeout"); + } + auto t1 = std::chrono::high_resolution_clock::now(); + + if (t1 - t0 < std::chrono::milliseconds(250)) { + os::panic("try_lock_for() returned earlier than timeout"); + } + } + + s.unlock(); + + // We should get this immediately + INFO2("Testing try_lock_for on unlocked lock"); + { + auto t0 = std::chrono::high_resolution_clock::now(); + if (!s.try_lock_for(std::chrono::seconds(3))) { + os::panic("try_lock_for didn't get lock"); + } + auto t1 = std::chrono::high_resolution_clock::now(); + + if (t1 - t0 > std::chrono::milliseconds(250)) { + os::panic("try_lock_for() didn't return immediately when lock was unlocked"); + } + } + + s.unlock(); + + // std::lock_guard test + INFO2("Testing std::lock_guard"); + { + std::lock_guard lock(s); + if (!s.is_locked()) { + os::panic("std::lock_guard didn't lock"); + } + } + if (s.is_locked()) { + os::panic("std::lock_guard didn't unlock"); + } + + exit(0); +} diff --git a/test/kernel/integration/spinlocks/test.py b/test/kernel/integration/spinlocks/test.py new file mode 100755 index 000000000..96c5e31b8 --- /dev/null +++ b/test/kernel/integration/spinlocks/test.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 + +import sys +import os + +from vmrunner import vmrunner + +vm = vmrunner.vms[0]; + +if len(sys.argv) > 1: + vm.boot(image_name=str(sys.argv[1])) +else: + # Build, run and clean + vm.boot(image_name='kernel_spinlocks') diff --git a/test/kernel/unit/spinlocks.cpp b/test/kernel/unit/spinlocks.cpp new file mode 100644 index 000000000..e6d462c44 --- /dev/null +++ b/test/kernel/unit/spinlocks.cpp @@ -0,0 +1,180 @@ +#include +#include +#include +#include + +CASE("lock/unlock") +{ + Spinlock s; + EXPECT(!s.is_locked()); + s.lock(); + EXPECT(s.is_locked()); + s.unlock(); + EXPECT(!s.is_locked()); +} + +CASE("std::lock_guard") +{ + Spinlock s; + EXPECT(!s.is_locked()); + { + std::lock_guard lock(s); + EXPECT(s.is_locked()); + } + // Should be unlocked when lock_guard is out of scope + EXPECT(!s.is_locked()); +} + +CASE("try_lock") +{ + Spinlock s; + + // Lock should not be taken + EXPECT(!s.is_locked()); + // Try to lock, should succeed + EXPECT(s.try_lock()); + + // Should be locked now + EXPECT(s.is_locked()); + // try_lock should now fail + EXPECT(!s.try_lock()); + // We should still be locked, unlock + EXPECT(s.is_locked()); + s.unlock(); + + // Check that we're unlocked + EXPECT(!s.is_locked()); + // try_lock should succeed again + EXPECT(s.try_lock()); + // ... and the spinlock should be locked + EXPECT(s.is_locked()); +} + +CASE("try_lock_for") +{ + Spinlock s; + + // Lock should not be taken + EXPECT(!s.is_locked()); + + // Try to lock, should succeed immediately + { + auto t0 = std::chrono::high_resolution_clock::now(); + EXPECT(s.try_lock_for(std::chrono::seconds(1))); + auto t1 = std::chrono::high_resolution_clock::now(); + EXPECT(t1 - t0 < std::chrono::milliseconds(250)); + EXPECT(s.is_locked()); + } + + // Now try again, this should fail, but take time + { + auto t0 = std::chrono::high_resolution_clock::now(); + EXPECT(!s.try_lock_for(std::chrono::milliseconds(250))); + auto t1 = std::chrono::high_resolution_clock::now(); + EXPECT(t1 - t0 > std::chrono::milliseconds(250)); + EXPECT(s.is_locked()); + } +} + +CASE("try_lock_until") +{ + Spinlock s; + + // Lock should not be taken + EXPECT(!s.is_locked()); + + // Try to lock, should succeed immediately + { + auto t = std::chrono::high_resolution_clock::now() + std::chrono::seconds(10); + auto t0 = std::chrono::high_resolution_clock::now(); + EXPECT(s.try_lock_until(t)); + auto t1 = std::chrono::high_resolution_clock::now(); + EXPECT(t1 - t0 < std::chrono::milliseconds(250)); + EXPECT(s.is_locked()); + } + + // Try to lock, this should fail, but take time + { + auto t = std::chrono::high_resolution_clock::now() + std::chrono::milliseconds(250); + auto t0 = std::chrono::high_resolution_clock::now(); + EXPECT(!s.try_lock_until(t)); + auto t1 = std::chrono::high_resolution_clock::now(); + EXPECT(t1 - t0 > std::chrono::milliseconds(250)); + EXPECT(s.is_locked()); + } +} + +CASE("multithreaded - basic") +{ + Spinlock s; + int val = 0; + s.lock(); + + std::thread t1([&](void)->void { + s.lock(); + val++; + s.unlock(); + }); + + std::thread t2([&](void)->void { + s.lock(); + val++; + s.unlock(); + }); + + + EXPECT(val == 0); + + s.unlock(); + + t1.join(); + t2.join(); + + EXPECT(val == 2); +} + +CASE("multithreaded - extended") +{ + Spinlock s; + volatile int val = 0; + s.lock(); + + std::barrier barrier_start{ 3 }; + + std::thread t1([&](void)->void { + barrier_start.arrive_and_wait(); + for (int i = 0; i < 100000; i++) { + s.lock(); + val++; + s.unlock(); + } + }); + + std::thread t2([&](void)->void { + barrier_start.arrive_and_wait(); + for (int i = 0; i < 100000; i++) { + s.lock(); + val++; + s.unlock(); + } + }); + + std::thread t3([&](void)->void { + barrier_start.arrive_and_wait(); + for (int i = 0; i < 100000; i++) { + s.lock(); + val++; + s.unlock(); + } + }); + + EXPECT(val == 0); + + s.unlock(); + + t1.join(); + t2.join(); + t3.join(); + + EXPECT(val == 300000); +}