From 2f4fc13b2cb839a41edaf766f4a1bbf8dc5467a3 Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Mon, 26 Jun 2023 21:59:20 +0000 Subject: [PATCH 01/26] rt: initial implementation of new threaded runtime This patch includes an initial implementation of a new multi-threaded runtime. The new runtime aims to increase the scheduler throughput by speeding up how it dispatches work to peer worker threads. This implementation improves most benchmarks by about ~10% when the number of threads is below 16. As threads increase, mutex contention deteriorates performance. Because the new scheduler is not yet ready to replace the old one, the patch introduces it as an unstable runtime flavor with a warning that it isn't production ready. Work to improve the scalability of the runtime will most likely require more intrusive changes across Tokio, so I am opting to merge with master to avoid larger conflicts. --- Cargo.toml | 3 + tokio/src/loom/std/unsafe_cell.rs | 2 + tokio/src/runtime/blocking/schedule.rs | 4 + tokio/src/runtime/builder.rs | 68 + tokio/src/runtime/handle.rs | 2 + tokio/src/runtime/runtime.rs | 19 + tokio/src/runtime/scheduler/block_in_place.rs | 21 + tokio/src/runtime/scheduler/current_thread.rs | 4 + tokio/src/runtime/scheduler/mod.rs | 128 +- .../scheduler/multi_thread_alt/counters.rs | 166 ++ .../scheduler/multi_thread_alt/handle.rs | 69 + .../multi_thread_alt/handle/metrics.rs | 41 + .../multi_thread_alt/handle/taskdump.rs | 26 + .../scheduler/multi_thread_alt/idle.rs | 425 +++++ .../runtime/scheduler/multi_thread_alt/mod.rs | 99 ++ .../scheduler/multi_thread_alt/overflow.rs | 26 + .../scheduler/multi_thread_alt/park.rs | 232 +++ .../scheduler/multi_thread_alt/queue.rs | 601 +++++++ .../scheduler/multi_thread_alt/stats.rs | 171 ++ .../scheduler/multi_thread_alt/trace.rs | 61 + .../scheduler/multi_thread_alt/trace_mock.rs | 11 + .../scheduler/multi_thread_alt/worker.rs | 1519 +++++++++++++++++ .../multi_thread_alt/worker/metrics.rs | 11 + .../multi_thread_alt/worker/taskdump.rs | 79 + .../multi_thread_alt/worker/taskdump_mock.rs | 7 + tokio/src/runtime/task/list.rs | 2 +- tokio/src/runtime/tests/loom_alt_pool.rs | 458 +++++ tokio/src/runtime/tests/loom_pool.rs | 2 +- tokio/src/runtime/tests/mod.rs | 3 +- tokio/src/runtime/tests/task.rs | 9 +- tokio/src/task/blocking.rs | 2 +- tokio/tests/rt_common.rs | 34 + tokio/tests/rt_threaded_alt.rs | 738 ++++++++ 33 files changed, 4977 insertions(+), 66 deletions(-) create mode 100644 tokio/src/runtime/scheduler/block_in_place.rs create mode 100644 tokio/src/runtime/scheduler/multi_thread_alt/counters.rs create mode 100644 tokio/src/runtime/scheduler/multi_thread_alt/handle.rs create mode 100644 tokio/src/runtime/scheduler/multi_thread_alt/handle/metrics.rs create mode 100644 tokio/src/runtime/scheduler/multi_thread_alt/handle/taskdump.rs create mode 100644 tokio/src/runtime/scheduler/multi_thread_alt/idle.rs create mode 100644 tokio/src/runtime/scheduler/multi_thread_alt/mod.rs create mode 100644 tokio/src/runtime/scheduler/multi_thread_alt/overflow.rs create mode 100644 tokio/src/runtime/scheduler/multi_thread_alt/park.rs create mode 100644 tokio/src/runtime/scheduler/multi_thread_alt/queue.rs create mode 100644 tokio/src/runtime/scheduler/multi_thread_alt/stats.rs create mode 100644 tokio/src/runtime/scheduler/multi_thread_alt/trace.rs create mode 100644 tokio/src/runtime/scheduler/multi_thread_alt/trace_mock.rs create mode 100644 tokio/src/runtime/scheduler/multi_thread_alt/worker.rs create mode 100644 
tokio/src/runtime/scheduler/multi_thread_alt/worker/metrics.rs create mode 100644 tokio/src/runtime/scheduler/multi_thread_alt/worker/taskdump.rs create mode 100644 tokio/src/runtime/scheduler/multi_thread_alt/worker/taskdump_mock.rs create mode 100644 tokio/src/runtime/tests/loom_alt_pool.rs create mode 100644 tokio/tests/rt_threaded_alt.rs diff --git a/Cargo.toml b/Cargo.toml index f3e19312e8b..e4e70eba569 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,3 +14,6 @@ members = [ "tests-build", "tests-integration", ] + +[patch.crates-io] +loom = { git = "https://github.com/tokio-rs/loom", branch = "increase-max-threads" } diff --git a/tokio/src/loom/std/unsafe_cell.rs b/tokio/src/loom/std/unsafe_cell.rs index 66c1d7943e0..3d6513b4655 100644 --- a/tokio/src/loom/std/unsafe_cell.rs +++ b/tokio/src/loom/std/unsafe_cell.rs @@ -6,10 +6,12 @@ impl UnsafeCell { UnsafeCell(std::cell::UnsafeCell::new(data)) } + #[inline(always)] pub(crate) fn with(&self, f: impl FnOnce(*const T) -> R) -> R { f(self.0.get()) } + #[inline(always)] pub(crate) fn with_mut(&self, f: impl FnOnce(*mut T) -> R) -> R { f(self.0.get()) } diff --git a/tokio/src/runtime/blocking/schedule.rs b/tokio/src/runtime/blocking/schedule.rs index edf775be8be..b4c6a2862b3 100644 --- a/tokio/src/runtime/blocking/schedule.rs +++ b/tokio/src/runtime/blocking/schedule.rs @@ -25,6 +25,8 @@ impl BlockingSchedule { } #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] scheduler::Handle::MultiThread(_) => {} + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + scheduler::Handle::MultiThreadAlt(_) => {} } } BlockingSchedule { @@ -45,6 +47,8 @@ impl task::Schedule for BlockingSchedule { } #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] scheduler::Handle::MultiThread(_) => {} + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + scheduler::Handle::MultiThreadAlt(_) => {} } } None diff --git a/tokio/src/runtime/builder.rs b/tokio/src/runtime/builder.rs index af9e0e172f3..d2e10b004ae 100644 --- a/tokio/src/runtime/builder.rs +++ b/tokio/src/runtime/builder.rs @@ -199,6 +199,8 @@ pub(crate) enum Kind { CurrentThread, #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] MultiThread, + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + MultiThreadAlt, } impl Builder { @@ -230,6 +232,26 @@ impl Builder { // The number `61` is fairly arbitrary. I believe this value was copied from golang. Builder::new(Kind::MultiThread, 61) } + + cfg_unstable! { + /// Returns a new builder with the alternate multi thread scheduler + /// selected. + /// + /// The alternate multi threaded scheduler is an in-progress + /// candidate to replace the existing multi threaded scheduler. It + /// currently does not scale as well to 16+ processors. + /// + /// This runtime flavor is currently **not considered production + /// ready**. + /// + /// Configuration methods can be chained on the return value. + #[cfg(feature = "rt-multi-thread")] + #[cfg_attr(docsrs, doc(cfg(feature = "rt-multi-thread")))] + pub fn new_multi_thread_alt() -> Builder { + // The number `61` is fairly arbitrary. I believe this value was copied from golang. 
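As a rough usage sketch (not part of this patch), selecting the new flavor mirrors the existing multi-thread builder; it assumes the `rt-multi-thread` feature and a build with `RUSTFLAGS="--cfg tokio_unstable"`:

    let rt = tokio::runtime::Builder::new_multi_thread_alt()
        .worker_threads(4)
        .enable_all()
        .build()
        .unwrap();

    rt.block_on(async {
        // Spawned tasks run on the alternate scheduler's worker threads.
        tokio::spawn(async { /* ... */ }).await.unwrap();
    });
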
+ Builder::new(Kind::MultiThreadAlt, 61) + } + } } /// Returns a new runtime builder initialized with default configuration @@ -656,6 +678,8 @@ impl Builder { Kind::CurrentThread => self.build_current_thread_runtime(), #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] Kind::MultiThread => self.build_threaded_runtime(), + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + Kind::MultiThreadAlt => self.build_alt_threaded_runtime(), } } @@ -665,6 +689,8 @@ impl Builder { Kind::CurrentThread => true, #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] Kind::MultiThread => false, + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + Kind::MultiThreadAlt => false, }, enable_io: self.enable_io, enable_time: self.enable_time, @@ -1214,6 +1240,48 @@ cfg_rt_multi_thread! { Ok(Runtime::from_parts(Scheduler::MultiThread(scheduler), handle, blocking_pool)) } + + cfg_unstable! { + fn build_alt_threaded_runtime(&mut self) -> io::Result { + use crate::loom::sys::num_cpus; + use crate::runtime::{Config, runtime::Scheduler}; + use crate::runtime::scheduler::MultiThreadAlt; + + let core_threads = self.worker_threads.unwrap_or_else(num_cpus); + + let (driver, driver_handle) = driver::Driver::new(self.get_cfg())?; + + // Create the blocking pool + let blocking_pool = + blocking::create_blocking_pool(self, self.max_blocking_threads + core_threads); + let blocking_spawner = blocking_pool.spawner().clone(); + + // Generate a rng seed for this runtime. + let seed_generator_1 = self.seed_generator.next_generator(); + let seed_generator_2 = self.seed_generator.next_generator(); + + let (scheduler, handle) = MultiThreadAlt::new( + core_threads, + driver, + driver_handle, + blocking_spawner, + seed_generator_2, + Config { + before_park: self.before_park.clone(), + after_unpark: self.after_unpark.clone(), + global_queue_interval: self.global_queue_interval, + event_interval: self.event_interval, + #[cfg(tokio_unstable)] + unhandled_panic: self.unhandled_panic.clone(), + disable_lifo_slot: self.disable_lifo_slot, + seed_generator: seed_generator_1, + metrics_poll_count_histogram: self.metrics_poll_count_histogram_builder(), + }, + ); + + Ok(Runtime::from_parts(Scheduler::MultiThreadAlt(scheduler), handle, blocking_pool)) + } + } } } diff --git a/tokio/src/runtime/handle.rs b/tokio/src/runtime/handle.rs index be4743d4775..42745857dc0 100644 --- a/tokio/src/runtime/handle.rs +++ b/tokio/src/runtime/handle.rs @@ -355,6 +355,8 @@ impl Handle { scheduler::Handle::CurrentThread(_) => RuntimeFlavor::CurrentThread, #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] scheduler::Handle::MultiThread(_) => RuntimeFlavor::MultiThread, + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + scheduler::Handle::MultiThreadAlt(_) => RuntimeFlavor::MultiThreadAlt, } } } diff --git a/tokio/src/runtime/runtime.rs b/tokio/src/runtime/runtime.rs index 3f349997583..681ba28fce1 100644 --- a/tokio/src/runtime/runtime.rs +++ b/tokio/src/runtime/runtime.rs @@ -9,6 +9,10 @@ use std::time::Duration; cfg_rt_multi_thread! { use crate::runtime::Builder; use crate::runtime::scheduler::MultiThread; + + cfg_unstable! { + use crate::runtime::scheduler::MultiThreadAlt; + } } /// The Tokio runtime. @@ -84,6 +88,9 @@ pub enum RuntimeFlavor { CurrentThread, /// The flavor that executes tasks across multiple threads. MultiThread, + /// The flavor that executes tasks across multiple threads. 
+ #[cfg(tokio_unstable)] + MultiThreadAlt, } /// The runtime scheduler is either a multi-thread or a current-thread executor. @@ -95,6 +102,10 @@ pub(super) enum Scheduler { /// Execute tasks across multiple threads. #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] MultiThread(MultiThread), + + /// Execute tasks across multiple threads. + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + MultiThreadAlt(MultiThreadAlt), } impl Runtime { @@ -311,6 +322,8 @@ impl Runtime { Scheduler::CurrentThread(exec) => exec.block_on(&self.handle.inner, future), #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] Scheduler::MultiThread(exec) => exec.block_on(&self.handle.inner, future), + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + Scheduler::MultiThreadAlt(exec) => exec.block_on(&self.handle.inner, future), } } @@ -431,6 +444,12 @@ impl Drop for Runtime { // already in the runtime's context. multi_thread.shutdown(&self.handle.inner); } + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + Scheduler::MultiThreadAlt(multi_thread) => { + // The threaded scheduler drops its tasks on its worker threads, which is + // already in the runtime's context. + multi_thread.shutdown(&self.handle.inner); + } } } } diff --git a/tokio/src/runtime/scheduler/block_in_place.rs b/tokio/src/runtime/scheduler/block_in_place.rs new file mode 100644 index 00000000000..803ff4504f7 --- /dev/null +++ b/tokio/src/runtime/scheduler/block_in_place.rs @@ -0,0 +1,21 @@ +use crate::runtime::scheduler; + +#[track_caller] +pub(crate) fn block_in_place(f: F) -> R +where + F: FnOnce() -> R, +{ + #[cfg(tokio_unstable)] + { + use crate::runtime::{Handle, RuntimeFlavor::MultiThreadAlt}; + + match Handle::try_current().map(|h| h.runtime_flavor()) { + Ok(MultiThreadAlt) => { + return scheduler::multi_thread_alt::block_in_place(f); + } + _ => {} + } + } + + scheduler::multi_thread::block_in_place(f) +} diff --git a/tokio/src/runtime/scheduler/current_thread.rs b/tokio/src/runtime/scheduler/current_thread.rs index ac4a8d6fac1..9be3fe0b473 100644 --- a/tokio/src/runtime/scheduler/current_thread.rs +++ b/tokio/src/runtime/scheduler/current_thread.rs @@ -523,6 +523,10 @@ cfg_metrics! { &self.shared.worker_metrics } + pub(crate) fn worker_local_queue_depth(&self, worker: usize) -> usize { + self.worker_metrics(worker).queue_depth() + } + pub(crate) fn num_blocking_threads(&self) -> usize { self.blocking_spawner.num_threads() } diff --git a/tokio/src/runtime/scheduler/mod.rs b/tokio/src/runtime/scheduler/mod.rs index 3e3151711f5..de49dae5e81 100644 --- a/tokio/src/runtime/scheduler/mod.rs +++ b/tokio/src/runtime/scheduler/mod.rs @@ -10,11 +10,19 @@ cfg_rt! { } cfg_rt_multi_thread! { + mod block_in_place; + pub(crate) use block_in_place::block_in_place; + mod lock; use lock::Lock; pub(crate) mod multi_thread; pub(crate) use multi_thread::MultiThread; + + cfg_unstable! { + pub(crate) mod multi_thread_alt; + pub(crate) use multi_thread_alt::MultiThread as MultiThreadAlt; + } } use crate::runtime::driver; @@ -27,6 +35,9 @@ pub(crate) enum Handle { #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] MultiThread(Arc), + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + MultiThreadAlt(Arc), + // TODO: This is to avoid triggering "dead code" warnings many other places // in the codebase. 
Remove this during a later cleanup #[cfg(not(feature = "rt"))] @@ -40,6 +51,9 @@ pub(super) enum Context { #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] MultiThread(multi_thread::Context), + + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + MultiThreadAlt(multi_thread_alt::Context), } impl Handle { @@ -52,6 +66,9 @@ impl Handle { #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] Handle::MultiThread(ref h) => &h.driver, + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThreadAlt(ref h) => &h.driver, + #[cfg(not(feature = "rt"))] Handle::Disabled => unreachable!(), } @@ -67,6 +84,20 @@ cfg_rt! { use crate::util::RngSeedGenerator; use std::task::Waker; + macro_rules! match_flavor { + ($self:expr, $ty:ident($h:ident) => $e:expr) => { + match $self { + $ty::CurrentThread($h) => $e, + + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + $ty::MultiThread($h) => $e, + + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + $ty::MultiThreadAlt($h) => $e, + } + } + } + impl Handle { #[track_caller] pub(crate) fn current() -> Handle { @@ -77,12 +108,7 @@ cfg_rt! { } pub(crate) fn blocking_spawner(&self) -> &blocking::Spawner { - match self { - Handle::CurrentThread(h) => &h.blocking_spawner, - - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Handle::MultiThread(h) => &h.blocking_spawner, - } + match_flavor!(self, Handle(h) => &h.blocking_spawner) } pub(crate) fn spawn(&self, future: F, id: Id) -> JoinHandle @@ -95,6 +121,9 @@ cfg_rt! { #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] Handle::MultiThread(h) => multi_thread::Handle::spawn(h, future, id), + + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThreadAlt(h) => multi_thread_alt::Handle::spawn(h, future, id), } } @@ -104,16 +133,14 @@ cfg_rt! { #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] Handle::MultiThread(ref h) => h.shutdown(), + + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThreadAlt(ref h) => h.shutdown(), } } pub(crate) fn seed_generator(&self) -> &RngSeedGenerator { - match self { - Handle::CurrentThread(h) => &h.seed_generator, - - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Handle::MultiThread(h) => &h.seed_generator, - } + match_flavor!(self, Handle(h) => &h.seed_generator) } pub(crate) fn as_current_thread(&self) -> &Arc { @@ -123,6 +150,17 @@ cfg_rt! { _ => panic!("not a CurrentThread handle"), } } + + cfg_rt_multi_thread! { + cfg_unstable! { + pub(crate) fn expect_multi_thread_alt(&self) -> &Arc { + match self { + Handle::MultiThreadAlt(handle) => handle, + _ => panic!("not a `MultiThreadAlt` handle"), + } + } + } + } } cfg_metrics! { @@ -134,71 +172,41 @@ cfg_rt! 
{ Handle::CurrentThread(_) => 1, #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] Handle::MultiThread(handle) => handle.num_workers(), + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThreadAlt(handle) => handle.num_workers(), } } pub(crate) fn num_blocking_threads(&self) -> usize { - match self { - Handle::CurrentThread(handle) => handle.num_blocking_threads(), - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Handle::MultiThread(handle) => handle.num_blocking_threads(), - } + match_flavor!(self, Handle(handle) => handle.num_blocking_threads()) } pub(crate) fn num_idle_blocking_threads(&self) -> usize { - match self { - Handle::CurrentThread(handle) => handle.num_idle_blocking_threads(), - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Handle::MultiThread(handle) => handle.num_idle_blocking_threads(), - } + match_flavor!(self, Handle(handle) => handle.num_idle_blocking_threads()) } pub(crate) fn active_tasks_count(&self) -> usize { - match self { - Handle::CurrentThread(handle) => handle.active_tasks_count(), - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Handle::MultiThread(handle) => handle.active_tasks_count(), - } + match_flavor!(self, Handle(handle) => handle.active_tasks_count()) } pub(crate) fn scheduler_metrics(&self) -> &SchedulerMetrics { - match self { - Handle::CurrentThread(handle) => handle.scheduler_metrics(), - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Handle::MultiThread(handle) => handle.scheduler_metrics(), - } + match_flavor!(self, Handle(handle) => handle.scheduler_metrics()) } pub(crate) fn worker_metrics(&self, worker: usize) -> &WorkerMetrics { - match self { - Handle::CurrentThread(handle) => handle.worker_metrics(worker), - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Handle::MultiThread(handle) => handle.worker_metrics(worker), - } + match_flavor!(self, Handle(handle) => handle.worker_metrics(worker)) } pub(crate) fn injection_queue_depth(&self) -> usize { - match self { - Handle::CurrentThread(handle) => handle.injection_queue_depth(), - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Handle::MultiThread(handle) => handle.injection_queue_depth(), - } + match_flavor!(self, Handle(handle) => handle.injection_queue_depth()) } pub(crate) fn worker_local_queue_depth(&self, worker: usize) -> usize { - match self { - Handle::CurrentThread(handle) => handle.worker_metrics(worker).queue_depth(), - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Handle::MultiThread(handle) => handle.worker_local_queue_depth(worker), - } + match_flavor!(self, Handle(handle) => handle.worker_local_queue_depth(worker)) } pub(crate) fn blocking_queue_depth(&self) -> usize { - match self { - Handle::CurrentThread(handle) => handle.blocking_queue_depth(), - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Handle::MultiThread(handle) => handle.blocking_queue_depth(), - } + match_flavor!(self, Handle(handle) => handle.blocking_queue_depth()) } } } @@ -214,11 +222,7 @@ cfg_rt! { } pub(crate) fn defer(&self, waker: &Waker) { - match self { - Context::CurrentThread(context) => context.defer(waker), - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Context::MultiThread(context) => context.defer(waker), - } + match_flavor!(self, Context(context) => context.defer(waker)) } cfg_rt_multi_thread! { @@ -229,6 +233,16 @@ cfg_rt! { _ => panic!("expected `MultiThread::Context`") } } + + cfg_unstable! 
{ + #[track_caller] + pub(crate) fn expect_multi_thread_alt(&self) -> &multi_thread_alt::Context { + match self { + Context::MultiThreadAlt(context) => context, + _ => panic!("expected `MultiThreadAlt::Context`") + } + } + } } } } diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/counters.rs b/tokio/src/runtime/scheduler/multi_thread_alt/counters.rs new file mode 100644 index 00000000000..edda0d46d1e --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/counters.rs @@ -0,0 +1,166 @@ +#[cfg(tokio_internal_mt_counters)] +mod imp { + use std::sync::atomic::AtomicUsize; + use std::sync::atomic::Ordering::Relaxed; + + static NUM_MAINTENANCE: AtomicUsize = AtomicUsize::new(0); + static NUM_NOTIFY_LOCAL: AtomicUsize = AtomicUsize::new(0); + static NUM_NOTIFY_REMOTE: AtomicUsize = AtomicUsize::new(0); + static NUM_UNPARKS_LOCAL: AtomicUsize = AtomicUsize::new(0); + static NUM_UNPARKS_REMOTE: AtomicUsize = AtomicUsize::new(0); + static NUM_LIFO_SCHEDULES: AtomicUsize = AtomicUsize::new(0); + static NUM_LIFO_CAPPED: AtomicUsize = AtomicUsize::new(0); + static NUM_STEALS: AtomicUsize = AtomicUsize::new(0); + static NUM_OVERFLOW: AtomicUsize = AtomicUsize::new(0); + static NUM_PARK: AtomicUsize = AtomicUsize::new(0); + static NUM_POLLS: AtomicUsize = AtomicUsize::new(0); + static NUM_LIFO_POLLS: AtomicUsize = AtomicUsize::new(0); + static NUM_REMOTE_BATCH: AtomicUsize = AtomicUsize::new(0); + static NUM_GLOBAL_QUEUE_INTERVAL: AtomicUsize = AtomicUsize::new(0); + static NUM_NO_AVAIL_CORE: AtomicUsize = AtomicUsize::new(0); + static NUM_RELAY_SEARCH: AtomicUsize = AtomicUsize::new(0); + static NUM_SPIN_STALL: AtomicUsize = AtomicUsize::new(0); + static NUM_NO_LOCAL_WORK: AtomicUsize = AtomicUsize::new(0); + + impl Drop for super::Counters { + fn drop(&mut self) { + let notifies_local = NUM_NOTIFY_LOCAL.load(Relaxed); + let notifies_remote = NUM_NOTIFY_REMOTE.load(Relaxed); + let unparks_local = NUM_UNPARKS_LOCAL.load(Relaxed); + let unparks_remote = NUM_UNPARKS_REMOTE.load(Relaxed); + let maintenance = NUM_MAINTENANCE.load(Relaxed); + let lifo_scheds = NUM_LIFO_SCHEDULES.load(Relaxed); + let lifo_capped = NUM_LIFO_CAPPED.load(Relaxed); + let num_steals = NUM_STEALS.load(Relaxed); + let num_overflow = NUM_OVERFLOW.load(Relaxed); + let num_park = NUM_PARK.load(Relaxed); + let num_polls = NUM_POLLS.load(Relaxed); + let num_lifo_polls = NUM_LIFO_POLLS.load(Relaxed); + let num_remote_batch = NUM_REMOTE_BATCH.load(Relaxed); + let num_global_queue_interval = NUM_GLOBAL_QUEUE_INTERVAL.load(Relaxed); + let num_no_avail_core = NUM_NO_AVAIL_CORE.load(Relaxed); + let num_relay_search = NUM_RELAY_SEARCH.load(Relaxed); + let num_spin_stall = NUM_SPIN_STALL.load(Relaxed); + let num_no_local_work = NUM_NO_LOCAL_WORK.load(Relaxed); + + println!("---"); + println!("notifies (remote): {}", notifies_remote); + println!(" notifies (local): {}", notifies_local); + println!(" unparks (local): {}", unparks_local); + println!(" unparks (remote): {}", unparks_remote); + println!(" notify, no core: {}", num_no_avail_core); + println!(" maintenance: {}", maintenance); + println!(" LIFO schedules: {}", lifo_scheds); + println!(" LIFO capped: {}", lifo_capped); + println!(" steals: {}", num_steals); + println!(" queue overflows: {}", num_overflow); + println!(" parks: {}", num_park); + println!(" polls: {}", num_polls); + println!(" polls (LIFO): {}", num_lifo_polls); + println!("remote task batch: {}", num_remote_batch); + println!("global Q interval: {}", num_global_queue_interval); + println!(" relay search: 
{}", num_relay_search); + println!(" spin stall: {}", num_spin_stall); + println!(" no local work: {}", num_no_local_work); + } + } + + pub(crate) fn inc_num_inc_notify_local() { + NUM_NOTIFY_LOCAL.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_notify_remote() { + NUM_NOTIFY_REMOTE.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_unparks_local() { + NUM_UNPARKS_LOCAL.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_unparks_remote() { + NUM_UNPARKS_REMOTE.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_maintenance() { + NUM_MAINTENANCE.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_lifo_schedules() { + NUM_LIFO_SCHEDULES.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_lifo_capped() { + NUM_LIFO_CAPPED.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_steals() { + NUM_STEALS.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_overflows() { + NUM_OVERFLOW.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_parks() { + NUM_PARK.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_polls() { + NUM_POLLS.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_lifo_polls() { + NUM_LIFO_POLLS.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_remote_batch() { + NUM_REMOTE_BATCH.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_global_queue_interval() { + NUM_GLOBAL_QUEUE_INTERVAL.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_notify_no_core() { + NUM_NO_AVAIL_CORE.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_relay_search() { + NUM_RELAY_SEARCH.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_spin_stall() { + NUM_SPIN_STALL.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_no_local_work() { + NUM_NO_LOCAL_WORK.fetch_add(1, Relaxed); + } +} + +#[cfg(not(tokio_internal_mt_counters))] +mod imp { + pub(crate) fn inc_num_inc_notify_local() {} + pub(crate) fn inc_num_notify_remote() {} + pub(crate) fn inc_num_unparks_local() {} + pub(crate) fn inc_num_unparks_remote() {} + pub(crate) fn inc_num_maintenance() {} + pub(crate) fn inc_lifo_schedules() {} + pub(crate) fn inc_lifo_capped() {} + pub(crate) fn inc_num_steals() {} + pub(crate) fn inc_num_overflows() {} + pub(crate) fn inc_num_parks() {} + pub(crate) fn inc_num_polls() {} + pub(crate) fn inc_num_lifo_polls() {} + pub(crate) fn inc_num_remote_batch() {} + pub(crate) fn inc_global_queue_interval() {} + pub(crate) fn inc_notify_no_core() {} + pub(crate) fn inc_num_relay_search() {} + pub(crate) fn inc_num_spin_stall() {} + pub(crate) fn inc_num_no_local_work() {} +} + +#[derive(Debug)] +pub(crate) struct Counters; + +pub(super) use imp::*; diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/handle.rs b/tokio/src/runtime/scheduler/multi_thread_alt/handle.rs new file mode 100644 index 00000000000..b50840ec82c --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/handle.rs @@ -0,0 +1,69 @@ +use crate::future::Future; +use crate::loom::sync::Arc; +use crate::runtime::scheduler::multi_thread_alt::worker; +use crate::runtime::{ + blocking, driver, + task::{self, JoinHandle}, +}; +use crate::util::RngSeedGenerator; + +use std::fmt; + +cfg_metrics! { + mod metrics; +} + +cfg_taskdump! 
{ + mod taskdump; +} + +/// Handle to the multi thread scheduler +pub(crate) struct Handle { + /// Task spawner + pub(super) shared: worker::Shared, + + /// Resource driver handles + pub(crate) driver: driver::Handle, + + /// Blocking pool spawner + pub(crate) blocking_spawner: blocking::Spawner, + + /// Current random number generator seed + pub(crate) seed_generator: RngSeedGenerator, +} + +impl Handle { + /// Spawns a future onto the thread pool + pub(crate) fn spawn(me: &Arc, future: F, id: task::Id) -> JoinHandle + where + F: crate::future::Future + Send + 'static, + F::Output: Send + 'static, + { + Self::bind_new_task(me, future, id) + } + + pub(crate) fn shutdown(&self) { + self.shared.close(); + self.driver.unpark(); + } + + pub(super) fn bind_new_task(me: &Arc, future: T, id: task::Id) -> JoinHandle + where + T: Future + Send + 'static, + T::Output: Send + 'static, + { + let (handle, notified) = me.shared.owned.bind(future, me.clone(), id); + + if let Some(notified) = notified { + me.shared.schedule_task(notified, false); + } + + handle + } +} + +impl fmt::Debug for Handle { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("multi_thread::Handle { ... }").finish() + } +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/handle/metrics.rs b/tokio/src/runtime/scheduler/multi_thread_alt/handle/metrics.rs new file mode 100644 index 00000000000..838694fc89e --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/handle/metrics.rs @@ -0,0 +1,41 @@ +use super::Handle; + +use crate::runtime::{SchedulerMetrics, WorkerMetrics}; + +impl Handle { + pub(crate) fn num_workers(&self) -> usize { + self.shared.worker_metrics.len() + } + + pub(crate) fn num_blocking_threads(&self) -> usize { + self.blocking_spawner.num_threads() + } + + pub(crate) fn num_idle_blocking_threads(&self) -> usize { + self.blocking_spawner.num_idle_threads() + } + + pub(crate) fn active_tasks_count(&self) -> usize { + self.shared.owned.active_tasks_count() + } + + pub(crate) fn scheduler_metrics(&self) -> &SchedulerMetrics { + &self.shared.scheduler_metrics + } + + pub(crate) fn worker_metrics(&self, worker: usize) -> &WorkerMetrics { + &self.shared.worker_metrics[worker] + } + + pub(crate) fn injection_queue_depth(&self) -> usize { + self.shared.injection_queue_depth() + } + + pub(crate) fn worker_local_queue_depth(&self, worker: usize) -> usize { + self.shared.worker_local_queue_depth(worker) + } + + pub(crate) fn blocking_queue_depth(&self) -> usize { + self.blocking_spawner.queue_depth() + } +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/handle/taskdump.rs b/tokio/src/runtime/scheduler/multi_thread_alt/handle/taskdump.rs new file mode 100644 index 00000000000..477d857d88f --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/handle/taskdump.rs @@ -0,0 +1,26 @@ +use super::Handle; + +use crate::runtime::Dump; + +impl Handle { + pub(crate) async fn dump(&self) -> Dump { + let trace_status = &self.shared.trace_status; + + // If a dump is in progress, block. + trace_status.start_trace_request(&self).await; + + let result = loop { + if let Some(result) = trace_status.take_result() { + break result; + } else { + self.notify_all(); + trace_status.result_ready.notified().await; + } + }; + + // Allow other queued dumps to proceed. 
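A sketch of how a caller might drive the dump machinery above, using the existing taskdump surface (not introduced by this patch) and assuming a build with `--cfg tokio_unstable --cfg tokio_taskdump`:

    // From within the runtime:
    let handle = tokio::runtime::Handle::current();
    let dump = handle.dump().await;
    for task in dump.tasks().iter() {
        println!("task trace:\n{}", task.trace());
    }
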
+ trace_status.end_trace_request(&self).await; + + result + } +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs b/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs new file mode 100644 index 00000000000..f440bdf56a0 --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs @@ -0,0 +1,425 @@ +//! Coordinates idling workers + +use crate::loom::sync::atomic::{AtomicBool, AtomicUsize}; +use crate::loom::sync::MutexGuard; +use crate::runtime::scheduler::multi_thread_alt::{worker, Core, Shared}; + +use std::sync::atomic::Ordering::{AcqRel, Acquire, Release}; + +pub(super) struct Idle { + /// Number of searching cores + num_searching: AtomicUsize, + + /// Number of idle cores + num_idle: AtomicUsize, + + /// Map of idle cores + idle_map: IdleMap, + + /// Used to catch false-negatives when waking workers + needs_searching: AtomicBool, + + /// Total number of cores + num_cores: usize, +} + +pub(super) struct IdleMap { + chunks: Vec, +} + +pub(super) struct Snapshot { + chunks: Vec, +} + +/// Data synchronized by the scheduler mutex +pub(super) struct Synced { + /// Worker IDs that are currently sleeping + sleepers: Vec, + + /// Cores available for workers + available_cores: Vec>, +} + +impl Idle { + pub(super) fn new(cores: Vec>, num_workers: usize) -> (Idle, Synced) { + let idle = Idle { + num_searching: AtomicUsize::new(0), + num_idle: AtomicUsize::new(cores.len()), + idle_map: IdleMap::new(&cores), + needs_searching: AtomicBool::new(false), + num_cores: cores.len(), + }; + + let synced = Synced { + sleepers: Vec::with_capacity(num_workers), + available_cores: cores, + }; + + (idle, synced) + } + + pub(super) fn num_idle(&self, synced: &Synced) -> usize { + debug_assert_eq!(synced.available_cores.len(), self.num_idle.load(Acquire)); + synced.available_cores.len() + } + + pub(super) fn num_searching(&self) -> usize { + self.num_searching.load(Acquire) + } + + pub(super) fn snapshot(&self, snapshot: &mut Snapshot) { + snapshot.update(&self.idle_map) + } + + /// Try to acquire an available core + pub(super) fn try_acquire_available_core(&self, synced: &mut Synced) -> Option> { + let ret = synced.available_cores.pop(); + + if let Some(core) = &ret { + // Decrement the number of idle cores + let num_idle = self.num_idle.load(Acquire) - 1; + debug_assert_eq!(num_idle, synced.available_cores.len()); + self.num_idle.store(num_idle, Release); + + self.idle_map.unset(core.index); + debug_assert!(self.idle_map.matches(&synced.available_cores)); + } + + ret + } + + /// We need at least one searching worker + pub(super) fn notify_local(&self, shared: &Shared) { + if self.num_searching.load(Acquire) != 0 { + // There already is a searching worker. Note, that this could be a + // false positive. However, because this method is called **from** a + // worker, we know that there is at least one worker currently + // awake, so the scheduler won't deadlock. + return; + } + + if self.num_idle.load(Acquire) == 0 { + self.needs_searching.store(true, Release); + return; + } + + // There aren't any searching workers. Try to initialize one + if self + .num_searching + .compare_exchange(0, 1, AcqRel, Acquire) + .is_err() + { + // Failing the compare_exchange means another thread concurrently + // launched a searching worker. 
+ return; + } + + super::counters::inc_num_unparks_local(); + + // Acquire the lock + let synced = shared.synced.lock(); + self.notify_synced(synced, shared); + } + + /// Notifies a single worker + pub(super) fn notify_remote(&self, synced: MutexGuard<'_, worker::Synced>, shared: &Shared) { + if synced.idle.sleepers.is_empty() { + self.needs_searching.store(true, Release); + return; + } + + // We need to establish a stronger barrier than with `notify_local` + if self + .num_searching + .compare_exchange(0, 1, AcqRel, Acquire) + .is_err() + { + return; + } + + self.notify_synced(synced, shared); + } + + /// Notify a worker while synced + fn notify_synced(&self, mut synced: MutexGuard<'_, worker::Synced>, shared: &Shared) { + // Find a sleeping worker + if let Some(worker) = synced.idle.sleepers.pop() { + // Find an available core + if let Some(mut core) = synced.idle.available_cores.pop() { + debug_assert!(!core.is_searching); + core.is_searching = true; + + self.idle_map.unset(core.index); + debug_assert!(self.idle_map.matches(&synced.idle.available_cores)); + + // Assign the core to the worker + synced.assigned_cores[worker] = Some(core); + + let num_idle = synced.idle.available_cores.len(); + debug_assert_eq!(num_idle, self.num_idle.load(Acquire) - 1); + + // Update the number of sleeping workers + self.num_idle.store(num_idle, Release); + + // Drop the lock before notifying the condvar. + drop(synced); + + super::counters::inc_num_unparks_remote(); + + // Notify the worker + shared.condvars[worker].notify_one(); + return; + } else { + synced.idle.sleepers.push(worker); + } + } + + super::counters::inc_notify_no_core(); + + // Set the `needs_searching` flag, this happens *while* the lock is held. + self.needs_searching.store(true, Release); + self.num_searching.fetch_sub(1, Release); + + // Explicit mutex guard drop to show that holding the guard to this + // point is significant. `needs_searching` and `num_searching` must be + // updated in the critical section. + drop(synced); + } + + pub(super) fn notify_mult( + &self, + synced: &mut worker::Synced, + workers: &mut Vec, + num: usize, + ) { + debug_assert!(workers.is_empty()); + + for _ in 0..num { + if let Some(worker) = synced.idle.sleepers.pop() { + if let Some(core) = synced.idle.available_cores.pop() { + debug_assert!(!core.is_searching); + + self.idle_map.unset(core.index); + + synced.assigned_cores[worker] = Some(core); + + workers.push(worker); + + continue; + } else { + synced.idle.sleepers.push(worker); + } + } + + break; + } + + if !workers.is_empty() { + debug_assert!(self.idle_map.matches(&synced.idle.available_cores)); + let num_idle = synced.idle.available_cores.len(); + self.num_idle.store(num_idle, Release); + } else { + debug_assert_eq!( + synced.idle.available_cores.len(), + self.num_idle.load(Acquire) + ); + self.needs_searching.store(true, Release); + } + } + + pub(super) fn shutdown(&self, synced: &mut worker::Synced, shared: &Shared) { + // Wake every sleeping worker and assign a core to it. There may not be + // enough sleeping workers for all cores, but other workers will + // eventually find the cores and shut them down. 
+ while !synced.idle.sleepers.is_empty() && !synced.idle.available_cores.is_empty() { + let worker = synced.idle.sleepers.pop().unwrap(); + let core = synced.idle.available_cores.pop().unwrap(); + + self.idle_map.unset(core.index); + + synced.assigned_cores[worker] = Some(core); + shared.condvars[worker].notify_one(); + + self.num_idle + .store(synced.idle.available_cores.len(), Release); + } + + debug_assert!(self.idle_map.matches(&synced.idle.available_cores)); + + // Wake up any other workers + while let Some(index) = synced.idle.sleepers.pop() { + shared.condvars[index].notify_one(); + } + } + + /// The worker releases the given core, making it available to other workers + /// that are waiting. + pub(super) fn release_core(&self, synced: &mut worker::Synced, core: Box) { + // The core should not be searching at this point + debug_assert!(!core.is_searching); + + // Check that this isn't the final worker to go idle *and* + // `needs_searching` is set. + debug_assert!(!self.needs_searching.load(Acquire) || num_active_workers(&synced.idle) > 1); + + let num_idle = synced.idle.available_cores.len(); + debug_assert_eq!(num_idle, self.num_idle.load(Acquire)); + + self.idle_map.set(core.index); + + // Store the core in the list of available cores + synced.idle.available_cores.push(core); + + debug_assert!(self.idle_map.matches(&synced.idle.available_cores)); + + // Update `num_idle` + self.num_idle.store(num_idle + 1, Release); + } + + pub(super) fn transition_worker_to_parked(&self, synced: &mut worker::Synced, index: usize) { + // Store the worker index in the list of sleepers + synced.idle.sleepers.push(index); + + // The worker's assigned core slot should be empty + debug_assert!(synced.assigned_cores[index].is_none()); + } + + pub(super) fn try_transition_worker_to_searching(&self, core: &mut Core) { + debug_assert!(!core.is_searching); + + let num_searching = self.num_searching.load(Acquire); + let num_idle = self.num_idle.load(Acquire); + + if 2 * num_searching >= self.num_cores - num_idle { + return; + } + + self.transition_worker_to_searching(core); + } + + /// Needs to happen while synchronized in order to avoid races + pub(super) fn transition_worker_to_searching_if_needed( + &self, + _synced: &mut Synced, + core: &mut Core, + ) -> bool { + if self.needs_searching.load(Acquire) { + // Needs to be called while holding the lock + self.transition_worker_to_searching(core); + true + } else { + false + } + } + + fn transition_worker_to_searching(&self, core: &mut Core) { + core.is_searching = true; + self.num_searching.fetch_add(1, AcqRel); + self.needs_searching.store(false, Release); + } + + /// A lightweight transition from searching -> running. + /// + /// Returns `true` if this is the final searching worker. The caller + /// **must** notify a new worker. 
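A worked example of the searching-worker throttle in `try_transition_worker_to_searching` above, using hypothetical numbers:

    // 8 cores, 2 idle, 3 already searching:
    let (num_cores, num_idle, num_searching) = (8usize, 2, 3);
    // 2 * 3 >= 8 - 2 holds, so the transition is skipped; at most roughly half
    // of the active (non-idle) workers are allowed to search at once.
    assert!(2 * num_searching >= num_cores - num_idle);
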
+ pub(super) fn transition_worker_from_searching(&self, core: &mut Core) -> bool { + debug_assert!(core.is_searching); + core.is_searching = false; + + let prev = self.num_searching.fetch_sub(1, AcqRel); + debug_assert!(prev > 0); + + prev == 1 + } +} + +const BITS: usize = usize::BITS as usize; +const BIT_MASK: usize = (usize::BITS - 1) as usize; + +impl IdleMap { + fn new(cores: &[Box]) -> IdleMap { + let ret = IdleMap::new_n(num_chunks(cores.len())); + ret.set_all(cores); + + ret + } + + fn new_n(n: usize) -> IdleMap { + let chunks = (0..n).map(|_| AtomicUsize::new(0)).collect(); + IdleMap { chunks } + } + + fn set(&self, index: usize) { + let (chunk, mask) = index_to_mask(index); + let prev = self.chunks[chunk].load(Acquire); + let next = prev | mask; + self.chunks[chunk].store(next, Release); + } + + fn set_all(&self, cores: &[Box]) { + for core in cores { + self.set(core.index); + } + } + + fn unset(&self, index: usize) { + let (chunk, mask) = index_to_mask(index); + let prev = self.chunks[chunk].load(Acquire); + let next = prev & !mask; + self.chunks[chunk].store(next, Release); + } + + fn matches(&self, idle_cores: &[Box]) -> bool { + let expect = IdleMap::new_n(self.chunks.len()); + expect.set_all(idle_cores); + + for (i, chunk) in expect.chunks.iter().enumerate() { + if chunk.load(Acquire) != self.chunks[i].load(Acquire) { + return false; + } + } + + true + } +} + +impl Snapshot { + pub(crate) fn new(idle: &Idle) -> Snapshot { + let chunks = vec![0; idle.idle_map.chunks.len()]; + let mut ret = Snapshot { chunks }; + ret.update(&idle.idle_map); + ret + } + + fn update(&mut self, idle_map: &IdleMap) { + for i in 0..self.chunks.len() { + self.chunks[i] = idle_map.chunks[i].load(Acquire); + } + } + + pub(super) fn is_idle(&self, index: usize) -> bool { + let (chunk, mask) = index_to_mask(index); + debug_assert!( + chunk < self.chunks.len(), + "index={}; chunks={}", + index, + self.chunks.len() + ); + self.chunks[chunk] & mask == mask + } +} + +fn num_chunks(max_cores: usize) -> usize { + (max_cores / BITS) + 1 +} + +fn index_to_mask(index: usize) -> (usize, usize) { + let mask = 1 << (index & BIT_MASK); + let chunk = index / BITS; + + (chunk, mask) +} + +fn num_active_workers(synced: &Synced) -> usize { + synced.available_cores.capacity() - synced.available_cores.len() +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/mod.rs b/tokio/src/runtime/scheduler/multi_thread_alt/mod.rs new file mode 100644 index 00000000000..a6704ab5f1a --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/mod.rs @@ -0,0 +1,99 @@ +//! Multi-threaded runtime + +mod counters; +use counters::Counters; + +mod handle; +pub(crate) use handle::Handle; + +mod overflow; +pub(crate) use overflow::Overflow; + +mod idle; +use self::idle::Idle; + +mod stats; +pub(crate) use stats::Stats; + +pub(crate) mod queue; + +mod worker; +use worker::Core; +pub(crate) use worker::{Context, Shared}; + +cfg_taskdump! { + mod trace; + use trace::TraceStatus; + + pub(crate) use worker::Synced; +} + +cfg_not_taskdump! { + mod trace_mock; + use trace_mock::TraceStatus; +} + +pub(crate) use worker::block_in_place; + +use crate::runtime::{ + self, blocking, + driver::{self, Driver}, + scheduler, Config, +}; +use crate::util::RngSeedGenerator; + +use std::fmt; +use std::future::Future; + +/// Work-stealing based thread pool for executing futures. 
+pub(crate) struct MultiThread; + +// ===== impl MultiThread ===== + +impl MultiThread { + pub(crate) fn new( + size: usize, + driver: Driver, + driver_handle: driver::Handle, + blocking_spawner: blocking::Spawner, + seed_generator: RngSeedGenerator, + config: Config, + ) -> (MultiThread, runtime::Handle) { + let handle = worker::create( + size, + driver, + driver_handle, + blocking_spawner, + seed_generator, + config, + ); + + (MultiThread, handle) + } + + /// Blocks the current thread waiting for the future to complete. + /// + /// The future will execute on the current thread, but all spawned tasks + /// will be executed on the thread pool. + pub(crate) fn block_on(&self, handle: &scheduler::Handle, future: F) -> F::Output + where + F: Future, + { + crate::runtime::context::enter_runtime(handle, true, |blocking| { + blocking.block_on(future).expect("failed to park thread") + }) + } + + pub(crate) fn shutdown(&mut self, handle: &scheduler::Handle) { + match handle { + scheduler::Handle::MultiThreadAlt(handle) => handle.shutdown(), + _ => panic!("expected MultiThread scheduler"), + } + } +} + +impl fmt::Debug for MultiThread { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("MultiThread").finish() + } +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/overflow.rs b/tokio/src/runtime/scheduler/multi_thread_alt/overflow.rs new file mode 100644 index 00000000000..ab664811cff --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/overflow.rs @@ -0,0 +1,26 @@ +use crate::runtime::task; + +#[cfg(test)] +use std::cell::RefCell; + +pub(crate) trait Overflow { + fn push(&self, task: task::Notified); + + fn push_batch(&self, iter: I) + where + I: Iterator>; +} + +#[cfg(test)] +impl Overflow for RefCell>> { + fn push(&self, task: task::Notified) { + self.borrow_mut().push(task); + } + + fn push_batch(&self, iter: I) + where + I: Iterator>, + { + self.borrow_mut().extend(iter); + } +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/park.rs b/tokio/src/runtime/scheduler/multi_thread_alt/park.rs new file mode 100644 index 00000000000..0a00ea004ee --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/park.rs @@ -0,0 +1,232 @@ +//! Parks the runtime. +//! +//! A combination of the various resource driver park handles. + +use crate::loom::sync::atomic::AtomicUsize; +use crate::loom::sync::{Arc, Condvar, Mutex}; +use crate::runtime::driver::{self, Driver}; +use crate::util::TryLock; + +use std::sync::atomic::Ordering::SeqCst; +use std::time::Duration; + +pub(crate) struct Parker { + inner: Arc, +} + +pub(crate) struct Unparker { + inner: Arc, +} + +struct Inner { + /// Avoids entering the park if possible + state: AtomicUsize, + + /// Used to coordinate access to the driver / condvar + mutex: Mutex<()>, + + /// Condvar to block on if the driver is unavailable. + condvar: Condvar, + + /// Resource (I/O, time, ...) driver + shared: Arc, +} + +const EMPTY: usize = 0; +const PARKED_CONDVAR: usize = 1; +const PARKED_DRIVER: usize = 2; +const NOTIFIED: usize = 3; + +/// Shared across multiple Parker handles +struct Shared { + /// Shared driver. 
Only one thread at a time can use this + driver: TryLock, +} + +impl Parker { + pub(crate) fn new(driver: Driver) -> Parker { + Parker { + inner: Arc::new(Inner { + state: AtomicUsize::new(EMPTY), + mutex: Mutex::new(()), + condvar: Condvar::new(), + shared: Arc::new(Shared { + driver: TryLock::new(driver), + }), + }), + } + } + + pub(crate) fn unpark(&self) -> Unparker { + Unparker { + inner: self.inner.clone(), + } + } + + pub(crate) fn park(&mut self, handle: &driver::Handle) { + self.inner.park(handle); + } + + pub(crate) fn park_timeout(&mut self, handle: &driver::Handle, duration: Duration) { + // Only parking with zero is supported... + assert_eq!(duration, Duration::from_millis(0)); + + if let Some(mut driver) = self.inner.shared.driver.try_lock() { + driver.park_timeout(handle, duration) + } + } + + pub(crate) fn shutdown(&mut self, handle: &driver::Handle) { + self.inner.shutdown(handle); + } +} + +impl Clone for Parker { + fn clone(&self) -> Parker { + Parker { + inner: Arc::new(Inner { + state: AtomicUsize::new(EMPTY), + mutex: Mutex::new(()), + condvar: Condvar::new(), + shared: self.inner.shared.clone(), + }), + } + } +} + +impl Unparker { + pub(crate) fn unpark(&self, driver: &driver::Handle) { + self.inner.unpark(driver); + } +} + +impl Inner { + /// Parks the current thread for at most `dur`. + fn park(&self, handle: &driver::Handle) { + // If we were previously notified then we consume this notification and + // return quickly. + if self + .state + .compare_exchange(NOTIFIED, EMPTY, SeqCst, SeqCst) + .is_ok() + { + return; + } + + if let Some(mut driver) = self.shared.driver.try_lock() { + self.park_driver(&mut driver, handle); + } else { + self.park_condvar(); + } + } + + fn park_condvar(&self) { + // Otherwise we need to coordinate going to sleep + let mut m = self.mutex.lock(); + + match self + .state + .compare_exchange(EMPTY, PARKED_CONDVAR, SeqCst, SeqCst) + { + Ok(_) => {} + Err(NOTIFIED) => { + // We must read here, even though we know it will be `NOTIFIED`. + // This is because `unpark` may have been called again since we read + // `NOTIFIED` in the `compare_exchange` above. We must perform an + // acquire operation that synchronizes with that `unpark` to observe + // any writes it made before the call to unpark. To do that we must + // read from the write it made to `state`. + let old = self.state.swap(EMPTY, SeqCst); + debug_assert_eq!(old, NOTIFIED, "park state changed unexpectedly"); + + return; + } + Err(actual) => panic!("inconsistent park state; actual = {}", actual), + } + + loop { + m = self.condvar.wait(m).unwrap(); + + if self + .state + .compare_exchange(NOTIFIED, EMPTY, SeqCst, SeqCst) + .is_ok() + { + // got a notification + return; + } + + // spurious wakeup, go back to sleep + } + } + + fn park_driver(&self, driver: &mut Driver, handle: &driver::Handle) { + match self + .state + .compare_exchange(EMPTY, PARKED_DRIVER, SeqCst, SeqCst) + { + Ok(_) => {} + Err(NOTIFIED) => { + // We must read here, even though we know it will be `NOTIFIED`. + // This is because `unpark` may have been called again since we read + // `NOTIFIED` in the `compare_exchange` above. We must perform an + // acquire operation that synchronizes with that `unpark` to observe + // any writes it made before the call to unpark. To do that we must + // read from the write it made to `state`. 
+ let old = self.state.swap(EMPTY, SeqCst); + debug_assert_eq!(old, NOTIFIED, "park state changed unexpectedly"); + + return; + } + Err(actual) => panic!("inconsistent park state; actual = {}", actual), + } + + driver.park(handle); + + match self.state.swap(EMPTY, SeqCst) { + NOTIFIED => {} // got a notification, hurray! + PARKED_DRIVER => {} // no notification, alas + n => panic!("inconsistent park_timeout state: {}", n), + } + } + + fn unpark(&self, driver: &driver::Handle) { + // To ensure the unparked thread will observe any writes we made before + // this call, we must perform a release operation that `park` can + // synchronize with. To do that we must write `NOTIFIED` even if `state` + // is already `NOTIFIED`. That is why this must be a swap rather than a + // compare-and-swap that returns if it reads `NOTIFIED` on failure. + match self.state.swap(NOTIFIED, SeqCst) { + EMPTY => {} // no one was waiting + NOTIFIED => {} // already unparked + PARKED_CONDVAR => self.unpark_condvar(), + PARKED_DRIVER => driver.unpark(), + actual => panic!("inconsistent state in unpark; actual = {}", actual), + } + } + + fn unpark_condvar(&self) { + // There is a period between when the parked thread sets `state` to + // `PARKED` (or last checked `state` in the case of a spurious wake + // up) and when it actually waits on `cvar`. If we were to notify + // during this period it would be ignored and then when the parked + // thread went to sleep it would never wake up. Fortunately, it has + // `lock` locked at this stage so we can acquire `lock` to wait until + // it is ready to receive the notification. + // + // Releasing `lock` before the call to `notify_one` means that when the + // parked thread wakes it doesn't get woken only to have to wait for us + // to release `lock`. + drop(self.mutex.lock()); + + self.condvar.notify_one() + } + + fn shutdown(&self, handle: &driver::Handle) { + if let Some(mut driver) = self.shared.driver.try_lock() { + driver.shutdown(handle); + } + + self.condvar.notify_all(); + } +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/queue.rs b/tokio/src/runtime/scheduler/multi_thread_alt/queue.rs new file mode 100644 index 00000000000..d4acc408183 --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/queue.rs @@ -0,0 +1,601 @@ +//! Run-queue structures to support a work-stealing scheduler + +use crate::loom::cell::UnsafeCell; +use crate::loom::sync::Arc; +use crate::runtime::scheduler::multi_thread_alt::{Overflow, Stats}; +use crate::runtime::task; + +use std::mem::{self, MaybeUninit}; +use std::ptr; +use std::sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release}; + +// Use wider integers when possible to increase ABA resilience. +// +// See issue #5041: . +cfg_has_atomic_u64! { + type UnsignedShort = u32; + type UnsignedLong = u64; + type AtomicUnsignedShort = crate::loom::sync::atomic::AtomicU32; + type AtomicUnsignedLong = crate::loom::sync::atomic::AtomicU64; +} +cfg_not_has_atomic_u64! { + type UnsignedShort = u16; + type UnsignedLong = u32; + type AtomicUnsignedShort = crate::loom::sync::atomic::AtomicU16; + type AtomicUnsignedLong = crate::loom::sync::atomic::AtomicU32; +} + +/// Producer handle. May only be used from a single thread. +pub(crate) struct Local { + inner: Arc>, +} + +/// Consumer handle. May be used from many threads. +pub(crate) struct Steal(Arc>); + +#[repr(align(128))] +pub(crate) struct Inner { + /// Concurrently updated by many threads. + /// + /// Contains two `UnsignedShort` values. 
The LSB byte is the "real" head of + /// the queue. The `UnsignedShort` in the MSB is set by a stealer in process + /// of stealing values. It represents the first value being stolen in the + /// batch. The `UnsignedShort` indices are intentionally wider than strictly + /// required for buffer indexing in order to provide ABA mitigation and make + /// it possible to distinguish between full and empty buffers. + /// + /// When both `UnsignedShort` values are the same, there is no active + /// stealer. + /// + /// Tracking an in-progress stealer prevents a wrapping scenario. + head: AtomicUnsignedLong, + + /// Only updated by producer thread but read by many threads. + tail: AtomicUnsignedShort, + + /// Elements + buffer: Box<[UnsafeCell>>; LOCAL_QUEUE_CAPACITY]>, +} + +unsafe impl Send for Inner {} +unsafe impl Sync for Inner {} + +#[cfg(not(loom))] +const LOCAL_QUEUE_CAPACITY: usize = 256; + +// Shrink the size of the local queue when using loom. This shouldn't impact +// logic, but allows loom to test more edge cases in a reasonable a mount of +// time. +#[cfg(loom)] +const LOCAL_QUEUE_CAPACITY: usize = 4; + +const MASK: usize = LOCAL_QUEUE_CAPACITY - 1; + +// Constructing the fixed size array directly is very awkward. The only way to +// do it is to repeat `UnsafeCell::new(MaybeUninit::uninit())` 256 times, as +// the contents are not Copy. The trick with defining a const doesn't work for +// generic types. +fn make_fixed_size(buffer: Box<[T]>) -> Box<[T; LOCAL_QUEUE_CAPACITY]> { + assert_eq!(buffer.len(), LOCAL_QUEUE_CAPACITY); + + // safety: We check that the length is correct. + unsafe { Box::from_raw(Box::into_raw(buffer).cast()) } +} + +/// Create a new local run-queue +pub(crate) fn local() -> (Steal, Local) { + let mut buffer = Vec::with_capacity(LOCAL_QUEUE_CAPACITY); + + for _ in 0..LOCAL_QUEUE_CAPACITY { + buffer.push(UnsafeCell::new(MaybeUninit::uninit())); + } + + let inner = Arc::new(Inner { + head: AtomicUnsignedLong::new(0), + tail: AtomicUnsignedShort::new(0), + buffer: make_fixed_size(buffer.into_boxed_slice()), + }); + + let local = Local { + inner: inner.clone(), + }; + + let remote = Steal(inner); + + (remote, local) +} + +impl Local { + /// How many tasks can be pushed into the queue + pub(crate) fn remaining_slots(&self) -> usize { + self.inner.remaining_slots() + } + + pub(crate) fn max_capacity(&self) -> usize { + LOCAL_QUEUE_CAPACITY + } + + /// Returns `true` if there are no entries in the queue + pub(crate) fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + /// Pushes a batch of tasks to the back of the queue. All tasks must fit in + /// the local queue. + /// + /// # Panics + /// + /// The method panics if there is not enough capacity to fit in the queue. + pub(crate) fn push_back(&mut self, tasks: impl ExactSizeIterator>) { + let len = tasks.len(); + assert!(len <= LOCAL_QUEUE_CAPACITY); + + if len == 0 { + // Nothing to do + return; + } + + let head = self.inner.head.load(Acquire); + let (steal, _) = unpack(head); + + // safety: this is the **only** thread that updates this cell. + let mut tail = unsafe { self.inner.tail.unsync_load() }; + + if tail.wrapping_sub(steal) <= (LOCAL_QUEUE_CAPACITY - len) as UnsignedShort { + // Yes, this if condition is structured a bit weird (first block + // does nothing, second returns an error). It is this way to match + // `push_back_or_overflow`. 
+ } else { + panic!() + } + + for task in tasks { + let idx = tail as usize & MASK; + + self.inner.buffer[idx].with_mut(|ptr| { + // Write the task to the slot + // + // Safety: There is only one producer and the above `if` + // condition ensures we don't touch a cell if there is a + // value, thus no consumer. + unsafe { + ptr::write((*ptr).as_mut_ptr(), task); + } + }); + + tail = tail.wrapping_add(1); + } + + self.inner.tail.store(tail, Release); + } + + /// Pushes a task to the back of the local queue, if there is not enough + /// capacity in the queue, this triggers the overflow operation. + /// + /// When the queue overflows, half of the curent contents of the queue is + /// moved to the given Injection queue. This frees up capacity for more + /// tasks to be pushed into the local queue. + pub(crate) fn push_back_or_overflow>( + &mut self, + mut task: task::Notified, + overflow: &O, + stats: &mut Stats, + ) { + let tail = loop { + let head = self.inner.head.load(Acquire); + let (steal, real) = unpack(head); + + // safety: this is the **only** thread that updates this cell. + let tail = unsafe { self.inner.tail.unsync_load() }; + + if tail.wrapping_sub(steal) < LOCAL_QUEUE_CAPACITY as UnsignedShort { + // There is capacity for the task + break tail; + } else if steal != real { + super::counters::inc_num_overflows(); + // Concurrently stealing, this will free up capacity, so only + // push the task onto the inject queue + overflow.push(task); + return; + } else { + super::counters::inc_num_overflows(); + // Push the current task and half of the queue into the + // inject queue. + match self.push_overflow(task, real, tail, overflow, stats) { + Ok(_) => return, + // Lost the race, try again + Err(v) => { + task = v; + } + } + } + }; + + self.push_back_finish(task, tail); + } + + // Second half of `push_back` + fn push_back_finish(&self, task: task::Notified, tail: UnsignedShort) { + // Map the position to a slot index. + let idx = tail as usize & MASK; + + self.inner.buffer[idx].with_mut(|ptr| { + // Write the task to the slot + // + // Safety: There is only one producer and the above `if` + // condition ensures we don't touch a cell if there is a + // value, thus no consumer. + unsafe { + ptr::write((*ptr).as_mut_ptr(), task); + } + }); + + // Make the task available. Synchronizes with a load in + // `steal_into2`. + self.inner.tail.store(tail.wrapping_add(1), Release); + } + + /// Moves a batch of tasks into the inject queue. + /// + /// This will temporarily make some of the tasks unavailable to stealers. + /// Once `push_overflow` is done, a notification is sent out, so if other + /// workers "missed" some of the tasks during a steal, they will get + /// another opportunity. + #[inline(never)] + fn push_overflow>( + &mut self, + task: task::Notified, + head: UnsignedShort, + tail: UnsignedShort, + overflow: &O, + stats: &mut Stats, + ) -> Result<(), task::Notified> { + /// How many elements are we taking from the local queue. + /// + /// This is one less than the number of tasks pushed to the inject + /// queue as we are also inserting the `task` argument. + const NUM_TASKS_TAKEN: UnsignedShort = (LOCAL_QUEUE_CAPACITY / 2) as UnsignedShort; + + assert_eq!( + tail.wrapping_sub(head) as usize, + LOCAL_QUEUE_CAPACITY, + "queue is not full; tail = {}; head = {}", + tail, + head + ); + + let prev = pack(head, head); + + // Claim a bunch of tasks + // + // We are claiming the tasks **before** reading them out of the buffer. 
+ // This is safe because only the **current** thread is able to push new + // tasks. + // + // There isn't really any need for memory ordering... Relaxed would + // work. This is because all tasks are pushed into the queue from the + // current thread (or memory has been acquired if the local queue handle + // moved). + if self + .inner + .head + .compare_exchange( + prev, + pack( + head.wrapping_add(NUM_TASKS_TAKEN), + head.wrapping_add(NUM_TASKS_TAKEN), + ), + Release, + Relaxed, + ) + .is_err() + { + // We failed to claim the tasks, losing the race. Return out of + // this function and try the full `push` routine again. The queue + // may not be full anymore. + return Err(task); + } + + /// An iterator that takes elements out of the run queue. + struct BatchTaskIter<'a, T: 'static> { + buffer: &'a [UnsafeCell>>; LOCAL_QUEUE_CAPACITY], + head: UnsignedLong, + i: UnsignedLong, + } + impl<'a, T: 'static> Iterator for BatchTaskIter<'a, T> { + type Item = task::Notified; + + #[inline] + fn next(&mut self) -> Option> { + if self.i == UnsignedLong::from(NUM_TASKS_TAKEN) { + None + } else { + let i_idx = self.i.wrapping_add(self.head) as usize & MASK; + let slot = &self.buffer[i_idx]; + + // safety: Our CAS from before has assumed exclusive ownership + // of the task pointers in this range. + let task = slot.with(|ptr| unsafe { ptr::read((*ptr).as_ptr()) }); + + self.i += 1; + Some(task) + } + } + } + + // safety: The CAS above ensures that no consumer will look at these + // values again, and we are the only producer. + let batch_iter = BatchTaskIter { + buffer: &self.inner.buffer, + head: head as UnsignedLong, + i: 0, + }; + overflow.push_batch(batch_iter.chain(std::iter::once(task))); + + // Add 1 to factor in the task currently being scheduled. + stats.incr_overflow_count(); + + Ok(()) + } + + /// Pops a task from the local queue. + pub(crate) fn pop(&mut self) -> Option> { + let mut head = self.inner.head.load(Acquire); + + let idx = loop { + let (steal, real) = unpack(head); + + // safety: this is the **only** thread that updates this cell. + let tail = unsafe { self.inner.tail.unsync_load() }; + + if real == tail { + // queue is empty + return None; + } + + let next_real = real.wrapping_add(1); + + // If `steal == real` there are no concurrent stealers. Both `steal` + // and `real` are updated. + let next = if steal == real { + pack(next_real, next_real) + } else { + assert_ne!(steal, next_real); + pack(steal, next_real) + }; + + // Attempt to claim a task. + let res = self + .inner + .head + .compare_exchange(head, next, AcqRel, Acquire); + + match res { + Ok(_) => break real as usize & MASK, + Err(actual) => head = actual, + } + }; + + Some(self.inner.buffer[idx].with(|ptr| unsafe { ptr::read(ptr).assume_init() })) + } +} + +impl Steal { + /// Steals half the tasks from self and place them into `dst`. + pub(crate) fn steal_into( + &self, + dst: &mut Local, + dst_stats: &mut Stats, + ) -> Option> { + // Safety: the caller is the only thread that mutates `dst.tail` and + // holds a mutable reference. + let dst_tail = unsafe { dst.inner.tail.unsync_load() }; + + // To the caller, `dst` may **look** empty but still have values + // contained in the buffer. If another thread is concurrently stealing + // from `dst` there may not be enough capacity to steal. + let (steal, _) = unpack(dst.inner.head.load(Acquire)); + + if dst_tail.wrapping_sub(steal) > LOCAL_QUEUE_CAPACITY as UnsignedShort / 2 { + // we *could* try to steal less here, but for simplicity, we're just + // going to abort. 
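        // With the non-loom capacity of 256, this guard means a steal only
        // proceeds when `dst` has at least 128 free slots, which is always
        // enough for the at-most 128 tasks `steal_into2` can claim in one
        // batch (half of the source queue, rounded up).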
+ return None; + } + + // Steal the tasks into `dst`'s buffer. This does not yet expose the + // tasks in `dst`. + let mut n = self.steal_into2(dst, dst_tail); + + if n == 0 { + // No tasks were stolen + return None; + } + + super::counters::inc_num_steals(); + + dst_stats.incr_steal_count(n as u16); + dst_stats.incr_steal_operations(); + + // We are returning a task here + n -= 1; + + let ret_pos = dst_tail.wrapping_add(n); + let ret_idx = ret_pos as usize & MASK; + + // safety: the value was written as part of `steal_into2` and not + // exposed to stealers, so no other thread can access it. + let ret = dst.inner.buffer[ret_idx].with(|ptr| unsafe { ptr::read((*ptr).as_ptr()) }); + + if n == 0 { + // The `dst` queue is empty, but a single task was stolen + return Some(ret); + } + + // Make the stolen items available to consumers + dst.inner.tail.store(dst_tail.wrapping_add(n), Release); + + Some(ret) + } + + // Steal tasks from `self`, placing them into `dst`. Returns the number of + // tasks that were stolen. + fn steal_into2(&self, dst: &mut Local, dst_tail: UnsignedShort) -> UnsignedShort { + let mut prev_packed = self.0.head.load(Acquire); + let mut next_packed; + + let n = loop { + let (src_head_steal, src_head_real) = unpack(prev_packed); + let src_tail = self.0.tail.load(Acquire); + + // If these two do not match, another thread is concurrently + // stealing from the queue. + if src_head_steal != src_head_real { + return 0; + } + + // Number of available tasks to steal + let n = src_tail.wrapping_sub(src_head_real); + let n = n - n / 2; + + if n == 0 { + // No tasks available to steal + return 0; + } + + // Update the real head index to acquire the tasks. + let steal_to = src_head_real.wrapping_add(n); + assert_ne!(src_head_steal, steal_to); + next_packed = pack(src_head_steal, steal_to); + + // Claim all those tasks. This is done by incrementing the "real" + // head but not the steal. By doing this, no other thread is able to + // steal from this queue until the current thread completes. + let res = self + .0 + .head + .compare_exchange(prev_packed, next_packed, AcqRel, Acquire); + + match res { + Ok(_) => break n, + Err(actual) => prev_packed = actual, + } + }; + + assert!( + n <= LOCAL_QUEUE_CAPACITY as UnsignedShort / 2, + "actual = {}", + n + ); + + let (first, _) = unpack(next_packed); + + // Take all the tasks + for i in 0..n { + // Compute the positions + let src_pos = first.wrapping_add(i); + let dst_pos = dst_tail.wrapping_add(i); + + // Map to slots + let src_idx = src_pos as usize & MASK; + let dst_idx = dst_pos as usize & MASK; + + // Read the task + // + // safety: We acquired the task with the atomic exchange above. + let task = self.0.buffer[src_idx].with(|ptr| unsafe { ptr::read((*ptr).as_ptr()) }); + + // Write the task to the new slot + // + // safety: `dst` queue is empty and we are the only producer to + // this queue. + dst.inner.buffer[dst_idx] + .with_mut(|ptr| unsafe { ptr::write((*ptr).as_mut_ptr(), task) }); + } + + let mut prev_packed = next_packed; + + // Update `src_head_steal` to match `src_head_real` signalling that the + // stealing routine is complete. + loop { + let head = unpack(prev_packed).1; + next_packed = pack(head, head); + + let res = self + .0 + .head + .compare_exchange(prev_packed, next_packed, AcqRel, Acquire); + + match res { + Ok(_) => return n, + Err(actual) => { + let (actual_steal, actual_real) = unpack(actual); + + assert_ne!(actual_steal, actual_real); + + prev_packed = actual; + } + } + } + } +} + +cfg_metrics! 
{ + impl Steal { + pub(crate) fn len(&self) -> usize { + self.0.len() as _ + } + } +} + +impl Clone for Steal { + fn clone(&self) -> Steal { + Steal(self.0.clone()) + } +} + +impl Drop for Local { + fn drop(&mut self) { + if !std::thread::panicking() { + assert!(self.pop().is_none(), "queue not empty"); + } + } +} + +impl Inner { + fn remaining_slots(&self) -> usize { + let (steal, _) = unpack(self.head.load(Acquire)); + let tail = self.tail.load(Acquire); + + LOCAL_QUEUE_CAPACITY - (tail.wrapping_sub(steal) as usize) + } + + fn len(&self) -> UnsignedShort { + let (_, head) = unpack(self.head.load(Acquire)); + let tail = self.tail.load(Acquire); + + tail.wrapping_sub(head) + } + + fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +/// Split the head value into the real head and the index a stealer is working +/// on. +fn unpack(n: UnsignedLong) -> (UnsignedShort, UnsignedShort) { + let real = n & UnsignedShort::MAX as UnsignedLong; + let steal = n >> (mem::size_of::() * 8); + + (steal as UnsignedShort, real as UnsignedShort) +} + +/// Join the two head values +fn pack(steal: UnsignedShort, real: UnsignedShort) -> UnsignedLong { + (real as UnsignedLong) | ((steal as UnsignedLong) << (mem::size_of::() * 8)) +} + +#[test] +fn test_local_queue_capacity() { + assert!(LOCAL_QUEUE_CAPACITY - 1 <= u8::MAX as usize); +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/stats.rs b/tokio/src/runtime/scheduler/multi_thread_alt/stats.rs new file mode 100644 index 00000000000..57657bb0391 --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/stats.rs @@ -0,0 +1,171 @@ +use crate::runtime::{Config, MetricsBatch, WorkerMetrics}; + +use std::cmp; +use std::time::{Duration, Instant}; + +/// Per-worker statistics. This is used for both tuning the scheduler and +/// reporting runtime-level metrics/stats. +pub(crate) struct Stats { + /// The metrics batch used to report runtime-level metrics/stats to the + /// user. + batch: MetricsBatch, + + /// Exponentially-weighted moving average of time spent polling scheduled a + /// task. + /// + /// Tracked in nanoseconds, stored as a f64 since that is what we use with + /// the EWMA calculations + task_poll_time_ewma: f64, +} + +/// Transient state +pub(crate) struct Ephemeral { + /// Instant at which work last resumed (continued after park). + /// + /// This duplicates the value stored in `MetricsBatch`. We will unify + /// `Stats` and `MetricsBatch` when we stabilize metrics. + processing_scheduled_tasks_started_at: Instant, + + /// Number of tasks polled in the batch of scheduled tasks + tasks_polled_in_batch: usize, + + /// Used to ensure calls to start / stop batch are paired + #[cfg(debug_assertions)] + batch_started: bool, +} + +impl Ephemeral { + pub(crate) fn new() -> Ephemeral { + Ephemeral { + processing_scheduled_tasks_started_at: Instant::now(), + tasks_polled_in_batch: 0, + #[cfg(debug_assertions)] + batch_started: false, + } + } +} + +/// How to weigh each individual poll time, value is plucked from thin air. +const TASK_POLL_TIME_EWMA_ALPHA: f64 = 0.1; + +/// Ideally, we wouldn't go above this, value is plucked from thin air. +const TARGET_GLOBAL_QUEUE_INTERVAL: f64 = Duration::from_micros(200).as_nanos() as f64; + +/// Max value for the global queue interval. 
This is 2x the previous default +const MAX_TASKS_POLLED_PER_GLOBAL_QUEUE_INTERVAL: u32 = 127; + +/// This is the previous default +const TARGET_TASKS_POLLED_PER_GLOBAL_QUEUE_INTERVAL: u32 = 61; + +impl Stats { + pub(crate) const DEFAULT_GLOBAL_QUEUE_INTERVAL: u32 = + TARGET_TASKS_POLLED_PER_GLOBAL_QUEUE_INTERVAL; + + pub(crate) fn new(worker_metrics: &WorkerMetrics) -> Stats { + // Seed the value with what we hope to see. + let task_poll_time_ewma = + TARGET_GLOBAL_QUEUE_INTERVAL / TARGET_TASKS_POLLED_PER_GLOBAL_QUEUE_INTERVAL as f64; + + Stats { + batch: MetricsBatch::new(worker_metrics), + task_poll_time_ewma, + } + } + + pub(crate) fn tuned_global_queue_interval(&self, config: &Config) -> u32 { + // If an interval is explicitly set, don't tune. + if let Some(configured) = config.global_queue_interval { + return configured; + } + + // As of Rust 1.45, casts from f64 -> u32 are saturating, which is fine here. + let tasks_per_interval = (TARGET_GLOBAL_QUEUE_INTERVAL / self.task_poll_time_ewma) as u32; + + cmp::max( + // We don't want to return less than 2 as that would result in the + // global queue always getting checked first. + 2, + cmp::min( + MAX_TASKS_POLLED_PER_GLOBAL_QUEUE_INTERVAL, + tasks_per_interval, + ), + ) + } + + pub(crate) fn submit(&mut self, to: &WorkerMetrics) { + self.batch.submit(to); + } + + pub(crate) fn about_to_park(&mut self) { + self.batch.about_to_park(); + } + + pub(crate) fn inc_local_schedule_count(&mut self) { + self.batch.inc_local_schedule_count(); + } + + pub(crate) fn start_processing_scheduled_tasks(&mut self, ephemeral: &mut Ephemeral) { + self.batch.start_processing_scheduled_tasks(); + + #[cfg(debug_assertions)] + { + debug_assert!(!ephemeral.batch_started); + ephemeral.batch_started = true; + } + + ephemeral.processing_scheduled_tasks_started_at = Instant::now(); + ephemeral.tasks_polled_in_batch = 0; + } + + pub(crate) fn end_processing_scheduled_tasks(&mut self, ephemeral: &mut Ephemeral) { + self.batch.end_processing_scheduled_tasks(); + + #[cfg(debug_assertions)] + { + debug_assert!(ephemeral.batch_started); + ephemeral.batch_started = false; + } + + // Update the EWMA task poll time + if ephemeral.tasks_polled_in_batch > 0 { + let now = Instant::now(); + + // If we "overflow" this conversion, we have bigger problems than + // slightly off stats. + let elapsed = (now - ephemeral.processing_scheduled_tasks_started_at).as_nanos() as f64; + let num_polls = ephemeral.tasks_polled_in_batch as f64; + + // Calculate the mean poll duration for a single task in the batch + let mean_poll_duration = elapsed / num_polls; + + // Compute the alpha weighted by the number of tasks polled this batch. + let weighted_alpha = 1.0 - (1.0 - TASK_POLL_TIME_EWMA_ALPHA).powf(num_polls); + + // Now compute the new weighted average task poll time. 
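            // A rough worked example (numbers picked purely for
            // illustration): with alpha = 0.1, a batch of 10 polls averaging
            // 3,000ns each, and a previous EWMA of 1,000ns:
            //
            //   weighted_alpha = 1 - (1 - 0.1)^10              ~= 0.651
            //   new EWMA       = 0.651 * 3,000 + 0.349 * 1,000 ~= 2,303ns
            //
            // Fed back into `tuned_global_queue_interval`, 200,000ns /
            // 2,303ns ~= 86 polls between global queue checks, comfortably
            // inside the [2, 127] clamp.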
+ self.task_poll_time_ewma = weighted_alpha * mean_poll_duration + + (1.0 - weighted_alpha) * self.task_poll_time_ewma; + } + } + + pub(crate) fn start_poll(&mut self, ephemeral: &mut Ephemeral) { + self.batch.start_poll(); + + ephemeral.tasks_polled_in_batch += 1; + } + + pub(crate) fn end_poll(&mut self) { + self.batch.end_poll(); + } + + pub(crate) fn incr_steal_count(&mut self, by: u16) { + self.batch.incr_steal_count(by); + } + + pub(crate) fn incr_steal_operations(&mut self) { + self.batch.incr_steal_operations(); + } + + pub(crate) fn incr_overflow_count(&mut self) { + self.batch.incr_overflow_count(); + } +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/trace.rs b/tokio/src/runtime/scheduler/multi_thread_alt/trace.rs new file mode 100644 index 00000000000..cc65a487543 --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/trace.rs @@ -0,0 +1,61 @@ +use crate::loom::sync::atomic::{AtomicBool, Ordering}; +use crate::loom::sync::{Barrier, Mutex}; +use crate::runtime::dump::Dump; +use crate::runtime::scheduler::multi_thread_alt::Handle; +use crate::sync::notify::Notify; + +/// Tracing status of the worker. +pub(super) struct TraceStatus { + pub(super) trace_requested: AtomicBool, + pub(super) trace_start: Barrier, + pub(super) trace_end: Barrier, + pub(super) result_ready: Notify, + pub(super) trace_result: Mutex>, +} + +impl TraceStatus { + pub(super) fn new(remotes_len: usize) -> Self { + Self { + trace_requested: AtomicBool::new(false), + trace_start: Barrier::new(remotes_len), + trace_end: Barrier::new(remotes_len), + result_ready: Notify::new(), + trace_result: Mutex::new(None), + } + } + + pub(super) fn trace_requested(&self) -> bool { + self.trace_requested.load(Ordering::Relaxed) + } + + pub(super) async fn start_trace_request(&self, handle: &Handle) { + while self + .trace_requested + .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed) + .is_err() + { + handle.notify_all(); + crate::task::yield_now().await; + } + } + + pub(super) fn stash_result(&self, dump: Dump) { + let _ = self.trace_result.lock().insert(dump); + self.result_ready.notify_one(); + } + + pub(super) fn take_result(&self) -> Option { + self.trace_result.lock().take() + } + + pub(super) async fn end_trace_request(&self, handle: &Handle) { + while self + .trace_requested + .compare_exchange(true, false, Ordering::Acquire, Ordering::Relaxed) + .is_err() + { + handle.notify_all(); + crate::task::yield_now().await; + } + } +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/trace_mock.rs b/tokio/src/runtime/scheduler/multi_thread_alt/trace_mock.rs new file mode 100644 index 00000000000..2c17a4e38b5 --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/trace_mock.rs @@ -0,0 +1,11 @@ +pub(super) struct TraceStatus {} + +impl TraceStatus { + pub(super) fn new(_: usize) -> Self { + Self {} + } + + pub(super) fn trace_requested(&self) -> bool { + false + } +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/worker.rs b/tokio/src/runtime/scheduler/multi_thread_alt/worker.rs new file mode 100644 index 00000000000..28b88cb679b --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/worker.rs @@ -0,0 +1,1519 @@ +//! A scheduler is initialized with a fixed number of workers. Each worker is +//! driven by a thread. Each worker has a "core" which contains data such as the +//! run queue and other state. When `block_in_place` is called, the worker's +//! "core" is handed off to a new thread allowing the scheduler to continue to +//! 
make progress while the originating thread blocks. +//! +//! # Shutdown +//! +//! Shutting down the runtime involves the following steps: +//! +//! 1. The Shared::close method is called. This closes the inject queue and +//! OwnedTasks instance and wakes up all worker threads. +//! +//! 2. Each worker thread observes the close signal next time it runs +//! Core::maintenance by checking whether the inject queue is closed. +//! The Core::is_shutdown flag is set to true. +//! +//! 3. The worker thread calls `pre_shutdown` in parallel. Here, the worker +//! will keep removing tasks from OwnedTasks until it is empty. No new +//! tasks can be pushed to the OwnedTasks during or after this step as it +//! was closed in step 1. +//! +//! 5. The workers call Shared::shutdown to enter the single-threaded phase of +//! shutdown. These calls will push their core to Shared::shutdown_cores, +//! and the last thread to push its core will finish the shutdown procedure. +//! +//! 6. The local run queue of each core is emptied, then the inject queue is +//! emptied. +//! +//! At this point, shutdown has completed. It is not possible for any of the +//! collections to contain any tasks at this point, as each collection was +//! closed first, then emptied afterwards. +//! +//! ## Spawns during shutdown +//! +//! When spawning tasks during shutdown, there are two cases: +//! +//! * The spawner observes the OwnedTasks being open, and the inject queue is +//! closed. +//! * The spawner observes the OwnedTasks being closed and doesn't check the +//! inject queue. +//! +//! The first case can only happen if the OwnedTasks::bind call happens before +//! or during step 1 of shutdown. In this case, the runtime will clean up the +//! task in step 3 of shutdown. +//! +//! In the latter case, the task was not spawned and the task is immediately +//! cancelled by the spawner. +//! +//! The correctness of shutdown requires both the inject queue and OwnedTasks +//! collection to have a closed bit. With a close bit on only the inject queue, +//! spawning could run in to a situation where a task is successfully bound long +//! after the runtime has shut down. With a close bit on only the OwnedTasks, +//! the first spawning situation could result in the notification being pushed +//! to the inject queue after step 6 of shutdown, which would leave a task in +//! the inject queue indefinitely. This would be a ref-count cycle and a memory +//! leak. + +use crate::loom::sync::{Arc, Condvar, Mutex, MutexGuard}; +use crate::runtime; +use crate::runtime::context; +use crate::runtime::scheduler::multi_thread_alt::{ + idle, queue, stats, Counters, Handle, Idle, Overflow, Stats, TraceStatus, +}; +use crate::runtime::scheduler::{self, inject, Lock}; +use crate::runtime::task::OwnedTasks; +use crate::runtime::{ + blocking, coop, driver, task, Config, SchedulerMetrics, WorkerMetrics, +}; +use crate::runtime::driver::Driver; +use crate::util::atomic_cell::AtomicCell; +use crate::util::rand::{FastRand, RngSeedGenerator}; + +use std::cell::{Cell, RefCell}; +use std::cmp; +use std::task::Waker; +use std::time::Duration; + +cfg_metrics! { + mod metrics; +} + +cfg_taskdump! { + mod taskdump; +} + +cfg_not_taskdump! { + mod taskdump_mock; +} + +/// A scheduler worker +/// +/// Data is stack-allocated and never migrates threads +pub(super) struct Worker { + /// Used to schedule bookkeeping tasks every so often. 
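    /// Incremented on each call to `next_task`; every `event_interval`
    /// ticks, `maybe_maintenance` polls the driver with a zero timeout and
    /// flushes worker metrics.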
+ tick: u32, + + /// True if the scheduler is being shutdown + pub(super) is_shutdown: bool, + + /// True if the scheduler is being traced + is_traced: bool, + + /// Counter used to track when to poll from the local queue vs. the + /// injection queue + num_seq_local_queue_polls: u32, + + /// How often to check the global queue + global_queue_interval: u32, + + /// Used to collect a list of workers to notify + workers_to_notify: Vec, + + /// Snapshot of idle core list. This helps speedup stealing + idle_snapshot: idle::Snapshot, + + stats: stats::Ephemeral, +} + +/// Core data +/// +/// Data is heap-allocated and migrates threads. +#[repr(align(128))] +pub(super) struct Core { + /// Index holding this core's remote/shared state. + pub(super) index: usize, + + lifo_slot: Option, + + /// The worker-local run queue. + run_queue: queue::Local>, + + /// True if the worker is currently searching for more work. Searching + /// involves attempting to steal from other workers. + pub(super) is_searching: bool, + + /// Per-worker runtime stats + stats: Stats, + + /// Fast random number generator. + rand: FastRand, +} + +/// State shared across all workers +pub(crate) struct Shared { + /// Per-core remote state. + remotes: Box<[Remote]>, + + /// Global task queue used for: + /// 1. Submit work to the scheduler while **not** currently on a worker thread. + /// 2. Submit work to the scheduler when a worker run queue is saturated + pub(super) inject: inject::Shared>, + + /// Coordinates idle workers + idle: Idle, + + /// Collection of all active tasks spawned onto this executor. + pub(super) owned: OwnedTasks>, + + /// Data synchronized by the scheduler mutex + pub(super) synced: Mutex, + + /// Power's Tokio's I/O, timers, etc... the responsibility of polling the + /// driver is shared across workers. + driver: AtomicCell, + + /// Condition variables used to unblock worker threads. Each worker thread + /// has its own condvar it waits on. + pub(super) condvars: Vec, + + /// The number of cores that have observed the trace signal. + pub(super) trace_status: TraceStatus, + + /// Scheduler configuration options + config: Config, + + /// Collects metrics from the runtime. + pub(super) scheduler_metrics: SchedulerMetrics, + + pub(super) worker_metrics: Box<[WorkerMetrics]>, + + /// Only held to trigger some code on drop. This is used to get internal + /// runtime metrics that can be useful when doing performance + /// investigations. This does nothing (empty struct, no drop impl) unless + /// the `tokio_internal_mt_counters` cfg flag is set. + _counters: Counters, +} + +/// Data synchronized by the scheduler mutex +pub(crate) struct Synced { + /// When worker is notified, it is assigned a core. The core is placed here + /// until the worker wakes up to take it. + pub(super) assigned_cores: Vec>>, + + /// Cores that have observed the shutdown signal + /// + /// The core is **not** placed back in the worker to avoid it from being + /// stolen by a thread that was spawned as part of `block_in_place`. + shutdown_cores: Vec>, + + /// Synchronized state for `Idle`. + pub(super) idle: idle::Synced, + + /// Synchronized state for `Inject`. + pub(crate) inject: inject::Synced, +} + +/// Used to communicate with a worker from other threads. +struct Remote { + /// When a task is scheduled from a worker, it is stored in this slot. The + /// worker will check this slot for a task **before** checking the run + /// queue. This effectively results in the **last** scheduled task to be run + /// next (LIFO). 
This is an optimization for improving locality which + /// benefits message passing patterns and helps to reduce latency. + // lifo_slot: Lifo, + + /// Steals tasks from this worker. + pub(super) steal: queue::Steal>, +} + +/// Thread-local context +pub(crate) struct Context { + // Current scheduler's handle + handle: Arc, + + /// Worker index + index: usize, + + /// True when the LIFO slot is enabled + lifo_enabled: Cell, + + /// Core data + core: RefCell>>, + + /// Used to pass cores to other threads when `block_in_place` is called + handoff_core: Arc>, + + /// Tasks to wake after resource drivers are polled. This is mostly to + /// handle yielded tasks. + pub(crate) defer: RefCell>, +} + +/// Running a task may consume the core. If the core is still available when +/// running the task completes, it is returned. Otherwise, the worker will need +/// to stop processing. +type RunResult = Result, ()>; +type NextTaskResult = Result<(Option, Box), ()>; + +/// A task handle +type Task = task::Task>; + +/// A notified task handle +type Notified = task::Notified>; + +/// Value picked out of thin-air. Running the LIFO slot a handful of times +/// seemms sufficient to benefit from locality. More than 3 times probably is +/// overweighing. The value can be tuned in the future with data that shows +/// improvements. +const MAX_LIFO_POLLS_PER_TICK: usize = 3; + +pub(super) fn create( + num_cores: usize, + driver: Driver, + driver_handle: driver::Handle, + blocking_spawner: blocking::Spawner, + seed_generator: RngSeedGenerator, + config: Config, +) -> runtime::Handle { + // Allocate num_cores + 1 workers so that one worker can handle the I/O + // driver, if needed. + let num_workers = num_cores + 1; + let mut cores = Vec::with_capacity(num_cores); + let mut remotes = Vec::with_capacity(num_cores); + // Worker metrics are actually core based + let mut worker_metrics = Vec::with_capacity(num_cores); + + // Create the local queues + for i in 0..num_cores { + let (steal, run_queue) = queue::local(); + + let metrics = WorkerMetrics::from_config(&config); + let stats = Stats::new(&metrics); + + cores.push(Box::new(Core { + index: i, + lifo_slot: None, + run_queue, + is_searching: false, + stats, + rand: FastRand::from_seed(config.seed_generator.next_seed()), + })); + + remotes.push(Remote { + steal, + // lifo_slot: Lifo::new(), + }); + worker_metrics.push(metrics); + } + + // Allocate num-cores + 1 workers, so one worker can handle the I/O driver, + // if needed. 
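    // For example, with 8 configured cores this spawns 9 worker threads but
    // only 8 cores (and 8 local run queues) exist, so the I/O driver can be
    // parked on even while every core is busy running tasks.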
+ let (idle, idle_synced) = Idle::new(cores, num_workers); + let (inject, inject_synced) = inject::Shared::new(); + + let handle = Arc::new(Handle { + shared: Shared { + remotes: remotes.into_boxed_slice(), + inject, + idle, + owned: OwnedTasks::new(), + synced: Mutex::new(Synced { + assigned_cores: (0..num_workers).map(|_| None).collect(), + shutdown_cores: Vec::with_capacity(num_cores), + idle: idle_synced, + inject: inject_synced, + }), + driver: AtomicCell::new(Some(Box::new(driver))), + condvars: (0..num_workers).map(|_| Condvar::new()).collect(), + trace_status: TraceStatus::new(num_cores), + config, + scheduler_metrics: SchedulerMetrics::new(), + worker_metrics: worker_metrics.into_boxed_slice(), + _counters: Counters, + }, + driver: driver_handle, + blocking_spawner, + seed_generator, + }); + + let rt_handle = runtime::Handle { + inner: scheduler::Handle::MultiThreadAlt(handle), + }; + + // Eagerly start worker threads + for index in 0..num_workers { + let handle = rt_handle.inner.expect_multi_thread_alt(); + let h2 = handle.clone(); + let handoff_core = Arc::new(AtomicCell::new(None)); + + handle + .blocking_spawner + .spawn_blocking(&rt_handle, move || run(index, h2, handoff_core, false)); + } + + rt_handle +} + +#[track_caller] +pub(crate) fn block_in_place(f: F) -> R +where + F: FnOnce() -> R, +{ + // Try to steal the worker core back + struct Reset(coop::Budget); + + impl Drop for Reset { + fn drop(&mut self) { + with_current(|maybe_cx| { + if let Some(cx) = maybe_cx { + let core = cx.handoff_core.take(); + let mut cx_core = cx.core.borrow_mut(); + assert!(cx_core.is_none()); + *cx_core = core; + + // Reset the task budget as we are re-entering the + // runtime. + coop::set(self.0); + } + }); + } + } + + let mut had_entered = false; + + let setup_result = with_current(|maybe_cx| { + match ( + crate::runtime::context::current_enter_context(), + maybe_cx.is_some(), + ) { + (context::EnterRuntime::Entered { .. }, true) => { + // We are on a thread pool runtime thread, so we just need to + // set up blocking. + had_entered = true; + } + ( + context::EnterRuntime::Entered { + allow_block_in_place, + }, + false, + ) => { + // We are on an executor, but _not_ on the thread pool. That is + // _only_ okay if we are in a thread pool runtime's block_on + // method: + if allow_block_in_place { + had_entered = true; + return Ok(()); + } else { + // This probably means we are on the current_thread runtime or in a + // LocalSet, where it is _not_ okay to block. + return Err( + "can call blocking only when running on the multi-threaded runtime", + ); + } + } + (context::EnterRuntime::NotEntered, true) => { + // This is a nested call to block_in_place (we already exited). + // All the necessary setup has already been done. + return Ok(()); + } + (context::EnterRuntime::NotEntered, false) => { + // We are outside of the tokio runtime, so blocking is fine. + // We can also skip all of the thread pool blocking setup steps. + return Ok(()); + } + } + + let cx = maybe_cx.expect("no .is_some() == false cases above should lead here"); + + // Get the worker core. If none is set, then blocking is fine! + let core = match cx.core.borrow_mut().take() { + Some(core) => core, + None => return Ok(()), + }; + + // In order to block, the core must be sent to another thread for + // execution. + // + // First, move the core back into the worker's shared core slot. + cx.handoff_core.set(core); + + // Next, clone the worker handle and send it to a new thread for + // processing. 
+ // + // Once the blocking task is done executing, we will attempt to + // steal the core back. + let index = cx.index; + let handle = cx.handle.clone(); + let handoff_core = cx.handoff_core.clone(); + runtime::spawn_blocking(move || run(index, handle, handoff_core, true)); + Ok(()) + }); + + if let Err(panic_message) = setup_result { + panic!("{}", panic_message); + } + + if had_entered { + // Unset the current task's budget. Blocking sections are not + // constrained by task budgets. + let _reset = Reset(coop::stop()); + + crate::runtime::context::exit_runtime(f) + } else { + f() + } +} + +fn run( + index: usize, + handle: Arc, + handoff_core: Arc>, + blocking_in_place: bool, +) { + struct AbortOnPanic; + + impl Drop for AbortOnPanic { + fn drop(&mut self) { + if std::thread::panicking() { + eprintln!("worker thread panicking; aborting process"); + std::process::abort(); + } + } + } + + // Catching panics on worker threads in tests is quite tricky. Instead, when + // debug assertions are enabled, we just abort the process. + #[cfg(debug_assertions)] + let _abort_on_panic = AbortOnPanic; + + let num_workers = handle.shared.condvars.len(); + + let mut worker = Worker { + tick: 0, + num_seq_local_queue_polls: 0, + global_queue_interval: Stats::DEFAULT_GLOBAL_QUEUE_INTERVAL, + is_shutdown: false, + is_traced: false, + workers_to_notify: Vec::with_capacity(num_workers - 1), + idle_snapshot: idle::Snapshot::new(&handle.shared.idle), + stats: stats::Ephemeral::new(), + }; + + let sched_handle = scheduler::Handle::MultiThreadAlt(handle.clone()); + + crate::runtime::context::enter_runtime(&sched_handle, true, |_| { + // Set the worker context. + let cx = scheduler::Context::MultiThreadAlt(Context { + index, + lifo_enabled: Cell::new(!handle.shared.config.disable_lifo_slot), + handle, + core: RefCell::new(None), + handoff_core, + defer: RefCell::new(Vec::with_capacity(64)), + }); + + context::set_scheduler(&cx, || { + let cx = cx.expect_multi_thread_alt(); + + // Run the worker + let res = worker.run(&cx, blocking_in_place); + // `err` here signifies the core was lost, this is an expected end + // state for a worker. + debug_assert!(res.is_err()); + + // Check if there are any deferred tasks to notify. This can happen when + // the worker core is lost due to `block_in_place()` being called from + // within the task. + if !cx.defer.borrow().is_empty() { + worker.schedule_deferred_without_core(&cx, &mut cx.shared().synced.lock()); + } + }); + }); +} + +macro_rules! try_task { + ($e:expr) => {{ + let (task, core) = $e?; + if task.is_some() { + return Ok((task, core)); + } + core + }}; +} + +macro_rules! try_task_new_batch { + ($w:expr, $e:expr) => {{ + let (task, mut core) = $e?; + if task.is_some() { + core.stats.start_processing_scheduled_tasks(&mut $w.stats); + return Ok((task, core)); + } + core + }}; +} + +impl Worker { + fn run(&mut self, cx: &Context, blocking_in_place: bool) -> RunResult { + let (maybe_task, mut core) = { + if blocking_in_place { + if let Some(core) = cx.handoff_core.take() { + (None, core) + } else { + // Just shutdown + return Err(()); + } + } else { + let mut synced = cx.shared().synced.lock(); + + // First try to acquire an available core + if let Some(core) = self.try_acquire_available_core(cx, &mut synced) { + // Try to poll a task from the global queue + let maybe_task = self.next_remote_task_synced(cx, &mut synced); + (maybe_task, core) + } else { + // block the thread to wait for a core to be assinged to us + self.wait_for_core(cx, synced)? 
+ } + } + }; + + core.stats.start_processing_scheduled_tasks(&mut self.stats); + + if let Some(task) = maybe_task { + core = self.run_task(cx, core, task)?; + } + + while !self.is_shutdown { + let (maybe_task, c) = self.next_task(cx, core)?; + core = c; + + if let Some(task) = maybe_task { + core = self.run_task(cx, core, task)?; + } else { + // The only reason to get `None` from `next_task` is we have + // entered the shutdown phase. + assert!(self.is_shutdown); + break; + } + } + + self.pre_shutdown(cx, &mut core); + + // Signal shutdown + self.shutdown_core(cx, core); + + // It is possible that tasks wake others during drop, so we need to + // clear the defer list. + self.shutdown_clear_defer(cx); + + Err(()) + } + + // Try to acquire an available core, but do not block the thread + fn try_acquire_available_core( + &mut self, + cx: &Context, + synced: &mut Synced, + ) -> Option> { + if let Some(mut core) = cx + .shared() + .idle + .try_acquire_available_core(&mut synced.idle) + { + self.reset_acquired_core(cx, synced, &mut core); + Some(core) + } else { + None + } + } + + // Block the current thread, waiting for an available core + fn wait_for_core( + &mut self, + cx: &Context, + mut synced: MutexGuard<'_, Synced>, + ) -> NextTaskResult { + cx.shared() + .idle + .transition_worker_to_parked(&mut synced, cx.index); + + // Wait until a core is available, then exit the loop. + let mut core = loop { + if let Some(core) = synced.assigned_cores[cx.index].take() { + break core; + } + + // If shutting down, abort + if cx.shared().inject.is_closed(&synced.inject) { + self.shutdown_clear_defer(cx); + return Err(()); + } + + synced = cx.shared().condvars[cx.index].wait(synced).unwrap(); + }; + + self.reset_acquired_core(cx, &mut synced, &mut core); + + if self.is_shutdown { + // Currently shutting down, don't do any more work + return Ok((None, core)); + } + + let n = core.run_queue.max_capacity() / 2; + let maybe_task = self.next_remote_task_batch_synced(cx, &mut synced, &mut core, n); + + Ok((maybe_task, core)) + } + + /// Ensure core's state is set correctly for the worker to start using. + fn reset_acquired_core(&mut self, cx: &Context, synced: &mut Synced, core: &mut Core) { + self.global_queue_interval = core.stats.tuned_global_queue_interval(&cx.shared().config); + debug_assert!(self.global_queue_interval > 1); + + // Reset `lifo_enabled` here in case the core was previously stolen from + // a task that had the LIFO slot disabled. + self.reset_lifo_enabled(cx); + + // At this point, the local queue should be empty + debug_assert!(core.run_queue.is_empty()); + + // Update shutdown state while locked + self.update_global_flags(cx, synced); + } + + /// Finds the next task to run, this could be from a queue or stealing. If + /// none are available, the thread sleeps and tries again. + fn next_task(&mut self, cx: &Context, mut core: Box) -> NextTaskResult { + self.assert_lifo_enabled_is_correct(cx); + + if self.is_traced { + core = cx.handle.trace_core(core); + } + + // Increment the tick + self.tick = self.tick.wrapping_add(1); + + // Runs maintenance every so often. When maintenance is run, the + // driver is checked, which may result in a task being found. + core = try_task!(self.maybe_maintenance(&cx, core)); + + // Check the LIFO slot, local run queue, and the injection queue for + // a notified task. + core = try_task!(self.next_notified_task(cx, core)); + + // We consumed all work in the queues and will start searching for work. 
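        // Closing the batch here feeds the batch's mean poll time into the
        // poll-time EWMA (see `stats.rs`) before the worker goes on to
        // steal, spin, or park.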
+ core.stats.end_processing_scheduled_tasks(&mut self.stats); + + super::counters::inc_num_no_local_work(); + + if !cx.defer.borrow().is_empty() { + // We are deferring tasks, so poll the resource driver and schedule + // the deferred tasks. + try_task_new_batch!(self, self.park_yield(cx, core)); + + panic!("what happened to the deferred tasks? 🤔"); + } + + while !self.is_shutdown { + // Search for more work, this involves trying to poll the resource + // driver, steal from other workers, and check the global queue + // again. + core = try_task_new_batch!(self, self.search_for_work(cx, core)); + + debug_assert!(cx.defer.borrow().is_empty()); + core = try_task_new_batch!(self, self.park(cx, core)); + } + + // Shutting down, drop any deferred tasks + self.shutdown_clear_defer(cx); + + Ok((None, core)) + } + + fn next_notified_task(&mut self, cx: &Context, mut core: Box) -> NextTaskResult { + self.num_seq_local_queue_polls += 1; + + if self.num_seq_local_queue_polls % self.global_queue_interval == 0 { + super::counters::inc_global_queue_interval(); + + self.num_seq_local_queue_polls = 0; + + // Update the global queue interval, if needed + self.tune_global_queue_interval(cx, &mut core); + + if let Some(task) = self.next_remote_task(cx) { + return Ok((Some(task), core)); + } + } + + if let Some(task) = self.next_local_task(&mut core) { + return Ok((Some(task), core)); + } + + self.next_remote_task_batch(cx, core) + } + + fn next_remote_task(&self, cx: &Context) -> Option { + if cx.shared().inject.is_empty() { + return None; + } + + let mut synced = cx.shared().synced.lock(); + self.next_remote_task_synced(cx, &mut synced) + } + + fn next_remote_task_synced(&self, cx: &Context, synced: &mut Synced) -> Option { + // safety: we only have access to a valid `Synced` in this file. + unsafe { cx.shared().inject.pop(&mut synced.inject) } + } + + fn next_remote_task_batch(&self, cx: &Context, mut core: Box) -> NextTaskResult { + if cx.shared().inject.is_empty() { + return Ok((None, core)); + } + + // Other threads can only **remove** tasks from the current worker's + // `run_queue`. So, we can be confident that by the time we call + // `run_queue.push_back` below, there will be *at least* `cap` + // available slots in the queue. + let cap = usize::min( + core.run_queue.remaining_slots(), + core.run_queue.max_capacity() / 2, + ); + + let mut synced = cx.shared().synced.lock(); + let maybe_task = self.next_remote_task_batch_synced(cx, &mut synced, &mut core, cap); + Ok((maybe_task, core)) + } + + fn next_remote_task_batch_synced( + &self, + cx: &Context, + synced: &mut Synced, + core: &mut Core, + max: usize, + ) -> Option { + super::counters::inc_num_remote_batch(); + + // The worker is currently idle, pull a batch of work from the + // injection queue. We don't want to pull *all* the work so other + // workers can also get some. + let n = if core.is_searching { + cx.shared().inject.len() / cx.shared().idle.num_searching() + 1 + } else { + cx.shared().inject.len() / cx.shared().remotes.len() + 1 + }; + + let n = usize::min(n, max); + + // safety: passing in the correct `inject::Synced`. 
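        // (For example: 100 queued tasks and 8 remotes while not searching
        // gives n = 100 / 8 + 1 = 13; the `max` cap then ensures the batch
        // fits in the local queue.)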
+ let mut tasks = unsafe { cx.shared().inject.pop_n(&mut synced.inject, n) }; + + // Pop the first task to return immedietly + let ret = tasks.next(); + + // Push the rest of the on the run queue + core.run_queue.push_back(tasks); + + ret + } + + fn next_local_task(&self, core: &mut Core) -> Option { + self.next_lifo_task(core).or_else(|| core.run_queue.pop()) + } + + fn next_lifo_task(&self, core: &mut Core) -> Option { + core.lifo_slot.take() + } + + /// Function responsible for stealing tasks from another worker + /// + /// Note: Only if less than half the workers are searching for tasks to steal + /// a new worker will actually try to steal. The idea is to make sure not all + /// workers will be trying to steal at the same time. + fn search_for_work(&mut self, cx: &Context, mut core: Box) -> NextTaskResult { + #[cfg(not(loom))] + const ROUNDS: usize = 1; + + #[cfg(loom)] + const ROUNDS: usize = 1; + + debug_assert!(core.lifo_slot.is_none()); + debug_assert!(core.run_queue.is_empty()); + + if !self.transition_to_searching(cx, &mut core) { + return Ok((None, core)); + } + + // core = try_task!(self, self.poll_driver(cx, core)); + + // Get a snapshot of which workers are idle + cx.shared().idle.snapshot(&mut self.idle_snapshot); + + let num = cx.shared().remotes.len(); + + for i in 0..ROUNDS { + // Start from a random worker + let start = core.rand.fastrand_n(num as u32) as usize; + + if let Some(task) = self.steal_one_round(cx, &mut core, start) { + return Ok((Some(task), core)); + } + + core = try_task!(self.next_remote_task_batch(cx, core)); + + if i > 0 { + super::counters::inc_num_spin_stall(); + std::thread::sleep(std::time::Duration::from_micros(i as u64)); + } + } + + Ok((None, core)) + } + + fn steal_one_round(&self, cx: &Context, core: &mut Core, start: usize) -> Option { + let num = cx.shared().remotes.len(); + + for i in 0..num { + let i = (start + i) % num; + + // Don't steal from ourself! We know we don't have work. + if i == core.index { + continue; + } + + // If the core is currently idle, then there is nothing to steal. + if self.idle_snapshot.is_idle(i) { + continue; + } + + let target = &cx.shared().remotes[i]; + + if let Some(task) = target + .steal + .steal_into(&mut core.run_queue, &mut core.stats) + { + return Some(task); + } + } + + None + } + + fn run_task(&mut self, cx: &Context, mut core: Box, task: Notified) -> RunResult { + let task = cx.shared().owned.assert_owner(task); + + // Make sure the worker is not in the **searching** state. This enables + // another idle worker to try to steal work. + if self.transition_from_searching(cx, &mut core) { + super::counters::inc_num_relay_search(); + cx.shared().notify_parked_local(); + } + + self.assert_lifo_enabled_is_correct(cx); + + // Measure the poll start time. Note that we may end up polling other + // tasks under this measurement. In this case, the tasks came from the + // LIFO slot and are considered part of the current task for scheduling + // purposes. These tasks inherent the "parent"'s limits. + core.stats.start_poll(&mut self.stats); + + // Make the core available to the runtime context + *cx.core.borrow_mut() = Some(core); + + // Run the task + coop::budget(|| { + super::counters::inc_num_polls(); + task.run(); + let mut lifo_polls = 0; + + // As long as there is budget remaining and a task exists in the + // `lifo_slot`, then keep running. + loop { + // Check if we still have the core. If not, the core was stolen + // by another worker. 
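                // (In practice this happens when the task just polled called
                // `block_in_place`, which hands the core off to another
                // thread so the pool can keep making progress.)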
+ let mut core = match cx.core.borrow_mut().take() { + Some(core) => core, + None => { + // In this case, we cannot call `reset_lifo_enabled()` + // because the core was stolen. The stealer will handle + // that at the top of `Context::run` + return Err(()); + } + }; + + // Check for a task in the LIFO slot + let task = match self.next_lifo_task(&mut core) { + Some(task) => task, + None => { + self.reset_lifo_enabled(cx); + core.stats.end_poll(); + return Ok(core); + } + }; + + if !coop::has_budget_remaining() { + core.stats.end_poll(); + + // Not enough budget left to run the LIFO task, push it to + // the back of the queue and return. + core.run_queue + .push_back_or_overflow(task, cx.shared(), &mut core.stats); + // If we hit this point, the LIFO slot should be enabled. + // There is no need to reset it. + debug_assert!(cx.lifo_enabled.get()); + return Ok(core); + } + + // Track that we are about to run a task from the LIFO slot. + lifo_polls += 1; + super::counters::inc_lifo_schedules(); + + // Disable the LIFO slot if we reach our limit + // + // In ping-ping style workloads where task A notifies task B, + // which notifies task A again, continuously prioritizing the + // LIFO slot can cause starvation as these two tasks will + // repeatedly schedule the other. To mitigate this, we limit the + // number of times the LIFO slot is prioritized. + if lifo_polls >= MAX_LIFO_POLLS_PER_TICK { + cx.lifo_enabled.set(false); + super::counters::inc_lifo_capped(); + } + + // Run the LIFO task, then loop + *cx.core.borrow_mut() = Some(core); + let task = cx.shared().owned.assert_owner(task); + super::counters::inc_num_lifo_polls(); + task.run(); + } + }) + } + + fn schedule_deferred_with_core<'a>( + &mut self, + cx: &'a Context, + mut core: Box, + synced: impl FnOnce() -> MutexGuard<'a, Synced>, + ) -> NextTaskResult { + let mut defer = cx.defer.borrow_mut(); + + // Grab a task to run next + let task = defer.pop(); + + if task.is_none() { + return Ok((None, core)); + } + + if !defer.is_empty() { + let mut synced = synced(); + + // Number of tasks we want to try to spread across idle workers + let num_fanout = cmp::min(defer.len(), cx.shared().idle.num_idle(&synced.idle)); + + if num_fanout > 0 { + cx.shared() + .push_remote_task_batch_synced(&mut synced, defer.drain(..num_fanout)); + + cx.shared() + .idle + .notify_mult(&mut synced, &mut self.workers_to_notify, num_fanout); + } + + // Do not run the task while holding the lock... + drop(synced); + } + + // Notify any workers + for worker in self.workers_to_notify.drain(..) { + cx.shared().condvars[worker].notify_one() + } + + if !defer.is_empty() { + // Push the rest of the tasks on the local queue + for task in defer.drain(..) { + core.run_queue + .push_back_or_overflow(task, cx.shared(), &mut core.stats); + } + + cx.shared().notify_parked_local(); + } + + Ok((task, core)) + } + + fn schedule_deferred_without_core<'a>(&mut self, cx: &Context, synced: &mut Synced) { + let mut defer = cx.defer.borrow_mut(); + let num = defer.len(); + + if num > 0 { + // Push all tasks to the injection queue + cx.shared() + .push_remote_task_batch_synced(synced, defer.drain(..)); + + debug_assert!(self.workers_to_notify.is_empty()); + + // Notify workers + cx.shared() + .idle + .notify_mult(synced, &mut self.workers_to_notify, num); + + // Notify any workers + for worker in self.workers_to_notify.drain(..) 
{ + cx.shared().condvars[worker].notify_one() + } + } + } + + fn maybe_maintenance(&mut self, cx: &Context, mut core: Box) -> NextTaskResult { + if self.tick % cx.shared().config.event_interval == 0 { + super::counters::inc_num_maintenance(); + + core.stats.end_processing_scheduled_tasks(&mut self.stats); + + // Run regularly scheduled maintenance + core = try_task_new_batch!(self, self.park_yield(cx, core)); + + core.stats.start_processing_scheduled_tasks(&mut self.stats); + } + + Ok((None, core)) + } + + fn flush_metrics(&self, cx: &Context, core: &mut Core) { + core.stats.submit(&cx.shared().worker_metrics[core.index]); + } + + fn update_global_flags(&mut self, cx: &Context, synced: &mut Synced) { + if !self.is_shutdown { + self.is_shutdown = cx.shared().inject.is_closed(&synced.inject); + } + + if !self.is_traced { + self.is_traced = cx.shared().trace_status.trace_requested(); + } + } + + fn park_yield(&mut self, cx: &Context, core: Box) -> NextTaskResult { + // Call `park` with a 0 timeout. This enables the I/O driver, timer, ... + // to run without actually putting the thread to sleep. + if let Some(mut driver) = cx.shared().driver.take() { + driver.park_timeout(&cx.handle.driver, Duration::from_millis(0)); + + cx.shared().driver.set(driver); + } + + // If there are more I/O events, schedule them. + let (maybe_task, mut core) = + self.schedule_deferred_with_core(cx, core, || cx.shared().synced.lock())?; + + self.flush_metrics(cx, &mut core); + self.update_global_flags(cx, &mut cx.shared().synced.lock()); + + Ok((maybe_task, core)) + } + + /* + fn poll_driver(&mut self, cx: &Context, core: Box) -> NextTaskResult { + // Call `park` with a 0 timeout. This enables the I/O driver, timer, ... + // to run without actually putting the thread to sleep. + if let Some(mut driver) = cx.shared().driver.take() { + driver.park_timeout(&cx.handle.driver, Duration::from_millis(0)); + + cx.shared().driver.set(driver); + + // If there are more I/O events, schedule them. 
+ self.schedule_deferred_with_core(cx, core, || cx.shared().synced.lock()) + } else { + Ok((None, core)) + } + } + */ + + fn park(&mut self, cx: &Context, mut core: Box) -> NextTaskResult { + if let Some(f) = &cx.shared().config.before_park { + f(); + } + + if self.can_transition_to_parked(&mut core) { + debug_assert!(!self.is_shutdown); + debug_assert!(!self.is_traced); + + core = try_task!(self.do_park(cx, core)); + } + + if let Some(f) = &cx.shared().config.after_unpark { + f(); + } + + Ok((None, core)) + } + + fn do_park(&mut self, cx: &Context, mut core: Box) -> NextTaskResult { + let was_searching = core.is_searching; + + // Before we park, if we are searching, we need to transition away from searching + if self.transition_from_searching(cx, &mut core) { + cx.shared().idle.snapshot(&mut self.idle_snapshot); + // We were the last searching worker, we need to do one last check + if let Some(task) = self.steal_one_round(cx, &mut core, 0) { + cx.shared().notify_parked_local(); + + return Ok((Some(task), core)); + } + } + + // Acquire the lock + let mut synced = cx.shared().synced.lock(); + + // Try one last time to get tasks + let n = core.run_queue.max_capacity() / 2; + if let Some(task) = self.next_remote_task_batch_synced(cx, &mut synced, &mut core, n) { + return Ok((Some(task), core)); + } + + if !was_searching { + if cx + .shared() + .idle + .transition_worker_to_searching_if_needed(&mut synced.idle, &mut core) + { + // Skip parking, go back to searching + return Ok((None, core)); + } + } + + super::counters::inc_num_parks(); + core.stats.about_to_park(); + // Flush metrics to the runtime metrics aggregator + self.flush_metrics(cx, &mut core); + + // If the runtime is shutdown, skip parking + self.update_global_flags(cx, &mut synced); + + if self.is_shutdown { + return Ok((None, core)); + } + + // Core being returned must not be in the searching state + debug_assert!(!core.is_searching); + + // Release the core + cx.shared().idle.release_core(&mut synced, core); + + if let Some(mut driver) = cx.shared().driver.take() { + // Drop the lock before parking on the driver + drop(synced); + + // Wait for driver events + driver.park(&cx.handle.driver); + + synced = cx.shared().synced.lock(); + + // Put the driver back + cx.shared().driver.set(driver); + + if cx.shared().inject.is_closed(&mut synced.inject) { + self.shutdown_clear_defer(cx); + self.shutdown_finalize(cx, synced); + return Err(()); + } + + // Try to acquire an available core to schedule I/O events + if let Some(core) = self.try_acquire_available_core(cx, &mut synced) { + // This may result in a task being run + self.schedule_deferred_with_core(cx, core, move || synced) + } else { + // Schedule any deferred tasks + self.schedule_deferred_without_core(cx, &mut synced); + + // Wait for a core. 
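        // The thread blocks on its own condvar until another worker assigns
        // it a core via `synced.assigned_cores`, or until the inject queue
        // is closed for shutdown.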
+ self.wait_for_core(cx, synced) + } + } else { + // Wait for a core to be assigned to us + self.wait_for_core(cx, synced) + } + } + + fn transition_to_searching(&self, cx: &Context, core: &mut Core) -> bool { + if !core.is_searching { + cx.shared().idle.try_transition_worker_to_searching(core); + } + + core.is_searching + } + + /// Returns `true` if another worker must be notified + fn transition_from_searching(&self, cx: &Context, core: &mut Core) -> bool { + if !core.is_searching { + return false; + } + + cx.shared().idle.transition_worker_from_searching(core) + } + + fn can_transition_to_parked(&self, core: &mut Core) -> bool { + !self.has_tasks(core) && !self.is_shutdown && !self.is_traced + } + + fn has_tasks(&self, core: &Core) -> bool { + core.lifo_slot.is_some() || !core.run_queue.is_empty() + } + + /// Signals all tasks to shut down, and waits for them to complete. Must run + /// before we enter the single-threaded phase of shutdown processing. + fn pre_shutdown(&self, cx: &Context, core: &mut Core) { + // Signal to all tasks to shut down. + cx.shared().owned.close_and_shutdown_all(); + + core.stats.submit(&cx.shared().worker_metrics[core.index]); + } + + /// Signals that a worker has observed the shutdown signal and has replaced + /// its core back into its handle. + /// + /// If all workers have reached this point, the final cleanup is performed. + fn shutdown_core(&self, cx: &Context, core: Box) { + let mut synced = cx.shared().synced.lock(); + synced.shutdown_cores.push(core); + + self.shutdown_finalize(cx, synced); + } + + fn shutdown_finalize(&self, cx: &Context, mut synced: MutexGuard<'_, Synced>) { + // Wait for all cores + if synced.shutdown_cores.len() != cx.shared().remotes.len() { + return; + } + + let mut driver = match cx.shared().driver.take() { + Some(driver) => driver, + None => return, + }; + + debug_assert!(cx.shared().owned.is_empty()); + + for mut core in synced.shutdown_cores.drain(..) { + // Drain tasks from the local queue + while self.next_local_task(&mut core).is_some() {} + } + + // Shutdown the driver + driver.shutdown(&cx.handle.driver); + + // Drain the injection queue + // + // We already shut down every task, so we can simply drop the tasks. We + // cannot call `next_remote_task()` because we already hold the lock. + // + // safety: passing in correct `idle::Synced` + while let Some(task) = self.next_remote_task_synced(cx, &mut synced) { + drop(task); + } + } + + fn reset_lifo_enabled(&self, cx: &Context) { + cx.lifo_enabled + .set(!cx.handle.shared.config.disable_lifo_slot); + } + + fn assert_lifo_enabled_is_correct(&self, cx: &Context) { + debug_assert_eq!( + cx.lifo_enabled.get(), + !cx.handle.shared.config.disable_lifo_slot + ); + } + + fn tune_global_queue_interval(&mut self, cx: &Context, core: &mut Core) { + let next = core.stats.tuned_global_queue_interval(&cx.shared().config); + + debug_assert!(next > 1); + + // Smooth out jitter + if abs_diff(self.global_queue_interval, next) > 2 { + self.global_queue_interval = next; + } + } + + fn shutdown_clear_defer(&self, cx: &Context) { + let mut defer = cx.defer.borrow_mut(); + + for task in defer.drain(..) 
{ + drop(task); + } + } +} + +impl Context { + pub(crate) fn defer(&self, waker: &Waker) { + // TODO: refactor defer across all runtimes + waker.wake_by_ref(); + } + + fn shared(&self) -> &Shared { + &self.handle.shared + } +} + +impl Shared { + pub(super) fn schedule_task(&self, task: Notified, is_yield: bool) { + use std::ptr; + + with_current(|maybe_cx| { + if let Some(cx) = maybe_cx { + // Make sure the task is part of the **current** scheduler. + if ptr::eq(self, &cx.handle.shared) { + // And the current thread still holds a core + if let Some(core) = cx.core.borrow_mut().as_mut() { + if is_yield { + cx.defer.borrow_mut().push(task); + } else { + self.schedule_local(cx, core, task); + } + } else { + // This can happen if either the core was stolen + // (`block_in_place`) or the notification happens from + // the driver. + cx.defer.borrow_mut().push(task); + } + return; + } + } + + // Otherwise, use the inject queue. + self.schedule_remote(task); + }) + } + + fn schedule_local(&self, cx: &Context, core: &mut Core, task: Notified) { + core.stats.inc_local_schedule_count(); + + if cx.lifo_enabled.get() { + // Push to the LIFO slot + let prev = std::mem::replace(&mut core.lifo_slot, Some(task)); + // let prev = cx.shared().remotes[core.index].lifo_slot.swap_local(task); + + if let Some(prev) = prev { + core.run_queue + .push_back_or_overflow(prev, self, &mut core.stats); + } else { + return; + } + } else { + core.run_queue + .push_back_or_overflow(task, self, &mut core.stats); + } + + self.notify_parked_local(); + } + + fn notify_parked_local(&self) { + super::counters::inc_num_inc_notify_local(); + self.idle.notify_local(self); + } + + fn schedule_remote(&self, task: Notified) { + super::counters::inc_num_notify_remote(); + self.scheduler_metrics.inc_remote_schedule_count(); + + let mut synced = self.synced.lock(); + // Push the task in the + self.push_remote_task(&mut synced, task); + + // Notify a worker. The mutex is passed in and will be released as part + // of the method call. 
+ self.idle.notify_remote(synced, self); + } + + pub(super) fn close(&self) { + let mut synced = self.synced.lock(); + + if self.inject.close(&mut synced.inject) { + // Set the shutdown flag on all available cores + self.idle.shutdown(&mut synced, self); + } + } + + fn push_remote_task(&self, synced: &mut Synced, task: Notified) { + // safety: passing in correct `idle::Synced` + unsafe { + self.inject.push(&mut synced.inject, task); + } + } + + fn push_remote_task_batch(&self, iter: I) + where + I: Iterator>>, + { + unsafe { + self.inject.push_batch(self, iter); + } + } + + fn push_remote_task_batch_synced(&self, synced: &mut Synced, iter: I) + where + I: Iterator>>, + { + unsafe { + self.inject.push_batch(&mut synced.inject, iter); + } + } +} + +impl Overflow> for Shared { + fn push(&self, task: task::Notified>) { + self.push_remote_task(&mut self.synced.lock(), task); + } + + fn push_batch(&self, iter: I) + where + I: Iterator>>, + { + self.push_remote_task_batch(iter) + } +} + +impl<'a> Lock for &'a Shared { + type Handle = InjectGuard<'a>; + + fn lock(self) -> Self::Handle { + InjectGuard { + lock: self.synced.lock(), + } + } +} + +impl task::Schedule for Arc { + fn release(&self, task: &Task) -> Option { + self.shared.owned.remove(task) + } + + fn schedule(&self, task: Notified) { + self.shared.schedule_task(task, false); + } + + fn yield_now(&self, task: Notified) { + self.shared.schedule_task(task, true); + } +} + +pub(crate) struct InjectGuard<'a> { + lock: crate::loom::sync::MutexGuard<'a, Synced>, +} + +impl<'a> AsMut for InjectGuard<'a> { + fn as_mut(&mut self) -> &mut inject::Synced { + &mut self.lock.inject + } +} + +#[track_caller] +fn with_current(f: impl FnOnce(Option<&Context>) -> R) -> R { + use scheduler::Context::MultiThreadAlt; + + context::with_scheduler(|ctx| match ctx { + Some(MultiThreadAlt(ctx)) => f(Some(ctx)), + _ => f(None), + }) +} + +// `u32::abs_diff` is not available on Tokio's MSRV. 
+fn abs_diff(a: u32, b: u32) -> u32 { + if a > b { + a - b + } else { + b - a + } +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/worker/metrics.rs b/tokio/src/runtime/scheduler/multi_thread_alt/worker/metrics.rs new file mode 100644 index 00000000000..a9a5ab3ed60 --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/worker/metrics.rs @@ -0,0 +1,11 @@ +use super::Shared; + +impl Shared { + pub(crate) fn injection_queue_depth(&self) -> usize { + self.inject.len() + } + + pub(crate) fn worker_local_queue_depth(&self, worker: usize) -> usize { + self.remotes[worker].steal.len() + } +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/worker/taskdump.rs b/tokio/src/runtime/scheduler/multi_thread_alt/worker/taskdump.rs new file mode 100644 index 00000000000..7cf69c43ddc --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/worker/taskdump.rs @@ -0,0 +1,79 @@ +use super::{Core, Handle, Shared}; + +use crate::loom::sync::Arc; +use crate::runtime::scheduler::multi_thread_alt::Stats; +use crate::runtime::task::trace::trace_multi_thread; +use crate::runtime::{dump, WorkerMetrics}; + +use std::time::Duration; + +impl Handle { + pub(super) fn trace_core(&self, mut core: Box) -> Box { + core.is_traced = false; + + if core.is_shutdown { + return core; + } + + // wait for other workers, or timeout without tracing + let timeout = Duration::from_millis(250); // a _very_ generous timeout + let barrier = + if let Some(barrier) = self.shared.trace_status.trace_start.wait_timeout(timeout) { + barrier + } else { + // don't attempt to trace + return core; + }; + + if !barrier.is_leader() { + // wait for leader to finish tracing + self.shared.trace_status.trace_end.wait(); + return core; + } + + // trace + + let owned = &self.shared.owned; + let mut local = self.shared.steal_all(); + let synced = &self.shared.synced; + let injection = &self.shared.inject; + + // safety: `trace_multi_thread` is invoked with the same `synced` that `injection` + // was created with. + let traces = unsafe { trace_multi_thread(owned, &mut local, synced, injection) } + .into_iter() + .map(dump::Task::new) + .collect(); + + let result = dump::Dump::new(traces); + + // stash the result + self.shared.trace_status.stash_result(result); + + // allow other workers to proceed + self.shared.trace_status.trace_end.wait(); + + core + } +} + +impl Shared { + /// Steal all tasks from remotes into a single local queue. 
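+    ///
+    /// This is only used on the task-dump path: the worker that leads the trace
+    /// drains every remote steal queue into one temporary local queue so the
+    /// tasks can be traced while the other workers are held at the trace barrier.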
+    pub(super) fn steal_all(&self) -> super::queue::Local<Arc<Handle>> {
+        let (_steal, mut local) = super::queue::local();
+
+        let worker_metrics = WorkerMetrics::new();
+        let mut stats = Stats::new(&worker_metrics);
+
+        for remote in self.remotes.iter() {
+            let steal = &remote.steal;
+            while !steal.is_empty() {
+                if let Some(task) = steal.steal_into(&mut local, &mut stats) {
+                    local.push_back([task].into_iter());
+                }
+            }
+        }
+
+        local
+    }
+}
diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/worker/taskdump_mock.rs b/tokio/src/runtime/scheduler/multi_thread_alt/worker/taskdump_mock.rs
new file mode 100644
index 00000000000..24c5600ce2d
--- /dev/null
+++ b/tokio/src/runtime/scheduler/multi_thread_alt/worker/taskdump_mock.rs
@@ -0,0 +1,7 @@
+use super::{Core, Handle};
+
+impl Handle {
+    pub(super) fn trace_core(&self, core: Box<Core>) -> Box<Core> {
+        core
+    }
+}
diff --git a/tokio/src/runtime/task/list.rs b/tokio/src/runtime/task/list.rs
index fb7dbdc1d95..da9ea92a0f6 100644
--- a/tokio/src/runtime/task/list.rs
+++ b/tokio/src/runtime/task/list.rs
@@ -119,7 +119,7 @@ impl<S: 'static> OwnedTasks<S> {
     /// a LocalNotified, giving the thread permission to poll this task.
     #[inline]
     pub(crate) fn assert_owner(&self, task: Notified<S>) -> LocalNotified<S> {
-        assert_eq!(task.header().get_owner_id(), self.id);
+        debug_assert_eq!(task.header().get_owner_id(), self.id);
 
         // safety: All tasks bound to this OwnedTasks are Send, so it is safe
         // to poll it on this thread no matter what thread we are on.
diff --git a/tokio/src/runtime/tests/loom_alt_pool.rs b/tokio/src/runtime/tests/loom_alt_pool.rs
new file mode 100644
index 00000000000..a1e6a6baf0d
--- /dev/null
+++ b/tokio/src/runtime/tests/loom_alt_pool.rs
@@ -0,0 +1,458 @@
+/// Full runtime loom tests. These are heavy tests and take significant time to
+/// run on CI.
+///
+/// Use `LOOM_MAX_PREEMPTIONS=1` to do a "quick" run as a smoke test.
+///
+/// In order to speed up the CI, tests are split into groups that can run as
+/// separate jobs.
+use crate::future::poll_fn;
+use crate::runtime::tests::loom_oneshot as oneshot;
+use crate::runtime::{self, Runtime};
+use crate::{spawn, task};
+use tokio_test::assert_ok;
+
+use loom::sync::atomic::{AtomicBool, AtomicUsize};
+use loom::sync::Arc;
+
+use pin_project_lite::pin_project;
+use std::future::Future;
+use std::pin::Pin;
+use std::sync::atomic::Ordering::{Relaxed, SeqCst};
+use std::task::{Context, Poll};
+
+mod atomic_take {
+    use loom::sync::atomic::AtomicBool;
+    use std::mem::MaybeUninit;
+    use std::sync::atomic::Ordering::SeqCst;
+
+    pub(super) struct AtomicTake<T> {
+        inner: MaybeUninit<T>,
+        taken: AtomicBool,
+    }
+
+    impl<T> AtomicTake<T> {
+        pub(super) fn new(value: T) -> Self {
+            Self {
+                inner: MaybeUninit::new(value),
+                taken: AtomicBool::new(false),
+            }
+        }
+
+        pub(super) fn take(&self) -> Option<T> {
+            // safety: Only one thread will see the boolean change from false
+            // to true, so that thread is able to take the value.
+            match self.taken.fetch_or(true, SeqCst) {
+                false => unsafe { Some(std::ptr::read(self.inner.as_ptr())) },
+                true => None,
+            }
+        }
+    }
+
+    impl<T> Drop for AtomicTake<T> {
+        fn drop(&mut self) {
+            drop(self.take());
+        }
+    }
+}
+
+#[derive(Clone)]
+struct AtomicOneshot<T> {
+    value: std::sync::Arc<atomic_take::AtomicTake<oneshot::Sender<T>>>,
+}
+impl<T> AtomicOneshot<T> {
+    fn new(sender: oneshot::Sender<T>) -> Self {
+        Self {
+            value: std::sync::Arc::new(atomic_take::AtomicTake::new(sender)),
+        }
+    }
+
+    fn assert_send(&self, value: T) {
+        self.value.take().unwrap().send(value);
+    }
+}
+
+/// Tests are divided into groups to make the runs faster on CI.
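+///
+/// A single group can also be run locally with the same flags the loom CI job
+/// uses (see `.github/workflows/loom.yml`), e.g.:
+/// `LOOM_MAX_PREEMPTIONS=2 RUSTFLAGS="--cfg loom --cfg tokio_unstable -C debug-assertions" cargo test --lib --release --features full -- loom_alt_pool::group_a`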
+mod group_a { + use super::*; + + #[test] + fn racy_shutdown() { + loom::model(|| { + let pool = mk_pool(1); + + // here's the case we want to exercise: + // + // a worker that still has tasks in its local queue gets sent to the blocking pool (due to + // block_in_place). the blocking pool is shut down, so drops the worker. the worker's + // shutdown method never gets run. + // + // we do this by spawning two tasks on one worker, the first of which does block_in_place, + // and then immediately drop the pool. + + pool.spawn(track(async { + crate::task::block_in_place(|| {}); + })); + pool.spawn(track(async {})); + drop(pool); + }); + } + + #[test] + fn pool_multi_spawn() { + loom::model(|| { + let pool = mk_pool(2); + let c1 = Arc::new(AtomicUsize::new(0)); + + let (tx, rx) = oneshot::channel(); + let tx1 = AtomicOneshot::new(tx); + + // Spawn a task + let c2 = c1.clone(); + let tx2 = tx1.clone(); + pool.spawn(track(async move { + spawn(track(async move { + if 1 == c1.fetch_add(1, Relaxed) { + tx1.assert_send(()); + } + })); + })); + + // Spawn a second task + pool.spawn(track(async move { + spawn(track(async move { + if 1 == c2.fetch_add(1, Relaxed) { + tx2.assert_send(()); + } + })); + })); + + rx.recv(); + }); + } + + fn only_blocking_inner(first_pending: bool) { + loom::model(move || { + let pool = mk_pool(1); + let (block_tx, block_rx) = oneshot::channel(); + + pool.spawn(track(async move { + crate::task::block_in_place(move || { + block_tx.send(()); + }); + if first_pending { + task::yield_now().await + } + })); + + block_rx.recv(); + drop(pool); + }); + } + + #[test] + fn only_blocking_without_pending() { + only_blocking_inner(false) + } + + #[test] + fn only_blocking_with_pending() { + only_blocking_inner(true) + } +} + +mod group_b { + use super::*; + + fn blocking_and_regular_inner(first_pending: bool) { + const NUM: usize = 3; + loom::model(move || { + let pool = mk_pool(1); + let cnt = Arc::new(AtomicUsize::new(0)); + + let (block_tx, block_rx) = oneshot::channel(); + let (done_tx, done_rx) = oneshot::channel(); + let done_tx = AtomicOneshot::new(done_tx); + + pool.spawn(track(async move { + crate::task::block_in_place(move || { + block_tx.send(()); + }); + if first_pending { + task::yield_now().await + } + })); + + for _ in 0..NUM { + let cnt = cnt.clone(); + let done_tx = done_tx.clone(); + + pool.spawn(track(async move { + if NUM == cnt.fetch_add(1, Relaxed) + 1 { + done_tx.assert_send(()); + } + })); + } + + done_rx.recv(); + block_rx.recv(); + + drop(pool); + }); + } + + #[test] + fn blocking_and_regular() { + blocking_and_regular_inner(false); + } + + #[test] + fn blocking_and_regular_with_pending() { + blocking_and_regular_inner(true); + } + + #[test] + fn join_output() { + loom::model(|| { + let rt = mk_pool(1); + + rt.block_on(async { + let t = crate::spawn(track(async { "hello" })); + + let out = assert_ok!(t.await); + assert_eq!("hello", out.into_inner()); + }); + }); + } + + #[test] + fn poll_drop_handle_then_drop() { + loom::model(|| { + let rt = mk_pool(1); + + rt.block_on(async move { + let mut t = crate::spawn(track(async { "hello" })); + + poll_fn(|cx| { + let _ = Pin::new(&mut t).poll(cx); + Poll::Ready(()) + }) + .await; + }); + }) + } + + #[test] + fn complete_block_on_under_load() { + loom::model(|| { + let pool = mk_pool(1); + + pool.block_on(async { + // Trigger a re-schedule + crate::spawn(track(async { + for _ in 0..2 { + task::yield_now().await; + } + })); + + gated2(true).await + }); + }); + } + + #[test] + fn shutdown_with_notification() { + use 
crate::sync::oneshot; + + loom::model(|| { + let rt = mk_pool(2); + let (done_tx, done_rx) = oneshot::channel::<()>(); + + rt.spawn(track(async move { + let (tx, rx) = oneshot::channel::<()>(); + + crate::spawn(async move { + crate::task::spawn_blocking(move || { + let _ = tx.send(()); + }); + + let _ = done_rx.await; + }); + + let _ = rx.await; + + let _ = done_tx.send(()); + })); + }); + } +} + +mod group_c { + use super::*; + + #[test] + fn pool_shutdown() { + loom::model(|| { + let pool = mk_pool(2); + + pool.spawn(track(async move { + gated2(true).await; + })); + + pool.spawn(track(async move { + gated2(false).await; + })); + + drop(pool); + }); + } +} + +mod group_d { + use super::*; + + #[test] + fn pool_multi_notify() { + loom::model(|| { + let pool = mk_pool(2); + + let c1 = Arc::new(AtomicUsize::new(0)); + + let (done_tx, done_rx) = oneshot::channel(); + let done_tx1 = AtomicOneshot::new(done_tx); + let done_tx2 = done_tx1.clone(); + + // Spawn a task + let c2 = c1.clone(); + pool.spawn(track(async move { + multi_gated().await; + + if 1 == c1.fetch_add(1, Relaxed) { + done_tx1.assert_send(()); + } + })); + + // Spawn a second task + pool.spawn(track(async move { + multi_gated().await; + + if 1 == c2.fetch_add(1, Relaxed) { + done_tx2.assert_send(()); + } + })); + + done_rx.recv(); + }); + } +} + +fn mk_pool(num_threads: usize) -> Runtime { + runtime::Builder::new_multi_thread_alt() + .worker_threads(num_threads) + // Set the intervals to avoid tuning logic + .event_interval(2) + .build() + .unwrap() +} + +fn gated2(thread: bool) -> impl Future { + use loom::thread; + use std::sync::Arc; + + let gate = Arc::new(AtomicBool::new(false)); + let mut fired = false; + + poll_fn(move |cx| { + if !fired { + let gate = gate.clone(); + let waker = cx.waker().clone(); + + if thread { + thread::spawn(move || { + gate.store(true, SeqCst); + waker.wake_by_ref(); + }); + } else { + spawn(track(async move { + gate.store(true, SeqCst); + waker.wake_by_ref(); + })); + } + + fired = true; + + return Poll::Pending; + } + + if gate.load(SeqCst) { + Poll::Ready("hello world") + } else { + Poll::Pending + } + }) +} + +async fn multi_gated() { + struct Gate { + waker: loom::future::AtomicWaker, + count: AtomicUsize, + } + + let gate = Arc::new(Gate { + waker: loom::future::AtomicWaker::new(), + count: AtomicUsize::new(0), + }); + + { + let gate = gate.clone(); + spawn(track(async move { + for i in 1..3 { + gate.count.store(i, SeqCst); + gate.waker.wake(); + } + })); + } + + poll_fn(move |cx| { + gate.waker.register_by_ref(cx.waker()); + if gate.count.load(SeqCst) < 2 { + Poll::Pending + } else { + Poll::Ready(()) + } + }) + .await; +} + +fn track(f: T) -> Track { + Track { + inner: f, + arc: Arc::new(()), + } +} + +pin_project! { + struct Track { + #[pin] + inner: T, + // Arc is used to hook into loom's leak tracking. 
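+        // If the runtime leaks a task instead of dropping it, the `Arc` leaks
+        // with it and loom reports the leak at the end of the model iteration.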
+ arc: Arc<()>, + } +} + +impl Track { + fn into_inner(self) -> T { + self.inner + } +} + +impl Future for Track { + type Output = Track; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let me = self.project(); + + Poll::Ready(Track { + inner: ready!(me.inner.poll(cx)), + arc: me.arc.clone(), + }) + } +} diff --git a/tokio/src/runtime/tests/loom_pool.rs b/tokio/src/runtime/tests/loom_pool.rs index fb42e1eb40b..96ec92bd17b 100644 --- a/tokio/src/runtime/tests/loom_pool.rs +++ b/tokio/src/runtime/tests/loom_pool.rs @@ -412,8 +412,8 @@ async fn multi_gated() { } poll_fn(move |cx| { + gate.waker.register_by_ref(cx.waker()); if gate.count.load(SeqCst) < 2 { - gate.waker.register_by_ref(cx.waker()); Poll::Pending } else { Poll::Ready(()) diff --git a/tokio/src/runtime/tests/mod.rs b/tokio/src/runtime/tests/mod.rs index b12a76e268e..ee5a64432bb 100644 --- a/tokio/src/runtime/tests/mod.rs +++ b/tokio/src/runtime/tests/mod.rs @@ -56,6 +56,7 @@ cfg_loom! { mod loom_local; mod loom_oneshot; mod loom_pool; + mod loom_alt_pool; mod loom_queue; mod loom_shutdown_join; mod loom_join_set; @@ -63,7 +64,7 @@ cfg_loom! { // Make sure debug assertions are enabled #[cfg(not(debug_assertions))] - compiler_error!("these tests require debug assertions to be enabled"); + compile_error!("these tests require debug assertions to be enabled"); } cfg_not_loom! { diff --git a/tokio/src/runtime/tests/task.rs b/tokio/src/runtime/tests/task.rs index a79c0f50d15..0485bba7a00 100644 --- a/tokio/src/runtime/tests/task.rs +++ b/tokio/src/runtime/tests/task.rs @@ -1,11 +1,10 @@ use crate::runtime::task::{self, unowned, Id, JoinHandle, OwnedTasks, Schedule, Task}; use crate::runtime::tests::NoopSchedule; -use crate::util::TryLock; use std::collections::VecDeque; use std::future::Future; use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; struct AssertDropHandle { is_dropped: Arc, @@ -243,7 +242,7 @@ fn with(f: impl FnOnce(Runtime)) { let rt = Runtime(Arc::new(Inner { owned: OwnedTasks::new(), - core: TryLock::new(Core { + core: Mutex::new(Core { queue: VecDeque::new(), }), })); @@ -256,7 +255,7 @@ fn with(f: impl FnOnce(Runtime)) { struct Runtime(Arc); struct Inner { - core: TryLock, + core: Mutex, owned: OwnedTasks, } @@ -264,7 +263,7 @@ struct Core { queue: VecDeque>, } -static CURRENT: TryLock> = TryLock::new(None); +static CURRENT: Mutex> = Mutex::new(None); impl Runtime { fn spawn(&self, future: T) -> JoinHandle diff --git a/tokio/src/task/blocking.rs b/tokio/src/task/blocking.rs index 9bd15ebd5d8..1cce466394e 100644 --- a/tokio/src/task/blocking.rs +++ b/tokio/src/task/blocking.rs @@ -75,7 +75,7 @@ cfg_rt_multi_thread! { where F: FnOnce() -> R, { - crate::runtime::scheduler::multi_thread::block_in_place(f) + crate::runtime::scheduler::block_in_place(f) } } diff --git a/tokio/tests/rt_common.rs b/tokio/tests/rt_common.rs index 9c6add047a7..9ab7fd3516e 100644 --- a/tokio/tests/rt_common.rs +++ b/tokio/tests/rt_common.rs @@ -52,6 +52,40 @@ macro_rules! 
rt_test {
            .into()
        }
    }
+
+    #[cfg(not(tokio_wasi))] // Wasi doesn't support threads
+    #[cfg(tokio_unstable)]
+    mod alt_threaded_scheduler_4_threads {
+        $($t)*
+
+        const NUM_WORKERS: usize = 4;
+
+        fn rt() -> Arc<Runtime> {
+            tokio::runtime::Builder::new_multi_thread_alt()
+                .worker_threads(4)
+                .enable_all()
+                .build()
+                .unwrap()
+                .into()
+        }
+    }
+
+    #[cfg(not(tokio_wasi))] // Wasi doesn't support threads
+    #[cfg(tokio_unstable)]
+    mod alt_threaded_scheduler_1_thread {
+        $($t)*
+
+        const NUM_WORKERS: usize = 1;
+
+        fn rt() -> Arc<Runtime> {
+            tokio::runtime::Builder::new_multi_thread_alt()
+                .worker_threads(1)
+                .enable_all()
+                .build()
+                .unwrap()
+                .into()
+        }
+    }
    }
}
diff --git a/tokio/tests/rt_threaded_alt.rs b/tokio/tests/rt_threaded_alt.rs
new file mode 100644
index 00000000000..7cb6a3ba9b3
--- /dev/null
+++ b/tokio/tests/rt_threaded_alt.rs
@@ -0,0 +1,738 @@
+#![warn(rust_2018_idioms)]
+#![cfg(all(feature = "full", not(tokio_wasi)))]
+#![cfg(tokio_unstable)]
+
+use tokio::io::{AsyncReadExt, AsyncWriteExt};
+use tokio::net::{TcpListener, TcpStream};
+use tokio::runtime;
+use tokio::sync::oneshot;
+use tokio_test::{assert_err, assert_ok};
+
+use futures::future::poll_fn;
+use std::future::Future;
+use std::pin::Pin;
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering::Relaxed;
+use std::sync::{mpsc, Arc, Mutex};
+use std::task::{Context, Poll, Waker};
+
+macro_rules! cfg_metrics {
+    ($($t:tt)*) => {
+        #[cfg(tokio_unstable)]
+        {
+            $( $t )*
+        }
+    }
+}
+
+#[test]
+fn single_thread() {
+    // No panic when starting a runtime w/ a single thread
+    let _ = runtime::Builder::new_multi_thread_alt()
+        .enable_all()
+        .worker_threads(1)
+        .build()
+        .unwrap();
+}
+
+#[test]
+fn many_oneshot_futures() {
+    // used for notifying the main thread
+    const NUM: usize = 1_000;
+
+    for _ in 0..5 {
+        let (tx, rx) = mpsc::channel();
+
+        let rt = rt();
+        let cnt = Arc::new(AtomicUsize::new(0));
+
+        for _ in 0..NUM {
+            let cnt = cnt.clone();
+            let tx = tx.clone();
+
+            rt.spawn(async move {
+                let num = cnt.fetch_add(1, Relaxed) + 1;
+
+                if num == NUM {
+                    tx.send(()).unwrap();
+                }
+            });
+        }
+
+        rx.recv().unwrap();
+
+        // Wait for the pool to shutdown
+        drop(rt);
+    }
+}
+
+#[test]
+fn spawn_two() {
+    let rt = rt();
+
+    let out = rt.block_on(async {
+        let (tx, rx) = oneshot::channel();
+
+        tokio::spawn(async move {
+            tokio::spawn(async move {
+                tx.send("ZOMG").unwrap();
+            });
+        });
+
+        assert_ok!(rx.await)
+    });
+
+    assert_eq!(out, "ZOMG");
+
+    cfg_metrics!
{ + let metrics = rt.metrics(); + drop(rt); + assert_eq!(1, metrics.remote_schedule_count()); + + let mut local = 0; + for i in 0..metrics.num_workers() { + local += metrics.worker_local_schedule_count(i); + } + + assert_eq!(1, local); + } +} + +#[test] +fn many_multishot_futures() { + const CHAIN: usize = 200; + const CYCLES: usize = 5; + const TRACKS: usize = 50; + + for _ in 0..50 { + let rt = rt(); + let mut start_txs = Vec::with_capacity(TRACKS); + let mut final_rxs = Vec::with_capacity(TRACKS); + + for _ in 0..TRACKS { + let (start_tx, mut chain_rx) = tokio::sync::mpsc::channel(10); + + for _ in 0..CHAIN { + let (next_tx, next_rx) = tokio::sync::mpsc::channel(10); + + // Forward all the messages + rt.spawn(async move { + while let Some(v) = chain_rx.recv().await { + next_tx.send(v).await.unwrap(); + } + }); + + chain_rx = next_rx; + } + + // This final task cycles if needed + let (final_tx, final_rx) = tokio::sync::mpsc::channel(10); + let cycle_tx = start_tx.clone(); + let mut rem = CYCLES; + + rt.spawn(async move { + for _ in 0..CYCLES { + let msg = chain_rx.recv().await.unwrap(); + + rem -= 1; + + if rem == 0 { + final_tx.send(msg).await.unwrap(); + } else { + cycle_tx.send(msg).await.unwrap(); + } + } + }); + + start_txs.push(start_tx); + final_rxs.push(final_rx); + } + + { + rt.block_on(async move { + for start_tx in start_txs { + start_tx.send("ping").await.unwrap(); + } + + for mut final_rx in final_rxs { + final_rx.recv().await.unwrap(); + } + }); + } + } +} + +#[test] +fn lifo_slot_budget() { + async fn my_fn() { + spawn_another(); + } + + fn spawn_another() { + tokio::spawn(my_fn()); + } + + let rt = runtime::Builder::new_multi_thread_alt() + .enable_all() + .worker_threads(1) + .build() + .unwrap(); + + let (send, recv) = oneshot::channel(); + + rt.spawn(async move { + tokio::spawn(my_fn()); + let _ = send.send(()); + }); + + let _ = rt.block_on(recv); +} + +#[test] +fn spawn_shutdown() { + let rt = rt(); + let (tx, rx) = mpsc::channel(); + + rt.block_on(async { + tokio::spawn(client_server(tx.clone())); + }); + + // Use spawner + rt.spawn(client_server(tx)); + + assert_ok!(rx.recv()); + assert_ok!(rx.recv()); + + drop(rt); + assert_err!(rx.try_recv()); +} + +async fn client_server(tx: mpsc::Sender<()>) { + let server = assert_ok!(TcpListener::bind("127.0.0.1:0").await); + + // Get the assigned address + let addr = assert_ok!(server.local_addr()); + + // Spawn the server + tokio::spawn(async move { + // Accept a socket + let (mut socket, _) = server.accept().await.unwrap(); + + // Write some data + socket.write_all(b"hello").await.unwrap(); + }); + + let mut client = TcpStream::connect(&addr).await.unwrap(); + + let mut buf = vec![]; + client.read_to_end(&mut buf).await.unwrap(); + + assert_eq!(buf, b"hello"); + tx.send(()).unwrap(); +} + +#[test] +fn drop_threadpool_drops_futures() { + for _ in 0..1_000 { + let num_inc = Arc::new(AtomicUsize::new(0)); + let num_dec = Arc::new(AtomicUsize::new(0)); + let num_drop = Arc::new(AtomicUsize::new(0)); + + struct Never(Arc); + + impl Future for Never { + type Output = (); + + fn poll(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<()> { + Poll::Pending + } + } + + impl Drop for Never { + fn drop(&mut self) { + self.0.fetch_add(1, Relaxed); + } + } + + let a = num_inc.clone(); + let b = num_dec.clone(); + + let rt = runtime::Builder::new_multi_thread_alt() + .enable_all() + .on_thread_start(move || { + a.fetch_add(1, Relaxed); + }) + .on_thread_stop(move || { + b.fetch_add(1, Relaxed); + }) + .build() + .unwrap(); + + 
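+        // The future never completes, so the only way `num_drop` can reach 1 is
+        // if shutting the runtime down drops the still-pending task.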
rt.spawn(Never(num_drop.clone())); + + // Wait for the pool to shutdown + drop(rt); + + // Assert that only a single thread was spawned. + let a = num_inc.load(Relaxed); + assert!(a >= 1); + + // Assert that all threads shutdown + let b = num_dec.load(Relaxed); + assert_eq!(a, b); + + // Assert that the future was dropped + let c = num_drop.load(Relaxed); + assert_eq!(c, 1); + } +} + +#[test] +fn start_stop_callbacks_called() { + use std::sync::atomic::{AtomicUsize, Ordering}; + + let after_start = Arc::new(AtomicUsize::new(0)); + let before_stop = Arc::new(AtomicUsize::new(0)); + + let after_inner = after_start.clone(); + let before_inner = before_stop.clone(); + let rt = tokio::runtime::Builder::new_multi_thread_alt() + .enable_all() + .on_thread_start(move || { + after_inner.clone().fetch_add(1, Ordering::Relaxed); + }) + .on_thread_stop(move || { + before_inner.clone().fetch_add(1, Ordering::Relaxed); + }) + .build() + .unwrap(); + + let (tx, rx) = oneshot::channel(); + + rt.spawn(async move { + assert_ok!(tx.send(())); + }); + + assert_ok!(rt.block_on(rx)); + + drop(rt); + + assert!(after_start.load(Ordering::Relaxed) > 0); + assert!(before_stop.load(Ordering::Relaxed) > 0); +} + +#[test] +fn blocking() { + // used for notifying the main thread + const NUM: usize = 1_000; + + for _ in 0..10 { + let (tx, rx) = mpsc::channel(); + + let rt = rt(); + let cnt = Arc::new(AtomicUsize::new(0)); + + // there are four workers in the pool + // so, if we run 4 blocking tasks, we know that handoff must have happened + let block = Arc::new(std::sync::Barrier::new(5)); + for _ in 0..4 { + let block = block.clone(); + rt.spawn(async move { + tokio::task::block_in_place(move || { + block.wait(); + block.wait(); + }) + }); + } + block.wait(); + + for _ in 0..NUM { + let cnt = cnt.clone(); + let tx = tx.clone(); + + rt.spawn(async move { + let num = cnt.fetch_add(1, Relaxed) + 1; + + if num == NUM { + tx.send(()).unwrap(); + } + }); + } + + rx.recv().unwrap(); + + // Wait for the pool to shutdown + block.wait(); + } +} + +#[test] +fn multi_threadpool() { + use tokio::sync::oneshot; + + let rt1 = rt(); + let rt2 = rt(); + + let (tx, rx) = oneshot::channel(); + let (done_tx, done_rx) = mpsc::channel(); + + rt2.spawn(async move { + rx.await.unwrap(); + done_tx.send(()).unwrap(); + }); + + rt1.spawn(async move { + tx.send(()).unwrap(); + }); + + done_rx.recv().unwrap(); +} + +// When `block_in_place` returns, it attempts to reclaim the yielded runtime +// worker. In this case, the remainder of the task is on the runtime worker and +// must take part in the cooperative task budgeting system. +// +// The test ensures that, when this happens, attempting to consume from a +// channel yields occasionally even if there are values ready to receive. +#[test] +fn coop_and_block_in_place() { + let rt = tokio::runtime::Builder::new_multi_thread_alt() + // Setting max threads to 1 prevents another thread from claiming the + // runtime worker yielded as part of `block_in_place` and guarantees the + // same thread will reclaim the worker at the end of the + // `block_in_place` call. + .max_blocking_threads(1) + .build() + .unwrap(); + + rt.block_on(async move { + let (tx, mut rx) = tokio::sync::mpsc::channel(1024); + + // Fill the channel + for _ in 0..1024 { + tx.send(()).await.unwrap(); + } + + drop(tx); + + tokio::spawn(async move { + // Block in place without doing anything + tokio::task::block_in_place(|| {}); + + // Receive all the values, this should trigger a `Pending` as the + // coop limit will be reached. 
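+            // If the loop below drains the channel all the way to `None` without
+            // ever observing `Poll::Pending`, the coop budget never kicked in and
+            // the task panics.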
+ poll_fn(|cx| { + while let Poll::Ready(v) = { + tokio::pin! { + let fut = rx.recv(); + } + + Pin::new(&mut fut).poll(cx) + } { + if v.is_none() { + panic!("did not yield"); + } + } + + Poll::Ready(()) + }) + .await + }) + .await + .unwrap(); + }); +} + +#[test] +fn yield_after_block_in_place() { + let rt = tokio::runtime::Builder::new_multi_thread_alt() + .worker_threads(1) + .build() + .unwrap(); + + rt.block_on(async { + tokio::spawn(async move { + // Block in place then enter a new runtime + tokio::task::block_in_place(|| { + let rt = tokio::runtime::Builder::new_current_thread() + .build() + .unwrap(); + + rt.block_on(async {}); + }); + + // Yield, then complete + tokio::task::yield_now().await; + }) + .await + .unwrap() + }); +} + +// Testing this does not panic +#[test] +fn max_blocking_threads() { + let _rt = tokio::runtime::Builder::new_multi_thread_alt() + .max_blocking_threads(1) + .build() + .unwrap(); +} + +#[test] +#[should_panic] +fn max_blocking_threads_set_to_zero() { + let _rt = tokio::runtime::Builder::new_multi_thread_alt() + .max_blocking_threads(0) + .build() + .unwrap(); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn hang_on_shutdown() { + let (sync_tx, sync_rx) = std::sync::mpsc::channel::<()>(); + tokio::spawn(async move { + tokio::task::block_in_place(|| sync_rx.recv().ok()); + }); + + tokio::spawn(async { + tokio::time::sleep(std::time::Duration::from_secs(2)).await; + drop(sync_tx); + }); + tokio::time::sleep(std::time::Duration::from_secs(1)).await; +} + +/// Demonstrates tokio-rs/tokio#3869 +#[test] +fn wake_during_shutdown() { + struct Shared { + waker: Option, + } + + struct MyFuture { + shared: Arc>, + put_waker: bool, + } + + impl MyFuture { + fn new() -> (Self, Self) { + let shared = Arc::new(Mutex::new(Shared { waker: None })); + let f1 = MyFuture { + shared: shared.clone(), + put_waker: true, + }; + let f2 = MyFuture { + shared, + put_waker: false, + }; + (f1, f2) + } + } + + impl Future for MyFuture { + type Output = (); + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> { + let me = Pin::into_inner(self); + let mut lock = me.shared.lock().unwrap(); + if me.put_waker { + lock.waker = Some(cx.waker().clone()); + } + Poll::Pending + } + } + + impl Drop for MyFuture { + fn drop(&mut self) { + let mut lock = self.shared.lock().unwrap(); + if !self.put_waker { + lock.waker.take().unwrap().wake(); + } + drop(lock); + } + } + + let rt = tokio::runtime::Builder::new_multi_thread_alt() + .worker_threads(1) + .enable_all() + .build() + .unwrap(); + + let (f1, f2) = MyFuture::new(); + + rt.spawn(f1); + rt.spawn(f2); + + rt.block_on(async { tokio::time::sleep(tokio::time::Duration::from_millis(20)).await }); +} + +#[should_panic] +#[tokio::test] +async fn test_block_in_place1() { + tokio::task::block_in_place(|| {}); +} + +#[tokio::test(flavor = "multi_thread")] +async fn test_block_in_place2() { + tokio::task::block_in_place(|| {}); +} + +#[should_panic] +#[tokio::main(flavor = "current_thread")] +#[test] +async fn test_block_in_place3() { + tokio::task::block_in_place(|| {}); +} + +#[tokio::main] +#[test] +async fn test_block_in_place4() { + tokio::task::block_in_place(|| {}); +} + +// Testing the tuning logic is tricky as it is inherently timing based, and more +// of a heuristic than an exact behavior. This test checks that the interval +// changes over time based on load factors. There are no assertions, completion +// is sufficient. If there is a regression, this test will hang. 
In theory, we +// could add limits, but that would be likely to fail on CI. +#[test] +#[cfg(not(tokio_no_tuning_tests))] +fn test_tuning() { + use std::sync::atomic::AtomicBool; + use std::time::Duration; + + let rt = runtime::Builder::new_multi_thread_alt() + .worker_threads(1) + .build() + .unwrap(); + + fn iter(flag: Arc, counter: Arc, stall: bool) { + if flag.load(Relaxed) { + if stall { + std::thread::sleep(Duration::from_micros(5)); + } + + counter.fetch_add(1, Relaxed); + tokio::spawn(async move { iter(flag, counter, stall) }); + } + } + + let flag = Arc::new(AtomicBool::new(true)); + let counter = Arc::new(AtomicUsize::new(61)); + let interval = Arc::new(AtomicUsize::new(61)); + + { + let flag = flag.clone(); + let counter = counter.clone(); + rt.spawn(async move { iter(flag, counter, true) }); + } + + // Now, hammer the injection queue until the interval drops. + let mut n = 0; + loop { + let curr = interval.load(Relaxed); + + if curr <= 8 { + n += 1; + } else { + n = 0; + } + + // Make sure we get a few good rounds. Jitter in the tuning could result + // in one "good" value without being representative of reaching a good + // state. + if n == 3 { + break; + } + + if Arc::strong_count(&interval) < 5_000 { + let counter = counter.clone(); + let interval = interval.clone(); + + rt.spawn(async move { + let prev = counter.swap(0, Relaxed); + interval.store(prev, Relaxed); + }); + + std::thread::yield_now(); + } + } + + flag.store(false, Relaxed); + + let w = Arc::downgrade(&interval); + drop(interval); + + while w.strong_count() > 0 { + std::thread::sleep(Duration::from_micros(500)); + } + + // Now, run it again with a faster task + let flag = Arc::new(AtomicBool::new(true)); + // Set it high, we know it shouldn't ever really be this high + let counter = Arc::new(AtomicUsize::new(10_000)); + let interval = Arc::new(AtomicUsize::new(10_000)); + + { + let flag = flag.clone(); + let counter = counter.clone(); + rt.spawn(async move { iter(flag, counter, false) }); + } + + // Now, hammer the injection queue until the interval reaches the expected range. + let mut n = 0; + loop { + let curr = interval.load(Relaxed); + + if curr <= 1_000 && curr > 32 { + n += 1; + } else { + n = 0; + } + + if n == 3 { + break; + } + + if Arc::strong_count(&interval) <= 5_000 { + let counter = counter.clone(); + let interval = interval.clone(); + + rt.spawn(async move { + let prev = counter.swap(0, Relaxed); + interval.store(prev, Relaxed); + }); + } + + std::thread::yield_now(); + } + + flag.store(false, Relaxed); +} + +fn rt() -> runtime::Runtime { + Builder::new_multi_thread_alt().enable_all().build() +} + +#[cfg(tokio_unstable)] +mod unstable { + use super::*; + + #[test] + fn test_disable_lifo_slot() { + let rt = runtime::Builder::new_multi_thread_alt() + .disable_lifo_slot() + .worker_threads(2) + .build() + .unwrap(); + + rt.block_on(async { + tokio::spawn(async { + // Spawn another task and block the thread until completion. If the LIFO slot + // is used then the test doesn't complete. 
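+                // `block_on` parks this worker thread, so the inner task can only
+                // complete if the other worker picks it up; with the LIFO slot
+                // disabled it lands in a stealable queue instead of this worker's
+                // slot.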
+ futures::executor::block_on(tokio::spawn(async {})).unwrap(); + }) + .await + .unwrap(); + }) + } +} From cf1bac32a7d5017bdba74c424a49d53abcc5df55 Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Mon, 26 Jun 2023 22:07:36 +0000 Subject: [PATCH 02/26] loom PR was merged --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index e4e70eba569..6d00cfc1701 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,4 +16,4 @@ members = [ ] [patch.crates-io] -loom = { git = "https://github.com/tokio-rs/loom", branch = "increase-max-threads" } +loom = { git = "https://github.com/tokio-rs/loom" } From f6a566808bed74a51a88429916d3d9cdb25527bb Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Mon, 26 Jun 2023 22:09:04 +0000 Subject: [PATCH 03/26] fmt --- tokio/src/runtime/scheduler/multi_thread_alt/worker.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/worker.rs b/tokio/src/runtime/scheduler/multi_thread_alt/worker.rs index 28b88cb679b..12129643665 100644 --- a/tokio/src/runtime/scheduler/multi_thread_alt/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread_alt/worker.rs @@ -59,15 +59,13 @@ use crate::loom::sync::{Arc, Condvar, Mutex, MutexGuard}; use crate::runtime; use crate::runtime::context; +use crate::runtime::driver::Driver; use crate::runtime::scheduler::multi_thread_alt::{ idle, queue, stats, Counters, Handle, Idle, Overflow, Stats, TraceStatus, }; use crate::runtime::scheduler::{self, inject, Lock}; use crate::runtime::task::OwnedTasks; -use crate::runtime::{ - blocking, coop, driver, task, Config, SchedulerMetrics, WorkerMetrics, -}; -use crate::runtime::driver::Driver; +use crate::runtime::{blocking, coop, driver, task, Config, SchedulerMetrics, WorkerMetrics}; use crate::util::atomic_cell::AtomicCell; use crate::util::rand::{FastRand, RngSeedGenerator}; From 3dc1886c3505c79211c354f415fdf974255d6000 Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Tue, 27 Jun 2023 17:10:08 +0000 Subject: [PATCH 04/26] fixes for CI --- tokio/src/runtime/handle.rs | 2 ++ .../runtime/scheduler/multi_thread_alt/handle.rs | 4 ---- .../src/runtime/scheduler/multi_thread_alt/mod.rs | 14 +++----------- .../runtime/scheduler/multi_thread_alt/worker.rs | 8 +------- tokio/src/runtime/task/trace/mod.rs | 2 ++ 5 files changed, 8 insertions(+), 22 deletions(-) diff --git a/tokio/src/runtime/handle.rs b/tokio/src/runtime/handle.rs index 42745857dc0..1548e92d903 100644 --- a/tokio/src/runtime/handle.rs +++ b/tokio/src/runtime/handle.rs @@ -505,6 +505,8 @@ cfg_taskdump! { handle.dump().await }).await }, + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + scheduler::Handle::MultiThreadAlt(_) => panic!("task dump not implemented for this runtime flavor"), } } } diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/handle.rs b/tokio/src/runtime/scheduler/multi_thread_alt/handle.rs index b50840ec82c..c9997f89fbe 100644 --- a/tokio/src/runtime/scheduler/multi_thread_alt/handle.rs +++ b/tokio/src/runtime/scheduler/multi_thread_alt/handle.rs @@ -13,10 +13,6 @@ cfg_metrics! { mod metrics; } -cfg_taskdump! 
{ - mod taskdump; -} - /// Handle to the multi thread scheduler pub(crate) struct Handle { /// Task spawner diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/mod.rs b/tokio/src/runtime/scheduler/multi_thread_alt/mod.rs index a6704ab5f1a..e30c9b4783b 100644 --- a/tokio/src/runtime/scheduler/multi_thread_alt/mod.rs +++ b/tokio/src/runtime/scheduler/multi_thread_alt/mod.rs @@ -21,17 +21,9 @@ mod worker; use worker::Core; pub(crate) use worker::{Context, Shared}; -cfg_taskdump! { - mod trace; - use trace::TraceStatus; - - pub(crate) use worker::Synced; -} - -cfg_not_taskdump! { - mod trace_mock; - use trace_mock::TraceStatus; -} +// TODO: implement task dump +mod trace_mock; +use trace_mock::TraceStatus; pub(crate) use worker::block_in_place; diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/worker.rs b/tokio/src/runtime/scheduler/multi_thread_alt/worker.rs index 12129643665..4d7bfa97eaf 100644 --- a/tokio/src/runtime/scheduler/multi_thread_alt/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread_alt/worker.rs @@ -78,13 +78,7 @@ cfg_metrics! { mod metrics; } -cfg_taskdump! { - mod taskdump; -} - -cfg_not_taskdump! { - mod taskdump_mock; -} +mod taskdump_mock; /// A scheduler worker /// diff --git a/tokio/src/runtime/task/trace/mod.rs b/tokio/src/runtime/task/trace/mod.rs index 543b7eee98e..9c61014e865 100644 --- a/tokio/src/runtime/task/trace/mod.rs +++ b/tokio/src/runtime/task/trace/mod.rs @@ -186,6 +186,8 @@ pub(crate) fn trace_leaf(cx: &mut task::Context<'_>) -> Poll<()> { scheduler::Context::CurrentThread(s) => s.defer.defer(cx.waker()), #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] scheduler::Context::MultiThread(s) => s.defer.defer(cx.waker()), + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + scheduler::Context::MultiThreadAlt(_) => unimplemented!(), } } }); From 8baf117e8451615e36f151d61883141d1b65c79e Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Tue, 27 Jun 2023 17:11:06 +0000 Subject: [PATCH 05/26] update loom --- Cargo.toml | 3 --- tokio/Cargo.toml | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6d00cfc1701..f3e19312e8b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,3 @@ members = [ "tests-build", "tests-integration", ] - -[patch.crates-io] -loom = { git = "https://github.com/tokio-rs/loom" } diff --git a/tokio/Cargo.toml b/tokio/Cargo.toml index d5b55eff6b9..46ea36db53d 100644 --- a/tokio/Cargo.toml +++ b/tokio/Cargo.toml @@ -160,7 +160,7 @@ wasm-bindgen-test = "0.3.0" mio-aio = { version = "0.7.0", features = ["tokio"] } [target.'cfg(loom)'.dev-dependencies] -loom = { version = "0.5.2", features = ["futures", "checkpoint"] } +loom = { version = "0.6.0", features = ["futures", "checkpoint"] } [package.metadata.docs.rs] all-features = true From 74fc03d6479e41c36573ae397cb8fb3784874083 Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Tue, 27 Jun 2023 17:25:01 +0000 Subject: [PATCH 06/26] fix test --- tokio/tests/rt_threaded_alt.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tokio/tests/rt_threaded_alt.rs b/tokio/tests/rt_threaded_alt.rs index 7cb6a3ba9b3..9105c362791 100644 --- a/tokio/tests/rt_threaded_alt.rs +++ b/tokio/tests/rt_threaded_alt.rs @@ -710,7 +710,10 @@ fn test_tuning() { } fn rt() -> runtime::Runtime { - Builder::new_multi_thread_alt().enable_all().build() + runtime::Builder::new_multi_thread_alt() + .enable_all() + .build() + .unwrap() } #[cfg(tokio_unstable)] From 91b7e801b41f6ee214b02c7098d41deb02fec16d Mon Sep 17 
00:00:00 2001 From: Carl Lerche Date: Tue, 27 Jun 2023 19:29:37 +0000 Subject: [PATCH 07/26] restructure loom tests --- .github/labeler.yml | 30 ++- .github/workflows/loom.yml | 102 +++++++-- .../mod.rs} | 0 ...ad_scheduler.rs => loom_current_thread.rs} | 2 + .../yield_now.rs} | 0 .../{loom_pool.rs => loom_multi_thread.rs} | 4 + .../queue.rs} | 16 +- .../shutdown.rs} | 0 .../tests/loom_multi_thread/yield_now.rs | 37 ++++ ...m_alt_pool.rs => loom_multi_thread_alt.rs} | 4 + .../tests/loom_multi_thread_alt/queue.rs | 205 ++++++++++++++++++ .../tests/loom_multi_thread_alt/shutdown.rs | 28 +++ .../tests/loom_multi_thread_alt/yield_now.rs | 37 ++++ tokio/src/runtime/tests/mod.rs | 11 +- 14 files changed, 437 insertions(+), 39 deletions(-) rename tokio/src/runtime/scheduler/{current_thread.rs => current_thread/mod.rs} (100%) rename tokio/src/runtime/tests/{loom_current_thread_scheduler.rs => loom_current_thread.rs} (99%) rename tokio/src/runtime/tests/{loom_yield.rs => loom_current_thread/yield_now.rs} (100%) rename tokio/src/runtime/tests/{loom_pool.rs => loom_multi_thread.rs} (99%) rename tokio/src/runtime/tests/{loom_queue.rs => loom_multi_thread/queue.rs} (91%) rename tokio/src/runtime/tests/{loom_shutdown_join.rs => loom_multi_thread/shutdown.rs} (100%) create mode 100644 tokio/src/runtime/tests/loom_multi_thread/yield_now.rs rename tokio/src/runtime/tests/{loom_alt_pool.rs => loom_multi_thread_alt.rs} (99%) create mode 100644 tokio/src/runtime/tests/loom_multi_thread_alt/queue.rs create mode 100644 tokio/src/runtime/tests/loom_multi_thread_alt/shutdown.rs create mode 100644 tokio/src/runtime/tests/loom_multi_thread_alt/yield_now.rs diff --git a/.github/labeler.yml b/.github/labeler.yml index 6e53c92aaf7..b69490cc2a9 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -1,8 +1,28 @@ -R-loom: +R-loom-sync: - tokio/src/sync/* - tokio/src/sync/**/* -- tokio-util/src/sync/* -- tokio-util/src/sync/**/* -- tokio/src/runtime/* -- tokio/src/runtime/**/* + +R-loom-time-driver: +- tokio/src/runtime/time/* +- tokio/src/runtime/time/**/* + +R-loom-current-thread: +- tokio/src/runtime/scheduler/* +- tokio/src/runtime/scheduler/current_thread/* +- tokio/src/runtime/task/* +- tokio/src/runtime/task/** + +R-loom-multi-thread: +- tokio/src/runtime/scheduler/* +- tokio/src/runtime/scheduler/multi_thread/* +- tokio/src/runtime/scheduler/multi_thread/** +- tokio/src/runtime/task/* +- tokio/src/runtime/task/** + +R-loom-multi-thread-alt: +- tokio/src/runtime/scheduler/* +- tokio/src/runtime/scheduler/multi_thread_alt/* +- tokio/src/runtime/scheduler/multi_thread_alt/** +- tokio/src/runtime/task/* +- tokio/src/runtime/task/** diff --git a/.github/workflows/loom.yml b/.github/workflows/loom.yml index 417c3b470fb..c82d638eb6d 100644 --- a/.github/workflows/loom.yml +++ b/.github/workflows/loom.yml @@ -8,7 +8,9 @@ on: name: Loom env: - RUSTFLAGS: -Dwarnings + RUSTFLAGS: -Dwarnings --cfg loom --cfg tokio_unstable -C debug_assertions + LOOM_MAX_PREEMPTIONS: 2 + LOOM_MAX_BRANCHES: 10000 RUST_BACKTRACE: 1 # Change to specific Rust release to pin rust_stable: stable @@ -17,26 +19,91 @@ permissions: contents: read jobs: - loom: - name: loom + loom-sync: + name: loom tokio::sync # base_ref is null when it's not a pull request - if: github.repository_owner == 'tokio-rs' && (contains(github.event.pull_request.labels.*.name, 'R-loom') || (github.base_ref == null)) + if: github.repository_owner == 'tokio-rs' && (contains(github.event.pull_request.labels.*.name, 'R-loom-sync') || (github.base_ref == null)) + runs-on: 
ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Install Rust ${{ env.rust_stable }} + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ env.rust_stable }} + - uses: Swatinem/rust-cache@v2 + - name: run tests + run: cargo test --lib --release --features full -- --nocapture sync::tests + working-directory: tokio + + loom-time-driver: + name: loom time driver + # base_ref is null when it's not a pull request + if: github.repository_owner == 'tokio-rs' && (contains(github.event.pull_request.labels.*.name, 'R-loom-time-driver') || (github.base_ref == null)) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Install Rust ${{ env.rust_stable }} + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ env.rust_stable }} + - uses: Swatinem/rust-cache@v2 + - name: run tests + run: cargo test --lib --release --features full -- --nocapture runtime::time::tests + working-directory: tokio + + loom-current-thread: + name: loom current-thread scheduler + # base_ref is null when it's not a pull request + if: github.repository_owner == 'tokio-rs' && (contains(github.event.pull_request.labels.*.name, 'R-loom-current-thread') || (github.base_ref == null)) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Install Rust ${{ env.rust_stable }} + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ env.rust_stable }} + - uses: Swatinem/rust-cache@v2 + - name: run tests + run: cargo test --lib --release --features full -- --nocapture loom_current_thread + working-directory: tokio + + loom-multi-thread: + name: loom multi-thread scheduler + # base_ref is null when it's not a pull request + if: github.repository_owner == 'tokio-rs' && (contains(github.event.pull_request.labels.*.name, 'R-loom-multi-thread') || (github.base_ref == null)) + runs-on: ubuntu-latest + strategy: + matrix: + include: + - scope: loom_multi_thread::group_a + - scope: loom_multi_thread::group_b + - scope: loom_multi_thread::group_c + - scope: loom_multi_thread::group_d + steps: + - uses: actions/checkout@v3 + - name: Install Rust ${{ env.rust_stable }} + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ env.rust_stable }} + - uses: Swatinem/rust-cache@v2 + - name: loom ${{ matrix.scope }} + run: cargo test --lib --release --features full -- --nocapture $SCOPE + working-directory: tokio + env: + SCOPE: ${{ matrix.scope }} + + loom-multi-thread-alt: + name: loom ALT multi-thread scheduler + # base_ref is null when it's not a pull request + if: github.repository_owner == 'tokio-rs' && (contains(github.event.pull_request.labels.*.name, 'R-loom-multi-thread-alt') || (github.base_ref == null)) runs-on: ubuntu-latest strategy: matrix: include: - - scope: --skip loom_pool - max_preemptions: 2 - - scope: loom_pool::group_a - max_preemptions: 2 - - scope: loom_pool::group_b - max_preemptions: 2 - - scope: loom_pool::group_c - max_preemptions: 2 - - scope: loom_pool::group_d - max_preemptions: 2 - - scope: time::driver - max_preemptions: 2 + - scope: loom_multi_thread_alt::group_a + - scope: loom_multi_thread_alt::group_b + - scope: loom_multi_thread_alt::group_c + - scope: loom_multi_thread_alt::group_d steps: - uses: actions/checkout@v3 - name: Install Rust ${{ env.rust_stable }} @@ -48,7 +115,4 @@ jobs: run: cargo test --lib --release --features full -- --nocapture $SCOPE working-directory: tokio env: - RUSTFLAGS: --cfg loom --cfg tokio_unstable -Dwarnings -C debug-assertions - LOOM_MAX_PREEMPTIONS: ${{ matrix.max_preemptions }} - LOOM_MAX_BRANCHES: 10000 
SCOPE: ${{ matrix.scope }} diff --git a/tokio/src/runtime/scheduler/current_thread.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs similarity index 100% rename from tokio/src/runtime/scheduler/current_thread.rs rename to tokio/src/runtime/scheduler/current_thread/mod.rs diff --git a/tokio/src/runtime/tests/loom_current_thread_scheduler.rs b/tokio/src/runtime/tests/loom_current_thread.rs similarity index 99% rename from tokio/src/runtime/tests/loom_current_thread_scheduler.rs rename to tokio/src/runtime/tests/loom_current_thread.rs index a772603f711..edda6e49954 100644 --- a/tokio/src/runtime/tests/loom_current_thread_scheduler.rs +++ b/tokio/src/runtime/tests/loom_current_thread.rs @@ -1,3 +1,5 @@ +mod yield_now; + use crate::loom::sync::atomic::AtomicUsize; use crate::loom::sync::Arc; use crate::loom::thread; diff --git a/tokio/src/runtime/tests/loom_yield.rs b/tokio/src/runtime/tests/loom_current_thread/yield_now.rs similarity index 100% rename from tokio/src/runtime/tests/loom_yield.rs rename to tokio/src/runtime/tests/loom_current_thread/yield_now.rs diff --git a/tokio/src/runtime/tests/loom_pool.rs b/tokio/src/runtime/tests/loom_multi_thread.rs similarity index 99% rename from tokio/src/runtime/tests/loom_pool.rs rename to tokio/src/runtime/tests/loom_multi_thread.rs index 96ec92bd17b..c5980c226e0 100644 --- a/tokio/src/runtime/tests/loom_pool.rs +++ b/tokio/src/runtime/tests/loom_multi_thread.rs @@ -1,3 +1,7 @@ +mod queue; +mod shutdown; +mod yield_now; + /// Full runtime loom tests. These are heavy tests and take significant time to /// run on CI. /// diff --git a/tokio/src/runtime/tests/loom_queue.rs b/tokio/src/runtime/tests/loom_multi_thread/queue.rs similarity index 91% rename from tokio/src/runtime/tests/loom_queue.rs rename to tokio/src/runtime/tests/loom_multi_thread/queue.rs index b60e039b9a6..0d818283653 100644 --- a/tokio/src/runtime/tests/loom_queue.rs +++ b/tokio/src/runtime/tests/loom_multi_thread/queue.rs @@ -1,5 +1,5 @@ use crate::runtime::scheduler::multi_thread::{queue, Stats}; -use crate::runtime::tests::NoopSchedule; +use crate::runtime::tests::{unowned, NoopSchedule}; use loom::thread; use std::cell::RefCell; @@ -37,7 +37,7 @@ fn basic() { for _ in 0..2 { for _ in 0..2 { - let (task, _) = super::unowned(async {}); + let (task, _) = unowned(async {}); local.push_back_or_overflow(task, &inject, &mut stats); } @@ -46,7 +46,7 @@ fn basic() { } // Push another task - let (task, _) = super::unowned(async {}); + let (task, _) = unowned(async {}); local.push_back_or_overflow(task, &inject, &mut stats); while local.pop().is_some() { @@ -88,7 +88,7 @@ fn steal_overflow() { let mut n = 0; // push a task, pop a task - let (task, _) = super::unowned(async {}); + let (task, _) = unowned(async {}); local.push_back_or_overflow(task, &inject, &mut stats); if local.pop().is_some() { @@ -96,7 +96,7 @@ fn steal_overflow() { } for _ in 0..6 { - let (task, _) = super::unowned(async {}); + let (task, _) = unowned(async {}); local.push_back_or_overflow(task, &inject, &mut stats); } @@ -140,7 +140,7 @@ fn multi_stealer() { // Push work for _ in 0..NUM_TASKS { - let (task, _) = super::unowned(async {}); + let (task, _) = unowned(async {}); local.push_back_or_overflow(task, &inject, &mut stats); } @@ -176,10 +176,10 @@ fn chained_steal() { // Load up some tasks for _ in 0..4 { - let (task, _) = super::unowned(async {}); + let (task, _) = unowned(async {}); l1.push_back_or_overflow(task, &inject, &mut stats); - let (task, _) = super::unowned(async {}); + let (task, _) = unowned(async 
{}); l2.push_back_or_overflow(task, &inject, &mut stats); } diff --git a/tokio/src/runtime/tests/loom_shutdown_join.rs b/tokio/src/runtime/tests/loom_multi_thread/shutdown.rs similarity index 100% rename from tokio/src/runtime/tests/loom_shutdown_join.rs rename to tokio/src/runtime/tests/loom_multi_thread/shutdown.rs diff --git a/tokio/src/runtime/tests/loom_multi_thread/yield_now.rs b/tokio/src/runtime/tests/loom_multi_thread/yield_now.rs new file mode 100644 index 00000000000..ba506e5a408 --- /dev/null +++ b/tokio/src/runtime/tests/loom_multi_thread/yield_now.rs @@ -0,0 +1,37 @@ +use crate::runtime::park; +use crate::runtime::tests::loom_oneshot as oneshot; +use crate::runtime::{self, Runtime}; + +#[test] +fn yield_calls_park_before_scheduling_again() { + // Don't need to check all permutations + let mut loom = loom::model::Builder::default(); + loom.max_permutations = Some(1); + loom.check(|| { + let rt = mk_runtime(2); + let (tx, rx) = oneshot::channel::<()>(); + + rt.spawn(async { + let tid = loom::thread::current().id(); + let park_count = park::current_thread_park_count(); + + crate::task::yield_now().await; + + if tid == loom::thread::current().id() { + let new_park_count = park::current_thread_park_count(); + assert_eq!(park_count + 1, new_park_count); + } + + tx.send(()); + }); + + rx.recv(); + }); +} + +fn mk_runtime(num_threads: usize) -> Runtime { + runtime::Builder::new_multi_thread() + .worker_threads(num_threads) + .build() + .unwrap() +} diff --git a/tokio/src/runtime/tests/loom_alt_pool.rs b/tokio/src/runtime/tests/loom_multi_thread_alt.rs similarity index 99% rename from tokio/src/runtime/tests/loom_alt_pool.rs rename to tokio/src/runtime/tests/loom_multi_thread_alt.rs index a1e6a6baf0d..d33714e9677 100644 --- a/tokio/src/runtime/tests/loom_alt_pool.rs +++ b/tokio/src/runtime/tests/loom_multi_thread_alt.rs @@ -1,3 +1,7 @@ +mod queue; +mod shutdown; +mod yield_now; + /// Full runtime loom tests. These are heavy tests and take significant time to /// run on CI. 
/// diff --git a/tokio/src/runtime/tests/loom_multi_thread_alt/queue.rs b/tokio/src/runtime/tests/loom_multi_thread_alt/queue.rs new file mode 100644 index 00000000000..0d818283653 --- /dev/null +++ b/tokio/src/runtime/tests/loom_multi_thread_alt/queue.rs @@ -0,0 +1,205 @@ +use crate::runtime::scheduler::multi_thread::{queue, Stats}; +use crate::runtime::tests::{unowned, NoopSchedule}; + +use loom::thread; +use std::cell::RefCell; + +fn new_stats() -> Stats { + Stats::new(&crate::runtime::WorkerMetrics::new()) +} + +#[test] +fn basic() { + loom::model(|| { + let (steal, mut local) = queue::local(); + let inject = RefCell::new(vec![]); + let mut stats = new_stats(); + + let th = thread::spawn(move || { + let mut stats = new_stats(); + let (_, mut local) = queue::local(); + let mut n = 0; + + for _ in 0..3 { + if steal.steal_into(&mut local, &mut stats).is_some() { + n += 1; + } + + while local.pop().is_some() { + n += 1; + } + } + + n + }); + + let mut n = 0; + + for _ in 0..2 { + for _ in 0..2 { + let (task, _) = unowned(async {}); + local.push_back_or_overflow(task, &inject, &mut stats); + } + + if local.pop().is_some() { + n += 1; + } + + // Push another task + let (task, _) = unowned(async {}); + local.push_back_or_overflow(task, &inject, &mut stats); + + while local.pop().is_some() { + n += 1; + } + } + + n += inject.borrow_mut().drain(..).count(); + + n += th.join().unwrap(); + + assert_eq!(6, n); + }); +} + +#[test] +fn steal_overflow() { + loom::model(|| { + let (steal, mut local) = queue::local(); + let inject = RefCell::new(vec![]); + let mut stats = new_stats(); + + let th = thread::spawn(move || { + let mut stats = new_stats(); + let (_, mut local) = queue::local(); + let mut n = 0; + + if steal.steal_into(&mut local, &mut stats).is_some() { + n += 1; + } + + while local.pop().is_some() { + n += 1; + } + + n + }); + + let mut n = 0; + + // push a task, pop a task + let (task, _) = unowned(async {}); + local.push_back_or_overflow(task, &inject, &mut stats); + + if local.pop().is_some() { + n += 1; + } + + for _ in 0..6 { + let (task, _) = unowned(async {}); + local.push_back_or_overflow(task, &inject, &mut stats); + } + + n += th.join().unwrap(); + + while local.pop().is_some() { + n += 1; + } + + n += inject.borrow_mut().drain(..).count(); + + assert_eq!(7, n); + }); +} + +#[test] +fn multi_stealer() { + const NUM_TASKS: usize = 5; + + fn steal_tasks(steal: queue::Steal) -> usize { + let mut stats = new_stats(); + let (_, mut local) = queue::local(); + + if steal.steal_into(&mut local, &mut stats).is_none() { + return 0; + } + + let mut n = 1; + + while local.pop().is_some() { + n += 1; + } + + n + } + + loom::model(|| { + let (steal, mut local) = queue::local(); + let inject = RefCell::new(vec![]); + let mut stats = new_stats(); + + // Push work + for _ in 0..NUM_TASKS { + let (task, _) = unowned(async {}); + local.push_back_or_overflow(task, &inject, &mut stats); + } + + let th1 = { + let steal = steal.clone(); + thread::spawn(move || steal_tasks(steal)) + }; + + let th2 = thread::spawn(move || steal_tasks(steal)); + + let mut n = 0; + + while local.pop().is_some() { + n += 1; + } + + n += inject.borrow_mut().drain(..).count(); + + n += th1.join().unwrap(); + n += th2.join().unwrap(); + + assert_eq!(n, NUM_TASKS); + }); +} + +#[test] +fn chained_steal() { + loom::model(|| { + let mut stats = new_stats(); + let (s1, mut l1) = queue::local(); + let (s2, mut l2) = queue::local(); + let inject = RefCell::new(vec![]); + + // Load up some tasks + for _ in 0..4 { + let (task, _) = 
unowned(async {}); + l1.push_back_or_overflow(task, &inject, &mut stats); + + let (task, _) = unowned(async {}); + l2.push_back_or_overflow(task, &inject, &mut stats); + } + + // Spawn a task to steal from **our** queue + let th = thread::spawn(move || { + let mut stats = new_stats(); + let (_, mut local) = queue::local(); + s1.steal_into(&mut local, &mut stats); + + while local.pop().is_some() {} + }); + + // Drain our tasks, then attempt to steal + while l1.pop().is_some() {} + + s2.steal_into(&mut l1, &mut stats); + + th.join().unwrap(); + + while l1.pop().is_some() {} + while l2.pop().is_some() {} + }); +} diff --git a/tokio/src/runtime/tests/loom_multi_thread_alt/shutdown.rs b/tokio/src/runtime/tests/loom_multi_thread_alt/shutdown.rs new file mode 100644 index 00000000000..6fbc4bfdedf --- /dev/null +++ b/tokio/src/runtime/tests/loom_multi_thread_alt/shutdown.rs @@ -0,0 +1,28 @@ +use crate::runtime::{Builder, Handle}; + +#[test] +fn join_handle_cancel_on_shutdown() { + let mut builder = loom::model::Builder::new(); + builder.preemption_bound = Some(2); + builder.check(|| { + use futures::future::FutureExt; + + let rt = Builder::new_multi_thread() + .worker_threads(2) + .build() + .unwrap(); + + let handle = rt.block_on(async move { Handle::current() }); + + let jh1 = handle.spawn(futures::future::pending::<()>()); + + drop(rt); + + let jh2 = handle.spawn(futures::future::pending::<()>()); + + let err1 = jh1.now_or_never().unwrap().unwrap_err(); + let err2 = jh2.now_or_never().unwrap().unwrap_err(); + assert!(err1.is_cancelled()); + assert!(err2.is_cancelled()); + }); +} diff --git a/tokio/src/runtime/tests/loom_multi_thread_alt/yield_now.rs b/tokio/src/runtime/tests/loom_multi_thread_alt/yield_now.rs new file mode 100644 index 00000000000..ba506e5a408 --- /dev/null +++ b/tokio/src/runtime/tests/loom_multi_thread_alt/yield_now.rs @@ -0,0 +1,37 @@ +use crate::runtime::park; +use crate::runtime::tests::loom_oneshot as oneshot; +use crate::runtime::{self, Runtime}; + +#[test] +fn yield_calls_park_before_scheduling_again() { + // Don't need to check all permutations + let mut loom = loom::model::Builder::default(); + loom.max_permutations = Some(1); + loom.check(|| { + let rt = mk_runtime(2); + let (tx, rx) = oneshot::channel::<()>(); + + rt.spawn(async { + let tid = loom::thread::current().id(); + let park_count = park::current_thread_park_count(); + + crate::task::yield_now().await; + + if tid == loom::thread::current().id() { + let new_park_count = park::current_thread_park_count(); + assert_eq!(park_count + 1, new_park_count); + } + + tx.send(()); + }); + + rx.recv(); + }); +} + +fn mk_runtime(num_threads: usize) -> Runtime { + runtime::Builder::new_multi_thread() + .worker_threads(num_threads) + .build() + .unwrap() +} diff --git a/tokio/src/runtime/tests/mod.rs b/tokio/src/runtime/tests/mod.rs index ee5a64432bb..0ba7480cd4b 100644 --- a/tokio/src/runtime/tests/mod.rs +++ b/tokio/src/runtime/tests/mod.rs @@ -52,15 +52,12 @@ mod unowned_wrapper { cfg_loom! 
{ mod loom_blocking; - mod loom_current_thread_scheduler; + mod loom_current_thread; + mod loom_join_set; mod loom_local; + mod loom_multi_thread; + mod loom_multi_thread_alt; mod loom_oneshot; - mod loom_pool; - mod loom_alt_pool; - mod loom_queue; - mod loom_shutdown_join; - mod loom_join_set; - mod loom_yield; // Make sure debug assertions are enabled #[cfg(not(debug_assertions))] From 7524f02a05cb9073ad065aec6238b2fda8274dac Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Fri, 30 Jun 2023 14:35:11 -0700 Subject: [PATCH 08/26] fix leak --- .../runtime/scheduler/inject/rt_multi_thread.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tokio/src/runtime/scheduler/inject/rt_multi_thread.rs b/tokio/src/runtime/scheduler/inject/rt_multi_thread.rs index 07d1063c5d8..afe27822f7f 100644 --- a/tokio/src/runtime/scheduler/inject/rt_multi_thread.rs +++ b/tokio/src/runtime/scheduler/inject/rt_multi_thread.rs @@ -77,6 +77,20 @@ impl Shared { let mut synced = shared.lock(); let synced = synced.as_mut(); + if synced.is_closed { + drop(synced); + + let mut curr = Some(batch_head); + + while let Some(task) = curr { + curr = task.get_queue_next(); + + let _ = unsafe { task::Notified::::from_raw(task) }; + } + + return; + } + if let Some(tail) = synced.tail { unsafe { tail.set_queue_next(Some(batch_head)); From 0abf746fcb5ce80b7b49ac8850d6703ed2fccb28 Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Fri, 30 Jun 2023 15:12:48 -0700 Subject: [PATCH 09/26] try to make clippy happy --- tokio/src/runtime/scheduler/inject/rt_multi_thread.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tokio/src/runtime/scheduler/inject/rt_multi_thread.rs b/tokio/src/runtime/scheduler/inject/rt_multi_thread.rs index afe27822f7f..1d5f0403b5d 100644 --- a/tokio/src/runtime/scheduler/inject/rt_multi_thread.rs +++ b/tokio/src/runtime/scheduler/inject/rt_multi_thread.rs @@ -75,9 +75,8 @@ impl Shared { debug_assert!(unsafe { batch_tail.get_queue_next().is_none() }); let mut synced = shared.lock(); - let synced = synced.as_mut(); - if synced.is_closed { + if synced.as_mut().is_closed { drop(synced); let mut curr = Some(batch_head); @@ -91,6 +90,8 @@ impl Shared { return; } + let synced = synced.as_mut(); + if let Some(tail) = synced.tail { unsafe { tail.set_queue_next(Some(batch_head)); From 3c7973e70f5b3fc2f73b81a8d80612eeac8509f0 Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Sun, 2 Jul 2023 09:58:17 -0700 Subject: [PATCH 10/26] see if this passes --- tokio/src/runtime/tests/loom_multi_thread_alt.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tokio/src/runtime/tests/loom_multi_thread_alt.rs b/tokio/src/runtime/tests/loom_multi_thread_alt.rs index d33714e9677..f36efaf09fd 100644 --- a/tokio/src/runtime/tests/loom_multi_thread_alt.rs +++ b/tokio/src/runtime/tests/loom_multi_thread_alt.rs @@ -204,7 +204,8 @@ mod group_b { } #[test] - fn blocking_and_regular() { + #[ignore] // TODO: uncomment + fn blocking_and_regular_without_pending() { blocking_and_regular_inner(false); } From 3e3aa9895b82119e767d720b862ec7ed3186f9f7 Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Sun, 2 Jul 2023 13:46:37 -0700 Subject: [PATCH 11/26] try with more stack --- tokio/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tokio/Cargo.toml b/tokio/Cargo.toml index c344597159d..51e25703460 100644 --- a/tokio/Cargo.toml +++ b/tokio/Cargo.toml @@ -160,7 +160,7 @@ wasm-bindgen-test = "0.3.0" mio-aio = { version = "0.7.0", features = ["tokio"] } 
[target.'cfg(loom)'.dev-dependencies] -loom = { version = "0.6.0", features = ["futures", "checkpoint"] } +loom = { git = "https://github.com/tokio-rs/loom", branch = "tokio-exp", features = ["futures", "checkpoint"] } [package.metadata.docs.rs] all-features = true From 15dac699661e8b9b0081a785f1406801ea8b35a2 Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Sun, 2 Jul 2023 13:54:32 -0700 Subject: [PATCH 12/26] remove release --- .github/workflows/loom.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/loom.yml b/.github/workflows/loom.yml index c82d638eb6d..0392e75e208 100644 --- a/.github/workflows/loom.yml +++ b/.github/workflows/loom.yml @@ -87,7 +87,7 @@ jobs: toolchain: ${{ env.rust_stable }} - uses: Swatinem/rust-cache@v2 - name: loom ${{ matrix.scope }} - run: cargo test --lib --release --features full -- --nocapture $SCOPE + run: cargo test --lib --features full -- --nocapture $SCOPE working-directory: tokio env: SCOPE: ${{ matrix.scope }} From 55e7da4e11b40e715a2aa1dbb8ab015513832796 Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Sun, 2 Jul 2023 13:59:47 -0700 Subject: [PATCH 13/26] actually remove release --- .github/workflows/loom.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/loom.yml b/.github/workflows/loom.yml index 0392e75e208..cb17b672ab5 100644 --- a/.github/workflows/loom.yml +++ b/.github/workflows/loom.yml @@ -112,7 +112,7 @@ jobs: toolchain: ${{ env.rust_stable }} - uses: Swatinem/rust-cache@v2 - name: loom ${{ matrix.scope }} - run: cargo test --lib --release --features full -- --nocapture $SCOPE + run: cargo test --lib --features full -- --nocapture $SCOPE working-directory: tokio env: SCOPE: ${{ matrix.scope }} From 5bf1f0dd12911b22641527a7a03c7d0cbb01dd6c Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Sun, 2 Jul 2023 14:13:35 -0700 Subject: [PATCH 14/26] try no capture --- .github/workflows/loom.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/loom.yml b/.github/workflows/loom.yml index cb17b672ab5..73f6d07c733 100644 --- a/.github/workflows/loom.yml +++ b/.github/workflows/loom.yml @@ -112,7 +112,7 @@ jobs: toolchain: ${{ env.rust_stable }} - uses: Swatinem/rust-cache@v2 - name: loom ${{ matrix.scope }} - run: cargo test --lib --features full -- --nocapture $SCOPE + run: cargo test --lib --features full -- $SCOPE working-directory: tokio env: SCOPE: ${{ matrix.scope }} From 6de9bf6acc13e3483ba6544cd252cd52f8e8b568 Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Mon, 3 Jul 2023 14:19:39 -0700 Subject: [PATCH 15/26] release --- .github/workflows/loom.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/loom.yml b/.github/workflows/loom.yml index 73f6d07c733..99348d164f7 100644 --- a/.github/workflows/loom.yml +++ b/.github/workflows/loom.yml @@ -87,7 +87,7 @@ jobs: toolchain: ${{ env.rust_stable }} - uses: Swatinem/rust-cache@v2 - name: loom ${{ matrix.scope }} - run: cargo test --lib --features full -- --nocapture $SCOPE + run: cargo test --lib --release --features full -- $SCOPE working-directory: tokio env: SCOPE: ${{ matrix.scope }} @@ -112,7 +112,7 @@ jobs: toolchain: ${{ env.rust_stable }} - uses: Swatinem/rust-cache@v2 - name: loom ${{ matrix.scope }} - run: cargo test --lib --features full -- $SCOPE + run: cargo test --lib --release --features full -- $SCOPE working-directory: tokio env: SCOPE: ${{ matrix.scope }} From 9f93079fa7e74214caf1fb783cb77016189363ef Mon Sep 17 00:00:00 2001 From: Carl 
Lerche Date: Tue, 18 Jul 2023 09:22:22 -0700 Subject: [PATCH 16/26] try to make CI happy... again --- tokio/src/runtime/tests/loom_multi_thread_alt.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tokio/src/runtime/tests/loom_multi_thread_alt.rs b/tokio/src/runtime/tests/loom_multi_thread_alt.rs index f36efaf09fd..ed4e8e2e287 100644 --- a/tokio/src/runtime/tests/loom_multi_thread_alt.rs +++ b/tokio/src/runtime/tests/loom_multi_thread_alt.rs @@ -353,7 +353,7 @@ fn mk_pool(num_threads: usize) -> Runtime { runtime::Builder::new_multi_thread_alt() .worker_threads(num_threads) // Set the intervals to avoid tuning logic - .event_interval(2) + .global_event_interval(2) .build() .unwrap() } From 1b4fb39a3f7d43f60523973eb748a3ab035d57d5 Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Tue, 18 Jul 2023 13:38:41 -0700 Subject: [PATCH 17/26] try again From f37e05463d7d802d23d740fcad72b8f00a129491 Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Tue, 18 Jul 2023 14:37:47 -0700 Subject: [PATCH 18/26] fix build --- tokio/src/runtime/tests/loom_multi_thread_alt.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tokio/src/runtime/tests/loom_multi_thread_alt.rs b/tokio/src/runtime/tests/loom_multi_thread_alt.rs index ed4e8e2e287..27ed4d04792 100644 --- a/tokio/src/runtime/tests/loom_multi_thread_alt.rs +++ b/tokio/src/runtime/tests/loom_multi_thread_alt.rs @@ -353,7 +353,7 @@ fn mk_pool(num_threads: usize) -> Runtime { runtime::Builder::new_multi_thread_alt() .worker_threads(num_threads) // Set the intervals to avoid tuning logic - .global_event_interval(2) + .global_queue_interval(2) .build() .unwrap() } From 7dff6be7af328c455f18ca86fe23cd537c3dd818 Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Thu, 20 Jul 2023 09:16:44 -0700 Subject: [PATCH 19/26] increase global queue interval for loom tests --- tokio/src/runtime/tests/loom_multi_thread_alt.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tokio/src/runtime/tests/loom_multi_thread_alt.rs b/tokio/src/runtime/tests/loom_multi_thread_alt.rs index 27ed4d04792..6ab066ab6f6 100644 --- a/tokio/src/runtime/tests/loom_multi_thread_alt.rs +++ b/tokio/src/runtime/tests/loom_multi_thread_alt.rs @@ -353,7 +353,7 @@ fn mk_pool(num_threads: usize) -> Runtime { runtime::Builder::new_multi_thread_alt() .worker_threads(num_threads) // Set the intervals to avoid tuning logic - .global_queue_interval(2) + .global_queue_interval(61) .build() .unwrap() } From 80f037d54379460c34e432ed509e66332095b769 Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Thu, 20 Jul 2023 09:53:49 -0700 Subject: [PATCH 20/26] fix build --- tokio/src/runtime/handle.rs | 2 ++ tokio/src/runtime/scheduler/multi_thread_alt/handle.rs | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/tokio/src/runtime/handle.rs b/tokio/src/runtime/handle.rs index 45b9745b2e9..121ed8815f8 100644 --- a/tokio/src/runtime/handle.rs +++ b/tokio/src/runtime/handle.rs @@ -387,6 +387,8 @@ impl Handle { scheduler::Handle::CurrentThread(handle) => handle.owned_id(), #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] scheduler::Handle::MultiThread(handle) => handle.owned_id(), + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + scheduler::Handle::MultiThreadAlt(handle) => handle.owned_id(), }; owned_id.into() } diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/handle.rs b/tokio/src/runtime/scheduler/multi_thread_alt/handle.rs index c9997f89fbe..e0353f8da6e 100644 --- a/tokio/src/runtime/scheduler/multi_thread_alt/handle.rs 
+++ b/tokio/src/runtime/scheduler/multi_thread_alt/handle.rs @@ -58,6 +58,16 @@ impl Handle { } } +cfg_unstable! { + use std::num::NonZeroU64; + + impl Handle { + pub(crate) fn owned_id(&self) -> NonZeroU64 { + self.shared.owned.id + } + } +} + impl fmt::Debug for Handle { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fmt.debug_struct("multi_thread::Handle { ... }").finish() From 43bc3ae85d441bd889e201207dbdf8293cbb9430 Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Thu, 20 Jul 2023 14:21:14 -0700 Subject: [PATCH 21/26] try removing idle map --- .../scheduler/multi_thread_alt/idle.rs | 35 +++++++++++-------- .../scheduler/multi_thread_alt/worker.rs | 2 ++ 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs b/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs index f440bdf56a0..253688723ea 100644 --- a/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs +++ b/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs @@ -14,7 +14,7 @@ pub(super) struct Idle { num_idle: AtomicUsize, /// Map of idle cores - idle_map: IdleMap, + // idle_map: IdleMap, /// Used to catch false-negatives when waking workers needs_searching: AtomicBool, @@ -28,7 +28,7 @@ pub(super) struct IdleMap { } pub(super) struct Snapshot { - chunks: Vec, + // chunks: Vec, } /// Data synchronized by the scheduler mutex @@ -45,7 +45,7 @@ impl Idle { let idle = Idle { num_searching: AtomicUsize::new(0), num_idle: AtomicUsize::new(cores.len()), - idle_map: IdleMap::new(&cores), + // idle_map: IdleMap::new(&cores), needs_searching: AtomicBool::new(false), num_cores: cores.len(), }; @@ -68,7 +68,7 @@ impl Idle { } pub(super) fn snapshot(&self, snapshot: &mut Snapshot) { - snapshot.update(&self.idle_map) + // snapshot.update(&self.idle_map) } /// Try to acquire an available core @@ -81,8 +81,8 @@ impl Idle { debug_assert_eq!(num_idle, synced.available_cores.len()); self.num_idle.store(num_idle, Release); - self.idle_map.unset(core.index); - debug_assert!(self.idle_map.matches(&synced.available_cores)); + // self.idle_map.unset(core.index); + // debug_assert!(self.idle_map.matches(&synced.available_cores)); } ret @@ -149,8 +149,8 @@ impl Idle { debug_assert!(!core.is_searching); core.is_searching = true; - self.idle_map.unset(core.index); - debug_assert!(self.idle_map.matches(&synced.idle.available_cores)); + // self.idle_map.unset(core.index); + // debug_assert!(self.idle_map.matches(&synced.idle.available_cores)); // Assign the core to the worker synced.assigned_cores[worker] = Some(core); @@ -199,7 +199,7 @@ impl Idle { if let Some(core) = synced.idle.available_cores.pop() { debug_assert!(!core.is_searching); - self.idle_map.unset(core.index); + // self.idle_map.unset(core.index); synced.assigned_cores[worker] = Some(core); @@ -215,7 +215,7 @@ impl Idle { } if !workers.is_empty() { - debug_assert!(self.idle_map.matches(&synced.idle.available_cores)); + // debug_assert!(self.idle_map.matches(&synced.idle.available_cores)); let num_idle = synced.idle.available_cores.len(); self.num_idle.store(num_idle, Release); } else { @@ -235,7 +235,7 @@ impl Idle { let worker = synced.idle.sleepers.pop().unwrap(); let core = synced.idle.available_cores.pop().unwrap(); - self.idle_map.unset(core.index); + // self.idle_map.unset(core.index); synced.assigned_cores[worker] = Some(core); shared.condvars[worker].notify_one(); @@ -244,7 +244,7 @@ impl Idle { .store(synced.idle.available_cores.len(), Release); } - 
debug_assert!(self.idle_map.matches(&synced.idle.available_cores)); + // debug_assert!(self.idle_map.matches(&synced.idle.available_cores)); // Wake up any other workers while let Some(index) = synced.idle.sleepers.pop() { @@ -265,12 +265,12 @@ impl Idle { let num_idle = synced.idle.available_cores.len(); debug_assert_eq!(num_idle, self.num_idle.load(Acquire)); - self.idle_map.set(core.index); + // self.idle_map.set(core.index); // Store the core in the list of available cores synced.idle.available_cores.push(core); - debug_assert!(self.idle_map.matches(&synced.idle.available_cores)); + // debug_assert!(self.idle_map.matches(&synced.idle.available_cores)); // Update `num_idle` self.num_idle.store(num_idle + 1, Release); @@ -385,18 +385,24 @@ impl IdleMap { impl Snapshot { pub(crate) fn new(idle: &Idle) -> Snapshot { + /* let chunks = vec![0; idle.idle_map.chunks.len()]; let mut ret = Snapshot { chunks }; ret.update(&idle.idle_map); ret + */ + Snapshot {} } fn update(&mut self, idle_map: &IdleMap) { + /* for i in 0..self.chunks.len() { self.chunks[i] = idle_map.chunks[i].load(Acquire); } + */ } + /* pub(super) fn is_idle(&self, index: usize) -> bool { let (chunk, mask) = index_to_mask(index); debug_assert!( @@ -407,6 +413,7 @@ impl Snapshot { ); self.chunks[chunk] & mask == mask } + */ } fn num_chunks(max_cores: usize) -> usize { diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/worker.rs b/tokio/src/runtime/scheduler/multi_thread_alt/worker.rs index 4d7bfa97eaf..d402d55f2c7 100644 --- a/tokio/src/runtime/scheduler/multi_thread_alt/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread_alt/worker.rs @@ -865,10 +865,12 @@ impl Worker { continue; } + /* // If the core is currently idle, then there is nothing to steal. if self.idle_snapshot.is_idle(i) { continue; } + */ let target = &cx.shared().remotes[i]; From 2542796f899019db245c0ad69a107313b04396bb Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Thu, 20 Jul 2023 14:27:06 -0700 Subject: [PATCH 22/26] try fixing the build --- tokio/src/runtime/scheduler/multi_thread_alt/idle.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs b/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs index 253688723ea..da19de30b98 100644 --- a/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs +++ b/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs @@ -67,7 +67,7 @@ impl Idle { self.num_searching.load(Acquire) } - pub(super) fn snapshot(&self, snapshot: &mut Snapshot) { + pub(super) fn snapshot(&self, _snapshot: &mut Snapshot) { // snapshot.update(&self.idle_map) } @@ -75,7 +75,7 @@ impl Idle { pub(super) fn try_acquire_available_core(&self, synced: &mut Synced) -> Option> { let ret = synced.available_cores.pop(); - if let Some(core) = &ret { + if let Some(_core) = &ret { // Decrement the number of idle cores let num_idle = self.num_idle.load(Acquire) - 1; debug_assert_eq!(num_idle, synced.available_cores.len()); @@ -384,7 +384,7 @@ impl IdleMap { } impl Snapshot { - pub(crate) fn new(idle: &Idle) -> Snapshot { + pub(crate) fn new(_idle: &Idle) -> Snapshot { /* let chunks = vec![0; idle.idle_map.chunks.len()]; let mut ret = Snapshot { chunks }; @@ -394,7 +394,7 @@ impl Snapshot { Snapshot {} } - fn update(&mut self, idle_map: &IdleMap) { + fn update(&mut self, _idle_map: &IdleMap) { /* for i in 0..self.chunks.len() { self.chunks[i] = idle_map.chunks[i].load(Acquire); From 51b342c2cb12681ded00eb720ece4cdf0a5056fd Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Thu, 20 
Jul 2023 14:34:42 -0700 Subject: [PATCH 23/26] again --- tokio/src/runtime/scheduler/multi_thread_alt/idle.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs b/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs index da19de30b98..f0d547dcbd4 100644 --- a/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs +++ b/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs @@ -1,5 +1,8 @@ //! Coordinates idling workers + +#![allow(dead_code)] + use crate::loom::sync::atomic::{AtomicBool, AtomicUsize}; use crate::loom::sync::MutexGuard; use crate::runtime::scheduler::multi_thread_alt::{worker, Core, Shared}; From 43331f1ce4cc0ae7e1260e26419ed09d4cee442c Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Thu, 20 Jul 2023 14:36:11 -0700 Subject: [PATCH 24/26] fmt --- tokio/src/runtime/scheduler/multi_thread_alt/idle.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs b/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs index f0d547dcbd4..5440c913be5 100644 --- a/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs +++ b/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs @@ -1,6 +1,5 @@ //! Coordinates idling workers - #![allow(dead_code)] use crate::loom::sync::atomic::{AtomicBool, AtomicUsize}; From fad2ce23120e88a03236426d71f88547dd5a9eda Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Fri, 21 Jul 2023 08:54:40 -0700 Subject: [PATCH 25/26] try again --- .github/workflows/loom.yml | 2 ++ tokio/tests/rt_threaded_alt.rs | 24 ------------------------ 2 files changed, 2 insertions(+), 24 deletions(-) diff --git a/.github/workflows/loom.yml b/.github/workflows/loom.yml index 99348d164f7..3952dfe5d7e 100644 --- a/.github/workflows/loom.yml +++ b/.github/workflows/loom.yml @@ -116,3 +116,5 @@ jobs: working-directory: tokio env: SCOPE: ${{ matrix.scope }} + # TODO: remove this before stabilizing + LOOM_MAX_PREEMPTIONS: 1 diff --git a/tokio/tests/rt_threaded_alt.rs b/tokio/tests/rt_threaded_alt.rs index 9105c362791..b8af6a7b8a9 100644 --- a/tokio/tests/rt_threaded_alt.rs +++ b/tokio/tests/rt_threaded_alt.rs @@ -715,27 +715,3 @@ fn rt() -> runtime::Runtime { .build() .unwrap() } - -#[cfg(tokio_unstable)] -mod unstable { - use super::*; - - #[test] - fn test_disable_lifo_slot() { - let rt = runtime::Builder::new_multi_thread_alt() - .disable_lifo_slot() - .worker_threads(2) - .build() - .unwrap(); - - rt.block_on(async { - tokio::spawn(async { - // Spawn another task and block the thread until completion. If the LIFO slot - // is used then the test doesn't complete. - futures::executor::block_on(tokio::spawn(async {})).unwrap(); - }) - .await - .unwrap(); - }) - } -} From ca7d282e61072911e199d89cac371742c704c506 Mon Sep 17 00:00:00 2001 From: Carl Lerche Date: Fri, 21 Jul 2023 08:55:54 -0700 Subject: [PATCH 26/26] use released loom --- tokio/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tokio/Cargo.toml b/tokio/Cargo.toml index 51e25703460..fa5ec3b8553 100644 --- a/tokio/Cargo.toml +++ b/tokio/Cargo.toml @@ -160,7 +160,7 @@ wasm-bindgen-test = "0.3.0" mio-aio = { version = "0.7.0", features = ["tokio"] } [target.'cfg(loom)'.dev-dependencies] -loom = { git = "https://github.com/tokio-rs/loom", branch = "tokio-exp", features = ["futures", "checkpoint"] } +loom = { version = "0.6", features = ["futures", "checkpoint"] } [package.metadata.docs.rs] all-features = true
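
Reader's aid (not part of the patch series): the diffs above land the alternate scheduler as an opt-in, unstable runtime flavor. Below is a minimal sketch of how that flavor is selected, mirroring the mk_pool helper used in the loom tests; it assumes a build with RUSTFLAGS="--cfg tokio_unstable" and the rt-multi-thread feature enabled, matching the cfg gates used throughout the diffs. The worker_threads and global_queue_interval values are illustrative, taken from the test helpers rather than recommended settings.

    // Sketch only: constructing the unstable multi_thread_alt flavor.
    // Assumes --cfg tokio_unstable and the rt-multi-thread feature.
    fn main() {
        let rt = tokio::runtime::Builder::new_multi_thread_alt()
            .worker_threads(2)
            // Same interval the loom tests settle on to sidestep tuning logic.
            .global_queue_interval(61)
            .build()
            .unwrap();

        // Spawn a task on the alternate scheduler and wait for its result.
        rt.block_on(async {
            let out = tokio::spawn(async { 2 + 2 }).await.unwrap();
            assert_eq!(out, 4);
        });
    }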