diff --git a/.github/labeler.yml b/.github/labeler.yml index 6e53c92aaf7..b69490cc2a9 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -1,8 +1,28 @@ -R-loom: +R-loom-sync: - tokio/src/sync/* - tokio/src/sync/**/* -- tokio-util/src/sync/* -- tokio-util/src/sync/**/* -- tokio/src/runtime/* -- tokio/src/runtime/**/* + +R-loom-time-driver: +- tokio/src/runtime/time/* +- tokio/src/runtime/time/**/* + +R-loom-current-thread: +- tokio/src/runtime/scheduler/* +- tokio/src/runtime/scheduler/current_thread/* +- tokio/src/runtime/task/* +- tokio/src/runtime/task/** + +R-loom-multi-thread: +- tokio/src/runtime/scheduler/* +- tokio/src/runtime/scheduler/multi_thread/* +- tokio/src/runtime/scheduler/multi_thread/** +- tokio/src/runtime/task/* +- tokio/src/runtime/task/** + +R-loom-multi-thread-alt: +- tokio/src/runtime/scheduler/* +- tokio/src/runtime/scheduler/multi_thread_alt/* +- tokio/src/runtime/scheduler/multi_thread_alt/** +- tokio/src/runtime/task/* +- tokio/src/runtime/task/** diff --git a/.github/workflows/loom.yml b/.github/workflows/loom.yml index 417c3b470fb..3952dfe5d7e 100644 --- a/.github/workflows/loom.yml +++ b/.github/workflows/loom.yml @@ -8,7 +8,9 @@ on: name: Loom env: - RUSTFLAGS: -Dwarnings + RUSTFLAGS: -Dwarnings --cfg loom --cfg tokio_unstable -C debug_assertions + LOOM_MAX_PREEMPTIONS: 2 + LOOM_MAX_BRANCHES: 10000 RUST_BACKTRACE: 1 # Change to specific Rust release to pin rust_stable: stable @@ -17,26 +19,91 @@ permissions: contents: read jobs: - loom: - name: loom + loom-sync: + name: loom tokio::sync # base_ref is null when it's not a pull request - if: github.repository_owner == 'tokio-rs' && (contains(github.event.pull_request.labels.*.name, 'R-loom') || (github.base_ref == null)) + if: github.repository_owner == 'tokio-rs' && (contains(github.event.pull_request.labels.*.name, 'R-loom-sync') || (github.base_ref == null)) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Install Rust ${{ env.rust_stable }} + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ env.rust_stable }} + - uses: Swatinem/rust-cache@v2 + - name: run tests + run: cargo test --lib --release --features full -- --nocapture sync::tests + working-directory: tokio + + loom-time-driver: + name: loom time driver + # base_ref is null when it's not a pull request + if: github.repository_owner == 'tokio-rs' && (contains(github.event.pull_request.labels.*.name, 'R-loom-time-driver') || (github.base_ref == null)) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Install Rust ${{ env.rust_stable }} + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ env.rust_stable }} + - uses: Swatinem/rust-cache@v2 + - name: run tests + run: cargo test --lib --release --features full -- --nocapture runtime::time::tests + working-directory: tokio + + loom-current-thread: + name: loom current-thread scheduler + # base_ref is null when it's not a pull request + if: github.repository_owner == 'tokio-rs' && (contains(github.event.pull_request.labels.*.name, 'R-loom-current-thread') || (github.base_ref == null)) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Install Rust ${{ env.rust_stable }} + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ env.rust_stable }} + - uses: Swatinem/rust-cache@v2 + - name: run tests + run: cargo test --lib --release --features full -- --nocapture loom_current_thread + working-directory: tokio + + loom-multi-thread: + name: loom multi-thread scheduler + # base_ref is null when it's not 
a pull request + if: github.repository_owner == 'tokio-rs' && (contains(github.event.pull_request.labels.*.name, 'R-loom-multi-thread') || (github.base_ref == null)) + runs-on: ubuntu-latest + strategy: + matrix: + include: + - scope: loom_multi_thread::group_a + - scope: loom_multi_thread::group_b + - scope: loom_multi_thread::group_c + - scope: loom_multi_thread::group_d + steps: + - uses: actions/checkout@v3 + - name: Install Rust ${{ env.rust_stable }} + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ env.rust_stable }} + - uses: Swatinem/rust-cache@v2 + - name: loom ${{ matrix.scope }} + run: cargo test --lib --release --features full -- $SCOPE + working-directory: tokio + env: + SCOPE: ${{ matrix.scope }} + + loom-multi-thread-alt: + name: loom ALT multi-thread scheduler + # base_ref is null when it's not a pull request + if: github.repository_owner == 'tokio-rs' && (contains(github.event.pull_request.labels.*.name, 'R-loom-multi-thread-alt') || (github.base_ref == null)) runs-on: ubuntu-latest strategy: matrix: include: - - scope: --skip loom_pool - max_preemptions: 2 - - scope: loom_pool::group_a - max_preemptions: 2 - - scope: loom_pool::group_b - max_preemptions: 2 - - scope: loom_pool::group_c - max_preemptions: 2 - - scope: loom_pool::group_d - max_preemptions: 2 - - scope: time::driver - max_preemptions: 2 + - scope: loom_multi_thread_alt::group_a + - scope: loom_multi_thread_alt::group_b + - scope: loom_multi_thread_alt::group_c + - scope: loom_multi_thread_alt::group_d steps: - uses: actions/checkout@v3 - name: Install Rust ${{ env.rust_stable }} @@ -45,10 +112,9 @@ jobs: toolchain: ${{ env.rust_stable }} - uses: Swatinem/rust-cache@v2 - name: loom ${{ matrix.scope }} - run: cargo test --lib --release --features full -- --nocapture $SCOPE + run: cargo test --lib --release --features full -- $SCOPE working-directory: tokio env: - RUSTFLAGS: --cfg loom --cfg tokio_unstable -Dwarnings -C debug-assertions - LOOM_MAX_PREEMPTIONS: ${{ matrix.max_preemptions }} - LOOM_MAX_BRANCHES: 10000 SCOPE: ${{ matrix.scope }} + # TODO: remove this before stabilizing + LOOM_MAX_PREEMPTIONS: 1 diff --git a/tokio/Cargo.toml b/tokio/Cargo.toml index 0a530ff88fa..fa5ec3b8553 100644 --- a/tokio/Cargo.toml +++ b/tokio/Cargo.toml @@ -160,7 +160,7 @@ wasm-bindgen-test = "0.3.0" mio-aio = { version = "0.7.0", features = ["tokio"] } [target.'cfg(loom)'.dev-dependencies] -loom = { version = "0.5.2", features = ["futures", "checkpoint"] } +loom = { version = "0.6", features = ["futures", "checkpoint"] } [package.metadata.docs.rs] all-features = true diff --git a/tokio/src/loom/std/unsafe_cell.rs b/tokio/src/loom/std/unsafe_cell.rs index 66c1d7943e0..3d6513b4655 100644 --- a/tokio/src/loom/std/unsafe_cell.rs +++ b/tokio/src/loom/std/unsafe_cell.rs @@ -6,10 +6,12 @@ impl UnsafeCell { UnsafeCell(std::cell::UnsafeCell::new(data)) } + #[inline(always)] pub(crate) fn with(&self, f: impl FnOnce(*const T) -> R) -> R { f(self.0.get()) } + #[inline(always)] pub(crate) fn with_mut(&self, f: impl FnOnce(*mut T) -> R) -> R { f(self.0.get()) } diff --git a/tokio/src/runtime/blocking/schedule.rs b/tokio/src/runtime/blocking/schedule.rs index edf775be8be..b4c6a2862b3 100644 --- a/tokio/src/runtime/blocking/schedule.rs +++ b/tokio/src/runtime/blocking/schedule.rs @@ -25,6 +25,8 @@ impl BlockingSchedule { } #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] scheduler::Handle::MultiThread(_) => {} + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + 
scheduler::Handle::MultiThreadAlt(_) => {} } } BlockingSchedule { @@ -45,6 +47,8 @@ impl task::Schedule for BlockingSchedule { } #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] scheduler::Handle::MultiThread(_) => {} + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + scheduler::Handle::MultiThreadAlt(_) => {} } } None diff --git a/tokio/src/runtime/builder.rs b/tokio/src/runtime/builder.rs index af9e0e172f3..d2e10b004ae 100644 --- a/tokio/src/runtime/builder.rs +++ b/tokio/src/runtime/builder.rs @@ -199,6 +199,8 @@ pub(crate) enum Kind { CurrentThread, #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] MultiThread, + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + MultiThreadAlt, } impl Builder { @@ -230,6 +232,26 @@ impl Builder { // The number `61` is fairly arbitrary. I believe this value was copied from golang. Builder::new(Kind::MultiThread, 61) } + + cfg_unstable! { + /// Returns a new builder with the alternate multi thread scheduler + /// selected. + /// + /// The alternate multi threaded scheduler is an in-progress + /// candidate to replace the existing multi threaded scheduler. It + /// currently does not scale as well to 16+ processors. + /// + /// This runtime flavor is currently **not considered production + /// ready**. + /// + /// Configuration methods can be chained on the return value. + #[cfg(feature = "rt-multi-thread")] + #[cfg_attr(docsrs, doc(cfg(feature = "rt-multi-thread")))] + pub fn new_multi_thread_alt() -> Builder { + // The number `61` is fairly arbitrary. I believe this value was copied from golang. + Builder::new(Kind::MultiThreadAlt, 61) + } + } } /// Returns a new runtime builder initialized with default configuration @@ -656,6 +678,8 @@ impl Builder { Kind::CurrentThread => self.build_current_thread_runtime(), #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] Kind::MultiThread => self.build_threaded_runtime(), + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + Kind::MultiThreadAlt => self.build_alt_threaded_runtime(), } } @@ -665,6 +689,8 @@ impl Builder { Kind::CurrentThread => true, #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] Kind::MultiThread => false, + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + Kind::MultiThreadAlt => false, }, enable_io: self.enable_io, enable_time: self.enable_time, @@ -1214,6 +1240,48 @@ cfg_rt_multi_thread! { Ok(Runtime::from_parts(Scheduler::MultiThread(scheduler), handle, blocking_pool)) } + + cfg_unstable! { + fn build_alt_threaded_runtime(&mut self) -> io::Result { + use crate::loom::sys::num_cpus; + use crate::runtime::{Config, runtime::Scheduler}; + use crate::runtime::scheduler::MultiThreadAlt; + + let core_threads = self.worker_threads.unwrap_or_else(num_cpus); + + let (driver, driver_handle) = driver::Driver::new(self.get_cfg())?; + + // Create the blocking pool + let blocking_pool = + blocking::create_blocking_pool(self, self.max_blocking_threads + core_threads); + let blocking_spawner = blocking_pool.spawner().clone(); + + // Generate a rng seed for this runtime. 
+ let seed_generator_1 = self.seed_generator.next_generator(); + let seed_generator_2 = self.seed_generator.next_generator(); + + let (scheduler, handle) = MultiThreadAlt::new( + core_threads, + driver, + driver_handle, + blocking_spawner, + seed_generator_2, + Config { + before_park: self.before_park.clone(), + after_unpark: self.after_unpark.clone(), + global_queue_interval: self.global_queue_interval, + event_interval: self.event_interval, + #[cfg(tokio_unstable)] + unhandled_panic: self.unhandled_panic.clone(), + disable_lifo_slot: self.disable_lifo_slot, + seed_generator: seed_generator_1, + metrics_poll_count_histogram: self.metrics_poll_count_histogram_builder(), + }, + ); + + Ok(Runtime::from_parts(Scheduler::MultiThreadAlt(scheduler), handle, blocking_pool)) + } + } } } diff --git a/tokio/src/runtime/handle.rs b/tokio/src/runtime/handle.rs index 36431df49c0..121ed8815f8 100644 --- a/tokio/src/runtime/handle.rs +++ b/tokio/src/runtime/handle.rs @@ -357,6 +357,8 @@ impl Handle { scheduler::Handle::CurrentThread(_) => RuntimeFlavor::CurrentThread, #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] scheduler::Handle::MultiThread(_) => RuntimeFlavor::MultiThread, + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + scheduler::Handle::MultiThreadAlt(_) => RuntimeFlavor::MultiThreadAlt, } } @@ -385,6 +387,8 @@ impl Handle { scheduler::Handle::CurrentThread(handle) => handle.owned_id(), #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] scheduler::Handle::MultiThread(handle) => handle.owned_id(), + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + scheduler::Handle::MultiThreadAlt(handle) => handle.owned_id(), }; owned_id.into() } @@ -535,6 +539,8 @@ cfg_taskdump! { handle.dump().await }).await }, + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + scheduler::Handle::MultiThreadAlt(_) => panic!("task dump not implemented for this runtime flavor"), } } } diff --git a/tokio/src/runtime/runtime.rs b/tokio/src/runtime/runtime.rs index a52bf1a52d4..ddec2ab5f20 100644 --- a/tokio/src/runtime/runtime.rs +++ b/tokio/src/runtime/runtime.rs @@ -9,6 +9,10 @@ use std::time::Duration; cfg_rt_multi_thread! { use crate::runtime::Builder; use crate::runtime::scheduler::MultiThread; + + cfg_unstable! { + use crate::runtime::scheduler::MultiThreadAlt; + } } /// The Tokio runtime. @@ -109,6 +113,9 @@ pub enum RuntimeFlavor { CurrentThread, /// The flavor that executes tasks across multiple threads. MultiThread, + /// The flavor that executes tasks across multiple threads. + #[cfg(tokio_unstable)] + MultiThreadAlt, } /// The runtime scheduler is either a multi-thread or a current-thread executor. @@ -120,6 +127,10 @@ pub(super) enum Scheduler { /// Execute tasks across multiple threads. #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] MultiThread(MultiThread), + + /// Execute tasks across multiple threads. + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + MultiThreadAlt(MultiThreadAlt), } impl Runtime { @@ -336,6 +347,8 @@ impl Runtime { Scheduler::CurrentThread(exec) => exec.block_on(&self.handle.inner, future), #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] Scheduler::MultiThread(exec) => exec.block_on(&self.handle.inner, future), + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + Scheduler::MultiThreadAlt(exec) => exec.block_on(&self.handle.inner, future), } } @@ -456,6 +469,12 @@ impl Drop for Runtime { // already in the runtime's context. 
multi_thread.shutdown(&self.handle.inner); } + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + Scheduler::MultiThreadAlt(multi_thread) => { + // The threaded scheduler drops its tasks on its worker threads, which is + // already in the runtime's context. + multi_thread.shutdown(&self.handle.inner); + } } } } diff --git a/tokio/src/runtime/scheduler/block_in_place.rs b/tokio/src/runtime/scheduler/block_in_place.rs new file mode 100644 index 00000000000..803ff4504f7 --- /dev/null +++ b/tokio/src/runtime/scheduler/block_in_place.rs @@ -0,0 +1,21 @@ +use crate::runtime::scheduler; + +#[track_caller] +pub(crate) fn block_in_place(f: F) -> R +where + F: FnOnce() -> R, +{ + #[cfg(tokio_unstable)] + { + use crate::runtime::{Handle, RuntimeFlavor::MultiThreadAlt}; + + match Handle::try_current().map(|h| h.runtime_flavor()) { + Ok(MultiThreadAlt) => { + return scheduler::multi_thread_alt::block_in_place(f); + } + _ => {} + } + } + + scheduler::multi_thread::block_in_place(f) +} diff --git a/tokio/src/runtime/scheduler/current_thread.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs similarity index 99% rename from tokio/src/runtime/scheduler/current_thread.rs rename to tokio/src/runtime/scheduler/current_thread/mod.rs index 80943aea87b..1100147d5cf 100644 --- a/tokio/src/runtime/scheduler/current_thread.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -523,6 +523,10 @@ cfg_metrics! { &self.shared.worker_metrics } + pub(crate) fn worker_local_queue_depth(&self, worker: usize) -> usize { + self.worker_metrics(worker).queue_depth() + } + pub(crate) fn num_blocking_threads(&self) -> usize { self.blocking_spawner.num_threads() } diff --git a/tokio/src/runtime/scheduler/inject/rt_multi_thread.rs b/tokio/src/runtime/scheduler/inject/rt_multi_thread.rs index 07d1063c5d8..1d5f0403b5d 100644 --- a/tokio/src/runtime/scheduler/inject/rt_multi_thread.rs +++ b/tokio/src/runtime/scheduler/inject/rt_multi_thread.rs @@ -75,6 +75,21 @@ impl Shared { debug_assert!(unsafe { batch_tail.get_queue_next().is_none() }); let mut synced = shared.lock(); + + if synced.as_mut().is_closed { + drop(synced); + + let mut curr = Some(batch_head); + + while let Some(task) = curr { + curr = task.get_queue_next(); + + let _ = unsafe { task::Notified::::from_raw(task) }; + } + + return; + } + let synced = synced.as_mut(); if let Some(tail) = synced.tail { diff --git a/tokio/src/runtime/scheduler/mod.rs b/tokio/src/runtime/scheduler/mod.rs index 3e3151711f5..de49dae5e81 100644 --- a/tokio/src/runtime/scheduler/mod.rs +++ b/tokio/src/runtime/scheduler/mod.rs @@ -10,11 +10,19 @@ cfg_rt! { } cfg_rt_multi_thread! { + mod block_in_place; + pub(crate) use block_in_place::block_in_place; + mod lock; use lock::Lock; pub(crate) mod multi_thread; pub(crate) use multi_thread::MultiThread; + + cfg_unstable! { + pub(crate) mod multi_thread_alt; + pub(crate) use multi_thread_alt::MultiThread as MultiThreadAlt; + } } use crate::runtime::driver; @@ -27,6 +35,9 @@ pub(crate) enum Handle { #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] MultiThread(Arc), + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + MultiThreadAlt(Arc), + // TODO: This is to avoid triggering "dead code" warnings many other places // in the codebase. 
Remove this during a later cleanup #[cfg(not(feature = "rt"))] @@ -40,6 +51,9 @@ pub(super) enum Context { #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] MultiThread(multi_thread::Context), + + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + MultiThreadAlt(multi_thread_alt::Context), } impl Handle { @@ -52,6 +66,9 @@ impl Handle { #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] Handle::MultiThread(ref h) => &h.driver, + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThreadAlt(ref h) => &h.driver, + #[cfg(not(feature = "rt"))] Handle::Disabled => unreachable!(), } @@ -67,6 +84,20 @@ cfg_rt! { use crate::util::RngSeedGenerator; use std::task::Waker; + macro_rules! match_flavor { + ($self:expr, $ty:ident($h:ident) => $e:expr) => { + match $self { + $ty::CurrentThread($h) => $e, + + #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] + $ty::MultiThread($h) => $e, + + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + $ty::MultiThreadAlt($h) => $e, + } + } + } + impl Handle { #[track_caller] pub(crate) fn current() -> Handle { @@ -77,12 +108,7 @@ cfg_rt! { } pub(crate) fn blocking_spawner(&self) -> &blocking::Spawner { - match self { - Handle::CurrentThread(h) => &h.blocking_spawner, - - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Handle::MultiThread(h) => &h.blocking_spawner, - } + match_flavor!(self, Handle(h) => &h.blocking_spawner) } pub(crate) fn spawn(&self, future: F, id: Id) -> JoinHandle @@ -95,6 +121,9 @@ cfg_rt! { #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] Handle::MultiThread(h) => multi_thread::Handle::spawn(h, future, id), + + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThreadAlt(h) => multi_thread_alt::Handle::spawn(h, future, id), } } @@ -104,16 +133,14 @@ cfg_rt! { #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] Handle::MultiThread(ref h) => h.shutdown(), + + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThreadAlt(ref h) => h.shutdown(), } } pub(crate) fn seed_generator(&self) -> &RngSeedGenerator { - match self { - Handle::CurrentThread(h) => &h.seed_generator, - - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Handle::MultiThread(h) => &h.seed_generator, - } + match_flavor!(self, Handle(h) => &h.seed_generator) } pub(crate) fn as_current_thread(&self) -> &Arc { @@ -123,6 +150,17 @@ cfg_rt! { _ => panic!("not a CurrentThread handle"), } } + + cfg_rt_multi_thread! { + cfg_unstable! { + pub(crate) fn expect_multi_thread_alt(&self) -> &Arc { + match self { + Handle::MultiThreadAlt(handle) => handle, + _ => panic!("not a `MultiThreadAlt` handle"), + } + } + } + } } cfg_metrics! { @@ -134,71 +172,41 @@ cfg_rt! 
{ Handle::CurrentThread(_) => 1, #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] Handle::MultiThread(handle) => handle.num_workers(), + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + Handle::MultiThreadAlt(handle) => handle.num_workers(), } } pub(crate) fn num_blocking_threads(&self) -> usize { - match self { - Handle::CurrentThread(handle) => handle.num_blocking_threads(), - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Handle::MultiThread(handle) => handle.num_blocking_threads(), - } + match_flavor!(self, Handle(handle) => handle.num_blocking_threads()) } pub(crate) fn num_idle_blocking_threads(&self) -> usize { - match self { - Handle::CurrentThread(handle) => handle.num_idle_blocking_threads(), - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Handle::MultiThread(handle) => handle.num_idle_blocking_threads(), - } + match_flavor!(self, Handle(handle) => handle.num_idle_blocking_threads()) } pub(crate) fn active_tasks_count(&self) -> usize { - match self { - Handle::CurrentThread(handle) => handle.active_tasks_count(), - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Handle::MultiThread(handle) => handle.active_tasks_count(), - } + match_flavor!(self, Handle(handle) => handle.active_tasks_count()) } pub(crate) fn scheduler_metrics(&self) -> &SchedulerMetrics { - match self { - Handle::CurrentThread(handle) => handle.scheduler_metrics(), - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Handle::MultiThread(handle) => handle.scheduler_metrics(), - } + match_flavor!(self, Handle(handle) => handle.scheduler_metrics()) } pub(crate) fn worker_metrics(&self, worker: usize) -> &WorkerMetrics { - match self { - Handle::CurrentThread(handle) => handle.worker_metrics(worker), - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Handle::MultiThread(handle) => handle.worker_metrics(worker), - } + match_flavor!(self, Handle(handle) => handle.worker_metrics(worker)) } pub(crate) fn injection_queue_depth(&self) -> usize { - match self { - Handle::CurrentThread(handle) => handle.injection_queue_depth(), - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Handle::MultiThread(handle) => handle.injection_queue_depth(), - } + match_flavor!(self, Handle(handle) => handle.injection_queue_depth()) } pub(crate) fn worker_local_queue_depth(&self, worker: usize) -> usize { - match self { - Handle::CurrentThread(handle) => handle.worker_metrics(worker).queue_depth(), - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Handle::MultiThread(handle) => handle.worker_local_queue_depth(worker), - } + match_flavor!(self, Handle(handle) => handle.worker_local_queue_depth(worker)) } pub(crate) fn blocking_queue_depth(&self) -> usize { - match self { - Handle::CurrentThread(handle) => handle.blocking_queue_depth(), - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Handle::MultiThread(handle) => handle.blocking_queue_depth(), - } + match_flavor!(self, Handle(handle) => handle.blocking_queue_depth()) } } } @@ -214,11 +222,7 @@ cfg_rt! { } pub(crate) fn defer(&self, waker: &Waker) { - match self { - Context::CurrentThread(context) => context.defer(waker), - #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] - Context::MultiThread(context) => context.defer(waker), - } + match_flavor!(self, Context(context) => context.defer(waker)) } cfg_rt_multi_thread! { @@ -229,6 +233,16 @@ cfg_rt! { _ => panic!("expected `MultiThread::Context`") } } + + cfg_unstable! 
{ + #[track_caller] + pub(crate) fn expect_multi_thread_alt(&self) -> &multi_thread_alt::Context { + match self { + Context::MultiThreadAlt(context) => context, + _ => panic!("expected `MultiThreadAlt::Context`") + } + } + } } } } diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/counters.rs b/tokio/src/runtime/scheduler/multi_thread_alt/counters.rs new file mode 100644 index 00000000000..edda0d46d1e --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/counters.rs @@ -0,0 +1,166 @@ +#[cfg(tokio_internal_mt_counters)] +mod imp { + use std::sync::atomic::AtomicUsize; + use std::sync::atomic::Ordering::Relaxed; + + static NUM_MAINTENANCE: AtomicUsize = AtomicUsize::new(0); + static NUM_NOTIFY_LOCAL: AtomicUsize = AtomicUsize::new(0); + static NUM_NOTIFY_REMOTE: AtomicUsize = AtomicUsize::new(0); + static NUM_UNPARKS_LOCAL: AtomicUsize = AtomicUsize::new(0); + static NUM_UNPARKS_REMOTE: AtomicUsize = AtomicUsize::new(0); + static NUM_LIFO_SCHEDULES: AtomicUsize = AtomicUsize::new(0); + static NUM_LIFO_CAPPED: AtomicUsize = AtomicUsize::new(0); + static NUM_STEALS: AtomicUsize = AtomicUsize::new(0); + static NUM_OVERFLOW: AtomicUsize = AtomicUsize::new(0); + static NUM_PARK: AtomicUsize = AtomicUsize::new(0); + static NUM_POLLS: AtomicUsize = AtomicUsize::new(0); + static NUM_LIFO_POLLS: AtomicUsize = AtomicUsize::new(0); + static NUM_REMOTE_BATCH: AtomicUsize = AtomicUsize::new(0); + static NUM_GLOBAL_QUEUE_INTERVAL: AtomicUsize = AtomicUsize::new(0); + static NUM_NO_AVAIL_CORE: AtomicUsize = AtomicUsize::new(0); + static NUM_RELAY_SEARCH: AtomicUsize = AtomicUsize::new(0); + static NUM_SPIN_STALL: AtomicUsize = AtomicUsize::new(0); + static NUM_NO_LOCAL_WORK: AtomicUsize = AtomicUsize::new(0); + + impl Drop for super::Counters { + fn drop(&mut self) { + let notifies_local = NUM_NOTIFY_LOCAL.load(Relaxed); + let notifies_remote = NUM_NOTIFY_REMOTE.load(Relaxed); + let unparks_local = NUM_UNPARKS_LOCAL.load(Relaxed); + let unparks_remote = NUM_UNPARKS_REMOTE.load(Relaxed); + let maintenance = NUM_MAINTENANCE.load(Relaxed); + let lifo_scheds = NUM_LIFO_SCHEDULES.load(Relaxed); + let lifo_capped = NUM_LIFO_CAPPED.load(Relaxed); + let num_steals = NUM_STEALS.load(Relaxed); + let num_overflow = NUM_OVERFLOW.load(Relaxed); + let num_park = NUM_PARK.load(Relaxed); + let num_polls = NUM_POLLS.load(Relaxed); + let num_lifo_polls = NUM_LIFO_POLLS.load(Relaxed); + let num_remote_batch = NUM_REMOTE_BATCH.load(Relaxed); + let num_global_queue_interval = NUM_GLOBAL_QUEUE_INTERVAL.load(Relaxed); + let num_no_avail_core = NUM_NO_AVAIL_CORE.load(Relaxed); + let num_relay_search = NUM_RELAY_SEARCH.load(Relaxed); + let num_spin_stall = NUM_SPIN_STALL.load(Relaxed); + let num_no_local_work = NUM_NO_LOCAL_WORK.load(Relaxed); + + println!("---"); + println!("notifies (remote): {}", notifies_remote); + println!(" notifies (local): {}", notifies_local); + println!(" unparks (local): {}", unparks_local); + println!(" unparks (remote): {}", unparks_remote); + println!(" notify, no core: {}", num_no_avail_core); + println!(" maintenance: {}", maintenance); + println!(" LIFO schedules: {}", lifo_scheds); + println!(" LIFO capped: {}", lifo_capped); + println!(" steals: {}", num_steals); + println!(" queue overflows: {}", num_overflow); + println!(" parks: {}", num_park); + println!(" polls: {}", num_polls); + println!(" polls (LIFO): {}", num_lifo_polls); + println!("remote task batch: {}", num_remote_batch); + println!("global Q interval: {}", num_global_queue_interval); + println!(" relay search: 
{}", num_relay_search); + println!(" spin stall: {}", num_spin_stall); + println!(" no local work: {}", num_no_local_work); + } + } + + pub(crate) fn inc_num_inc_notify_local() { + NUM_NOTIFY_LOCAL.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_notify_remote() { + NUM_NOTIFY_REMOTE.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_unparks_local() { + NUM_UNPARKS_LOCAL.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_unparks_remote() { + NUM_UNPARKS_REMOTE.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_maintenance() { + NUM_MAINTENANCE.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_lifo_schedules() { + NUM_LIFO_SCHEDULES.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_lifo_capped() { + NUM_LIFO_CAPPED.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_steals() { + NUM_STEALS.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_overflows() { + NUM_OVERFLOW.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_parks() { + NUM_PARK.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_polls() { + NUM_POLLS.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_lifo_polls() { + NUM_LIFO_POLLS.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_remote_batch() { + NUM_REMOTE_BATCH.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_global_queue_interval() { + NUM_GLOBAL_QUEUE_INTERVAL.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_notify_no_core() { + NUM_NO_AVAIL_CORE.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_relay_search() { + NUM_RELAY_SEARCH.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_spin_stall() { + NUM_SPIN_STALL.fetch_add(1, Relaxed); + } + + pub(crate) fn inc_num_no_local_work() { + NUM_NO_LOCAL_WORK.fetch_add(1, Relaxed); + } +} + +#[cfg(not(tokio_internal_mt_counters))] +mod imp { + pub(crate) fn inc_num_inc_notify_local() {} + pub(crate) fn inc_num_notify_remote() {} + pub(crate) fn inc_num_unparks_local() {} + pub(crate) fn inc_num_unparks_remote() {} + pub(crate) fn inc_num_maintenance() {} + pub(crate) fn inc_lifo_schedules() {} + pub(crate) fn inc_lifo_capped() {} + pub(crate) fn inc_num_steals() {} + pub(crate) fn inc_num_overflows() {} + pub(crate) fn inc_num_parks() {} + pub(crate) fn inc_num_polls() {} + pub(crate) fn inc_num_lifo_polls() {} + pub(crate) fn inc_num_remote_batch() {} + pub(crate) fn inc_global_queue_interval() {} + pub(crate) fn inc_notify_no_core() {} + pub(crate) fn inc_num_relay_search() {} + pub(crate) fn inc_num_spin_stall() {} + pub(crate) fn inc_num_no_local_work() {} +} + +#[derive(Debug)] +pub(crate) struct Counters; + +pub(super) use imp::*; diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/handle.rs b/tokio/src/runtime/scheduler/multi_thread_alt/handle.rs new file mode 100644 index 00000000000..e0353f8da6e --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/handle.rs @@ -0,0 +1,75 @@ +use crate::future::Future; +use crate::loom::sync::Arc; +use crate::runtime::scheduler::multi_thread_alt::worker; +use crate::runtime::{ + blocking, driver, + task::{self, JoinHandle}, +}; +use crate::util::RngSeedGenerator; + +use std::fmt; + +cfg_metrics! 
{ + mod metrics; +} + +/// Handle to the multi thread scheduler +pub(crate) struct Handle { + /// Task spawner + pub(super) shared: worker::Shared, + + /// Resource driver handles + pub(crate) driver: driver::Handle, + + /// Blocking pool spawner + pub(crate) blocking_spawner: blocking::Spawner, + + /// Current random number generator seed + pub(crate) seed_generator: RngSeedGenerator, +} + +impl Handle { + /// Spawns a future onto the thread pool + pub(crate) fn spawn(me: &Arc, future: F, id: task::Id) -> JoinHandle + where + F: crate::future::Future + Send + 'static, + F::Output: Send + 'static, + { + Self::bind_new_task(me, future, id) + } + + pub(crate) fn shutdown(&self) { + self.shared.close(); + self.driver.unpark(); + } + + pub(super) fn bind_new_task(me: &Arc, future: T, id: task::Id) -> JoinHandle + where + T: Future + Send + 'static, + T::Output: Send + 'static, + { + let (handle, notified) = me.shared.owned.bind(future, me.clone(), id); + + if let Some(notified) = notified { + me.shared.schedule_task(notified, false); + } + + handle + } +} + +cfg_unstable! { + use std::num::NonZeroU64; + + impl Handle { + pub(crate) fn owned_id(&self) -> NonZeroU64 { + self.shared.owned.id + } + } +} + +impl fmt::Debug for Handle { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("multi_thread::Handle { ... }").finish() + } +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/handle/metrics.rs b/tokio/src/runtime/scheduler/multi_thread_alt/handle/metrics.rs new file mode 100644 index 00000000000..838694fc89e --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/handle/metrics.rs @@ -0,0 +1,41 @@ +use super::Handle; + +use crate::runtime::{SchedulerMetrics, WorkerMetrics}; + +impl Handle { + pub(crate) fn num_workers(&self) -> usize { + self.shared.worker_metrics.len() + } + + pub(crate) fn num_blocking_threads(&self) -> usize { + self.blocking_spawner.num_threads() + } + + pub(crate) fn num_idle_blocking_threads(&self) -> usize { + self.blocking_spawner.num_idle_threads() + } + + pub(crate) fn active_tasks_count(&self) -> usize { + self.shared.owned.active_tasks_count() + } + + pub(crate) fn scheduler_metrics(&self) -> &SchedulerMetrics { + &self.shared.scheduler_metrics + } + + pub(crate) fn worker_metrics(&self, worker: usize) -> &WorkerMetrics { + &self.shared.worker_metrics[worker] + } + + pub(crate) fn injection_queue_depth(&self) -> usize { + self.shared.injection_queue_depth() + } + + pub(crate) fn worker_local_queue_depth(&self, worker: usize) -> usize { + self.shared.worker_local_queue_depth(worker) + } + + pub(crate) fn blocking_queue_depth(&self) -> usize { + self.blocking_spawner.queue_depth() + } +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/handle/taskdump.rs b/tokio/src/runtime/scheduler/multi_thread_alt/handle/taskdump.rs new file mode 100644 index 00000000000..477d857d88f --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/handle/taskdump.rs @@ -0,0 +1,26 @@ +use super::Handle; + +use crate::runtime::Dump; + +impl Handle { + pub(crate) async fn dump(&self) -> Dump { + let trace_status = &self.shared.trace_status; + + // If a dump is in progress, block. + trace_status.start_trace_request(&self).await; + + let result = loop { + if let Some(result) = trace_status.take_result() { + break result; + } else { + self.notify_all(); + trace_status.result_ready.notified().await; + } + }; + + // Allow other queued dumps to proceed. 
+ trace_status.end_trace_request(&self).await; + + result + } +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs b/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs new file mode 100644 index 00000000000..5440c913be5 --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/idle.rs @@ -0,0 +1,434 @@ +//! Coordinates idling workers + +#![allow(dead_code)] + +use crate::loom::sync::atomic::{AtomicBool, AtomicUsize}; +use crate::loom::sync::MutexGuard; +use crate::runtime::scheduler::multi_thread_alt::{worker, Core, Shared}; + +use std::sync::atomic::Ordering::{AcqRel, Acquire, Release}; + +pub(super) struct Idle { + /// Number of searching cores + num_searching: AtomicUsize, + + /// Number of idle cores + num_idle: AtomicUsize, + + /// Map of idle cores + // idle_map: IdleMap, + + /// Used to catch false-negatives when waking workers + needs_searching: AtomicBool, + + /// Total number of cores + num_cores: usize, +} + +pub(super) struct IdleMap { + chunks: Vec, +} + +pub(super) struct Snapshot { + // chunks: Vec, +} + +/// Data synchronized by the scheduler mutex +pub(super) struct Synced { + /// Worker IDs that are currently sleeping + sleepers: Vec, + + /// Cores available for workers + available_cores: Vec>, +} + +impl Idle { + pub(super) fn new(cores: Vec>, num_workers: usize) -> (Idle, Synced) { + let idle = Idle { + num_searching: AtomicUsize::new(0), + num_idle: AtomicUsize::new(cores.len()), + // idle_map: IdleMap::new(&cores), + needs_searching: AtomicBool::new(false), + num_cores: cores.len(), + }; + + let synced = Synced { + sleepers: Vec::with_capacity(num_workers), + available_cores: cores, + }; + + (idle, synced) + } + + pub(super) fn num_idle(&self, synced: &Synced) -> usize { + debug_assert_eq!(synced.available_cores.len(), self.num_idle.load(Acquire)); + synced.available_cores.len() + } + + pub(super) fn num_searching(&self) -> usize { + self.num_searching.load(Acquire) + } + + pub(super) fn snapshot(&self, _snapshot: &mut Snapshot) { + // snapshot.update(&self.idle_map) + } + + /// Try to acquire an available core + pub(super) fn try_acquire_available_core(&self, synced: &mut Synced) -> Option> { + let ret = synced.available_cores.pop(); + + if let Some(_core) = &ret { + // Decrement the number of idle cores + let num_idle = self.num_idle.load(Acquire) - 1; + debug_assert_eq!(num_idle, synced.available_cores.len()); + self.num_idle.store(num_idle, Release); + + // self.idle_map.unset(core.index); + // debug_assert!(self.idle_map.matches(&synced.available_cores)); + } + + ret + } + + /// We need at least one searching worker + pub(super) fn notify_local(&self, shared: &Shared) { + if self.num_searching.load(Acquire) != 0 { + // There already is a searching worker. Note, that this could be a + // false positive. However, because this method is called **from** a + // worker, we know that there is at least one worker currently + // awake, so the scheduler won't deadlock. + return; + } + + if self.num_idle.load(Acquire) == 0 { + self.needs_searching.store(true, Release); + return; + } + + // There aren't any searching workers. Try to initialize one + if self + .num_searching + .compare_exchange(0, 1, AcqRel, Acquire) + .is_err() + { + // Failing the compare_exchange means another thread concurrently + // launched a searching worker. 
+ return; + } + + super::counters::inc_num_unparks_local(); + + // Acquire the lock + let synced = shared.synced.lock(); + self.notify_synced(synced, shared); + } + + /// Notifies a single worker + pub(super) fn notify_remote(&self, synced: MutexGuard<'_, worker::Synced>, shared: &Shared) { + if synced.idle.sleepers.is_empty() { + self.needs_searching.store(true, Release); + return; + } + + // We need to establish a stronger barrier than with `notify_local` + if self + .num_searching + .compare_exchange(0, 1, AcqRel, Acquire) + .is_err() + { + return; + } + + self.notify_synced(synced, shared); + } + + /// Notify a worker while synced + fn notify_synced(&self, mut synced: MutexGuard<'_, worker::Synced>, shared: &Shared) { + // Find a sleeping worker + if let Some(worker) = synced.idle.sleepers.pop() { + // Find an available core + if let Some(mut core) = synced.idle.available_cores.pop() { + debug_assert!(!core.is_searching); + core.is_searching = true; + + // self.idle_map.unset(core.index); + // debug_assert!(self.idle_map.matches(&synced.idle.available_cores)); + + // Assign the core to the worker + synced.assigned_cores[worker] = Some(core); + + let num_idle = synced.idle.available_cores.len(); + debug_assert_eq!(num_idle, self.num_idle.load(Acquire) - 1); + + // Update the number of sleeping workers + self.num_idle.store(num_idle, Release); + + // Drop the lock before notifying the condvar. + drop(synced); + + super::counters::inc_num_unparks_remote(); + + // Notify the worker + shared.condvars[worker].notify_one(); + return; + } else { + synced.idle.sleepers.push(worker); + } + } + + super::counters::inc_notify_no_core(); + + // Set the `needs_searching` flag, this happens *while* the lock is held. + self.needs_searching.store(true, Release); + self.num_searching.fetch_sub(1, Release); + + // Explicit mutex guard drop to show that holding the guard to this + // point is significant. `needs_searching` and `num_searching` must be + // updated in the critical section. + drop(synced); + } + + pub(super) fn notify_mult( + &self, + synced: &mut worker::Synced, + workers: &mut Vec, + num: usize, + ) { + debug_assert!(workers.is_empty()); + + for _ in 0..num { + if let Some(worker) = synced.idle.sleepers.pop() { + if let Some(core) = synced.idle.available_cores.pop() { + debug_assert!(!core.is_searching); + + // self.idle_map.unset(core.index); + + synced.assigned_cores[worker] = Some(core); + + workers.push(worker); + + continue; + } else { + synced.idle.sleepers.push(worker); + } + } + + break; + } + + if !workers.is_empty() { + // debug_assert!(self.idle_map.matches(&synced.idle.available_cores)); + let num_idle = synced.idle.available_cores.len(); + self.num_idle.store(num_idle, Release); + } else { + debug_assert_eq!( + synced.idle.available_cores.len(), + self.num_idle.load(Acquire) + ); + self.needs_searching.store(true, Release); + } + } + + pub(super) fn shutdown(&self, synced: &mut worker::Synced, shared: &Shared) { + // Wake every sleeping worker and assign a core to it. There may not be + // enough sleeping workers for all cores, but other workers will + // eventually find the cores and shut them down. 
+ while !synced.idle.sleepers.is_empty() && !synced.idle.available_cores.is_empty() { + let worker = synced.idle.sleepers.pop().unwrap(); + let core = synced.idle.available_cores.pop().unwrap(); + + // self.idle_map.unset(core.index); + + synced.assigned_cores[worker] = Some(core); + shared.condvars[worker].notify_one(); + + self.num_idle + .store(synced.idle.available_cores.len(), Release); + } + + // debug_assert!(self.idle_map.matches(&synced.idle.available_cores)); + + // Wake up any other workers + while let Some(index) = synced.idle.sleepers.pop() { + shared.condvars[index].notify_one(); + } + } + + /// The worker releases the given core, making it available to other workers + /// that are waiting. + pub(super) fn release_core(&self, synced: &mut worker::Synced, core: Box) { + // The core should not be searching at this point + debug_assert!(!core.is_searching); + + // Check that this isn't the final worker to go idle *and* + // `needs_searching` is set. + debug_assert!(!self.needs_searching.load(Acquire) || num_active_workers(&synced.idle) > 1); + + let num_idle = synced.idle.available_cores.len(); + debug_assert_eq!(num_idle, self.num_idle.load(Acquire)); + + // self.idle_map.set(core.index); + + // Store the core in the list of available cores + synced.idle.available_cores.push(core); + + // debug_assert!(self.idle_map.matches(&synced.idle.available_cores)); + + // Update `num_idle` + self.num_idle.store(num_idle + 1, Release); + } + + pub(super) fn transition_worker_to_parked(&self, synced: &mut worker::Synced, index: usize) { + // Store the worker index in the list of sleepers + synced.idle.sleepers.push(index); + + // The worker's assigned core slot should be empty + debug_assert!(synced.assigned_cores[index].is_none()); + } + + pub(super) fn try_transition_worker_to_searching(&self, core: &mut Core) { + debug_assert!(!core.is_searching); + + let num_searching = self.num_searching.load(Acquire); + let num_idle = self.num_idle.load(Acquire); + + if 2 * num_searching >= self.num_cores - num_idle { + return; + } + + self.transition_worker_to_searching(core); + } + + /// Needs to happen while synchronized in order to avoid races + pub(super) fn transition_worker_to_searching_if_needed( + &self, + _synced: &mut Synced, + core: &mut Core, + ) -> bool { + if self.needs_searching.load(Acquire) { + // Needs to be called while holding the lock + self.transition_worker_to_searching(core); + true + } else { + false + } + } + + fn transition_worker_to_searching(&self, core: &mut Core) { + core.is_searching = true; + self.num_searching.fetch_add(1, AcqRel); + self.needs_searching.store(false, Release); + } + + /// A lightweight transition from searching -> running. + /// + /// Returns `true` if this is the final searching worker. The caller + /// **must** notify a new worker. 
+ pub(super) fn transition_worker_from_searching(&self, core: &mut Core) -> bool { + debug_assert!(core.is_searching); + core.is_searching = false; + + let prev = self.num_searching.fetch_sub(1, AcqRel); + debug_assert!(prev > 0); + + prev == 1 + } +} + +const BITS: usize = usize::BITS as usize; +const BIT_MASK: usize = (usize::BITS - 1) as usize; + +impl IdleMap { + fn new(cores: &[Box]) -> IdleMap { + let ret = IdleMap::new_n(num_chunks(cores.len())); + ret.set_all(cores); + + ret + } + + fn new_n(n: usize) -> IdleMap { + let chunks = (0..n).map(|_| AtomicUsize::new(0)).collect(); + IdleMap { chunks } + } + + fn set(&self, index: usize) { + let (chunk, mask) = index_to_mask(index); + let prev = self.chunks[chunk].load(Acquire); + let next = prev | mask; + self.chunks[chunk].store(next, Release); + } + + fn set_all(&self, cores: &[Box]) { + for core in cores { + self.set(core.index); + } + } + + fn unset(&self, index: usize) { + let (chunk, mask) = index_to_mask(index); + let prev = self.chunks[chunk].load(Acquire); + let next = prev & !mask; + self.chunks[chunk].store(next, Release); + } + + fn matches(&self, idle_cores: &[Box]) -> bool { + let expect = IdleMap::new_n(self.chunks.len()); + expect.set_all(idle_cores); + + for (i, chunk) in expect.chunks.iter().enumerate() { + if chunk.load(Acquire) != self.chunks[i].load(Acquire) { + return false; + } + } + + true + } +} + +impl Snapshot { + pub(crate) fn new(_idle: &Idle) -> Snapshot { + /* + let chunks = vec![0; idle.idle_map.chunks.len()]; + let mut ret = Snapshot { chunks }; + ret.update(&idle.idle_map); + ret + */ + Snapshot {} + } + + fn update(&mut self, _idle_map: &IdleMap) { + /* + for i in 0..self.chunks.len() { + self.chunks[i] = idle_map.chunks[i].load(Acquire); + } + */ + } + + /* + pub(super) fn is_idle(&self, index: usize) -> bool { + let (chunk, mask) = index_to_mask(index); + debug_assert!( + chunk < self.chunks.len(), + "index={}; chunks={}", + index, + self.chunks.len() + ); + self.chunks[chunk] & mask == mask + } + */ +} + +fn num_chunks(max_cores: usize) -> usize { + (max_cores / BITS) + 1 +} + +fn index_to_mask(index: usize) -> (usize, usize) { + let mask = 1 << (index & BIT_MASK); + let chunk = index / BITS; + + (chunk, mask) +} + +fn num_active_workers(synced: &Synced) -> usize { + synced.available_cores.capacity() - synced.available_cores.len() +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/mod.rs b/tokio/src/runtime/scheduler/multi_thread_alt/mod.rs new file mode 100644 index 00000000000..e30c9b4783b --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/mod.rs @@ -0,0 +1,91 @@ +//! Multi-threaded runtime + +mod counters; +use counters::Counters; + +mod handle; +pub(crate) use handle::Handle; + +mod overflow; +pub(crate) use overflow::Overflow; + +mod idle; +use self::idle::Idle; + +mod stats; +pub(crate) use stats::Stats; + +pub(crate) mod queue; + +mod worker; +use worker::Core; +pub(crate) use worker::{Context, Shared}; + +// TODO: implement task dump +mod trace_mock; +use trace_mock::TraceStatus; + +pub(crate) use worker::block_in_place; + +use crate::runtime::{ + self, blocking, + driver::{self, Driver}, + scheduler, Config, +}; +use crate::util::RngSeedGenerator; + +use std::fmt; +use std::future::Future; + +/// Work-stealing based thread pool for executing futures. 
+pub(crate) struct MultiThread; + +// ===== impl MultiThread ===== + +impl MultiThread { + pub(crate) fn new( + size: usize, + driver: Driver, + driver_handle: driver::Handle, + blocking_spawner: blocking::Spawner, + seed_generator: RngSeedGenerator, + config: Config, + ) -> (MultiThread, runtime::Handle) { + let handle = worker::create( + size, + driver, + driver_handle, + blocking_spawner, + seed_generator, + config, + ); + + (MultiThread, handle) + } + + /// Blocks the current thread waiting for the future to complete. + /// + /// The future will execute on the current thread, but all spawned tasks + /// will be executed on the thread pool. + pub(crate) fn block_on(&self, handle: &scheduler::Handle, future: F) -> F::Output + where + F: Future, + { + crate::runtime::context::enter_runtime(handle, true, |blocking| { + blocking.block_on(future).expect("failed to park thread") + }) + } + + pub(crate) fn shutdown(&mut self, handle: &scheduler::Handle) { + match handle { + scheduler::Handle::MultiThreadAlt(handle) => handle.shutdown(), + _ => panic!("expected MultiThread scheduler"), + } + } +} + +impl fmt::Debug for MultiThread { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("MultiThread").finish() + } +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/overflow.rs b/tokio/src/runtime/scheduler/multi_thread_alt/overflow.rs new file mode 100644 index 00000000000..ab664811cff --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/overflow.rs @@ -0,0 +1,26 @@ +use crate::runtime::task; + +#[cfg(test)] +use std::cell::RefCell; + +pub(crate) trait Overflow { + fn push(&self, task: task::Notified); + + fn push_batch(&self, iter: I) + where + I: Iterator>; +} + +#[cfg(test)] +impl Overflow for RefCell>> { + fn push(&self, task: task::Notified) { + self.borrow_mut().push(task); + } + + fn push_batch(&self, iter: I) + where + I: Iterator>, + { + self.borrow_mut().extend(iter); + } +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/park.rs b/tokio/src/runtime/scheduler/multi_thread_alt/park.rs new file mode 100644 index 00000000000..0a00ea004ee --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/park.rs @@ -0,0 +1,232 @@ +//! Parks the runtime. +//! +//! A combination of the various resource driver park handles. + +use crate::loom::sync::atomic::AtomicUsize; +use crate::loom::sync::{Arc, Condvar, Mutex}; +use crate::runtime::driver::{self, Driver}; +use crate::util::TryLock; + +use std::sync::atomic::Ordering::SeqCst; +use std::time::Duration; + +pub(crate) struct Parker { + inner: Arc, +} + +pub(crate) struct Unparker { + inner: Arc, +} + +struct Inner { + /// Avoids entering the park if possible + state: AtomicUsize, + + /// Used to coordinate access to the driver / condvar + mutex: Mutex<()>, + + /// Condvar to block on if the driver is unavailable. + condvar: Condvar, + + /// Resource (I/O, time, ...) driver + shared: Arc, +} + +const EMPTY: usize = 0; +const PARKED_CONDVAR: usize = 1; +const PARKED_DRIVER: usize = 2; +const NOTIFIED: usize = 3; + +/// Shared across multiple Parker handles +struct Shared { + /// Shared driver. 
Only one thread at a time can use this + driver: TryLock, +} + +impl Parker { + pub(crate) fn new(driver: Driver) -> Parker { + Parker { + inner: Arc::new(Inner { + state: AtomicUsize::new(EMPTY), + mutex: Mutex::new(()), + condvar: Condvar::new(), + shared: Arc::new(Shared { + driver: TryLock::new(driver), + }), + }), + } + } + + pub(crate) fn unpark(&self) -> Unparker { + Unparker { + inner: self.inner.clone(), + } + } + + pub(crate) fn park(&mut self, handle: &driver::Handle) { + self.inner.park(handle); + } + + pub(crate) fn park_timeout(&mut self, handle: &driver::Handle, duration: Duration) { + // Only parking with zero is supported... + assert_eq!(duration, Duration::from_millis(0)); + + if let Some(mut driver) = self.inner.shared.driver.try_lock() { + driver.park_timeout(handle, duration) + } + } + + pub(crate) fn shutdown(&mut self, handle: &driver::Handle) { + self.inner.shutdown(handle); + } +} + +impl Clone for Parker { + fn clone(&self) -> Parker { + Parker { + inner: Arc::new(Inner { + state: AtomicUsize::new(EMPTY), + mutex: Mutex::new(()), + condvar: Condvar::new(), + shared: self.inner.shared.clone(), + }), + } + } +} + +impl Unparker { + pub(crate) fn unpark(&self, driver: &driver::Handle) { + self.inner.unpark(driver); + } +} + +impl Inner { + /// Parks the current thread for at most `dur`. + fn park(&self, handle: &driver::Handle) { + // If we were previously notified then we consume this notification and + // return quickly. + if self + .state + .compare_exchange(NOTIFIED, EMPTY, SeqCst, SeqCst) + .is_ok() + { + return; + } + + if let Some(mut driver) = self.shared.driver.try_lock() { + self.park_driver(&mut driver, handle); + } else { + self.park_condvar(); + } + } + + fn park_condvar(&self) { + // Otherwise we need to coordinate going to sleep + let mut m = self.mutex.lock(); + + match self + .state + .compare_exchange(EMPTY, PARKED_CONDVAR, SeqCst, SeqCst) + { + Ok(_) => {} + Err(NOTIFIED) => { + // We must read here, even though we know it will be `NOTIFIED`. + // This is because `unpark` may have been called again since we read + // `NOTIFIED` in the `compare_exchange` above. We must perform an + // acquire operation that synchronizes with that `unpark` to observe + // any writes it made before the call to unpark. To do that we must + // read from the write it made to `state`. + let old = self.state.swap(EMPTY, SeqCst); + debug_assert_eq!(old, NOTIFIED, "park state changed unexpectedly"); + + return; + } + Err(actual) => panic!("inconsistent park state; actual = {}", actual), + } + + loop { + m = self.condvar.wait(m).unwrap(); + + if self + .state + .compare_exchange(NOTIFIED, EMPTY, SeqCst, SeqCst) + .is_ok() + { + // got a notification + return; + } + + // spurious wakeup, go back to sleep + } + } + + fn park_driver(&self, driver: &mut Driver, handle: &driver::Handle) { + match self + .state + .compare_exchange(EMPTY, PARKED_DRIVER, SeqCst, SeqCst) + { + Ok(_) => {} + Err(NOTIFIED) => { + // We must read here, even though we know it will be `NOTIFIED`. + // This is because `unpark` may have been called again since we read + // `NOTIFIED` in the `compare_exchange` above. We must perform an + // acquire operation that synchronizes with that `unpark` to observe + // any writes it made before the call to unpark. To do that we must + // read from the write it made to `state`. 
+ let old = self.state.swap(EMPTY, SeqCst); + debug_assert_eq!(old, NOTIFIED, "park state changed unexpectedly"); + + return; + } + Err(actual) => panic!("inconsistent park state; actual = {}", actual), + } + + driver.park(handle); + + match self.state.swap(EMPTY, SeqCst) { + NOTIFIED => {} // got a notification, hurray! + PARKED_DRIVER => {} // no notification, alas + n => panic!("inconsistent park_timeout state: {}", n), + } + } + + fn unpark(&self, driver: &driver::Handle) { + // To ensure the unparked thread will observe any writes we made before + // this call, we must perform a release operation that `park` can + // synchronize with. To do that we must write `NOTIFIED` even if `state` + // is already `NOTIFIED`. That is why this must be a swap rather than a + // compare-and-swap that returns if it reads `NOTIFIED` on failure. + match self.state.swap(NOTIFIED, SeqCst) { + EMPTY => {} // no one was waiting + NOTIFIED => {} // already unparked + PARKED_CONDVAR => self.unpark_condvar(), + PARKED_DRIVER => driver.unpark(), + actual => panic!("inconsistent state in unpark; actual = {}", actual), + } + } + + fn unpark_condvar(&self) { + // There is a period between when the parked thread sets `state` to + // `PARKED` (or last checked `state` in the case of a spurious wake + // up) and when it actually waits on `cvar`. If we were to notify + // during this period it would be ignored and then when the parked + // thread went to sleep it would never wake up. Fortunately, it has + // `lock` locked at this stage so we can acquire `lock` to wait until + // it is ready to receive the notification. + // + // Releasing `lock` before the call to `notify_one` means that when the + // parked thread wakes it doesn't get woken only to have to wait for us + // to release `lock`. + drop(self.mutex.lock()); + + self.condvar.notify_one() + } + + fn shutdown(&self, handle: &driver::Handle) { + if let Some(mut driver) = self.shared.driver.try_lock() { + driver.shutdown(handle); + } + + self.condvar.notify_all(); + } +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/queue.rs b/tokio/src/runtime/scheduler/multi_thread_alt/queue.rs new file mode 100644 index 00000000000..d4acc408183 --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/queue.rs @@ -0,0 +1,601 @@ +//! Run-queue structures to support a work-stealing scheduler + +use crate::loom::cell::UnsafeCell; +use crate::loom::sync::Arc; +use crate::runtime::scheduler::multi_thread_alt::{Overflow, Stats}; +use crate::runtime::task; + +use std::mem::{self, MaybeUninit}; +use std::ptr; +use std::sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release}; + +// Use wider integers when possible to increase ABA resilience. +// +// See issue #5041: . +cfg_has_atomic_u64! { + type UnsignedShort = u32; + type UnsignedLong = u64; + type AtomicUnsignedShort = crate::loom::sync::atomic::AtomicU32; + type AtomicUnsignedLong = crate::loom::sync::atomic::AtomicU64; +} +cfg_not_has_atomic_u64! { + type UnsignedShort = u16; + type UnsignedLong = u32; + type AtomicUnsignedShort = crate::loom::sync::atomic::AtomicU16; + type AtomicUnsignedLong = crate::loom::sync::atomic::AtomicU32; +} + +/// Producer handle. May only be used from a single thread. +pub(crate) struct Local { + inner: Arc>, +} + +/// Consumer handle. May be used from many threads. +pub(crate) struct Steal(Arc>); + +#[repr(align(128))] +pub(crate) struct Inner { + /// Concurrently updated by many threads. + /// + /// Contains two `UnsignedShort` values. 
The LSB byte is the "real" head of + /// the queue. The `UnsignedShort` in the MSB is set by a stealer in process + /// of stealing values. It represents the first value being stolen in the + /// batch. The `UnsignedShort` indices are intentionally wider than strictly + /// required for buffer indexing in order to provide ABA mitigation and make + /// it possible to distinguish between full and empty buffers. + /// + /// When both `UnsignedShort` values are the same, there is no active + /// stealer. + /// + /// Tracking an in-progress stealer prevents a wrapping scenario. + head: AtomicUnsignedLong, + + /// Only updated by producer thread but read by many threads. + tail: AtomicUnsignedShort, + + /// Elements + buffer: Box<[UnsafeCell>>; LOCAL_QUEUE_CAPACITY]>, +} + +unsafe impl Send for Inner {} +unsafe impl Sync for Inner {} + +#[cfg(not(loom))] +const LOCAL_QUEUE_CAPACITY: usize = 256; + +// Shrink the size of the local queue when using loom. This shouldn't impact +// logic, but allows loom to test more edge cases in a reasonable a mount of +// time. +#[cfg(loom)] +const LOCAL_QUEUE_CAPACITY: usize = 4; + +const MASK: usize = LOCAL_QUEUE_CAPACITY - 1; + +// Constructing the fixed size array directly is very awkward. The only way to +// do it is to repeat `UnsafeCell::new(MaybeUninit::uninit())` 256 times, as +// the contents are not Copy. The trick with defining a const doesn't work for +// generic types. +fn make_fixed_size(buffer: Box<[T]>) -> Box<[T; LOCAL_QUEUE_CAPACITY]> { + assert_eq!(buffer.len(), LOCAL_QUEUE_CAPACITY); + + // safety: We check that the length is correct. + unsafe { Box::from_raw(Box::into_raw(buffer).cast()) } +} + +/// Create a new local run-queue +pub(crate) fn local() -> (Steal, Local) { + let mut buffer = Vec::with_capacity(LOCAL_QUEUE_CAPACITY); + + for _ in 0..LOCAL_QUEUE_CAPACITY { + buffer.push(UnsafeCell::new(MaybeUninit::uninit())); + } + + let inner = Arc::new(Inner { + head: AtomicUnsignedLong::new(0), + tail: AtomicUnsignedShort::new(0), + buffer: make_fixed_size(buffer.into_boxed_slice()), + }); + + let local = Local { + inner: inner.clone(), + }; + + let remote = Steal(inner); + + (remote, local) +} + +impl Local { + /// How many tasks can be pushed into the queue + pub(crate) fn remaining_slots(&self) -> usize { + self.inner.remaining_slots() + } + + pub(crate) fn max_capacity(&self) -> usize { + LOCAL_QUEUE_CAPACITY + } + + /// Returns `true` if there are no entries in the queue + pub(crate) fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + /// Pushes a batch of tasks to the back of the queue. All tasks must fit in + /// the local queue. + /// + /// # Panics + /// + /// The method panics if there is not enough capacity to fit in the queue. + pub(crate) fn push_back(&mut self, tasks: impl ExactSizeIterator>) { + let len = tasks.len(); + assert!(len <= LOCAL_QUEUE_CAPACITY); + + if len == 0 { + // Nothing to do + return; + } + + let head = self.inner.head.load(Acquire); + let (steal, _) = unpack(head); + + // safety: this is the **only** thread that updates this cell. + let mut tail = unsafe { self.inner.tail.unsync_load() }; + + if tail.wrapping_sub(steal) <= (LOCAL_QUEUE_CAPACITY - len) as UnsignedShort { + // Yes, this if condition is structured a bit weird (first block + // does nothing, second returns an error). It is this way to match + // `push_back_or_overflow`. 
+ } else { + panic!() + } + + for task in tasks { + let idx = tail as usize & MASK; + + self.inner.buffer[idx].with_mut(|ptr| { + // Write the task to the slot + // + // Safety: There is only one producer and the above `if` + // condition ensures we don't touch a cell if there is a + // value, thus no consumer. + unsafe { + ptr::write((*ptr).as_mut_ptr(), task); + } + }); + + tail = tail.wrapping_add(1); + } + + self.inner.tail.store(tail, Release); + } + + /// Pushes a task to the back of the local queue, if there is not enough + /// capacity in the queue, this triggers the overflow operation. + /// + /// When the queue overflows, half of the curent contents of the queue is + /// moved to the given Injection queue. This frees up capacity for more + /// tasks to be pushed into the local queue. + pub(crate) fn push_back_or_overflow>( + &mut self, + mut task: task::Notified, + overflow: &O, + stats: &mut Stats, + ) { + let tail = loop { + let head = self.inner.head.load(Acquire); + let (steal, real) = unpack(head); + + // safety: this is the **only** thread that updates this cell. + let tail = unsafe { self.inner.tail.unsync_load() }; + + if tail.wrapping_sub(steal) < LOCAL_QUEUE_CAPACITY as UnsignedShort { + // There is capacity for the task + break tail; + } else if steal != real { + super::counters::inc_num_overflows(); + // Concurrently stealing, this will free up capacity, so only + // push the task onto the inject queue + overflow.push(task); + return; + } else { + super::counters::inc_num_overflows(); + // Push the current task and half of the queue into the + // inject queue. + match self.push_overflow(task, real, tail, overflow, stats) { + Ok(_) => return, + // Lost the race, try again + Err(v) => { + task = v; + } + } + } + }; + + self.push_back_finish(task, tail); + } + + // Second half of `push_back` + fn push_back_finish(&self, task: task::Notified, tail: UnsignedShort) { + // Map the position to a slot index. + let idx = tail as usize & MASK; + + self.inner.buffer[idx].with_mut(|ptr| { + // Write the task to the slot + // + // Safety: There is only one producer and the above `if` + // condition ensures we don't touch a cell if there is a + // value, thus no consumer. + unsafe { + ptr::write((*ptr).as_mut_ptr(), task); + } + }); + + // Make the task available. Synchronizes with a load in + // `steal_into2`. + self.inner.tail.store(tail.wrapping_add(1), Release); + } + + /// Moves a batch of tasks into the inject queue. + /// + /// This will temporarily make some of the tasks unavailable to stealers. + /// Once `push_overflow` is done, a notification is sent out, so if other + /// workers "missed" some of the tasks during a steal, they will get + /// another opportunity. + #[inline(never)] + fn push_overflow>( + &mut self, + task: task::Notified, + head: UnsignedShort, + tail: UnsignedShort, + overflow: &O, + stats: &mut Stats, + ) -> Result<(), task::Notified> { + /// How many elements are we taking from the local queue. + /// + /// This is one less than the number of tasks pushed to the inject + /// queue as we are also inserting the `task` argument. + const NUM_TASKS_TAKEN: UnsignedShort = (LOCAL_QUEUE_CAPACITY / 2) as UnsignedShort; + + assert_eq!( + tail.wrapping_sub(head) as usize, + LOCAL_QUEUE_CAPACITY, + "queue is not full; tail = {}; head = {}", + tail, + head + ); + + let prev = pack(head, head); + + // Claim a bunch of tasks + // + // We are claiming the tasks **before** reading them out of the buffer. 
+ // This is safe because only the **current** thread is able to push new + // tasks. + // + // There isn't really any need for memory ordering... Relaxed would + // work. This is because all tasks are pushed into the queue from the + // current thread (or memory has been acquired if the local queue handle + // moved). + if self + .inner + .head + .compare_exchange( + prev, + pack( + head.wrapping_add(NUM_TASKS_TAKEN), + head.wrapping_add(NUM_TASKS_TAKEN), + ), + Release, + Relaxed, + ) + .is_err() + { + // We failed to claim the tasks, losing the race. Return out of + // this function and try the full `push` routine again. The queue + // may not be full anymore. + return Err(task); + } + + /// An iterator that takes elements out of the run queue. + struct BatchTaskIter<'a, T: 'static> { + buffer: &'a [UnsafeCell>>; LOCAL_QUEUE_CAPACITY], + head: UnsignedLong, + i: UnsignedLong, + } + impl<'a, T: 'static> Iterator for BatchTaskIter<'a, T> { + type Item = task::Notified; + + #[inline] + fn next(&mut self) -> Option> { + if self.i == UnsignedLong::from(NUM_TASKS_TAKEN) { + None + } else { + let i_idx = self.i.wrapping_add(self.head) as usize & MASK; + let slot = &self.buffer[i_idx]; + + // safety: Our CAS from before has assumed exclusive ownership + // of the task pointers in this range. + let task = slot.with(|ptr| unsafe { ptr::read((*ptr).as_ptr()) }); + + self.i += 1; + Some(task) + } + } + } + + // safety: The CAS above ensures that no consumer will look at these + // values again, and we are the only producer. + let batch_iter = BatchTaskIter { + buffer: &self.inner.buffer, + head: head as UnsignedLong, + i: 0, + }; + overflow.push_batch(batch_iter.chain(std::iter::once(task))); + + // Add 1 to factor in the task currently being scheduled. + stats.incr_overflow_count(); + + Ok(()) + } + + /// Pops a task from the local queue. + pub(crate) fn pop(&mut self) -> Option> { + let mut head = self.inner.head.load(Acquire); + + let idx = loop { + let (steal, real) = unpack(head); + + // safety: this is the **only** thread that updates this cell. + let tail = unsafe { self.inner.tail.unsync_load() }; + + if real == tail { + // queue is empty + return None; + } + + let next_real = real.wrapping_add(1); + + // If `steal == real` there are no concurrent stealers. Both `steal` + // and `real` are updated. + let next = if steal == real { + pack(next_real, next_real) + } else { + assert_ne!(steal, next_real); + pack(steal, next_real) + }; + + // Attempt to claim a task. + let res = self + .inner + .head + .compare_exchange(head, next, AcqRel, Acquire); + + match res { + Ok(_) => break real as usize & MASK, + Err(actual) => head = actual, + } + }; + + Some(self.inner.buffer[idx].with(|ptr| unsafe { ptr::read(ptr).assume_init() })) + } +} + +impl Steal { + /// Steals half the tasks from self and place them into `dst`. + pub(crate) fn steal_into( + &self, + dst: &mut Local, + dst_stats: &mut Stats, + ) -> Option> { + // Safety: the caller is the only thread that mutates `dst.tail` and + // holds a mutable reference. + let dst_tail = unsafe { dst.inner.tail.unsync_load() }; + + // To the caller, `dst` may **look** empty but still have values + // contained in the buffer. If another thread is concurrently stealing + // from `dst` there may not be enough capacity to steal. + let (steal, _) = unpack(dst.inner.head.load(Acquire)); + + if dst_tail.wrapping_sub(steal) > LOCAL_QUEUE_CAPACITY as UnsignedShort / 2 { + // we *could* try to steal less here, but for simplicity, we're just + // going to abort. 
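The claim step in `push_overflow` above can be read in isolation: both halves of the packed head are advanced past the batch in one compare-exchange, so a concurrent stealer (which leaves the halves unequal) or a lost race makes the claim fail and the caller retries the normal push path. A small sketch of just that step (the constant and `try_claim_half` are illustrative):

```rust
use std::sync::atomic::{
    AtomicU64,
    Ordering::{Relaxed, Release},
};

const NUM_TASKS_TAKEN: u32 = 128; // half of a 256-slot queue

fn pack(steal: u32, real: u32) -> u64 {
    (real as u64) | ((steal as u64) << 32)
}

/// Try to claim `NUM_TASKS_TAKEN` slots starting at the head we observed.
fn try_claim_half(head: &AtomicU64, observed_head: u32) -> bool {
    // Only succeeds if there is no active stealer (steal == real) and the
    // head has not moved since we loaded it.
    let prev = pack(observed_head, observed_head);
    let next = pack(
        observed_head.wrapping_add(NUM_TASKS_TAKEN),
        observed_head.wrapping_add(NUM_TASKS_TAKEN),
    );
    head.compare_exchange(prev, next, Release, Relaxed).is_ok()
}

fn main() {
    let head = AtomicU64::new(pack(0, 0));
    assert!(try_claim_half(&head, 0)); // no stealer: the batch is ours
    assert!(!try_claim_half(&head, 0)); // stale observation: retry the push
}
```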
+ return None; + } + + // Steal the tasks into `dst`'s buffer. This does not yet expose the + // tasks in `dst`. + let mut n = self.steal_into2(dst, dst_tail); + + if n == 0 { + // No tasks were stolen + return None; + } + + super::counters::inc_num_steals(); + + dst_stats.incr_steal_count(n as u16); + dst_stats.incr_steal_operations(); + + // We are returning a task here + n -= 1; + + let ret_pos = dst_tail.wrapping_add(n); + let ret_idx = ret_pos as usize & MASK; + + // safety: the value was written as part of `steal_into2` and not + // exposed to stealers, so no other thread can access it. + let ret = dst.inner.buffer[ret_idx].with(|ptr| unsafe { ptr::read((*ptr).as_ptr()) }); + + if n == 0 { + // The `dst` queue is empty, but a single task was stolen + return Some(ret); + } + + // Make the stolen items available to consumers + dst.inner.tail.store(dst_tail.wrapping_add(n), Release); + + Some(ret) + } + + // Steal tasks from `self`, placing them into `dst`. Returns the number of + // tasks that were stolen. + fn steal_into2(&self, dst: &mut Local, dst_tail: UnsignedShort) -> UnsignedShort { + let mut prev_packed = self.0.head.load(Acquire); + let mut next_packed; + + let n = loop { + let (src_head_steal, src_head_real) = unpack(prev_packed); + let src_tail = self.0.tail.load(Acquire); + + // If these two do not match, another thread is concurrently + // stealing from the queue. + if src_head_steal != src_head_real { + return 0; + } + + // Number of available tasks to steal + let n = src_tail.wrapping_sub(src_head_real); + let n = n - n / 2; + + if n == 0 { + // No tasks available to steal + return 0; + } + + // Update the real head index to acquire the tasks. + let steal_to = src_head_real.wrapping_add(n); + assert_ne!(src_head_steal, steal_to); + next_packed = pack(src_head_steal, steal_to); + + // Claim all those tasks. This is done by incrementing the "real" + // head but not the steal. By doing this, no other thread is able to + // steal from this queue until the current thread completes. + let res = self + .0 + .head + .compare_exchange(prev_packed, next_packed, AcqRel, Acquire); + + match res { + Ok(_) => break n, + Err(actual) => prev_packed = actual, + } + }; + + assert!( + n <= LOCAL_QUEUE_CAPACITY as UnsignedShort / 2, + "actual = {}", + n + ); + + let (first, _) = unpack(next_packed); + + // Take all the tasks + for i in 0..n { + // Compute the positions + let src_pos = first.wrapping_add(i); + let dst_pos = dst_tail.wrapping_add(i); + + // Map to slots + let src_idx = src_pos as usize & MASK; + let dst_idx = dst_pos as usize & MASK; + + // Read the task + // + // safety: We acquired the task with the atomic exchange above. + let task = self.0.buffer[src_idx].with(|ptr| unsafe { ptr::read((*ptr).as_ptr()) }); + + // Write the task to the new slot + // + // safety: `dst` queue is empty and we are the only producer to + // this queue. + dst.inner.buffer[dst_idx] + .with_mut(|ptr| unsafe { ptr::write((*ptr).as_mut_ptr(), task) }); + } + + let mut prev_packed = next_packed; + + // Update `src_head_steal` to match `src_head_real` signalling that the + // stealing routine is complete. + loop { + let head = unpack(prev_packed).1; + next_packed = pack(head, head); + + let res = self + .0 + .head + .compare_exchange(prev_packed, next_packed, AcqRel, Acquire); + + match res { + Ok(_) => return n, + Err(actual) => { + let (actual_steal, actual_real) = unpack(actual); + + assert_ne!(actual_steal, actual_real); + + prev_packed = actual; + } + } + } + } +} + +cfg_metrics! 
{ + impl Steal { + pub(crate) fn len(&self) -> usize { + self.0.len() as _ + } + } +} + +impl Clone for Steal { + fn clone(&self) -> Steal { + Steal(self.0.clone()) + } +} + +impl Drop for Local { + fn drop(&mut self) { + if !std::thread::panicking() { + assert!(self.pop().is_none(), "queue not empty"); + } + } +} + +impl Inner { + fn remaining_slots(&self) -> usize { + let (steal, _) = unpack(self.head.load(Acquire)); + let tail = self.tail.load(Acquire); + + LOCAL_QUEUE_CAPACITY - (tail.wrapping_sub(steal) as usize) + } + + fn len(&self) -> UnsignedShort { + let (_, head) = unpack(self.head.load(Acquire)); + let tail = self.tail.load(Acquire); + + tail.wrapping_sub(head) + } + + fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +/// Split the head value into the real head and the index a stealer is working +/// on. +fn unpack(n: UnsignedLong) -> (UnsignedShort, UnsignedShort) { + let real = n & UnsignedShort::MAX as UnsignedLong; + let steal = n >> (mem::size_of::() * 8); + + (steal as UnsignedShort, real as UnsignedShort) +} + +/// Join the two head values +fn pack(steal: UnsignedShort, real: UnsignedShort) -> UnsignedLong { + (real as UnsignedLong) | ((steal as UnsignedLong) << (mem::size_of::() * 8)) +} + +#[test] +fn test_local_queue_capacity() { + assert!(LOCAL_QUEUE_CAPACITY - 1 <= u8::MAX as usize); +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/stats.rs b/tokio/src/runtime/scheduler/multi_thread_alt/stats.rs new file mode 100644 index 00000000000..57657bb0391 --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/stats.rs @@ -0,0 +1,171 @@ +use crate::runtime::{Config, MetricsBatch, WorkerMetrics}; + +use std::cmp; +use std::time::{Duration, Instant}; + +/// Per-worker statistics. This is used for both tuning the scheduler and +/// reporting runtime-level metrics/stats. +pub(crate) struct Stats { + /// The metrics batch used to report runtime-level metrics/stats to the + /// user. + batch: MetricsBatch, + + /// Exponentially-weighted moving average of time spent polling scheduled a + /// task. + /// + /// Tracked in nanoseconds, stored as a f64 since that is what we use with + /// the EWMA calculations + task_poll_time_ewma: f64, +} + +/// Transient state +pub(crate) struct Ephemeral { + /// Instant at which work last resumed (continued after park). + /// + /// This duplicates the value stored in `MetricsBatch`. We will unify + /// `Stats` and `MetricsBatch` when we stabilize metrics. + processing_scheduled_tasks_started_at: Instant, + + /// Number of tasks polled in the batch of scheduled tasks + tasks_polled_in_batch: usize, + + /// Used to ensure calls to start / stop batch are paired + #[cfg(debug_assertions)] + batch_started: bool, +} + +impl Ephemeral { + pub(crate) fn new() -> Ephemeral { + Ephemeral { + processing_scheduled_tasks_started_at: Instant::now(), + tasks_polled_in_batch: 0, + #[cfg(debug_assertions)] + batch_started: false, + } + } +} + +/// How to weigh each individual poll time, value is plucked from thin air. +const TASK_POLL_TIME_EWMA_ALPHA: f64 = 0.1; + +/// Ideally, we wouldn't go above this, value is plucked from thin air. +const TARGET_GLOBAL_QUEUE_INTERVAL: f64 = Duration::from_micros(200).as_nanos() as f64; + +/// Max value for the global queue interval. 
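`Inner::len` and `remaining_slots` above rely on wrapping subtraction of the ever-increasing indices, which stays correct even after the counters wrap around. A tiny illustration (the free-standing `len` here is a stand-in, not the method above):

```rust
fn len(head: u32, tail: u32) -> u32 {
    // Indices only ever grow (mod 2^32); their wrapping difference is the
    // number of occupied slots regardless of wrap-around.
    tail.wrapping_sub(head)
}

fn main() {
    assert_eq!(len(5, 9), 4);
    // Near the wrap point the count is still right.
    assert_eq!(len(u32::MAX - 1, 2), 4);
}
```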
This is 2x the previous default +const MAX_TASKS_POLLED_PER_GLOBAL_QUEUE_INTERVAL: u32 = 127; + +/// This is the previous default +const TARGET_TASKS_POLLED_PER_GLOBAL_QUEUE_INTERVAL: u32 = 61; + +impl Stats { + pub(crate) const DEFAULT_GLOBAL_QUEUE_INTERVAL: u32 = + TARGET_TASKS_POLLED_PER_GLOBAL_QUEUE_INTERVAL; + + pub(crate) fn new(worker_metrics: &WorkerMetrics) -> Stats { + // Seed the value with what we hope to see. + let task_poll_time_ewma = + TARGET_GLOBAL_QUEUE_INTERVAL / TARGET_TASKS_POLLED_PER_GLOBAL_QUEUE_INTERVAL as f64; + + Stats { + batch: MetricsBatch::new(worker_metrics), + task_poll_time_ewma, + } + } + + pub(crate) fn tuned_global_queue_interval(&self, config: &Config) -> u32 { + // If an interval is explicitly set, don't tune. + if let Some(configured) = config.global_queue_interval { + return configured; + } + + // As of Rust 1.45, casts from f64 -> u32 are saturating, which is fine here. + let tasks_per_interval = (TARGET_GLOBAL_QUEUE_INTERVAL / self.task_poll_time_ewma) as u32; + + cmp::max( + // We don't want to return less than 2 as that would result in the + // global queue always getting checked first. + 2, + cmp::min( + MAX_TASKS_POLLED_PER_GLOBAL_QUEUE_INTERVAL, + tasks_per_interval, + ), + ) + } + + pub(crate) fn submit(&mut self, to: &WorkerMetrics) { + self.batch.submit(to); + } + + pub(crate) fn about_to_park(&mut self) { + self.batch.about_to_park(); + } + + pub(crate) fn inc_local_schedule_count(&mut self) { + self.batch.inc_local_schedule_count(); + } + + pub(crate) fn start_processing_scheduled_tasks(&mut self, ephemeral: &mut Ephemeral) { + self.batch.start_processing_scheduled_tasks(); + + #[cfg(debug_assertions)] + { + debug_assert!(!ephemeral.batch_started); + ephemeral.batch_started = true; + } + + ephemeral.processing_scheduled_tasks_started_at = Instant::now(); + ephemeral.tasks_polled_in_batch = 0; + } + + pub(crate) fn end_processing_scheduled_tasks(&mut self, ephemeral: &mut Ephemeral) { + self.batch.end_processing_scheduled_tasks(); + + #[cfg(debug_assertions)] + { + debug_assert!(ephemeral.batch_started); + ephemeral.batch_started = false; + } + + // Update the EWMA task poll time + if ephemeral.tasks_polled_in_batch > 0 { + let now = Instant::now(); + + // If we "overflow" this conversion, we have bigger problems than + // slightly off stats. + let elapsed = (now - ephemeral.processing_scheduled_tasks_started_at).as_nanos() as f64; + let num_polls = ephemeral.tasks_polled_in_batch as f64; + + // Calculate the mean poll duration for a single task in the batch + let mean_poll_duration = elapsed / num_polls; + + // Compute the alpha weighted by the number of tasks polled this batch. + let weighted_alpha = 1.0 - (1.0 - TASK_POLL_TIME_EWMA_ALPHA).powf(num_polls); + + // Now compute the new weighted average task poll time. 
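The tuning logic above boils down to two formulas: an EWMA of per-task poll time whose alpha is weighted by the batch size, and a global-queue interval of roughly `target_interval / mean_poll_time`, clamped to the [2, 127] range set by the constants. A stand-alone sketch with the same constants (`update_ewma` and `tuned_interval` are illustrative names):

```rust
const TASK_POLL_TIME_EWMA_ALPHA: f64 = 0.1;
const TARGET_GLOBAL_QUEUE_INTERVAL: f64 = 200_000.0; // 200µs, in nanoseconds

fn update_ewma(ewma: f64, batch_elapsed_nanos: f64, num_polls: f64) -> f64 {
    let mean_poll = batch_elapsed_nanos / num_polls;
    // Weight alpha by the number of polls so one large batch moves the
    // average about as much as the same polls reported one at a time.
    let weighted_alpha = 1.0 - (1.0 - TASK_POLL_TIME_EWMA_ALPHA).powf(num_polls);
    weighted_alpha * mean_poll + (1.0 - weighted_alpha) * ewma
}

fn tuned_interval(ewma: f64) -> u32 {
    // Check the global queue roughly every `target / mean poll time` local
    // polls, clamped to the [2, 127] range.
    ((TARGET_GLOBAL_QUEUE_INTERVAL / ewma) as u32).clamp(2, 127)
}

fn main() {
    // Seeded as in `Stats::new`: target interval / 61 polls.
    let mut ewma = TARGET_GLOBAL_QUEUE_INTERVAL / 61.0;
    // A batch of 50 polls that took 1ms total, i.e. ~20µs per poll.
    ewma = update_ewma(ewma, 1_000_000.0, 50.0);
    println!("tuned interval = {}", tuned_interval(ewma));
}
```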
+ self.task_poll_time_ewma = weighted_alpha * mean_poll_duration + + (1.0 - weighted_alpha) * self.task_poll_time_ewma; + } + } + + pub(crate) fn start_poll(&mut self, ephemeral: &mut Ephemeral) { + self.batch.start_poll(); + + ephemeral.tasks_polled_in_batch += 1; + } + + pub(crate) fn end_poll(&mut self) { + self.batch.end_poll(); + } + + pub(crate) fn incr_steal_count(&mut self, by: u16) { + self.batch.incr_steal_count(by); + } + + pub(crate) fn incr_steal_operations(&mut self) { + self.batch.incr_steal_operations(); + } + + pub(crate) fn incr_overflow_count(&mut self) { + self.batch.incr_overflow_count(); + } +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/trace.rs b/tokio/src/runtime/scheduler/multi_thread_alt/trace.rs new file mode 100644 index 00000000000..cc65a487543 --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/trace.rs @@ -0,0 +1,61 @@ +use crate::loom::sync::atomic::{AtomicBool, Ordering}; +use crate::loom::sync::{Barrier, Mutex}; +use crate::runtime::dump::Dump; +use crate::runtime::scheduler::multi_thread_alt::Handle; +use crate::sync::notify::Notify; + +/// Tracing status of the worker. +pub(super) struct TraceStatus { + pub(super) trace_requested: AtomicBool, + pub(super) trace_start: Barrier, + pub(super) trace_end: Barrier, + pub(super) result_ready: Notify, + pub(super) trace_result: Mutex>, +} + +impl TraceStatus { + pub(super) fn new(remotes_len: usize) -> Self { + Self { + trace_requested: AtomicBool::new(false), + trace_start: Barrier::new(remotes_len), + trace_end: Barrier::new(remotes_len), + result_ready: Notify::new(), + trace_result: Mutex::new(None), + } + } + + pub(super) fn trace_requested(&self) -> bool { + self.trace_requested.load(Ordering::Relaxed) + } + + pub(super) async fn start_trace_request(&self, handle: &Handle) { + while self + .trace_requested + .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed) + .is_err() + { + handle.notify_all(); + crate::task::yield_now().await; + } + } + + pub(super) fn stash_result(&self, dump: Dump) { + let _ = self.trace_result.lock().insert(dump); + self.result_ready.notify_one(); + } + + pub(super) fn take_result(&self) -> Option { + self.trace_result.lock().take() + } + + pub(super) async fn end_trace_request(&self, handle: &Handle) { + while self + .trace_requested + .compare_exchange(true, false, Ordering::Acquire, Ordering::Relaxed) + .is_err() + { + handle.notify_all(); + crate::task::yield_now().await; + } + } +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/trace_mock.rs b/tokio/src/runtime/scheduler/multi_thread_alt/trace_mock.rs new file mode 100644 index 00000000000..2c17a4e38b5 --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/trace_mock.rs @@ -0,0 +1,11 @@ +pub(super) struct TraceStatus {} + +impl TraceStatus { + pub(super) fn new(_: usize) -> Self { + Self {} + } + + pub(super) fn trace_requested(&self) -> bool { + false + } +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/worker.rs b/tokio/src/runtime/scheduler/multi_thread_alt/worker.rs new file mode 100644 index 00000000000..d402d55f2c7 --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/worker.rs @@ -0,0 +1,1513 @@ +//! A scheduler is initialized with a fixed number of workers. Each worker is +//! driven by a thread. Each worker has a "core" which contains data such as the +//! run queue and other state. When `block_in_place` is called, the worker's +//! "core" is handed off to a new thread allowing the scheduler to continue to +//! 
make progress while the originating thread blocks. +//! +//! # Shutdown +//! +//! Shutting down the runtime involves the following steps: +//! +//! 1. The Shared::close method is called. This closes the inject queue and +//! OwnedTasks instance and wakes up all worker threads. +//! +//! 2. Each worker thread observes the close signal next time it runs +//! Core::maintenance by checking whether the inject queue is closed. +//! The Core::is_shutdown flag is set to true. +//! +//! 3. The worker thread calls `pre_shutdown` in parallel. Here, the worker +//! will keep removing tasks from OwnedTasks until it is empty. No new +//! tasks can be pushed to the OwnedTasks during or after this step as it +//! was closed in step 1. +//! +//! 5. The workers call Shared::shutdown to enter the single-threaded phase of +//! shutdown. These calls will push their core to Shared::shutdown_cores, +//! and the last thread to push its core will finish the shutdown procedure. +//! +//! 6. The local run queue of each core is emptied, then the inject queue is +//! emptied. +//! +//! At this point, shutdown has completed. It is not possible for any of the +//! collections to contain any tasks at this point, as each collection was +//! closed first, then emptied afterwards. +//! +//! ## Spawns during shutdown +//! +//! When spawning tasks during shutdown, there are two cases: +//! +//! * The spawner observes the OwnedTasks being open, and the inject queue is +//! closed. +//! * The spawner observes the OwnedTasks being closed and doesn't check the +//! inject queue. +//! +//! The first case can only happen if the OwnedTasks::bind call happens before +//! or during step 1 of shutdown. In this case, the runtime will clean up the +//! task in step 3 of shutdown. +//! +//! In the latter case, the task was not spawned and the task is immediately +//! cancelled by the spawner. +//! +//! The correctness of shutdown requires both the inject queue and OwnedTasks +//! collection to have a closed bit. With a close bit on only the inject queue, +//! spawning could run in to a situation where a task is successfully bound long +//! after the runtime has shut down. With a close bit on only the OwnedTasks, +//! the first spawning situation could result in the notification being pushed +//! to the inject queue after step 6 of shutdown, which would leave a task in +//! the inject queue indefinitely. This would be a ref-count cycle and a memory +//! leak. + +use crate::loom::sync::{Arc, Condvar, Mutex, MutexGuard}; +use crate::runtime; +use crate::runtime::context; +use crate::runtime::driver::Driver; +use crate::runtime::scheduler::multi_thread_alt::{ + idle, queue, stats, Counters, Handle, Idle, Overflow, Stats, TraceStatus, +}; +use crate::runtime::scheduler::{self, inject, Lock}; +use crate::runtime::task::OwnedTasks; +use crate::runtime::{blocking, coop, driver, task, Config, SchedulerMetrics, WorkerMetrics}; +use crate::util::atomic_cell::AtomicCell; +use crate::util::rand::{FastRand, RngSeedGenerator}; + +use std::cell::{Cell, RefCell}; +use std::cmp; +use std::task::Waker; +use std::time::Duration; + +cfg_metrics! { + mod metrics; +} + +mod taskdump_mock; + +/// A scheduler worker +/// +/// Data is stack-allocated and never migrates threads +pub(super) struct Worker { + /// Used to schedule bookkeeping tasks every so often. 
+ tick: u32, + + /// True if the scheduler is being shutdown + pub(super) is_shutdown: bool, + + /// True if the scheduler is being traced + is_traced: bool, + + /// Counter used to track when to poll from the local queue vs. the + /// injection queue + num_seq_local_queue_polls: u32, + + /// How often to check the global queue + global_queue_interval: u32, + + /// Used to collect a list of workers to notify + workers_to_notify: Vec, + + /// Snapshot of idle core list. This helps speedup stealing + idle_snapshot: idle::Snapshot, + + stats: stats::Ephemeral, +} + +/// Core data +/// +/// Data is heap-allocated and migrates threads. +#[repr(align(128))] +pub(super) struct Core { + /// Index holding this core's remote/shared state. + pub(super) index: usize, + + lifo_slot: Option, + + /// The worker-local run queue. + run_queue: queue::Local>, + + /// True if the worker is currently searching for more work. Searching + /// involves attempting to steal from other workers. + pub(super) is_searching: bool, + + /// Per-worker runtime stats + stats: Stats, + + /// Fast random number generator. + rand: FastRand, +} + +/// State shared across all workers +pub(crate) struct Shared { + /// Per-core remote state. + remotes: Box<[Remote]>, + + /// Global task queue used for: + /// 1. Submit work to the scheduler while **not** currently on a worker thread. + /// 2. Submit work to the scheduler when a worker run queue is saturated + pub(super) inject: inject::Shared>, + + /// Coordinates idle workers + idle: Idle, + + /// Collection of all active tasks spawned onto this executor. + pub(super) owned: OwnedTasks>, + + /// Data synchronized by the scheduler mutex + pub(super) synced: Mutex, + + /// Power's Tokio's I/O, timers, etc... the responsibility of polling the + /// driver is shared across workers. + driver: AtomicCell, + + /// Condition variables used to unblock worker threads. Each worker thread + /// has its own condvar it waits on. + pub(super) condvars: Vec, + + /// The number of cores that have observed the trace signal. + pub(super) trace_status: TraceStatus, + + /// Scheduler configuration options + config: Config, + + /// Collects metrics from the runtime. + pub(super) scheduler_metrics: SchedulerMetrics, + + pub(super) worker_metrics: Box<[WorkerMetrics]>, + + /// Only held to trigger some code on drop. This is used to get internal + /// runtime metrics that can be useful when doing performance + /// investigations. This does nothing (empty struct, no drop impl) unless + /// the `tokio_internal_mt_counters` cfg flag is set. + _counters: Counters, +} + +/// Data synchronized by the scheduler mutex +pub(crate) struct Synced { + /// When worker is notified, it is assigned a core. The core is placed here + /// until the worker wakes up to take it. + pub(super) assigned_cores: Vec>>, + + /// Cores that have observed the shutdown signal + /// + /// The core is **not** placed back in the worker to avoid it from being + /// stolen by a thread that was spawned as part of `block_in_place`. + shutdown_cores: Vec>, + + /// Synchronized state for `Idle`. + pub(super) idle: idle::Synced, + + /// Synchronized state for `Inject`. + pub(crate) inject: inject::Synced, +} + +/// Used to communicate with a worker from other threads. +struct Remote { + /// When a task is scheduled from a worker, it is stored in this slot. The + /// worker will check this slot for a task **before** checking the run + /// queue. This effectively results in the **last** scheduled task to be run + /// next (LIFO). 
This is an optimization for improving locality which + /// benefits message passing patterns and helps to reduce latency. + // lifo_slot: Lifo, + + /// Steals tasks from this worker. + pub(super) steal: queue::Steal>, +} + +/// Thread-local context +pub(crate) struct Context { + // Current scheduler's handle + handle: Arc, + + /// Worker index + index: usize, + + /// True when the LIFO slot is enabled + lifo_enabled: Cell, + + /// Core data + core: RefCell>>, + + /// Used to pass cores to other threads when `block_in_place` is called + handoff_core: Arc>, + + /// Tasks to wake after resource drivers are polled. This is mostly to + /// handle yielded tasks. + pub(crate) defer: RefCell>, +} + +/// Running a task may consume the core. If the core is still available when +/// running the task completes, it is returned. Otherwise, the worker will need +/// to stop processing. +type RunResult = Result, ()>; +type NextTaskResult = Result<(Option, Box), ()>; + +/// A task handle +type Task = task::Task>; + +/// A notified task handle +type Notified = task::Notified>; + +/// Value picked out of thin-air. Running the LIFO slot a handful of times +/// seemms sufficient to benefit from locality. More than 3 times probably is +/// overweighing. The value can be tuned in the future with data that shows +/// improvements. +const MAX_LIFO_POLLS_PER_TICK: usize = 3; + +pub(super) fn create( + num_cores: usize, + driver: Driver, + driver_handle: driver::Handle, + blocking_spawner: blocking::Spawner, + seed_generator: RngSeedGenerator, + config: Config, +) -> runtime::Handle { + // Allocate num_cores + 1 workers so that one worker can handle the I/O + // driver, if needed. + let num_workers = num_cores + 1; + let mut cores = Vec::with_capacity(num_cores); + let mut remotes = Vec::with_capacity(num_cores); + // Worker metrics are actually core based + let mut worker_metrics = Vec::with_capacity(num_cores); + + // Create the local queues + for i in 0..num_cores { + let (steal, run_queue) = queue::local(); + + let metrics = WorkerMetrics::from_config(&config); + let stats = Stats::new(&metrics); + + cores.push(Box::new(Core { + index: i, + lifo_slot: None, + run_queue, + is_searching: false, + stats, + rand: FastRand::from_seed(config.seed_generator.next_seed()), + })); + + remotes.push(Remote { + steal, + // lifo_slot: Lifo::new(), + }); + worker_metrics.push(metrics); + } + + // Allocate num-cores + 1 workers, so one worker can handle the I/O driver, + // if needed. 
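The LIFO-slot comments above trade latency for fairness: running the most recently woken task next is great for message-passing workloads, but two tasks that keep waking each other could monopolize a worker, so the slot is only honored `MAX_LIFO_POLLS_PER_TICK` times in a row. A toy model of that cap (the type and method names are illustrative):

```rust
const MAX_LIFO_POLLS_PER_TICK: usize = 3;

struct LifoState {
    lifo_enabled: bool,
    lifo_polls: usize,
}

impl LifoState {
    /// Returns `true` if the task in the LIFO slot should run next, `false`
    /// if it should go to the FIFO run queue instead.
    fn poll_lifo(&mut self) -> bool {
        if !self.lifo_enabled {
            return false;
        }
        self.lifo_polls += 1;
        if self.lifo_polls >= MAX_LIFO_POLLS_PER_TICK {
            // Ping-pong guard: stop prioritizing the slot for now so other
            // queued tasks (and stealers) get a chance.
            self.lifo_enabled = false;
        }
        true
    }
}

fn main() {
    let mut state = LifoState { lifo_enabled: true, lifo_polls: 0 };
    let runs: Vec<bool> = (0..5).map(|_| state.poll_lifo()).collect();
    // The slot is honored three times, then traffic falls back to the queue.
    assert_eq!(runs, vec![true, true, true, false, false]);
}
```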
+ let (idle, idle_synced) = Idle::new(cores, num_workers); + let (inject, inject_synced) = inject::Shared::new(); + + let handle = Arc::new(Handle { + shared: Shared { + remotes: remotes.into_boxed_slice(), + inject, + idle, + owned: OwnedTasks::new(), + synced: Mutex::new(Synced { + assigned_cores: (0..num_workers).map(|_| None).collect(), + shutdown_cores: Vec::with_capacity(num_cores), + idle: idle_synced, + inject: inject_synced, + }), + driver: AtomicCell::new(Some(Box::new(driver))), + condvars: (0..num_workers).map(|_| Condvar::new()).collect(), + trace_status: TraceStatus::new(num_cores), + config, + scheduler_metrics: SchedulerMetrics::new(), + worker_metrics: worker_metrics.into_boxed_slice(), + _counters: Counters, + }, + driver: driver_handle, + blocking_spawner, + seed_generator, + }); + + let rt_handle = runtime::Handle { + inner: scheduler::Handle::MultiThreadAlt(handle), + }; + + // Eagerly start worker threads + for index in 0..num_workers { + let handle = rt_handle.inner.expect_multi_thread_alt(); + let h2 = handle.clone(); + let handoff_core = Arc::new(AtomicCell::new(None)); + + handle + .blocking_spawner + .spawn_blocking(&rt_handle, move || run(index, h2, handoff_core, false)); + } + + rt_handle +} + +#[track_caller] +pub(crate) fn block_in_place(f: F) -> R +where + F: FnOnce() -> R, +{ + // Try to steal the worker core back + struct Reset(coop::Budget); + + impl Drop for Reset { + fn drop(&mut self) { + with_current(|maybe_cx| { + if let Some(cx) = maybe_cx { + let core = cx.handoff_core.take(); + let mut cx_core = cx.core.borrow_mut(); + assert!(cx_core.is_none()); + *cx_core = core; + + // Reset the task budget as we are re-entering the + // runtime. + coop::set(self.0); + } + }); + } + } + + let mut had_entered = false; + + let setup_result = with_current(|maybe_cx| { + match ( + crate::runtime::context::current_enter_context(), + maybe_cx.is_some(), + ) { + (context::EnterRuntime::Entered { .. }, true) => { + // We are on a thread pool runtime thread, so we just need to + // set up blocking. + had_entered = true; + } + ( + context::EnterRuntime::Entered { + allow_block_in_place, + }, + false, + ) => { + // We are on an executor, but _not_ on the thread pool. That is + // _only_ okay if we are in a thread pool runtime's block_on + // method: + if allow_block_in_place { + had_entered = true; + return Ok(()); + } else { + // This probably means we are on the current_thread runtime or in a + // LocalSet, where it is _not_ okay to block. + return Err( + "can call blocking only when running on the multi-threaded runtime", + ); + } + } + (context::EnterRuntime::NotEntered, true) => { + // This is a nested call to block_in_place (we already exited). + // All the necessary setup has already been done. + return Ok(()); + } + (context::EnterRuntime::NotEntered, false) => { + // We are outside of the tokio runtime, so blocking is fine. + // We can also skip all of the thread pool blocking setup steps. + return Ok(()); + } + } + + let cx = maybe_cx.expect("no .is_some() == false cases above should lead here"); + + // Get the worker core. If none is set, then blocking is fine! + let core = match cx.core.borrow_mut().take() { + Some(core) => core, + None => return Ok(()), + }; + + // In order to block, the core must be sent to another thread for + // execution. + // + // First, move the core back into the worker's shared core slot. + cx.handoff_core.set(core); + + // Next, clone the worker handle and send it to a new thread for + // processing. 
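`create` above spawns the workers onto the blocking pool, and `block_in_place` below uses that same pool to re-run a displaced core while the current thread blocks. From user code this machinery is reached through the public API; a usage sketch, assuming a `tokio` dependency with the `full` feature (the sketch uses the stable multi-thread builder, since selecting this alternate scheduler is an internal/unstable concern and is not shown here):

```rust
fn main() {
    let rt = tokio::runtime::Builder::new_multi_thread()
        .worker_threads(2)
        .enable_all()
        .build()
        .unwrap();

    rt.block_on(async {
        let answer = tokio::task::spawn(async {
            // Inside a worker: hand the core off to another thread and run
            // the blocking closure right here, without starving the runtime.
            tokio::task::block_in_place(|| {
                std::thread::sleep(std::time::Duration::from_millis(50));
                42
            })
        })
        .await
        .unwrap();

        assert_eq!(answer, 42);
    });
}
```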
+ // + // Once the blocking task is done executing, we will attempt to + // steal the core back. + let index = cx.index; + let handle = cx.handle.clone(); + let handoff_core = cx.handoff_core.clone(); + runtime::spawn_blocking(move || run(index, handle, handoff_core, true)); + Ok(()) + }); + + if let Err(panic_message) = setup_result { + panic!("{}", panic_message); + } + + if had_entered { + // Unset the current task's budget. Blocking sections are not + // constrained by task budgets. + let _reset = Reset(coop::stop()); + + crate::runtime::context::exit_runtime(f) + } else { + f() + } +} + +fn run( + index: usize, + handle: Arc, + handoff_core: Arc>, + blocking_in_place: bool, +) { + struct AbortOnPanic; + + impl Drop for AbortOnPanic { + fn drop(&mut self) { + if std::thread::panicking() { + eprintln!("worker thread panicking; aborting process"); + std::process::abort(); + } + } + } + + // Catching panics on worker threads in tests is quite tricky. Instead, when + // debug assertions are enabled, we just abort the process. + #[cfg(debug_assertions)] + let _abort_on_panic = AbortOnPanic; + + let num_workers = handle.shared.condvars.len(); + + let mut worker = Worker { + tick: 0, + num_seq_local_queue_polls: 0, + global_queue_interval: Stats::DEFAULT_GLOBAL_QUEUE_INTERVAL, + is_shutdown: false, + is_traced: false, + workers_to_notify: Vec::with_capacity(num_workers - 1), + idle_snapshot: idle::Snapshot::new(&handle.shared.idle), + stats: stats::Ephemeral::new(), + }; + + let sched_handle = scheduler::Handle::MultiThreadAlt(handle.clone()); + + crate::runtime::context::enter_runtime(&sched_handle, true, |_| { + // Set the worker context. + let cx = scheduler::Context::MultiThreadAlt(Context { + index, + lifo_enabled: Cell::new(!handle.shared.config.disable_lifo_slot), + handle, + core: RefCell::new(None), + handoff_core, + defer: RefCell::new(Vec::with_capacity(64)), + }); + + context::set_scheduler(&cx, || { + let cx = cx.expect_multi_thread_alt(); + + // Run the worker + let res = worker.run(&cx, blocking_in_place); + // `err` here signifies the core was lost, this is an expected end + // state for a worker. + debug_assert!(res.is_err()); + + // Check if there are any deferred tasks to notify. This can happen when + // the worker core is lost due to `block_in_place()` being called from + // within the task. + if !cx.defer.borrow().is_empty() { + worker.schedule_deferred_without_core(&cx, &mut cx.shared().synced.lock()); + } + }); + }); +} + +macro_rules! try_task { + ($e:expr) => {{ + let (task, core) = $e?; + if task.is_some() { + return Ok((task, core)); + } + core + }}; +} + +macro_rules! try_task_new_batch { + ($w:expr, $e:expr) => {{ + let (task, mut core) = $e?; + if task.is_some() { + core.stats.start_processing_scheduled_tasks(&mut $w.stats); + return Ok((task, core)); + } + core + }}; +} + +impl Worker { + fn run(&mut self, cx: &Context, blocking_in_place: bool) -> RunResult { + let (maybe_task, mut core) = { + if blocking_in_place { + if let Some(core) = cx.handoff_core.take() { + (None, core) + } else { + // Just shutdown + return Err(()); + } + } else { + let mut synced = cx.shared().synced.lock(); + + // First try to acquire an available core + if let Some(core) = self.try_acquire_available_core(cx, &mut synced) { + // Try to poll a task from the global queue + let maybe_task = self.next_remote_task_synced(cx, &mut synced); + (maybe_task, core) + } else { + // block the thread to wait for a core to be assinged to us + self.wait_for_core(cx, synced)? 
+ } + } + }; + + core.stats.start_processing_scheduled_tasks(&mut self.stats); + + if let Some(task) = maybe_task { + core = self.run_task(cx, core, task)?; + } + + while !self.is_shutdown { + let (maybe_task, c) = self.next_task(cx, core)?; + core = c; + + if let Some(task) = maybe_task { + core = self.run_task(cx, core, task)?; + } else { + // The only reason to get `None` from `next_task` is we have + // entered the shutdown phase. + assert!(self.is_shutdown); + break; + } + } + + self.pre_shutdown(cx, &mut core); + + // Signal shutdown + self.shutdown_core(cx, core); + + // It is possible that tasks wake others during drop, so we need to + // clear the defer list. + self.shutdown_clear_defer(cx); + + Err(()) + } + + // Try to acquire an available core, but do not block the thread + fn try_acquire_available_core( + &mut self, + cx: &Context, + synced: &mut Synced, + ) -> Option> { + if let Some(mut core) = cx + .shared() + .idle + .try_acquire_available_core(&mut synced.idle) + { + self.reset_acquired_core(cx, synced, &mut core); + Some(core) + } else { + None + } + } + + // Block the current thread, waiting for an available core + fn wait_for_core( + &mut self, + cx: &Context, + mut synced: MutexGuard<'_, Synced>, + ) -> NextTaskResult { + cx.shared() + .idle + .transition_worker_to_parked(&mut synced, cx.index); + + // Wait until a core is available, then exit the loop. + let mut core = loop { + if let Some(core) = synced.assigned_cores[cx.index].take() { + break core; + } + + // If shutting down, abort + if cx.shared().inject.is_closed(&synced.inject) { + self.shutdown_clear_defer(cx); + return Err(()); + } + + synced = cx.shared().condvars[cx.index].wait(synced).unwrap(); + }; + + self.reset_acquired_core(cx, &mut synced, &mut core); + + if self.is_shutdown { + // Currently shutting down, don't do any more work + return Ok((None, core)); + } + + let n = core.run_queue.max_capacity() / 2; + let maybe_task = self.next_remote_task_batch_synced(cx, &mut synced, &mut core, n); + + Ok((maybe_task, core)) + } + + /// Ensure core's state is set correctly for the worker to start using. + fn reset_acquired_core(&mut self, cx: &Context, synced: &mut Synced, core: &mut Core) { + self.global_queue_interval = core.stats.tuned_global_queue_interval(&cx.shared().config); + debug_assert!(self.global_queue_interval > 1); + + // Reset `lifo_enabled` here in case the core was previously stolen from + // a task that had the LIFO slot disabled. + self.reset_lifo_enabled(cx); + + // At this point, the local queue should be empty + debug_assert!(core.run_queue.is_empty()); + + // Update shutdown state while locked + self.update_global_flags(cx, synced); + } + + /// Finds the next task to run, this could be from a queue or stealing. If + /// none are available, the thread sleeps and tries again. + fn next_task(&mut self, cx: &Context, mut core: Box) -> NextTaskResult { + self.assert_lifo_enabled_is_correct(cx); + + if self.is_traced { + core = cx.handle.trace_core(core); + } + + // Increment the tick + self.tick = self.tick.wrapping_add(1); + + // Runs maintenance every so often. When maintenance is run, the + // driver is checked, which may result in a task being found. + core = try_task!(self.maybe_maintenance(&cx, core)); + + // Check the LIFO slot, local run queue, and the injection queue for + // a notified task. + core = try_task!(self.next_notified_task(cx, core)); + + // We consumed all work in the queues and will start searching for work. 
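`wait_for_core` above is a classic slot-plus-condvar handoff: the notifier stores the core in the worker's `assigned_cores` slot while holding the scheduler lock, then signals that worker's condvar; the worker loops, re-checking its slot after every wakeup. A generic stand-alone sketch of the same handshake (`Slots`, `wait_for`, and `assign` are illustrative, not the scheduler's types):

```rust
use std::sync::{Arc, Condvar, Mutex};

struct Slots<T> {
    slots: Mutex<Vec<Option<T>>>,
    condvars: Vec<Condvar>,
}

impl<T> Slots<T> {
    fn wait_for(&self, index: usize) -> T {
        let mut slots = self.slots.lock().unwrap();
        loop {
            // Re-check after every wakeup: condvars can wake spuriously.
            if let Some(value) = slots[index].take() {
                return value;
            }
            slots = self.condvars[index].wait(slots).unwrap();
        }
    }

    fn assign(&self, index: usize, value: T) {
        // Store the value while holding the lock, then signal that worker.
        let mut slots = self.slots.lock().unwrap();
        slots[index] = Some(value);
        self.condvars[index].notify_one();
    }
}

fn main() {
    let shared = Arc::new(Slots {
        slots: Mutex::new(vec![None::<u32>, None]),
        condvars: vec![Condvar::new(), Condvar::new()],
    });

    let waiter = Arc::clone(&shared);
    let handle = std::thread::spawn(move || waiter.wait_for(1));

    shared.assign(1, 42);
    assert_eq!(handle.join().unwrap(), 42);
}
```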
+ core.stats.end_processing_scheduled_tasks(&mut self.stats); + + super::counters::inc_num_no_local_work(); + + if !cx.defer.borrow().is_empty() { + // We are deferring tasks, so poll the resource driver and schedule + // the deferred tasks. + try_task_new_batch!(self, self.park_yield(cx, core)); + + panic!("what happened to the deferred tasks? 🤔"); + } + + while !self.is_shutdown { + // Search for more work, this involves trying to poll the resource + // driver, steal from other workers, and check the global queue + // again. + core = try_task_new_batch!(self, self.search_for_work(cx, core)); + + debug_assert!(cx.defer.borrow().is_empty()); + core = try_task_new_batch!(self, self.park(cx, core)); + } + + // Shutting down, drop any deferred tasks + self.shutdown_clear_defer(cx); + + Ok((None, core)) + } + + fn next_notified_task(&mut self, cx: &Context, mut core: Box) -> NextTaskResult { + self.num_seq_local_queue_polls += 1; + + if self.num_seq_local_queue_polls % self.global_queue_interval == 0 { + super::counters::inc_global_queue_interval(); + + self.num_seq_local_queue_polls = 0; + + // Update the global queue interval, if needed + self.tune_global_queue_interval(cx, &mut core); + + if let Some(task) = self.next_remote_task(cx) { + return Ok((Some(task), core)); + } + } + + if let Some(task) = self.next_local_task(&mut core) { + return Ok((Some(task), core)); + } + + self.next_remote_task_batch(cx, core) + } + + fn next_remote_task(&self, cx: &Context) -> Option { + if cx.shared().inject.is_empty() { + return None; + } + + let mut synced = cx.shared().synced.lock(); + self.next_remote_task_synced(cx, &mut synced) + } + + fn next_remote_task_synced(&self, cx: &Context, synced: &mut Synced) -> Option { + // safety: we only have access to a valid `Synced` in this file. + unsafe { cx.shared().inject.pop(&mut synced.inject) } + } + + fn next_remote_task_batch(&self, cx: &Context, mut core: Box) -> NextTaskResult { + if cx.shared().inject.is_empty() { + return Ok((None, core)); + } + + // Other threads can only **remove** tasks from the current worker's + // `run_queue`. So, we can be confident that by the time we call + // `run_queue.push_back` below, there will be *at least* `cap` + // available slots in the queue. + let cap = usize::min( + core.run_queue.remaining_slots(), + core.run_queue.max_capacity() / 2, + ); + + let mut synced = cx.shared().synced.lock(); + let maybe_task = self.next_remote_task_batch_synced(cx, &mut synced, &mut core, cap); + Ok((maybe_task, core)) + } + + fn next_remote_task_batch_synced( + &self, + cx: &Context, + synced: &mut Synced, + core: &mut Core, + max: usize, + ) -> Option { + super::counters::inc_num_remote_batch(); + + // The worker is currently idle, pull a batch of work from the + // injection queue. We don't want to pull *all* the work so other + // workers can also get some. + let n = if core.is_searching { + cx.shared().inject.len() / cx.shared().idle.num_searching() + 1 + } else { + cx.shared().inject.len() / cx.shared().remotes.len() + 1 + }; + + let n = usize::min(n, max); + + // safety: passing in the correct `inject::Synced`. 
+ let mut tasks = unsafe { cx.shared().inject.pop_n(&mut synced.inject, n) }; + + // Pop the first task to return immedietly + let ret = tasks.next(); + + // Push the rest of the on the run queue + core.run_queue.push_back(tasks); + + ret + } + + fn next_local_task(&self, core: &mut Core) -> Option { + self.next_lifo_task(core).or_else(|| core.run_queue.pop()) + } + + fn next_lifo_task(&self, core: &mut Core) -> Option { + core.lifo_slot.take() + } + + /// Function responsible for stealing tasks from another worker + /// + /// Note: Only if less than half the workers are searching for tasks to steal + /// a new worker will actually try to steal. The idea is to make sure not all + /// workers will be trying to steal at the same time. + fn search_for_work(&mut self, cx: &Context, mut core: Box) -> NextTaskResult { + #[cfg(not(loom))] + const ROUNDS: usize = 1; + + #[cfg(loom)] + const ROUNDS: usize = 1; + + debug_assert!(core.lifo_slot.is_none()); + debug_assert!(core.run_queue.is_empty()); + + if !self.transition_to_searching(cx, &mut core) { + return Ok((None, core)); + } + + // core = try_task!(self, self.poll_driver(cx, core)); + + // Get a snapshot of which workers are idle + cx.shared().idle.snapshot(&mut self.idle_snapshot); + + let num = cx.shared().remotes.len(); + + for i in 0..ROUNDS { + // Start from a random worker + let start = core.rand.fastrand_n(num as u32) as usize; + + if let Some(task) = self.steal_one_round(cx, &mut core, start) { + return Ok((Some(task), core)); + } + + core = try_task!(self.next_remote_task_batch(cx, core)); + + if i > 0 { + super::counters::inc_num_spin_stall(); + std::thread::sleep(std::time::Duration::from_micros(i as u64)); + } + } + + Ok((None, core)) + } + + fn steal_one_round(&self, cx: &Context, core: &mut Core, start: usize) -> Option { + let num = cx.shared().remotes.len(); + + for i in 0..num { + let i = (start + i) % num; + + // Don't steal from ourself! We know we don't have work. + if i == core.index { + continue; + } + + /* + // If the core is currently idle, then there is nothing to steal. + if self.idle_snapshot.is_idle(i) { + continue; + } + */ + + let target = &cx.shared().remotes[i]; + + if let Some(task) = target + .steal + .steal_into(&mut core.run_queue, &mut core.stats) + { + return Some(task); + } + } + + None + } + + fn run_task(&mut self, cx: &Context, mut core: Box, task: Notified) -> RunResult { + let task = cx.shared().owned.assert_owner(task); + + // Make sure the worker is not in the **searching** state. This enables + // another idle worker to try to steal work. + if self.transition_from_searching(cx, &mut core) { + super::counters::inc_num_relay_search(); + cx.shared().notify_parked_local(); + } + + self.assert_lifo_enabled_is_correct(cx); + + // Measure the poll start time. Note that we may end up polling other + // tasks under this measurement. In this case, the tasks came from the + // LIFO slot and are considered part of the current task for scheduling + // purposes. These tasks inherent the "parent"'s limits. + core.stats.start_poll(&mut self.stats); + + // Make the core available to the runtime context + *cx.core.borrow_mut() = Some(core); + + // Run the task + coop::budget(|| { + super::counters::inc_num_polls(); + task.run(); + let mut lifo_polls = 0; + + // As long as there is budget remaining and a task exists in the + // `lifo_slot`, then keep running. + loop { + // Check if we still have the core. If not, the core was stolen + // by another worker. 
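`steal_one_round` above walks the peers starting at a random index so contention is spread out, and never tries to steal from itself. The shape of that loop, with the actual `Steal::steal_into` call abstracted behind a closure (the function and closure here are illustrative):

```rust
fn steal_round<T>(
    num_workers: usize,
    me: usize,
    start: usize,
    mut try_steal: impl FnMut(usize) -> Option<T>, // stand-in for Steal::steal_into
) -> Option<T> {
    for i in 0..num_workers {
        let i = (start + i) % num_workers;
        if i == me {
            // Don't steal from ourselves; we already know we're empty.
            continue;
        }
        if let Some(task) = try_steal(i) {
            return Some(task);
        }
    }
    None
}

fn main() {
    // We are worker 0, the random starting point is 1, and only worker 2
    // currently has work available.
    let found = steal_round(4, 0, 1, |i| if i == 2 { Some("task") } else { None });
    assert_eq!(found, Some("task"));
}
```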
+ let mut core = match cx.core.borrow_mut().take() { + Some(core) => core, + None => { + // In this case, we cannot call `reset_lifo_enabled()` + // because the core was stolen. The stealer will handle + // that at the top of `Context::run` + return Err(()); + } + }; + + // Check for a task in the LIFO slot + let task = match self.next_lifo_task(&mut core) { + Some(task) => task, + None => { + self.reset_lifo_enabled(cx); + core.stats.end_poll(); + return Ok(core); + } + }; + + if !coop::has_budget_remaining() { + core.stats.end_poll(); + + // Not enough budget left to run the LIFO task, push it to + // the back of the queue and return. + core.run_queue + .push_back_or_overflow(task, cx.shared(), &mut core.stats); + // If we hit this point, the LIFO slot should be enabled. + // There is no need to reset it. + debug_assert!(cx.lifo_enabled.get()); + return Ok(core); + } + + // Track that we are about to run a task from the LIFO slot. + lifo_polls += 1; + super::counters::inc_lifo_schedules(); + + // Disable the LIFO slot if we reach our limit + // + // In ping-ping style workloads where task A notifies task B, + // which notifies task A again, continuously prioritizing the + // LIFO slot can cause starvation as these two tasks will + // repeatedly schedule the other. To mitigate this, we limit the + // number of times the LIFO slot is prioritized. + if lifo_polls >= MAX_LIFO_POLLS_PER_TICK { + cx.lifo_enabled.set(false); + super::counters::inc_lifo_capped(); + } + + // Run the LIFO task, then loop + *cx.core.borrow_mut() = Some(core); + let task = cx.shared().owned.assert_owner(task); + super::counters::inc_num_lifo_polls(); + task.run(); + } + }) + } + + fn schedule_deferred_with_core<'a>( + &mut self, + cx: &'a Context, + mut core: Box, + synced: impl FnOnce() -> MutexGuard<'a, Synced>, + ) -> NextTaskResult { + let mut defer = cx.defer.borrow_mut(); + + // Grab a task to run next + let task = defer.pop(); + + if task.is_none() { + return Ok((None, core)); + } + + if !defer.is_empty() { + let mut synced = synced(); + + // Number of tasks we want to try to spread across idle workers + let num_fanout = cmp::min(defer.len(), cx.shared().idle.num_idle(&synced.idle)); + + if num_fanout > 0 { + cx.shared() + .push_remote_task_batch_synced(&mut synced, defer.drain(..num_fanout)); + + cx.shared() + .idle + .notify_mult(&mut synced, &mut self.workers_to_notify, num_fanout); + } + + // Do not run the task while holding the lock... + drop(synced); + } + + // Notify any workers + for worker in self.workers_to_notify.drain(..) { + cx.shared().condvars[worker].notify_one() + } + + if !defer.is_empty() { + // Push the rest of the tasks on the local queue + for task in defer.drain(..) { + core.run_queue + .push_back_or_overflow(task, cx.shared(), &mut core.stats); + } + + cx.shared().notify_parked_local(); + } + + Ok((task, core)) + } + + fn schedule_deferred_without_core<'a>(&mut self, cx: &Context, synced: &mut Synced) { + let mut defer = cx.defer.borrow_mut(); + let num = defer.len(); + + if num > 0 { + // Push all tasks to the injection queue + cx.shared() + .push_remote_task_batch_synced(synced, defer.drain(..)); + + debug_assert!(self.workers_to_notify.is_empty()); + + // Notify workers + cx.shared() + .idle + .notify_mult(synced, &mut self.workers_to_notify, num); + + // Notify any workers + for worker in self.workers_to_notify.drain(..) 
{ + cx.shared().condvars[worker].notify_one() + } + } + } + + fn maybe_maintenance(&mut self, cx: &Context, mut core: Box) -> NextTaskResult { + if self.tick % cx.shared().config.event_interval == 0 { + super::counters::inc_num_maintenance(); + + core.stats.end_processing_scheduled_tasks(&mut self.stats); + + // Run regularly scheduled maintenance + core = try_task_new_batch!(self, self.park_yield(cx, core)); + + core.stats.start_processing_scheduled_tasks(&mut self.stats); + } + + Ok((None, core)) + } + + fn flush_metrics(&self, cx: &Context, core: &mut Core) { + core.stats.submit(&cx.shared().worker_metrics[core.index]); + } + + fn update_global_flags(&mut self, cx: &Context, synced: &mut Synced) { + if !self.is_shutdown { + self.is_shutdown = cx.shared().inject.is_closed(&synced.inject); + } + + if !self.is_traced { + self.is_traced = cx.shared().trace_status.trace_requested(); + } + } + + fn park_yield(&mut self, cx: &Context, core: Box) -> NextTaskResult { + // Call `park` with a 0 timeout. This enables the I/O driver, timer, ... + // to run without actually putting the thread to sleep. + if let Some(mut driver) = cx.shared().driver.take() { + driver.park_timeout(&cx.handle.driver, Duration::from_millis(0)); + + cx.shared().driver.set(driver); + } + + // If there are more I/O events, schedule them. + let (maybe_task, mut core) = + self.schedule_deferred_with_core(cx, core, || cx.shared().synced.lock())?; + + self.flush_metrics(cx, &mut core); + self.update_global_flags(cx, &mut cx.shared().synced.lock()); + + Ok((maybe_task, core)) + } + + /* + fn poll_driver(&mut self, cx: &Context, core: Box) -> NextTaskResult { + // Call `park` with a 0 timeout. This enables the I/O driver, timer, ... + // to run without actually putting the thread to sleep. + if let Some(mut driver) = cx.shared().driver.take() { + driver.park_timeout(&cx.handle.driver, Duration::from_millis(0)); + + cx.shared().driver.set(driver); + + // If there are more I/O events, schedule them. 
+ self.schedule_deferred_with_core(cx, core, || cx.shared().synced.lock()) + } else { + Ok((None, core)) + } + } + */ + + fn park(&mut self, cx: &Context, mut core: Box) -> NextTaskResult { + if let Some(f) = &cx.shared().config.before_park { + f(); + } + + if self.can_transition_to_parked(&mut core) { + debug_assert!(!self.is_shutdown); + debug_assert!(!self.is_traced); + + core = try_task!(self.do_park(cx, core)); + } + + if let Some(f) = &cx.shared().config.after_unpark { + f(); + } + + Ok((None, core)) + } + + fn do_park(&mut self, cx: &Context, mut core: Box) -> NextTaskResult { + let was_searching = core.is_searching; + + // Before we park, if we are searching, we need to transition away from searching + if self.transition_from_searching(cx, &mut core) { + cx.shared().idle.snapshot(&mut self.idle_snapshot); + // We were the last searching worker, we need to do one last check + if let Some(task) = self.steal_one_round(cx, &mut core, 0) { + cx.shared().notify_parked_local(); + + return Ok((Some(task), core)); + } + } + + // Acquire the lock + let mut synced = cx.shared().synced.lock(); + + // Try one last time to get tasks + let n = core.run_queue.max_capacity() / 2; + if let Some(task) = self.next_remote_task_batch_synced(cx, &mut synced, &mut core, n) { + return Ok((Some(task), core)); + } + + if !was_searching { + if cx + .shared() + .idle + .transition_worker_to_searching_if_needed(&mut synced.idle, &mut core) + { + // Skip parking, go back to searching + return Ok((None, core)); + } + } + + super::counters::inc_num_parks(); + core.stats.about_to_park(); + // Flush metrics to the runtime metrics aggregator + self.flush_metrics(cx, &mut core); + + // If the runtime is shutdown, skip parking + self.update_global_flags(cx, &mut synced); + + if self.is_shutdown { + return Ok((None, core)); + } + + // Core being returned must not be in the searching state + debug_assert!(!core.is_searching); + + // Release the core + cx.shared().idle.release_core(&mut synced, core); + + if let Some(mut driver) = cx.shared().driver.take() { + // Drop the lock before parking on the driver + drop(synced); + + // Wait for driver events + driver.park(&cx.handle.driver); + + synced = cx.shared().synced.lock(); + + // Put the driver back + cx.shared().driver.set(driver); + + if cx.shared().inject.is_closed(&mut synced.inject) { + self.shutdown_clear_defer(cx); + self.shutdown_finalize(cx, synced); + return Err(()); + } + + // Try to acquire an available core to schedule I/O events + if let Some(core) = self.try_acquire_available_core(cx, &mut synced) { + // This may result in a task being run + self.schedule_deferred_with_core(cx, core, move || synced) + } else { + // Schedule any deferred tasks + self.schedule_deferred_without_core(cx, &mut synced); + + // Wait for a core. 
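The `before_park` and `after_unpark` callbacks consulted in `park` above correspond, as far as I can tell, to the builder's public `on_thread_park`/`on_thread_unpark` hooks. A usage sketch, assuming a `tokio` dependency with the `full` feature:

```rust
use std::sync::atomic::{AtomicUsize, Ordering::Relaxed};
use std::sync::Arc;

fn main() {
    let parks = Arc::new(AtomicUsize::new(0));
    let parks_hook = Arc::clone(&parks);

    let rt = tokio::runtime::Builder::new_multi_thread()
        .worker_threads(2)
        .enable_all()
        // Runs just before a worker thread parks (the `before_park` hook).
        .on_thread_park(move || {
            parks_hook.fetch_add(1, Relaxed);
        })
        // Runs right after a worker thread unparks (the `after_unpark` hook).
        .on_thread_unpark(|| {})
        .build()
        .unwrap();

    rt.block_on(async {
        tokio::time::sleep(std::time::Duration::from_millis(10)).await;
    });

    println!("workers parked {} times", parks.load(Relaxed));
}
```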
+ self.wait_for_core(cx, synced) + } + } else { + // Wait for a core to be assigned to us + self.wait_for_core(cx, synced) + } + } + + fn transition_to_searching(&self, cx: &Context, core: &mut Core) -> bool { + if !core.is_searching { + cx.shared().idle.try_transition_worker_to_searching(core); + } + + core.is_searching + } + + /// Returns `true` if another worker must be notified + fn transition_from_searching(&self, cx: &Context, core: &mut Core) -> bool { + if !core.is_searching { + return false; + } + + cx.shared().idle.transition_worker_from_searching(core) + } + + fn can_transition_to_parked(&self, core: &mut Core) -> bool { + !self.has_tasks(core) && !self.is_shutdown && !self.is_traced + } + + fn has_tasks(&self, core: &Core) -> bool { + core.lifo_slot.is_some() || !core.run_queue.is_empty() + } + + /// Signals all tasks to shut down, and waits for them to complete. Must run + /// before we enter the single-threaded phase of shutdown processing. + fn pre_shutdown(&self, cx: &Context, core: &mut Core) { + // Signal to all tasks to shut down. + cx.shared().owned.close_and_shutdown_all(); + + core.stats.submit(&cx.shared().worker_metrics[core.index]); + } + + /// Signals that a worker has observed the shutdown signal and has replaced + /// its core back into its handle. + /// + /// If all workers have reached this point, the final cleanup is performed. + fn shutdown_core(&self, cx: &Context, core: Box) { + let mut synced = cx.shared().synced.lock(); + synced.shutdown_cores.push(core); + + self.shutdown_finalize(cx, synced); + } + + fn shutdown_finalize(&self, cx: &Context, mut synced: MutexGuard<'_, Synced>) { + // Wait for all cores + if synced.shutdown_cores.len() != cx.shared().remotes.len() { + return; + } + + let mut driver = match cx.shared().driver.take() { + Some(driver) => driver, + None => return, + }; + + debug_assert!(cx.shared().owned.is_empty()); + + for mut core in synced.shutdown_cores.drain(..) { + // Drain tasks from the local queue + while self.next_local_task(&mut core).is_some() {} + } + + // Shutdown the driver + driver.shutdown(&cx.handle.driver); + + // Drain the injection queue + // + // We already shut down every task, so we can simply drop the tasks. We + // cannot call `next_remote_task()` because we already hold the lock. + // + // safety: passing in correct `idle::Synced` + while let Some(task) = self.next_remote_task_synced(cx, &mut synced) { + drop(task); + } + } + + fn reset_lifo_enabled(&self, cx: &Context) { + cx.lifo_enabled + .set(!cx.handle.shared.config.disable_lifo_slot); + } + + fn assert_lifo_enabled_is_correct(&self, cx: &Context) { + debug_assert_eq!( + cx.lifo_enabled.get(), + !cx.handle.shared.config.disable_lifo_slot + ); + } + + fn tune_global_queue_interval(&mut self, cx: &Context, core: &mut Core) { + let next = core.stats.tuned_global_queue_interval(&cx.shared().config); + + debug_assert!(next > 1); + + // Smooth out jitter + if abs_diff(self.global_queue_interval, next) > 2 { + self.global_queue_interval = next; + } + } + + fn shutdown_clear_defer(&self, cx: &Context) { + let mut defer = cx.defer.borrow_mut(); + + for task in defer.drain(..) 
{ + drop(task); + } + } +} + +impl Context { + pub(crate) fn defer(&self, waker: &Waker) { + // TODO: refactor defer across all runtimes + waker.wake_by_ref(); + } + + fn shared(&self) -> &Shared { + &self.handle.shared + } +} + +impl Shared { + pub(super) fn schedule_task(&self, task: Notified, is_yield: bool) { + use std::ptr; + + with_current(|maybe_cx| { + if let Some(cx) = maybe_cx { + // Make sure the task is part of the **current** scheduler. + if ptr::eq(self, &cx.handle.shared) { + // And the current thread still holds a core + if let Some(core) = cx.core.borrow_mut().as_mut() { + if is_yield { + cx.defer.borrow_mut().push(task); + } else { + self.schedule_local(cx, core, task); + } + } else { + // This can happen if either the core was stolen + // (`block_in_place`) or the notification happens from + // the driver. + cx.defer.borrow_mut().push(task); + } + return; + } + } + + // Otherwise, use the inject queue. + self.schedule_remote(task); + }) + } + + fn schedule_local(&self, cx: &Context, core: &mut Core, task: Notified) { + core.stats.inc_local_schedule_count(); + + if cx.lifo_enabled.get() { + // Push to the LIFO slot + let prev = std::mem::replace(&mut core.lifo_slot, Some(task)); + // let prev = cx.shared().remotes[core.index].lifo_slot.swap_local(task); + + if let Some(prev) = prev { + core.run_queue + .push_back_or_overflow(prev, self, &mut core.stats); + } else { + return; + } + } else { + core.run_queue + .push_back_or_overflow(task, self, &mut core.stats); + } + + self.notify_parked_local(); + } + + fn notify_parked_local(&self) { + super::counters::inc_num_inc_notify_local(); + self.idle.notify_local(self); + } + + fn schedule_remote(&self, task: Notified) { + super::counters::inc_num_notify_remote(); + self.scheduler_metrics.inc_remote_schedule_count(); + + let mut synced = self.synced.lock(); + // Push the task in the + self.push_remote_task(&mut synced, task); + + // Notify a worker. The mutex is passed in and will be released as part + // of the method call. 
+        self.idle.notify_remote(synced, self);
+    }
+
+    pub(super) fn close(&self) {
+        let mut synced = self.synced.lock();
+
+        if self.inject.close(&mut synced.inject) {
+            // Set the shutdown flag on all available cores
+            self.idle.shutdown(&mut synced, self);
+        }
+    }
+
+    fn push_remote_task(&self, synced: &mut Synced, task: Notified) {
+        // safety: passing in correct `idle::Synced`
+        unsafe {
+            self.inject.push(&mut synced.inject, task);
+        }
+    }
+
+    fn push_remote_task_batch<I>(&self, iter: I)
+    where
+        I: Iterator<Item = task::Notified<Arc<Handle>>>,
+    {
+        unsafe {
+            self.inject.push_batch(self, iter);
+        }
+    }
+
+    fn push_remote_task_batch_synced<I>(&self, synced: &mut Synced, iter: I)
+    where
+        I: Iterator<Item = task::Notified<Arc<Handle>>>,
+    {
+        unsafe {
+            self.inject.push_batch(&mut synced.inject, iter);
+        }
+    }
+}
+
+impl Overflow<Arc<Handle>> for Shared {
+    fn push(&self, task: task::Notified<Arc<Handle>>) {
+        self.push_remote_task(&mut self.synced.lock(), task);
+    }
+
+    fn push_batch<I>(&self, iter: I)
+    where
+        I: Iterator<Item = task::Notified<Arc<Handle>>>,
+    {
+        self.push_remote_task_batch(iter)
+    }
+}
+
+impl<'a> Lock<inject::Synced> for &'a Shared {
+    type Handle = InjectGuard<'a>;
+
+    fn lock(self) -> Self::Handle {
+        InjectGuard {
+            lock: self.synced.lock(),
+        }
+    }
+}
+
+impl task::Schedule for Arc<Handle> {
+    fn release(&self, task: &Task) -> Option<Task> {
+        self.shared.owned.remove(task)
+    }
+
+    fn schedule(&self, task: Notified) {
+        self.shared.schedule_task(task, false);
+    }
+
+    fn yield_now(&self, task: Notified) {
+        self.shared.schedule_task(task, true);
+    }
+}
+
+pub(crate) struct InjectGuard<'a> {
+    lock: crate::loom::sync::MutexGuard<'a, Synced>,
+}
+
+impl<'a> AsMut<inject::Synced> for InjectGuard<'a> {
+    fn as_mut(&mut self) -> &mut inject::Synced {
+        &mut self.lock.inject
+    }
+}
+
+#[track_caller]
+fn with_current<R>(f: impl FnOnce(Option<&Context>) -> R) -> R {
+    use scheduler::Context::MultiThreadAlt;
+
+    context::with_scheduler(|ctx| match ctx {
+        Some(MultiThreadAlt(ctx)) => f(Some(ctx)),
+        _ => f(None),
+    })
+}
+
+// `u32::abs_diff` is not available on Tokio's MSRV.
+fn abs_diff(a: u32, b: u32) -> u32 {
+    if a > b {
+        a - b
+    } else {
+        b - a
+    }
+}
diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/worker/metrics.rs b/tokio/src/runtime/scheduler/multi_thread_alt/worker/metrics.rs
new file mode 100644
index 00000000000..a9a5ab3ed60
--- /dev/null
+++ b/tokio/src/runtime/scheduler/multi_thread_alt/worker/metrics.rs
@@ -0,0 +1,11 @@
+use super::Shared;
+
+impl Shared {
+    pub(crate) fn injection_queue_depth(&self) -> usize {
+        self.inject.len()
+    }
+
+    pub(crate) fn worker_local_queue_depth(&self, worker: usize) -> usize {
+        self.remotes[worker].steal.len()
+    }
+}
diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/worker/taskdump.rs b/tokio/src/runtime/scheduler/multi_thread_alt/worker/taskdump.rs
new file mode 100644
index 00000000000..7cf69c43ddc
--- /dev/null
+++ b/tokio/src/runtime/scheduler/multi_thread_alt/worker/taskdump.rs
@@ -0,0 +1,79 @@
+use super::{Core, Handle, Shared};
+
+use crate::loom::sync::Arc;
+use crate::runtime::scheduler::multi_thread_alt::Stats;
+use crate::runtime::task::trace::trace_multi_thread;
+use crate::runtime::{dump, WorkerMetrics};
+
+use std::time::Duration;
+
+impl Handle {
+    pub(super) fn trace_core(&self, mut core: Box<Core>) -> Box<Core> {
+        core.is_traced = false;
+
+        if core.is_shutdown {
+            return core;
+        }
+
+        // wait for other workers, or timeout without tracing
+        let timeout = Duration::from_millis(250); // a _very_ generous timeout
+        let barrier =
+            if let Some(barrier) = self.shared.trace_status.trace_start.wait_timeout(timeout) {
+                barrier
+            } else {
+                // don't attempt to trace
+                return core;
+            };
+
+        if !barrier.is_leader() {
+            // wait for leader to finish tracing
+            self.shared.trace_status.trace_end.wait();
+            return core;
+        }
+
+        // trace
+
+        let owned = &self.shared.owned;
+        let mut local = self.shared.steal_all();
+        let synced = &self.shared.synced;
+        let injection = &self.shared.inject;
+
+        // safety: `trace_multi_thread` is invoked with the same `synced` that `injection`
+        // was created with.
+        let traces = unsafe { trace_multi_thread(owned, &mut local, synced, injection) }
+            .into_iter()
+            .map(dump::Task::new)
+            .collect();
+
+        let result = dump::Dump::new(traces);
+
+        // stash the result
+        self.shared.trace_status.stash_result(result);
+
+        // allow other workers to proceed
+        self.shared.trace_status.trace_end.wait();
+
+        core
+    }
+}
+
+impl Shared {
+    /// Steal all tasks from remotes into a single local queue.
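+    ///
+    /// Only used by `trace_core` above while producing a task dump; the other
+    /// workers are held at the tracing barrier while this runs.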
+ pub(super) fn steal_all(&self) -> super::queue::Local> { + let (_steal, mut local) = super::queue::local(); + + let worker_metrics = WorkerMetrics::new(); + let mut stats = Stats::new(&worker_metrics); + + for remote in self.remotes.iter() { + let steal = &remote.steal; + while !steal.is_empty() { + if let Some(task) = steal.steal_into(&mut local, &mut stats) { + local.push_back([task].into_iter()); + } + } + } + + local + } +} diff --git a/tokio/src/runtime/scheduler/multi_thread_alt/worker/taskdump_mock.rs b/tokio/src/runtime/scheduler/multi_thread_alt/worker/taskdump_mock.rs new file mode 100644 index 00000000000..24c5600ce2d --- /dev/null +++ b/tokio/src/runtime/scheduler/multi_thread_alt/worker/taskdump_mock.rs @@ -0,0 +1,7 @@ +use super::{Core, Handle}; + +impl Handle { + pub(super) fn trace_core(&self, core: Box) -> Box { + core + } +} diff --git a/tokio/src/runtime/task/list.rs b/tokio/src/runtime/task/list.rs index 7f376affda2..3d2f57404fd 100644 --- a/tokio/src/runtime/task/list.rs +++ b/tokio/src/runtime/task/list.rs @@ -128,7 +128,7 @@ impl OwnedTasks { /// a LocalNotified, giving the thread permission to poll this task. #[inline] pub(crate) fn assert_owner(&self, task: Notified) -> LocalNotified { - assert_eq!(task.header().get_owner_id(), Some(self.id)); + debug_assert_eq!(task.header().get_owner_id(), Some(self.id)); // safety: All tasks bound to this OwnedTasks are Send, so it is safe // to poll it on this thread no matter what thread we are on. diff --git a/tokio/src/runtime/task/trace/mod.rs b/tokio/src/runtime/task/trace/mod.rs index 543b7eee98e..9c61014e865 100644 --- a/tokio/src/runtime/task/trace/mod.rs +++ b/tokio/src/runtime/task/trace/mod.rs @@ -186,6 +186,8 @@ pub(crate) fn trace_leaf(cx: &mut task::Context<'_>) -> Poll<()> { scheduler::Context::CurrentThread(s) => s.defer.defer(cx.waker()), #[cfg(all(feature = "rt-multi-thread", not(tokio_wasi)))] scheduler::Context::MultiThread(s) => s.defer.defer(cx.waker()), + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", not(tokio_wasi)))] + scheduler::Context::MultiThreadAlt(_) => unimplemented!(), } } }); diff --git a/tokio/src/runtime/tests/loom_current_thread_scheduler.rs b/tokio/src/runtime/tests/loom_current_thread.rs similarity index 99% rename from tokio/src/runtime/tests/loom_current_thread_scheduler.rs rename to tokio/src/runtime/tests/loom_current_thread.rs index a772603f711..edda6e49954 100644 --- a/tokio/src/runtime/tests/loom_current_thread_scheduler.rs +++ b/tokio/src/runtime/tests/loom_current_thread.rs @@ -1,3 +1,5 @@ +mod yield_now; + use crate::loom::sync::atomic::AtomicUsize; use crate::loom::sync::Arc; use crate::loom::thread; diff --git a/tokio/src/runtime/tests/loom_yield.rs b/tokio/src/runtime/tests/loom_current_thread/yield_now.rs similarity index 100% rename from tokio/src/runtime/tests/loom_yield.rs rename to tokio/src/runtime/tests/loom_current_thread/yield_now.rs diff --git a/tokio/src/runtime/tests/loom_pool.rs b/tokio/src/runtime/tests/loom_multi_thread.rs similarity index 99% rename from tokio/src/runtime/tests/loom_pool.rs rename to tokio/src/runtime/tests/loom_multi_thread.rs index fb42e1eb40b..c5980c226e0 100644 --- a/tokio/src/runtime/tests/loom_pool.rs +++ b/tokio/src/runtime/tests/loom_multi_thread.rs @@ -1,3 +1,7 @@ +mod queue; +mod shutdown; +mod yield_now; + /// Full runtime loom tests. These are heavy tests and take significant time to /// run on CI. 
/// @@ -412,8 +416,8 @@ async fn multi_gated() { } poll_fn(move |cx| { + gate.waker.register_by_ref(cx.waker()); if gate.count.load(SeqCst) < 2 { - gate.waker.register_by_ref(cx.waker()); Poll::Pending } else { Poll::Ready(()) diff --git a/tokio/src/runtime/tests/loom_queue.rs b/tokio/src/runtime/tests/loom_multi_thread/queue.rs similarity index 91% rename from tokio/src/runtime/tests/loom_queue.rs rename to tokio/src/runtime/tests/loom_multi_thread/queue.rs index b60e039b9a6..0d818283653 100644 --- a/tokio/src/runtime/tests/loom_queue.rs +++ b/tokio/src/runtime/tests/loom_multi_thread/queue.rs @@ -1,5 +1,5 @@ use crate::runtime::scheduler::multi_thread::{queue, Stats}; -use crate::runtime::tests::NoopSchedule; +use crate::runtime::tests::{unowned, NoopSchedule}; use loom::thread; use std::cell::RefCell; @@ -37,7 +37,7 @@ fn basic() { for _ in 0..2 { for _ in 0..2 { - let (task, _) = super::unowned(async {}); + let (task, _) = unowned(async {}); local.push_back_or_overflow(task, &inject, &mut stats); } @@ -46,7 +46,7 @@ fn basic() { } // Push another task - let (task, _) = super::unowned(async {}); + let (task, _) = unowned(async {}); local.push_back_or_overflow(task, &inject, &mut stats); while local.pop().is_some() { @@ -88,7 +88,7 @@ fn steal_overflow() { let mut n = 0; // push a task, pop a task - let (task, _) = super::unowned(async {}); + let (task, _) = unowned(async {}); local.push_back_or_overflow(task, &inject, &mut stats); if local.pop().is_some() { @@ -96,7 +96,7 @@ fn steal_overflow() { } for _ in 0..6 { - let (task, _) = super::unowned(async {}); + let (task, _) = unowned(async {}); local.push_back_or_overflow(task, &inject, &mut stats); } @@ -140,7 +140,7 @@ fn multi_stealer() { // Push work for _ in 0..NUM_TASKS { - let (task, _) = super::unowned(async {}); + let (task, _) = unowned(async {}); local.push_back_or_overflow(task, &inject, &mut stats); } @@ -176,10 +176,10 @@ fn chained_steal() { // Load up some tasks for _ in 0..4 { - let (task, _) = super::unowned(async {}); + let (task, _) = unowned(async {}); l1.push_back_or_overflow(task, &inject, &mut stats); - let (task, _) = super::unowned(async {}); + let (task, _) = unowned(async {}); l2.push_back_or_overflow(task, &inject, &mut stats); } diff --git a/tokio/src/runtime/tests/loom_shutdown_join.rs b/tokio/src/runtime/tests/loom_multi_thread/shutdown.rs similarity index 100% rename from tokio/src/runtime/tests/loom_shutdown_join.rs rename to tokio/src/runtime/tests/loom_multi_thread/shutdown.rs diff --git a/tokio/src/runtime/tests/loom_multi_thread/yield_now.rs b/tokio/src/runtime/tests/loom_multi_thread/yield_now.rs new file mode 100644 index 00000000000..ba506e5a408 --- /dev/null +++ b/tokio/src/runtime/tests/loom_multi_thread/yield_now.rs @@ -0,0 +1,37 @@ +use crate::runtime::park; +use crate::runtime::tests::loom_oneshot as oneshot; +use crate::runtime::{self, Runtime}; + +#[test] +fn yield_calls_park_before_scheduling_again() { + // Don't need to check all permutations + let mut loom = loom::model::Builder::default(); + loom.max_permutations = Some(1); + loom.check(|| { + let rt = mk_runtime(2); + let (tx, rx) = oneshot::channel::<()>(); + + rt.spawn(async { + let tid = loom::thread::current().id(); + let park_count = park::current_thread_park_count(); + + crate::task::yield_now().await; + + if tid == loom::thread::current().id() { + let new_park_count = park::current_thread_park_count(); + assert_eq!(park_count + 1, new_park_count); + } + + tx.send(()); + }); + + rx.recv(); + }); +} + +fn mk_runtime(num_threads: 
usize) -> Runtime {
+    runtime::Builder::new_multi_thread()
+        .worker_threads(num_threads)
+        .build()
+        .unwrap()
+}
diff --git a/tokio/src/runtime/tests/loom_multi_thread_alt.rs b/tokio/src/runtime/tests/loom_multi_thread_alt.rs
new file mode 100644
index 00000000000..6ab066ab6f6
--- /dev/null
+++ b/tokio/src/runtime/tests/loom_multi_thread_alt.rs
@@ -0,0 +1,463 @@
+mod queue;
+mod shutdown;
+mod yield_now;
+
+/// Full runtime loom tests. These are heavy tests and take significant time to
+/// run on CI.
+///
+/// Use `LOOM_MAX_PREEMPTIONS=1` to do a "quick" run as a smoke test.
+///
+/// In order to speed up the C
+use crate::future::poll_fn;
+use crate::runtime::tests::loom_oneshot as oneshot;
+use crate::runtime::{self, Runtime};
+use crate::{spawn, task};
+use tokio_test::assert_ok;
+
+use loom::sync::atomic::{AtomicBool, AtomicUsize};
+use loom::sync::Arc;
+
+use pin_project_lite::pin_project;
+use std::future::Future;
+use std::pin::Pin;
+use std::sync::atomic::Ordering::{Relaxed, SeqCst};
+use std::task::{Context, Poll};
+
+mod atomic_take {
+    use loom::sync::atomic::AtomicBool;
+    use std::mem::MaybeUninit;
+    use std::sync::atomic::Ordering::SeqCst;
+
+    pub(super) struct AtomicTake<T> {
+        inner: MaybeUninit<T>,
+        taken: AtomicBool,
+    }
+
+    impl<T> AtomicTake<T> {
+        pub(super) fn new(value: T) -> Self {
+            Self {
+                inner: MaybeUninit::new(value),
+                taken: AtomicBool::new(false),
+            }
+        }
+
+        pub(super) fn take(&self) -> Option<T> {
+            // safety: Only one thread will see the boolean change from false
+            // to true, so that thread is able to take the value.
+            match self.taken.fetch_or(true, SeqCst) {
+                false => unsafe { Some(std::ptr::read(self.inner.as_ptr())) },
+                true => None,
+            }
+        }
+    }
+
+    impl<T> Drop for AtomicTake<T> {
+        fn drop(&mut self) {
+            drop(self.take());
+        }
+    }
+}
+
+#[derive(Clone)]
+struct AtomicOneshot<T> {
+    value: std::sync::Arc<atomic_take::AtomicTake<oneshot::Sender<T>>>,
+}
+impl<T> AtomicOneshot<T> {
+    fn new(sender: oneshot::Sender<T>) -> Self {
+        Self {
+            value: std::sync::Arc::new(atomic_take::AtomicTake::new(sender)),
+        }
+    }
+
+    fn assert_send(&self, value: T) {
+        self.value.take().unwrap().send(value);
+    }
+}
+
+/// Tests are divided into groups to make the runs faster on CI.
+mod group_a {
+    use super::*;
+
+    #[test]
+    fn racy_shutdown() {
+        loom::model(|| {
+            let pool = mk_pool(1);
+
+            // here's the case we want to exercise:
+            //
+            // a worker that still has tasks in its local queue gets sent to the blocking pool (due to
+            // block_in_place). the blocking pool is shut down, so drops the worker. the worker's
+            // shutdown method never gets run.
+            //
+            // we do this by spawning two tasks on one worker, the first of which does block_in_place,
+            // and then immediately drop the pool.
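+            //
+            // (`track` wraps each spawned future so loom's leak tracking can
+            // observe it; see the `Track` helper at the bottom of this file.)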
+ + pool.spawn(track(async { + crate::task::block_in_place(|| {}); + })); + pool.spawn(track(async {})); + drop(pool); + }); + } + + #[test] + fn pool_multi_spawn() { + loom::model(|| { + let pool = mk_pool(2); + let c1 = Arc::new(AtomicUsize::new(0)); + + let (tx, rx) = oneshot::channel(); + let tx1 = AtomicOneshot::new(tx); + + // Spawn a task + let c2 = c1.clone(); + let tx2 = tx1.clone(); + pool.spawn(track(async move { + spawn(track(async move { + if 1 == c1.fetch_add(1, Relaxed) { + tx1.assert_send(()); + } + })); + })); + + // Spawn a second task + pool.spawn(track(async move { + spawn(track(async move { + if 1 == c2.fetch_add(1, Relaxed) { + tx2.assert_send(()); + } + })); + })); + + rx.recv(); + }); + } + + fn only_blocking_inner(first_pending: bool) { + loom::model(move || { + let pool = mk_pool(1); + let (block_tx, block_rx) = oneshot::channel(); + + pool.spawn(track(async move { + crate::task::block_in_place(move || { + block_tx.send(()); + }); + if first_pending { + task::yield_now().await + } + })); + + block_rx.recv(); + drop(pool); + }); + } + + #[test] + fn only_blocking_without_pending() { + only_blocking_inner(false) + } + + #[test] + fn only_blocking_with_pending() { + only_blocking_inner(true) + } +} + +mod group_b { + use super::*; + + fn blocking_and_regular_inner(first_pending: bool) { + const NUM: usize = 3; + loom::model(move || { + let pool = mk_pool(1); + let cnt = Arc::new(AtomicUsize::new(0)); + + let (block_tx, block_rx) = oneshot::channel(); + let (done_tx, done_rx) = oneshot::channel(); + let done_tx = AtomicOneshot::new(done_tx); + + pool.spawn(track(async move { + crate::task::block_in_place(move || { + block_tx.send(()); + }); + if first_pending { + task::yield_now().await + } + })); + + for _ in 0..NUM { + let cnt = cnt.clone(); + let done_tx = done_tx.clone(); + + pool.spawn(track(async move { + if NUM == cnt.fetch_add(1, Relaxed) + 1 { + done_tx.assert_send(()); + } + })); + } + + done_rx.recv(); + block_rx.recv(); + + drop(pool); + }); + } + + #[test] + #[ignore] // TODO: uncomment + fn blocking_and_regular_without_pending() { + blocking_and_regular_inner(false); + } + + #[test] + fn blocking_and_regular_with_pending() { + blocking_and_regular_inner(true); + } + + #[test] + fn join_output() { + loom::model(|| { + let rt = mk_pool(1); + + rt.block_on(async { + let t = crate::spawn(track(async { "hello" })); + + let out = assert_ok!(t.await); + assert_eq!("hello", out.into_inner()); + }); + }); + } + + #[test] + fn poll_drop_handle_then_drop() { + loom::model(|| { + let rt = mk_pool(1); + + rt.block_on(async move { + let mut t = crate::spawn(track(async { "hello" })); + + poll_fn(|cx| { + let _ = Pin::new(&mut t).poll(cx); + Poll::Ready(()) + }) + .await; + }); + }) + } + + #[test] + fn complete_block_on_under_load() { + loom::model(|| { + let pool = mk_pool(1); + + pool.block_on(async { + // Trigger a re-schedule + crate::spawn(track(async { + for _ in 0..2 { + task::yield_now().await; + } + })); + + gated2(true).await + }); + }); + } + + #[test] + fn shutdown_with_notification() { + use crate::sync::oneshot; + + loom::model(|| { + let rt = mk_pool(2); + let (done_tx, done_rx) = oneshot::channel::<()>(); + + rt.spawn(track(async move { + let (tx, rx) = oneshot::channel::<()>(); + + crate::spawn(async move { + crate::task::spawn_blocking(move || { + let _ = tx.send(()); + }); + + let _ = done_rx.await; + }); + + let _ = rx.await; + + let _ = done_tx.send(()); + })); + }); + } +} + +mod group_c { + use super::*; + + #[test] + fn pool_shutdown() { + 
loom::model(|| { + let pool = mk_pool(2); + + pool.spawn(track(async move { + gated2(true).await; + })); + + pool.spawn(track(async move { + gated2(false).await; + })); + + drop(pool); + }); + } +} + +mod group_d { + use super::*; + + #[test] + fn pool_multi_notify() { + loom::model(|| { + let pool = mk_pool(2); + + let c1 = Arc::new(AtomicUsize::new(0)); + + let (done_tx, done_rx) = oneshot::channel(); + let done_tx1 = AtomicOneshot::new(done_tx); + let done_tx2 = done_tx1.clone(); + + // Spawn a task + let c2 = c1.clone(); + pool.spawn(track(async move { + multi_gated().await; + + if 1 == c1.fetch_add(1, Relaxed) { + done_tx1.assert_send(()); + } + })); + + // Spawn a second task + pool.spawn(track(async move { + multi_gated().await; + + if 1 == c2.fetch_add(1, Relaxed) { + done_tx2.assert_send(()); + } + })); + + done_rx.recv(); + }); + } +} + +fn mk_pool(num_threads: usize) -> Runtime { + runtime::Builder::new_multi_thread_alt() + .worker_threads(num_threads) + // Set the intervals to avoid tuning logic + .global_queue_interval(61) + .build() + .unwrap() +} + +fn gated2(thread: bool) -> impl Future { + use loom::thread; + use std::sync::Arc; + + let gate = Arc::new(AtomicBool::new(false)); + let mut fired = false; + + poll_fn(move |cx| { + if !fired { + let gate = gate.clone(); + let waker = cx.waker().clone(); + + if thread { + thread::spawn(move || { + gate.store(true, SeqCst); + waker.wake_by_ref(); + }); + } else { + spawn(track(async move { + gate.store(true, SeqCst); + waker.wake_by_ref(); + })); + } + + fired = true; + + return Poll::Pending; + } + + if gate.load(SeqCst) { + Poll::Ready("hello world") + } else { + Poll::Pending + } + }) +} + +async fn multi_gated() { + struct Gate { + waker: loom::future::AtomicWaker, + count: AtomicUsize, + } + + let gate = Arc::new(Gate { + waker: loom::future::AtomicWaker::new(), + count: AtomicUsize::new(0), + }); + + { + let gate = gate.clone(); + spawn(track(async move { + for i in 1..3 { + gate.count.store(i, SeqCst); + gate.waker.wake(); + } + })); + } + + poll_fn(move |cx| { + gate.waker.register_by_ref(cx.waker()); + if gate.count.load(SeqCst) < 2 { + Poll::Pending + } else { + Poll::Ready(()) + } + }) + .await; +} + +fn track(f: T) -> Track { + Track { + inner: f, + arc: Arc::new(()), + } +} + +pin_project! { + struct Track { + #[pin] + inner: T, + // Arc is used to hook into loom's leak tracking. 
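+        // Cloning it into the output in `poll` extends that tracking to the
+        // returned value as well.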
+ arc: Arc<()>, + } +} + +impl Track { + fn into_inner(self) -> T { + self.inner + } +} + +impl Future for Track { + type Output = Track; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let me = self.project(); + + Poll::Ready(Track { + inner: ready!(me.inner.poll(cx)), + arc: me.arc.clone(), + }) + } +} diff --git a/tokio/src/runtime/tests/loom_multi_thread_alt/queue.rs b/tokio/src/runtime/tests/loom_multi_thread_alt/queue.rs new file mode 100644 index 00000000000..0d818283653 --- /dev/null +++ b/tokio/src/runtime/tests/loom_multi_thread_alt/queue.rs @@ -0,0 +1,205 @@ +use crate::runtime::scheduler::multi_thread::{queue, Stats}; +use crate::runtime::tests::{unowned, NoopSchedule}; + +use loom::thread; +use std::cell::RefCell; + +fn new_stats() -> Stats { + Stats::new(&crate::runtime::WorkerMetrics::new()) +} + +#[test] +fn basic() { + loom::model(|| { + let (steal, mut local) = queue::local(); + let inject = RefCell::new(vec![]); + let mut stats = new_stats(); + + let th = thread::spawn(move || { + let mut stats = new_stats(); + let (_, mut local) = queue::local(); + let mut n = 0; + + for _ in 0..3 { + if steal.steal_into(&mut local, &mut stats).is_some() { + n += 1; + } + + while local.pop().is_some() { + n += 1; + } + } + + n + }); + + let mut n = 0; + + for _ in 0..2 { + for _ in 0..2 { + let (task, _) = unowned(async {}); + local.push_back_or_overflow(task, &inject, &mut stats); + } + + if local.pop().is_some() { + n += 1; + } + + // Push another task + let (task, _) = unowned(async {}); + local.push_back_or_overflow(task, &inject, &mut stats); + + while local.pop().is_some() { + n += 1; + } + } + + n += inject.borrow_mut().drain(..).count(); + + n += th.join().unwrap(); + + assert_eq!(6, n); + }); +} + +#[test] +fn steal_overflow() { + loom::model(|| { + let (steal, mut local) = queue::local(); + let inject = RefCell::new(vec![]); + let mut stats = new_stats(); + + let th = thread::spawn(move || { + let mut stats = new_stats(); + let (_, mut local) = queue::local(); + let mut n = 0; + + if steal.steal_into(&mut local, &mut stats).is_some() { + n += 1; + } + + while local.pop().is_some() { + n += 1; + } + + n + }); + + let mut n = 0; + + // push a task, pop a task + let (task, _) = unowned(async {}); + local.push_back_or_overflow(task, &inject, &mut stats); + + if local.pop().is_some() { + n += 1; + } + + for _ in 0..6 { + let (task, _) = unowned(async {}); + local.push_back_or_overflow(task, &inject, &mut stats); + } + + n += th.join().unwrap(); + + while local.pop().is_some() { + n += 1; + } + + n += inject.borrow_mut().drain(..).count(); + + assert_eq!(7, n); + }); +} + +#[test] +fn multi_stealer() { + const NUM_TASKS: usize = 5; + + fn steal_tasks(steal: queue::Steal) -> usize { + let mut stats = new_stats(); + let (_, mut local) = queue::local(); + + if steal.steal_into(&mut local, &mut stats).is_none() { + return 0; + } + + let mut n = 1; + + while local.pop().is_some() { + n += 1; + } + + n + } + + loom::model(|| { + let (steal, mut local) = queue::local(); + let inject = RefCell::new(vec![]); + let mut stats = new_stats(); + + // Push work + for _ in 0..NUM_TASKS { + let (task, _) = unowned(async {}); + local.push_back_or_overflow(task, &inject, &mut stats); + } + + let th1 = { + let steal = steal.clone(); + thread::spawn(move || steal_tasks(steal)) + }; + + let th2 = thread::spawn(move || steal_tasks(steal)); + + let mut n = 0; + + while local.pop().is_some() { + n += 1; + } + + n += inject.borrow_mut().drain(..).count(); + + n += th1.join().unwrap(); + n 
+= th2.join().unwrap(); + + assert_eq!(n, NUM_TASKS); + }); +} + +#[test] +fn chained_steal() { + loom::model(|| { + let mut stats = new_stats(); + let (s1, mut l1) = queue::local(); + let (s2, mut l2) = queue::local(); + let inject = RefCell::new(vec![]); + + // Load up some tasks + for _ in 0..4 { + let (task, _) = unowned(async {}); + l1.push_back_or_overflow(task, &inject, &mut stats); + + let (task, _) = unowned(async {}); + l2.push_back_or_overflow(task, &inject, &mut stats); + } + + // Spawn a task to steal from **our** queue + let th = thread::spawn(move || { + let mut stats = new_stats(); + let (_, mut local) = queue::local(); + s1.steal_into(&mut local, &mut stats); + + while local.pop().is_some() {} + }); + + // Drain our tasks, then attempt to steal + while l1.pop().is_some() {} + + s2.steal_into(&mut l1, &mut stats); + + th.join().unwrap(); + + while l1.pop().is_some() {} + while l2.pop().is_some() {} + }); +} diff --git a/tokio/src/runtime/tests/loom_multi_thread_alt/shutdown.rs b/tokio/src/runtime/tests/loom_multi_thread_alt/shutdown.rs new file mode 100644 index 00000000000..6fbc4bfdedf --- /dev/null +++ b/tokio/src/runtime/tests/loom_multi_thread_alt/shutdown.rs @@ -0,0 +1,28 @@ +use crate::runtime::{Builder, Handle}; + +#[test] +fn join_handle_cancel_on_shutdown() { + let mut builder = loom::model::Builder::new(); + builder.preemption_bound = Some(2); + builder.check(|| { + use futures::future::FutureExt; + + let rt = Builder::new_multi_thread() + .worker_threads(2) + .build() + .unwrap(); + + let handle = rt.block_on(async move { Handle::current() }); + + let jh1 = handle.spawn(futures::future::pending::<()>()); + + drop(rt); + + let jh2 = handle.spawn(futures::future::pending::<()>()); + + let err1 = jh1.now_or_never().unwrap().unwrap_err(); + let err2 = jh2.now_or_never().unwrap().unwrap_err(); + assert!(err1.is_cancelled()); + assert!(err2.is_cancelled()); + }); +} diff --git a/tokio/src/runtime/tests/loom_multi_thread_alt/yield_now.rs b/tokio/src/runtime/tests/loom_multi_thread_alt/yield_now.rs new file mode 100644 index 00000000000..ba506e5a408 --- /dev/null +++ b/tokio/src/runtime/tests/loom_multi_thread_alt/yield_now.rs @@ -0,0 +1,37 @@ +use crate::runtime::park; +use crate::runtime::tests::loom_oneshot as oneshot; +use crate::runtime::{self, Runtime}; + +#[test] +fn yield_calls_park_before_scheduling_again() { + // Don't need to check all permutations + let mut loom = loom::model::Builder::default(); + loom.max_permutations = Some(1); + loom.check(|| { + let rt = mk_runtime(2); + let (tx, rx) = oneshot::channel::<()>(); + + rt.spawn(async { + let tid = loom::thread::current().id(); + let park_count = park::current_thread_park_count(); + + crate::task::yield_now().await; + + if tid == loom::thread::current().id() { + let new_park_count = park::current_thread_park_count(); + assert_eq!(park_count + 1, new_park_count); + } + + tx.send(()); + }); + + rx.recv(); + }); +} + +fn mk_runtime(num_threads: usize) -> Runtime { + runtime::Builder::new_multi_thread() + .worker_threads(num_threads) + .build() + .unwrap() +} diff --git a/tokio/src/runtime/tests/mod.rs b/tokio/src/runtime/tests/mod.rs index 56699998c21..0ba7480cd4b 100644 --- a/tokio/src/runtime/tests/mod.rs +++ b/tokio/src/runtime/tests/mod.rs @@ -52,14 +52,12 @@ mod unowned_wrapper { cfg_loom! 
{ mod loom_blocking; - mod loom_current_thread_scheduler; + mod loom_current_thread; + mod loom_join_set; mod loom_local; + mod loom_multi_thread; + mod loom_multi_thread_alt; mod loom_oneshot; - mod loom_pool; - mod loom_queue; - mod loom_shutdown_join; - mod loom_join_set; - mod loom_yield; // Make sure debug assertions are enabled #[cfg(not(debug_assertions))] diff --git a/tokio/src/runtime/tests/task.rs b/tokio/src/runtime/tests/task.rs index a79c0f50d15..0485bba7a00 100644 --- a/tokio/src/runtime/tests/task.rs +++ b/tokio/src/runtime/tests/task.rs @@ -1,11 +1,10 @@ use crate::runtime::task::{self, unowned, Id, JoinHandle, OwnedTasks, Schedule, Task}; use crate::runtime::tests::NoopSchedule; -use crate::util::TryLock; use std::collections::VecDeque; use std::future::Future; use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; struct AssertDropHandle { is_dropped: Arc, @@ -243,7 +242,7 @@ fn with(f: impl FnOnce(Runtime)) { let rt = Runtime(Arc::new(Inner { owned: OwnedTasks::new(), - core: TryLock::new(Core { + core: Mutex::new(Core { queue: VecDeque::new(), }), })); @@ -256,7 +255,7 @@ fn with(f: impl FnOnce(Runtime)) { struct Runtime(Arc); struct Inner { - core: TryLock, + core: Mutex, owned: OwnedTasks, } @@ -264,7 +263,7 @@ struct Core { queue: VecDeque>, } -static CURRENT: TryLock> = TryLock::new(None); +static CURRENT: Mutex> = Mutex::new(None); impl Runtime { fn spawn(&self, future: T) -> JoinHandle diff --git a/tokio/src/task/blocking.rs b/tokio/src/task/blocking.rs index 9bd15ebd5d8..1cce466394e 100644 --- a/tokio/src/task/blocking.rs +++ b/tokio/src/task/blocking.rs @@ -75,7 +75,7 @@ cfg_rt_multi_thread! { where F: FnOnce() -> R, { - crate::runtime::scheduler::multi_thread::block_in_place(f) + crate::runtime::scheduler::block_in_place(f) } } diff --git a/tokio/tests/rt_common.rs b/tokio/tests/rt_common.rs index 9c6add047a7..9ab7fd3516e 100644 --- a/tokio/tests/rt_common.rs +++ b/tokio/tests/rt_common.rs @@ -52,6 +52,40 @@ macro_rules! rt_test { .into() } } + + #[cfg(not(tokio_wasi))] // Wasi doesn't support threads + #[cfg(tokio_unstable)] + mod alt_threaded_scheduler_4_threads { + $($t)* + + const NUM_WORKERS: usize = 4; + + fn rt() -> Arc { + tokio::runtime::Builder::new_multi_thread() + .worker_threads(4) + .enable_all() + .build() + .unwrap() + .into() + } + } + + #[cfg(not(tokio_wasi))] // Wasi doesn't support threads + #[cfg(tokio_unstable)] + mod alt_threaded_scheduler_1_thread { + $($t)* + + const NUM_WORKERS: usize = 1; + + fn rt() -> Arc { + tokio::runtime::Builder::new_multi_thread() + .worker_threads(1) + .enable_all() + .build() + .unwrap() + .into() + } + } } } diff --git a/tokio/tests/rt_threaded_alt.rs b/tokio/tests/rt_threaded_alt.rs new file mode 100644 index 00000000000..b8af6a7b8a9 --- /dev/null +++ b/tokio/tests/rt_threaded_alt.rs @@ -0,0 +1,717 @@ +#![warn(rust_2018_idioms)] +#![cfg(all(feature = "full", not(tokio_wasi)))] +#![cfg(tokio_unstable)] + +use tokio::io::{AsyncReadExt, AsyncWriteExt}; +use tokio::net::{TcpListener, TcpStream}; +use tokio::runtime; +use tokio::sync::oneshot; +use tokio_test::{assert_err, assert_ok}; + +use futures::future::poll_fn; +use std::future::Future; +use std::pin::Pin; +use std::sync::atomic::AtomicUsize; +use std::sync::atomic::Ordering::Relaxed; +use std::sync::{mpsc, Arc, Mutex}; +use std::task::{Context, Poll, Waker}; + +macro_rules! 
cfg_metrics { + ($($t:tt)*) => { + #[cfg(tokio_unstable)] + { + $( $t )* + } + } +} + +#[test] +fn single_thread() { + // No panic when starting a runtime w/ a single thread + let _ = runtime::Builder::new_multi_thread_alt() + .enable_all() + .worker_threads(1) + .build() + .unwrap(); +} + +#[test] +fn many_oneshot_futures() { + // used for notifying the main thread + const NUM: usize = 1_000; + + for _ in 0..5 { + let (tx, rx) = mpsc::channel(); + + let rt = rt(); + let cnt = Arc::new(AtomicUsize::new(0)); + + for _ in 0..NUM { + let cnt = cnt.clone(); + let tx = tx.clone(); + + rt.spawn(async move { + let num = cnt.fetch_add(1, Relaxed) + 1; + + if num == NUM { + tx.send(()).unwrap(); + } + }); + } + + rx.recv().unwrap(); + + // Wait for the pool to shutdown + drop(rt); + } +} + +#[test] +fn spawn_two() { + let rt = rt(); + + let out = rt.block_on(async { + let (tx, rx) = oneshot::channel(); + + tokio::spawn(async move { + tokio::spawn(async move { + tx.send("ZOMG").unwrap(); + }); + }); + + assert_ok!(rx.await) + }); + + assert_eq!(out, "ZOMG"); + + cfg_metrics! { + let metrics = rt.metrics(); + drop(rt); + assert_eq!(1, metrics.remote_schedule_count()); + + let mut local = 0; + for i in 0..metrics.num_workers() { + local += metrics.worker_local_schedule_count(i); + } + + assert_eq!(1, local); + } +} + +#[test] +fn many_multishot_futures() { + const CHAIN: usize = 200; + const CYCLES: usize = 5; + const TRACKS: usize = 50; + + for _ in 0..50 { + let rt = rt(); + let mut start_txs = Vec::with_capacity(TRACKS); + let mut final_rxs = Vec::with_capacity(TRACKS); + + for _ in 0..TRACKS { + let (start_tx, mut chain_rx) = tokio::sync::mpsc::channel(10); + + for _ in 0..CHAIN { + let (next_tx, next_rx) = tokio::sync::mpsc::channel(10); + + // Forward all the messages + rt.spawn(async move { + while let Some(v) = chain_rx.recv().await { + next_tx.send(v).await.unwrap(); + } + }); + + chain_rx = next_rx; + } + + // This final task cycles if needed + let (final_tx, final_rx) = tokio::sync::mpsc::channel(10); + let cycle_tx = start_tx.clone(); + let mut rem = CYCLES; + + rt.spawn(async move { + for _ in 0..CYCLES { + let msg = chain_rx.recv().await.unwrap(); + + rem -= 1; + + if rem == 0 { + final_tx.send(msg).await.unwrap(); + } else { + cycle_tx.send(msg).await.unwrap(); + } + } + }); + + start_txs.push(start_tx); + final_rxs.push(final_rx); + } + + { + rt.block_on(async move { + for start_tx in start_txs { + start_tx.send("ping").await.unwrap(); + } + + for mut final_rx in final_rxs { + final_rx.recv().await.unwrap(); + } + }); + } + } +} + +#[test] +fn lifo_slot_budget() { + async fn my_fn() { + spawn_another(); + } + + fn spawn_another() { + tokio::spawn(my_fn()); + } + + let rt = runtime::Builder::new_multi_thread_alt() + .enable_all() + .worker_threads(1) + .build() + .unwrap(); + + let (send, recv) = oneshot::channel(); + + rt.spawn(async move { + tokio::spawn(my_fn()); + let _ = send.send(()); + }); + + let _ = rt.block_on(recv); +} + +#[test] +fn spawn_shutdown() { + let rt = rt(); + let (tx, rx) = mpsc::channel(); + + rt.block_on(async { + tokio::spawn(client_server(tx.clone())); + }); + + // Use spawner + rt.spawn(client_server(tx)); + + assert_ok!(rx.recv()); + assert_ok!(rx.recv()); + + drop(rt); + assert_err!(rx.try_recv()); +} + +async fn client_server(tx: mpsc::Sender<()>) { + let server = assert_ok!(TcpListener::bind("127.0.0.1:0").await); + + // Get the assigned address + let addr = assert_ok!(server.local_addr()); + + // Spawn the server + tokio::spawn(async move { + // Accept a 
socket + let (mut socket, _) = server.accept().await.unwrap(); + + // Write some data + socket.write_all(b"hello").await.unwrap(); + }); + + let mut client = TcpStream::connect(&addr).await.unwrap(); + + let mut buf = vec![]; + client.read_to_end(&mut buf).await.unwrap(); + + assert_eq!(buf, b"hello"); + tx.send(()).unwrap(); +} + +#[test] +fn drop_threadpool_drops_futures() { + for _ in 0..1_000 { + let num_inc = Arc::new(AtomicUsize::new(0)); + let num_dec = Arc::new(AtomicUsize::new(0)); + let num_drop = Arc::new(AtomicUsize::new(0)); + + struct Never(Arc); + + impl Future for Never { + type Output = (); + + fn poll(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<()> { + Poll::Pending + } + } + + impl Drop for Never { + fn drop(&mut self) { + self.0.fetch_add(1, Relaxed); + } + } + + let a = num_inc.clone(); + let b = num_dec.clone(); + + let rt = runtime::Builder::new_multi_thread_alt() + .enable_all() + .on_thread_start(move || { + a.fetch_add(1, Relaxed); + }) + .on_thread_stop(move || { + b.fetch_add(1, Relaxed); + }) + .build() + .unwrap(); + + rt.spawn(Never(num_drop.clone())); + + // Wait for the pool to shutdown + drop(rt); + + // Assert that only a single thread was spawned. + let a = num_inc.load(Relaxed); + assert!(a >= 1); + + // Assert that all threads shutdown + let b = num_dec.load(Relaxed); + assert_eq!(a, b); + + // Assert that the future was dropped + let c = num_drop.load(Relaxed); + assert_eq!(c, 1); + } +} + +#[test] +fn start_stop_callbacks_called() { + use std::sync::atomic::{AtomicUsize, Ordering}; + + let after_start = Arc::new(AtomicUsize::new(0)); + let before_stop = Arc::new(AtomicUsize::new(0)); + + let after_inner = after_start.clone(); + let before_inner = before_stop.clone(); + let rt = tokio::runtime::Builder::new_multi_thread_alt() + .enable_all() + .on_thread_start(move || { + after_inner.clone().fetch_add(1, Ordering::Relaxed); + }) + .on_thread_stop(move || { + before_inner.clone().fetch_add(1, Ordering::Relaxed); + }) + .build() + .unwrap(); + + let (tx, rx) = oneshot::channel(); + + rt.spawn(async move { + assert_ok!(tx.send(())); + }); + + assert_ok!(rt.block_on(rx)); + + drop(rt); + + assert!(after_start.load(Ordering::Relaxed) > 0); + assert!(before_stop.load(Ordering::Relaxed) > 0); +} + +#[test] +fn blocking() { + // used for notifying the main thread + const NUM: usize = 1_000; + + for _ in 0..10 { + let (tx, rx) = mpsc::channel(); + + let rt = rt(); + let cnt = Arc::new(AtomicUsize::new(0)); + + // there are four workers in the pool + // so, if we run 4 blocking tasks, we know that handoff must have happened + let block = Arc::new(std::sync::Barrier::new(5)); + for _ in 0..4 { + let block = block.clone(); + rt.spawn(async move { + tokio::task::block_in_place(move || { + block.wait(); + block.wait(); + }) + }); + } + block.wait(); + + for _ in 0..NUM { + let cnt = cnt.clone(); + let tx = tx.clone(); + + rt.spawn(async move { + let num = cnt.fetch_add(1, Relaxed) + 1; + + if num == NUM { + tx.send(()).unwrap(); + } + }); + } + + rx.recv().unwrap(); + + // Wait for the pool to shutdown + block.wait(); + } +} + +#[test] +fn multi_threadpool() { + use tokio::sync::oneshot; + + let rt1 = rt(); + let rt2 = rt(); + + let (tx, rx) = oneshot::channel(); + let (done_tx, done_rx) = mpsc::channel(); + + rt2.spawn(async move { + rx.await.unwrap(); + done_tx.send(()).unwrap(); + }); + + rt1.spawn(async move { + tx.send(()).unwrap(); + }); + + done_rx.recv().unwrap(); +} + +// When `block_in_place` returns, it attempts to reclaim the yielded runtime 
+// worker. In this case, the remainder of the task is on the runtime worker and +// must take part in the cooperative task budgeting system. +// +// The test ensures that, when this happens, attempting to consume from a +// channel yields occasionally even if there are values ready to receive. +#[test] +fn coop_and_block_in_place() { + let rt = tokio::runtime::Builder::new_multi_thread_alt() + // Setting max threads to 1 prevents another thread from claiming the + // runtime worker yielded as part of `block_in_place` and guarantees the + // same thread will reclaim the worker at the end of the + // `block_in_place` call. + .max_blocking_threads(1) + .build() + .unwrap(); + + rt.block_on(async move { + let (tx, mut rx) = tokio::sync::mpsc::channel(1024); + + // Fill the channel + for _ in 0..1024 { + tx.send(()).await.unwrap(); + } + + drop(tx); + + tokio::spawn(async move { + // Block in place without doing anything + tokio::task::block_in_place(|| {}); + + // Receive all the values, this should trigger a `Pending` as the + // coop limit will be reached. + poll_fn(|cx| { + while let Poll::Ready(v) = { + tokio::pin! { + let fut = rx.recv(); + } + + Pin::new(&mut fut).poll(cx) + } { + if v.is_none() { + panic!("did not yield"); + } + } + + Poll::Ready(()) + }) + .await + }) + .await + .unwrap(); + }); +} + +#[test] +fn yield_after_block_in_place() { + let rt = tokio::runtime::Builder::new_multi_thread_alt() + .worker_threads(1) + .build() + .unwrap(); + + rt.block_on(async { + tokio::spawn(async move { + // Block in place then enter a new runtime + tokio::task::block_in_place(|| { + let rt = tokio::runtime::Builder::new_current_thread() + .build() + .unwrap(); + + rt.block_on(async {}); + }); + + // Yield, then complete + tokio::task::yield_now().await; + }) + .await + .unwrap() + }); +} + +// Testing this does not panic +#[test] +fn max_blocking_threads() { + let _rt = tokio::runtime::Builder::new_multi_thread_alt() + .max_blocking_threads(1) + .build() + .unwrap(); +} + +#[test] +#[should_panic] +fn max_blocking_threads_set_to_zero() { + let _rt = tokio::runtime::Builder::new_multi_thread_alt() + .max_blocking_threads(0) + .build() + .unwrap(); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn hang_on_shutdown() { + let (sync_tx, sync_rx) = std::sync::mpsc::channel::<()>(); + tokio::spawn(async move { + tokio::task::block_in_place(|| sync_rx.recv().ok()); + }); + + tokio::spawn(async { + tokio::time::sleep(std::time::Duration::from_secs(2)).await; + drop(sync_tx); + }); + tokio::time::sleep(std::time::Duration::from_secs(1)).await; +} + +/// Demonstrates tokio-rs/tokio#3869 +#[test] +fn wake_during_shutdown() { + struct Shared { + waker: Option, + } + + struct MyFuture { + shared: Arc>, + put_waker: bool, + } + + impl MyFuture { + fn new() -> (Self, Self) { + let shared = Arc::new(Mutex::new(Shared { waker: None })); + let f1 = MyFuture { + shared: shared.clone(), + put_waker: true, + }; + let f2 = MyFuture { + shared, + put_waker: false, + }; + (f1, f2) + } + } + + impl Future for MyFuture { + type Output = (); + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> { + let me = Pin::into_inner(self); + let mut lock = me.shared.lock().unwrap(); + if me.put_waker { + lock.waker = Some(cx.waker().clone()); + } + Poll::Pending + } + } + + impl Drop for MyFuture { + fn drop(&mut self) { + let mut lock = self.shared.lock().unwrap(); + if !self.put_waker { + lock.waker.take().unwrap().wake(); + } + drop(lock); + } + } + + let rt = 
tokio::runtime::Builder::new_multi_thread_alt() + .worker_threads(1) + .enable_all() + .build() + .unwrap(); + + let (f1, f2) = MyFuture::new(); + + rt.spawn(f1); + rt.spawn(f2); + + rt.block_on(async { tokio::time::sleep(tokio::time::Duration::from_millis(20)).await }); +} + +#[should_panic] +#[tokio::test] +async fn test_block_in_place1() { + tokio::task::block_in_place(|| {}); +} + +#[tokio::test(flavor = "multi_thread")] +async fn test_block_in_place2() { + tokio::task::block_in_place(|| {}); +} + +#[should_panic] +#[tokio::main(flavor = "current_thread")] +#[test] +async fn test_block_in_place3() { + tokio::task::block_in_place(|| {}); +} + +#[tokio::main] +#[test] +async fn test_block_in_place4() { + tokio::task::block_in_place(|| {}); +} + +// Testing the tuning logic is tricky as it is inherently timing based, and more +// of a heuristic than an exact behavior. This test checks that the interval +// changes over time based on load factors. There are no assertions, completion +// is sufficient. If there is a regression, this test will hang. In theory, we +// could add limits, but that would be likely to fail on CI. +#[test] +#[cfg(not(tokio_no_tuning_tests))] +fn test_tuning() { + use std::sync::atomic::AtomicBool; + use std::time::Duration; + + let rt = runtime::Builder::new_multi_thread_alt() + .worker_threads(1) + .build() + .unwrap(); + + fn iter(flag: Arc, counter: Arc, stall: bool) { + if flag.load(Relaxed) { + if stall { + std::thread::sleep(Duration::from_micros(5)); + } + + counter.fetch_add(1, Relaxed); + tokio::spawn(async move { iter(flag, counter, stall) }); + } + } + + let flag = Arc::new(AtomicBool::new(true)); + let counter = Arc::new(AtomicUsize::new(61)); + let interval = Arc::new(AtomicUsize::new(61)); + + { + let flag = flag.clone(); + let counter = counter.clone(); + rt.spawn(async move { iter(flag, counter, true) }); + } + + // Now, hammer the injection queue until the interval drops. + let mut n = 0; + loop { + let curr = interval.load(Relaxed); + + if curr <= 8 { + n += 1; + } else { + n = 0; + } + + // Make sure we get a few good rounds. Jitter in the tuning could result + // in one "good" value without being representative of reaching a good + // state. + if n == 3 { + break; + } + + if Arc::strong_count(&interval) < 5_000 { + let counter = counter.clone(); + let interval = interval.clone(); + + rt.spawn(async move { + let prev = counter.swap(0, Relaxed); + interval.store(prev, Relaxed); + }); + + std::thread::yield_now(); + } + } + + flag.store(false, Relaxed); + + let w = Arc::downgrade(&interval); + drop(interval); + + while w.strong_count() > 0 { + std::thread::sleep(Duration::from_micros(500)); + } + + // Now, run it again with a faster task + let flag = Arc::new(AtomicBool::new(true)); + // Set it high, we know it shouldn't ever really be this high + let counter = Arc::new(AtomicUsize::new(10_000)); + let interval = Arc::new(AtomicUsize::new(10_000)); + + { + let flag = flag.clone(); + let counter = counter.clone(); + rt.spawn(async move { iter(flag, counter, false) }); + } + + // Now, hammer the injection queue until the interval reaches the expected range. 
+ let mut n = 0; + loop { + let curr = interval.load(Relaxed); + + if curr <= 1_000 && curr > 32 { + n += 1; + } else { + n = 0; + } + + if n == 3 { + break; + } + + if Arc::strong_count(&interval) <= 5_000 { + let counter = counter.clone(); + let interval = interval.clone(); + + rt.spawn(async move { + let prev = counter.swap(0, Relaxed); + interval.store(prev, Relaxed); + }); + } + + std::thread::yield_now(); + } + + flag.store(false, Relaxed); +} + +fn rt() -> runtime::Runtime { + runtime::Builder::new_multi_thread_alt() + .enable_all() + .build() + .unwrap() +}