Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Blocking mechanism with cached threads #2687

Merged
merged 13 commits into from
Dec 30, 2021
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/*
* Copyright 2020-2021 Typelevel
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package cats.effect
package benchmarks

import cats.effect.unsafe.implicits.global
import org.openjdk.jmh.annotations._

import java.util.concurrent.TimeUnit

/**
* To run the benchmark from within sbt:
*
* benchmarks/Jmh/run -i 10 -wi 10 -f 2 -t 1 cats.effect.benchmarks.BlockingBenchmark
*
 * Which means "10 iterations", "10 warm-up iterations", "2 forks", "1 thread". Please note that
 * benchmarks should usually be executed with at least 10 iterations (as a rule of thumb), but
 * more is better.
*/
@State(Scope.Thread)
@BenchmarkMode(Array(Mode.Throughput))
@OutputTimeUnit(TimeUnit.SECONDS)
class BlockingBenchmark {

  // How many times each benchmark repeats its blocking (or non-blocking) step.
  @Param(Array("10000"))
  var size: Int = _

  /*
   * Wraps `IO.blocking` around a tiny region, once per iteration. Each
   * execution of `IO.blocking` moves the whole computation over to a thread
   * on the blocking EC and moves it straight back to the compute EC as soon
   * as the region finishes.
   */
  @Benchmark
  def fine(): Int = {
    def go(i: Int): IO[Int] =
      IO.blocking(42).flatMap { value =>
        if (i >= size) IO.pure(value)
        else go(i + 1)
      }

    go(0).unsafeRunSync()
  }

  /*
   * Wraps `IO.blocking` around one very large region. Only a single shift to
   * the blocking EC and a single shift back to the compute EC should occur.
   */
  @Benchmark
  def coarse(): Int = {
    def go(i: Int): IO[Int] =
      IO(42).flatMap { value =>
        if (i >= size) IO.pure(value)
        else go(i + 1)
      }

    IO.blocking(go(0).unsafeRunSync()).unsafeRunSync()
  }

  /*
   * Wraps `IO.blocking` around one very large region whose interior itself
   * consists of many smaller `IO.blocking` regions.
   */
  @Benchmark
  def nested(): Int = {
    def go(i: Int): IO[Int] =
      IO.blocking(42).flatMap { value =>
        if (i >= size) IO.pure(value)
        else go(i + 1)
      }

    IO.blocking(go(0).unsafeRunSync()).unsafeRunSync()
  }

  /*
   * Issues a cede immediately after every blocking operation.
   */
  @Benchmark
  def blockThenCede(): Int = {
    def go(i: Int): IO[Int] =
      IO.blocking(42).flatMap { value =>
        if (i >= size) IO.cede.flatMap(_ => IO.pure(value))
        else IO.cede.flatMap(_ => go(i + 1))
      }

    go(0).unsafeRunSync()
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ import cats.effect.tracing.TracingConstants
import scala.collection.mutable
import scala.concurrent.ExecutionContext

import java.util.concurrent.ThreadLocalRandom
import java.util.Comparator
import java.util.concurrent.{ConcurrentSkipListSet, ThreadLocalRandom}
import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger}
import java.util.concurrent.locks.LockSupport

Expand All @@ -56,7 +57,7 @@ import java.util.concurrent.locks.LockSupport
*/
private[effect] final class WorkStealingThreadPool(
threadCount: Int, // number of worker threads
threadPrefix: String, // prefix for the name of worker threads
private[unsafe] val threadPrefix: String, // prefix for the name of worker threads
self0: => IORuntime
) extends ExecutionContext {

Expand Down Expand Up @@ -95,12 +96,16 @@ private[effect] final class WorkStealingThreadPool(
*/
private[this] val state: AtomicInteger = new AtomicInteger(threadCount << UnparkShift)

private[unsafe] val cachedThreads: ConcurrentSkipListSet[WorkerThread] =
new ConcurrentSkipListSet(Comparator.comparingInt[WorkerThread](_.nameIndex))

/**
* The shutdown latch of the work stealing thread pool.
*/
private[unsafe] val done: AtomicBoolean = new AtomicBoolean(false)

private[unsafe] val blockedWorkerThreadCounter: AtomicInteger = new AtomicInteger(0)
private[unsafe] val blockedWorkerThreadNamingIndex: AtomicInteger = new AtomicInteger(0)

// Thread pool initialization block.
{
Expand All @@ -114,15 +119,7 @@ private[effect] final class WorkStealingThreadPool(
val index = i
val fiberBag = new WeakBag[IOFiber[_]]()
val thread =
new WorkerThread(
index,
threadPrefix,
queue,
parkedSignal,
externalQueue,
null,
fiberBag,
this)
new WorkerThread(index, queue, parkedSignal, externalQueue, null, fiberBag, this)
workerThreads(i) = thread
i += 1
}
Expand Down Expand Up @@ -558,6 +555,15 @@ private[effect] final class WorkStealingThreadPool(

// Clear the interrupt flag.
Thread.interrupted()

var t: WorkerThread = null
while ({
t = cachedThreads.pollFirst()
t ne null
}) {
t.interrupt()
}

// Drain the external queue.
externalQueue.clear()
Thread.currentThread().interrupt()
Expand Down
Loading