1 | | -(* |
2 | | - # General idea |
3 | | - |
4 | | - It is easiest to explain the general idea on an array of infinite size.
5 | | - Let's start with that. Each element in such an array constitutes a single-use |
6 | | - exchange slot. Enqueuer increments [tail] and treats prior value as index of |
7 | | - its slot. Same for dequeuer and [head]. This effectively creates pairs |
8 | | - (enqueuer, dequeuer) assigned to the same slot. Enqueuer leaves the value in |
9 | | - the slot, the dequeuer copies it out.
10 | | - |
11 | | - Enqueuer never fails. It always gets a brand-new slot and places the item in it.
12 | | - Dequeuer, on the other hand, may witness an empty slot. That's because [head]
13 | | - may overshoot [tail]. Remember, indices are incremented blindly. For now,
14 | | - assume the dequeuer simply spins on the empty slot until an item appears.
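| | -
| | - A minimal sketch of this idealized scheme (hypothetical code: it assumes an
| | - unbounded array plus the [head]/[tail]/[array] fields of the record defined
| | - further down; everything else in this comment refines it):
| | -
| | -     (* Claim a ticket with fetch_and_add, then use it as a slot index. *)
| | -     let push t item =
| | -       let i = Atomic.fetch_and_add t.tail 1 in
| | -       Atomic.set t.array.(i) (Some item)       (* brand-new slot: plain set is fine *)
| | -
| | -     let pop t =
| | -       let i = Atomic.fetch_and_add t.head 1 in
| | -       let rec wait () =
| | -         match Atomic.get t.array.(i) with
| | -         | Some item -> item                    (* the paired enqueuer delivered *)
| | -         | None -> Domain.cpu_relax (); wait () (* slot still empty: spin *)
| | -       in
| | -       wait ()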
| 1 | +include Lockfree.Relaxed_queue |
15 | 2 |
|
16 | | - That's it. There are a few things flowing from this construction:
17 | | - * Slots are atomic. This is where paired enqueuer and dequeuer communicate. |
18 | | - * [head] overshooting [tail] is a normal condition and that's good - we want |
19 | | - to keep operations on [head] and [tail] independent. |
20 | | -
|
21 | | - # Finite array |
22 | | -
|
23 | | - Now, to make it work in the real world, simply treat the finite array as
24 | | - circular, i.e. wrap around after reaching the end. Slots are now re-used, so
25 | | - we need to be more careful.
26 | | - |
27 | | - Firstly, if there are too many items, an enqueuer may witness a full slot. Let's
28 | | - assume the enqueuer simply spins on the full slot until some dequeuer comes along
29 | | - and takes the old value.
30 | | - |
31 | | - Secondly, in the case of overlap, there can be more than 2 threads (1x enqueuer,
32 | | - 1x dequeuer) assigned to a single slot (imagine 10 enqueuers spinning on an 8-slot
33 | | - array). In fact, it could be any number. Thus, all operations on a slot have to
34 | | - use CAS to ensure that no item is overwritten on store and no item is dequeued by
35 | | - two threads at once.
36 | | -
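| | - For instance (illustrative arithmetic only), with the hypothetical 8-slot array
| | - above, tickets 2 and 10 both map to slot 2, so two enqueuers can race on it:
| | -
| | -     let mask = 8 - 1                       (* size must be a power of two *)
| | -     let () = assert (2 land mask = 2)      (* ticket 2  -> slot 2 *)
| | -     let () = assert (10 land mask = 2)     (* ticket 10 -> slot 2 again: hence CAS *)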
|
37 | | - The above works okay in practice, and there is some relevant literature, e.g.
38 | | - (DOI: 10.1145/3437801.3441583) analyzed this particular design. There are also
39 | | - plenty of older papers looking at similar approaches
40 | | - (e.g. DOI: 10.1145/2851141.2851168).
41 | | -
|
42 | | - Note, this design may violate FIFO (on overlap). The risk can be minimized by
43 | | - ensuring size of array >> number of threads, but it never drops to zero.
44 | | - (github.com/rigtorp/MPMCQueue has a nice way of fixing this; we could add it.)
45 | | -
|
46 | | - # Blocking (non-lockfree paths on full, empty) |
47 | | -
|
48 | | - Up until now, [push] and [pop] were allowed to block indefinitely on an empty or
49 | | - full queue. Overall, what can be done in those states?
50 | | -
|
51 | | - 1. Busy wait until able to finish. |
52 | | - 2. Rollback own index with CAS (unassign itself from slot). |
53 | | - 3. Move the other index forward with CAS (assign itself to the same slot as the
54 | | -    opposite action).
55 | | - 4. Mark the slot as burned - dequeue only.
56 | | -
|
57 | | - Which one then? |
58 | | -
|
59 | | - Let's optimize for stability, i.e. some reasonable latency that won't get much
60 | | - worse under heavy load. Busy wait is great because it does not cause any contention
61 | | - in the hotspots ([head], [tail]). Thus, start with busy wait (1). If the queue is
62 | | - busy and moving fast, there is a fair chance that within, say, 30 spins, we'll
63 | | - manage to complete the action without having to add contention elsewhere.
64 | | - |
65 | | - Once N busy-loops happen and nothing changes, we probably want to return even if
66 | | - it costs us. (2) and (3) both allow that. (2) doesn't add contention to the other
67 | | - index like (3) does. Say there are a lot more dequeuers than enqueuers: if all
68 | | - dequeuers did (3), they would add a fair amount of contention to the [tail] index
69 | | - and slow the already-outnumbered enqueuers further. So, (2) > (3) for that reason.
70 | | -
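| | - A sketch of (2) for a dequeuer (hypothetical helper; [head_val] is assumed to be
| | - the ticket our [fetch_and_add] on [head] returned, and [ccas] is defined below):
| | -
| | -     (* Undo our own increment of [head]. This can only succeed while no
| | -        later dequeuer has moved [head] past our ticket. *)
| | -     let try_rollback t head_val =
| | -       ccas t.head (head_val + 1) head_val
| | -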
|
71 | | - However, with just (2), some dequeuers will struggle to return. If many dequeuers
72 | | - constantly try to pop an element and fail, they will form a chain.
73 | | -
|
74 | | - tl hd |
75 | | - | | |
76 | | - [.]-[A]-[B]-[C]-..-[X] |
77 | | -
|
78 | | - For A to rollback, B has to rollback first. For B to rollback, C has to rollback first.
79 | | -
|
80 | | - [A] is likely to experience a large latency spike. In such a case, it is easier
81 | | - for [A] to do (3) rather than hope that all other active dequeuers will unblock it
82 | | - at some point. Thus, it's also worthwhile to try (3) periodically.
83 | | -
|
84 | | - Thus, the current policy does (1) for a bit, then mixes (1) and (2) with a periodic (3).
85 | | -
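| | - Roughly, as a sketch (illustrative only; the real policy lives in [Not_lockfree]
| | - below and differs in details - e.g. it also rechecks the slot between attempts):
| | -
| | -     (* Hypothetical exit path for a dequeuer stuck on an empty slot,
| | -        entered after [spin_threshold] rounds of (1). *)
| | -     let rec exit_empty t head_val tries =
| | -       (* (2): undo our own increment; only the end of the chain can win. *)
| | -       if ccas t.head (head_val + 1) head_val then ()
| | -       (* (3), tried periodically (every 8th attempt here, an arbitrary constant):
| | -          claim the matching enqueue ticket ourselves, so that we hold both sides
| | -          of the slot and can leave it empty. Only the dequeuer at the front of
| | -          the chain (where [tail] = [head_val]) can win this. *)
| | -       else if tries mod 8 = 0 && ccas t.tail head_val (head_val + 1) then ()
| | -       else (Domain.cpu_relax (); exit_empty t head_val (tries + 1))
| | -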
|
86 | | - What about burned slots (4)? |
87 | | -
|
88 | | - It's present in the literature, but I'm weakly against it. If dequeuers are faster
89 | | - to remove items than enqueuers supply them, slots burned by dequeuers are going to
90 | | - make enqueuers do even more work.
91 | | -
|
92 | | - # Resizing |
93 | | -
|
94 | | - The queue does not support resizing, but it can be simulated by wrapping it in a |
95 | | - lockfree list. |
96 | | -*) |
97 | | - |
98 | | -type 'a t = {
99 | | -  array : 'a Option.t Atomic.t Array.t;  (* exchange slots; [None] means empty *)
100 | | -  head : int Atomic.t;  (* next ticket for dequeuers *)
101 | | -  tail : int Atomic.t;  (* next ticket for enqueuers *)
102 | | -  mask : int;  (* size - 1, for cheap modulo; size is a power of two *)
103 | | -}
104 | | - |
105 | | -let create ~size_exponent () : 'a t = |
106 | | - let size = 1 lsl size_exponent in |
107 | | - let array = Array.init size (fun _ -> Atomic.make None) in |
108 | | - let mask = size - 1 in |
109 | | - let head = Atomic.make 0 in |
110 | | - let tail = Atomic.make 0 in |
111 | | - { array; head; tail; mask } |
| 3 | +module Spin = struct |
| 4 | + let push = push |
| 5 | + let pop = pop |
| 6 | +end |
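| | +
| | +(* A hypothetical usage sketch of the spinning variant across two domains
| | +   (OCaml 5 [Domain]s; illustrative only, not part of the API):
| | +
| | +     let () =
| | +       let q = create ~size_exponent:3 () in
| | +       let consumer = Domain.spawn (fun () -> Spin.pop q) in
| | +       Spin.push q 42;
| | +       assert (Domain.join consumer = 42)
| | +*)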
112 | 7 |
|
113 | 8 | (* [ccas] A slightly nicer CAS: does a plain read first, so a CAS that would fail never takes the microarch lock. Use on indices. *)
114 | 9 | let ccas cell seen v = |
115 | 10 | if Atomic.get cell != seen then false else Atomic.compare_and_set cell seen v |
116 | 11 |
|
117 | | -module Spin = struct |
118 | | -  let push { array; tail; mask; _ } item =
119 | | -    let tail_val = Atomic.fetch_and_add tail 1 in  (* claim an enqueue ticket *)
120 | | -    let index = tail_val land mask in              (* wrap into the circular array *)
121 | | -    let cell = Array.get array index in
122 | | -    while not (ccas cell None (Some item)) do      (* spin until the slot is empty *)
123 | | -      Domain.cpu_relax ()
124 | | -    done
125 | | -
126 | | -  let pop { array; head; mask; _ } =
127 | | -    let head_val = Atomic.fetch_and_add head 1 in  (* claim a dequeue ticket *)
128 | | -    let index = head_val land mask in
129 | | -    let cell = Array.get array index in
130 | | -    let item = ref (Atomic.get cell) in
131 | | -    while Option.is_none !item || not (ccas cell !item None) do
132 | | -      Domain.cpu_relax ();                         (* spin until an item appears, *)
133 | | -      item := Atomic.get cell                      (* then race to take it *)
134 | | -    done;
135 | | -    Option.get !item
136 | | -end |
137 | | - |
138 | 12 | module Not_lockfree = struct |
139 | 13 | (* [spin_threshold] Number of times to spin on a slot before trying an exit strategy. *)
140 | 14 | let spin_threshold = 30 |
|