From c9e5de722f5328bfe75781c21edb75299ca83a72 Mon Sep 17 00:00:00 2001
From: dkalinichenko-js <118547217+dkalinichenko-js@users.noreply.github.com>
Date: Fri, 11 Aug 2023 13:32:20 +0100
Subject: [PATCH] Upstream Jane Street changes to Dune engine (#8362)

Signed-off-by: Diana Kalinichenko <dkalinichenko@janestreet.com>
Signed-off-by: Rudi Grinberg <me@rgrinberg.com>
Co-authored-by: Diana Kalinichenko <dkalinichenko@janestreet.com>
Co-authored-by: Rudi Grinberg <me@rgrinberg.com>
---
 .../depends-on-its-target-by-read-dir/run.t   |   8 +-
 .../test/one-absent-dependency/run.t          |  11 +-
 otherlibs/stdune/src/table.ml                 |   4 +
 otherlibs/stdune/src/table.mli                |  10 +-
 src/dune_engine/action_exec.ml                |  66 +++++-
 src/dune_engine/action_exec.mli               |  18 +-
 src/dune_engine/action_runner.ml              | 198 ++++++++++++++++--
 src/dune_engine/action_runner.mli             |  20 +-
 src/dune_engine/build_config.ml               |   3 +
 src/dune_engine/build_config.mli              |   2 +
 src/dune_engine/build_system.ml               |  34 +--
 src/dune_engine/clflags.ml                    |   1 +
 src/dune_engine/clflags.mli                   |   3 +
 src/dune_engine/scheduler.ml                  |  62 +++++-
 src/dune_engine/scheduler.mli                 |   6 +
 src/dune_rpc_server/dune_rpc_server.ml        |   4 +
 src/dune_rpc_server/dune_rpc_server.mli       |   5 +
 src/dune_rules/main.ml                        |   2 +
 src/dune_rules/main.mli                       |   1 +
 .../test-cases/action-runner/build-e2e.t      |  26 ++-
 .../dune_action_runner/dune_action_runner.ml  |   6 +-
 21 files changed, 410 insertions(+), 80 deletions(-)
 mode change 100755 => 100644 test/expect-tests/dune_action_runner/dune_action_runner.ml

diff --git a/otherlibs/dune-action-plugin/test/depends-on-its-target-by-read-dir/run.t b/otherlibs/dune-action-plugin/test/depends-on-its-target-by-read-dir/run.t
index e9826f675f8..434009df3f6 100644
--- a/otherlibs/dune-action-plugin/test/depends-on-its-target-by-read-dir/run.t
+++ b/otherlibs/dune-action-plugin/test/depends-on-its-target-by-read-dir/run.t
@@ -12,10 +12,10 @@
 
   $ cp ./bin/foo.exe ./
 
-  $ dune build some_file
-  Error: Dependency cycle between:
-     _build/default/some_file
-  [1]
+  $ dune build some_file 2>&1 | awk '/Internal error/,/unable to serialize/'
+  Internal error, please report upstream including the contents of _build/log.
+  Description:
+    ("unable to serialize exception",
 
 ^ This is not great. There is no actual dependency cycle, dune is just
 interpreting glob dependency too coarsely (it builds all files instead
diff --git a/otherlibs/dune-action-plugin/test/one-absent-dependency/run.t b/otherlibs/dune-action-plugin/test/one-absent-dependency/run.t
index 0b59d103cc7..a419687a75e 100644
--- a/otherlibs/dune-action-plugin/test/one-absent-dependency/run.t
+++ b/otherlibs/dune-action-plugin/test/one-absent-dependency/run.t
@@ -14,10 +14,7 @@ and requires dependency that can not be build fails.
 
   $ cp ./bin/foo.exe ./
 
-  $ dune runtest
-  File "dune", line 1, characters 0-57:
-  1 | (rule
-  2 |  (alias runtest)
-  3 |  (action (dynamic-run ./foo.exe)))
-  Error: No rule found for some_absent_dependency
-  [1]
+  $ dune runtest 2>&1 | awk '/Internal error/,/unable to serialize/'
+  Internal error, please report upstream including the contents of _build/log.
+  Description:
+    ("unable to serialize exception",
diff --git a/otherlibs/stdune/src/table.ml b/otherlibs/stdune/src/table.ml
index a922f4331da..f2580c80dda 100644
--- a/otherlibs/stdune/src/table.ml
+++ b/otherlibs/stdune/src/table.ml
@@ -68,6 +68,10 @@ let clear (type input output) ((module T) : (input, output) t) = T.H.clear T.val
 let mem (type input output) ((module T) : (input, output) t) k = T.H.mem T.value k
 let keys (type input output) ((module T) : (input, output) t) = T.H.keys T.value
 
+let values (type input output) ((module T) : (input, output) t) =
+  T.H.to_seq_values T.value |> List.of_seq
+;;
+
 let foldi (type input output) ((module T) : (input, output) t) ~init ~f =
   T.H.foldi T.value ~init ~f
 ;;
diff --git a/otherlibs/stdune/src/table.mli b/otherlibs/stdune/src/table.mli
index b03e73315ba..f37061d06af 100644
--- a/otherlibs/stdune/src/table.mli
+++ b/otherlibs/stdune/src/table.mli
@@ -39,10 +39,12 @@ val remove : ('k, _) t -> 'k -> unit
 val iter : (_, 'v) t -> f:('v -> unit) -> unit
 val filteri_inplace : ('a, 'b) t -> f:(key:'a -> data:'b -> bool) -> unit
 val length : (_, _) t -> int
+val values : (_, 'a) t -> 'a list
 
 module Multi : sig
-  type ('k, 'v) t := ('k, 'v list) t
+    type ('k, 'v) t
 
-  val cons : ('k, 'v) t -> 'k -> 'v -> unit
-  val find : ('k, 'v) t -> 'k -> 'v list
-end
+    val cons : ('k, 'v) t -> 'k -> 'v -> unit
+    val find : ('k, 'v) t -> 'k -> 'v list
+  end
+  with type ('k, 'v) t := ('k, 'v list) t
diff --git a/src/dune_engine/action_exec.ml b/src/dune_engine/action_exec.ml
index ef3e444c057..7a3033d1a67 100644
--- a/src/dune_engine/action_exec.ml
+++ b/src/dune_engine/action_exec.ml
@@ -33,7 +33,60 @@ let to_dune_dep_set =
 ;;
 
 module Exec_result = struct
-  type t = { dynamic_deps_stages : (Dep.Set.t * Dep.Facts.t) list }
+  module Error = struct
+    type t =
+      | User of User_message.t
+      | Code of Code_error.t
+      | Sys of string
+      | Unix of Unix.error * string * string
+      | Nonreproducible_build_cancelled
+
+    (* We can't capture raw backtraces since they are not marshallable.
+       We can convert those to marshallable backtrace slots, but we can't convert them
+       back to re-raise exceptions with preserved backtraces. *)
+    let of_exn (e : exn) =
+      match e with
+      | User_error.E msg -> User msg
+      | Code_error.E err -> Code err
+      | Sys_error msg -> Sys msg
+      | Unix.Unix_error (err, call, args) -> Unix (err, call, args)
+      | Memo.Non_reproducible Scheduler.Run.Build_cancelled ->
+        Nonreproducible_build_cancelled
+      | Memo.Cycle_error.E _ as e ->
+        (* [Memo.Cycle_error.t] is hard to serialize and can only be raised during action
+           execution with the dynamic dependencies plugin, which is not production-ready yet.
+           For now, we just re-reraise it.
+        *)
+        reraise e
+      | e ->
+        Code
+          { message = "unable to serialize exception"
+          ; data = [ "exn", Exn.to_dyn e ]
+          ; loc = None
+          }
+    ;;
+
+    let to_exn (t : t) =
+      match t with
+      | User msg -> User_error.E msg
+      | Code err -> Code_error.E err
+      | Sys msg -> Sys_error msg
+      | Unix (err, call, args) -> Unix.Unix_error (err, call, args)
+      | Nonreproducible_build_cancelled ->
+        Memo.Non_reproducible Scheduler.Run.Build_cancelled
+    ;;
+  end
+
+  type ok = { dynamic_deps_stages : (Dep.Set.t * Dep.Facts.t) list }
+  type t = (ok, Error.t list) Result.t
+
+  let ok_exn (t : t) =
+    match t with
+    | Ok t -> Fiber.return t
+    | Error errs ->
+      Fiber.reraise_all
+        (List.map errs ~f:(fun e -> Exn_with_backtrace.capture (Error.to_exn e)))
+  ;;
 end
 
 type done_or_more_deps =
@@ -582,5 +635,14 @@ let exec
     ; exit_codes = Predicate.create (Int.equal 0)
     }
   in
-  exec_until_all_deps_ready t ~display:!Clflags.display ~ectx ~eenv
+  let open Fiber.O in
+  let+ result =
+    Fiber.collect_errors (fun () ->
+      exec_until_all_deps_ready t ~display:!Clflags.display ~ectx ~eenv)
+  in
+  match result with
+  | Ok res -> Ok res
+  | Error exns ->
+    Error
+      (List.map exns ~f:(fun (e : Exn_with_backtrace.t) -> Exec_result.Error.of_exn e.exn))
 ;;
diff --git a/src/dune_engine/action_exec.mli b/src/dune_engine/action_exec.mli
index 9d1c860e18a..21523aba1c7 100644
--- a/src/dune_engine/action_exec.mli
+++ b/src/dune_engine/action_exec.mli
@@ -1,12 +1,28 @@
 open Import
 
 module Exec_result : sig
-  type t =
+  (* Exceptions that can be raised by action execution. We catch those and
+     use a variant type so we can marshal them across processes. We lose backtraces,
+     but we don't print them for most exceptions. *)
+  module Error : sig
+    type t =
+      | User of User_message.t
+      | Code of Code_error.t
+      | Sys of string
+      | Unix of Unix.error * string * string
+      | Nonreproducible_build_cancelled
+  end
+
+  type ok =
     { dynamic_deps_stages :
         (* The set can be derived from the facts by getting the keys of the
            facts map. We don't do it because conversion isn't free *)
         (Dep.Set.t * Dep.Facts.t) list
     }
+
+  type t = (ok, Error.t list) Result.t
+
+  val ok_exn : t -> ok Fiber.t
 end
 
 type input =
diff --git a/src/dune_engine/action_runner.ml b/src/dune_engine/action_runner.ml
index 66ce0e82776..e2a22b01899 100644
--- a/src/dune_engine/action_runner.ml
+++ b/src/dune_engine/action_runner.ml
@@ -3,16 +3,127 @@ open Fiber.O
 module Dune_rpc = Dune_rpc_private
 
 module Decl : sig
-  val exec
-    : ( Action_exec.input
-      , (Action_exec.Exec_result.t, Exn_with_backtrace.t list) result )
-      Dune_rpc.Decl.request
-
+  val exec : (Action_exec.input, Action_exec.Exec_result.t) Dune_rpc.Decl.request
   val ready : (string, unit) Dune_rpc.Decl.request
+  val cancel_build : (unit, unit) Dune_rpc.Decl.request
 end = struct
   module Conv = Dune_rpc_private.Conv
   module Decl = Dune_rpc_private.Decl
 
+  (* CR-someday dkalinichenko: this is an ugly implementation detail; consider
+     moving this code to its own file. *)
+  module Marshallable_error = struct
+    (* We convert [Action_exec.Exec_result.Error.t] into this representation
+       since [Annots.t] cannot be marshalled (as it contains a [Univ_map.t]).
+
+       This needs to be updated each time we add a new [Annots.t], otherwise
+       we will silently drop those annotations when using action runners. *)
+    type t =
+      | User_with_annots of
+          { message : User_message.t (* Should not have any fields in [Annots.t]. *)
+          ; has_embedded_location : bool
+          ; needs_stack_trace : bool
+          ; compound_user_error : Compound_user_error.t list option
+              (* Compound user errors do not contain annotations, so it's fine to
+                 marshal them as is. *)
+          ; diff_promotion : Diff_promotion.Annot.t option
+          ; with_directory : Path.t option
+          }
+      | Code of Code_error.t
+      | Sys of string
+      | Unix of Unix.error * string * string
+      | Nonreproducible_build_cancelled
+
+    let to_ (t : t) : Action_exec.Exec_result.Error.t =
+      match t with
+      | User_with_annots
+          { message
+          ; has_embedded_location
+          ; needs_stack_trace
+          ; compound_user_error
+          ; diff_promotion
+          ; with_directory
+          } ->
+        let annots = User_message.Annots.empty in
+        let annots =
+          match has_embedded_location with
+          | true ->
+            User_message.Annots.set annots User_message.Annots.has_embedded_location ()
+          | false -> annots
+        in
+        let annots =
+          match needs_stack_trace with
+          | true ->
+            User_message.Annots.set annots User_message.Annots.needs_stack_trace ()
+          | false -> annots
+        in
+        let annots =
+          match compound_user_error with
+          | Some annot -> User_message.Annots.set annots Compound_user_error.annot annot
+          | None -> annots
+        in
+        let annots =
+          match diff_promotion with
+          | Some annot -> User_message.Annots.set annots Diff_promotion.Annot.annot annot
+          | None -> annots
+        in
+        let annots =
+          match with_directory with
+          | Some annot ->
+            User_message.Annots.set annots Process.with_directory_annot annot
+          | None -> annots
+        in
+        User
+          { loc = message.loc
+          ; paragraphs = message.paragraphs
+          ; hints = message.hints
+          ; annots
+          }
+      | Code err -> Code err
+      | Sys err -> Sys err
+      | Unix (err, call, args) -> Unix (err, call, args)
+      | Nonreproducible_build_cancelled -> Nonreproducible_build_cancelled
+    ;;
+
+    let from (t : Action_exec.Exec_result.Error.t) : t =
+      match t with
+      | User message ->
+        let annots = message.annots in
+        let message = { message with annots = User_message.Annots.empty } in
+        let has_embedded_location =
+          match
+            User_message.Annots.find annots User_message.Annots.has_embedded_location
+          with
+          | Some () -> true
+          | None -> false
+        in
+        let needs_stack_trace =
+          match User_message.Annots.find annots User_message.Annots.needs_stack_trace with
+          | Some () -> true
+          | None -> false
+        in
+        let compound_user_error =
+          User_message.Annots.find annots Compound_user_error.annot
+        in
+        let diff_promotion = User_message.Annots.find annots Diff_promotion.Annot.annot in
+        let with_directory =
+          User_message.Annots.find annots Process.with_directory_annot
+        in
+        User_with_annots
+          { message
+          ; has_embedded_location
+          ; needs_stack_trace
+          ; compound_user_error
+          ; diff_promotion
+          ; with_directory
+          }
+      | Code err -> Code err
+      | Sys err -> Sys err
+      | Unix (err, call, args) -> Unix (err, call, args)
+      | Nonreproducible_build_cancelled -> Nonreproducible_build_cancelled
+    ;;
+  end
+
   module Exec = struct
     let marshal () =
       let to_ data = Marshal.from_string data 0 in
@@ -20,9 +131,18 @@ end = struct
       Conv.iso Conv.string to_ from
     ;;
 
+    let marshal_result () =
+      let to_ = Result.map_error ~f:(List.map ~f:Marshallable_error.to_) in
+      let from = Result.map_error ~f:(List.map ~f:Marshallable_error.from) in
+      Conv.iso (marshal ()) to_ from
+    ;;
+
     let decl =
       let v1 =
-        Decl.Request.make_current_gen ~req:(marshal ()) ~resp:(marshal ()) ~version:1
+        Decl.Request.make_current_gen
+          ~req:(marshal ())
+          ~resp:(marshal_result ())
+          ~version:1
       in
       Decl.Request.make ~method_:"action/exec" ~generations:[ v1 ]
     ;;
@@ -37,8 +157,16 @@ end = struct
     ;;
   end
 
+  module Cancel_build = struct
+    let decl =
+      let v1 = Decl.Request.make_current_gen ~req:Conv.unit ~resp:Conv.unit ~version:1 in
+      Decl.Request.make ~method_:"action/cancel-build" ~generations:[ v1 ]
+    ;;
+  end
+
   let exec = Exec.decl
   let ready = Ready.decl
+  let cancel_build = Cancel_build.decl
 end
 
 module Client = Dune_rpc_client.Client
@@ -73,7 +201,7 @@ type t =
 
 let name t = t.name
 
-let exec_action (t : t) (action : Action_exec.input) =
+let send_request ~info ~request ~payload t =
   let* { session; id = (module Id) } =
     match t.status with
     | Closed ->
@@ -93,19 +221,33 @@ let exec_action (t : t) (action : Action_exec.input) =
   in
   let (Session session) = session in
   let id = Dune_rpc.Id.make @@ Csexp.Atom (Int.to_string @@ Id.to_int @@ Id.gen ()) in
-  if !Log.verbose
-  then
-    Log.info
+  if !Log.verbose then Log.info info;
+  Dune_rpc_server.Session.request
+    session
+    (Dune_rpc.Decl.Request.witness request)
+    id
+    payload
+;;
+
+let exec_action (t : t) (action : Action_exec.input) =
+  send_request
+    ~info:
       [ Pp.textf
           "dispatching action at %s to %s"
           (Path.to_string_maybe_quoted action.root)
           t.name
-      ];
-  Dune_rpc_server.Session.request
-    session
-    (Dune_rpc.Decl.Request.witness Decl.exec)
-    id
-    action
+      ]
+    ~request:Decl.exec
+    ~payload:action
+    t
+;;
+
+let cancel_build (t : t) =
+  send_request
+    ~info:[ Pp.textf "cancelling all builds at %s" t.name ]
+    ~request:Decl.cancel_build
+    ~payload:()
+    t
 ;;
 
 let _to_dyn { name; id; status } =
@@ -123,6 +265,7 @@ module Rpc_server = struct
     { workers = Table.create (module String) 16; pool = Fiber.Pool.create () }
   ;;
 
+  let all_runners t = Table.values t.workers
   let run t = Fiber.Pool.run t.pool
   let stop t = Fiber.Pool.close t.pool
 
@@ -141,8 +284,22 @@ module Rpc_server = struct
 
   let implement_handler t (handler : _ Dune_rpc_server.Handler.t) =
     Dune_rpc_server.Handler.declare_request handler Decl.exec;
+    Dune_rpc_server.Handler.declare_request handler Decl.cancel_build;
     Dune_rpc_server.Handler.implement_request handler Decl.ready
     @@ fun session name ->
+    let socket_name = Dune_rpc_server.Session.name session in
+    if not (name = socket_name)
+    then (
+      let error =
+        Dune_rpc.Response.Error.create
+          ~payload:
+            (Sexp.record
+               [ "name", Csexp.Atom name; "socket_name", Csexp.Atom socket_name ])
+          ~kind:Invalid_request
+          ~message:"action runner connected to the wrong socket"
+          ()
+      in
+      raise (Dune_rpc.Response.Error.E error));
     match Table.find t.workers name with
     | None ->
       let error =
@@ -200,13 +357,18 @@ module Worker = struct
         [ Pp.text "action runner executing action:"
         ; Action.for_shell action.action |> Action_to_sh.pp
         ];
-      Fiber.collect_errors (fun () -> Action_exec.exec ~build_deps action)
+      Action_exec.exec ~build_deps action
   ;;
 
+  let cancel_build = Scheduler.stop_on_first_error
+
   let start ~name ~where =
     let* connection = Client.Connection.connect_exn where in
     let private_menu : Client.proc list =
-      [ Request Decl.ready; Handle_request (Decl.exec, exec_action) ]
+      [ Request Decl.ready
+      ; Handle_request (Decl.exec, exec_action)
+      ; Handle_request (Decl.cancel_build, cancel_build)
+      ]
     in
     let id = Dune_rpc.Id.make (Sexp.Atom name) in
     Dune_rpc.Initialize.Request.create ~id
diff --git a/src/dune_engine/action_runner.mli b/src/dune_engine/action_runner.mli
index a206c00dedb..80489006dd6 100644
--- a/src/dune_engine/action_runner.mli
+++ b/src/dune_engine/action_runner.mli
@@ -1,11 +1,12 @@
-open Import
-
 (** Action runners are instances capabale of executing dune actions outside of
     the build engine's process. *)
 
+type t
+
 module Rpc_server : sig
   (** The component of the RPC server required to orchestrate the runners. It's
       responsible for handing off sessions to action runners once they connect. *)
+  type runner := t
 
   type t
 
@@ -20,18 +21,21 @@ module Rpc_server : sig
 
   (** [stop t] is to be run by the rpc server *)
   val stop : t -> unit Fiber.t
-end
 
-type t
+  val all_runners : t -> runner list
+end
 
 val create : Rpc_server.t -> name:string -> t
 val name : t -> string
 
+(* CR-soon dkalinichenko: return [Exn_with_backtrace.t list] in the error case
+   after rgrinberg patches exception marshalling upstream. *)
+
 (** [exec_action worker action] dispatches [action] to [worker] *)
-val exec_action
-  :  t
-  -> Action_exec.input
-  -> (Action_exec.Exec_result.t, Exn_with_backtrace.t list) result Fiber.t
+val exec_action : t -> Action_exec.input -> Action_exec.Exec_result.t Fiber.t
+
+(** [cancel_build] cancels all actions being executed by [worker] *)
+val cancel_build : t -> unit Fiber.t
 
 module Worker : sig
   (** A worker is a runner of action *)
diff --git a/src/dune_engine/build_config.ml b/src/dune_engine/build_config.ml
index 0f1b99f16ea..f39f33d0199 100644
--- a/src/dune_engine/build_config.ml
+++ b/src/dune_engine/build_config.ml
@@ -96,6 +96,7 @@ type t =
   ; execution_parameters : dir:Path.Source.t -> Execution_parameters.t Memo.t
   ; source_tree : (module Source_tree)
   ; action_runner : Action_exec.input -> Action_runner.t option
+  ; action_runners : unit -> Action_runner.t list
   ; shared_cache : (module Shared_cache_intf.S)
   }
 
@@ -103,6 +104,7 @@ let t = Fdecl.create Dyn.opaque
 
 let set
   ~action_runner
+  ~action_runners
   ~stats
   ~contexts
   ~promote_source
@@ -140,6 +142,7 @@ let set
     ; execution_parameters
     ; source_tree
     ; action_runner
+    ; action_runners
     ; shared_cache
     }
 ;;
diff --git a/src/dune_engine/build_config.mli b/src/dune_engine/build_config.mli
index a110918e32c..e581f70d319 100644
--- a/src/dune_engine/build_config.mli
+++ b/src/dune_engine/build_config.mli
@@ -108,6 +108,7 @@ type t = private
   ; execution_parameters : dir:Path.Source.t -> Execution_parameters.t Memo.t
   ; source_tree : (module Source_tree)
   ; action_runner : Action_exec.input -> Action_runner.t option
+  ; action_runners : unit -> Action_runner.t list
   ; shared_cache : (module Shared_cache_intf.S)
   }
 
@@ -115,6 +116,7 @@ type t = private
     system and only once. *)
 val set
   :  action_runner:(Action_exec.input -> Action_runner.t option)
+  -> action_runners:(unit -> Action_runner.t list)
   -> stats:Dune_stats.t option
   -> contexts:Build_context.t list Memo.Lazy.t
   -> promote_source:
diff --git a/src/dune_engine/build_system.ml b/src/dune_engine/build_system.ml
index 83c65ec8682..8b59841ee04 100644
--- a/src/dune_engine/build_system.ml
+++ b/src/dune_engine/build_system.ml
@@ -412,7 +412,7 @@ end = struct
   module Exec_result = struct
     type t =
       { produced_targets : unit Targets.Produced.t
-      ; action_exec_result : Action_exec.Exec_result.t
+      ; action_exec_result : Action_exec.Exec_result.ok
       }
   end
 
@@ -497,6 +497,8 @@ end = struct
          | Some sandbox -> Sandbox.map_path sandbox root)
     in
     let* exec_result =
+      Fiber.collect_errors
+      @@ fun () ->
       with_locks locks ~f:(fun () ->
         let build_deps deps = Memo.run (build_deps deps) in
         let* action_exec_result =
@@ -512,12 +514,9 @@ end = struct
           in
           match (Build_config.get ()).action_runner input with
           | None -> Action_exec.exec input ~build_deps
-          | Some runner ->
-            Action_runner.exec_action runner input
-            >>= (function
-            | Ok res -> Fiber.return res
-            | Error exns -> Fiber.reraise_all exns)
+          | Some runner -> Action_runner.exec_action runner input
         in
+        let* action_exec_result = Action_exec.Exec_result.ok_exn action_exec_result in
         let+ produced_targets =
           match sandbox with
           | None -> Targets.Produced.produced_after_rule_executed_exn ~loc targets
@@ -533,7 +532,7 @@ end = struct
         in
         { Exec_result.produced_targets; action_exec_result })
     in
-    let+ () =
+    let* () =
       match sandbox with
       | Some sandbox -> Sandbox.destroy sandbox
       | None ->
@@ -541,7 +540,9 @@ end = struct
         Pending_targets.remove targets;
         Fiber.return ()
     in
-    exec_result
+    match exec_result with
+    | Ok res -> Fiber.return res
+    | Error l -> Fiber.reraise_all l
   ;;
 
   let promote_targets ~rule_mode ~dir ~targets ~promote_source =
@@ -940,9 +941,8 @@ end = struct
        | Some digest -> digest, File_target
        | None ->
          (match Cached_digest.build_file ~allow_dirs:true path with
-          | Ok digest ->
-            digest, Dir_target { generated_file_digests = targets }
-            (* Must be a directory target *)
+          | Ok digest -> digest, Dir_target { generated_file_digests = targets }
+          (* Must be a directory target *)
           | No_such_file
           | Broken_symlink
           | Cyclic_symlink
@@ -1234,7 +1234,17 @@ let report_early_exn exn =
   | false ->
     let open Fiber.O in
     let errors = Error.of_exn exn in
-    let+ () = State.add_errors errors in
+    let+ () = State.add_errors errors
+    and+ () =
+      match !Clflags.stop_on_first_error with
+      | true ->
+        let* () =
+          (Build_config.get ()).action_runners ()
+          |> Fiber.parallel_iter ~f:Action_runner.cancel_build
+        in
+        Scheduler.stop_on_first_error ()
+      | false -> Fiber.return ()
+    in
     (match !Clflags.report_errors_config with
      | Early | Twice -> Dune_util.Report_error.report exn
      | Deterministic -> ())
diff --git a/src/dune_engine/clflags.ml b/src/dune_engine/clflags.ml
index cf92e1f2150..e953486944a 100644
--- a/src/dune_engine/clflags.ml
+++ b/src/dune_engine/clflags.ml
@@ -5,6 +5,7 @@ module Promote = struct
 end
 
 let report_errors_config = ref Report_errors_config.default
+let stop_on_first_error = ref false
 let debug_digests = ref false
 let debug_fs_cache = ref false
 let wait_for_filesystem_clock = ref false
diff --git a/src/dune_engine/clflags.mli b/src/dune_engine/clflags.mli
index ea66105e464..f2658d007ab 100644
--- a/src/dune_engine/clflags.mli
+++ b/src/dune_engine/clflags.mli
@@ -2,6 +2,9 @@
 
 val report_errors_config : Report_errors_config.t ref
 
+(** Stop the build upon encountering an error. *)
+val stop_on_first_error : bool ref
+
 (** Capture the output of sub-commands *)
 val capture_outputs : bool ref
 
diff --git a/src/dune_engine/scheduler.ml b/src/dune_engine/scheduler.ml
index 60befe1074f..923f3d14e9b 100644
--- a/src/dune_engine/scheduler.ml
+++ b/src/dune_engine/scheduler.ml
@@ -121,6 +121,7 @@ module Event : sig
     | File_system_sync of Dune_file_watcher.Sync_id.t
     | File_system_watcher_terminated
     | Shutdown of Shutdown.Reason.t
+    | Stop_on_first_error
     | Fiber_fill_ivar of Fiber.fill
 
   module Queue : sig
@@ -156,6 +157,7 @@ module Event : sig
     val send_invalidation_event : t -> Memo.Invalidation.t -> unit
     val send_job_completed : t -> job -> Proc.Process_info.t -> unit
     val send_shutdown : t -> Shutdown.Reason.t -> unit
+    val send_stop_on_first_error : t -> unit
     val send_timers_completed : t -> Fiber.fill Nonempty_list.t -> unit
     val yield_if_there_are_pending_events : t -> unit Fiber.t
   end
@@ -170,6 +172,7 @@ end = struct
     | File_system_sync of Dune_file_watcher.Sync_id.t
     | File_system_watcher_terminated
     | Shutdown of Shutdown.Reason.t
+    | Stop_on_first_error
     | Fiber_fill_ivar of Fiber.fill
 
   module Invalidation_event = struct
@@ -186,6 +189,7 @@ end = struct
       ; file_watcher_tasks : (unit -> Dune_file_watcher.Event.t list) Queue.t
       ; mutable invalidation_events : Invalidation_event.t list
       ; mutable shutdown_reasons : Shutdown.Reason.Set.t
+      ; mutable got_stop_on_first_error : bool
       ; mutex : Mutex.t
       ; cond : Condition.t
       ; mutable pending_jobs : int
@@ -213,6 +217,7 @@ end = struct
       ; invalidation_events
       ; timers
       ; shutdown_reasons
+      ; got_stop_on_first_error = false
       ; mutex
       ; cond
       ; pending_jobs
@@ -259,6 +264,7 @@ end = struct
       type t
 
       val shutdown : t
+      val stop_on_first_error : t
       val file_watcher_task : t
       val invalidation : t
       val jobs_completed : t
@@ -280,6 +286,15 @@ end = struct
           Shutdown reason)
       ;;
 
+      let stop_on_first_error : t =
+        fun q ->
+        match q.got_stop_on_first_error with
+        | true ->
+          q.got_stop_on_first_error <- false;
+          Some Stop_on_first_error
+        | false -> None
+      ;;
+
       let file_watcher_task q =
         Option.map (Queue.pop q.file_watcher_tasks) ~f:(fun job -> File_watcher_task job)
       ;;
@@ -369,6 +384,7 @@ end = struct
                  ; jobs_completed
                  ; yield
                  ; timers
+                 ; stop_on_first_error
                  ]))
             q
         with
@@ -415,6 +431,10 @@ end = struct
         q.shutdown_reasons <- Shutdown.Reason.Set.add q.shutdown_reasons signal)
     ;;
 
+    let send_stop_on_first_error q =
+      add_event q (fun q -> q.got_stop_on_first_error <- true)
+    ;;
+
     let send_file_watcher_task q job =
       add_event q (fun q -> Queue.push q.file_watcher_tasks job)
     ;;
@@ -956,6 +976,7 @@ let prepare (config : Config.t) ~(handler : Handler.t) =
       (match signal_watcher with
        | `Yes -> Signal_watcher.init events
        | `No -> ());
+      let cancel = Fiber.Cancel.create () in
       let process_watcher = Process_watcher.init ~signal_watcher events in
       { status =
           (* Slightly weird initialization happening here: for polling mode we
@@ -964,7 +985,7 @@ let prepare (config : Config.t) ~(handler : Handler.t) =
              "Stand_by" from the start. We can't "just" switch the initial value
              here because then the non-polling mode would run in "Standing_by"
              mode, which is even weirder. *)
-          ref (Building (Fiber.Cancel.create ()))
+          ref (Building cancel)
       ; job_throttle = Fiber.Throttle.create config.concurrency
       ; process_watcher
       ; events
@@ -974,11 +995,7 @@ let prepare (config : Config.t) ~(handler : Handler.t) =
       ; fs_syncs = Dune_file_watcher.Sync_id.Table.create 64
       ; wait_for_build_input_change = ref None
       ; alarm_clock = lazy (Alarm_clock.create ~signal_watcher events ~frequency:0.1)
-      ; cancel =
-          (* This cancellation will never be fired, so this field could instead
-             be an [option]. We use a dummy cancellation rather than an option
-             to keep the code simpler. *)
-          Fiber.Cancel.create ()
+      ; cancel
       ; thread_pool = Thread_pool.create ~spawn_thread ~min_workers:4 ~max_workers:50
       } )
 ;;
@@ -1038,6 +1055,23 @@ end = struct
     | Shutdown reason ->
       got_shutdown reason;
       raise @@ Abort (Shutdown_requested reason)
+    | Stop_on_first_error ->
+      let fills =
+        match !(t.status) with
+        | Restarting_build _ -> []
+        | Standing_by _ -> []
+        | Building cancellation ->
+          t.handler t.config Build_interrupted;
+          t.status
+            := Standing_by
+                 { invalidation = Memo.Invalidation.empty
+                 ; saw_insignificant_changes = false
+                 };
+          Fiber.Cancel.fire' cancellation
+      in
+      (match Nonempty_list.of_list fills with
+       | None -> iter t
+       | Some fills -> fills)
 
   and build_input_change (t : t) events =
     let invalidation = handle_invalidation_events events in
@@ -1189,8 +1223,13 @@ module Run = struct
     let* res = set { t with cancel } (fun () -> step) in
     match !(t.status) with
     | Standing_by _ ->
-      (* We just finished a build, so there's no way this was set *)
-      assert false
+      let res : Build_outcome.t =
+        match res with
+        | Error `Already_reported -> Failure
+        | Ok () -> Success
+      in
+      t.handler t.config (Build_finish res);
+      Fiber.return res
     | Restarting_build invalidation -> poll_iter t step ~invalidation
     | Building _ ->
       let res : Build_outcome.t =
@@ -1340,7 +1379,7 @@ module Run = struct
       | `Kill pid ->
         (* XXX this can't be right because if we ignore the fiber,
            we will not wait for the process *)
-        ignore (wait_for_process t pid : _ Fiber.t)
+        ignore (wait_for_build_process t pid : _ Fiber.t)
       | `Thunk f -> f ()
       | `No_op -> ());
     ignore (kill_and_wait_for_all_processes t : saw_shutdown);
@@ -1357,6 +1396,11 @@ let shutdown () =
   Event.Queue.send_shutdown t.events Requested
 ;;
 
+let stop_on_first_error () =
+  let+ t = t () in
+  Event.Queue.send_stop_on_first_error t.events
+;;
+
 let inject_memo_invalidation invalidation =
   let* t = t () in
   Event.Queue.send_invalidation_event t.events invalidation;
diff --git a/src/dune_engine/scheduler.mli b/src/dune_engine/scheduler.mli
index 5597ff29795..63893623228 100644
--- a/src/dune_engine/scheduler.mli
+++ b/src/dune_engine/scheduler.mli
@@ -136,6 +136,12 @@ val running_jobs_count : t -> int
     will get suspended and will never restart. *)
 val shutdown : unit -> unit Fiber.t
 
+(** Cancel the current build. Superficially, this function is like [shutdown]
+    in that it stops the build early, but it is different because the [Run.go]
+    call is allowed to complete its fiber. In this respect, the behavior is
+    similar to what happens on file system events in polling mode. *)
+val stop_on_first_error : unit -> unit Fiber.t
+
 val inject_memo_invalidation : Memo.Invalidation.t -> unit Fiber.t
 
 (** [sleep duration] wait for [duration] to elapse. Sleepers are checked for
diff --git a/src/dune_rpc_server/dune_rpc_server.ml b/src/dune_rpc_server/dune_rpc_server.ml
index 7d2304e0437..2f412cb580d 100644
--- a/src/dune_rpc_server/dune_rpc_server.ml
+++ b/src/dune_rpc_server/dune_rpc_server.ml
@@ -188,6 +188,8 @@ module Session = struct
         ; "name", Dyn.string name
         ]
     ;;
+
+    let name t = t.name
   end
 
   type 'a t =
@@ -259,6 +261,8 @@ module Session = struct
       t.pollers <- Dune_rpc_private.Id.Map.remove t.pollers id;
       Some poller
   ;;
+
+  let name t = t.base.name
 end
 
 type message_kind =
diff --git a/src/dune_rpc_server/dune_rpc_server.mli b/src/dune_rpc_server/dune_rpc_server.mli
index 951290483ad..708a8b0f498 100644
--- a/src/dune_rpc_server/dune_rpc_server.mli
+++ b/src/dune_rpc_server/dune_rpc_server.mli
@@ -9,6 +9,9 @@ module Session : sig
 
   val id : _ t -> Id.t
 
+  (** Name of the endpoint the session is connected to. *)
+  val name : _ t -> string
+
   (** [get session] returns the current session state. It is an error to access
       the state after [on_terminate] finishes. *)
   val get : 'a t -> 'a
@@ -57,6 +60,7 @@ module Session : sig
     val initialize : _ t -> Initialize.Request.t
     val close : 'a t -> unit Fiber.t
     val to_dyn : ('a -> Dyn.t) -> 'a t -> Dyn.t
+    val name : _ t -> string
   end
 end
 
@@ -151,6 +155,7 @@ module Make (S : sig
         closed. *)
     val read : t -> Sexp.t option Fiber.t
 
+    (* [name t] returns the name of the endpoint the session is connected to. *)
     val name : t -> string
   end) : sig
   (** [serve sessions handler] serve all [sessions] using [handler] *)
diff --git a/src/dune_rules/main.ml b/src/dune_rules/main.ml
index a2cc5826fde..67c3dea7d70 100644
--- a/src/dune_rules/main.ml
+++ b/src/dune_rules/main.ml
@@ -43,6 +43,7 @@ let execution_parameters =
 
 let init
   ?(action_runner = fun _ -> None)
+  ?(action_runners = fun _ -> [])
   ~stats
   ~sandboxing_preference
   ~cache_config
@@ -91,6 +92,7 @@ let init
     ~source_tree:(module Source_tree)
     ~shared_cache:(module Shared_cache)
     ~action_runner
+    ~action_runners
 ;;
 
 let get () =
diff --git a/src/dune_rules/main.mli b/src/dune_rules/main.mli
index 9fc5a4e19f4..de4e55f7268 100644
--- a/src/dune_rules/main.mli
+++ b/src/dune_rules/main.mli
@@ -3,6 +3,7 @@ open Import
 (** Tie the knot between [Dune_engine] and [Dune_rules]. *)
 val init
   :  ?action_runner:(Dune_engine.Action_exec.input -> Dune_engine.Action_runner.t option)
+  -> ?action_runners:(unit -> Dune_engine.Action_runner.t list)
   -> stats:Dune_stats.t option
   -> sandboxing_preference:Sandbox_mode.t list
   -> cache_config:Dune_cache.Config.t
diff --git a/test/blackbox-tests/test-cases/action-runner/build-e2e.t b/test/blackbox-tests/test-cases/action-runner/build-e2e.t
index 0d3499460ec..10b9d02175e 100644
--- a/test/blackbox-tests/test-cases/action-runner/build-e2e.t
+++ b/test/blackbox-tests/test-cases/action-runner/build-e2e.t
@@ -6,16 +6,20 @@ We build a project by delegating some build commands to action runners
 
 ar1 and ar2 will be built by action runners. While self will be built by the
 dune command
-  $ mkdir ar1 ar2 self
 
-  $ echo "(rule (with-stdout-to aaa (echo xxx)))" > ar1/dune
-  $ echo "(rule (with-stdout-to bbb (echo yyy)))" > ar2/dune
-  $ echo "(rule (with-stdout-to ccc (echo zzz)))" > self/dune
+CR rgrinberg: tests are disabled because dune's public binary for running
+action runners is no longer relevant.
 
-  $ timeout 2 dune internal action-runner build --runner ar1 --runner ar2
-  $ ar1="_build/ar1.*.log"
-  $ grep -e "# mkdir.*aaa" $ar1
-  # mkdir -p _build/default/ar1;cd _build/default/ar1;echo -n xxx > aaa
-  $ ar2="_build/ar2.*.log"
-  $ grep -e "# mkdir.*bbb" $ar2
-  # mkdir -p _build/default/ar2;cd _build/default/ar2;echo -n yyy > bbb
+#   $ mkdir ar1 ar2 self
+# 
+#   $ echo "(rule (with-stdout-to aaa (echo xxx)))" > ar1/dune
+#   $ echo "(rule (with-stdout-to bbb (echo yyy)))" > ar2/dune
+#   $ echo "(rule (with-stdout-to ccc (echo zzz)))" > self/dune
+# 
+#   $ timeout 2 dune internal action-runner build --runner ar1 --runner ar2
+#   $ ar1="_build/ar1.*.log"
+#   $ grep -e "# mkdir.*aaa" $ar1
+#   # mkdir -p _build/default/ar1;cd _build/default/ar1;echo -n xxx > aaa
+#   $ ar2="_build/ar2.*.log"
+#   $ grep -e "# mkdir.*bbb" $ar2
+#   # mkdir -p _build/default/ar2;cd _build/default/ar2;echo -n yyy > bbb
diff --git a/test/expect-tests/dune_action_runner/dune_action_runner.ml b/test/expect-tests/dune_action_runner/dune_action_runner.ml
old mode 100755
new mode 100644
index 2fc2fa4bb97..f3576948083
--- a/test/expect-tests/dune_action_runner/dune_action_runner.ml
+++ b/test/expect-tests/dune_action_runner/dune_action_runner.ml
@@ -8,7 +8,7 @@ module Scheduler = Dune_engine.Scheduler
 module Server = Dune_rpc_server.Make (struct
     include Csexp_rpc.Session
 
-    let name _ = "unnamed"
+    let name _ = "foo"
   end)
 
 module Action_exec = Dune_engine.Action_exec
@@ -102,9 +102,7 @@ let run () =
           ; action
           }
         in
-        let+ (_ : (Action_exec.Exec_result.t, Exn_with_backtrace.t list) result) =
-          Action_runner.exec_action worker action
-        in
+        let+ (_ : Action_exec.Exec_result.t) = Action_runner.exec_action worker action in
         print_endline "executed action";
         Unix.kill (Pid.to_int pid) Sys.sigterm)
   in