Skip to content

Commit

Permalink
Keep builds even if they fail
Browse files Browse the repository at this point in the history
  • Loading branch information
patricoferris committed May 3, 2024
1 parent efd76cb commit cf81e9a
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 24 deletions.
29 changes: 19 additions & 10 deletions lib/build.ml
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ module Make (Raw_store : S.STORE) (Sandbox : S.SANDBOX) (Fetch : S.FETCHER) = st
end >>!= fun src_dir ->
let src_manifest = sequence (List.map (Manifest.generate ~exclude ~src_dir) src) in
match Result.bind src_manifest (to_copy_op ~dst) with
| Error _ as e -> Lwt.return e
| Error _ as e -> Lwt.return (e :> ('a, [> `Msg of string | `Failed of (S.id * string) | `Cancelled ]) result)
| Ok op ->
let details = {
base;
Expand All @@ -200,7 +200,7 @@ module Make (Raw_store : S.STORE) (Sandbox : S.SANDBOX) (Fetch : S.FETCHER) = st
} in
(* Fmt.pr "COPY: %a@." Sexplib.Sexp.pp_hum (sexp_of_copy_details details); *)
let id = Sha256.to_hex (Sha256.string (Sexplib.Sexp.to_string (sexp_of_copy_details details))) in
Store.build t.store ?switch ~base ~id ~log (fun ~cancelled ~log result_tmp ->
let res = Store.build t.store ?switch ~base ~id ~log (fun ~cancelled ~log result_tmp ->
let argv = `Run ["tar"; "-xf"; "-"] in
let config = Config.v
~cwd:"/"
Expand Down Expand Up @@ -231,7 +231,8 @@ module Make (Raw_store : S.STORE) (Sandbox : S.SANDBOX) (Fetch : S.FETCHER) = st
proc >>= fun result ->
send >>= fun () ->
Lwt.return result
)
) in
(res : (string, [`Cancelled | `Failed of (S.id * string)]) Lwt_result.t :> (string, [> `Cancelled | `Msg of string | `Failed of (S.id * string) ]) Lwt_result.t)

let pp_op ~(context:Context.t) f op =
Fmt.pf f "@[<v2>%s: %a@]" context.workdir Obuilder_spec.pp_op op
Expand All @@ -255,25 +256,30 @@ module Make (Raw_store : S.STORE) (Sandbox : S.SANDBOX) (Fetch : S.FETCHER) = st
result >>= fun result ->
mount_secret values secret >>| fun resolved_secret ->
(resolved_secret :: result) ) (Ok []) secrets

let rec run_steps t ~(context:Context.t) ~base = function
| [] -> Sandbox.finished () >>= fun () -> Lwt_result.return base
| op :: ops ->
context.log `Heading Fmt.(str "%a" (pp_op ~context) op);
let k = run_steps t ops in
let k : context:Context.t -> base:string -> ( string, [ `Cancelled | `Failed of string * string | `Msg of string ] ) Lwt_result.t = fun ~context ~base ->
(run_steps t ops ~context ~base :> ( string, [ `Cancelled | `Failed of string * string | `Msg of string ] ) Lwt_result.t)
in
match op with
| `Comment _ -> k ~base ~context
| `Workdir workdir -> k ~base ~context:(update_workdir ~context workdir)
| `User user -> k ~base ~context:{context with user}
| `Run { shell = cmd; cache; network; secrets = mount_secrets; rom } ->
| `Run { shell = cmd; cache; network; secrets = mount_secrets; rom } -> (
let result =
let { Context.switch; workdir; user; env; shell; log; src_dir = _; scope = _; secrets } = context in
resolve_secrets secrets mount_secrets |> Result.map @@ fun mount_secrets ->
(switch, { base; workdir; user; env; cmd; shell; network; mount_secrets }, log)
in
Lwt.return result >>!= fun (switch, run_input, log) ->
run t ~switch ~log ~cache ~rom run_input >>!= fun base ->
k ~base ~context
run t ~switch ~log ~cache ~rom run_input >>= fun base ->
match base with
| Ok base -> k ~base ~context
| Error _ as e -> Lwt.return e
)
| `Copy x ->
copy t ~context ~base x >>!= fun base ->
k ~base ~context
Expand Down Expand Up @@ -356,7 +362,10 @@ module Make (Raw_store : S.STORE) (Sandbox : S.SANDBOX) (Fetch : S.FETCHER) = st

let build t context spec =
let r = build t context spec in
(r : (string, [ `Cancelled | `Msg of string ]) Lwt_result.t :> (string, [> `Cancelled | `Msg of string ]) Lwt_result.t)
(r : ( string,
[ `Cancelled | `Failed of string * string | `Msg of string ]
)
Lwt_result.t :> (string, [> `Cancelled | `Msg of string | `Failed of (S.id * string) ]) Lwt_result.t)

let delete ?log t id =
Store.delete ?log t.store id
Expand Down Expand Up @@ -638,7 +647,7 @@ module Make_Docker (Raw_store : S.STORE) = struct

let build t context spec =
let r = build ~scope:[] t context spec in
(r : (string, [ `Cancelled | `Msg of string ]) Lwt_result.t :> (string, [> `Cancelled | `Msg of string ]) Lwt_result.t)
(r :> (string, [> `Cancelled | `Msg of string | `Failed of (S.id * string) ]) Lwt_result.t)

let delete ?log t id =
Store.delete ?log t.store id
Expand Down
27 changes: 17 additions & 10 deletions lib/db_store.ml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ module Make (Raw : S.STORE) = struct
mutable users : int;
set_cancelled : unit Lwt.u; (* Resolve this to cancel (when [users = 0]). *)
log : Build_log.t Lwt.t;
result : (([`Loaded | `Saved] * S.id), [`Cancelled | `Msg of string]) Lwt_result.t;
result : (([`Loaded | `Saved] * S.id), [`Cancelled | `Failed of (S.id * string)]) Lwt_result.t;
base : string option;
}

Expand Down Expand Up @@ -69,10 +69,13 @@ module Make (Raw : S.STORE) = struct
Lwt.wakeup set_log log;
fn ~cancelled ~log dir
)
>>!= fun () ->
let now = Unix.(gmtime (gettimeofday () )) in
Dao.add t.dao ?parent:base ~id ~now;
Lwt_result.return (`Saved, id)
>>= function
| Ok () ->
let now = Unix.(gmtime (gettimeofday () )) in
Dao.add t.dao ?parent:base ~id ~now;
Lwt_result.return (`Saved, id)
| Error `Cancelled -> Lwt.return (Error `Cancelled)
| Error (`Msg m) -> Lwt.return (Error (`Failed (id, m)))

let log_ty client_log ~id = function
| `Loaded -> client_log `Note (Fmt.str "---> using %S from cache" id)
Expand Down Expand Up @@ -120,21 +123,25 @@ module Make (Raw : S.STORE) = struct
(fun () -> get_build t ~base ~id ~cancelled ~set_log fn)
(fun r ->
t.in_progress <- Builds.remove id t.in_progress;
Lwt.wakeup_later set_result r;
finish_log ~set_log log
finish_log ~set_log log >|= fun () ->
Lwt.wakeup_later set_result r
)
(fun ex ->
Log.info (fun f -> f "Build %S error: %a" id Fmt.exn ex);
t.in_progress <- Builds.remove id t.in_progress;
Lwt.wakeup_later_exn set_result ex;
finish_log ~set_log log
finish_log ~set_log log >|= fun () ->
Lwt.wakeup_later_exn set_result ex
)
);
tail_log >>!= fun () ->
result >>!= fun (ty, r) ->
log_ty client_log ~id ty;
Lwt_result.return r


let build ?switch t ?base ~id ~log:client_log fn =
let res = build ?switch t ?base ~id ~log:client_log fn in
(res : (string, [ `Cancelled | `Failed of string * string ]) Lwt_result.t :> (string, [> `Cancelled | `Failed of string * string ]) Lwt_result.t)

let result t id = Raw.result t.raw id
let count t = Dao.count t.dao
let df t = Raw.df t.raw
Expand Down
2 changes: 1 addition & 1 deletion lib/db_store.mli
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ module Make (Raw : S.STORE) : sig
id:S.id ->
log:S.logger ->
(cancelled:unit Lwt.t -> log:Build_log.t -> string -> (unit, [`Cancelled | `Msg of string]) Lwt_result.t) ->
(S.id, [`Cancelled | `Msg of string]) Lwt_result.t
(S.id, [> `Cancelled | `Failed of (S.id * string)]) Lwt_result.t
(** [build t ~id ~log fn] ensures that [id] is cached, using [fn ~cancelled ~log dir] to build it if not.
If [cancelled] resolves, the build should be cancelled.
If [id] is already in the process of being built, this just attaches to the existing build.
Expand Down
2 changes: 1 addition & 1 deletion lib/rsync_store.ml
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ let build t ?base ~id fn =
(fun r ->
begin match r with
| Ok () -> Rsync.rename_with_sharing ~mode:t.mode ~base ~src:result_tmp ~dst:result
| Error _ -> Rsync.delete result_tmp
| Error _ -> Rsync.rename_with_sharing ~mode:t.mode ~base ~src:result_tmp ~dst:result
end >>= fun () ->
Lwt.return r
)
Expand Down
2 changes: 1 addition & 1 deletion lib/s.ml
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ module type BUILDER = sig
t ->
context ->
Obuilder_spec.t ->
(id, [> `Cancelled | `Msg of string]) Lwt_result.t
(id, [> `Cancelled | `Msg of string | `Failed of (id * string)]) Lwt_result.t

val shell :
t ->
Expand Down
4 changes: 3 additions & 1 deletion lib/zfs_store.ml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ end = struct

let state = "state"
let result_group = "result"
let failed_group = "failed"
let cache_group = "cache"
let cache_tmp_group = "cache-tmp"

Expand Down Expand Up @@ -277,7 +278,8 @@ let build t ?base ~id fn =
Lwt_result.return ()
| Error _ as e ->
Log.debug (fun f -> f "zfs: build %S failed" id);
Zfs.destroy t ds `And_snapshots >>= fun () ->
(* Don't delete build results that fail *)
(* Zfs.destroy t ds `And_snapshots >>= fun () -> *)
Lwt.return e
)
(fun ex ->
Expand Down

0 comments on commit cf81e9a

Please sign in to comment.